mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-04-22 11:09:02 -05:00
chore(deps): bump github.com/nats-io/nats-server/v2
Bumps [github.com/nats-io/nats-server/v2](https://github.com/nats-io/nats-server) from 2.10.16 to 2.10.18. - [Release notes](https://github.com/nats-io/nats-server/releases) - [Changelog](https://github.com/nats-io/nats-server/blob/main/.goreleaser.yml) - [Commits](https://github.com/nats-io/nats-server/compare/v2.10.16...v2.10.18) --- updated-dependencies: - dependency-name: github.com/nats-io/nats-server/v2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
committed by
Ralf Haferkamp
parent
ed13b043eb
commit
3f446bbf8b
+4
-6
@@ -5,7 +5,6 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// func matchLen(a []byte, b []byte) int
|
||||
// Requires: BMI
|
||||
TEXT ·matchLen(SB), NOSPLIT, $0-56
|
||||
MOVQ a_base+0(FP), AX
|
||||
MOVQ b_base+24(FP), CX
|
||||
@@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
|
||||
JB matchlen_match4_standalone
|
||||
|
||||
matchlen_loopback_standalone:
|
||||
MOVQ (AX)(SI*1), BX
|
||||
XORQ (CX)(SI*1), BX
|
||||
TESTQ BX, BX
|
||||
JZ matchlen_loop_standalone
|
||||
MOVQ (AX)(SI*1), BX
|
||||
XORQ (CX)(SI*1), BX
|
||||
JZ matchlen_loop_standalone
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
TZCNTQ BX, BX
|
||||
#else
|
||||
BSFQ BX, BX
|
||||
#endif
|
||||
SARQ $0x03, BX
|
||||
SHRL $0x03, BX
|
||||
LEAL (SI)(BX*1), SI
|
||||
JMP gen_match_len_end
|
||||
|
||||
|
||||
+1
-1
@@ -60,7 +60,7 @@
|
||||
//
|
||||
// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
|
||||
// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
|
||||
TEXT ·s2Decode(SB), NOSPLIT, $56-64
|
||||
TEXT ·s2Decode(SB), NOSPLIT, $56-56
|
||||
// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
|
||||
MOVD dst_base+0(FP), R_DBASE
|
||||
MOVD dst_len+8(FP), R_DLEN
|
||||
|
||||
+8
-2
@@ -17,6 +17,8 @@ const (
|
||||
S2IndexHeader = "s2idx\x00"
|
||||
S2IndexTrailer = "\x00xdi2s"
|
||||
maxIndexEntries = 1 << 16
|
||||
// If distance is less than this, we do not add the entry.
|
||||
minIndexDist = 1 << 20
|
||||
)
|
||||
|
||||
// Index represents an S2/Snappy index.
|
||||
@@ -72,6 +74,10 @@ func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
|
||||
if latest.compressedOffset > compressedOffset {
|
||||
return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
|
||||
}
|
||||
if latest.uncompressedOffset+minIndexDist > uncompressedOffset {
|
||||
// Only add entry if distance is large enough.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
i.info = append(i.info, struct {
|
||||
compressedOffset int64
|
||||
@@ -122,7 +128,7 @@ func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err er
|
||||
|
||||
// reduce to stay below maxIndexEntries
|
||||
func (i *Index) reduce() {
|
||||
if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 {
|
||||
if len(i.info) < maxIndexEntries && i.estBlockUncomp >= minIndexDist {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -132,7 +138,7 @@ func (i *Index) reduce() {
|
||||
j := 0
|
||||
|
||||
// Each block should be at least 1MB, but don't reduce below 1000 entries.
|
||||
for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 {
|
||||
for i.estBlockUncomp*(int64(removeN)+1) < minIndexDist && len(i.info)/(removeN+1) > 1000 {
|
||||
removeN++
|
||||
}
|
||||
for idx := 0; idx < len(src); idx++ {
|
||||
|
||||
+5
-1
@@ -109,7 +109,11 @@ const (
|
||||
chunkTypeStreamIdentifier = 0xff
|
||||
)
|
||||
|
||||
var crcTable = crc32.MakeTable(crc32.Castagnoli)
|
||||
var (
|
||||
crcTable = crc32.MakeTable(crc32.Castagnoli)
|
||||
magicChunkSnappyBytes = []byte(magicChunkSnappy) // Can be passed to functions where it escapes.
|
||||
magicChunkBytes = []byte(magicChunk) // Can be passed to functions where it escapes.
|
||||
)
|
||||
|
||||
// crc implements the checksum specified in section 3 of
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
|
||||
+16
-10
@@ -239,6 +239,9 @@ func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) {
|
||||
}
|
||||
}
|
||||
if n2 == 0 {
|
||||
if cap(inbuf) >= w.obufLen {
|
||||
w.buffers.Put(inbuf)
|
||||
}
|
||||
break
|
||||
}
|
||||
n += int64(n2)
|
||||
@@ -314,9 +317,9 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) {
|
||||
hWriter := make(chan result)
|
||||
w.output <- hWriter
|
||||
if w.snappy {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
|
||||
} else {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -370,9 +373,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
|
||||
hWriter := make(chan result)
|
||||
w.output <- hWriter
|
||||
if w.snappy {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
|
||||
} else {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -478,9 +481,9 @@ func (w *Writer) write(p []byte) (nRet int, errRet error) {
|
||||
hWriter := make(chan result)
|
||||
w.output <- hWriter
|
||||
if w.snappy {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
|
||||
} else {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -560,6 +563,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) {
|
||||
|
||||
if w.concurrency == 1 {
|
||||
_, err := w.writeSync(inbuf[obufHeaderLen:])
|
||||
if cap(inbuf) >= w.obufLen {
|
||||
w.buffers.Put(inbuf)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -569,9 +575,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) {
|
||||
hWriter := make(chan result)
|
||||
w.output <- hWriter
|
||||
if w.snappy {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
|
||||
} else {
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
|
||||
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -637,9 +643,9 @@ func (w *Writer) writeSync(p []byte) (nRet int, errRet error) {
|
||||
var n int
|
||||
var err error
|
||||
if w.snappy {
|
||||
n, err = w.writer.Write([]byte(magicChunkSnappy))
|
||||
n, err = w.writer.Write(magicChunkSnappyBytes)
|
||||
} else {
|
||||
n, err = w.writer.Write([]byte(magicChunk))
|
||||
n, err = w.writer.Write(magicChunkBytes)
|
||||
}
|
||||
if err != nil {
|
||||
return 0, w.err(err)
|
||||
|
||||
+31
@@ -273,6 +273,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
|
||||
enc.Encode(&block, b)
|
||||
addValues(&remain, block.literals)
|
||||
litTotal += len(block.literals)
|
||||
if len(block.sequences) == 0 {
|
||||
continue
|
||||
}
|
||||
seqs += len(block.sequences)
|
||||
block.genCodes()
|
||||
addHist(&ll, block.coders.llEnc.Histogram())
|
||||
@@ -286,6 +289,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
|
||||
if offset == 0 {
|
||||
continue
|
||||
}
|
||||
if int(offset) >= len(o.History) {
|
||||
continue
|
||||
}
|
||||
if offset > 3 {
|
||||
newOffsets[offset-3]++
|
||||
} else {
|
||||
@@ -336,6 +342,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
|
||||
if seqs/nUsed < 512 {
|
||||
// Use 512 as minimum.
|
||||
nUsed = seqs / 512
|
||||
if nUsed == 0 {
|
||||
nUsed = 1
|
||||
}
|
||||
}
|
||||
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
|
||||
hist := dst.Histogram()
|
||||
@@ -358,6 +367,28 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
|
||||
fakeLength += v
|
||||
hist[i] = uint32(v)
|
||||
}
|
||||
|
||||
// Ensure we aren't trying to represent RLE.
|
||||
if maxCount == fakeLength {
|
||||
for i := range hist {
|
||||
if uint8(i) == maxSym {
|
||||
fakeLength++
|
||||
maxSym++
|
||||
hist[i+1] = 1
|
||||
if maxSym > 1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if hist[0] == 0 {
|
||||
fakeLength++
|
||||
hist[i] = 1
|
||||
if maxSym > 1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dst.HistogramFinished(maxSym, maxCount)
|
||||
dst.reUsed = false
|
||||
dst.useRLE = false
|
||||
|
||||
+2
-2
@@ -162,12 +162,12 @@ finalize:
|
||||
MOVD h, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func writeBlocks(d *Digest, b []byte) int
|
||||
// func writeBlocks(s *Digest, b []byte) int
|
||||
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
|
||||
LDP ·primes+0(SB), (prime1, prime2)
|
||||
|
||||
// Load state. Assume v[1-4] are stored contiguously.
|
||||
MOVD d+0(FP), digest
|
||||
MOVD s+0(FP), digest
|
||||
LDP 0(digest), (v1, v2)
|
||||
LDP 16(digest), (v3, v4)
|
||||
|
||||
|
||||
+4
-6
@@ -5,7 +5,6 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// func matchLen(a []byte, b []byte) int
|
||||
// Requires: BMI
|
||||
TEXT ·matchLen(SB), NOSPLIT, $0-56
|
||||
MOVQ a_base+0(FP), AX
|
||||
MOVQ b_base+24(FP), CX
|
||||
@@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
|
||||
JB matchlen_match4_standalone
|
||||
|
||||
matchlen_loopback_standalone:
|
||||
MOVQ (AX)(SI*1), BX
|
||||
XORQ (CX)(SI*1), BX
|
||||
TESTQ BX, BX
|
||||
JZ matchlen_loop_standalone
|
||||
MOVQ (AX)(SI*1), BX
|
||||
XORQ (CX)(SI*1), BX
|
||||
JZ matchlen_loop_standalone
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
TZCNTQ BX, BX
|
||||
#else
|
||||
BSFQ BX, BX
|
||||
#endif
|
||||
SARQ $0x03, BX
|
||||
SHRL $0x03, BX
|
||||
LEAL (SI)(BX*1), SI
|
||||
JMP gen_match_len_end
|
||||
|
||||
|
||||
+2
-4
@@ -12,13 +12,11 @@ linters:
|
||||
- goimports
|
||||
- misspell
|
||||
- govet
|
||||
- golint
|
||||
- revive
|
||||
- ineffassign
|
||||
- gosimple
|
||||
- deadcode
|
||||
- unparam
|
||||
- unused
|
||||
- structcheck
|
||||
|
||||
issues:
|
||||
exclude-use-default: false
|
||||
@@ -27,4 +25,4 @@ issues:
|
||||
- error strings should not be capitalized or end with punctuation or a newline
|
||||
- should have comment # TODO(aead): Remove once all exported ident. have comments!
|
||||
service:
|
||||
golangci-lint-version: 1.20.0 # use the fixed version to not introduce new linters unexpectedly
|
||||
golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly
|
||||
|
||||
+9
-9
@@ -42,17 +42,17 @@ So for moderately sized messages it tops out at about 15 GB/sec. Also for small
|
||||
|
||||
### ARM Performance
|
||||
|
||||
Below are the single core results on an EC2 m6g.4xlarge (Graviton2) instance for 256 bit outputs:
|
||||
Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs:
|
||||
|
||||
```
|
||||
BenchmarkSum256_16 96.82 MB/s
|
||||
BenchmarkSum256_64 445.35 MB/s
|
||||
BenchmarkSum256_1K 2782.46 MB/s
|
||||
BenchmarkSum256_8K 4083.58 MB/s
|
||||
BenchmarkSum256_1M 4986.41 MB/s
|
||||
BenchmarkSum256_5M 4992.72 MB/s
|
||||
BenchmarkSum256_10M 4993.32 MB/s
|
||||
BenchmarkSum256_25M 4992.55 MB/s
|
||||
BenchmarkSum256_16 143.66 MB/s
|
||||
BenchmarkSum256_64 628.75 MB/s
|
||||
BenchmarkSum256_1K 3621.71 MB/s
|
||||
BenchmarkSum256_8K 5039.64 MB/s
|
||||
BenchmarkSum256_1M 5279.79 MB/s
|
||||
BenchmarkSum256_5M 5474.60 MB/s
|
||||
BenchmarkSum256_10M 5621.73 MB/s
|
||||
BenchmarkSum256_25M 5250.47 MB/s
|
||||
```
|
||||
|
||||
### ppc64le Performance
|
||||
|
||||
+132
@@ -0,0 +1,132 @@
|
||||
//
|
||||
// Copyright (c) 2024 Minio Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·getVectorLength(SB), NOSPLIT, $0
|
||||
WORD $0xd2800002 // mov x2, #0
|
||||
WORD $0x04225022 // addvl x2, x2, #1
|
||||
WORD $0xd37df042 // lsl x2, x2, #3
|
||||
WORD $0xd2800003 // mov x3, #0
|
||||
WORD $0x04635023 // addpl x3, x3, #1
|
||||
WORD $0xd37df063 // lsl x3, x3, #3
|
||||
MOVD R2, vl+0(FP)
|
||||
MOVD R3, pl+8(FP)
|
||||
RET
|
||||
|
||||
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
|
||||
MOVD state+0(FP), R0
|
||||
MOVD msg_base+8(FP), R1
|
||||
MOVD msg_len+16(FP), R2 // length of message
|
||||
SUBS $32, R2
|
||||
BMI completeSve
|
||||
|
||||
WORD $0x2518e3e1 // ptrue p1.b
|
||||
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
|
||||
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
|
||||
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
|
||||
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
|
||||
|
||||
// Load zipper merge constants table pointer
|
||||
MOVD $·zipperMergeSve(SB), R3
|
||||
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
|
||||
WORD $0x25b8c006 // mov z6.s, #0
|
||||
WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */
|
||||
|
||||
loopSve:
|
||||
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
|
||||
ADD $32, R1
|
||||
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
WORD $0x04e30042 // add z2.d, z2.d, z3.d
|
||||
WORD $0x04e09420 // lsr z0.d, z1.d, #32
|
||||
WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s
|
||||
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
|
||||
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
|
||||
WORD $0x04e10081 // add z1.d, z4.d, z1.d
|
||||
WORD $0x04e09440 // lsr z0.d, z2.d, #32
|
||||
WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s
|
||||
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
|
||||
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
|
||||
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
|
||||
WORD $0x04e00021 // add z1.d, z1.d, z0.d
|
||||
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
|
||||
SUBS $32, R2
|
||||
BPL loopSve
|
||||
|
||||
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
|
||||
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
|
||||
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
|
||||
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
|
||||
|
||||
completeSve:
|
||||
RET
|
||||
|
||||
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
|
||||
MOVD state+0(FP), R0
|
||||
MOVD msg_base+8(FP), R1
|
||||
MOVD msg_len+16(FP), R2 // length of message
|
||||
SUBS $32, R2
|
||||
BMI completeSve2
|
||||
|
||||
WORD $0x2518e3e1 // ptrue p1.b
|
||||
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
|
||||
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
|
||||
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
|
||||
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
|
||||
|
||||
// Load zipper merge constants table pointer
|
||||
MOVD $·zipperMergeSve(SB), R3
|
||||
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
|
||||
|
||||
loopSve2:
|
||||
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
|
||||
ADD $32, R1
|
||||
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
WORD $0x04e30042 // add z2.d, z2.d, z3.d
|
||||
WORD $0x04e09420 // lsr z0.d, z1.d, #32
|
||||
WORD $0x45c27800 // umullb z0.d, z0.s, z2.s
|
||||
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
|
||||
WORD $0x04e10081 // add z1.d, z4.d, z1.d
|
||||
WORD $0x04e09440 // lsr z0.d, z2.d, #32
|
||||
WORD $0x45c17800 // umullb z0.d, z0.s, z1.s
|
||||
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
|
||||
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
|
||||
WORD $0x04e00021 // add z1.d, z1.d, z0.d
|
||||
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
|
||||
WORD $0x04e00042 // add z2.d, z2.d, z0.d
|
||||
|
||||
SUBS $32, R2
|
||||
BPL loopSve2
|
||||
|
||||
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
|
||||
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
|
||||
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
|
||||
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
|
||||
|
||||
completeSve2:
|
||||
RET
|
||||
|
||||
DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03
|
||||
DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b
|
||||
DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
|
||||
DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
|
||||
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32
|
||||
+3
@@ -2,6 +2,7 @@
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//go:build amd64 && !gccgo && !appengine && !nacl && !noasm
|
||||
// +build amd64,!gccgo,!appengine,!nacl,!noasm
|
||||
|
||||
package highwayhash
|
||||
@@ -12,6 +13,8 @@ var (
|
||||
useSSE4 = cpu.X86.HasSSE41
|
||||
useAVX2 = cpu.X86.HasAVX2
|
||||
useNEON = false
|
||||
useSVE = false
|
||||
useSVE2 = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
|
||||
+38
-4
@@ -1,24 +1,54 @@
|
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Copyright (c) 2017-2024 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
//go:build !noasm && !appengine
|
||||
// +build !noasm,!appengine
|
||||
|
||||
package highwayhash
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/cpu"
|
||||
)
|
||||
|
||||
var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = true
|
||||
useNEON = cpu.ARM64.HasASIMD
|
||||
useSVE = cpu.ARM64.HasSVE
|
||||
useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
func init() {
|
||||
if useSVE {
|
||||
if vl, _ := getVectorLength(); vl != 256 {
|
||||
//
|
||||
// Since HighwayHash is designed for AVX2,
|
||||
// SVE/SVE2 instructions only run correctly
|
||||
// for vector length of 256
|
||||
//
|
||||
useSVE2 = false
|
||||
useSVE = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func initializeArm64(state *[16]uint64, key []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateArm64(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func getVectorLength() (vl, pl uint64)
|
||||
|
||||
//go:noescape
|
||||
func updateArm64Sve(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateArm64Sve2(state *[16]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func finalizeArm64(out []byte, state *[16]uint64)
|
||||
|
||||
@@ -31,7 +61,11 @@ func initialize(state *[16]uint64, key []byte) {
|
||||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
if useNEON {
|
||||
if useSVE2 {
|
||||
updateArm64Sve2(state, msg)
|
||||
} else if useSVE {
|
||||
updateArm64Sve(state, msg)
|
||||
} else if useNEON {
|
||||
updateArm64(state, msg)
|
||||
} else {
|
||||
updateGeneric(state, msg)
|
||||
|
||||
+216
-39
@@ -46,40 +46,113 @@ func initializeGeneric(state *[16]uint64, k []byte) {
|
||||
}
|
||||
|
||||
func updateGeneric(state *[16]uint64, msg []byte) {
|
||||
for len(msg) > 0 {
|
||||
// add message
|
||||
state[v1+0] += binary.LittleEndian.Uint64(msg)
|
||||
state[v1+1] += binary.LittleEndian.Uint64(msg[8:])
|
||||
state[v1+2] += binary.LittleEndian.Uint64(msg[16:])
|
||||
state[v1+3] += binary.LittleEndian.Uint64(msg[24:])
|
||||
|
||||
// v1 += mul0
|
||||
state[v1+0] += state[mul0+0]
|
||||
state[v1+1] += state[mul0+1]
|
||||
state[v1+2] += state[mul0+2]
|
||||
state[v1+3] += state[mul0+3]
|
||||
for len(msg) >= 32 {
|
||||
m := msg[:32]
|
||||
|
||||
// add message + mul0
|
||||
// Interleave operations to hide multiplication
|
||||
state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0]
|
||||
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32)
|
||||
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
|
||||
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
|
||||
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
|
||||
|
||||
// v0 += mul1
|
||||
state[v0+0] += state[mul1+0]
|
||||
state[v0+1] += state[mul1+1]
|
||||
state[v0+2] += state[mul1+2]
|
||||
state[v0+3] += state[mul1+3]
|
||||
|
||||
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32)
|
||||
|
||||
state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1]
|
||||
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
|
||||
state[v0+1] += state[mul1+1]
|
||||
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32)
|
||||
|
||||
state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2]
|
||||
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
|
||||
state[v0+2] += state[mul1+2]
|
||||
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32)
|
||||
|
||||
state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3]
|
||||
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
|
||||
state[v0+3] += state[mul1+3]
|
||||
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32)
|
||||
|
||||
zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
|
||||
zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
|
||||
// inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
|
||||
{
|
||||
val0 := state[v1+0]
|
||||
val1 := state[v1+1]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
|
||||
zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
|
||||
state[v0+0] += res
|
||||
state[v0+1] += res2
|
||||
}
|
||||
// inlined: zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
|
||||
{
|
||||
val0 := state[v1+2]
|
||||
val1 := state[v1+3]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
state[v0+2] += res
|
||||
state[v0+3] += res2
|
||||
}
|
||||
|
||||
// inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
|
||||
{
|
||||
val0 := state[v0+0]
|
||||
val1 := state[v0+1]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
state[v1+0] += res
|
||||
state[v1+1] += res2
|
||||
}
|
||||
|
||||
// inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
|
||||
{
|
||||
val0 := state[v0+2]
|
||||
val1 := state[v0+3]
|
||||
res := val0 & (0xff << (2 * 8))
|
||||
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
|
||||
res += (val1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (val0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (val1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (val0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (val1 & 0xff) << 48
|
||||
res += val0 << 56
|
||||
res2 += (val1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
state[v1+2] += res
|
||||
state[v1+3] += res2
|
||||
}
|
||||
msg = msg[32:]
|
||||
}
|
||||
}
|
||||
@@ -124,25 +197,129 @@ func finalizeGeneric(out []byte, state *[16]uint64) {
|
||||
}
|
||||
}
|
||||
|
||||
// Experiments on variations left for future reference...
|
||||
/*
|
||||
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
|
||||
m0 := v0 & (0xFF << (2 * 8))
|
||||
m1 := (v1 & (0xFF << (7 * 8))) >> 8
|
||||
m2 := ((v0 & (0xFF << (5 * 8))) + (v1 & (0xFF << (6 * 8)))) >> 16
|
||||
m3 := ((v0 & (0xFF << (3 * 8))) + (v1 & (0xFF << (4 * 8)))) >> 24
|
||||
m4 := (v0 & (0xFF << (1 * 8))) << 32
|
||||
m5 := v0 << 56
|
||||
if true {
|
||||
// fastest. original interleaved...
|
||||
res := v0 & (0xff << (2 * 8))
|
||||
res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||
res += (v1 & (0xff << (7 * 8))) >> 8
|
||||
res2 += (v0 & (0xff << (6 * 8))) >> 8
|
||||
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||
res2 += (v1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||
res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (v0 & (0xff << (1 * 8))) << 32
|
||||
res2 += (v1 & 0xff) << 48
|
||||
res += v0 << 56
|
||||
res2 += (v1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
*d0 += m0 + m1 + m2 + m3 + m4 + m5
|
||||
*d0 += res
|
||||
*d1 += res2
|
||||
} else if false {
|
||||
// Reading bytes and combining into uint64
|
||||
var v0b [8]byte
|
||||
binary.LittleEndian.PutUint64(v0b[:], v0)
|
||||
var v1b [8]byte
|
||||
binary.LittleEndian.PutUint64(v1b[:], v1)
|
||||
var res, res2 uint64
|
||||
|
||||
m0 = (v0 & (0xFF << (7 * 8))) + (v1 & (0xFF << (2 * 8)))
|
||||
m1 = (v0 & (0xFF << (6 * 8))) >> 8
|
||||
m2 = (v1 & (0xFF << (5 * 8))) >> 16
|
||||
m3 = ((v1 & (0xFF << (3 * 8))) + (v0 & (0xFF << (4 * 8)))) >> 24
|
||||
m4 = (v1 & 0xFF) << 48
|
||||
m5 = (v1 & (0xFF << (1 * 8))) << 24
|
||||
res = uint64(v0b[0]) << (7 * 8)
|
||||
res2 = uint64(v1b[0]) << (6 * 8)
|
||||
res |= uint64(v0b[1]) << (5 * 8)
|
||||
res2 |= uint64(v1b[1]) << (4 * 8)
|
||||
res |= uint64(v0b[2]) << (2 * 8)
|
||||
res2 |= uint64(v1b[2]) << (2 * 8)
|
||||
res |= uint64(v0b[3])
|
||||
res2 |= uint64(v0b[4]) << (1 * 8)
|
||||
res |= uint64(v0b[5]) << (3 * 8)
|
||||
res2 |= uint64(v0b[6]) << (5 * 8)
|
||||
res |= uint64(v1b[4]) << (1 * 8)
|
||||
res2 |= uint64(v0b[7]) << (7 * 8)
|
||||
res |= uint64(v1b[6]) << (4 * 8)
|
||||
res2 |= uint64(v1b[3])
|
||||
res |= uint64(v1b[7]) << (6 * 8)
|
||||
res2 |= uint64(v1b[5]) << (3 * 8)
|
||||
|
||||
*d1 += m3 + m2 + m5 + m1 + m4 + m0
|
||||
*d0 += res
|
||||
*d1 += res2
|
||||
|
||||
} else if false {
|
||||
// bytes to bytes shuffle
|
||||
var v0b [8]byte
|
||||
binary.LittleEndian.PutUint64(v0b[:], v0)
|
||||
var v1b [8]byte
|
||||
binary.LittleEndian.PutUint64(v1b[:], v1)
|
||||
var res [8]byte
|
||||
|
||||
//res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||
res[0] = v0b[3]
|
||||
res[1] = v1b[4]
|
||||
|
||||
// res := v0 & (0xff << (2 * 8))
|
||||
res[2] = v0b[2]
|
||||
|
||||
//res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||
res[3] = v0b[5]
|
||||
res[4] = v1b[6]
|
||||
|
||||
//res += (v0 & (0xff << (1 * 8))) << 32
|
||||
res[5] = v0b[1]
|
||||
|
||||
//res += (v1 & (0xff << (7 * 8))) >> 8
|
||||
res[6] += v1b[7]
|
||||
|
||||
//res += v0 << 56
|
||||
res[7] = v0b[0]
|
||||
v0 = binary.LittleEndian.Uint64(res[:])
|
||||
*d0 += v0
|
||||
|
||||
//res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||
res[0] = v1b[3]
|
||||
res[1] = v0b[4]
|
||||
|
||||
res[2] = v1b[2]
|
||||
|
||||
// res += (v1 & (0xff << (5 * 8))) >> 16
|
||||
res[3] = v1b[5]
|
||||
|
||||
//res += (v1 & (0xff << (1 * 8))) << 24
|
||||
res[4] = v1b[1]
|
||||
|
||||
// res += (v0 & (0xff << (6 * 8))) >> 8
|
||||
res[5] = v0b[6]
|
||||
|
||||
//res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||
res[7] = v0b[7]
|
||||
|
||||
//res += (v1 & 0xff) << 48
|
||||
res[6] = v1b[0]
|
||||
|
||||
v0 = binary.LittleEndian.Uint64(res[:])
|
||||
*d1 += v0
|
||||
} else {
|
||||
// original.
|
||||
res := v0 & (0xff << (2 * 8))
|
||||
res += (v1 & (0xff << (7 * 8))) >> 8
|
||||
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
|
||||
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
|
||||
res += (v0 & (0xff << (1 * 8))) << 32
|
||||
res += v0 << 56
|
||||
|
||||
*d0 += res
|
||||
|
||||
res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
|
||||
res += (v0 & (0xff << (6 * 8))) >> 8
|
||||
res += (v1 & (0xff << (5 * 8))) >> 16
|
||||
res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
|
||||
res += (v1 & 0xff) << 48
|
||||
res += (v1 & (0xff << (1 * 8))) << 24
|
||||
|
||||
*d1 += res
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x
|
||||
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
|
||||
|
||||
+4
-1
@@ -2,7 +2,8 @@
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
//go:build !noasm && !appengine
|
||||
// +build !noasm,!appengine
|
||||
|
||||
package highwayhash
|
||||
|
||||
@@ -10,6 +11,8 @@ var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useSVE = false
|
||||
useSVE2 = false
|
||||
useVMX = true
|
||||
)
|
||||
|
||||
|
||||
+3
@@ -2,6 +2,7 @@
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
//go:build noasm || (!amd64 && !arm64 && !ppc64le)
|
||||
// +build noasm !amd64,!arm64,!ppc64le
|
||||
|
||||
package highwayhash
|
||||
@@ -10,6 +11,8 @@ var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useSVE = false
|
||||
useSVE2 = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
|
||||
+13
-3
@@ -152,10 +152,20 @@ type Mapping map[Subject][]WeightedMapping
|
||||
func (m *Mapping) Validate(vr *ValidationResults) {
|
||||
for ubFrom, wm := range (map[Subject][]WeightedMapping)(*m) {
|
||||
ubFrom.Validate(vr)
|
||||
perCluster := make(map[string]uint8)
|
||||
total := uint8(0)
|
||||
for _, wm := range wm {
|
||||
wm.Subject.Validate(vr)
|
||||
total += wm.GetWeight()
|
||||
for _, e := range wm {
|
||||
e.Subject.Validate(vr)
|
||||
if e.Cluster != "" {
|
||||
t := perCluster[e.Cluster]
|
||||
t += e.Weight
|
||||
perCluster[e.Cluster] = t
|
||||
if t > 100 {
|
||||
vr.AddError("Mapping %q in cluster %q exceeds 100%% among all of it's weighted to mappings", ubFrom, e.Cluster)
|
||||
}
|
||||
} else {
|
||||
total += e.GetWeight()
|
||||
}
|
||||
}
|
||||
if total > 100 {
|
||||
vr.AddError("Mapping %q exceeds 100%% among all of it's weighted to mappings", ubFrom)
|
||||
|
||||
+78
-30
@@ -96,6 +96,9 @@ type Account struct {
|
||||
nameTag string
|
||||
lastLimErr int64
|
||||
routePoolIdx int
|
||||
// Guarantee that only one goroutine can be running either checkJetStreamMigrate
|
||||
// or clearObserverState at a given time for this account to prevent interleaving.
|
||||
jscmMu sync.Mutex
|
||||
}
|
||||
|
||||
const (
|
||||
@@ -1479,6 +1482,10 @@ func (a *Account) addServiceImportWithClaim(destination *Account, from, to strin
|
||||
return err
|
||||
}
|
||||
|
||||
if err := a.serviceImportFormsCycle(destination, to); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := a.addServiceImport(destination, from, to, imClaim)
|
||||
|
||||
return err
|
||||
@@ -2466,6 +2473,10 @@ func (a *Account) AddMappedStreamImportWithClaim(account *Account, from, to stri
|
||||
return err
|
||||
}
|
||||
|
||||
if err := a.streamImportFormsCycle(account, from); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var (
|
||||
usePub bool
|
||||
tr *subjectTransform
|
||||
@@ -2811,9 +2822,12 @@ func (a *Account) isIssuerClaimTrusted(claims *jwt.ActivationClaims) bool {
|
||||
// check is done with the account's name, not the pointer. This is used
|
||||
// during config reload where we are comparing current and new config
|
||||
// in which pointers are different.
|
||||
// No lock is acquired in this function, so it is assumed that the
|
||||
// import maps are not changed while this executes.
|
||||
// Acquires `a` read lock, but `b` is assumed to not be accessed
|
||||
// by anyone but the caller (`b` is not registered anywhere).
|
||||
func (a *Account) checkStreamImportsEqual(b *Account) bool {
|
||||
a.mu.RLock()
|
||||
defer a.mu.RUnlock()
|
||||
|
||||
if len(a.imports.streams) != len(b.imports.streams) {
|
||||
return false
|
||||
}
|
||||
@@ -3181,6 +3195,9 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
a.nameTag = ac.Name
|
||||
a.tags = ac.Tags
|
||||
|
||||
// Grab trace label under lock.
|
||||
tl := a.traceLabel()
|
||||
|
||||
// Check for external authorization.
|
||||
if ac.HasExternalAuthorization() {
|
||||
a.extAuth = &jwt.ExternalAuthorization{}
|
||||
@@ -3201,10 +3218,10 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
}
|
||||
if a.imports.services != nil {
|
||||
old.imports.services = make(map[string]*serviceImport, len(a.imports.services))
|
||||
}
|
||||
for k, v := range a.imports.services {
|
||||
old.imports.services[k] = v
|
||||
delete(a.imports.services, k)
|
||||
for k, v := range a.imports.services {
|
||||
old.imports.services[k] = v
|
||||
delete(a.imports.services, k)
|
||||
}
|
||||
}
|
||||
|
||||
alteredScope := map[string]struct{}{}
|
||||
@@ -3274,13 +3291,13 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
for _, e := range ac.Exports {
|
||||
switch e.Type {
|
||||
case jwt.Stream:
|
||||
s.Debugf("Adding stream export %q for %s", e.Subject, a.traceLabel())
|
||||
s.Debugf("Adding stream export %q for %s", e.Subject, tl)
|
||||
if err := a.addStreamExportWithAccountPos(
|
||||
string(e.Subject), authAccounts(e.TokenReq), e.AccountTokenPosition); err != nil {
|
||||
s.Debugf("Error adding stream export to account [%s]: %v", a.traceLabel(), err.Error())
|
||||
s.Debugf("Error adding stream export to account [%s]: %v", tl, err.Error())
|
||||
}
|
||||
case jwt.Service:
|
||||
s.Debugf("Adding service export %q for %s", e.Subject, a.traceLabel())
|
||||
s.Debugf("Adding service export %q for %s", e.Subject, tl)
|
||||
rt := Singleton
|
||||
switch e.ResponseType {
|
||||
case jwt.ResponseTypeStream:
|
||||
@@ -3290,7 +3307,7 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
}
|
||||
if err := a.addServiceExportWithResponseAndAccountPos(
|
||||
string(e.Subject), rt, authAccounts(e.TokenReq), e.AccountTokenPosition); err != nil {
|
||||
s.Debugf("Error adding service export to account [%s]: %v", a.traceLabel(), err)
|
||||
s.Debugf("Error adding service export to account [%s]: %v", tl, err)
|
||||
continue
|
||||
}
|
||||
sub := string(e.Subject)
|
||||
@@ -3300,13 +3317,13 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
if e.Latency.Sampling == jwt.Headers {
|
||||
hdrNote = " (using headers)"
|
||||
}
|
||||
s.Debugf("Error adding latency tracking%s for service export to account [%s]: %v", hdrNote, a.traceLabel(), err)
|
||||
s.Debugf("Error adding latency tracking%s for service export to account [%s]: %v", hdrNote, tl, err)
|
||||
}
|
||||
}
|
||||
if e.ResponseThreshold != 0 {
|
||||
// Response threshold was set in options.
|
||||
if err := a.SetServiceExportResponseThreshold(sub, e.ResponseThreshold); err != nil {
|
||||
s.Debugf("Error adding service export response threshold for [%s]: %v", a.traceLabel(), err)
|
||||
s.Debugf("Error adding service export response threshold for [%s]: %v", tl, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3351,34 +3368,31 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
}
|
||||
var incompleteImports []*jwt.Import
|
||||
for _, i := range ac.Imports {
|
||||
// check tmpAccounts with priority
|
||||
var acc *Account
|
||||
var err error
|
||||
if v, ok := s.tmpAccounts.Load(i.Account); ok {
|
||||
acc = v.(*Account)
|
||||
} else {
|
||||
acc, err = s.lookupAccount(i.Account)
|
||||
}
|
||||
acc, err := s.lookupAccount(i.Account)
|
||||
if acc == nil || err != nil {
|
||||
s.Errorf("Can't locate account [%s] for import of [%v] %s (err=%v)", i.Account, i.Subject, i.Type, err)
|
||||
incompleteImports = append(incompleteImports, i)
|
||||
continue
|
||||
}
|
||||
from := string(i.Subject)
|
||||
to := i.GetTo()
|
||||
// Capture trace labels.
|
||||
acc.mu.RLock()
|
||||
atl := acc.traceLabel()
|
||||
acc.mu.RUnlock()
|
||||
// Grab from and to
|
||||
from, to := string(i.Subject), i.GetTo()
|
||||
switch i.Type {
|
||||
case jwt.Stream:
|
||||
if i.LocalSubject != _EMPTY_ {
|
||||
// set local subject implies to is empty
|
||||
to = string(i.LocalSubject)
|
||||
s.Debugf("Adding stream import %s:%q for %s:%q", acc.traceLabel(), from, a.traceLabel(), to)
|
||||
s.Debugf("Adding stream import %s:%q for %s:%q", atl, from, tl, to)
|
||||
err = a.AddMappedStreamImportWithClaim(acc, from, to, i)
|
||||
} else {
|
||||
s.Debugf("Adding stream import %s:%q for %s:%q", acc.traceLabel(), from, a.traceLabel(), to)
|
||||
s.Debugf("Adding stream import %s:%q for %s:%q", atl, from, tl, to)
|
||||
err = a.AddStreamImportWithClaim(acc, from, to, i)
|
||||
}
|
||||
if err != nil {
|
||||
s.Debugf("Error adding stream import to account [%s]: %v", a.traceLabel(), err.Error())
|
||||
s.Debugf("Error adding stream import to account [%s]: %v", tl, err.Error())
|
||||
incompleteImports = append(incompleteImports, i)
|
||||
}
|
||||
case jwt.Service:
|
||||
@@ -3386,9 +3400,9 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
from = string(i.LocalSubject)
|
||||
to = string(i.Subject)
|
||||
}
|
||||
s.Debugf("Adding service import %s:%q for %s:%q", acc.traceLabel(), from, a.traceLabel(), to)
|
||||
s.Debugf("Adding service import %s:%q for %s:%q", atl, from, tl, to)
|
||||
if err := a.AddServiceImportWithClaim(acc, from, to, i); err != nil {
|
||||
s.Debugf("Error adding service import to account [%s]: %v", a.traceLabel(), err.Error())
|
||||
s.Debugf("Error adding service import to account [%s]: %v", tl, err.Error())
|
||||
incompleteImports = append(incompleteImports, i)
|
||||
}
|
||||
}
|
||||
@@ -3559,7 +3573,7 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
// regardless of enabled or disabled. It handles both cases.
|
||||
if jsEnabled {
|
||||
if err := s.configJetStream(a); err != nil {
|
||||
s.Errorf("Error configuring jetstream for account [%s]: %v", a.traceLabel(), err.Error())
|
||||
s.Errorf("Error configuring jetstream for account [%s]: %v", tl, err.Error())
|
||||
a.mu.Lock()
|
||||
// Absent reload of js server cfg, this is going to be broken until js is disabled
|
||||
a.incomplete = true
|
||||
@@ -3582,6 +3596,14 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
}
|
||||
c.mu.Lock()
|
||||
c.applyAccountLimits()
|
||||
// if we have an nkey user we are a callout user - save
|
||||
// the issuedAt, and nkey user id to honor revocations
|
||||
var nkeyUserID string
|
||||
var issuedAt int64
|
||||
if c.user != nil {
|
||||
issuedAt = c.user.Issued
|
||||
nkeyUserID = c.user.Nkey
|
||||
}
|
||||
theJWT := c.opts.JWT
|
||||
c.mu.Unlock()
|
||||
// Check for being revoked here. We use ac one to avoid the account lock.
|
||||
@@ -3600,6 +3622,27 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// if we extracted nkeyUserID and issuedAt we are a callout type
|
||||
// calloutIAT should only be set if we are in callout scenario as
|
||||
// the user JWT is _NOT_ associated with the client for callouts,
|
||||
// so we rely on the calloutIAT to know when the JWT was issued
|
||||
// revocations simply state that JWT issued before or by that date
|
||||
// are not valid
|
||||
if ac.Revocations != nil && nkeyUserID != _EMPTY_ && issuedAt > 0 {
|
||||
seconds, ok := ac.Revocations[jwt.All]
|
||||
if ok && seconds >= issuedAt {
|
||||
c.sendErrAndDebug("User Authentication Revoked")
|
||||
c.closeConnection(Revocation)
|
||||
continue
|
||||
}
|
||||
seconds, ok = ac.Revocations[nkeyUserID]
|
||||
if ok && seconds >= issuedAt {
|
||||
c.sendErrAndDebug("User Authentication Revoked")
|
||||
c.closeConnection(Revocation)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the signing keys changed, might have to evict
|
||||
@@ -3667,8 +3710,13 @@ func (s *Server) buildInternalAccount(ac *jwt.AccountClaims) *Account {
|
||||
// We don't want to register an account that is in the process of
|
||||
// being built, however, to solve circular import dependencies, we
|
||||
// need to store it here.
|
||||
s.tmpAccounts.Store(ac.Subject, acc)
|
||||
if v, loaded := s.tmpAccounts.LoadOrStore(ac.Subject, acc); loaded {
|
||||
return v.(*Account)
|
||||
}
|
||||
|
||||
// Update based on claims.
|
||||
s.UpdateAccountClaims(acc, ac)
|
||||
|
||||
return acc
|
||||
}
|
||||
|
||||
@@ -3708,7 +3756,7 @@ func buildPermissionsFromJwt(uc *jwt.Permissions) *Permissions {
|
||||
|
||||
// Helper to build internal NKeyUser.
|
||||
func buildInternalNkeyUser(uc *jwt.UserClaims, acts map[string]struct{}, acc *Account) *NkeyUser {
|
||||
nu := &NkeyUser{Nkey: uc.Subject, Account: acc, AllowedConnectionTypes: acts}
|
||||
nu := &NkeyUser{Nkey: uc.Subject, Account: acc, AllowedConnectionTypes: acts, Issued: uc.IssuedAt}
|
||||
if uc.IssuerAccount != _EMPTY_ {
|
||||
nu.SigningKey = uc.Issuer
|
||||
}
|
||||
|
||||
+1
@@ -60,6 +60,7 @@ type ClientAuthentication interface {
|
||||
// NkeyUser is for multiple nkey based users
|
||||
type NkeyUser struct {
|
||||
Nkey string `json:"user"`
|
||||
Issued int64 `json:"issued,omitempty"` // this is a copy of the issued at (iat) field in the jwt
|
||||
Permissions *Permissions `json:"permissions,omitempty"`
|
||||
Account *Account `json:"account,omitempty"`
|
||||
SigningKey string `json:"signing_key,omitempty"`
|
||||
|
||||
+50
-42
@@ -847,7 +847,7 @@ func (c *client) applyAccountLimits() {
|
||||
c.msubs = jwt.NoLimit
|
||||
if c.opts.JWT != _EMPTY_ { // user jwt implies account
|
||||
if uc, _ := jwt.DecodeUserClaims(c.opts.JWT); uc != nil {
|
||||
c.mpay = int32(uc.Limits.Payload)
|
||||
atomic.StoreInt32(&c.mpay, int32(uc.Limits.Payload))
|
||||
c.msubs = int32(uc.Limits.Subs)
|
||||
if uc.IssuerAccount != _EMPTY_ && uc.IssuerAccount != uc.Issuer {
|
||||
if scope, ok := c.acc.signingKeys[uc.Issuer]; ok {
|
||||
@@ -2914,8 +2914,11 @@ func (c *client) addShadowSubscriptions(acc *Account, sub *subscription, enact b
|
||||
|
||||
// Add in the shadow subscription.
|
||||
func (c *client) addShadowSub(sub *subscription, ime *ime, enact bool) (*subscription, error) {
|
||||
im := ime.im
|
||||
c.mu.Lock()
|
||||
nsub := *sub // copy
|
||||
c.mu.Unlock()
|
||||
|
||||
im := ime.im
|
||||
nsub.im = im
|
||||
|
||||
if !im.usePub && ime.dyn && im.tr != nil {
|
||||
@@ -2950,8 +2953,10 @@ func (c *client) addShadowSub(sub *subscription, ime *ime, enact bool) (*subscri
|
||||
return nil, fmt.Errorf(errs)
|
||||
}
|
||||
|
||||
// Update our route map here.
|
||||
c.srv.updateRemoteSubscription(im.acc, &nsub, 1)
|
||||
// Update our route map here. But only if we are not a leaf node or a hub leafnode.
|
||||
if c.kind != LEAF || c.isHubLeafNode() {
|
||||
c.srv.updateRemoteSubscription(im.acc, &nsub, 1)
|
||||
}
|
||||
|
||||
return &nsub, nil
|
||||
}
|
||||
@@ -5228,48 +5233,51 @@ func (c *client) closeConnection(reason ClosedState) {
|
||||
// Unregister
|
||||
srv.removeClient(c)
|
||||
|
||||
// Update remote subscriptions.
|
||||
if acc != nil && (kind == CLIENT || kind == LEAF || kind == JETSTREAM) {
|
||||
qsubs := map[string]*qsub{}
|
||||
for _, sub := range subs {
|
||||
// Call unsubscribe here to cleanup shadow subscriptions and such.
|
||||
c.unsubscribe(acc, sub, true, false)
|
||||
// Update route as normal for a normal subscriber.
|
||||
if sub.queue == nil {
|
||||
if !spoke {
|
||||
srv.updateRouteSubscriptionMap(acc, sub, -1)
|
||||
if srv.gateway.enabled {
|
||||
srv.gatewayUpdateSubInterest(acc.Name, sub, -1)
|
||||
if acc != nil {
|
||||
// Update remote subscriptions.
|
||||
if kind == CLIENT || kind == LEAF || kind == JETSTREAM {
|
||||
qsubs := map[string]*qsub{}
|
||||
for _, sub := range subs {
|
||||
// Call unsubscribe here to cleanup shadow subscriptions and such.
|
||||
c.unsubscribe(acc, sub, true, false)
|
||||
// Update route as normal for a normal subscriber.
|
||||
if sub.queue == nil {
|
||||
if !spoke {
|
||||
srv.updateRouteSubscriptionMap(acc, sub, -1)
|
||||
if srv.gateway.enabled {
|
||||
srv.gatewayUpdateSubInterest(acc.Name, sub, -1)
|
||||
}
|
||||
}
|
||||
acc.updateLeafNodes(sub, -1)
|
||||
} else {
|
||||
// We handle queue subscribers special in case we
|
||||
// have a bunch we can just send one update to the
|
||||
// connected routes.
|
||||
num := int32(1)
|
||||
if kind == LEAF {
|
||||
num = sub.qw
|
||||
}
|
||||
key := keyFromSub(sub)
|
||||
if esub, ok := qsubs[key]; ok {
|
||||
esub.n += num
|
||||
} else {
|
||||
qsubs[key] = &qsub{sub, num}
|
||||
}
|
||||
}
|
||||
acc.updateLeafNodes(sub, -1)
|
||||
} else {
|
||||
// We handle queue subscribers special in case we
|
||||
// have a bunch we can just send one update to the
|
||||
// connected routes.
|
||||
num := int32(1)
|
||||
if kind == LEAF {
|
||||
num = sub.qw
|
||||
}
|
||||
// TODO(dlc) - Better to use string builder?
|
||||
key := bytesToString(sub.subject) + " " + bytesToString(sub.queue)
|
||||
if esub, ok := qsubs[key]; ok {
|
||||
esub.n += num
|
||||
} else {
|
||||
qsubs[key] = &qsub{sub, num}
|
||||
}
|
||||
// Process any qsubs here.
|
||||
for _, esub := range qsubs {
|
||||
if !spoke {
|
||||
srv.updateRouteSubscriptionMap(acc, esub.sub, -(esub.n))
|
||||
if srv.gateway.enabled {
|
||||
srv.gatewayUpdateSubInterest(acc.Name, esub.sub, -(esub.n))
|
||||
}
|
||||
}
|
||||
acc.updateLeafNodes(esub.sub, -(esub.n))
|
||||
}
|
||||
}
|
||||
// Process any qsubs here.
|
||||
for _, esub := range qsubs {
|
||||
if !spoke {
|
||||
srv.updateRouteSubscriptionMap(acc, esub.sub, -(esub.n))
|
||||
if srv.gateway.enabled {
|
||||
srv.gatewayUpdateSubInterest(acc.Name, esub.sub, -(esub.n))
|
||||
}
|
||||
}
|
||||
acc.updateLeafNodes(esub.sub, -(esub.n))
|
||||
}
|
||||
// Always remove from the account, otherwise we can leak clients.
|
||||
// Note that SYSTEM and ACCOUNT types from above cleanup their own subs.
|
||||
if prev := acc.removeClient(c); prev == 1 {
|
||||
srv.decActiveAccounts()
|
||||
}
|
||||
@@ -5419,7 +5427,7 @@ func (c *client) getAccAndResultFromCache() (*Account, *SublistResult) {
|
||||
|
||||
if genid := atomic.LoadUint64(&sl.genid); genid != pac.genid {
|
||||
ok = false
|
||||
delete(c.in.pacache, bytesToString(c.pa.pacache))
|
||||
c.in.pacache = make(map[string]*perAccountCache)
|
||||
} else {
|
||||
acc = pac.acc
|
||||
r = pac.results
|
||||
|
||||
+17
-3
@@ -14,6 +14,7 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"runtime/debug"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -33,15 +34,28 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
// gitCommit injected at build
|
||||
gitCommit string
|
||||
// gitCommit and serverVersion injected at build.
|
||||
gitCommit, serverVersion string
|
||||
// trustedKeys is a whitespace separated array of trusted operator's public nkeys.
|
||||
trustedKeys string
|
||||
)
|
||||
|
||||
func init() {
|
||||
// Use build info if present, it would be if building using 'go build .'
|
||||
// or when using a release.
|
||||
if info, ok := debug.ReadBuildInfo(); ok {
|
||||
for _, setting := range info.Settings {
|
||||
switch setting.Key {
|
||||
case "vcs.revision":
|
||||
gitCommit = setting.Value[:7]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
// VERSION is the current version for the server.
|
||||
VERSION = "2.10.16"
|
||||
VERSION = "2.10.18"
|
||||
|
||||
// PROTO is the currently supported protocol.
|
||||
// 0 was the original
|
||||
|
||||
+74
-41
@@ -711,7 +711,7 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri
|
||||
}
|
||||
|
||||
mset.mu.RLock()
|
||||
s, jsa, tierName, cfg, acc := mset.srv, mset.jsa, mset.tier, mset.cfg, mset.acc
|
||||
s, jsa, cfg, acc := mset.srv, mset.jsa, mset.cfg, mset.acc
|
||||
retention := cfg.Retention
|
||||
mset.mu.RUnlock()
|
||||
|
||||
@@ -726,10 +726,8 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri
|
||||
return nil, NewJSConsumerConfigRequiredError()
|
||||
}
|
||||
|
||||
jsa.usageMu.RLock()
|
||||
selectedLimits, limitsFound := jsa.limits[tierName]
|
||||
jsa.usageMu.RUnlock()
|
||||
if !limitsFound {
|
||||
selectedLimits, _, _, _ := acc.selectLimits(config.replicas(&cfg))
|
||||
if selectedLimits == nil {
|
||||
return nil, NewJSNoLimitsError()
|
||||
}
|
||||
|
||||
@@ -737,10 +735,10 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri
|
||||
// Make sure we have sane defaults. Do so with the JS lock, otherwise a
|
||||
// badly timed meta snapshot can result in a race condition.
|
||||
mset.js.mu.Lock()
|
||||
setConsumerConfigDefaults(config, &mset.cfg, srvLim, &selectedLimits)
|
||||
setConsumerConfigDefaults(config, &mset.cfg, srvLim, selectedLimits)
|
||||
mset.js.mu.Unlock()
|
||||
|
||||
if err := checkConsumerCfg(config, srvLim, &cfg, acc, &selectedLimits, isRecovering); err != nil {
|
||||
if err := checkConsumerCfg(config, srvLim, &cfg, acc, selectedLimits, isRecovering); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sampleFreq := 0
|
||||
@@ -2111,19 +2109,17 @@ func (o *consumer) loopAndForwardProposals(qch chan struct{}) {
|
||||
const maxBatch = 256 * 1024
|
||||
var entries []*Entry
|
||||
for sz := 0; proposal != nil; proposal = proposal.next {
|
||||
entry := entryPool.Get().(*Entry)
|
||||
entry.Type, entry.Data = EntryNormal, proposal.data
|
||||
entries = append(entries, entry)
|
||||
entries = append(entries, newEntry(EntryNormal, proposal.data))
|
||||
sz += len(proposal.data)
|
||||
if sz > maxBatch {
|
||||
node.ProposeDirect(entries)
|
||||
node.ProposeMulti(entries)
|
||||
// We need to re-create `entries` because there is a reference
|
||||
// to it in the node's pae map.
|
||||
sz, entries = 0, nil
|
||||
}
|
||||
}
|
||||
if len(entries) > 0 {
|
||||
node.ProposeDirect(entries)
|
||||
node.ProposeMulti(entries)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -2146,22 +2142,18 @@ func (o *consumer) loopAndForwardProposals(qch chan struct{}) {
|
||||
|
||||
// Lock should be held.
|
||||
func (o *consumer) propose(entry []byte) {
|
||||
var notify bool
|
||||
p := &proposal{data: entry}
|
||||
if o.phead == nil {
|
||||
o.phead = p
|
||||
notify = true
|
||||
} else {
|
||||
o.ptail.next = p
|
||||
}
|
||||
o.ptail = p
|
||||
|
||||
// Kick our looper routine if needed.
|
||||
if notify {
|
||||
select {
|
||||
case o.pch <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
// Kick our looper routine.
|
||||
select {
|
||||
case o.pch <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2633,17 +2625,24 @@ func (o *consumer) infoWithSnapAndReply(snap bool, reply string) *ConsumerInfo {
|
||||
TimeStamp: time.Now().UTC(),
|
||||
}
|
||||
|
||||
// If we are replicated and we are not the leader we need to pull certain data from our store.
|
||||
if rg != nil && rg.node != nil && !o.isLeader() && o.store != nil {
|
||||
// If we are replicated and we are not the leader or we are filtered, we need to pull certain data from our store.
|
||||
isLeader := o.isLeader()
|
||||
if rg != nil && rg.node != nil && o.store != nil && (!isLeader || o.isFiltered()) {
|
||||
state, err := o.store.BorrowState()
|
||||
if err != nil {
|
||||
o.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
info.Delivered.Consumer, info.Delivered.Stream = state.Delivered.Consumer, state.Delivered.Stream
|
||||
info.AckFloor.Consumer, info.AckFloor.Stream = state.AckFloor.Consumer, state.AckFloor.Stream
|
||||
info.NumAckPending = len(state.Pending)
|
||||
info.NumRedelivered = len(state.Redelivered)
|
||||
if !isLeader {
|
||||
info.Delivered.Consumer, info.Delivered.Stream = state.Delivered.Consumer, state.Delivered.Stream
|
||||
info.AckFloor.Consumer, info.AckFloor.Stream = state.AckFloor.Consumer, state.AckFloor.Stream
|
||||
info.NumAckPending = len(state.Pending)
|
||||
info.NumRedelivered = len(state.Redelivered)
|
||||
} else {
|
||||
// Since we are filtered and we are the leader we could have o.sseq that is skipped ahead.
|
||||
// To maintain consistency in reporting (e.g. jsz) we take the state for our delivered stream sequence.
|
||||
info.Delivered.Stream = state.Delivered.Stream
|
||||
}
|
||||
}
|
||||
|
||||
// Adjust active based on non-zero etc. Also make UTC here.
|
||||
@@ -2742,6 +2741,12 @@ func (o *consumer) processAckMsg(sseq, dseq, dc uint64, reply string, doSample b
|
||||
return
|
||||
}
|
||||
|
||||
// Check if this ack is above the current pointer to our next to deliver.
|
||||
// This could happen on a cooperative takeover with high speed deliveries.
|
||||
if sseq >= o.sseq {
|
||||
o.sseq = sseq + 1
|
||||
}
|
||||
|
||||
mset := o.mset
|
||||
if mset == nil || mset.closed.Load() {
|
||||
o.mu.Unlock()
|
||||
@@ -2763,8 +2768,12 @@ func (o *consumer) processAckMsg(sseq, dseq, dc uint64, reply string, doSample b
|
||||
delete(o.pending, sseq)
|
||||
// Use the original deliver sequence from our pending record.
|
||||
dseq = p.Sequence
|
||||
|
||||
// Only move floors if we matched an existing pending.
|
||||
if dseq == o.adflr+1 {
|
||||
if len(o.pending) == 0 {
|
||||
o.adflr = o.dseq - 1
|
||||
o.asflr = o.sseq - 1
|
||||
} else if dseq == o.adflr+1 {
|
||||
o.adflr, o.asflr = dseq, sseq
|
||||
for ss := sseq + 1; ss < o.sseq; ss++ {
|
||||
if p, ok := o.pending[ss]; ok {
|
||||
@@ -2775,11 +2784,6 @@ func (o *consumer) processAckMsg(sseq, dseq, dc uint64, reply string, doSample b
|
||||
}
|
||||
}
|
||||
}
|
||||
// If nothing left set consumer to current delivered.
|
||||
// Do not update stream.
|
||||
if len(o.pending) == 0 {
|
||||
o.adflr = o.dseq - 1
|
||||
}
|
||||
}
|
||||
delete(o.rdc, sseq)
|
||||
o.removeFromRedeliverQueue(sseq)
|
||||
@@ -4150,7 +4154,8 @@ func (o *consumer) checkNumPending() uint64 {
|
||||
if o.mset != nil {
|
||||
var state StreamState
|
||||
o.mset.store.FastState(&state)
|
||||
if o.sseq > state.LastSeq && o.npc != 0 || o.npc > int64(state.Msgs) {
|
||||
npc := o.numPending()
|
||||
if o.sseq > state.LastSeq && npc > 0 || npc > state.Msgs {
|
||||
// Re-calculate.
|
||||
o.streamNumPending()
|
||||
}
|
||||
@@ -4318,7 +4323,7 @@ func (o *consumer) deliverMsg(dsubj, ackReply string, pmsg *jsPubMsg, dc uint64,
|
||||
|
||||
// If we are ack none and mset is interest only we should make sure stream removes interest.
|
||||
if ap == AckNone && rp != LimitsPolicy {
|
||||
if o.node == nil || o.cfg.Direct {
|
||||
if mset != nil && mset.ackq != nil && (o.node == nil || o.cfg.Direct) {
|
||||
mset.ackq.push(seq)
|
||||
} else {
|
||||
o.updateAcks(dseq, seq, _EMPTY_)
|
||||
@@ -5218,18 +5223,19 @@ func (o *consumer) stopWithFlags(dflag, sdflag, doSignal, advisory bool) error {
|
||||
// ignoreInterest marks whether the consumer should be ignored when determining interest.
|
||||
// No lock held on entry.
|
||||
func (o *consumer) cleanupNoInterestMessages(mset *stream, ignoreInterest bool) {
|
||||
state := mset.state()
|
||||
stop := state.LastSeq
|
||||
o.mu.Lock()
|
||||
if !o.isLeader() {
|
||||
o.readStoredState(stop)
|
||||
o.readStoredState(0)
|
||||
}
|
||||
start := o.asflr
|
||||
o.mu.Unlock()
|
||||
|
||||
// Make sure we start at worst with first sequence in the stream.
|
||||
state := mset.state()
|
||||
if start < state.FirstSeq {
|
||||
start = state.FirstSeq
|
||||
}
|
||||
stop := state.LastSeq
|
||||
|
||||
// Consumer's interests are ignored by default. If we should not ignore interest, unset.
|
||||
co := o
|
||||
@@ -5238,13 +5244,37 @@ func (o *consumer) cleanupNoInterestMessages(mset *stream, ignoreInterest bool)
|
||||
}
|
||||
|
||||
var rmseqs []uint64
|
||||
mset.mu.Lock()
|
||||
mset.mu.RLock()
|
||||
|
||||
// If over this amount of messages to check, defer to checkInterestState() which
|
||||
// will do the right thing since we are now removed.
|
||||
// TODO(dlc) - Better way?
|
||||
const bailThresh = 100_000
|
||||
|
||||
// Check if we would be spending too much time here and defer to separate go routine.
|
||||
if len(mset.consumers) == 0 {
|
||||
mset.mu.RUnlock()
|
||||
mset.mu.Lock()
|
||||
defer mset.mu.Unlock()
|
||||
mset.store.Purge()
|
||||
var state StreamState
|
||||
mset.store.FastState(&state)
|
||||
mset.lseq = state.LastSeq
|
||||
// Also make sure we clear any pending acks.
|
||||
mset.clearAllPreAcksBelowFloor(state.FirstSeq)
|
||||
return
|
||||
} else if stop-start > bailThresh {
|
||||
mset.mu.RUnlock()
|
||||
go mset.checkInterestState()
|
||||
return
|
||||
}
|
||||
|
||||
for seq := start; seq <= stop; seq++ {
|
||||
if mset.noInterest(seq, co) {
|
||||
rmseqs = append(rmseqs, seq)
|
||||
}
|
||||
}
|
||||
mset.mu.Unlock()
|
||||
mset.mu.RUnlock()
|
||||
|
||||
// These can be removed.
|
||||
for _, seq := range rmseqs {
|
||||
@@ -5478,10 +5508,13 @@ func (o *consumer) checkStateForInterestStream() error {
|
||||
o.mu.RUnlock()
|
||||
|
||||
// If we have pending, we will need to walk through to delivered in case we missed any of those acks as well.
|
||||
if state != nil && len(state.Pending) > 0 {
|
||||
if state != nil && len(state.Pending) > 0 && state.AckFloor.Stream > 0 {
|
||||
for seq := state.AckFloor.Stream + 1; seq <= state.Delivered.Stream; seq++ {
|
||||
if _, ok := state.Pending[seq]; !ok {
|
||||
mset.ackMsg(o, seq)
|
||||
// Want to call needAck since it is filter aware.
|
||||
if o.needAck(seq, _EMPTY_) {
|
||||
mset.ackMsg(o, seq)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+79
-6
@@ -98,6 +98,12 @@ const (
|
||||
// FIXME(dlc) - make configurable.
|
||||
var eventsHBInterval = 30 * time.Second
|
||||
|
||||
// Default minimum wait time for sending statsz
|
||||
const defaultStatszRateLimit = 1 * time.Second
|
||||
|
||||
// Variable version so we can set in tests.
|
||||
var statszRateLimit = defaultStatszRateLimit
|
||||
|
||||
type sysMsgHandler func(sub *subscription, client *client, acc *Account, subject, reply string, hdr, msg []byte)
|
||||
|
||||
// Used if we have to queue things internally to avoid the route/gw path.
|
||||
@@ -134,6 +140,7 @@ type internal struct {
|
||||
shash string
|
||||
inboxPre string
|
||||
remoteStatsSub *subscription
|
||||
lastStatsz time.Time
|
||||
}
|
||||
|
||||
// ServerStatsMsg is sent periodically with stats updates.
|
||||
@@ -807,6 +814,10 @@ func (s *Server) sendStatsz(subj string) {
|
||||
var m ServerStatsMsg
|
||||
s.updateServerUsage(&m.Stats)
|
||||
|
||||
if s.limitStatsz(subj) {
|
||||
return
|
||||
}
|
||||
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
@@ -948,6 +959,35 @@ func (s *Server) sendStatsz(subj string) {
|
||||
s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m)
|
||||
}
|
||||
|
||||
// Limit updates to the heartbeat interval, max one second by default.
|
||||
func (s *Server) limitStatsz(subj string) bool {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
if s.sys == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
// Only limit the normal broadcast subject.
|
||||
if subj != fmt.Sprintf(serverStatsSubj, s.ID()) {
|
||||
return false
|
||||
}
|
||||
|
||||
interval := statszRateLimit
|
||||
if s.sys.cstatsz < interval {
|
||||
interval = s.sys.cstatsz
|
||||
}
|
||||
if time.Since(s.sys.lastStatsz) < interval {
|
||||
// Reschedule heartbeat for the next interval.
|
||||
if s.sys.stmr != nil {
|
||||
s.sys.stmr.Reset(time.Until(s.sys.lastStatsz.Add(interval)))
|
||||
}
|
||||
return true
|
||||
}
|
||||
s.sys.lastStatsz = time.Now()
|
||||
return false
|
||||
}
|
||||
|
||||
// Send out our statz update.
|
||||
// This should be wrapChk() to setup common locking.
|
||||
func (s *Server) heartbeatStatsz() {
|
||||
@@ -965,6 +1005,12 @@ func (s *Server) heartbeatStatsz() {
|
||||
go s.sendStatszUpdate()
|
||||
}
|
||||
|
||||
// Reset statsz rate limit for the next broadcast.
|
||||
// This should be wrapChk() to setup common locking.
|
||||
func (s *Server) resetLastStatsz() {
|
||||
s.sys.lastStatsz = time.Time{}
|
||||
}
|
||||
|
||||
func (s *Server) sendStatszUpdate() {
|
||||
s.sendStatsz(fmt.Sprintf(serverStatsSubj, s.ID()))
|
||||
}
|
||||
@@ -1019,44 +1065,56 @@ func (s *Server) Node() string {
|
||||
// Tradeoff is subscription and interest graph events vs connect and
|
||||
// disconnect events, etc.
|
||||
func (s *Server) initEventTracking() {
|
||||
if !s.EventsEnabled() {
|
||||
// Capture sys in case we are shutdown while setting up.
|
||||
s.mu.RLock()
|
||||
sys := s.sys
|
||||
s.mu.RUnlock()
|
||||
|
||||
if sys == nil || sys.client == nil || sys.account == nil {
|
||||
return
|
||||
}
|
||||
// Create a system hash which we use for other servers to target us specifically.
|
||||
s.sys.shash = getHash(s.info.Name)
|
||||
sys.shash = getHash(s.info.Name)
|
||||
|
||||
// This will be for all inbox responses.
|
||||
subject := fmt.Sprintf(inboxRespSubj, s.sys.shash, "*")
|
||||
subject := fmt.Sprintf(inboxRespSubj, sys.shash, "*")
|
||||
if _, err := s.sysSubscribe(subject, s.inboxReply); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
s.sys.inboxPre = subject
|
||||
sys.inboxPre = subject
|
||||
// This is for remote updates for connection accounting.
|
||||
subject = fmt.Sprintf(accConnsEventSubjOld, "*")
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteConnsUpdate)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking for %s: %v", subject, err)
|
||||
return
|
||||
}
|
||||
// This will be for responses for account info that we send out.
|
||||
subject = fmt.Sprintf(connsRespSubj, s.info.ID)
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteConnsUpdate)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
// Listen for broad requests to respond with number of subscriptions for a given subject.
|
||||
if _, err := s.sysSubscribe(accNumSubsReqSubj, s.noInlineCallback(s.nsubsRequest)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
// Listen for statsz from others.
|
||||
subject = fmt.Sprintf(serverStatsSubj, "*")
|
||||
if sub, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerUpdate)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
} else {
|
||||
// Keep track of this one.
|
||||
s.sys.remoteStatsSub = sub
|
||||
sys.remoteStatsSub = sub
|
||||
}
|
||||
|
||||
// Listen for all server shutdowns.
|
||||
subject = fmt.Sprintf(shutdownEventSubj, "*")
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerShutdown)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
// Listen for servers entering lame-duck mode.
|
||||
// NOTE: This currently is handled in the same way as a server shutdown, but has
|
||||
@@ -1064,6 +1122,7 @@ func (s *Server) initEventTracking() {
|
||||
subject = fmt.Sprintf(lameDuckEventSubj, "*")
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerShutdown)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
// Listen for account claims updates.
|
||||
subscribeToUpdate := true
|
||||
@@ -1074,6 +1133,7 @@ func (s *Server) initEventTracking() {
|
||||
for _, sub := range []string{accUpdateEventSubjOld, accUpdateEventSubjNew} {
|
||||
if _, err := s.sysSubscribe(fmt.Sprintf(sub, "*"), s.noInlineCallback(s.accountClaimUpdate)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1081,6 +1141,7 @@ func (s *Server) initEventTracking() {
|
||||
// This subscription is kept for backwards compatibility. Got replaced by ...PING.STATZ from below
|
||||
if _, err := s.sysSubscribe(serverStatsPingReqSubj, s.noInlineCallback(s.statszReq)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
monSrvc := map[string]sysMsgHandler{
|
||||
"IDZ": s.idzReq,
|
||||
@@ -1134,10 +1195,12 @@ func (s *Server) initEventTracking() {
|
||||
subject = fmt.Sprintf(serverDirectReqSubj, s.info.ID, name)
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(req)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
subject = fmt.Sprintf(serverPingReqSubj, name)
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(req)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
extractAccount := func(subject string) (string, error) {
|
||||
@@ -1230,6 +1293,7 @@ func (s *Server) initEventTracking() {
|
||||
for name, req := range monAccSrvc {
|
||||
if _, err := s.sysSubscribe(fmt.Sprintf(accDirectReqSubj, "*", name), s.noInlineCallback(req)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1238,6 +1302,7 @@ func (s *Server) initEventTracking() {
|
||||
// is only one that will answer. This breaks tests since we still forward on remote server connect.
|
||||
if _, err := s.sysSubscribe(fmt.Sprintf(userDirectReqSubj, "*"), s.userInfoReq); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// For now only the STATZ subject has an account specific ping equivalent.
|
||||
@@ -1255,6 +1320,7 @@ func (s *Server) initEventTracking() {
|
||||
})
|
||||
})); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Listen for updates when leaf nodes connect for a given account. This will
|
||||
@@ -1262,32 +1328,38 @@ func (s *Server) initEventTracking() {
|
||||
subject = fmt.Sprintf(leafNodeConnectEventSubj, "*")
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.leafNodeConnected)); err != nil {
|
||||
s.Errorf("Error setting up internal tracking: %v", err)
|
||||
return
|
||||
}
|
||||
// For tracking remote latency measurements.
|
||||
subject = fmt.Sprintf(remoteLatencyEventSubj, s.sys.shash)
|
||||
subject = fmt.Sprintf(remoteLatencyEventSubj, sys.shash)
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteLatencyUpdate)); err != nil {
|
||||
s.Errorf("Error setting up internal latency tracking: %v", err)
|
||||
return
|
||||
}
|
||||
// This is for simple debugging of number of subscribers that exist in the system.
|
||||
if _, err := s.sysSubscribeInternal(accSubsSubj, s.noInlineCallback(s.debugSubscribers)); err != nil {
|
||||
s.Errorf("Error setting up internal debug service for subscribers: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Listen for requests to reload the server configuration.
|
||||
subject = fmt.Sprintf(serverReloadReqSubj, s.info.ID)
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.reloadConfig)); err != nil {
|
||||
s.Errorf("Error setting up server reload handler: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Client connection kick
|
||||
subject = fmt.Sprintf(clientKickReqSubj, s.info.ID)
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.kickClient)); err != nil {
|
||||
s.Errorf("Error setting up client kick service: %v", err)
|
||||
return
|
||||
}
|
||||
// Client connection LDM
|
||||
subject = fmt.Sprintf(clientLDMReqSubj, s.info.ID)
|
||||
if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.ldmClient)); err != nil {
|
||||
s.Errorf("Error setting up client LDM service: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1868,6 +1940,7 @@ func (s *Server) statszReq(sub *subscription, c *client, _ *Account, subject, re
|
||||
// No reply is a signal that we should use our normal broadcast subject.
|
||||
if reply == _EMPTY_ {
|
||||
reply = fmt.Sprintf(serverStatsSubj, s.info.ID)
|
||||
s.wrapChk(s.resetLastStatsz)
|
||||
}
|
||||
|
||||
opts := StatszEventOptions{}
|
||||
|
||||
+380
-146
@@ -214,7 +214,7 @@ type msgBlock struct {
|
||||
bytes uint64 // User visible bytes count.
|
||||
rbytes uint64 // Total bytes (raw) including deleted. Used for rolling to new blk.
|
||||
msgs uint64 // User visible message count.
|
||||
fss map[string]*SimpleState
|
||||
fss *stree.SubjectTree[SimpleState]
|
||||
kfn string
|
||||
lwts int64
|
||||
llts int64
|
||||
@@ -295,13 +295,13 @@ const (
|
||||
// Maximum size of a write buffer we may consider for re-use.
|
||||
maxBufReuse = 2 * 1024 * 1024
|
||||
// default cache buffer expiration
|
||||
defaultCacheBufferExpiration = 2 * time.Second
|
||||
defaultCacheBufferExpiration = 10 * time.Second
|
||||
// default sync interval
|
||||
defaultSyncInterval = 2 * time.Minute
|
||||
// default idle timeout to close FDs.
|
||||
closeFDsIdle = 30 * time.Second
|
||||
// default expiration time for mb.fss when idle.
|
||||
defaultFssExpiration = 10 * time.Second
|
||||
defaultFssExpiration = 2 * time.Minute
|
||||
// coalesceMinimum
|
||||
coalesceMinimum = 16 * 1024
|
||||
// maxFlushWait is maximum we will wait to gather messages to flush.
|
||||
@@ -1869,7 +1869,7 @@ func (mb *msgBlock) lastChecksum() []byte {
|
||||
mb.rbytes = uint64(fi.Size())
|
||||
}
|
||||
if mb.rbytes < checksumSize {
|
||||
return nil
|
||||
return lchk[:]
|
||||
}
|
||||
// Encrypted?
|
||||
// Check for encryption, we do not load keys on startup anymore so might need to load them here.
|
||||
@@ -2063,11 +2063,13 @@ func (fs *fileStore) expireMsgsOnRecover() {
|
||||
}
|
||||
// Make sure we do subject cleanup as well.
|
||||
mb.ensurePerSubjectInfoLoaded()
|
||||
for subj, ss := range mb.fss {
|
||||
mb.fss.Iter(func(bsubj []byte, ss *SimpleState) bool {
|
||||
subj := bytesToString(bsubj)
|
||||
for i := uint64(0); i < ss.Msgs; i++ {
|
||||
fs.removePerSubject(subj)
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
mb.dirtyCloseWithRemove(true)
|
||||
deleted++
|
||||
}
|
||||
@@ -2314,9 +2316,21 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
|
||||
// Mark fss activity.
|
||||
mb.lsts = time.Now().UnixNano()
|
||||
|
||||
if filter == _EMPTY_ {
|
||||
filter = fwcs
|
||||
wc = true
|
||||
}
|
||||
|
||||
// If we only have 1 subject currently and it matches our filter we can also set isAll.
|
||||
if !isAll && len(mb.fss) == 1 {
|
||||
_, isAll = mb.fss[filter]
|
||||
if !isAll && mb.fss.Size() == 1 {
|
||||
if !wc {
|
||||
_, isAll = mb.fss.Find(stringToBytes(filter))
|
||||
} else {
|
||||
// Since mb.fss.Find won't work if filter is a wildcard, need to use Match instead.
|
||||
mb.fss.Match(stringToBytes(filter), func(subject []byte, _ *SimpleState) {
|
||||
isAll = true
|
||||
})
|
||||
}
|
||||
}
|
||||
// Make sure to start at mb.first.seq if fseq < mb.first.seq
|
||||
if seq := atomic.LoadUint64(&mb.first.seq); seq > fseq {
|
||||
@@ -2325,16 +2339,15 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
|
||||
lseq := atomic.LoadUint64(&mb.last.seq)
|
||||
|
||||
// Optionally build the isMatch for wildcard filters.
|
||||
tsa := [32]string{}
|
||||
fsa := [32]string{}
|
||||
var fts []string
|
||||
_tsa, _fsa := [32]string{}, [32]string{}
|
||||
tsa, fsa := _tsa[:0], _fsa[:0]
|
||||
var isMatch func(subj string) bool
|
||||
// Decide to build.
|
||||
if wc {
|
||||
fts = tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
isMatch = func(subj string) bool {
|
||||
tts := tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tts, fts)
|
||||
tsa = tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tsa, fsa)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2344,19 +2357,18 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
|
||||
// If we do not think we should do a linear scan check how many fss we
|
||||
// would need to scan vs the full range of the linear walk. Optimize for
|
||||
// 25th quantile of a match in a linear walk. Filter should be a wildcard.
|
||||
if !doLinearScan && wc {
|
||||
doLinearScan = len(mb.fss)*4 > int(lseq-fseq)
|
||||
// We should consult fss if our cache is not loaded and we only have fss loaded.
|
||||
if !doLinearScan && wc && mb.cacheAlreadyLoaded() {
|
||||
doLinearScan = mb.fss.Size()*4 > int(lseq-fseq)
|
||||
}
|
||||
|
||||
if !doLinearScan {
|
||||
// If we have a wildcard match against all tracked subjects we know about.
|
||||
if wc {
|
||||
subs = subs[:0]
|
||||
for subj := range mb.fss {
|
||||
if isMatch(subj) {
|
||||
subs = append(subs, subj)
|
||||
}
|
||||
}
|
||||
mb.fss.Match(stringToBytes(filter), func(bsubj []byte, _ *SimpleState) {
|
||||
subs = append(subs, string(bsubj))
|
||||
})
|
||||
// Check if we matched anything
|
||||
if len(subs) == 0 {
|
||||
return nil, didLoad, ErrStoreMsgNotFound
|
||||
@@ -2364,7 +2376,7 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
|
||||
}
|
||||
fseq = lseq + 1
|
||||
for _, subj := range subs {
|
||||
ss := mb.fss[subj]
|
||||
ss, _ := mb.fss.Find(stringToBytes(subj))
|
||||
if ss != nil && ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(subj, ss.First, ss)
|
||||
}
|
||||
@@ -2455,6 +2467,11 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) (
|
||||
}
|
||||
}
|
||||
|
||||
if filter == _EMPTY_ {
|
||||
filter = fwcs
|
||||
wc = true
|
||||
}
|
||||
|
||||
update := func(ss *SimpleState) {
|
||||
total += ss.Msgs
|
||||
if first == 0 || ss.First < first {
|
||||
@@ -2468,9 +2485,9 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) (
|
||||
// Make sure we have fss loaded.
|
||||
mb.ensurePerSubjectInfoLoaded()
|
||||
|
||||
tsa := [32]string{}
|
||||
fsa := [32]string{}
|
||||
fts := tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
_tsa, _fsa := [32]string{}, [32]string{}
|
||||
tsa, fsa := _tsa[:0], _fsa[:0]
|
||||
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
|
||||
// 1. See if we match any subs from fss.
|
||||
// 2. If we match and the sseq is past ss.Last then we can use meta only.
|
||||
@@ -2480,25 +2497,26 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) (
|
||||
if !wc {
|
||||
return subj == filter
|
||||
}
|
||||
tts := tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tts, fts)
|
||||
tsa = tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tsa, fsa)
|
||||
}
|
||||
|
||||
var havePartial bool
|
||||
for subj, ss := range mb.fss {
|
||||
if isAll || isMatch(subj) {
|
||||
if ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(subj, ss.First, ss)
|
||||
}
|
||||
if sseq <= ss.First {
|
||||
update(ss)
|
||||
} else if sseq <= ss.Last {
|
||||
// We matched but its a partial.
|
||||
havePartial = true
|
||||
break
|
||||
}
|
||||
mb.fss.Match(stringToBytes(filter), func(bsubj []byte, ss *SimpleState) {
|
||||
if havePartial {
|
||||
// If we already found a partial then don't do anything else.
|
||||
return
|
||||
}
|
||||
}
|
||||
if ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(bytesToString(bsubj), ss.First, ss)
|
||||
}
|
||||
if sseq <= ss.First {
|
||||
update(ss)
|
||||
} else if sseq <= ss.Last {
|
||||
// We matched but its a partial.
|
||||
havePartial = true
|
||||
}
|
||||
})
|
||||
|
||||
// If we did not encounter any partials we can return here.
|
||||
if !havePartial {
|
||||
@@ -2589,9 +2607,85 @@ func (fs *fileStore) FilteredState(sseq uint64, subj string) SimpleState {
|
||||
return ss
|
||||
}
|
||||
|
||||
// checkSkipFirstBlock is used to see if we can selectively jump start blocks based on filter subject.
// It reads the first (fblk) and last (lblk) block ids recorded in fs.psim for filter: a single
// Find when wc is false, or the minimum fblk / maximum lblk across all Match results when wc is true.
// Both ids are then passed through fs.bim and fs.selectMsgBlockWithIndex to produce the two returned ints.
|
||||
// Will return (-1, -1) if psim has no match for filter, or if no block could be located for the first id.
// The second return value stays 0 when fs.bim has no entry for the last block id.
|
||||
func (fs *fileStore) checkSkipFirstBlock(filter string, wc bool) (int, int) {
|
||||
// start begins at MaxUint32 so that any real fblk is smaller; it doubles as the "nothing found" sentinel below.
start, stop := uint32(math.MaxUint32), uint32(0)
|
||||
if wc {
|
||||
fs.psim.Match(stringToBytes(filter), func(_ []byte, psi *psi) {
|
||||
if psi.fblk < start {
|
||||
start = psi.fblk
|
||||
}
|
||||
if psi.lblk > stop {
|
||||
stop = psi.lblk
|
||||
}
|
||||
})
|
||||
} else if psi, ok := fs.psim.Find(stringToBytes(filter)); ok {
|
||||
start, stop = psi.fblk, psi.lblk
|
||||
}
|
||||
// Nothing found.
|
||||
if start == uint32(math.MaxUint32) {
|
||||
return -1, -1
|
||||
}
|
||||
// Here we need to translate this to index into fs.blks properly.
|
||||
mb := fs.bim[start]
|
||||
if mb == nil {
|
||||
// psim fblk can be lazy.
// Walk forward from start+1 to stop looking for a block that exists and reports a non-zero first sequence for filter.
|
||||
i := start + 1
|
||||
for ; i <= stop; i++ {
|
||||
mb = fs.bim[i]
|
||||
if mb == nil {
|
||||
continue
|
||||
}
|
||||
if _, f, _ := mb.filteredPending(filter, wc, 0); f > 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Update fblk since fblk was outdated.
// For a literal filter fblk is overwritten with i; for a wildcard filter each matching entry is only moved forward.
// NOTE(review): if the loop above ends without a break, i is stop+1 here and mb is either nil or the
// last block examined (for which the f > 0 check failed) — confirm this is the intended outcome.
|
||||
if !wc {
|
||||
if psi, ok := fs.psim.Find(stringToBytes(filter)); ok {
|
||||
psi.fblk = i
|
||||
}
|
||||
} else {
|
||||
fs.psim.Match(stringToBytes(filter), func(subj []byte, psi *psi) {
|
||||
if i > psi.fblk {
|
||||
psi.fblk = i
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
// Still nothing.
|
||||
if mb == nil {
|
||||
return -1, -1
|
||||
}
|
||||
// Grab first index.
// The lookup is keyed by the block's last sequence; the second result of selectMsgBlockWithIndex is discarded.
|
||||
fi, _ := fs.selectMsgBlockWithIndex(atomic.LoadUint64(&mb.last.seq))
|
||||
|
||||
// Grab last if applicable.
|
||||
var li int
|
||||
if mb = fs.bim[stop]; mb != nil {
|
||||
li, _ = fs.selectMsgBlockWithIndex(atomic.LoadUint64(&mb.last.seq))
|
||||
}
|
||||
|
||||
return fi, li
|
||||
}
|
||||
|
||||
// numFilteredPending is an optimized way for getting all num pending matching a filter subject.
// It delegates to numFilteredPendingWithLast with last set to true, so the last sequence is looked up
// as well, and the result is written into ss.
|
||||
// Lock should be held.
|
||||
func (fs *fileStore) numFilteredPending(filter string, ss *SimpleState) {
|
||||
fs.numFilteredPendingWithLast(filter, true, ss)
|
||||
}
|
||||
|
||||
// numFilteredPendingNoLast is an optimized way for getting all num pending matching a filter subject and first sequence only.
// It delegates to numFilteredPendingWithLast with last set to false, which skips the last sequence lookup;
// the result is written into ss.
|
||||
// Lock should be held.
|
||||
func (fs *fileStore) numFilteredPendingNoLast(filter string, ss *SimpleState) {
|
||||
fs.numFilteredPendingWithLast(filter, false, ss)
|
||||
}
|
||||
|
||||
// Optimized way for getting all num pending matching a filter subject.
|
||||
// Optionally look up last sequence. Sometimes do not need last and this avoids cost.
|
||||
// Lock should be held.
|
||||
func (fs *fileStore) numFilteredPendingWithLast(filter string, last bool, ss *SimpleState) {
|
||||
isAll := filter == _EMPTY_ || filter == fwcs
|
||||
|
||||
// If isAll we do not need to do anything special to calculate the first and last and total.
|
||||
@@ -2601,29 +2695,52 @@ func (fs *fileStore) numFilteredPending(filter string, ss *SimpleState) {
|
||||
ss.Msgs = fs.state.Msgs
|
||||
return
|
||||
}
|
||||
// Always reset.
|
||||
ss.First, ss.Last, ss.Msgs = 0, 0, 0
|
||||
|
||||
if filter == _EMPTY_ {
|
||||
filter = fwcs
|
||||
}
|
||||
|
||||
start, stop := uint32(math.MaxUint32), uint32(0)
|
||||
fs.psim.Match(stringToBytes(filter), func(_ []byte, psi *psi) {
|
||||
ss.Msgs += psi.total
|
||||
// Keep track of start and stop indexes for this subject.
|
||||
if psi.fblk < start {
|
||||
start = psi.fblk
|
||||
}
|
||||
if psi.lblk > stop {
|
||||
stop = psi.lblk
|
||||
}
|
||||
})
|
||||
// We do need to figure out the first and last sequences.
|
||||
wc := subjectHasWildcard(filter)
|
||||
start, stop := uint32(math.MaxUint32), uint32(0)
|
||||
|
||||
if wc {
|
||||
fs.psim.Match(stringToBytes(filter), func(_ []byte, psi *psi) {
|
||||
ss.Msgs += psi.total
|
||||
// Keep track of start and stop indexes for this subject.
|
||||
if psi.fblk < start {
|
||||
start = psi.fblk
|
||||
}
|
||||
if psi.lblk > stop {
|
||||
stop = psi.lblk
|
||||
}
|
||||
})
|
||||
} else if psi, ok := fs.psim.Find(stringToBytes(filter)); ok {
|
||||
ss.Msgs += psi.total
|
||||
start, stop = psi.fblk, psi.lblk
|
||||
}
|
||||
|
||||
// Did not find anything.
|
||||
if stop == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Do start
|
||||
mb := fs.bim[start]
|
||||
if mb != nil {
|
||||
_, f, _ := mb.filteredPending(filter, wc, 0)
|
||||
ss.First = f
|
||||
}
|
||||
|
||||
if ss.First == 0 {
|
||||
// This is a miss. This can happen since psi.fblk is lazy, but should be very rare.
|
||||
for i := start + 1; i <= stop; i++ {
|
||||
// This is a miss. This can happen since psi.fblk is lazy.
|
||||
// We will make sure to update fblk.
|
||||
|
||||
// Hold this outside loop for psim fblk updates when done.
|
||||
i := start + 1
|
||||
for ; i <= stop; i++ {
|
||||
mb := fs.bim[i]
|
||||
if mb == nil {
|
||||
continue
|
||||
@@ -2633,11 +2750,25 @@ func (fs *fileStore) numFilteredPending(filter string, ss *SimpleState) {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Update fblk since fblk was outdated.
|
||||
if !wc {
|
||||
if info, ok := fs.psim.Find(stringToBytes(filter)); ok {
|
||||
info.fblk = i
|
||||
}
|
||||
} else {
|
||||
fs.psim.Match(stringToBytes(filter), func(subj []byte, psi *psi) {
|
||||
if i > psi.fblk {
|
||||
psi.fblk = i
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
// Now last
|
||||
if mb = fs.bim[stop]; mb != nil {
|
||||
_, _, l := mb.filteredPending(filter, wc, 0)
|
||||
ss.Last = l
|
||||
// Now gather last sequence if asked to do so.
|
||||
if last {
|
||||
if mb = fs.bim[stop]; mb != nil {
|
||||
_, _, l := mb.filteredPending(filter, wc, 0)
|
||||
ss.Last = l
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2650,6 +2781,10 @@ func (fs *fileStore) SubjectsState(subject string) map[string]SimpleState {
|
||||
return nil
|
||||
}
|
||||
|
||||
if subject == _EMPTY_ {
|
||||
subject = fwcs
|
||||
}
|
||||
|
||||
start, stop := fs.blks[0], fs.lmb
|
||||
// We can short circuit if not a wildcard using psim for start and stop.
|
||||
if !subjectHasWildcard(subject) {
|
||||
@@ -2657,7 +2792,12 @@ func (fs *fileStore) SubjectsState(subject string) map[string]SimpleState {
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
start, stop = fs.bim[info.fblk], fs.bim[info.lblk]
|
||||
if f := fs.bim[info.fblk]; f != nil {
|
||||
start = f
|
||||
}
|
||||
if l := fs.bim[info.lblk]; l != nil {
|
||||
stop = l
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate fss.
|
||||
@@ -2681,21 +2821,20 @@ func (fs *fileStore) SubjectsState(subject string) map[string]SimpleState {
|
||||
}
|
||||
// Mark fss activity.
|
||||
mb.lsts = time.Now().UnixNano()
|
||||
for subj, ss := range mb.fss {
|
||||
if subject == _EMPTY_ || subject == fwcs || subjectIsSubsetMatch(subj, subject) {
|
||||
if ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(subj, ss.First, ss)
|
||||
}
|
||||
oss := fss[subj]
|
||||
if oss.First == 0 { // New
|
||||
fss[subj] = *ss
|
||||
} else {
|
||||
// Merge here.
|
||||
oss.Last, oss.Msgs = ss.Last, oss.Msgs+ss.Msgs
|
||||
fss[subj] = oss
|
||||
}
|
||||
mb.fss.Match(stringToBytes(subject), func(bsubj []byte, ss *SimpleState) {
|
||||
subj := string(bsubj)
|
||||
if ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(subj, ss.First, ss)
|
||||
}
|
||||
}
|
||||
oss := fss[subj]
|
||||
if oss.First == 0 { // New
|
||||
fss[subj] = *ss
|
||||
} else {
|
||||
// Merge here.
|
||||
oss.Last, oss.Msgs = ss.Last, oss.Msgs+ss.Msgs
|
||||
fss[subj] = oss
|
||||
}
|
||||
})
|
||||
if shouldExpire {
|
||||
// Expire this cache before moving on.
|
||||
mb.tryForceExpireCacheLocked()
|
||||
@@ -2723,6 +2862,10 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
|
||||
return 0, validThrough
|
||||
}
|
||||
|
||||
// If sseq is less than our first, set to first.
|
||||
if sseq < fs.state.FirstSeq {
|
||||
sseq = fs.state.FirstSeq
|
||||
}
|
||||
// Track starting for both block for the sseq and starting block that matches any subject.
|
||||
var seqStart int
|
||||
// See if we need to figure out starting block per sseq.
|
||||
@@ -2734,16 +2877,14 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
|
||||
}
|
||||
|
||||
isAll := filter == _EMPTY_ || filter == fwcs
|
||||
if isAll && filter == _EMPTY_ {
|
||||
filter = fwcs
|
||||
}
|
||||
wc := subjectHasWildcard(filter)
|
||||
|
||||
// See if filter was provided but its the only subject.
|
||||
if !isAll && !wc && fs.psim.Size() == 1 {
|
||||
if _, ok := fs.psim.Find(stringToBytes(filter)); ok {
|
||||
isAll = true
|
||||
}
|
||||
}
|
||||
if isAll && filter == _EMPTY_ {
|
||||
filter = fwcs
|
||||
_, isAll = fs.psim.Find(stringToBytes(filter))
|
||||
}
|
||||
// If we are isAll and have no deleted we can do a simpler calculation.
|
||||
if !lastPerSubject && isAll && (fs.state.LastSeq-fs.state.FirstSeq+1) == fs.state.Msgs {
|
||||
@@ -2753,8 +2894,9 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
|
||||
return fs.state.LastSeq - sseq + 1, validThrough
|
||||
}
|
||||
|
||||
var tsa, fsa [32]string
|
||||
fts := tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
_tsa, _fsa := [32]string{}, [32]string{}
|
||||
tsa, fsa := _tsa[:0], _fsa[:0]
|
||||
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
|
||||
isMatch := func(subj string) bool {
|
||||
if isAll {
|
||||
@@ -2763,8 +2905,8 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
|
||||
if !wc {
|
||||
return subj == filter
|
||||
}
|
||||
tts := tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tts, fts)
|
||||
tsa = tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tsa, fsa)
|
||||
}
|
||||
|
||||
// Handle last by subject a bit differently.
|
||||
@@ -2864,20 +3006,22 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
|
||||
mb.lsts = time.Now().UnixNano()
|
||||
|
||||
var havePartial bool
|
||||
for subj, ss := range mb.fss {
|
||||
if isMatch(subj) {
|
||||
if ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(subj, ss.First, ss)
|
||||
}
|
||||
if sseq <= ss.First {
|
||||
t += ss.Msgs
|
||||
} else if sseq <= ss.Last {
|
||||
// We matched but its a partial.
|
||||
havePartial = true
|
||||
break
|
||||
}
|
||||
mb.fss.Match(stringToBytes(filter), func(bsubj []byte, ss *SimpleState) {
|
||||
if havePartial {
|
||||
// If we already found a partial then don't do anything else.
|
||||
return
|
||||
}
|
||||
}
|
||||
subj := bytesToString(bsubj)
|
||||
if ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(subj, ss.First, ss)
|
||||
}
|
||||
if sseq <= ss.First {
|
||||
t += ss.Msgs
|
||||
} else if sseq <= ss.Last {
|
||||
// We matched but its a partial.
|
||||
havePartial = true
|
||||
}
|
||||
})
|
||||
|
||||
// See if we need to scan msgs here.
|
||||
if havePartial {
|
||||
@@ -2955,11 +3099,9 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
|
||||
// Mark fss activity.
|
||||
mb.lsts = time.Now().UnixNano()
|
||||
|
||||
for subj, ss := range mb.fss {
|
||||
if isMatch(subj) {
|
||||
adjust += ss.Msgs
|
||||
}
|
||||
}
|
||||
mb.fss.Match(stringToBytes(filter), func(bsubj []byte, ss *SimpleState) {
|
||||
adjust += ss.Msgs
|
||||
})
|
||||
}
|
||||
} else {
|
||||
// This is the last block. We need to scan per message here.
|
||||
@@ -3080,7 +3222,7 @@ func (fs *fileStore) newMsgBlockForWrite() (*msgBlock, error) {
|
||||
// Lock should be held to quiet race detector.
|
||||
mb.mu.Lock()
|
||||
mb.setupWriteCache(rbuf)
|
||||
mb.fss = make(map[string]*SimpleState)
|
||||
mb.fss = stree.NewSubjectTree[SimpleState]()
|
||||
|
||||
// Set cache time to creation time to start.
|
||||
ts := time.Now().UnixNano()
|
||||
@@ -3339,6 +3481,17 @@ func (mb *msgBlock) skipMsg(seq uint64, now time.Time) {
|
||||
mb.last.ts = nowts
|
||||
atomic.StoreUint64(&mb.first.seq, seq+1)
|
||||
mb.first.ts = nowts
|
||||
needsRecord = mb == mb.fs.lmb
|
||||
if needsRecord && mb.rbytes > 0 {
|
||||
// We want to make sure since we have no messages
|
||||
// that we write to the beginning since we only need last one.
|
||||
mb.rbytes, mb.cache = 0, &cache{}
|
||||
// If encrypted we need to reset counter since we just keep one.
|
||||
if mb.bek != nil {
|
||||
// Recreate to reset counter.
|
||||
mb.bek, _ = genBlockEncryptionKey(mb.fs.fcfg.Cipher, mb.seed, mb.nonce)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
needsRecord = true
|
||||
mb.dmap.Insert(seq)
|
||||
@@ -3521,10 +3674,11 @@ func (fs *fileStore) firstSeqForSubj(subj string) (uint64, error) {
|
||||
// Mark fss activity.
|
||||
mb.lsts = time.Now().UnixNano()
|
||||
|
||||
if ss := mb.fss[subj]; ss != nil {
|
||||
bsubj := stringToBytes(subj)
|
||||
if ss, ok := mb.fss.Find(bsubj); ok && ss != nil {
|
||||
// Adjust first if it was not where we thought it should be.
|
||||
if i != start {
|
||||
if info, ok := fs.psim.Find(stringToBytes(subj)); ok {
|
||||
if info, ok := fs.psim.Find(bsubj); ok {
|
||||
info.fblk = i
|
||||
}
|
||||
}
|
||||
@@ -3608,11 +3762,12 @@ func (fs *fileStore) enforceMsgPerSubjectLimit(fireCallback bool) {
|
||||
|
||||
// collect all that are not correct.
|
||||
needAttention := make(map[string]*psi)
|
||||
fs.psim.Match([]byte(fwcs), func(subj []byte, psi *psi) {
|
||||
fs.psim.Iter(func(subj []byte, psi *psi) bool {
|
||||
numMsgs += psi.total
|
||||
if psi.total > maxMsgsPer {
|
||||
needAttention[string(subj)] = psi
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
// We had an issue with a use case where psim (and hence fss) were correct but idx was not and was not properly being caught.
|
||||
@@ -3632,10 +3787,11 @@ func (fs *fileStore) enforceMsgPerSubjectLimit(fireCallback bool) {
|
||||
fs.rebuildStateLocked(nil)
|
||||
// Need to redo blocks that need attention.
|
||||
needAttention = make(map[string]*psi)
|
||||
fs.psim.Match([]byte(fwcs), func(subj []byte, psi *psi) {
|
||||
fs.psim.Iter(func(subj []byte, psi *psi) bool {
|
||||
if psi.total > maxMsgsPer {
|
||||
needAttention[string(subj)] = psi
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3657,8 +3813,8 @@ func (fs *fileStore) enforceMsgPerSubjectLimit(fireCallback bool) {
|
||||
// Grab the ss entry for this subject in case sparse.
|
||||
mb.mu.Lock()
|
||||
mb.ensurePerSubjectInfoLoaded()
|
||||
ss := mb.fss[subj]
|
||||
if ss != nil && ss.firstNeedsUpdate {
|
||||
ss, ok := mb.fss.Find(stringToBytes(subj))
|
||||
if ok && ss != nil && ss.firstNeedsUpdate {
|
||||
mb.recalculateFirstForSubj(subj, ss.First, ss)
|
||||
}
|
||||
mb.mu.Unlock()
|
||||
@@ -4753,11 +4909,11 @@ func (mb *msgBlock) writeMsgRecord(rl, seq uint64, subj string, mhdr, msg []byte
|
||||
}
|
||||
// Mark fss activity.
|
||||
mb.lsts = time.Now().UnixNano()
|
||||
if ss := mb.fss[subj]; ss != nil {
|
||||
if ss, ok := mb.fss.Find(stringToBytes(subj)); ok && ss != nil {
|
||||
ss.Msgs++
|
||||
ss.Last = seq
|
||||
} else {
|
||||
mb.fss[subj] = &SimpleState{Msgs: 1, First: seq, Last: seq}
|
||||
mb.fss.Insert(stringToBytes(subj), SimpleState{Msgs: 1, First: seq, Last: seq})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5358,7 +5514,7 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
|
||||
// Create FSS if we should track.
|
||||
var popFss bool
|
||||
if mb.fssNotLoaded() {
|
||||
mb.fss = make(map[string]*SimpleState)
|
||||
mb.fss = stree.NewSubjectTree[SimpleState]()
|
||||
popFss = true
|
||||
}
|
||||
// Mark fss activity.
|
||||
@@ -5425,15 +5581,15 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
|
||||
// Handle FSS inline here.
|
||||
if popFss && slen > 0 && !mb.noTrack && !erased && !mb.dmap.Exists(seq) {
|
||||
bsubj := buf[index+msgHdrSize : index+msgHdrSize+uint32(slen)]
|
||||
if ss := mb.fss[string(bsubj)]; ss != nil {
|
||||
if ss, ok := mb.fss.Find(bsubj); ok && ss != nil {
|
||||
ss.Msgs++
|
||||
ss.Last = seq
|
||||
} else {
|
||||
mb.fss[string(bsubj)] = &SimpleState{
|
||||
mb.fss.Insert(bsubj, SimpleState{
|
||||
Msgs: 1,
|
||||
First: seq,
|
||||
Last: seq,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6105,15 +6261,31 @@ func (fs *fileStore) loadLast(subj string, sm *StoreMsg) (lsm *StoreMsg, err err
|
||||
return nil, ErrStoreMsgNotFound
|
||||
}
|
||||
|
||||
start, stop := fs.lmb.index, fs.blks[0].index
|
||||
wc := subjectHasWildcard(subj)
|
||||
var start, stop uint32
|
||||
|
||||
// If literal subject check for presence.
|
||||
if !wc {
|
||||
if info, ok := fs.psim.Find(stringToBytes(subj)); !ok {
|
||||
if wc {
|
||||
start = fs.lmb.index
|
||||
fs.psim.Match(stringToBytes(subj), func(_ []byte, psi *psi) {
|
||||
// Keep track of start and stop indexes for this subject.
|
||||
if psi.fblk < start {
|
||||
start = psi.fblk
|
||||
}
|
||||
if psi.lblk > stop {
|
||||
stop = psi.lblk
|
||||
}
|
||||
})
|
||||
// None matched.
|
||||
if stop == 0 {
|
||||
return nil, ErrStoreMsgNotFound
|
||||
} else {
|
||||
start, stop = info.lblk, info.fblk
|
||||
}
|
||||
// These need to be swapped.
|
||||
start, stop = stop, start
|
||||
} else if info, ok := fs.psim.Find(stringToBytes(subj)); ok {
|
||||
start, stop = info.lblk, info.fblk
|
||||
} else {
|
||||
return nil, ErrStoreMsgNotFound
|
||||
}
|
||||
|
||||
// Walk blocks backwards.
|
||||
@@ -6133,7 +6305,7 @@ func (fs *fileStore) loadLast(subj string, sm *StoreMsg) (lsm *StoreMsg, err err
|
||||
var l uint64
|
||||
// Optimize if subject is not a wildcard.
|
||||
if !wc {
|
||||
if ss := mb.fss[subj]; ss != nil {
|
||||
if ss, ok := mb.fss.Find(stringToBytes(subj)); ok && ss != nil {
|
||||
l = ss.Last
|
||||
}
|
||||
}
|
||||
@@ -6227,7 +6399,12 @@ func (fs *fileStore) LoadNextMsg(filter string, wc bool, start uint64, sm *Store
|
||||
// let's check the psim to see if we can skip ahead.
|
||||
if start <= fs.state.FirstSeq {
|
||||
var ss SimpleState
|
||||
fs.numFilteredPending(filter, &ss)
|
||||
fs.numFilteredPendingNoLast(filter, &ss)
|
||||
// Nothing available.
|
||||
if ss.Msgs == 0 {
|
||||
return nil, fs.state.LastSeq, ErrStoreEOF
|
||||
}
|
||||
// We can skip ahead.
|
||||
if ss.First > start {
|
||||
start = ss.First
|
||||
}
|
||||
@@ -6243,8 +6420,27 @@ func (fs *fileStore) LoadNextMsg(filter string, wc bool, start uint64, sm *Store
|
||||
return sm, sm.seq, nil
|
||||
} else if err != ErrStoreMsgNotFound {
|
||||
return nil, 0, err
|
||||
} else if expireOk {
|
||||
mb.tryForceExpireCache()
|
||||
} else {
|
||||
// Nothing found in this block. We missed, if first block (bi) check psim.
|
||||
// Similar to above if start <= first seq.
|
||||
// TODO(dlc) - For v2 track these by filter subject since they will represent filtered consumers.
|
||||
if i == bi {
|
||||
nbi, lbi := fs.checkSkipFirstBlock(filter, wc)
|
||||
// Nothing available.
|
||||
if nbi < 0 || lbi <= bi {
|
||||
return nil, fs.state.LastSeq, ErrStoreEOF
|
||||
}
|
||||
// See if we can jump ahead here.
|
||||
// Right now we can only spin on first, so if we have interior sparseness need to favor checking per block fss if loaded.
|
||||
// For v2 will track all blocks that have matches for psim.
|
||||
if nbi > i {
|
||||
i = nbi - 1 // For the iterator condition i++
|
||||
}
|
||||
}
|
||||
// Check is we can expire.
|
||||
if expireOk {
|
||||
mb.tryForceExpireCache()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6824,11 +7020,13 @@ func (fs *fileStore) Compact(seq uint64) (uint64, error) {
|
||||
bytes += mb.bytes
|
||||
// Make sure we do subject cleanup as well.
|
||||
mb.ensurePerSubjectInfoLoaded()
|
||||
for subj, ss := range mb.fss {
|
||||
mb.fss.Iter(func(bsubj []byte, ss *SimpleState) bool {
|
||||
subj := bytesToString(bsubj)
|
||||
for i := uint64(0); i < ss.Msgs; i++ {
|
||||
fs.removePerSubject(subj)
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
// Now close.
|
||||
mb.dirtyCloseWithRemove(true)
|
||||
mb.mu.Unlock()
|
||||
@@ -7229,13 +7427,17 @@ func (mb *msgBlock) dirtyCloseWithRemove(remove bool) {
|
||||
// Lock should be held.
|
||||
func (mb *msgBlock) removeSeqPerSubject(subj string, seq uint64) {
|
||||
mb.ensurePerSubjectInfoLoaded()
|
||||
ss := mb.fss[subj]
|
||||
if ss == nil {
|
||||
if mb.fss == nil {
|
||||
return
|
||||
}
|
||||
bsubj := stringToBytes(subj)
|
||||
ss, ok := mb.fss.Find(bsubj)
|
||||
if !ok || ss == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if ss.Msgs == 1 {
|
||||
delete(mb.fss, subj)
|
||||
mb.fss.Delete(bsubj)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -7337,7 +7539,7 @@ func (mb *msgBlock) generatePerSubjectInfo() error {
|
||||
}
|
||||
|
||||
// Create new one regardless.
|
||||
mb.fss = make(map[string]*SimpleState)
|
||||
mb.fss = stree.NewSubjectTree[SimpleState]()
|
||||
|
||||
var smv StoreMsg
|
||||
fseq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq)
|
||||
@@ -7354,16 +7556,16 @@ func (mb *msgBlock) generatePerSubjectInfo() error {
|
||||
return err
|
||||
}
|
||||
if sm != nil && len(sm.subj) > 0 {
|
||||
if ss := mb.fss[sm.subj]; ss != nil {
|
||||
if ss, ok := mb.fss.Find(stringToBytes(sm.subj)); ok && ss != nil {
|
||||
ss.Msgs++
|
||||
ss.Last = seq
|
||||
} else {
|
||||
mb.fss[sm.subj] = &SimpleState{Msgs: 1, First: seq, Last: seq}
|
||||
mb.fss.Insert(stringToBytes(sm.subj), SimpleState{Msgs: 1, First: seq, Last: seq})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(mb.fss) > 0 {
|
||||
if mb.fss.Size() > 0 {
|
||||
// Make sure we run the cache expire timer.
|
||||
mb.llts = time.Now().UnixNano()
|
||||
// Mark fss activity.
|
||||
@@ -7384,7 +7586,7 @@ func (mb *msgBlock) ensurePerSubjectInfoLoaded() error {
|
||||
return nil
|
||||
}
|
||||
if mb.msgs == 0 {
|
||||
mb.fss = make(map[string]*SimpleState)
|
||||
mb.fss = stree.NewSubjectTree[SimpleState]()
|
||||
return nil
|
||||
}
|
||||
return mb.generatePerSubjectInfo()
|
||||
@@ -7401,9 +7603,8 @@ func (fs *fileStore) populateGlobalPerSubjectInfo(mb *msgBlock) {
|
||||
}
|
||||
|
||||
// Now populate psim.
|
||||
for subj, ss := range mb.fss {
|
||||
if len(subj) > 0 {
|
||||
bsubj := stringToBytes(subj)
|
||||
mb.fss.Iter(func(bsubj []byte, ss *SimpleState) bool {
|
||||
if len(bsubj) > 0 {
|
||||
if info, ok := fs.psim.Find(bsubj); ok {
|
||||
info.total += ss.Msgs
|
||||
if mb.index > info.lblk {
|
||||
@@ -7411,10 +7612,11 @@ func (fs *fileStore) populateGlobalPerSubjectInfo(mb *msgBlock) {
|
||||
}
|
||||
} else {
|
||||
fs.psim.Insert(bsubj, psi{total: ss.Msgs, fblk: mb.index, lblk: mb.index})
|
||||
fs.tsl += len(subj)
|
||||
fs.tsl += len(bsubj)
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// Close the message block.
|
||||
@@ -7486,10 +7688,23 @@ func (fs *fileStore) Delete() error {
|
||||
os.RemoveAll(pdir)
|
||||
}
|
||||
|
||||
// Do Purge() since if we have lots of blocks uses a mv/rename.
|
||||
fs.Purge()
|
||||
// Quickly close all blocks and simulate a purge w/o overhead an new write block.
|
||||
fs.mu.Lock()
|
||||
for _, mb := range fs.blks {
|
||||
mb.dirtyClose()
|
||||
}
|
||||
dmsgs := fs.state.Msgs
|
||||
dbytes := int64(fs.state.Bytes)
|
||||
fs.state.Msgs, fs.state.Bytes = 0, 0
|
||||
fs.blks = nil
|
||||
cb := fs.scb
|
||||
fs.mu.Unlock()
|
||||
|
||||
if err := fs.stop(false); err != nil {
|
||||
if cb != nil {
|
||||
cb(-int64(dmsgs), -dbytes, 0, _EMPTY_)
|
||||
}
|
||||
|
||||
if err := fs.stop(true, false); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -7505,14 +7720,19 @@ func (fs *fileStore) Delete() error {
|
||||
// Do this in separate Go routine in case lots of blocks.
|
||||
// Purge above protects us as does the removal of meta artifacts above.
|
||||
go func() {
|
||||
<-dios
|
||||
err := os.RemoveAll(ndir)
|
||||
dios <- struct{}{}
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
ttl := time.Now().Add(time.Second)
|
||||
for time.Now().Before(ttl) {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
if err = os.RemoveAll(ndir); err == nil {
|
||||
<-dios
|
||||
err = os.RemoveAll(ndir)
|
||||
dios <- struct{}{}
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -7778,11 +7998,11 @@ func (fs *fileStore) _writeFullState(force bool) error {
|
||||
|
||||
// Stop the current filestore.
|
||||
func (fs *fileStore) Stop() error {
|
||||
return fs.stop(true)
|
||||
return fs.stop(false, true)
|
||||
}
|
||||
|
||||
// Stop the current filestore.
|
||||
func (fs *fileStore) stop(writeState bool) error {
|
||||
func (fs *fileStore) stop(delete, writeState bool) error {
|
||||
fs.mu.Lock()
|
||||
if fs.closed || fs.closing {
|
||||
fs.mu.Unlock()
|
||||
@@ -7833,7 +8053,11 @@ func (fs *fileStore) stop(writeState bool) error {
|
||||
fs.cmu.Unlock()
|
||||
|
||||
for _, o := range cfs {
|
||||
o.Stop()
|
||||
if delete {
|
||||
o.StreamDelete()
|
||||
} else {
|
||||
o.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
if bytes > 0 && cb != nil {
|
||||
@@ -8550,7 +8774,8 @@ func (o *consumerFileStore) UpdateDelivered(dseq, sseq, dc uint64, ts int64) err
|
||||
// Check for an update to a message already delivered.
|
||||
if sseq <= o.state.Delivered.Stream {
|
||||
if p = o.state.Pending[sseq]; p != nil {
|
||||
p.Sequence, p.Timestamp = dseq, ts
|
||||
// Do not update p.Sequence, that should be the original delivery sequence.
|
||||
p.Timestamp = ts
|
||||
}
|
||||
} else {
|
||||
// Add to pending.
|
||||
@@ -8608,7 +8833,14 @@ func (o *consumerFileStore) UpdateAcks(dseq, sseq uint64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Match leader logic on checking if ack is ahead of delivered.
|
||||
// This could happen on a cooperative takeover with high speed deliveries.
|
||||
if sseq > o.state.Delivered.Stream {
|
||||
o.state.Delivered.Stream = sseq + 1
|
||||
}
|
||||
|
||||
if len(o.state.Pending) == 0 || o.state.Pending[sseq] == nil {
|
||||
delete(o.state.Redelivered, sseq)
|
||||
return ErrStoreMsgNotFound
|
||||
}
|
||||
|
||||
@@ -8639,7 +8871,9 @@ func (o *consumerFileStore) UpdateAcks(dseq, sseq uint64) error {
|
||||
// First delete from our pending state.
|
||||
if p, ok := o.state.Pending[sseq]; ok {
|
||||
delete(o.state.Pending, sseq)
|
||||
dseq = p.Sequence // Use the original.
|
||||
if dseq > p.Sequence && p.Sequence > 0 {
|
||||
dseq = p.Sequence // Use the original.
|
||||
}
|
||||
}
|
||||
if len(o.state.Pending) == 0 {
|
||||
o.state.AckFloor.Consumer = o.state.Delivered.Consumer
|
||||
|
||||
+32
-17
@@ -1440,7 +1440,11 @@ func (a *Account) maxBytesLimits(cfg *StreamConfig) (bool, int64) {
|
||||
return false, 0
|
||||
}
|
||||
jsa.usageMu.RLock()
|
||||
selectedLimits, _, ok := jsa.selectLimits(cfg)
|
||||
var replicas int
|
||||
if cfg != nil {
|
||||
replicas = cfg.Replicas
|
||||
}
|
||||
selectedLimits, _, ok := jsa.selectLimits(replicas)
|
||||
jsa.usageMu.RUnlock()
|
||||
if !ok {
|
||||
return false, 0
|
||||
@@ -1590,7 +1594,7 @@ func diffCheckedLimits(a, b map[string]JetStreamAccountLimits) map[string]JetStr
|
||||
func (jsa *jsAccount) reservedStorage(tier string) (mem, store uint64) {
|
||||
for _, mset := range jsa.streams {
|
||||
cfg := &mset.cfg
|
||||
if tier == _EMPTY_ || tier == tierName(cfg) && cfg.MaxBytes > 0 {
|
||||
if tier == _EMPTY_ || tier == tierName(cfg.Replicas) && cfg.MaxBytes > 0 {
|
||||
switch cfg.Storage {
|
||||
case FileStorage:
|
||||
store += uint64(cfg.MaxBytes)
|
||||
@@ -1607,7 +1611,7 @@ func (jsa *jsAccount) reservedStorage(tier string) (mem, store uint64) {
|
||||
func reservedStorage(sas map[string]*streamAssignment, tier string) (mem, store uint64) {
|
||||
for _, sa := range sas {
|
||||
cfg := sa.Config
|
||||
if tier == _EMPTY_ || tier == tierName(cfg) && cfg.MaxBytes > 0 {
|
||||
if tier == _EMPTY_ || tier == tierName(cfg.Replicas) && cfg.MaxBytes > 0 {
|
||||
switch cfg.Storage {
|
||||
case FileStorage:
|
||||
store += uint64(cfg.MaxBytes)
|
||||
@@ -1695,17 +1699,29 @@ func (a *Account) JetStreamUsage() JetStreamAccountStats {
|
||||
stats.ReservedMemory, stats.ReservedStore = reservedStorage(sas, _EMPTY_)
|
||||
}
|
||||
for _, sa := range sas {
|
||||
stats.Consumers += len(sa.consumers)
|
||||
if !defaultTier {
|
||||
tier := tierName(sa.Config)
|
||||
u, ok := stats.Tiers[tier]
|
||||
if !ok {
|
||||
u = JetStreamTier{}
|
||||
}
|
||||
u.Streams++
|
||||
if defaultTier {
|
||||
stats.Consumers += len(sa.consumers)
|
||||
} else {
|
||||
stats.Streams++
|
||||
u.Consumers += len(sa.consumers)
|
||||
stats.Tiers[tier] = u
|
||||
streamTier := tierName(sa.Config.Replicas)
|
||||
su, ok := stats.Tiers[streamTier]
|
||||
if !ok {
|
||||
su = JetStreamTier{}
|
||||
}
|
||||
su.Streams++
|
||||
stats.Tiers[streamTier] = su
|
||||
|
||||
// Now consumers, check each since could be different tiers.
|
||||
for _, ca := range sa.consumers {
|
||||
stats.Consumers++
|
||||
consumerTier := tierName(ca.Config.replicas(sa.Config))
|
||||
cu, ok := stats.Tiers[consumerTier]
|
||||
if !ok {
|
||||
cu = JetStreamTier{}
|
||||
}
|
||||
cu.Consumers++
|
||||
stats.Tiers[consumerTier] = cu
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -2089,9 +2105,8 @@ func (js *jetStream) limitsExceeded(storeType StorageType) bool {
|
||||
return js.wouldExceedLimits(storeType, 0)
|
||||
}
|
||||
|
||||
func tierName(cfg *StreamConfig) string {
|
||||
func tierName(replicas int) string {
|
||||
// TODO (mh) this is where we could select based off a placement tag as well "qos:tier"
|
||||
replicas := cfg.Replicas
|
||||
if replicas == 0 {
|
||||
replicas = 1
|
||||
}
|
||||
@@ -2111,11 +2126,11 @@ func (jsa *jsAccount) jetStreamAndClustered() (*jetStream, bool) {
|
||||
}
|
||||
|
||||
// jsa.usageMu read lock should be held.
|
||||
func (jsa *jsAccount) selectLimits(cfg *StreamConfig) (JetStreamAccountLimits, string, bool) {
|
||||
func (jsa *jsAccount) selectLimits(replicas int) (JetStreamAccountLimits, string, bool) {
|
||||
if selectedLimits, ok := jsa.limits[_EMPTY_]; ok {
|
||||
return selectedLimits, _EMPTY_, true
|
||||
}
|
||||
tier := tierName(cfg)
|
||||
tier := tierName(replicas)
|
||||
if selectedLimits, ok := jsa.limits[tier]; ok {
|
||||
return selectedLimits, tier, true
|
||||
}
|
||||
|
||||
+5
-1
@@ -3267,7 +3267,11 @@ func (s *Server) jsStreamPurgeRequest(sub *subscription, c *client, _ *Account,
|
||||
}
|
||||
|
||||
func (acc *Account) jsNonClusteredStreamLimitsCheck(cfg *StreamConfig) *ApiError {
|
||||
selectedLimits, tier, jsa, apiErr := acc.selectLimits(cfg)
|
||||
var replicas int
|
||||
if cfg != nil {
|
||||
replicas = cfg.Replicas
|
||||
}
|
||||
selectedLimits, tier, jsa, apiErr := acc.selectLimits(replicas)
|
||||
if apiErr != nil {
|
||||
return apiErr
|
||||
}
|
||||
|
||||
+105
-47
@@ -534,12 +534,18 @@ func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// If we are catching up return false.
|
||||
if mset.isCatchingUp() {
|
||||
// If R1 we are good.
|
||||
if node == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
// Here we are a replicated stream.
|
||||
// First make sure our monitor routine is running.
|
||||
if !mset.isMonitorRunning() {
|
||||
return false
|
||||
}
|
||||
|
||||
if node == nil || node.Healthy() {
|
||||
if node.Healthy() {
|
||||
// Check if we are processing a snapshot and are catching up.
|
||||
if !mset.isCatchingUp() {
|
||||
return true
|
||||
@@ -553,7 +559,6 @@ func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) bool {
|
||||
js.restartStream(acc, sa)
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -863,6 +868,8 @@ func (js *jetStream) setupMetaGroup() error {
|
||||
atomic.StoreInt32(&js.clustered, 1)
|
||||
c.registerWithAccount(sacc)
|
||||
|
||||
// Set to true before we start.
|
||||
js.metaRecovering = true
|
||||
js.srv.startGoRoutine(
|
||||
js.monitorCluster,
|
||||
pprofLabels{
|
||||
@@ -2164,7 +2171,7 @@ func genPeerInfo(peers []string, split int) (newPeers, oldPeers []string, newPee
|
||||
// Should only be called from monitorStream.
|
||||
func (mset *stream) waitOnConsumerAssignments() {
|
||||
mset.mu.RLock()
|
||||
s, js, acc, sa, name := mset.srv, mset.js, mset.acc, mset.sa, mset.cfg.Name
|
||||
s, js, acc, sa, name, replicas := mset.srv, mset.js, mset.acc, mset.sa, mset.cfg.Name, mset.cfg.Replicas
|
||||
mset.mu.RUnlock()
|
||||
|
||||
if s == nil || js == nil || acc == nil || sa == nil {
|
||||
@@ -2186,6 +2193,9 @@ func (mset *stream) waitOnConsumerAssignments() {
|
||||
for _, o := range mset.getConsumers() {
|
||||
// Make sure we are registered with our consumer assignment.
|
||||
if ca := o.consumerAssignment(); ca != nil {
|
||||
if replicas > 1 && !o.isMonitorRunning() {
|
||||
break
|
||||
}
|
||||
numReady++
|
||||
} else {
|
||||
break
|
||||
@@ -2373,7 +2383,8 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
|
||||
// since we process streams first then consumers as an asset class.
|
||||
mset.waitOnConsumerAssignments()
|
||||
// Setup a periodic check here.
|
||||
cist = time.NewTicker(30 * time.Second)
|
||||
// We will fire in 5s the first time then back off to 30s
|
||||
cist = time.NewTicker(5 * time.Second)
|
||||
cistc = cist.C
|
||||
}
|
||||
|
||||
@@ -2496,7 +2507,9 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
|
||||
}
|
||||
|
||||
case <-cistc:
|
||||
mset.checkInterestState()
|
||||
cist.Reset(30 * time.Second)
|
||||
// We may be adjusting some things with consumers so do this in its own go routine.
|
||||
go mset.checkInterestState()
|
||||
|
||||
case <-datc:
|
||||
if mset == nil || isRecovering {
|
||||
@@ -4096,7 +4109,7 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) {
|
||||
sa.consumers = make(map[string]*consumerAssignment)
|
||||
} else if oca := sa.consumers[ca.Name]; oca != nil {
|
||||
wasExisting = true
|
||||
// Copy over private existing state from former SA.
|
||||
// Copy over private existing state from former CA.
|
||||
if ca.Group != nil {
|
||||
ca.Group.node = oca.Group.node
|
||||
}
|
||||
@@ -4423,11 +4436,15 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state
|
||||
s.sendInternalMsgLocked(consumerAssignmentSubj, _EMPTY_, nil, b)
|
||||
}
|
||||
} else {
|
||||
js.mu.RLock()
|
||||
node := rg.node
|
||||
js.mu.RUnlock()
|
||||
|
||||
if didCreate {
|
||||
o.setCreatedTime(ca.Created)
|
||||
} else {
|
||||
// Check for scale down to 1..
|
||||
if rg.node != nil && len(rg.Peers) == 1 {
|
||||
if node != nil && len(rg.Peers) == 1 {
|
||||
o.clearNode()
|
||||
o.setLeader(true)
|
||||
// Need to clear from rg too.
|
||||
@@ -4442,7 +4459,7 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state
|
||||
}
|
||||
}
|
||||
|
||||
if rg.node == nil {
|
||||
if node == nil {
|
||||
// Single replica consumer, process manually here.
|
||||
js.mu.Lock()
|
||||
// Force response in case we think this is an update.
|
||||
@@ -4912,7 +4929,22 @@ func (js *jetStream) applyConsumerEntries(o *consumer, ce *CommittedEntry, isLea
|
||||
}
|
||||
}
|
||||
// Check our interest state if applicable.
|
||||
o.checkStateForInterestStream()
|
||||
if err := o.checkStateForInterestStream(); err == errAckFloorHigherThanLastSeq {
|
||||
o.mu.RLock()
|
||||
mset := o.mset
|
||||
o.mu.RUnlock()
|
||||
// Register pre-acks unless no state at all for the stream and we would create alot of pre-acks.
|
||||
mset.mu.Lock()
|
||||
var ss StreamState
|
||||
mset.store.FastState(&ss)
|
||||
// Only register if we have a valid FirstSeq.
|
||||
if ss.FirstSeq > 0 {
|
||||
for seq := ss.FirstSeq; seq < state.AckFloor.Stream; seq++ {
|
||||
mset.registerPreAck(o, seq)
|
||||
}
|
||||
}
|
||||
mset.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
} else if e.Type == EntryRemovePeer {
|
||||
@@ -5161,9 +5193,7 @@ func (js *jetStream) processConsumerLeaderChange(o *consumer, isLeader bool) err
|
||||
} else {
|
||||
resp.ConsumerInfo = o.initialInfo()
|
||||
s.sendAPIResponse(client, acc, subject, reply, _EMPTY_, s.jsonResponse(&resp))
|
||||
if node := o.raftNode(); node != nil {
|
||||
o.sendCreateAdvisory()
|
||||
}
|
||||
o.sendCreateAdvisory()
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -5954,7 +5984,7 @@ func (js *jetStream) createGroupForStream(ci *ClientInfo, cfg *StreamConfig) (*r
|
||||
return nil, errs
|
||||
}
|
||||
|
||||
func (acc *Account) selectLimits(cfg *StreamConfig) (*JetStreamAccountLimits, string, *jsAccount, *ApiError) {
|
||||
func (acc *Account) selectLimits(replicas int) (*JetStreamAccountLimits, string, *jsAccount, *ApiError) {
|
||||
// Grab our jetstream account info.
|
||||
acc.mu.RLock()
|
||||
jsa := acc.js
|
||||
@@ -5965,7 +5995,7 @@ func (acc *Account) selectLimits(cfg *StreamConfig) (*JetStreamAccountLimits, st
|
||||
}
|
||||
|
||||
jsa.usageMu.RLock()
|
||||
selectedLimits, tierName, ok := jsa.selectLimits(cfg)
|
||||
selectedLimits, tierName, ok := jsa.selectLimits(replicas)
|
||||
jsa.usageMu.RUnlock()
|
||||
|
||||
if !ok {
|
||||
@@ -5976,7 +6006,11 @@ func (acc *Account) selectLimits(cfg *StreamConfig) (*JetStreamAccountLimits, st
|
||||
|
||||
// Read lock needs to be held
|
||||
func (js *jetStream) jsClusteredStreamLimitsCheck(acc *Account, cfg *StreamConfig) *ApiError {
|
||||
selectedLimits, tier, _, apiErr := acc.selectLimits(cfg)
|
||||
var replicas int
|
||||
if cfg != nil {
|
||||
replicas = cfg.Replicas
|
||||
}
|
||||
selectedLimits, tier, _, apiErr := acc.selectLimits(replicas)
|
||||
if apiErr != nil {
|
||||
return apiErr
|
||||
}
|
||||
@@ -7113,7 +7147,7 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec
|
||||
s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp))
|
||||
return
|
||||
}
|
||||
selectedLimits, _, _, apiErr := acc.selectLimits(&streamCfg)
|
||||
selectedLimits, _, _, apiErr := acc.selectLimits(cfg.replicas(&streamCfg))
|
||||
if apiErr != nil {
|
||||
resp.Error = apiErr
|
||||
s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp))
|
||||
@@ -7144,25 +7178,45 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec
|
||||
return
|
||||
}
|
||||
|
||||
// Was a consumer name provided?
|
||||
var oname string
|
||||
if isDurableConsumer(cfg) || cfg.Name != _EMPTY_ {
|
||||
if cfg.Name != _EMPTY_ {
|
||||
oname = cfg.Name
|
||||
} else {
|
||||
oname = cfg.Durable
|
||||
}
|
||||
}
|
||||
|
||||
// Check for max consumers here to short circuit if possible.
|
||||
// Start with limit on a stream, but if one is defined at the level of the account
|
||||
// and is lower, use that limit.
|
||||
maxc := sa.Config.MaxConsumers
|
||||
if maxc <= 0 || (selectedLimits.MaxConsumers > 0 && selectedLimits.MaxConsumers < maxc) {
|
||||
maxc = selectedLimits.MaxConsumers
|
||||
}
|
||||
if maxc > 0 {
|
||||
// Don't count DIRECTS.
|
||||
total := 0
|
||||
for _, ca := range sa.consumers {
|
||||
if ca.Config != nil && !ca.Config.Direct {
|
||||
total++
|
||||
}
|
||||
if action == ActionCreate || action == ActionCreateOrUpdate {
|
||||
maxc := sa.Config.MaxConsumers
|
||||
if maxc <= 0 || (selectedLimits.MaxConsumers > 0 && selectedLimits.MaxConsumers < maxc) {
|
||||
maxc = selectedLimits.MaxConsumers
|
||||
}
|
||||
if total >= maxc {
|
||||
resp.Error = NewJSMaximumConsumersLimitError()
|
||||
s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp))
|
||||
return
|
||||
if maxc > 0 {
|
||||
// Don't count DIRECTS.
|
||||
total := 0
|
||||
for cn, ca := range sa.consumers {
|
||||
if action == ActionCreateOrUpdate {
|
||||
// If the consumer name is specified and we think it already exists, then
|
||||
// we're likely updating an existing consumer, so don't count it. Otherwise
|
||||
// we will incorrectly return NewJSMaximumConsumersLimitError for an update.
|
||||
if oname != _EMPTY_ && cn == oname && sa.consumers[oname] != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if ca.Config != nil && !ca.Config.Direct {
|
||||
total++
|
||||
}
|
||||
}
|
||||
if total >= maxc {
|
||||
resp.Error = NewJSMaximumConsumersLimitError()
|
||||
s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp))
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7189,16 +7243,10 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec
|
||||
}
|
||||
|
||||
var ca *consumerAssignment
|
||||
var oname string
|
||||
|
||||
// See if we have an existing one already under same durable name or
|
||||
// if name was set by the user.
|
||||
if isDurableConsumer(cfg) || cfg.Name != _EMPTY_ {
|
||||
if cfg.Name != _EMPTY_ {
|
||||
oname = cfg.Name
|
||||
} else {
|
||||
oname = cfg.Durable
|
||||
}
|
||||
if oname != _EMPTY_ {
|
||||
if ca = sa.consumers[oname]; ca != nil && !ca.deleted {
|
||||
if action == ActionCreate && !reflect.DeepEqual(cfg, ca.Config) {
|
||||
resp.Error = NewJSConsumerAlreadyExistsError()
|
||||
@@ -7615,7 +7663,10 @@ func (mset *stream) stateSnapshot() []byte {
|
||||
func (mset *stream) stateSnapshotLocked() []byte {
|
||||
// Decide if we can support the new style of stream snapshots.
|
||||
if mset.supportsBinarySnapshotLocked() {
|
||||
snap, _ := mset.store.EncodedStreamState(mset.getCLFS())
|
||||
snap, err := mset.store.EncodedStreamState(mset.getCLFS())
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return snap
|
||||
}
|
||||
|
||||
@@ -7707,7 +7758,7 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [
|
||||
if err == nil {
|
||||
err = NewJSAccountResourcesExceededError()
|
||||
}
|
||||
s.RateLimitWarnf(err.Error())
|
||||
s.RateLimitWarnf("JetStream account limits exceeded for '%s': %s", jsa.acc().GetName(), err.Error())
|
||||
if canRespond {
|
||||
var resp = &JSPubAckResponse{PubAck: &PubAck{Stream: name}}
|
||||
resp.Error = err
|
||||
@@ -8085,8 +8136,11 @@ func (mset *stream) processSnapshot(snap *StreamReplicatedState) (e error) {
|
||||
var sub *subscription
|
||||
var err error
|
||||
|
||||
const activityInterval = 30 * time.Second
|
||||
notActive := time.NewTimer(activityInterval)
|
||||
const (
|
||||
startInterval = 5 * time.Second
|
||||
activityInterval = 30 * time.Second
|
||||
)
|
||||
notActive := time.NewTimer(startInterval)
|
||||
defer notActive.Stop()
|
||||
|
||||
defer func() {
|
||||
@@ -8169,7 +8223,7 @@ RETRY:
|
||||
default:
|
||||
}
|
||||
}
|
||||
notActive.Reset(activityInterval)
|
||||
notActive.Reset(startInterval)
|
||||
|
||||
// Grab sync request again on failures.
|
||||
if sreq == nil {
|
||||
@@ -8214,8 +8268,10 @@ RETRY:
|
||||
// Send our sync request.
|
||||
b, _ := json.Marshal(sreq)
|
||||
s.sendInternalMsgLocked(subject, reply, nil, b)
|
||||
|
||||
// Remember when we sent this out to avoid loop spins on errors below.
|
||||
reqSendTime := time.Now()
|
||||
|
||||
// Clear our sync request.
|
||||
sreq = nil
|
||||
|
||||
@@ -8764,7 +8820,7 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
|
||||
done = maxOutMsgs-atomic.LoadInt32(&outm) > minBatchWait
|
||||
if !done {
|
||||
// Wait for a small bit.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
} else {
|
||||
// GC friendly.
|
||||
mw.Stop()
|
||||
@@ -8853,7 +8909,9 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
|
||||
mset.account(), mset.name(), seq, state)
|
||||
// Try our best to redo our invalidated snapshot as well.
|
||||
if n := mset.raftNode(); n != nil {
|
||||
n.InstallSnapshot(mset.stateSnapshot())
|
||||
if snap := mset.stateSnapshot(); snap != nil {
|
||||
n.InstallSnapshot(snap)
|
||||
}
|
||||
}
|
||||
// If we allow gap markers check if we have one pending.
|
||||
if drOk && dr.First > 0 {
|
||||
|
||||
+6
@@ -584,6 +584,9 @@ func (s *Server) clearObserverState(remote *leafNodeCfg) {
|
||||
return
|
||||
}
|
||||
|
||||
acc.jscmMu.Lock()
|
||||
defer acc.jscmMu.Unlock()
|
||||
|
||||
// Walk all streams looking for any clustered stream, skip otherwise.
|
||||
for _, mset := range acc.streams() {
|
||||
node := mset.raftNode()
|
||||
@@ -619,6 +622,9 @@ func (s *Server) checkJetStreamMigrate(remote *leafNodeCfg) {
|
||||
return
|
||||
}
|
||||
|
||||
acc.jscmMu.Lock()
|
||||
defer acc.jscmMu.Unlock()
|
||||
|
||||
// Walk all streams looking for any clustered stream, skip otherwise.
|
||||
// If we are the leader force stepdown.
|
||||
for _, mset := range acc.streams() {
|
||||
|
||||
+110
-49
@@ -261,7 +261,7 @@ func (ms *memStore) SkipMsg() uint64 {
|
||||
ms.state.LastSeq = seq
|
||||
ms.state.LastTime = now
|
||||
if ms.state.Msgs == 0 {
|
||||
ms.state.FirstSeq = seq
|
||||
ms.state.FirstSeq = seq + 1
|
||||
ms.state.FirstTime = now
|
||||
} else {
|
||||
ms.dmap.Insert(seq)
|
||||
@@ -389,9 +389,9 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
|
||||
}
|
||||
}
|
||||
|
||||
tsa := [32]string{}
|
||||
fsa := [32]string{}
|
||||
fts := tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
_tsa, _fsa := [32]string{}, [32]string{}
|
||||
tsa, fsa := _tsa[:0], _fsa[:0]
|
||||
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
|
||||
wc := subjectHasWildcard(filter)
|
||||
|
||||
// 1. See if we match any subs from fss.
|
||||
@@ -405,8 +405,8 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
|
||||
if !wc {
|
||||
return subj == filter
|
||||
}
|
||||
tts := tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tts, fts)
|
||||
tsa = tokenizeSubjectIntoSlice(tsa[:0], subj)
|
||||
return isSubsetMatchTokenized(tsa, fsa)
|
||||
}
|
||||
|
||||
update := func(fss *SimpleState) {
|
||||
@@ -426,9 +426,8 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
|
||||
var havePartial bool
|
||||
// We will track start and end sequences as we go.
|
||||
ms.fss.Match(stringToBytes(filter), func(subj []byte, fss *SimpleState) {
|
||||
subjs := bytesToString(subj)
|
||||
if fss.firstNeedsUpdate {
|
||||
ms.recalculateFirstForSubj(subjs, fss.First, fss)
|
||||
ms.recalculateFirstForSubj(bytesToString(subj), fss.First, fss)
|
||||
}
|
||||
if sseq <= fss.First {
|
||||
update(fss)
|
||||
@@ -465,14 +464,28 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
|
||||
}
|
||||
if toScan < toExclude {
|
||||
ss.Msgs, ss.First = 0, 0
|
||||
for seq := first; seq <= last; seq++ {
|
||||
if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) {
|
||||
ss.Msgs++
|
||||
if ss.First == 0 {
|
||||
ss.First = seq
|
||||
|
||||
update := func(sm *StoreMsg) {
|
||||
ss.Msgs++
|
||||
if ss.First == 0 {
|
||||
ss.First = sm.seq
|
||||
}
|
||||
if seen != nil {
|
||||
seen[sm.subj] = true
|
||||
}
|
||||
}
|
||||
// Check if easier to just scan msgs vs the sequence range.
|
||||
// This can happen with lots of interior deletes.
|
||||
if last-first > uint64(len(ms.msgs)) {
|
||||
for _, sm := range ms.msgs {
|
||||
if sm.seq >= first && sm.seq <= last && !seen[sm.subj] && isMatch(sm.subj) {
|
||||
update(sm)
|
||||
}
|
||||
if seen != nil {
|
||||
seen[sm.subj] = true
|
||||
}
|
||||
} else {
|
||||
for seq := first; seq <= last; seq++ {
|
||||
if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) {
|
||||
update(sm)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -482,17 +495,29 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
|
||||
var adjust uint64
|
||||
var tss *SimpleState
|
||||
|
||||
for seq := ms.state.FirstSeq; seq < first; seq++ {
|
||||
if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) {
|
||||
if lastPerSubject {
|
||||
tss, _ = ms.fss.Find(stringToBytes(sm.subj))
|
||||
update := func(sm *StoreMsg) {
|
||||
if lastPerSubject {
|
||||
tss, _ = ms.fss.Find(stringToBytes(sm.subj))
|
||||
}
|
||||
// If we are last per subject, make sure to only adjust if all messages are before our first.
|
||||
if tss == nil || tss.Last < first {
|
||||
adjust++
|
||||
}
|
||||
if seen != nil {
|
||||
seen[sm.subj] = true
|
||||
}
|
||||
}
|
||||
// Check if easier to just scan msgs vs the sequence range.
|
||||
if first-ms.state.FirstSeq > uint64(len(ms.msgs)) {
|
||||
for _, sm := range ms.msgs {
|
||||
if sm.seq < first && !seen[sm.subj] && isMatch(sm.subj) {
|
||||
update(sm)
|
||||
}
|
||||
// If we are last per subject, make sure to only adjust if all messages are before our first.
|
||||
if tss == nil || tss.Last < first {
|
||||
adjust++
|
||||
}
|
||||
if seen != nil {
|
||||
seen[sm.subj] = true
|
||||
}
|
||||
} else {
|
||||
for seq := ms.state.FirstSeq; seq < first; seq++ {
|
||||
if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) {
|
||||
update(sm)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -507,10 +532,27 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
|
||||
}
|
||||
ss.Msgs -= adjust
|
||||
if needScanFirst {
|
||||
for seq := first; seq < last; seq++ {
|
||||
if sm, ok := ms.msgs[seq]; ok && isMatch(sm.subj) {
|
||||
ss.First = seq
|
||||
break
|
||||
// Check if easier to just scan msgs vs the sequence range.
|
||||
// Since we will need to scan all of the msgs vs below where we break on the first match,
|
||||
// we will only do so if a few orders of magnitude lower.
|
||||
if last-first > 100*uint64(len(ms.msgs)) {
|
||||
low := ms.state.LastSeq
|
||||
for _, sm := range ms.msgs {
|
||||
if sm.seq >= first && sm.seq < last && isMatch(sm.subj) {
|
||||
if sm.seq < low {
|
||||
low = sm.seq
|
||||
}
|
||||
}
|
||||
}
|
||||
if low < ms.state.LastSeq {
|
||||
ss.First = low
|
||||
}
|
||||
} else {
|
||||
for seq := first; seq < last; seq++ {
|
||||
if sm, ok := ms.msgs[seq]; ok && isMatch(sm.subj) {
|
||||
ss.First = seq
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -559,9 +601,9 @@ func (ms *memStore) SubjectsTotals(filterSubject string) map[string]uint64 {
|
||||
return nil
|
||||
}
|
||||
|
||||
tsa := [32]string{}
|
||||
fsa := [32]string{}
|
||||
fts := tokenizeSubjectIntoSlice(fsa[:0], filterSubject)
|
||||
_tsa, _fsa := [32]string{}, [32]string{}
|
||||
tsa, fsa := _tsa[:0], _fsa[:0]
|
||||
fsa = tokenizeSubjectIntoSlice(fsa[:0], filterSubject)
|
||||
isAll := filterSubject == _EMPTY_ || filterSubject == fwcs
|
||||
|
||||
fst := make(map[string]uint64)
|
||||
@@ -570,7 +612,7 @@ func (ms *memStore) SubjectsTotals(filterSubject string) map[string]uint64 {
|
||||
if isAll {
|
||||
fst[subjs] = ss.Msgs
|
||||
} else {
|
||||
if tts := tokenizeSubjectIntoSlice(tsa[:0], subjs); isSubsetMatchTokenized(tts, fts) {
|
||||
if tsa = tokenizeSubjectIntoSlice(tsa[:0], subjs); isSubsetMatchTokenized(tsa, fsa) {
|
||||
fst[subjs] = ss.Msgs
|
||||
}
|
||||
}
|
||||
@@ -1176,7 +1218,11 @@ func (ms *memStore) removeSeqPerSubject(subj string, seq uint64) {
|
||||
// Will recalculate the first sequence for this subject in this block.
|
||||
// Lock should be held.
|
||||
func (ms *memStore) recalculateFirstForSubj(subj string, startSeq uint64, ss *SimpleState) {
|
||||
for tseq := startSeq + 1; tseq <= ss.Last; tseq++ {
|
||||
tseq := startSeq + 1
|
||||
if tseq < ms.state.FirstSeq {
|
||||
tseq = ms.state.FirstSeq
|
||||
}
|
||||
for ; tseq <= ss.Last; tseq++ {
|
||||
if sm := ms.msgs[tseq]; sm != nil && sm.subj == subj {
|
||||
ss.First = tseq
|
||||
ss.firstNeedsUpdate = false
|
||||
@@ -1509,7 +1555,8 @@ func (o *consumerMemStore) UpdateDelivered(dseq, sseq, dc uint64, ts int64) erro
|
||||
// Check for an update to a message already delivered.
|
||||
if sseq <= o.state.Delivered.Stream {
|
||||
if p = o.state.Pending[sseq]; p != nil {
|
||||
p.Sequence, p.Timestamp = dseq, ts
|
||||
// Do not update p.Sequence, that should be the original delivery sequence.
|
||||
p.Timestamp = ts
|
||||
}
|
||||
} else {
|
||||
// Add to pending.
|
||||
@@ -1558,23 +1605,38 @@ func (o *consumerMemStore) UpdateAcks(dseq, sseq uint64) error {
|
||||
if o.cfg.AckPolicy == AckNone {
|
||||
return ErrNoAckPolicy
|
||||
}
|
||||
if len(o.state.Pending) == 0 || o.state.Pending[sseq] == nil {
|
||||
return ErrStoreMsgNotFound
|
||||
}
|
||||
|
||||
// On restarts the old leader may get a replay from the raft logs that are old.
|
||||
if dseq <= o.state.AckFloor.Consumer {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Match leader logic on checking if ack is ahead of delivered.
|
||||
// This could happen on a cooperative takeover with high speed deliveries.
|
||||
if sseq > o.state.Delivered.Stream {
|
||||
o.state.Delivered.Stream = sseq + 1
|
||||
}
|
||||
|
||||
if len(o.state.Pending) == 0 || o.state.Pending[sseq] == nil {
|
||||
delete(o.state.Redelivered, sseq)
|
||||
return ErrStoreMsgNotFound
|
||||
}
|
||||
|
||||
// Check for AckAll here.
|
||||
if o.cfg.AckPolicy == AckAll {
|
||||
sgap := sseq - o.state.AckFloor.Stream
|
||||
o.state.AckFloor.Consumer = dseq
|
||||
o.state.AckFloor.Stream = sseq
|
||||
for seq := sseq; seq > sseq-sgap; seq-- {
|
||||
delete(o.state.Pending, seq)
|
||||
if len(o.state.Redelivered) > 0 {
|
||||
if sgap > uint64(len(o.state.Pending)) {
|
||||
for seq := range o.state.Pending {
|
||||
if seq <= sseq {
|
||||
delete(o.state.Pending, seq)
|
||||
delete(o.state.Redelivered, seq)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for seq := sseq; seq > sseq-sgap && len(o.state.Pending) > 0; seq-- {
|
||||
delete(o.state.Pending, seq)
|
||||
delete(o.state.Redelivered, seq)
|
||||
}
|
||||
}
|
||||
@@ -1586,23 +1648,20 @@ func (o *consumerMemStore) UpdateAcks(dseq, sseq uint64) error {
|
||||
// First delete from our pending state.
|
||||
if p, ok := o.state.Pending[sseq]; ok {
|
||||
delete(o.state.Pending, sseq)
|
||||
dseq = p.Sequence // Use the original.
|
||||
}
|
||||
// Now remove from redelivered.
|
||||
if len(o.state.Redelivered) > 0 {
|
||||
delete(o.state.Redelivered, sseq)
|
||||
if dseq > p.Sequence && p.Sequence > 0 {
|
||||
dseq = p.Sequence // Use the original.
|
||||
}
|
||||
}
|
||||
|
||||
if len(o.state.Pending) == 0 {
|
||||
o.state.AckFloor.Consumer = o.state.Delivered.Consumer
|
||||
o.state.AckFloor.Stream = o.state.Delivered.Stream
|
||||
} else if dseq == o.state.AckFloor.Consumer+1 {
|
||||
first := o.state.AckFloor.Consumer == 0
|
||||
o.state.AckFloor.Consumer = dseq
|
||||
o.state.AckFloor.Stream = sseq
|
||||
|
||||
if !first && o.state.Delivered.Consumer > dseq {
|
||||
for ss := sseq + 1; ss < o.state.Delivered.Stream; ss++ {
|
||||
if o.state.Delivered.Consumer > dseq {
|
||||
for ss := sseq + 1; ss <= o.state.Delivered.Stream; ss++ {
|
||||
if p, ok := o.state.Pending[ss]; ok {
|
||||
if p.Sequence > 0 {
|
||||
o.state.AckFloor.Consumer = p.Sequence - 1
|
||||
@@ -1613,6 +1672,8 @@ func (o *consumerMemStore) UpdateAcks(dseq, sseq uint64) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
// We do these regardless.
|
||||
delete(o.state.Redelivered, sseq)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
+156
@@ -1387,6 +1387,8 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) {
|
||||
var srcUrl string
|
||||
if gitCommit == _EMPTY_ {
|
||||
srcUrl = "https://github.com/nats-io/nats-server"
|
||||
} else if serverVersion != _EMPTY_ {
|
||||
srcUrl = fmt.Sprintf("https://github.com/nats-io/nats-server/tree/%s", serverVersion)
|
||||
} else {
|
||||
srcUrl = fmt.Sprintf("https://github.com/nats-io/nats-server/tree/%s", gitCommit)
|
||||
}
|
||||
@@ -1421,6 +1423,7 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) {
|
||||
<a href=.%s>Routes</a>
|
||||
<a href=.%s>LeafNodes</a>
|
||||
<a href=.%s>Gateways</a>
|
||||
<a href=.%s>Raft Groups</a>
|
||||
<a href=.%s class=last>Health Probe</a>
|
||||
<a href=https://docs.nats.io/running-a-nats-service/nats_admin/monitoring class="help">Help</a>
|
||||
</body>
|
||||
@@ -1436,6 +1439,7 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) {
|
||||
s.basePath(RoutezPath),
|
||||
s.basePath(LeafzPath),
|
||||
s.basePath(GatewayzPath),
|
||||
s.basePath(RaftzPath),
|
||||
s.basePath(HealthzPath),
|
||||
)
|
||||
}
|
||||
@@ -3490,6 +3494,23 @@ func (s *Server) healthz(opts *HealthzOptions) *HealthStatus {
|
||||
return health
|
||||
}
|
||||
|
||||
// Are we still recovering meta layer?
|
||||
if js.isMetaRecovering() {
|
||||
if !details {
|
||||
health.Status = na
|
||||
health.Error = "JetStream is still recovering meta layer"
|
||||
|
||||
} else {
|
||||
health.Errors = []HealthzError{
|
||||
{
|
||||
Type: HealthzErrorJetStream,
|
||||
Error: "JetStream is still recovering meta layer",
|
||||
},
|
||||
}
|
||||
}
|
||||
return health
|
||||
}
|
||||
|
||||
// Range across all accounts, the streams assigned to them, and the consumers.
|
||||
// If they are assigned to this server check their status.
|
||||
ourID := meta.ID()
|
||||
@@ -3724,3 +3745,138 @@ func (s *Server) profilez(opts *ProfilezOptions) *ProfilezStatus {
|
||||
Profile: buffer.Bytes(),
|
||||
}
|
||||
}
|
||||
|
||||
type RaftzGroup struct {
|
||||
ID string `json:"id"`
|
||||
State string `json:"state"`
|
||||
Size int `json:"size"`
|
||||
QuorumNeeded int `json:"quorum_needed"`
|
||||
Observer bool `json:"observer,omitempty"`
|
||||
Paused bool `json:"paused,omitempty"`
|
||||
Committed uint64 `json:"committed"`
|
||||
Applied uint64 `json:"applied"`
|
||||
CatchingUp bool `json:"catching_up,omitempty"`
|
||||
Leader string `json:"leader,omitempty"`
|
||||
EverHadLeader bool `json:"ever_had_leader"`
|
||||
Term uint64 `json:"term"`
|
||||
Vote string `json:"voted_for,omitempty"`
|
||||
PTerm uint64 `json:"pterm"`
|
||||
PIndex uint64 `json:"pindex"`
|
||||
IPQPropLen int `json:"ipq_proposal_len"`
|
||||
IPQEntryLen int `json:"ipq_entry_len"`
|
||||
IPQRespLen int `json:"ipq_resp_len"`
|
||||
IPQApplyLen int `json:"ipq_apply_len"`
|
||||
WAL StreamState `json:"wal"`
|
||||
WALError error `json:"wal_error,omitempty"`
|
||||
Peers map[string]RaftzGroupPeer `json:"peers"`
|
||||
}
|
||||
|
||||
// RaftzGroupPeer describes one remote peer of a Raft group as reported by
// the Raft monitoring endpoint.
type RaftzGroupPeer struct {
	// Name is the server name resolved for the peer's node ID.
	Name string `json:"name"`
	// Known mirrors the peer's kp flag on the local node.
	Known bool `json:"known"`
	// LastReplicatedIndex mirrors the peer's li value; omitted when zero.
	LastReplicatedIndex uint64 `json:"last_replicated_index,omitempty"`
	// LastSeen is the elapsed time since the peer's recorded timestamp,
	// formatted as a duration string; omitted when no timestamp is recorded.
	LastSeen string `json:"last_seen,omitempty"`
}
|
||||
|
||||
func (s *Server) HandleRaftz(w http.ResponseWriter, r *http.Request) {
|
||||
if s.raftNodes == nil {
|
||||
w.WriteHeader(404)
|
||||
w.Write([]byte("No Raft nodes registered"))
|
||||
return
|
||||
}
|
||||
|
||||
gfilter := r.URL.Query().Get("group")
|
||||
afilter := r.URL.Query().Get("acc")
|
||||
if afilter == "" {
|
||||
afilter = s.SystemAccount().Name
|
||||
}
|
||||
|
||||
groups := map[string]RaftNode{}
|
||||
infos := map[string]map[string]RaftzGroup{} // account -> group ID
|
||||
|
||||
s.rnMu.RLock()
|
||||
if gfilter != _EMPTY_ {
|
||||
if rg, ok := s.raftNodes[gfilter]; ok && rg != nil {
|
||||
if n, ok := rg.(*raft); ok {
|
||||
if n.accName == afilter {
|
||||
groups[gfilter] = rg
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for name, rg := range s.raftNodes {
|
||||
if rg == nil {
|
||||
continue
|
||||
}
|
||||
if n, ok := rg.(*raft); ok {
|
||||
if n.accName != afilter {
|
||||
continue
|
||||
}
|
||||
groups[name] = rg
|
||||
}
|
||||
}
|
||||
}
|
||||
s.rnMu.RUnlock()
|
||||
|
||||
if len(groups) == 0 {
|
||||
w.WriteHeader(404)
|
||||
w.Write([]byte("No Raft nodes found, does the specified account/group exist?"))
|
||||
return
|
||||
}
|
||||
|
||||
for name, rg := range groups {
|
||||
n, ok := rg.(*raft)
|
||||
if n == nil || !ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := infos[n.accName]; !ok {
|
||||
infos[n.accName] = map[string]RaftzGroup{}
|
||||
}
|
||||
// Only take the lock once, using the public RaftNode functions would
|
||||
// cause us to take and release the locks over and over again.
|
||||
n.RLock()
|
||||
info := RaftzGroup{
|
||||
ID: n.id,
|
||||
State: RaftState(n.state.Load()).String(),
|
||||
Size: n.csz,
|
||||
QuorumNeeded: n.qn,
|
||||
Observer: n.observer,
|
||||
Paused: n.paused,
|
||||
Committed: n.commit,
|
||||
Applied: n.applied,
|
||||
CatchingUp: n.catchup != nil,
|
||||
Leader: n.leader,
|
||||
EverHadLeader: n.pleader,
|
||||
Term: n.term,
|
||||
Vote: n.vote,
|
||||
PTerm: n.pterm,
|
||||
PIndex: n.pindex,
|
||||
IPQPropLen: n.prop.len(),
|
||||
IPQEntryLen: n.entry.len(),
|
||||
IPQRespLen: n.resp.len(),
|
||||
IPQApplyLen: n.apply.len(),
|
||||
WALError: n.werr,
|
||||
Peers: map[string]RaftzGroupPeer{},
|
||||
}
|
||||
n.wal.FastState(&info.WAL)
|
||||
for id, p := range n.peers {
|
||||
if id == n.id {
|
||||
continue
|
||||
}
|
||||
peer := RaftzGroupPeer{
|
||||
Name: s.serverNameForNode(id),
|
||||
Known: p.kp,
|
||||
LastReplicatedIndex: p.li,
|
||||
}
|
||||
if p.ts > 0 {
|
||||
peer.LastSeen = time.Since(time.Unix(0, p.ts)).String()
|
||||
}
|
||||
info.Peers[id] = peer
|
||||
}
|
||||
n.RUnlock()
|
||||
infos[n.accName][name] = info
|
||||
}
|
||||
|
||||
b, _ := json.MarshalIndent(infos, "", " ")
|
||||
ResponseHandler(w, r, b)
|
||||
}
|
||||
|
||||
+29
-10
@@ -974,7 +974,7 @@ func (s *Server) mqttHandleClosedClient(c *client) {
|
||||
|
||||
// This needs to be done outside of any lock.
|
||||
if doClean {
|
||||
if err := sess.clear(); err != nil {
|
||||
if err := sess.clear(true); err != nil {
|
||||
c.Errorf(err.Error())
|
||||
}
|
||||
}
|
||||
@@ -1449,7 +1449,7 @@ func (s *Server) mqttCreateAccountSessionManager(acc *Account, quitCh chan struc
|
||||
// Opportunistically delete the old (legacy) consumer, from v2.10.10 and
|
||||
// before. Ignore any errors that might arise.
|
||||
rmLegacyDurName := mqttRetainedMsgsStreamName + "_" + jsa.id
|
||||
jsa.deleteConsumer(mqttRetainedMsgsStreamName, rmLegacyDurName)
|
||||
jsa.deleteConsumer(mqttRetainedMsgsStreamName, rmLegacyDurName, true)
|
||||
|
||||
// Create a new, uniquely named consumer for retained messages for this
|
||||
// server. The prior one will expire eventually.
|
||||
@@ -1672,8 +1672,21 @@ func (jsa *mqttJSA) createDurableConsumer(cfg *CreateConsumerRequest) (*JSApiCon
|
||||
return ccr, ccr.ToError()
|
||||
}
|
||||
|
||||
func (jsa *mqttJSA) deleteConsumer(streamName, consName string) (*JSApiConsumerDeleteResponse, error) {
|
||||
func (jsa *mqttJSA) sendMsg(subj string, msg []byte) {
|
||||
if subj == _EMPTY_ {
|
||||
return
|
||||
}
|
||||
jsa.sendq.push(&mqttJSPubMsg{subj: subj, msg: msg, hdr: -1})
|
||||
}
|
||||
|
||||
// if noWait is specified, does not wait for the JS response, returns nil
|
||||
func (jsa *mqttJSA) deleteConsumer(streamName, consName string, noWait bool) (*JSApiConsumerDeleteResponse, error) {
|
||||
subj := fmt.Sprintf(JSApiConsumerDeleteT, streamName, consName)
|
||||
if noWait {
|
||||
jsa.sendMsg(subj, nil)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
cdri, err := jsa.newRequest(mqttJSAConsumerDel, subj, 0, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -1950,9 +1963,13 @@ func (as *mqttAccountSessionManager) processRetainedMsg(_ *subscription, c *clie
|
||||
}
|
||||
// If lastSeq is 0 (nothing to recover, or done doing it) and this is
|
||||
// from our own server, ignore.
|
||||
as.mu.RLock()
|
||||
if as.rrmLastSeq == 0 && rm.Origin == as.jsa.id {
|
||||
as.mu.RUnlock()
|
||||
return
|
||||
}
|
||||
as.mu.RUnlock()
|
||||
|
||||
// At this point we either recover from our own server, or process a remote retained message.
|
||||
seq, _, _ := ackReplyInfo(reply)
|
||||
|
||||
@@ -1960,11 +1977,13 @@ func (as *mqttAccountSessionManager) processRetainedMsg(_ *subscription, c *clie
|
||||
as.handleRetainedMsg(rm.Subject, &mqttRetainedMsgRef{sseq: seq}, rm, false)
|
||||
|
||||
// If we were recovering (lastSeq > 0), then check if we are done.
|
||||
as.mu.Lock()
|
||||
if as.rrmLastSeq > 0 && seq >= as.rrmLastSeq {
|
||||
as.rrmLastSeq = 0
|
||||
close(as.rrmDoneCh)
|
||||
as.rrmDoneCh = nil
|
||||
}
|
||||
as.mu.Unlock()
|
||||
}
|
||||
|
||||
func (as *mqttAccountSessionManager) processRetainedMsgDel(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
|
||||
@@ -3072,7 +3091,7 @@ func (sess *mqttSession) save() error {
|
||||
//
|
||||
// Runs from the client's readLoop.
|
||||
// Lock not held on entry, but session is in the locked map.
|
||||
func (sess *mqttSession) clear() error {
|
||||
func (sess *mqttSession) clear(noWait bool) error {
|
||||
var durs []string
|
||||
var pubRelDur string
|
||||
|
||||
@@ -3100,19 +3119,19 @@ func (sess *mqttSession) clear() error {
|
||||
sess.mu.Unlock()
|
||||
|
||||
for _, dur := range durs {
|
||||
if _, err := sess.jsa.deleteConsumer(mqttStreamName, dur); isErrorOtherThan(err, JSConsumerNotFoundErr) {
|
||||
if _, err := sess.jsa.deleteConsumer(mqttStreamName, dur, noWait); isErrorOtherThan(err, JSConsumerNotFoundErr) {
|
||||
return fmt.Errorf("unable to delete consumer %q for session %q: %v", dur, sess.id, err)
|
||||
}
|
||||
}
|
||||
if pubRelDur != "" {
|
||||
_, err := sess.jsa.deleteConsumer(mqttOutStreamName, pubRelDur)
|
||||
if pubRelDur != _EMPTY_ {
|
||||
_, err := sess.jsa.deleteConsumer(mqttOutStreamName, pubRelDur, noWait)
|
||||
if isErrorOtherThan(err, JSConsumerNotFoundErr) {
|
||||
return fmt.Errorf("unable to delete consumer %q for session %q: %v", pubRelDur, sess.id, err)
|
||||
}
|
||||
}
|
||||
|
||||
if seq > 0 {
|
||||
err := sess.jsa.deleteMsg(mqttSessStreamName, seq, true)
|
||||
err := sess.jsa.deleteMsg(mqttSessStreamName, seq, !noWait)
|
||||
// Ignore the various errors indicating that the message (or sequence)
|
||||
// is already deleted, can happen in a cluster.
|
||||
if isErrorOtherThan(err, JSSequenceNotFoundErrF) {
|
||||
@@ -3378,7 +3397,7 @@ func (sess *mqttSession) untrackPubRel(pi uint16) (jsAckSubject string) {
|
||||
func (sess *mqttSession) deleteConsumer(cc *ConsumerConfig) {
|
||||
sess.mu.Lock()
|
||||
sess.tmaxack -= cc.MaxAckPending
|
||||
sess.jsa.sendq.push(&mqttJSPubMsg{subj: sess.jsa.prefixDomain(fmt.Sprintf(JSApiConsumerDeleteT, mqttStreamName, cc.Durable))})
|
||||
sess.jsa.deleteConsumer(mqttStreamName, cc.Durable, true)
|
||||
sess.mu.Unlock()
|
||||
}
|
||||
|
||||
@@ -3717,7 +3736,7 @@ CHECK:
|
||||
// This Session lasts as long as the Network Connection. State data
|
||||
// associated with this Session MUST NOT be reused in any subsequent
|
||||
// Session.
|
||||
if err := es.clear(); err != nil {
|
||||
if err := es.clear(false); err != nil {
|
||||
asm.removeSession(es, true)
|
||||
return err
|
||||
}
|
||||
|
||||
+125
-68
@@ -38,7 +38,7 @@ import (
|
||||
|
||||
type RaftNode interface {
|
||||
Propose(entry []byte) error
|
||||
ProposeDirect(entries []*Entry) error
|
||||
ProposeMulti(entries []*Entry) error
|
||||
ForwardProposal(entry []byte) error
|
||||
InstallSnapshot(snap []byte) error
|
||||
SendSnapshot(snap []byte) error
|
||||
@@ -85,6 +85,7 @@ type WAL interface {
|
||||
RemoveMsg(index uint64) (bool, error)
|
||||
Compact(index uint64) (uint64, error)
|
||||
Purge() (uint64, error)
|
||||
PurgeEx(subject string, seq, keep uint64) (uint64, error)
|
||||
Truncate(seq uint64) error
|
||||
State() StreamState
|
||||
FastState(*StreamState)
|
||||
@@ -155,25 +156,27 @@ type raft struct {
|
||||
llqrt time.Time // Last quorum lost time
|
||||
lsut time.Time // Last scale-up time
|
||||
|
||||
term uint64 // The current vote term
|
||||
pterm uint64 // Previous term from the last snapshot
|
||||
pindex uint64 // Previous index from the last snapshot
|
||||
commit uint64 // Sequence number of the most recent commit
|
||||
applied uint64 // Sequence number of the most recently applied commit
|
||||
hcbehind bool // Were we falling behind at the last health check? (see: isCurrent)
|
||||
term uint64 // The current vote term
|
||||
pterm uint64 // Previous term from the last snapshot
|
||||
pindex uint64 // Previous index from the last snapshot
|
||||
commit uint64 // Index of the most recent commit
|
||||
applied uint64 // Index of the most recently applied commit
|
||||
|
||||
leader string // The ID of the leader
|
||||
vote string // Our current vote state
|
||||
lxfer bool // Are we doing a leadership transfer?
|
||||
|
||||
hcbehind bool // Were we falling behind at the last health check? (see: isCurrent)
|
||||
|
||||
s *Server // Reference to top-level server
|
||||
c *client // Internal client for subscriptions
|
||||
js *jetStream // JetStream, if running, to see if we are out of resources
|
||||
|
||||
dflag bool // Debug flag
|
||||
pleader bool // Has the group ever had a leader?
|
||||
observer bool // The node is observing, i.e. not participating in voting
|
||||
extSt extensionState // Extension state
|
||||
dflag bool // Debug flag
|
||||
pleader bool // Has the group ever had a leader?
|
||||
observer bool // The node is observing, i.e. not participating in voting
|
||||
|
||||
extSt extensionState // Extension state
|
||||
|
||||
psubj string // Proposals subject
|
||||
rpsubj string // Remove peers subject
|
||||
@@ -232,16 +235,18 @@ const (
|
||||
hbIntervalDefault = 1 * time.Second
|
||||
lostQuorumIntervalDefault = hbIntervalDefault * 10 // 10 seconds
|
||||
lostQuorumCheckIntervalDefault = hbIntervalDefault * 10 // 10 seconds
|
||||
observerModeIntervalDefault = 48 * time.Hour
|
||||
)
|
||||
|
||||
var (
|
||||
minElectionTimeout = minElectionTimeoutDefault
|
||||
maxElectionTimeout = maxElectionTimeoutDefault
|
||||
minCampaignTimeout = minCampaignTimeoutDefault
|
||||
maxCampaignTimeout = maxCampaignTimeoutDefault
|
||||
hbInterval = hbIntervalDefault
|
||||
lostQuorumInterval = lostQuorumIntervalDefault
|
||||
lostQuorumCheck = lostQuorumCheckIntervalDefault
|
||||
minElectionTimeout = minElectionTimeoutDefault
|
||||
maxElectionTimeout = maxElectionTimeoutDefault
|
||||
minCampaignTimeout = minCampaignTimeoutDefault
|
||||
maxCampaignTimeout = maxCampaignTimeoutDefault
|
||||
hbInterval = hbIntervalDefault
|
||||
lostQuorumInterval = lostQuorumIntervalDefault
|
||||
lostQuorumCheck = lostQuorumCheckIntervalDefault
|
||||
observerModeInterval = observerModeIntervalDefault
|
||||
)
|
||||
|
||||
type RaftConfig struct {
|
||||
@@ -270,6 +275,7 @@ var (
|
||||
errLeaderLen = fmt.Errorf("raft: leader should be exactly %d bytes", idLen)
|
||||
errTooManyEntries = errors.New("raft: append entry can contain a max of 64k entries")
|
||||
errBadAppendEntry = errors.New("raft: append entry corrupt")
|
||||
errNoInternalClient = errors.New("raft: no internal client")
|
||||
)
|
||||
|
||||
// This will bootstrap a raftNode by writing its config into the store directory.
|
||||
@@ -387,7 +393,7 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
|
||||
apply: newIPQueue[*CommittedEntry](s, qpfx+"committedEntry"),
|
||||
stepdown: newIPQueue[string](s, qpfx+"stepdown"),
|
||||
accName: accName,
|
||||
leadc: make(chan bool, 1),
|
||||
leadc: make(chan bool, 32),
|
||||
observer: cfg.Observer,
|
||||
extSt: ps.domainExt,
|
||||
}
|
||||
@@ -414,7 +420,8 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
|
||||
return nil, fmt.Errorf("could not create snapshots directory - %v", err)
|
||||
}
|
||||
|
||||
// Can't recover snapshots if memory based.
|
||||
// Can't recover snapshots if memory based since wal will be reset.
|
||||
// We will inherit from the current leader.
|
||||
if _, ok := n.wal.(*memStore); ok {
|
||||
os.Remove(filepath.Join(n.sd, snapshotsDir, "*"))
|
||||
} else {
|
||||
@@ -692,36 +699,34 @@ func (n *raft) Propose(data []byte) error {
|
||||
n.debug("Proposal ignored, not leader (state: %v)", state)
|
||||
return errNotLeader
|
||||
}
|
||||
n.RLock()
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
|
||||
// Error if we had a previous write error.
|
||||
if werr := n.werr; werr != nil {
|
||||
n.RUnlock()
|
||||
return werr
|
||||
}
|
||||
prop := n.prop
|
||||
n.RUnlock()
|
||||
|
||||
prop.push(newEntry(EntryNormal, data))
|
||||
n.prop.push(newEntry(EntryNormal, data))
|
||||
return nil
|
||||
}
|
||||
|
||||
// ProposeDirect will propose entries directly by skipping the Raft state
|
||||
// machine and sending them straight to the wire instead.
|
||||
// ProposeMulti will propose multiple entries at once.
|
||||
// This should only be called on the leader.
|
||||
func (n *raft) ProposeDirect(entries []*Entry) error {
|
||||
func (n *raft) ProposeMulti(entries []*Entry) error {
|
||||
if state := n.State(); state != Leader {
|
||||
n.debug("Direct proposal ignored, not leader (state: %v)", state)
|
||||
return errNotLeader
|
||||
}
|
||||
n.RLock()
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
|
||||
// Error if we had a previous write error.
|
||||
if werr := n.werr; werr != nil {
|
||||
n.RUnlock()
|
||||
return werr
|
||||
}
|
||||
n.RUnlock()
|
||||
|
||||
n.sendAppendEntry(entries)
|
||||
for _, e := range entries {
|
||||
n.prop.push(e)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -871,7 +876,7 @@ func (n *raft) PauseApply() error {
|
||||
n.hcommit = n.commit
|
||||
// Also prevent us from trying to become a leader while paused and catching up.
|
||||
n.pobserver, n.observer = n.observer, true
|
||||
n.resetElect(48 * time.Hour)
|
||||
n.resetElect(observerModeInterval)
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1012,25 +1017,20 @@ func (n *raft) InstallSnapshot(data []byte) error {
|
||||
}
|
||||
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
|
||||
// If a write error has occurred already then stop here.
|
||||
if werr := n.werr; werr != nil {
|
||||
n.Unlock()
|
||||
return werr
|
||||
}
|
||||
|
||||
// Check that a catchup isn't already taking place. If it is then we won't
|
||||
// allow installing snapshots until it is done.
|
||||
if len(n.progress) > 0 {
|
||||
n.Unlock()
|
||||
return errCatchupsRunning
|
||||
}
|
||||
|
||||
var state StreamState
|
||||
n.wal.FastState(&state)
|
||||
|
||||
if n.applied == 0 {
|
||||
n.Unlock()
|
||||
return errNoSnapAvailable
|
||||
}
|
||||
|
||||
@@ -1055,6 +1055,12 @@ func (n *raft) InstallSnapshot(data []byte) error {
|
||||
data: data,
|
||||
}
|
||||
|
||||
return n.installSnapshot(snap)
|
||||
}
|
||||
|
||||
// Install the snapshot.
|
||||
// Lock should be held.
|
||||
func (n *raft) installSnapshot(snap *snapshot) error {
|
||||
snapDir := filepath.Join(n.sd, snapshotsDir)
|
||||
sn := fmt.Sprintf(snapFileT, snap.lastTerm, snap.lastIndex)
|
||||
sfile := filepath.Join(snapDir, sn)
|
||||
@@ -1064,29 +1070,21 @@ func (n *raft) InstallSnapshot(data []byte) error {
|
||||
dios <- struct{}{}
|
||||
|
||||
if err != nil {
|
||||
n.Unlock()
|
||||
// We could set write err here, but if this is a temporary situation, too many open files etc.
|
||||
// we want to retry and snapshots are not fatal.
|
||||
return err
|
||||
}
|
||||
|
||||
// Delete our previous snapshot file if it exists.
|
||||
if n.snapfile != _EMPTY_ && n.snapfile != sfile {
|
||||
os.Remove(n.snapfile)
|
||||
}
|
||||
// Remember our latest snapshot file.
|
||||
n.snapfile = sfile
|
||||
if _, err := n.wal.Compact(snap.lastIndex + 1); err != nil {
|
||||
n.setWriteErrLocked(err)
|
||||
n.Unlock()
|
||||
return err
|
||||
}
|
||||
n.Unlock()
|
||||
|
||||
psnaps, _ := os.ReadDir(snapDir)
|
||||
// Remove any old snapshots.
|
||||
for _, fi := range psnaps {
|
||||
pn := fi.Name()
|
||||
if pn != sn {
|
||||
os.Remove(filepath.Join(snapDir, pn))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1628,6 +1626,13 @@ func (n *raft) shutdown(shouldDelete bool) {
|
||||
// allowing shutdown() to be called again. If that happens then the below
|
||||
// close(n.quit) will panic from trying to close an already-closed channel.
|
||||
if n.state.Swap(int32(Closed)) == int32(Closed) {
|
||||
// If we get called again with shouldDelete, in case we were called first with Stop() cleanup
|
||||
if shouldDelete {
|
||||
if wal := n.wal; wal != nil {
|
||||
wal.Delete()
|
||||
}
|
||||
os.RemoveAll(n.sd)
|
||||
}
|
||||
n.Unlock()
|
||||
return
|
||||
}
|
||||
@@ -1644,17 +1649,22 @@ func (n *raft) shutdown(shouldDelete bool) {
|
||||
n.unsubscribe(sub)
|
||||
}
|
||||
c.closeConnection(InternalClient)
|
||||
n.c = nil
|
||||
}
|
||||
|
||||
s, g, wal := n.s, n.group, n.wal
|
||||
|
||||
// Unregistering ipQueues do not prevent them from push/pop
|
||||
// just will remove them from the central monitoring map
|
||||
queues := []interface {
|
||||
unregister()
|
||||
drain()
|
||||
}{n.reqs, n.votes, n.prop, n.entry, n.resp, n.apply, n.stepdown}
|
||||
for _, q := range queues {
|
||||
q.drain()
|
||||
q.unregister()
|
||||
}
|
||||
sd := n.sd
|
||||
n.Unlock()
|
||||
|
||||
s.unregisterRaftNode(g)
|
||||
@@ -1669,7 +1679,7 @@ func (n *raft) shutdown(shouldDelete bool) {
|
||||
|
||||
if shouldDelete {
|
||||
// Delete all our peer state and vote state and any snapshots.
|
||||
os.RemoveAll(n.sd)
|
||||
os.RemoveAll(sd)
|
||||
n.debug("Deleted")
|
||||
} else {
|
||||
n.debug("Shutdown")
|
||||
@@ -1724,12 +1734,15 @@ func (n *raft) newInbox() string {
|
||||
// Our internal subscribe.
|
||||
// Lock should be held.
|
||||
func (n *raft) subscribe(subject string, cb msgHandler) (*subscription, error) {
|
||||
if n.c == nil {
|
||||
return nil, errNoInternalClient
|
||||
}
|
||||
return n.s.systemSubscribe(subject, _EMPTY_, false, n.c, cb)
|
||||
}
|
||||
|
||||
// Lock should be held.
|
||||
func (n *raft) unsubscribe(sub *subscription) {
|
||||
if sub != nil {
|
||||
if n.c != nil && sub != nil {
|
||||
n.c.processUnsub(sub.sid)
|
||||
}
|
||||
}
|
||||
@@ -1888,8 +1901,24 @@ func (n *raft) SetObserver(isObserver bool) {
|
||||
func (n *raft) setObserver(isObserver bool, extSt extensionState) {
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
|
||||
if n.paused {
|
||||
// Applies are paused so we're already in observer state.
|
||||
// Resuming the applies will set the state back to whatever
|
||||
// is in "pobserver", so update that instead.
|
||||
n.pobserver = isObserver
|
||||
return
|
||||
}
|
||||
|
||||
wasObserver := n.observer
|
||||
n.observer = isObserver
|
||||
n.extSt = extSt
|
||||
|
||||
// If we're leaving observer state then reset the election timer or
|
||||
// we might end up waiting for up to the observerModeInterval.
|
||||
if wasObserver && !isObserver {
|
||||
n.resetElect(randCampaignTimeout())
|
||||
}
|
||||
}
|
||||
|
||||
// processAppendEntries is called by the Raft state machine when there are
|
||||
@@ -1939,7 +1968,7 @@ func (n *raft) runAsFollower() {
|
||||
n.resetElectionTimeoutWithLock()
|
||||
n.debug("Not switching to candidate, no resources")
|
||||
} else if n.IsObserver() {
|
||||
n.resetElectWithLock(48 * time.Hour)
|
||||
n.resetElectWithLock(observerModeInterval)
|
||||
n.debug("Not switching to candidate, observer only")
|
||||
} else if n.isCatchingUp() {
|
||||
n.debug("Not switching to candidate, catching up")
|
||||
@@ -2304,15 +2333,15 @@ func (n *raft) runAsLeader() {
|
||||
return
|
||||
}
|
||||
|
||||
n.RLock()
|
||||
n.Lock()
|
||||
psubj, rpsubj := n.psubj, n.rpsubj
|
||||
n.RUnlock()
|
||||
|
||||
// For forwarded proposals, both normal and remove peer proposals.
|
||||
fsub, err := n.subscribe(psubj, n.handleForwardedProposal)
|
||||
if err != nil {
|
||||
n.warn("Error subscribing to forwarded proposals: %v", err)
|
||||
n.stepdown.push(noLeader)
|
||||
n.Unlock()
|
||||
return
|
||||
}
|
||||
rpsub, err := n.subscribe(rpsubj, n.handleForwardedRemovePeerProposal)
|
||||
@@ -2320,8 +2349,10 @@ func (n *raft) runAsLeader() {
|
||||
n.warn("Error subscribing to forwarded remove peer proposals: %v", err)
|
||||
n.unsubscribe(fsub)
|
||||
n.stepdown.push(noLeader)
|
||||
n.Unlock()
|
||||
return
|
||||
}
|
||||
n.Unlock()
|
||||
|
||||
// Cleanup our subscription when we leave.
|
||||
defer func() {
|
||||
@@ -2450,8 +2481,10 @@ func (n *raft) lostQuorum() bool {
|
||||
}
|
||||
|
||||
func (n *raft) lostQuorumLocked() bool {
|
||||
// Make sure we let any scale up actions settle before deciding.
|
||||
if !n.lsut.IsZero() && time.Since(n.lsut) < lostQuorumInterval {
|
||||
// In order to avoid false positives that can happen in heavily loaded systems
|
||||
// make sure nothing is queued up that we have not processed yet.
|
||||
// Also make sure we let any scale up actions settle before deciding.
|
||||
if n.resp.len() != 0 || (!n.lsut.IsZero() && time.Since(n.lsut) < lostQuorumInterval) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -3080,17 +3113,20 @@ func (n *raft) truncateWAL(term, index uint64) {
|
||||
|
||||
if err := n.wal.Truncate(index); err != nil {
|
||||
// If we get an invalid sequence, reset our WAL altogether.
|
||||
// We will not have holes, so this means we do not have this message stored anymore.
|
||||
if err == ErrInvalidSequence {
|
||||
n.debug("Resetting WAL")
|
||||
n.wal.Truncate(0)
|
||||
index, n.term, n.pterm, n.pindex = 0, 0, 0, 0
|
||||
// If our index is non-zero use PurgeEx to set us to the correct next index.
|
||||
if index > 0 {
|
||||
n.wal.PurgeEx(fwcs, index+1, 0)
|
||||
}
|
||||
} else {
|
||||
n.warn("Error truncating WAL: %v", err)
|
||||
n.setWriteErrLocked(err)
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Set after we know we have truncated properly.
|
||||
n.term, n.pterm, n.pindex = term, term, index
|
||||
}
|
||||
@@ -3159,15 +3195,17 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
|
||||
// to a follower of that node instead.
|
||||
if n.State() == Candidate {
|
||||
// Ignore old terms, otherwise we might end up stepping down incorrectly.
|
||||
if ae.term >= n.term {
|
||||
// Needs to be ahead of our pterm (last log index), as an isolated node
|
||||
// could have bumped its vote term up considerably past this point.
|
||||
if ae.term >= n.pterm {
|
||||
// If the append entry term is newer than the current term, erase our
|
||||
// vote.
|
||||
if ae.term > n.term {
|
||||
n.term = ae.term
|
||||
n.vote = noVote
|
||||
n.writeTermVote()
|
||||
}
|
||||
n.debug("Received append entry in candidate state from %q, converting to follower", ae.leader)
|
||||
n.term = ae.term
|
||||
n.writeTermVote()
|
||||
n.stepdown.push(ae.leader)
|
||||
}
|
||||
}
|
||||
@@ -3262,7 +3300,7 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
|
||||
// If terms mismatched, or we got an error loading, delete that entry and all others past it.
|
||||
// Make sure to cancel any catchups in progress.
|
||||
// Truncate will reset our pterm and pindex. Only do so if we have an entry.
|
||||
n.truncateWAL(ae.pterm, ae.pindex)
|
||||
n.truncateWAL(eae.pterm, eae.pindex)
|
||||
}
|
||||
// Cancel regardless.
|
||||
n.cancelCatchup()
|
||||
@@ -3309,6 +3347,7 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
|
||||
return
|
||||
}
|
||||
|
||||
// Inherit state from appendEntry with the leader's snapshot.
|
||||
n.pindex = ae.pindex
|
||||
n.pterm = ae.pterm
|
||||
n.commit = ae.pindex
|
||||
@@ -3319,6 +3358,19 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
|
||||
return
|
||||
}
|
||||
|
||||
snap := &snapshot{
|
||||
lastTerm: n.pterm,
|
||||
lastIndex: n.pindex,
|
||||
peerstate: encodePeerState(&peerState{n.peerNames(), n.csz, n.extSt}),
|
||||
data: ae.entries[0].Data,
|
||||
}
|
||||
// Install the leader's snapshot as our own.
|
||||
if err := n.installSnapshot(snap); err != nil {
|
||||
n.setWriteErrLocked(err)
|
||||
n.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// Now send snapshot to upper levels. Only send the snapshot, not the peerstate entry.
|
||||
n.apply.push(newCommittedEntry(n.commit, ae.entries[:1]))
|
||||
n.Unlock()
|
||||
@@ -3735,7 +3787,8 @@ func readPeerState(sd string) (ps *peerState, err error) {
|
||||
}
|
||||
|
||||
const termVoteFile = "tav.idx"
|
||||
const termVoteLen = idLen + 8
|
||||
const termLen = 8 // uint64
|
||||
const termVoteLen = idLen + termLen
|
||||
|
||||
// Writes out our term & vote outside of a specific raft context.
|
||||
func writeTermVote(sd string, wtv []byte) error {
|
||||
@@ -3761,6 +3814,10 @@ func (n *raft) readTermVote() (term uint64, voted string, err error) {
|
||||
if err != nil {
|
||||
return 0, noVote, err
|
||||
}
|
||||
if len(buf) < termLen {
|
||||
// Not enough bytes for the uint64 below, so avoid a panic.
|
||||
return 0, noVote, nil
|
||||
}
|
||||
var le = binary.LittleEndian
|
||||
term = le.Uint64(buf[0:])
|
||||
if len(buf) < termVoteLen {
|
||||
|
||||
+3
-1
@@ -995,9 +995,11 @@ func (s *Server) Reload() error {
|
||||
return s.ReloadOptions(newOpts)
|
||||
}
|
||||
|
||||
// ReloadOptions applies any supported options from the provided Option
|
||||
// ReloadOptions applies any supported options from the provided Options
|
||||
// type. This returns an error if an option which doesn't support
|
||||
// hot-swapping was changed.
|
||||
// The provided Options type should not be re-used afterwards.
|
||||
// Either use Options.Clone() to pass a copy, or make a new one.
|
||||
func (s *Server) ReloadOptions(newOpts *Options) error {
|
||||
s.reloadMu.Lock()
|
||||
defer s.reloadMu.Unlock()
|
||||
|
||||
+22
-3
@@ -600,6 +600,8 @@ func New(opts *Options) *Server {
|
||||
|
||||
// NewServer will setup a new server struct after parsing the options.
|
||||
// Could return an error if options can not be validated.
|
||||
// The provided Options type should not be re-used afterwards.
|
||||
// Either use Options.Clone() to pass a copy, or make a new one.
|
||||
func NewServer(opts *Options) (*Server, error) {
|
||||
setBaselineOptions(opts)
|
||||
|
||||
@@ -1095,11 +1097,11 @@ func (s *Server) configureAccounts(reloading bool) (map[string]struct{}, error)
|
||||
if reloading && acc.Name != globalAccountName {
|
||||
if ai, ok := s.accounts.Load(acc.Name); ok {
|
||||
a = ai.(*Account)
|
||||
a.mu.Lock()
|
||||
// Before updating the account, check if stream imports have changed.
|
||||
if !a.checkStreamImportsEqual(acc) {
|
||||
awcsti[acc.Name] = struct{}{}
|
||||
}
|
||||
a.mu.Lock()
|
||||
// Collect the sids for the service imports since we are going to
|
||||
// replace with new ones.
|
||||
var sids [][]byte
|
||||
@@ -2062,7 +2064,6 @@ func (s *Server) fetchAccount(name string) (*Account, error) {
|
||||
return nil, err
|
||||
}
|
||||
acc := s.buildInternalAccount(accClaims)
|
||||
acc.claimJWT = claimJWT
|
||||
// Due to possible race, if registerAccount() returns a non
|
||||
// nil account, it means the same account was already
|
||||
// registered and we should use this one.
|
||||
@@ -2078,6 +2079,7 @@ func (s *Server) fetchAccount(name string) (*Account, error) {
|
||||
var needImportSubs bool
|
||||
|
||||
acc.mu.Lock()
|
||||
acc.claimJWT = claimJWT
|
||||
if len(acc.imports.services) > 0 {
|
||||
if acc.ic == nil {
|
||||
acc.ic = s.createInternalAccountClient()
|
||||
@@ -2847,6 +2849,7 @@ const (
|
||||
JszPath = "/jsz"
|
||||
HealthzPath = "/healthz"
|
||||
IPQueuesPath = "/ipqueuesz"
|
||||
RaftzPath = "/raftz"
|
||||
)
|
||||
|
||||
func (s *Server) basePath(p string) string {
|
||||
@@ -2961,6 +2964,8 @@ func (s *Server) startMonitoring(secure bool) error {
|
||||
mux.HandleFunc(s.basePath(HealthzPath), s.HandleHealthz)
|
||||
// IPQueuesz
|
||||
mux.HandleFunc(s.basePath(IPQueuesPath), s.HandleIPQueuesz)
|
||||
// Raftz
|
||||
mux.HandleFunc(s.basePath(RaftzPath), s.HandleRaftz)
|
||||
|
||||
// Do not set a WriteTimeout because it could cause cURL/browser
|
||||
// to return empty response or unable to display page if the
|
||||
@@ -4093,6 +4098,16 @@ func (s *Server) isLameDuckMode() bool {
|
||||
return s.ldm
|
||||
}
|
||||
|
||||
// LameDuckShutdown will perform a lame duck shutdown of NATS, whereby
|
||||
// the client listener is closed, existing client connections are
|
||||
// kicked, Raft leaderships are transferred, JetStream is shut down
|
||||
// and then finally the NATS Server itself is shut down.
|
||||
// This function blocks and will not return until the NATS Server
|
||||
// has completed the entire shutdown operation.
|
||||
func (s *Server) LameDuckShutdown() {
|
||||
s.lameDuckMode()
|
||||
}
|
||||
|
||||
// This function will close the client listener then close the clients
|
||||
// at some interval to avoid a reconnect storm.
|
||||
// We will also transfer any raft leaders and shutdown JetStream.
|
||||
@@ -4222,6 +4237,7 @@ func (s *Server) lameDuckMode() {
|
||||
}
|
||||
}
|
||||
s.Shutdown()
|
||||
s.WaitForShutdown()
|
||||
}
|
||||
|
||||
// Send an INFO update to routes with the indication that this server is in LDM mode.
|
||||
@@ -4416,8 +4432,11 @@ func (s *Server) DisconnectClientByID(id uint64) error {
|
||||
if client := s.getClient(id); client != nil {
|
||||
client.closeConnection(Kicked)
|
||||
return nil
|
||||
} else if client = s.GetLeafNode(id); client != nil {
|
||||
client.closeConnection(Kicked)
|
||||
return nil
|
||||
}
|
||||
return errors.New("no such client id")
|
||||
return errors.New("no such client or leafnode id")
|
||||
}
|
||||
|
||||
// LDMClientByID sends a Lame Duck Mode info message to a client by connection ID
|
||||
|
||||
+2
@@ -51,6 +51,7 @@ func (s *Server) handleSignals() {
|
||||
switch sig {
|
||||
case syscall.SIGINT:
|
||||
s.Shutdown()
|
||||
s.WaitForShutdown()
|
||||
os.Exit(0)
|
||||
case syscall.SIGTERM:
|
||||
// Shutdown unless graceful shutdown already in progress.
|
||||
@@ -60,6 +61,7 @@ func (s *Server) handleSignals() {
|
||||
|
||||
if !ldm {
|
||||
s.Shutdown()
|
||||
s.WaitForShutdown()
|
||||
os.Exit(1)
|
||||
}
|
||||
case syscall.SIGUSR1:
|
||||
|
||||
+85
-46
@@ -462,7 +462,7 @@ func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileSt
|
||||
}
|
||||
}
|
||||
jsa.usageMu.RLock()
|
||||
selected, tier, hasTier := jsa.selectLimits(&cfg)
|
||||
selected, tier, hasTier := jsa.selectLimits(cfg.Replicas)
|
||||
jsa.usageMu.RUnlock()
|
||||
reserved := int64(0)
|
||||
if !isClustered {
|
||||
@@ -858,7 +858,11 @@ func (mset *stream) setLeader(isLeader bool) error {
|
||||
if mset.sourcesConsumerSetup != nil {
|
||||
mset.sourcesConsumerSetup.Stop()
|
||||
mset.sourcesConsumerSetup = nil
|
||||
} else {
|
||||
// Stop any source consumers
|
||||
mset.stopSourceConsumers()
|
||||
}
|
||||
|
||||
// Stop responding to sync requests.
|
||||
mset.stopClusterSubs()
|
||||
// Unsubscribe from direct stream.
|
||||
@@ -1482,19 +1486,38 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account) (StreamConfi
|
||||
}
|
||||
|
||||
// Check for literal duplication of subject interest in config
|
||||
// and no overlap with any JS API subject space
|
||||
// and no overlap with any JS or SYS API subject space.
|
||||
dset := make(map[string]struct{}, len(cfg.Subjects))
|
||||
for _, subj := range cfg.Subjects {
|
||||
// Make sure the subject is valid. Check this first.
|
||||
if !IsValidSubject(subj) {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("invalid subject"))
|
||||
}
|
||||
if _, ok := dset[subj]; ok {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicate subjects detected"))
|
||||
}
|
||||
// Also check to make sure we do not overlap with our $JS API subjects.
|
||||
if subjectIsSubsetMatch(subj, "$JS.API.>") {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects overlap with jetstream api"))
|
||||
// Check for trying to capture everything.
|
||||
if subj == fwcs {
|
||||
if !cfg.NoAck {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("capturing all subjects requires no-ack to be true"))
|
||||
}
|
||||
// Capturing everything also will require R1.
|
||||
if cfg.Replicas != 1 {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("capturing all subjects requires replicas of 1"))
|
||||
}
|
||||
}
|
||||
// Make sure the subject is valid.
|
||||
if !IsValidSubject(subj) {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("invalid subject"))
|
||||
// Also check to make sure we do not overlap with our $JS API subjects.
|
||||
if !cfg.NoAck && (subjectIsSubsetMatch(subj, "$JS.>") || subjectIsSubsetMatch(subj, "$JSC.>")) {
|
||||
// We allow an exception for $JS.EVENT.> since these could have been created in the past.
|
||||
if !subjectIsSubsetMatch(subj, "$JS.EVENT.>") {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects that overlap with jetstream api require no-ack to be true"))
|
||||
}
|
||||
}
|
||||
// And the $SYS subjects.
|
||||
if !cfg.NoAck && subjectIsSubsetMatch(subj, "$SYS.>") {
|
||||
if !subjectIsSubsetMatch(subj, "$SYS.ACCOUNT.>") {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects that overlap with system api require no-ack to be true"))
|
||||
}
|
||||
}
|
||||
// Mark for duplicate check.
|
||||
dset[subj] = struct{}{}
|
||||
@@ -1662,9 +1685,9 @@ func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server) (*Str
|
||||
jsa.mu.RLock()
|
||||
acc := jsa.account
|
||||
jsa.usageMu.RLock()
|
||||
selected, tier, hasTier := jsa.selectLimits(&cfg)
|
||||
selected, tier, hasTier := jsa.selectLimits(cfg.Replicas)
|
||||
if !hasTier && old.Replicas != cfg.Replicas {
|
||||
selected, tier, hasTier = jsa.selectLimits(old)
|
||||
selected, tier, hasTier = jsa.selectLimits(old.Replicas)
|
||||
}
|
||||
jsa.usageMu.RUnlock()
|
||||
reserved := int64(0)
|
||||
@@ -1818,7 +1841,7 @@ func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool)
|
||||
si.trs[i], err = NewSubjectTransform(s.SubjectTransforms[i].Source, s.SubjectTransforms[i].Destination)
|
||||
if err != nil {
|
||||
mset.mu.Unlock()
|
||||
mset.srv.Errorf("Unable to get subject transform for source: %v", err)
|
||||
return fmt.Errorf("unable to get subject transform for source: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1899,7 +1922,7 @@ func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool)
|
||||
|
||||
js := mset.js
|
||||
|
||||
if targetTier := tierName(cfg); mset.tier != targetTier {
|
||||
if targetTier := tierName(cfg.Replicas); mset.tier != targetTier {
|
||||
// In cases such as R1->R3, only one update is needed
|
||||
jsa.usageMu.RLock()
|
||||
_, ok := jsa.limits[targetTier]
|
||||
@@ -2187,9 +2210,11 @@ func (mset *stream) processMirrorMsgs(mirror *sourceInfo, ready *sync.WaitGroup)
|
||||
msgs.recycle(&ims)
|
||||
case <-t.C:
|
||||
mset.mu.RLock()
|
||||
var stalled bool
|
||||
if mset.mirror != nil {
|
||||
stalled = time.Since(time.Unix(0, mset.mirror.last.Load())) > sourceHealthCheckInterval
|
||||
}
|
||||
isLeader := mset.isLeader()
|
||||
last := time.Unix(0, mset.mirror.last.Load())
|
||||
stalled := mset.mirror != nil && time.Since(last) > sourceHealthCheckInterval
|
||||
mset.mu.RUnlock()
|
||||
// No longer leader.
|
||||
if !isLeader {
|
||||
@@ -2406,14 +2431,14 @@ func (mset *stream) skipMsgs(start, end uint64) {
|
||||
return
|
||||
}
|
||||
|
||||
// FIXME (dlc) - We should allow proposals of DeleteEange, but would need to make sure all peers support.
|
||||
// FIXME (dlc) - We should allow proposals of DeleteRange, but would need to make sure all peers support.
|
||||
// With syncRequest was easy to add bool into request.
|
||||
var entries []*Entry
|
||||
for seq := start; seq <= end; seq++ {
|
||||
entries = append(entries, &Entry{EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0)})
|
||||
entries = append(entries, newEntry(EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0)))
|
||||
// So a single message does not get too big.
|
||||
if len(entries) > 10_000 {
|
||||
node.ProposeDirect(entries)
|
||||
node.ProposeMulti(entries)
|
||||
// We need to re-create `entries` because there is a reference
|
||||
// to it in the node's pae map.
|
||||
entries = entries[:0]
|
||||
@@ -2421,7 +2446,7 @@ func (mset *stream) skipMsgs(start, end uint64) {
|
||||
}
|
||||
// Send all at once.
|
||||
if len(entries) > 0 {
|
||||
node.ProposeDirect(entries)
|
||||
node.ProposeMulti(entries)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5249,9 +5274,8 @@ func (mset *stream) checkInterestState() {
|
||||
|
||||
var zeroAcks []*consumer
|
||||
var lowAckFloor uint64 = math.MaxUint64
|
||||
consumers := mset.getConsumers()
|
||||
|
||||
for _, o := range consumers {
|
||||
for _, o := range mset.getConsumers() {
|
||||
o.checkStateForInterestStream()
|
||||
|
||||
o.mu.Lock()
|
||||
@@ -5290,39 +5314,45 @@ func (mset *stream) checkInterestState() {
|
||||
return
|
||||
}
|
||||
|
||||
// Hold stream write lock in case we need to purge.
|
||||
mset.mu.Lock()
|
||||
defer mset.mu.Unlock()
|
||||
|
||||
// Capture our current state.
|
||||
// ok to do so without lock.
|
||||
var state StreamState
|
||||
mset.store.FastState(&state)
|
||||
|
||||
if lowAckFloor < math.MaxUint64 && lowAckFloor > state.FirstSeq {
|
||||
// Check if we had any zeroAcks, we will need to check them.
|
||||
for _, o := range zeroAcks {
|
||||
var np uint64
|
||||
o.mu.RLock()
|
||||
if o.isLeader() {
|
||||
np = uint64(o.numPending())
|
||||
} else {
|
||||
np, _ = o.calculateNumPending()
|
||||
}
|
||||
o.mu.RUnlock()
|
||||
// This means we have pending and can not remove anything at this time.
|
||||
if np > 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
if lowAckFloor <= state.LastSeq {
|
||||
// Purge the stream to lowest ack floor + 1
|
||||
mset.store.PurgeEx(_EMPTY_, lowAckFloor+1, 0)
|
||||
if lowAckFloor <= state.FirstSeq {
|
||||
return
|
||||
}
|
||||
|
||||
// Do not want to hold stream lock if calculating numPending.
|
||||
// Check if we had any zeroAcks, we will need to check them.
|
||||
for _, o := range zeroAcks {
|
||||
var np uint64
|
||||
o.mu.RLock()
|
||||
if o.isLeader() {
|
||||
np = uint64(o.numPending())
|
||||
} else {
|
||||
// Here we have a low ack floor higher than our last seq.
|
||||
// So we will just do normal purge.
|
||||
mset.store.Purge()
|
||||
np, _ = o.calculateNumPending()
|
||||
}
|
||||
o.mu.RUnlock()
|
||||
// This means we have pending and can not remove anything at this time.
|
||||
if np > 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
mset.mu.Lock()
|
||||
defer mset.mu.Unlock()
|
||||
|
||||
// Check which purge we need to perform.
|
||||
if lowAckFloor <= state.LastSeq || state.Msgs == 0 {
|
||||
// Purge the stream to lowest ack floor + 1
|
||||
mset.store.PurgeEx(_EMPTY_, lowAckFloor+1, 0)
|
||||
} else {
|
||||
// Here we have a low ack floor higher than our last seq.
|
||||
// So we will just do normal purge.
|
||||
mset.store.Purge()
|
||||
}
|
||||
|
||||
// Make sure to reset our local lseq.
|
||||
mset.store.FastState(&state)
|
||||
mset.lseq = state.LastSeq
|
||||
@@ -5840,6 +5870,8 @@ func (a *Account) RestoreStream(ncfg *StreamConfig, r io.Reader) (*stream, error
|
||||
}
|
||||
mset, err := a.addStream(&cfg)
|
||||
if err != nil {
|
||||
// Make sure to clean up after ourselves here.
|
||||
os.RemoveAll(ndir)
|
||||
return nil, err
|
||||
}
|
||||
if !fcfg.Created.IsZero() {
|
||||
@@ -5975,3 +6007,10 @@ func (mset *stream) clearMonitorRunning() {
|
||||
defer mset.mu.Unlock()
|
||||
mset.inMonitor = false
|
||||
}
|
||||
|
||||
// Check if our monitor is running.
|
||||
func (mset *stream) isMonitorRunning() bool {
|
||||
mset.mu.RLock()
|
||||
defer mset.mu.RUnlock()
|
||||
return mset.inMonitor
|
||||
}
|
||||
|
||||
+1
@@ -51,6 +51,7 @@ func (t *SubjectTree[T]) dump(w io.Writer, n node, depth int) {
|
||||
// kind returns a fixed, human-readable label for each concrete node type.
func (n *leaf[T]) kind() string { return "LEAF" }
|
||||
func (n *node4) kind() string { return "NODE4" }
|
||||
func (n *node16) kind() string { return "NODE16" }
|
||||
func (n *node48) kind() string { return "NODE48" }
|
||||
func (n *node256) kind() string { return "NODE256" }
|
||||
|
||||
// Calculates the indentation, etc.
|
||||
|
||||
+3
-2
@@ -18,16 +18,17 @@ import (
|
||||
)
|
||||
|
||||
// Leaf node
|
||||
// Order of struct fields for best memory alignment (as per govet/fieldalignment)
|
||||
type leaf[T any] struct {
|
||||
value T
|
||||
// This could be the whole subject, but most likely just the suffix portion.
|
||||
// We will only store the suffix here and assume all prior prefix paths have
|
||||
// been checked once we arrive at this leafnode.
|
||||
suffix []byte
|
||||
value T
|
||||
}
|
||||
|
||||
func newLeaf[T any](suffix []byte, value T) *leaf[T] {
|
||||
return &leaf[T]{copyBytes(suffix), value}
|
||||
return &leaf[T]{value, copyBytes(suffix)}
|
||||
}
|
||||
|
||||
func (n *leaf[T]) isLeaf() bool { return true }
|
||||
|
||||
+4
-3
@@ -14,10 +14,11 @@
|
||||
package stree
|
||||
|
||||
// Node with 16 children
|
||||
// Order of struct fields for best memory alignment (as per govet/fieldalignment)
|
||||
type node16 struct {
|
||||
meta
|
||||
child [16]node
|
||||
key [16]byte
|
||||
meta
|
||||
key [16]byte
|
||||
}
|
||||
|
||||
func newNode16(prefix []byte) *node16 {
|
||||
@@ -49,7 +50,7 @@ func (n *node16) findChild(c byte) *node {
|
||||
func (n *node16) isFull() bool { return n.size >= 16 }
|
||||
|
||||
func (n *node16) grow() node {
|
||||
nn := newNode256(n.prefix)
|
||||
nn := newNode48(n.prefix)
|
||||
for i := 0; i < 16; i++ {
|
||||
nn.addChild(n.key[i], n.child[i])
|
||||
}
|
||||
|
||||
+4
-3
@@ -14,9 +14,10 @@
|
||||
package stree
|
||||
|
||||
// Node with 256 children
|
||||
// Order of struct fields for best memory alignment (as per govet/fieldalignment)
|
||||
type node256 struct {
|
||||
meta
|
||||
child [256]node
|
||||
meta
|
||||
}
|
||||
|
||||
func newNode256(prefix []byte) *node256 {
|
||||
@@ -50,10 +51,10 @@ func (n *node256) deleteChild(c byte) {
|
||||
|
||||
// Shrink if needed and return new node, otherwise return nil.
|
||||
func (n *node256) shrink() node {
|
||||
if n.size > 16 {
|
||||
if n.size > 48 {
|
||||
return nil
|
||||
}
|
||||
nn := newNode16(nil)
|
||||
nn := newNode48(nil)
|
||||
for c, child := range n.child {
|
||||
if child != nil {
|
||||
nn.addChild(byte(c), n.child[c])
|
||||
|
||||
+3
-2
@@ -14,10 +14,11 @@
|
||||
package stree
|
||||
|
||||
// Node with 4 children
|
||||
// Order of struct fields for best memory alignment (as per govet/fieldalignment)
|
||||
type node4 struct {
|
||||
meta
|
||||
child [4]node
|
||||
key [4]byte
|
||||
meta
|
||||
key [4]byte
|
||||
}
|
||||
|
||||
func newNode4(prefix []byte) *node4 {
|
||||
|
||||
+110
@@ -0,0 +1,110 @@
|
||||
// Copyright 2023-2024 The NATS Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package stree
|
||||
|
||||
// Node with 48 children
|
||||
// Memory saving vs node256 comes from the fact that the child array is 16 bytes
|
||||
// per `node` entry, so node256's 256*16=4096 vs node48's 256+(48*16)=1024
|
||||
// Note that `key` is effectively 1-indexed, as 0 means no entry, so offset by 1
|
||||
// Order of struct fields for best memory alignment (as per govet/fieldalignment)
|
||||
type node48 struct {
|
||||
// Child slots. addChild fills them in order starting at index 0, and
// deleteChild moves the last occupied slot into any hole it creates.
child [48]node
|
||||
meta
|
||||
// Lookup table indexed by key byte: the value is the child slot index plus
// one, and zero means the key byte has no child.
key [256]byte
|
||||
}
|
||||
|
||||
func newNode48(prefix []byte) *node48 {
|
||||
nn := &node48{}
|
||||
nn.setPrefix(prefix)
|
||||
return nn
|
||||
}
|
||||
|
||||
func (n *node48) addChild(c byte, nn node) {
|
||||
if n.size >= 48 {
|
||||
panic("node48 full!")
|
||||
}
|
||||
n.child[n.size] = nn
|
||||
n.key[c] = byte(n.size + 1) // 1-indexed
|
||||
n.size++
|
||||
}
|
||||
|
||||
func (n *node48) findChild(c byte) *node {
|
||||
i := n.key[c]
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return &n.child[i-1]
|
||||
}
|
||||
|
||||
// isFull reports whether all 48 child slots are in use.
func (n *node48) isFull() bool { return n.size >= 48 }
|
||||
|
||||
func (n *node48) grow() node {
|
||||
nn := newNode256(n.prefix)
|
||||
for c := byte(0); c < 255; c++ {
|
||||
if i := n.key[c]; i > 0 {
|
||||
nn.addChild(c, n.child[i-1])
|
||||
}
|
||||
}
|
||||
return nn
|
||||
}
|
||||
|
||||
// Deletes a child from the node.
|
||||
func (n *node48) deleteChild(c byte) {
|
||||
i := n.key[c]
|
||||
if i == 0 {
|
||||
return
|
||||
}
|
||||
i-- // Adjust for 1-indexing
|
||||
last := byte(n.size - 1)
|
||||
if i < last {
|
||||
n.child[i] = n.child[last]
|
||||
for c := byte(0); c <= 255; c++ {
|
||||
if n.key[c] == last+1 {
|
||||
n.key[c] = i + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
n.child[last] = nil
|
||||
n.key[c] = 0
|
||||
n.size--
|
||||
}
|
||||
|
||||
// Shrink if needed and return new node, otherwise return nil.
|
||||
func (n *node48) shrink() node {
|
||||
if n.size > 16 {
|
||||
return nil
|
||||
}
|
||||
nn := newNode16(nil)
|
||||
for c := byte(0); c < 255; c++ {
|
||||
if i := n.key[c]; i > 0 {
|
||||
nn.addChild(c, n.child[i-1])
|
||||
}
|
||||
}
|
||||
return nn
|
||||
}
|
||||
|
||||
// Iterate over all children calling func f.
|
||||
func (n *node48) iter(f func(node) bool) {
|
||||
for _, c := range n.child {
|
||||
if c != nil && !f(c) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// children returns the first size child slots as a slice. The slice is taken
// directly from the node's child array, so it shares storage with the node.
func (n *node48) children() []node {
|
||||
return n.child[:n.size]
|
||||
}
|
||||
+14
-1
@@ -51,6 +51,10 @@ func (t *SubjectTree[T]) Empty() *SubjectTree[T] {
|
||||
|
||||
// Insert a value into the tree. Will return if the value was updated and if so the old value.
|
||||
func (t *SubjectTree[T]) Insert(subject []byte, value T) (*T, bool) {
|
||||
if t == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
old, updated := t.insert(&t.root, subject, value, 0)
|
||||
if !updated {
|
||||
t.size++
|
||||
@@ -60,6 +64,10 @@ func (t *SubjectTree[T]) Insert(subject []byte, value T) (*T, bool) {
|
||||
|
||||
// Find will find the value and return it or false if it was not found.
|
||||
func (t *SubjectTree[T]) Find(subject []byte) (*T, bool) {
|
||||
if t == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var si int
|
||||
for n := t.root; n != nil; {
|
||||
if n.isLeaf() {
|
||||
@@ -88,6 +96,10 @@ func (t *SubjectTree[T]) Find(subject []byte) (*T, bool) {
|
||||
|
||||
// Delete will delete the item and return its value, or not found if it did not exist.
|
||||
func (t *SubjectTree[T]) Delete(subject []byte) (*T, bool) {
|
||||
if t == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
val, deleted := t.delete(&t.root, subject, 0)
|
||||
if deleted {
|
||||
t.size--
|
||||
@@ -97,7 +109,7 @@ func (t *SubjectTree[T]) Delete(subject []byte) (*T, bool) {
|
||||
|
||||
// Match will match against a subject that can have wildcards and invoke the callback func for each matched value.
|
||||
func (t *SubjectTree[T]) Match(filter []byte, cb func(subject []byte, val *T)) {
|
||||
if len(filter) == 0 || cb == nil {
|
||||
if t == nil || t.root == nil || len(filter) == 0 || cb == nil {
|
||||
return
|
||||
}
|
||||
// We need to break this up into chunks based on wildcards, either pwc '*' or fwc '>'.
|
||||
@@ -340,6 +352,7 @@ func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subje
|
||||
t.match(cn, nparts, pre, cb)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
// Here we have normal traversal, so find the next child.
|
||||
nn := n.findChild(p)
|
||||
|
||||
Reference in New Issue
Block a user