build(deps): bump github.com/nats-io/nats-server/v2

Bumps [github.com/nats-io/nats-server/v2](https://github.com/nats-io/nats-server) from 2.12.1 to 2.12.2.
- [Release notes](https://github.com/nats-io/nats-server/releases)
- [Changelog](https://github.com/nats-io/nats-server/blob/main/.goreleaser.yml)
- [Commits](https://github.com/nats-io/nats-server/compare/v2.12.1...v2.12.2)

---
updated-dependencies:
- dependency-name: github.com/nats-io/nats-server/v2
  dependency-version: 2.12.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
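The same bump can be reproduced locally with `go get github.com/nats-io/nats-server/v2@v2.12.2` followed by `go mod tidy`; the diff below also carries the transitive updates this pulls in (klauspost/compress 1.18.0 to 1.18.1 and a pseudo-versioned minio/highwayhash).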
Authored by dependabot[bot] on 2025-11-26 14:17:05 +00:00; committed by Ralf Haferkamp
parent 3badc66d4a
commit 8a70a65597
74 changed files with 1779 additions and 968 deletions
+3 -3
@@ -54,7 +54,7 @@ require (
github.com/mitchellh/mapstructure v1.5.0
github.com/mna/pigeon v1.3.0
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826
github.com/nats-io/nats-server/v2 v2.12.1
github.com/nats-io/nats-server/v2 v2.12.2
github.com/nats-io/nats.go v1.47.0
github.com/oklog/run v1.2.0
github.com/olekukonko/tablewriter v1.1.1
@@ -259,7 +259,7 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/juliangruber/go-intersect v1.1.0 // indirect
github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/klauspost/compress v1.18.1 // indirect
github.com/klauspost/cpuid/v2 v2.2.11 // indirect
github.com/klauspost/crc32 v1.3.0 // indirect
github.com/kovidgoyal/go-parallel v1.1.1 // indirect
@@ -288,7 +288,7 @@ require (
github.com/miekg/dns v1.1.57 // indirect
github.com/mileusna/useragent v1.3.5 // indirect
github.com/minio/crc64nvme v1.1.0 // indirect
github.com/minio/highwayhash v1.0.3 // indirect
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/minio/minio-go/v7 v7.0.97 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
+6 -6
@@ -725,8 +725,8 @@ github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.11 h1:0OwqZRYI2rFrjS4kvkDnqJkKHdHaRnCm68/DY4OxRzU=
github.com/klauspost/cpuid/v2 v2.2.11/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
@@ -850,8 +850,8 @@ github.com/mileusna/useragent v1.3.5 h1:SJM5NzBmh/hO+4LGeATKpaEX9+b4vcGg2qXGLiNG
github.com/mileusna/useragent v1.3.5/go.mod h1:3d8TOmwL/5I8pJjyVDteHtgDGcefrFUX4ccGOMKNYYc=
github.com/minio/crc64nvme v1.1.0 h1:e/tAguZ+4cw32D+IO/8GSf5UVr9y+3eJcxZI2WOO/7Q=
github.com/minio/crc64nvme v1.1.0/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg=
github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q=
github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk=
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.0.97 h1:lqhREPyfgHTB/ciX8k2r8k0D93WaFqxbJX36UZq5occ=
@@ -910,8 +910,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRW
github.com/namedotcom/go v0.0.0-20180403034216-08470befbe04/go.mod h1:5sN+Lt1CaY4wsPvgQH/jsuJi4XO2ssZbdsIizr4CVC8=
github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g=
github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
github.com/nats-io/nats-server/v2 v2.12.1 h1:0tRrc9bzyXEdBLcHr2XEjDzVpUxWx64aZBm7Rl1QDrA=
github.com/nats-io/nats-server/v2 v2.12.1/go.mod h1:OEaOLmu/2e6J9LzUt2OuGjgNem4EpYApO5Rpf26HDs8=
github.com/nats-io/nats-server/v2 v2.12.2 h1:4TEQd0Y4zvcW0IsVxjlXnRso1hBkQl3TS0BI+SxgPhE=
github.com/nats-io/nats-server/v2 v2.12.2/go.mod h1:j1AAttYeu7WnvD8HLJ+WWKNMSyxsqmZ160pNtCQRMyE=
github.com/nats-io/nats.go v1.47.0 h1:YQdADw6J/UfGUd2Oy6tn4Hq6YHxCaJrVKayxxFqYrgM=
github.com/nats-io/nats.go v1.47.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
+13 -36
@@ -6,11 +6,12 @@
package flate
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"github.com/klauspost/compress/internal/le"
)
const (
@@ -234,12 +235,9 @@ func (d *compressor) fillWindow(b []byte) {
// Calculate 256 hashes at the time (more L1 cache hits)
loops := (n + 256 - minMatchLength) / 256
for j := 0; j < loops; j++ {
for j := range loops {
startindex := j * 256
end := startindex + 256 + minMatchLength - 1
if end > n {
end = n
}
end := min(startindex+256+minMatchLength-1, n)
tocheck := d.window[startindex:end]
dstSize := len(tocheck) - minMatchLength + 1
@@ -269,18 +267,12 @@ func (d *compressor) fillWindow(b []byte) {
// We only look at chainCount possibilities before giving up.
// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) {
minMatchLook := maxMatchLength
if lookahead < minMatchLook {
minMatchLook = lookahead
}
minMatchLook := min(lookahead, maxMatchLength)
win := d.window[0 : pos+minMatchLook]
// We quit when we get a match that's at least nice long
nice := len(win) - pos
if d.nice < nice {
nice = d.nice
}
nice := min(d.nice, len(win)-pos)
// If we've got a match that's good enough, only look in 1/4 the chain.
tries := d.chain
@@ -288,10 +280,7 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
wEnd := win[pos+length]
wPos := win[pos:]
minIndex := pos - windowSize
if minIndex < 0 {
minIndex = 0
}
minIndex := max(pos-windowSize, 0)
offset = 0
if d.chain < 100 {
@@ -374,7 +363,7 @@ func (d *compressor) writeStoredBlock(buf []byte) error {
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
return hash4u(binary.LittleEndian.Uint32(b), hashBits)
return hash4u(le.Load32(b, 0), hashBits)
}
// hash4 returns the hash of u to fit in a hash table with h bits.
@@ -389,7 +378,7 @@ func bulkHash4(b []byte, dst []uint32) {
if len(b) < 4 {
return
}
hb := binary.LittleEndian.Uint32(b)
hb := le.Load32(b, 0)
dst[0] = hash4u(hb, hashBits)
end := len(b) - 4 + 1
@@ -480,10 +469,7 @@ func (d *compressor) deflateLazy() {
prevOffset := s.offset
s.length = minMatchLength - 1
s.offset = 0
minIndex := s.index - windowSize
if minIndex < 0 {
minIndex = 0
}
minIndex := max(s.index-windowSize, 0)
if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok {
@@ -503,10 +489,7 @@ func (d *compressor) deflateLazy() {
if prevLength < maxMatchLength-checkOff {
prevIndex := s.index - 1
if prevIndex+prevLength < s.maxInsertIndex {
end := lookahead
if lookahead > maxMatchLength+checkOff {
end = maxMatchLength + checkOff
}
end := min(lookahead, maxMatchLength+checkOff)
end += prevIndex
// Hash at match end.
@@ -603,15 +586,9 @@ func (d *compressor) deflateLazy() {
// table.
newIndex := s.index + prevLength - 1
// Calculate missing hashes
end := newIndex
if end > s.maxInsertIndex {
end = s.maxInsertIndex
}
end := min(newIndex, s.maxInsertIndex)
end += minMatchLength - 1
startindex := s.index + 1
if startindex > s.maxInsertIndex {
startindex = s.maxInsertIndex
}
startindex := min(s.index+1, s.maxInsertIndex)
tocheck := d.window[startindex:end]
dstSize := len(tocheck) - minMatchLength + 1
if dstSize > 0 {
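Most hunks in this vendored klauspost/compress refresh are mechanical modernizations rather than behavior changes: manual clamping becomes the Go 1.21 min/max builtins, counted loops become Go 1.22 range-over-int, and interface{} becomes any. A standalone sketch of the three patterns (assuming Go 1.22+):

package main

import "fmt"

func main() {
	n, start := 100, 90

	// Pattern 1: manual clamp -> min builtin (Go 1.21), as in fillWindow above.
	end := start + 32
	if end > n {
		end = n
	}
	fmt.Println(end == min(start+32, n)) // true

	// Pattern 2: counted loop -> range over int (Go 1.22).
	sum := 0
	for i := range 4 {
		sum += i
	}
	fmt.Println(sum) // 6

	// Pattern 3: interface{} -> the identical any alias.
	var v any = "unchanged semantics"
	fmt.Println(v)
}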
+1 -4
@@ -104,10 +104,7 @@ func (dd *dictDecoder) writeCopy(dist, length int) int {
dstBase := dd.wrPos
dstPos := dstBase
srcPos := dstPos - dist
endPos := dstPos + length
if endPos > len(dd.hist) {
endPos = len(dd.hist)
}
endPos := min(dstPos+length, len(dd.hist))
// Copy non-overlapping section after destination position.
//
+3 -46
@@ -7,7 +7,6 @@ package flate
import (
"fmt"
"math/bits"
"github.com/klauspost/compress/internal/le"
)
@@ -151,29 +150,9 @@ func (e *fastGen) matchlen(s, t int, src []byte) int32 {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := min(s+maxMatchLength-4, len(src))
left := s1 - s
n := int32(0)
for left >= 8 {
diff := le.Load64(src, s) ^ le.Load64(src, t)
if diff != 0 {
return n + int32(bits.TrailingZeros64(diff)>>3)
}
s += 8
t += 8
n += 8
left -= 8
}
a := src[s:s1]
a := src[s:min(s+maxMatchLength-4, len(src))]
b := src[t:]
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
return int32(matchLen(a, b))
}
// matchlenLong will return the match length between offsets and t in src.
@@ -193,29 +172,7 @@ func (e *fastGen) matchlenLong(s, t int, src []byte) int32 {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
left := len(src) - s
n := int32(0)
for left >= 8 {
diff := le.Load64(src, s) ^ le.Load64(src, t)
if diff != 0 {
return n + int32(bits.TrailingZeros64(diff)>>3)
}
s += 8
t += 8
n += 8
left -= 8
}
a := src[s:]
b := src[t:]
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
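Both matchlen variants above now defer to a shared matchLen helper instead of carrying private copies of the word-at-a-time loop. A self-contained sketch of the technique the removed code used (compare 8 bytes per iteration, then locate the first differing byte via trailing zeros):

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLenSketch mirrors the removed inline loops: XOR 8-byte words and,
// on the first mismatch, count matching bytes from the trailing zero bits.
func matchLenSketch(a, b []byte) int {
	n := 0
	for len(a) >= 8 && len(b) >= 8 {
		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if diff != 0 {
			return n + bits.TrailingZeros64(diff)>>3
		}
		a, b, n = a[8:], b[8:], n+8
	}
	for i := 0; i < len(a) && i < len(b) && a[i] == b[i]; i++ {
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLenSketch([]byte("abcdefghij"), []byte("abcdefghXY"))) // 8
}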
+16 -26
@@ -211,7 +211,9 @@ func (w *huffmanBitWriter) flush() {
n++
}
w.bits = 0
w.write(w.bytes[:n])
if n > 0 {
w.write(w.bytes[:n])
}
w.nbytes = 0
}
@@ -303,10 +305,7 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
w.codegenFreq[size]++
count--
for count >= 3 {
n := 6
if n > count {
n = count
}
n := min(6, count)
codegen[outIndex] = 16
outIndex++
codegen[outIndex] = uint8(n - 3)
@@ -316,10 +315,7 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
}
} else {
for count >= 11 {
n := 138
if n > count {
n = count
}
n := min(138, count)
codegen[outIndex] = 18
outIndex++
codegen[outIndex] = uint8(n - 11)
@@ -438,8 +434,8 @@ func (w *huffmanBitWriter) writeOutBits() {
w.nbits -= 48
n := w.nbytes
// We over-write, but faster...
le.Store64(w.bytes[n:], bits)
// We overwrite, but faster...
le.Store64(w.bytes[:], n, bits)
n += 6
if n >= bufferFlushSize {
@@ -472,7 +468,7 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
w.writeBits(int32(numOffsets-1), 5)
w.writeBits(int32(numCodegens-4), 4)
for i := 0; i < numCodegens; i++ {
for i := range numCodegens {
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len())
w.writeBits(int32(value), 3)
}
@@ -650,7 +646,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
w.lastHeader = 0
}
numLiterals, numOffsets := w.indexTokens(tokens, !sync)
numLiterals, numOffsets := w.indexTokens(tokens, fillReuse && !sync)
extraBits := 0
ssize, storable := w.storedSize(input)
@@ -855,8 +851,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
le.Store64(w.bytes[:], nbytes, bits)
bits >>= 48
nbits -= 48
nbytes += 6
@@ -883,8 +878,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
le.Store64(w.bytes[:], nbytes, bits)
bits >>= 48
nbits -= 48
nbytes += 6
@@ -906,8 +900,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= uint64(extraLength) << (nbits & 63)
nbits += extraLengthBits
if nbits >= 48 {
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
le.Store64(w.bytes[:], nbytes, bits)
bits >>= 48
nbits -= 48
nbytes += 6
@@ -932,8 +925,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
le.Store64(w.bytes[:], nbytes, bits)
bits >>= 48
nbits -= 48
nbytes += 6
@@ -954,8 +946,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
nbits += uint8(offsetComb)
if nbits >= 48 {
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
le.Store64(w.bytes[:], nbytes, bits)
bits >>= 48
nbits -= 48
nbytes += 6
@@ -1108,7 +1099,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// We must have at least 48 bits free.
if nbits >= 8 {
n := nbits >> 3
le.Store64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[:], nbytes, bits)
bits >>= (n * 8) & 63
nbits -= n * 8
nbytes += n
@@ -1137,8 +1128,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Remaining...
for _, t := range input {
if nbits >= 48 {
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
le.Store64(w.bytes[:], nbytes, bits)
bits >>= 48
nbits -= 48
nbytes += 6
+1 -1
@@ -91,7 +91,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(literalCount)
codes := h.codes
var ch uint16
for ch = 0; ch < literalCount; ch++ {
for ch = range uint16(literalCount) {
var bits uint16
var size uint8
switch {
+2 -2
@@ -485,7 +485,7 @@ func (f *decompressor) readHuffman() error {
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for i := range nclen {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
@@ -776,7 +776,7 @@ func fixedHuffmanDecoderInit() {
fixedOnce.Do(func() {
// These come from the RFC section 3.2.6.
var bits [288]int
for i := 0; i < 144; i++ {
for i := range 144 {
bits[i] = 8
}
for i := 144; i < 256; i++ {
+1 -4
@@ -677,10 +677,7 @@ func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
s1 := min(int(s)+maxMatchLength-4, len(src))
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
+2 -2
@@ -56,7 +56,7 @@ func NewStatelessWriter(dst io.Writer) io.WriteCloser {
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
New: func() interface{} {
New: func() any {
return newHuffmanBitWriter(nil)
},
}
@@ -184,7 +184,7 @@ func statelessEnc(dst *tokens, src []byte, startAt int16) {
// Index until startAt
if startAt > 0 {
cv := load3232(src, 0)
for i := int16(0); i < startAt; i++ {
for i := range startAt {
table[hashSL(cv)] = tableEntry{offset: i}
cv = (cv >> 8) | (uint32(src[i+4]) << 24)
}
+1 -1
@@ -143,7 +143,7 @@ func (b *bitWriter) flush32() {
// flushAlign will flush remaining full bytes and align to next byte boundary.
func (b *bitWriter) flushAlign() {
nbBytes := (b.nBits + 7) >> 3
for i := uint8(0); i < nbBytes; i++ {
for i := range nbBytes {
b.out = append(b.out, byte(b.bitContainer>>(i*8)))
}
b.nBits = 0
+1 -1
@@ -396,7 +396,7 @@ func (s *Scratch) buildCTable() error {
if v > largeLimit {
s.zeroBits = true
}
for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
for range v {
tableSymbol[position] = symbol
position = (position + step) & tableMask
for position > highThreshold {
+1 -1
@@ -85,7 +85,7 @@ func (b *bitWriter) flush32() {
// flushAlign will flush remaining full bytes and align to next byte boundary.
func (b *bitWriter) flushAlign() {
nbBytes := (b.nBits + 7) >> 3
for i := uint8(0); i < nbBytes; i++ {
for i := range nbBytes {
b.out = append(b.out, byte(b.bitContainer>>(i*8)))
}
b.nBits = 0
+3 -3
@@ -276,7 +276,7 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) {
offsetIdx := len(s.Out)
s.Out = append(s.Out, sixZeros[:]...)
for i := 0; i < 4; i++ {
for i := range 4 {
toDo := src
if len(toDo) > segmentSize {
toDo = toDo[:segmentSize]
@@ -312,7 +312,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
segmentSize := (len(src) + 3) / 4
var wg sync.WaitGroup
wg.Add(4)
for i := 0; i < 4; i++ {
for i := range 4 {
toDo := src
if len(toDo) > segmentSize {
toDo = toDo[:segmentSize]
@@ -326,7 +326,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
}(i)
}
wg.Wait()
for i := 0; i < 4; i++ {
for i := range 4 {
o := s.tmpOut[i]
if len(o) > math.MaxUint16 {
// We cannot store the size in the jump table
+4 -10
@@ -626,7 +626,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
var br [4]bitReaderBytes
start := 6
for i := 0; i < 3; i++ {
for i := range 3 {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
@@ -798,10 +798,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
remainBytes := dstEvery - (decoded / 4)
for i := range br {
offset := dstEvery * i
endsAt := offset + remainBytes
if endsAt > len(out) {
endsAt = len(out)
}
endsAt := min(offset+remainBytes, len(out))
br := &br[i]
bitsLeft := br.remaining()
for bitsLeft > 0 {
@@ -864,7 +861,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
var br [4]bitReaderBytes
start := 6
for i := 0; i < 3; i++ {
for i := range 3 {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
@@ -1035,10 +1032,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
remainBytes := dstEvery - (decoded / 4)
for i := range br {
offset := dstEvery * i
endsAt := offset + remainBytes
if endsAt > len(out) {
endsAt = len(out)
}
endsAt := min(offset+remainBytes, len(out))
br := &br[i]
bitsLeft := br.remaining()
for bitsLeft > 0 {
+2 -5
@@ -58,7 +58,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
var br [4]bitReaderShifted
// Decode "jump table"
start := 6
for i := 0; i < 3; i++ {
for i := range 3 {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
@@ -109,10 +109,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
remainBytes := dstEvery - (decoded / 4)
for i := range br {
offset := dstEvery * i
endsAt := offset + remainBytes
if endsAt > len(out) {
endsAt = len(out)
}
endsAt := min(offset+remainBytes, len(out))
br := &br[i]
bitsLeft := br.remaining()
for bitsLeft > 0 {
+2 -2
@@ -201,7 +201,7 @@ func (c cTable) write(s *Scratch) error {
for i := range hist[:16] {
hist[i] = 0
}
for n := uint8(0); n < maxSymbolValue; n++ {
for n := range maxSymbolValue {
v := bitsToWeight[c[n].nBits] & 15
huffWeight[n] = v
hist[v]++
@@ -271,7 +271,7 @@ func (c cTable) estTableSize(s *Scratch) (sz int, err error) {
for i := range hist[:16] {
hist[i] = 0
}
for n := uint8(0); n < maxSymbolValue; n++ {
for n := range maxSymbolValue {
v := bitsToWeight[c[n].nBits] & 15
huffWeight[n] = v
hist[v]++
+2 -2
@@ -37,6 +37,6 @@ func Store32(b []byte, v uint32) {
}
// Store64 will store v at b.
func Store64(b []byte, v uint64) {
binary.LittleEndian.PutUint64(b, v)
func Store64[I Indexer](b []byte, i I, v uint64) {
binary.LittleEndian.PutUint64(b[i:], v)
}
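The le.Store64 signature change threads the offset through a generic parameter instead of reslicing at every call site, which is why the huffman writer hunks above switch from le.Store64(w.bytes[nbytes:], bits) to le.Store64(w.bytes[:], nbytes, bits). A minimal standalone sketch of the safe variant (the exact member types of the package's Indexer constraint are assumed here):

package main

import (
	"encoding/binary"
	"fmt"
)

// indexer stands in for the vendored package's Indexer constraint.
type indexer interface {
	~int | ~int32 | ~int64 | ~uint | ~uint32 | ~uint64
}

// store64 writes v little-endian at b[i:], matching the new call shape.
func store64[I indexer](b []byte, i I, v uint64) {
	binary.LittleEndian.PutUint64(b[i:], v)
}

func main() {
	buf := make([]byte, 16)
	store64(buf, uint32(6), 0x1122334455667788)
	fmt.Printf("%x\n", buf[6:14]) // 8877665544332211 (little-endian)
}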
+3 -6
@@ -38,18 +38,15 @@ func Load64[I Indexer](b []byte, i I) uint64 {
// Store16 will store v at b.
func Store16(b []byte, v uint16) {
//binary.LittleEndian.PutUint16(b, v)
*(*uint16)(unsafe.Pointer(unsafe.SliceData(b))) = v
}
// Store32 will store v at b.
func Store32(b []byte, v uint32) {
//binary.LittleEndian.PutUint32(b, v)
*(*uint32)(unsafe.Pointer(unsafe.SliceData(b))) = v
}
// Store64 will store v at b.
func Store64(b []byte, v uint64) {
//binary.LittleEndian.PutUint64(b, v)
*(*uint64)(unsafe.Pointer(unsafe.SliceData(b))) = v
// Store64 will store v at b[i:].
func Store64[I Indexer](b []byte, i I, v uint64) {
*(*uint64)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) = v
}
+1 -1
@@ -209,7 +209,7 @@ func (r *Reader) fill() error {
if !r.readFull(r.buf[:len(magicBody)], false) {
return r.err
}
for i := 0; i < len(magicBody); i++ {
for i := range len(magicBody) {
if r.buf[i] != magicBody[i] {
r.err = ErrCorrupt
return r.err
+3 -1
@@ -20,8 +20,10 @@ import (
func Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
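The len(dst) < n to cap(dst) < n change (repeated for EncodeBetter and EncodeBest below) means a zero-length destination with sufficient capacity is now resliced and reused rather than reallocated. A usage sketch:

package main

import (
	"fmt"

	"github.com/klauspost/compress/s2"
)

func main() {
	src := []byte("hello hello hello hello hello hello")
	// Zero length but enough capacity: with the cap(dst) check the slice is
	// resliced to n and reused; previously len(dst) == 0 forced a fresh make().
	dst := make([]byte, 0, s2.MaxEncodedLen(len(src)))
	out := s2.Encode(dst, src)
	fmt.Println(len(out) > 0, cap(out) == cap(dst)) // true true
}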
+14
@@ -1,3 +1,17 @@
# MinLZ
I have taken the experiences from this library and created a backwards compatible compression package called MinLZ.
That package will seamlessly decode S2 content, making the transition from this package fairly trivial.
There are many improvements to pretty much all aspects of S2 since we have "broken free" of the Snappy format specification.
You can read a writeup on [Design and Improvements over S2](https://gist.github.com/klauspost/a25b66198cdbdf7b5b224f670c894ed5).
The only aspect not covered is custom dictionary encoding. While I do intend to fix errors in this package,
I do not expect to make significant improvements, since I consider MinLZ a better basis for going forward.
See https://github.com/minio/minlz for all details.
# S2 Compression
S2 is an extension of [Snappy](https://github.com/google/snappy).
+6 -2
@@ -117,8 +117,10 @@ func EstimateBlockSize(src []byte) (d int) {
func EncodeBetter(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
@@ -159,8 +161,10 @@ func EncodeBetter(dst, src []byte) []byte {
func EncodeBest(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
+1 -4
@@ -903,10 +903,7 @@ func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
if sLimit > MaxDictSrcOffset-maxAhead {
sLimit = MaxDictSrcOffset - maxAhead
}
sLimit := min(len(src)-inputMargin, MaxDictSrcOffset-maxAhead)
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
+1 -4
@@ -42,10 +42,7 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
if len(src) < minNonLiteralBlockSize {
return 0
}
sLimitDict := len(src) - inputMargin
if sLimitDict > MaxDictSrcOffset-inputMargin {
sLimitDict = MaxDictSrcOffset - inputMargin
}
sLimitDict := min(len(src)-inputMargin, MaxDictSrcOffset-inputMargin)
var lTable [maxLTableSize]uint64
var sTable [maxSTableSize]uint64
+1 -4
@@ -914,10 +914,7 @@ func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) {
debug = false
)
sLimit := len(src) - inputMargin
if sLimit > MaxDictSrcOffset-maxAhead {
sLimit = MaxDictSrcOffset - maxAhead
}
sLimit := min(len(src)-inputMargin, MaxDictSrcOffset-maxAhead)
if len(src) < minNonLiteralBlockSize {
return 0
}
+1 -1
@@ -72,7 +72,7 @@ func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
return fmt.Errorf("internal error: Earlier uncompressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
}
if latest.compressedOffset > compressedOffset {
return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.compressedOffset, compressedOffset)
}
if latest.uncompressedOffset+minIndexDist > uncompressedOffset {
// Only add entry if distance is large enough.
+1 -1
@@ -1046,7 +1046,7 @@ func (r *Reader) ReadByte() (byte, error) {
return c, nil
}
var tmp [1]byte
for i := 0; i < 10; i++ {
for range 10 {
n, err := r.Read(tmp[:])
if err != nil {
return 0, err
+1 -1
@@ -47,7 +47,7 @@ func NewWriter(w io.Writer, opts ...WriterOption) *Writer {
w2.obufLen = obufHeaderLen + MaxEncodedLen(w2.blockSize)
w2.paramsOK = true
w2.ibuf = make([]byte, 0, w2.blockSize)
w2.buffers.New = func() interface{} {
w2.buffers.New = func() any {
return make([]byte, w2.obufLen)
}
w2.Reset(w)
+1 -1
@@ -88,7 +88,7 @@ func (b *bitWriter) flush32() {
// flushAlign will flush remaining full bytes and align to next byte boundary.
func (b *bitWriter) flushAlign() {
nbBytes := (b.nBits + 7) >> 3
for i := uint8(0); i < nbBytes; i++ {
for i := range nbBytes {
b.out = append(b.out, byte(b.bitContainer>>(i*8)))
}
b.nBits = 0
+3 -3
@@ -54,11 +54,11 @@ const (
)
var (
huffDecoderPool = sync.Pool{New: func() interface{} {
huffDecoderPool = sync.Pool{New: func() any {
return &huff0.Scratch{}
}}
fseDecoderPool = sync.Pool{New: func() interface{} {
fseDecoderPool = sync.Pool{New: func() any {
return &fseDecoder{}
}}
)
@@ -553,7 +553,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
if compMode&3 != 0 {
return errors.New("corrupt block: reserved bits not zero")
}
for i := uint(0); i < 3; i++ {
for i := range uint(3) {
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debugDecoder {
println("Table", tableIndex(i), "is", mode)
+3 -5
@@ -373,11 +373,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if cap(dst) == 0 && !d.o.limitToCap {
// Allocate len(input) * 2 by default if nothing is provided
// and we didn't get frame content size.
size := len(input) * 2
// Cap to 1 MB.
if size > 1<<20 {
size = 1 << 20
}
size := min(
// Cap to 1 MB.
len(input)*2, 1<<20)
if uint64(size) > d.o.maxDecodedSize {
size = int(d.o.maxDecodedSize)
}
+7 -13
@@ -194,17 +194,17 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
hist := o.History
contents := o.Contents
debug := o.DebugOut != nil
println := func(args ...interface{}) {
println := func(args ...any) {
if o.DebugOut != nil {
fmt.Fprintln(o.DebugOut, args...)
}
}
printf := func(s string, args ...interface{}) {
printf := func(s string, args ...any) {
if o.DebugOut != nil {
fmt.Fprintf(o.DebugOut, s, args...)
}
}
print := func(args ...interface{}) {
print := func(args ...any) {
if o.DebugOut != nil {
fmt.Fprint(o.DebugOut, args...)
}
@@ -424,16 +424,10 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
}
// Literal table
avgSize := litTotal
if avgSize > huff0.BlockSizeMax/2 {
avgSize = huff0.BlockSizeMax / 2
}
avgSize := min(litTotal, huff0.BlockSizeMax/2)
huffBuff := make([]byte, 0, avgSize)
// Target size
div := litTotal / avgSize
if div < 1 {
div = 1
}
div := max(litTotal/avgSize, 1)
if debug {
println("Huffman weights:")
}
@@ -454,7 +448,7 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
huffBuff = append(huffBuff, 255)
}
scratch := &huff0.Scratch{TableLog: 11}
for tries := 0; tries < 255; tries++ {
for tries := range 255 {
scratch = &huff0.Scratch{TableLog: 11}
_, _, err = huff0.Compress1X(huffBuff, scratch)
if err == nil {
@@ -471,7 +465,7 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
// Bail out.... Just generate something
huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
for i := 0; i < 128; i++ {
for i := range 128 {
huffBuff = append(huffBuff, byte(i))
}
continue
+4 -6
@@ -8,7 +8,7 @@ import (
)
const (
dictShardBits = 6
dictShardBits = 7
)
type fastBase struct {
@@ -41,11 +41,9 @@ func (e *fastBase) AppendCRC(dst []byte) []byte {
// or a window size small enough to contain the input size, if > 0.
func (e *fastBase) WindowSize(size int64) int32 {
if size > 0 && size < int64(e.maxMatchOff) {
b := int32(1) << uint(bits.Len(uint(size)))
// Keep minimum window.
if b < 1024 {
b = 1024
}
b := max(
// Keep minimum window.
int32(1)<<uint(bits.Len(uint(size))), 1024)
return b
}
return e.maxMatchOff
+6 -17
@@ -158,11 +158,9 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
// Use this to estimate literal cost.
// Scaled by 10 bits.
bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
// Huffman can never go < 1 bit/byte
if bitsPerByte < 1024 {
bitsPerByte = 1024
}
bitsPerByte := max(
// Huffman can never go < 1 bit/byte
int32((compress.ShannonEntropyBits(src)*1024)/len(src)), 1024)
// Override src
src = e.hist
@@ -235,10 +233,7 @@ encodeLoop:
// Extend candidate match backwards as far as possible.
// Do not extend repeats as we can assume they are optimal
// and offsets change if s == nextEmit.
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
s--
offset--
@@ -382,10 +377,7 @@ encodeLoop:
nextEmit = s
// Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
end := min(s, sLimit+4)
off := index0 + e.cur
for index0 < end {
cv0 := load6432(src, index0)
@@ -444,10 +436,7 @@ encodeLoop:
nextEmit = s
// Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
}
end := min(s, sLimit-4)
off := index0 + e.cur
for index0 < end {
+6 -24
@@ -190,10 +190,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
@@ -252,10 +249,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
@@ -480,10 +474,7 @@ encodeLoop:
l := matched
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
@@ -719,10 +710,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
@@ -783,10 +771,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
@@ -1005,10 +990,7 @@ encodeLoop:
l := matched
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
+7 -25
@@ -13,7 +13,7 @@ const (
dFastLongLen = 8 // Bytes used for table hash
dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard
dLongTableShardSize = dFastLongTableSize / dLongTableShardCnt // Size of an individual shard
dFastShortTableBits = tableBits // Bits used in the short match table
dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
@@ -149,10 +149,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
@@ -266,10 +263,7 @@ encodeLoop:
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
@@ -462,10 +456,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
repIndex--
start--
@@ -576,10 +567,7 @@ encodeLoop:
l := int32(matchLen(src[s+4:], src[t+4:])) + 4
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
@@ -809,10 +797,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
@@ -927,10 +912,7 @@ encodeLoop:
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
+6 -24
@@ -143,10 +143,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
sMin := s - e.maxMatchOff
if sMin < 0 {
sMin = 0
}
sMin := max(s-e.maxMatchOff, 0)
for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
repIndex--
start--
@@ -223,10 +220,7 @@ encodeLoop:
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
@@ -387,10 +381,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
sMin := s - e.maxMatchOff
if sMin < 0 {
sMin = 0
}
sMin := max(s-e.maxMatchOff, 0)
for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
repIndex--
start--
@@ -469,10 +460,7 @@ encodeLoop:
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
@@ -655,10 +643,7 @@ encodeLoop:
// and have to do special offset treatment.
startLimit := nextEmit + 1
sMin := s - e.maxMatchOff
if sMin < 0 {
sMin = 0
}
sMin := max(s-e.maxMatchOff, 0)
for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
repIndex--
start--
@@ -735,10 +720,7 @@ encodeLoop:
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
tMin := max(s-e.maxMatchOff, 0)
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
+1 -4
@@ -238,10 +238,7 @@ func (d *frameDec) reset(br byteBuffer) error {
if d.WindowSize == 0 && d.SingleSegment {
// We may not need window in this case.
d.WindowSize = d.FrameContentSize
if d.WindowSize < MinWindowSize {
d.WindowSize = MinWindowSize
}
d.WindowSize = max(d.FrameContentSize, MinWindowSize)
if d.WindowSize > d.o.maxDecodedSize {
if debugDecoder {
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
+1 -1
@@ -149,7 +149,7 @@ func (s *fseEncoder) buildCTable() error {
if v > largeLimit {
s.zeroBits = true
}
for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
for range v {
tableSymbol[position] = symbol
position = (position + step) & tableMask
for position > highThreshold {
+1 -4
@@ -231,10 +231,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
out := s.out
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
maxBlockSize := min(s.windowSize, maxCompressedBlockSize)
if debugDecoder {
println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream")
+2 -8
@@ -79,10 +79,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
br := s.br
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
maxBlockSize := min(s.windowSize, maxCompressedBlockSize)
ctx := decodeSyncAsmContext{
llTable: s.litLengths.fse.dt[:maxTablesize],
@@ -237,10 +234,7 @@ func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmC
func (s *sequenceDecs) decode(seqs []seqVals) error {
br := s.br
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
maxBlockSize := min(s.windowSize, maxCompressedBlockSize)
ctx := decodeAsmContext{
llTable: s.litLengths.fse.dt[:maxTablesize],
+56
@@ -0,0 +1,56 @@
// Copyright 2025+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
//go:build go1.24
package zstd
import (
"errors"
"runtime"
"sync"
"weak"
)
var weakMu sync.Mutex
var simpleEnc weak.Pointer[Encoder]
var simpleDec weak.Pointer[Decoder]
// EncodeTo appends the encoded data from src to dst.
func EncodeTo(dst []byte, src []byte) []byte {
weakMu.Lock()
enc := simpleEnc.Value()
if enc == nil {
var err error
enc, err = NewWriter(nil, WithEncoderConcurrency(runtime.NumCPU()), WithWindowSize(1<<20), WithLowerEncoderMem(true), WithZeroFrames(true))
if err != nil {
panic("failed to create simple encoder: " + err.Error())
}
simpleEnc = weak.Make(enc)
}
weakMu.Unlock()
return enc.EncodeAll(src, dst)
}
// DecodeTo appends the decoded data from src to dst.
// The maximum decoded size is 1GiB,
// not including what may already be in dst.
func DecodeTo(dst []byte, src []byte) ([]byte, error) {
weakMu.Lock()
dec := simpleDec.Value()
if dec == nil {
var err error
dec, err = NewReader(nil, WithDecoderConcurrency(runtime.NumCPU()), WithDecoderLowmem(true), WithDecoderMaxMemory(1<<30))
if err != nil {
weakMu.Unlock()
return nil, errors.New("failed to create simple decoder: " + err.Error())
}
runtime.SetFinalizer(dec, func(d *Decoder) {
d.Close()
})
simpleDec = weak.Make(dec)
}
weakMu.Unlock()
return dec.DecodeAll(src, dst)
}
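The new simple.go (build-tagged go1.24, since it relies on the weak package) adds package-level helpers backed by weakly cached shared coder instances. A usage sketch based on the signatures above:

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	payload := []byte("some payload worth compressing")
	compressed := zstd.EncodeTo(nil, payload) // lazily creates the shared Encoder
	restored, err := zstd.DecodeTo(nil, compressed)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(restored))
}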
+1 -1
@@ -257,7 +257,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
if !r.readFull(r.buf[:len(snappyMagicBody)], false) {
return written, r.err
}
for i := 0; i < len(snappyMagicBody); i++ {
for i := range len(snappyMagicBody) {
if r.buf[i] != snappyMagicBody[i] {
println("r.buf[i] != snappyMagicBody[i]", r.buf[i], snappyMagicBody[i], i)
r.err = ErrSnappyCorrupt
+1 -1
@@ -19,7 +19,7 @@ const ZipMethodWinZip = 93
const ZipMethodPKWare = 20
// zipReaderPool is the default reader pool.
var zipReaderPool = sync.Pool{New: func() interface{} {
var zipReaderPool = sync.Pool{New: func() any {
z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1))
if err != nil {
panic(err)
+2 -2
@@ -98,13 +98,13 @@ var (
ErrDecoderNilInput = errors.New("nil input provided as reader")
)
func println(a ...interface{}) {
func println(a ...any) {
if debug || debugDecoder || debugEncoder {
log.Println(a...)
}
}
func printf(format string, a ...interface{}) {
func printf(format string, a ...any) {
if debug || debugDecoder || debugEncoder {
log.Printf(format, a...)
}
+44 -19
@@ -25,39 +25,64 @@ const (
Size64 = 8
)
// These will error at compile time if the interface is not conformant.
var _ hash.Hash = &Digest{}
var _ hash.Hash = &Digest64{}
var errKeySize = errors.New("highwayhash: invalid key size")
// New returns a hash.Hash computing the HighwayHash-256 checksum.
// It returns a non-nil error if the key is not 32 bytes long.
func New(key []byte) (hash.Hash, error) {
if len(key) != Size {
return nil, errKeySize
}
h := &digest{size: Size}
copy(h.key[:], key)
h.Reset()
return h, nil
return NewDigest(key)
}
// New128 returns a hash.Hash computing the HighwayHash-128 checksum.
// It returns a non-nil error if the key is not 32 bytes long.
func New128(key []byte) (hash.Hash, error) {
return NewDigest128(key)
}
// New64 returns a hash.Hash64 computing the HighwayHash-64 checksum.
// It returns a non-nil error if the key is not 32 bytes long.
func New64(key []byte) (hash.Hash64, error) {
return NewDigest64(key)
}
// NewDigest returns a *Digest that conforms to hash.Hash computing
// the HighwayHash-256 checksum.
// It returns a non-nil error if the key is not 32 bytes long.
func NewDigest(key []byte) (*Digest, error) {
if len(key) != Size {
return nil, errKeySize
}
h := &digest{size: Size128}
h := &Digest{size: Size}
copy(h.key[:], key)
h.Reset()
return h, nil
}
// New64 returns a hash.Hash computing the HighwayHash-64 checksum.
// NewDigest128 returns a *Digest that conforms to hash.Hash computing
// the HighwayHash-128 checksum.
// It returns a non-nil error if the key is not 32 bytes long.
func New64(key []byte) (hash.Hash64, error) {
func NewDigest128(key []byte) (*Digest, error) {
if len(key) != Size {
return nil, errKeySize
}
h := new(digest64)
h := &Digest{size: Size128}
copy(h.key[:], key)
h.Reset()
return h, nil
}
// NewDigest64 returns a *Digest that conforms to hash.Hash computing
// the HighwayHash-64 checksum.
// It returns a non-nil error if the key is not 32 bytes long.
func NewDigest64(key []byte) (*Digest64, error) {
if len(key) != Size {
return nil, errKeySize
}
h := new(Digest64)
h.size = Size64
copy(h.key[:], key)
h.Reset()
@@ -130,9 +155,9 @@ func Sum64(data, key []byte) uint64 {
return binary.LittleEndian.Uint64(hash[:])
}
type digest64 struct{ digest }
type Digest64 struct{ Digest }
func (d *digest64) Sum64() uint64 {
func (d *Digest64) Sum64() uint64 {
state := d.state
if d.offset > 0 {
hashBuffer(&state, &d.buffer, d.offset)
@@ -142,7 +167,7 @@ func (d *digest64) Sum64() uint64 {
return binary.LittleEndian.Uint64(hash[:])
}
type digest struct {
type Digest struct {
state [16]uint64 // v0 | v1 | mul0 | mul1
key, buffer [Size]byte
@@ -151,16 +176,16 @@ type digest struct {
size int
}
func (d *digest) Size() int { return d.size }
func (d *Digest) Size() int { return d.size }
func (d *digest) BlockSize() int { return Size }
func (d *Digest) BlockSize() int { return Size }
func (d *digest) Reset() {
func (d *Digest) Reset() {
initialize(&d.state, d.key[:])
d.offset = 0
}
func (d *digest) Write(p []byte) (n int, err error) {
func (d *Digest) Write(p []byte) (n int, err error) {
n = len(p)
if d.offset > 0 {
remaining := Size - d.offset
@@ -183,7 +208,7 @@ func (d *digest) Write(p []byte) (n int, err error) {
return
}
func (d *digest) Sum(b []byte) []byte {
func (d *Digest) Sum(b []byte) []byte {
state := d.state
if d.offset > 0 {
hashBuffer(&state, &d.buffer, d.offset)
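The pseudo-versioned highwayhash bump exports the concrete Digest/Digest64 types so callers can skip the hash.Hash indirection; New, New128 and New64 remain as thin wrappers over the new constructors. A usage sketch:

package main

import (
	"fmt"

	"github.com/minio/highwayhash"
)

func main() {
	key := make([]byte, 32) // any other length yields the errKeySize error
	d, err := highwayhash.NewDigest64(key)
	if err != nil {
		panic(err)
	}
	d.Write([]byte("hello")) // Write comes from the embedded Digest
	fmt.Printf("%016x\n", d.Sum64())
}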
+1 -1
@@ -24,7 +24,7 @@ func init() {
if useSVE {
if vl, _ := getVectorLength(); vl != 256 {
//
// Since HighwahHash is designed for AVX2,
// Since HighwayHash is designed for AVX2,
// SVE/SVE2 instructions only run correctly
// for vector length of 256
//
+2 -2
@@ -3794,7 +3794,7 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
// If JetStream is enabled for this server we will call into configJetStream for the account
// regardless of enabled or disabled. It handles both cases.
if jsEnabled {
if err := s.configJetStream(a); err != nil {
if err := s.configJetStream(a, nil); err != nil {
s.Errorf("Error configuring jetstream for account [%s]: %v", tl, err.Error())
a.mu.Lock()
// Absent reload of js server cfg, this is going to be broken until js is disabled
@@ -4371,7 +4371,7 @@ func (dr *DirAccResolver) Start(s *Server) error {
s.Warnf("DirResolver - Error checking for JetStream support for account %q: %v", pubKey, err)
}
} else if jsa == nil {
if err = s.configJetStream(acc); err != nil {
if err = s.configJetStream(acc, nil); err != nil {
s.Errorf("DirResolver - Error configuring JetStream for account %q: %v", pubKey, err)
}
}
+4 -1
@@ -77,7 +77,10 @@ func AccessTime() int64 {
// Return last updated time.
v := utime.Load()
if v == 0 {
panic("access time service not running")
// Always register a time, the worst case is a stale time.
// On startup, we can register in parallel and could previously panic.
v = time.Now().UnixNano()
utime.Store(v)
}
return v
}
+2 -1
@@ -1123,7 +1123,8 @@ func (s *Server) processClientOrLeafAuthentication(c *client, opts *Options) (au
return ok
}
if c.kind == CLIENT {
// Check for the use of simple auth.
if c.kind == CLIENT || c.kind == LEAF {
if proxyRequired = opts.ProxyRequired; proxyRequired && !trustedProxy {
return setProxyAuthError(ErrAuthProxyRequired)
}
+102 -31
@@ -237,6 +237,26 @@ const (
pmrMsgImportedFromService
)
type WriteTimeoutPolicy uint8
const (
WriteTimeoutPolicyDefault = iota
WriteTimeoutPolicyClose
WriteTimeoutPolicyRetry
)
// String returns a human-friendly value. Only used in varz.
func (p WriteTimeoutPolicy) String() string {
switch p {
case WriteTimeoutPolicyClose:
return "close"
case WriteTimeoutPolicyRetry:
return "retry"
default:
return _EMPTY_
}
}
type client struct {
// Here first because of use of atomics, and memory alignment.
stats
@@ -328,15 +348,16 @@ type pinfo struct {
// outbound holds pending data for a socket.
type outbound struct {
nb net.Buffers // Pending buffers for send, each has fixed capacity as per nbPool below.
wnb net.Buffers // Working copy of "nb", reused on each flushOutbound call, partial writes may leave entries here for next iteration.
pb int64 // Total pending/queued bytes.
fsp int32 // Flush signals that are pending per producer from readLoop's pcd.
sg *sync.Cond // To signal writeLoop that there is data to flush.
wdl time.Duration // Snapshot of write deadline.
mp int64 // Snapshot of max pending for client.
lft time.Duration // Last flush time for Write.
stc chan struct{} // Stall chan we create to slow down producers on overrun, e.g. fan-in.
nb net.Buffers // Pending buffers for send, each has fixed capacity as per nbPool below.
wnb net.Buffers // Working copy of "nb", reused on each flushOutbound call, partial writes may leave entries here for next iteration.
pb int64 // Total pending/queued bytes.
fsp int32 // Flush signals that are pending per producer from readLoop's pcd.
wtp WriteTimeoutPolicy // What do we do on a write timeout?
sg *sync.Cond // To signal writeLoop that there is data to flush.
wdl time.Duration // Snapshot of write deadline.
mp int64 // Snapshot of max pending for client.
lft time.Duration // Last flush time for Write.
stc chan struct{} // Stall chan we create to slow down producers on overrun, e.g. fan-in.
cw *s2.Writer
}
@@ -698,6 +719,24 @@ func (c *client) initClient() {
case c.kind == LEAF && opts.LeafNode.WriteDeadline > 0:
c.out.wdl = opts.LeafNode.WriteDeadline
}
switch c.kind {
case ROUTER:
if c.out.wtp = opts.Cluster.WriteTimeout; c.out.wtp == WriteTimeoutPolicyDefault {
c.out.wtp = WriteTimeoutPolicyRetry
}
case LEAF:
if c.out.wtp = opts.LeafNode.WriteTimeout; c.out.wtp == WriteTimeoutPolicyDefault {
c.out.wtp = WriteTimeoutPolicyRetry
}
case GATEWAY:
if c.out.wtp = opts.Gateway.WriteTimeout; c.out.wtp == WriteTimeoutPolicyDefault {
c.out.wtp = WriteTimeoutPolicyRetry
}
default:
if c.out.wtp = opts.WriteTimeout; c.out.wtp == WriteTimeoutPolicyDefault {
c.out.wtp = WriteTimeoutPolicyClose
}
}
c.out.mp = opts.MaxPending
// Snapshot max control line since currently can not be changed on reload and we
// were checking it on each call to parse. If this changes and we allow MaxControlLine
@@ -1849,7 +1888,7 @@ func (c *client) handleWriteTimeout(written, attempted int64, numChunks int) boo
scState, c.out.wdl, numChunks, attempted)
// We always close CLIENT connections, or when nothing was written at all...
if c.kind == CLIENT || written == 0 {
if c.out.wtp == WriteTimeoutPolicyClose || written == 0 {
c.markConnAsClosed(SlowConsumerWriteDeadline)
return true
} else {
@@ -2548,9 +2587,11 @@ func (c *client) sendPing() {
// Generates the INFO to be sent to the client with the client ID included.
// info arg will be copied since passed by value.
// Assume lock is held.
func (c *client) generateClientInfoJSON(info Info) []byte {
func (c *client) generateClientInfoJSON(info Info, includeClientIP bool) []byte {
info.CID = c.cid
info.ClientIP = c.host
if includeClientIP {
info.ClientIP = c.host
}
info.MaxPayload = c.mpay
if c.isWebsocket() {
info.ClientConnectURLs = info.WSConnectURLs
@@ -2631,7 +2672,7 @@ func (c *client) processPing() {
info.RemoteAccount = c.acc.Name
info.IsSystemAccount = c.acc == srv.SystemAccount()
info.ConnectInfo = true
c.enqueueProto(c.generateClientInfoJSON(info))
c.enqueueProto(c.generateClientInfoJSON(info, true))
c.mu.Unlock()
srv.mu.Unlock()
}
@@ -4345,7 +4386,7 @@ func (c *client) setupResponseServiceImport(acc *Account, si *serviceImport, tra
// Will remove a header if present.
func removeHeaderIfPresent(hdr []byte, key string) []byte {
start := bytes.Index(hdr, []byte(key+":"))
start := getHeaderKeyIndex(key, hdr)
// key can't be first and we want to check that it is preceded by a '\n'
if start < 1 || hdr[start-1] != '\n' {
return hdr
@@ -4463,22 +4504,13 @@ func sliceHeader(key string, hdr []byte) []byte {
if len(hdr) == 0 {
return nil
}
index := bytes.Index(hdr, stringToBytes(key+":"))
hdrLen := len(hdr)
// Check that we have enough characters, this will handle the -1 case of the key not
// being found and will also handle not having enough characters for trailing CRLF.
if index < 2 {
index := getHeaderKeyIndex(key, hdr)
if index == -1 {
return nil
}
// There should be a terminating CRLF.
if index >= hdrLen-1 || hdr[index-1] != '\n' || hdr[index-2] != '\r' {
return nil
}
// The key should be immediately followed by a : separator.
// Skip over the key and the : separator.
index += len(key) + 1
if index >= hdrLen || hdr[index-1] != ':' {
return nil
}
hdrLen := len(hdr)
// Skip over whitespace before the value.
for index < hdrLen && hdr[index] == ' ' {
index++
@@ -4494,11 +4526,49 @@ func sliceHeader(key string, hdr []byte) []byte {
return hdr[start:index:index]
}
// getHeaderKeyIndex returns an index into the header slice for the given key.
// Returns -1 if not found.
func getHeaderKeyIndex(key string, hdr []byte) int {
if len(hdr) == 0 {
return -1
}
bkey := stringToBytes(key)
keyLen, hdrLen := len(key), len(hdr)
var offset int
for {
index := bytes.Index(hdr[offset:], bkey)
// Check that we have enough characters, this will handle the -1 case of the key not
// being found and will also handle not having enough characters for trailing CRLF.
if index < 2 {
return -1
}
index += offset
// There should be a terminating CRLF.
if index >= hdrLen-1 || hdr[index-1] != '\n' || hdr[index-2] != '\r' {
offset = index + keyLen
continue
}
// The key should be immediately followed by a : separator.
if index+keyLen >= hdrLen {
return -1
}
if hdr[index+keyLen] != ':' {
offset = index + keyLen
continue
}
return index
}
}
func setHeader(key, val string, hdr []byte) []byte {
prefix := []byte(key + ": ")
start := bytes.Index(hdr, prefix)
start := getHeaderKeyIndex(key, hdr)
if start >= 0 {
valStart := start + len(prefix)
valStart := start + len(key) + 1
// Preserve single whitespace if used.
hdrLen := len(hdr)
if valStart < hdrLen && hdr[valStart] == ' ' {
valStart++
}
valEnd := bytes.Index(hdr[valStart:], []byte("\r"))
if valEnd < 0 {
return hdr // malformed headers
@@ -5766,7 +5836,8 @@ func (c *client) closeConnection(reason ClosedState) {
}
// If we are shutting down, no need to do all the accounting on subs, etc.
if reason == ServerShutdown {
// During LDM we'll still do the accounting, otherwise account limits could close others after this reconnects.
if reason == ServerShutdown && c.srv.isShuttingDown() {
s := c.srv
c.mu.Unlock()
if s != nil {
+398
@@ -0,0 +1,398 @@
// Copyright 2025 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package server
import (
"encoding/binary"
"errors"
"fmt"
"io"
"net"
"strconv"
"strings"
"time"
)
// PROXY protocol v2 constants
const (
// Protocol signature (12 bytes)
proxyProtoV2Sig = "\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A"
// Version and command byte format: version(4 bits) | command(4 bits)
proxyProtoV2VerMask = 0xF0
proxyProtoV2Ver = 0x20 // Version 2
// Commands
proxyProtoCmdMask = 0x0F
proxyProtoCmdLocal = 0x00 // LOCAL command (health check, use original connection)
proxyProtoCmdProxy = 0x01 // PROXY command (proxied connection)
// Address family and protocol byte format: family(4 bits) | protocol(4 bits)
proxyProtoFamilyMask = 0xF0
proxyProtoFamilyUnspec = 0x00 // Unspecified
proxyProtoFamilyInet = 0x10 // IPv4
proxyProtoFamilyInet6 = 0x20 // IPv6
proxyProtoFamilyUnix = 0x30 // Unix socket
proxyProtoProtoMask = 0x0F
proxyProtoProtoUnspec = 0x00 // Unspecified
proxyProtoProtoStream = 0x01 // TCP/STREAM
proxyProtoProtoDatagram = 0x02 // UDP/DGRAM
// Address sizes
proxyProtoAddrSizeIPv4 = 12 // 4 (src IP) + 4 (dst IP) + 2 (src port) + 2 (dst port)
proxyProtoAddrSizeIPv6 = 36 // 16 (src IP) + 16 (dst IP) + 2 (src port) + 2 (dst port)
// Header sizes
proxyProtoV2HeaderSize = 16 // Fixed header: 12 (sig) + 1 (ver/cmd) + 1 (fam/proto) + 2 (addr len)
// Timeout for reading PROXY protocol header
proxyProtoReadTimeout = 5 * time.Second
)
// PROXY protocol v1 constants
const (
proxyProtoV1Prefix = "PROXY "
proxyProtoV1MaxLineLen = 107 // Maximum line length including CRLF
proxyProtoV1TCP4 = "TCP4"
proxyProtoV1TCP6 = "TCP6"
proxyProtoV1Unknown = "UNKNOWN"
)
var (
// Errors
errProxyProtoInvalid = errors.New("invalid PROXY protocol header")
errProxyProtoUnsupported = errors.New("unsupported PROXY protocol feature")
errProxyProtoTimeout = errors.New("timeout reading PROXY protocol header")
errProxyProtoUnrecognized = errors.New("unrecognized PROXY protocol format")
)
// proxyProtoAddr contains the address information extracted from PROXY protocol header
type proxyProtoAddr struct {
srcIP net.IP
srcPort uint16
dstIP net.IP
dstPort uint16
}
// String implements net.Addr interface
func (p *proxyProtoAddr) String() string {
return net.JoinHostPort(p.srcIP.String(), fmt.Sprintf("%d", p.srcPort))
}
// Network implements net.Addr interface
func (p *proxyProtoAddr) Network() string {
if p.srcIP.To4() != nil {
return "tcp4"
}
return "tcp6"
}
// proxyConn wraps a net.Conn to override RemoteAddr() with the address
// extracted from the PROXY protocol header
type proxyConn struct {
net.Conn
remoteAddr net.Addr
}
// RemoteAddr returns the original client address extracted from PROXY protocol
func (pc *proxyConn) RemoteAddr() net.Addr {
return pc.remoteAddr
}
// detectProxyProtoVersion reads the first bytes and determines protocol version.
// Returns 1 for v1, 2 for v2, or error.
// The first 6 bytes read are returned so they can be used by the parser.
func detectProxyProtoVersion(conn net.Conn) (version int, header []byte, err error) {
// Read first 6 bytes to check for "PROXY " or v2 signature
header = make([]byte, 6)
if _, err = io.ReadFull(conn, header); err != nil {
return 0, nil, fmt.Errorf("failed to read protocol version: %w", err)
}
switch bytesToString(header) {
case proxyProtoV1Prefix:
return 1, header, nil
case proxyProtoV2Sig[:6]:
return 2, header, nil
default:
return 0, nil, errProxyProtoUnrecognized
}
}
// readProxyProtoV1Header parses PROXY protocol v1 text format.
// Expects the "PROXY " prefix (6 bytes) to have already been consumed.
func readProxyProtoV1Header(conn net.Conn) (*proxyProtoAddr, error) {
// Read rest of line (max 107 bytes total, already read 6)
maxRemaining := proxyProtoV1MaxLineLen - 6
// Read up to maxRemaining bytes at once (more efficient than byte-by-byte)
buf := make([]byte, maxRemaining)
var line []byte
for len(line) < maxRemaining {
// Read available data
n, err := conn.Read(buf[len(line):])
if err != nil {
return nil, fmt.Errorf("failed to read v1 line: %w", err)
}
line = buf[:len(line)+n]
// Look for CRLF in what we've read so far
for i := 0; i < len(line)-1; i++ {
if line[i] == '\r' && line[i+1] == '\n' {
// Found CRLF - extract just the line portion
line = line[:i]
goto foundCRLF
}
}
}
// Exceeded max length without finding CRLF
return nil, fmt.Errorf("%w: v1 line too long", errProxyProtoInvalid)
foundCRLF:
// Get parts from the protocol
parts := strings.Fields(string(line))
// Validate format
if len(parts) < 1 {
return nil, fmt.Errorf("%w: invalid v1 format", errProxyProtoInvalid)
}
// Handle UNKNOWN (health check, like v2 LOCAL)
if parts[0] == proxyProtoV1Unknown {
return nil, nil
}
// Must have exactly 5 parts: protocol, src-ip, dst-ip, src-port, dst-port
if len(parts) != 5 {
return nil, fmt.Errorf("%w: invalid v1 format", errProxyProtoInvalid)
}
protocol := parts[0]
srcIP := net.ParseIP(parts[1])
dstIP := net.ParseIP(parts[2])
if srcIP == nil || dstIP == nil {
return nil, fmt.Errorf("%w: invalid address", errProxyProtoInvalid)
}
// Parse ports
srcPort, err := strconv.ParseUint(parts[3], 10, 16)
if err != nil {
return nil, fmt.Errorf("invalid source port: %w", err)
}
dstPort, err := strconv.ParseUint(parts[4], 10, 16)
if err != nil {
return nil, fmt.Errorf("invalid dest port: %w", err)
}
// Validate protocol matches IP version
if protocol == proxyProtoV1TCP4 && srcIP.To4() == nil {
return nil, fmt.Errorf("%w: TCP4 with IPv6 address", errProxyProtoInvalid)
}
if protocol == proxyProtoV1TCP6 && srcIP.To4() != nil {
return nil, fmt.Errorf("%w: TCP6 with IPv4 address", errProxyProtoInvalid)
}
if protocol != proxyProtoV1TCP4 && protocol != proxyProtoV1TCP6 {
return nil, fmt.Errorf("%w: invalid protocol %s", errProxyProtoInvalid, protocol)
}
return &proxyProtoAddr{
srcIP: srcIP,
srcPort: uint16(srcPort),
dstIP: dstIP,
dstPort: uint16(dstPort),
}, nil
}
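// Illustration: a well-formed v1 header line as a proxy would send it, e.g.
//
//	"PROXY TCP4 192.0.2.10 203.0.113.5 56324 4222\r\n"
//
// With the "PROXY " prefix already consumed, the parser above returns
// srcIP 192.0.2.10, srcPort 56324, dstIP 203.0.113.5, dstPort 4222.
// A "PROXY UNKNOWN\r\n" line returns nil, nil, mirroring the v2 LOCAL
// health-check case.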
// readProxyProtoHeader reads and parses PROXY protocol (v1 or v2) from the connection.
// Automatically detects version and routes to appropriate parser.
// If the command is LOCAL/UNKNOWN (health check), it returns nil for addr and no error.
// If the command is PROXY, it returns the parsed address information.
// The connection must be fresh (no data read yet).
func readProxyProtoHeader(conn net.Conn) (*proxyProtoAddr, error) {
// Set read deadline to prevent hanging on slow/malicious clients
if err := conn.SetReadDeadline(time.Now().Add(proxyProtoReadTimeout)); err != nil {
return nil, err
}
defer conn.SetReadDeadline(time.Time{})
// Detect version
version, firstBytes, err := detectProxyProtoVersion(conn)
if err != nil {
return nil, err
}
switch version {
case 1:
// v1 parser expects "PROXY " prefix already consumed
return readProxyProtoV1Header(conn)
case 2:
// Read rest of v2 signature (bytes 6-11, total 6 more bytes)
remaining := make([]byte, 6)
if _, err := io.ReadFull(conn, remaining); err != nil {
return nil, fmt.Errorf("failed to read v2 signature: %w", err)
}
// Verify full signature
fullSig := string(firstBytes) + string(remaining)
if fullSig != proxyProtoV2Sig {
return nil, fmt.Errorf("%w: invalid signature", errProxyProtoInvalid)
}
// Read rest of header: ver/cmd, fam/proto, addr-len (4 bytes)
header := make([]byte, 4)
if _, err := io.ReadFull(conn, header); err != nil {
return nil, fmt.Errorf("failed to read v2 header: %w", err)
}
// Continue with parsing
return parseProxyProtoV2Header(conn, header)
default:
return nil, fmt.Errorf("unsupported PROXY protocol version: %d", version)
}
}
// readProxyProtoV2Header is kept for backward compatibility and direct testing.
// It reads and parses a PROXY protocol v2 header from the connection.
// If the command is LOCAL (health check), it returns nil for addr and no error.
// If the command is PROXY, it returns the parsed address information.
// The connection must be fresh (no data read yet).
func readProxyProtoV2Header(conn net.Conn) (*proxyProtoAddr, error) {
// Set read deadline to prevent hanging on slow/malicious clients
if err := conn.SetReadDeadline(time.Now().Add(proxyProtoReadTimeout)); err != nil {
return nil, err
}
defer conn.SetReadDeadline(time.Time{})
// Read fixed header (16 bytes)
header := make([]byte, proxyProtoV2HeaderSize)
if _, err := io.ReadFull(conn, header); err != nil {
if ne, ok := err.(net.Error); ok && ne.Timeout() {
return nil, errProxyProtoTimeout
}
return nil, fmt.Errorf("failed to read PROXY protocol header: %w", err)
}
// Validate signature (first 12 bytes)
if string(header[:12]) != proxyProtoV2Sig {
return nil, fmt.Errorf("%w: invalid signature", errProxyProtoInvalid)
}
// Continue with parsing after signature
return parseProxyProtoV2Header(conn, header[12:16])
}
// parseProxyProtoV2Header parses v2 protocol after signature has been validated.
// header contains the 4 bytes: ver/cmd, fam/proto, addr-len (2 bytes).
func parseProxyProtoV2Header(conn net.Conn, header []byte) (*proxyProtoAddr, error) {
// Parse version and command
verCmd := header[0]
version := verCmd & proxyProtoV2VerMask
command := verCmd & proxyProtoCmdMask
if version != proxyProtoV2Ver {
return nil, fmt.Errorf("%w: invalid version 0x%02x", errProxyProtoInvalid, version)
}
// Parse address family and protocol
famProto := header[1]
family := famProto & proxyProtoFamilyMask
protocol := famProto & proxyProtoProtoMask
// Parse address length (big-endian uint16)
addrLen := binary.BigEndian.Uint16(header[2:4])
// Handle LOCAL command (health check)
if command == proxyProtoCmdLocal {
// For LOCAL, we should skip the address data if any
if addrLen > 0 {
// Discard the address data
if _, err := io.CopyN(io.Discard, conn, int64(addrLen)); err != nil {
return nil, fmt.Errorf("failed to discard LOCAL command address data: %w", err)
}
}
return nil, nil // nil addr indicates LOCAL command
}
// Handle PROXY command
if command != proxyProtoCmdProxy {
return nil, fmt.Errorf("unknown PROXY protocol command: 0x%02x", command)
}
// Validate protocol (we only support STREAM/TCP)
if protocol != proxyProtoProtoStream {
return nil, fmt.Errorf("%w: only STREAM protocol supported", errProxyProtoUnsupported)
}
// Parse address data based on family
var addr *proxyProtoAddr
var err error
switch family {
case proxyProtoFamilyInet:
addr, err = parseIPv4Addr(conn, addrLen)
case proxyProtoFamilyInet6:
addr, err = parseIPv6Addr(conn, addrLen)
case proxyProtoFamilyUnspec:
// UNSPEC family with PROXY command is valid but rare
// Just skip the address data
if addrLen > 0 {
if _, err := io.CopyN(io.Discard, conn, int64(addrLen)); err != nil {
return nil, fmt.Errorf("failed to discard UNSPEC address address data: %w", err)
}
}
return nil, nil
default:
return nil, fmt.Errorf("%w: unsupported address family 0x%02x", errProxyProtoUnsupported, family)
}
return addr, err
}
// parseIPv4Addr parses IPv4 address data from PROXY protocol header
func parseIPv4Addr(conn net.Conn, addrLen uint16) (*proxyProtoAddr, error) {
// IPv4: 4 (src IP) + 4 (dst IP) + 2 (src port) + 2 (dst port) = 12 bytes minimum
if addrLen < proxyProtoAddrSizeIPv4 {
return nil, fmt.Errorf("IPv4 address data too short: %d bytes", addrLen)
}
addrData := make([]byte, addrLen)
if _, err := io.ReadFull(conn, addrData); err != nil {
return nil, fmt.Errorf("failed to read IPv4 address data: %w", err)
}
return &proxyProtoAddr{
srcIP: net.IP(addrData[0:4]),
dstIP: net.IP(addrData[4:8]),
srcPort: binary.BigEndian.Uint16(addrData[8:10]),
dstPort: binary.BigEndian.Uint16(addrData[10:12]),
}, nil
}
// parseIPv6Addr parses IPv6 address data from PROXY protocol header
func parseIPv6Addr(conn net.Conn, addrLen uint16) (*proxyProtoAddr, error) {
// IPv6: 16 (src IP) + 16 (dst IP) + 2 (src port) + 2 (dst port) = 36 bytes minimum
if addrLen < proxyProtoAddrSizeIPv6 {
return nil, fmt.Errorf("IPv6 address data too short: %d bytes", addrLen)
}
addrData := make([]byte, addrLen)
if _, err := io.ReadFull(conn, addrData); err != nil {
return nil, fmt.Errorf("failed to read IPv6 address data: %w", err)
}
return &proxyProtoAddr{
srcIP: net.IP(addrData[0:16]),
dstIP: net.IP(addrData[16:32]),
srcPort: binary.BigEndian.Uint16(addrData[32:34]),
dstPort: binary.BigEndian.Uint16(addrData[34:36]),
}, nil
}
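// Illustrative sketch (not part of the upstream file): exercising the parser
// end-to-end over an in-memory pipe. Header bytes and addresses are arbitrary
// test data; the tail shows how proxyConn would wrap the accepted connection.
func proxyProtoV2Sketch() (net.Conn, error) {
	client, server := net.Pipe()
	go func() {
		defer client.Close()
		hdr := []byte(proxyProtoV2Sig)
		hdr = append(hdr, proxyProtoV2Ver|proxyProtoCmdProxy)         // 0x21
		hdr = append(hdr, proxyProtoFamilyInet|proxyProtoProtoStream) // 0x11
		hdr = binary.BigEndian.AppendUint16(hdr, proxyProtoAddrSizeIPv4)
		hdr = append(hdr, 192, 0, 2, 10)                // src IP
		hdr = append(hdr, 203, 0, 113, 5)               // dst IP
		hdr = binary.BigEndian.AppendUint16(hdr, 56324) // src port
		hdr = binary.BigEndian.AppendUint16(hdr, 4222)  // dst port
		client.Write(hdr)
	}()
	addr, err := readProxyProtoHeader(server)
	if err != nil {
		return nil, err
	}
	if addr == nil {
		// LOCAL command: keep the original connection address.
		return server, nil
	}
	// addr.String() == "192.0.2.10:56324"
	return &proxyConn{Conn: server, remoteAddr: addr}, nil
}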
+1 -1
View File
@@ -66,7 +66,7 @@ func init() {
const (
// VERSION is the current version for the server.
VERSION = "2.12.1"
VERSION = "2.12.2"
// PROTO is the currently supported protocol.
// 0 was the original
+21 -6
View File
@@ -4519,7 +4519,8 @@ func (o *consumer) processWaiting(eos bool) (int, int, int, time.Time) {
var pre *waitingRequest
for wr := wq.head; wr != nil; {
// Check expiration.
if (eos && wr.noWait && wr.d > 0) || (!wr.expires.IsZero() && now.After(wr.expires)) {
expires := !wr.expires.IsZero() && now.After(wr.expires)
if (eos && wr.noWait) || expires {
rdWait := o.replicateDeliveries()
if rdWait {
// Check if we need to send the timeout after pending replicated deliveries, or can do so immediately.
@@ -4528,13 +4529,26 @@ func (o *consumer) processWaiting(eos bool) (int, int, int, time.Time) {
} else {
wd.pn, wd.pb = wr.n, wr.b
}
// If we still need to wait for replicated deliveries, remove from waiting list.
if rdWait {
wr = remove(pre, wr)
continue
}
}
if !rdWait {
// Normally it's a timeout.
if expires {
hdr := fmt.Appendf(nil, "NATS/1.0 408 Request Timeout\r\n%s: %d\r\n%s: %d\r\n\r\n", JSPullRequestPendingMsgs, wr.n, JSPullRequestPendingBytes, wr.b)
o.outq.send(newJSPubMsg(wr.reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
wr = remove(pre, wr)
continue
} else if wr.expires.IsZero() || wr.d > 0 {
// But if we're NoWait without expiry, we've reached the end of the stream, and we've not delivered any messages.
// Return no messages instead, which is the same as if we'd rejected the pull request initially.
hdr := fmt.Appendf(nil, "NATS/1.0 404 No Messages\r\n\r\n")
o.outq.send(newJSPubMsg(wr.reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
wr = remove(pre, wr)
continue
}
wr = remove(pre, wr)
continue
}
// Now check interest.
interest := wr.acc.sl.HasInterest(wr.interest)
@@ -5860,7 +5874,8 @@ func (o *consumer) hasNoLocalInterest() bool {
// This is when the underlying stream has been purged.
// sseq is the new first seq for the stream after purge.
// Lock should NOT be held.
// Consumer lock should NOT be held but the parent stream
// lock MUST be held.
func (o *consumer) purge(sseq uint64, slseq uint64, isWider bool) {
// Do not update our state unless we know we are the leader.
if !o.isLeader() {
@@ -5941,7 +5956,7 @@ func (o *consumer) purge(sseq uint64, slseq uint64, isWider bool) {
o.mu.Unlock()
if err := o.writeStoreState(); err != nil && s != nil && mset != nil {
s.Warnf("Consumer '%s > %s > %s' error on write store state from purge: %v", acc, mset.name(), name, err)
s.Warnf("Consumer '%s > %s > %s' error on write store state from purge: %v", acc, mset.nameLocked(false), name, err)
}
}
+24 -24
View File
@@ -1735,18 +1735,18 @@ func (s *Server) remoteServerUpdate(sub *subscription, c *client, _ *Account, su
node := getHash(si.Name)
accountNRG := si.AccountNRG()
oldInfo, _ := s.nodeToInfo.Swap(node, nodeInfo{
si.Name,
si.Version,
si.Cluster,
si.Domain,
si.ID,
si.Tags,
cfg,
stats,
false,
si.JetStreamEnabled(),
si.BinaryStreamSnapshot(),
accountNRG,
name: si.Name,
version: si.Version,
cluster: si.Cluster,
domain: si.Domain,
id: si.ID,
tags: si.Tags,
cfg: cfg,
stats: stats,
offline: false,
js: si.JetStreamEnabled(),
binarySnapshots: si.BinaryStreamSnapshot(),
accountNRG: accountNRG,
})
if oldInfo == nil || accountNRG != oldInfo.(nodeInfo).accountNRG {
// One of the servers we received statsz from changed its mind about
@@ -1789,18 +1789,18 @@ func (s *Server) processNewServer(si *ServerInfo) {
// Only update if non-existent
if _, ok := s.nodeToInfo.Load(node); !ok {
s.nodeToInfo.Store(node, nodeInfo{
si.Name,
si.Version,
si.Cluster,
si.Domain,
si.ID,
si.Tags,
nil,
nil,
false,
si.JetStreamEnabled(),
si.BinaryStreamSnapshot(),
si.AccountNRG(),
name: si.Name,
version: si.Version,
cluster: si.Cluster,
domain: si.Domain,
id: si.ID,
tags: si.Tags,
cfg: nil,
stats: nil,
offline: false,
js: si.JetStreamEnabled(),
binarySnapshots: si.BinaryStreamSnapshot(),
accountNRG: si.AccountNRG(),
})
}
}
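// Keyed struct literals, as adopted above, keep the nodeInfo assignments
// correct if fields are added or reordered; positional literals would
// silently shift values, and a renamed field now fails to compile rather
// than misassign, while omitted fields simply take their zero values.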
+151 -70
View File
@@ -25,7 +25,6 @@ import (
"encoding/json"
"errors"
"fmt"
"hash"
"io"
"io/fs"
"math"
@@ -194,7 +193,7 @@ type fileStore struct {
psim *stree.SubjectTree[psi]
tsl int
adml int
hh hash.Hash64
hh *highwayhash.Digest64
qch chan struct{}
fsld chan struct{}
cmu sync.RWMutex
@@ -239,7 +238,7 @@ type msgBlock struct {
lrts int64
lsts int64
llseq uint64
hh hash.Hash64
hh *highwayhash.Digest64
ecache elastic.Pointer[cache]
cache *cache
cloads uint64
@@ -468,7 +467,7 @@ func newFileStoreWithCreated(fcfg FileStoreConfig, cfg StreamConfig, created tim
// Create highway hash for message blocks. Use sha256 of directory as key.
key := sha256.Sum256([]byte(cfg.Name))
fs.hh, err = highwayhash.New64(key[:])
fs.hh, err = highwayhash.NewDigest64(key[:])
if err != nil {
return nil, fmt.Errorf("could not create hash: %v", err)
}
@@ -939,7 +938,8 @@ func (fs *fileStore) writeStreamMeta() error {
}
fs.hh.Reset()
fs.hh.Write(b)
checksum := hex.EncodeToString(fs.hh.Sum(nil))
var hb [highwayhash.Size64]byte
checksum := hex.EncodeToString(fs.hh.Sum(hb[:0]))
sum := filepath.Join(fs.fcfg.StoreDir, JetStreamMetaFileSum)
err = fs.writeFileWithOptionalSync(sum, []byte(checksum), defaultFilePerms)
if err != nil {
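// A minimal sketch of the hh.Sum(hb[:0]) pattern above, assuming the 32-byte
// key requirement of highwayhash and that highwayhash.Size64 is the 8-byte
// digest size: appending into a stack-backed array avoids the allocation
// that Sum(nil) forces through the generic hash.Hash64 interface.
//
//	hh, _ := highwayhash.NewDigest64(make([]byte, 32))
//	hh.Write(payload)
//	var hb [highwayhash.Size64]byte
//	checksum := hex.EncodeToString(hh.Sum(hb[:0]))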
@@ -1040,7 +1040,7 @@ func (fs *fileStore) initMsgBlock(index uint32) *msgBlock {
if mb.hh == nil {
key := sha256.Sum256(fs.hashKeyForBlock(index))
mb.hh, _ = highwayhash.New64(key[:])
mb.hh, _ = highwayhash.NewDigest64(key[:])
}
return mb
}
@@ -2705,7 +2705,7 @@ func (mb *msgBlock) firstMatchingMulti(sl *gsl.SimpleSublist, start uint64, sm *
if err != nil {
continue
}
expireOk := seq == lseq && mb.llseq == seq
expireOk := seq == lseq && mb.llseq != llseq && mb.llseq == seq
updateLLTS = false // cacheLookup already updated it.
if sl.HasInterest(fsm.subj) {
return fsm, expireOk, nil
@@ -2839,7 +2839,7 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
continue
}
updateLLTS = false // cacheLookup already updated it.
expireOk := seq == lseq && mb.llseq == seq
expireOk := seq == lseq && mb.llseq != llseq && mb.llseq == seq
if isAll {
return fsm, expireOk, nil
}
@@ -4251,7 +4251,7 @@ func (fs *fileStore) newMsgBlockForWrite() (*msgBlock, error) {
// Now do local hash.
key := sha256.Sum256(fs.hashKeyForBlock(index))
hh, err := highwayhash.New64(key[:])
hh, err := highwayhash.NewDigest64(key[:])
if err != nil {
return nil, fmt.Errorf("could not create hash: %v", err)
}
@@ -4537,11 +4537,11 @@ func (mb *msgBlock) skipMsg(seq uint64, now int64) {
needsRecord = true
mb.dmap.Insert(seq)
}
mb.mu.Unlock()
if needsRecord {
mb.writeMsgRecord(emptyRecordLen, seq|ebit, _EMPTY_, nil, nil, now, true)
} else {
mb.writeMsgRecordLocked(emptyRecordLen, seq|ebit, _EMPTY_, nil, nil, now, true, true)
}
mb.mu.Unlock()
if !needsRecord {
mb.kickFlusher()
}
}
@@ -4629,10 +4629,9 @@ func (fs *fileStore) SkipMsgs(seq uint64, num uint64) error {
mb.dmap.Insert(seq)
}
}
mb.mu.Unlock()
// Write out our placeholder.
mb.writeMsgRecord(emptyRecordLen, lseq|ebit, _EMPTY_, nil, nil, now, true)
mb.writeMsgRecordLocked(emptyRecordLen, lseq|ebit, _EMPTY_, nil, nil, now, true, true)
mb.mu.Unlock()
// Now update FS accounting.
// Update fs state.
@@ -4756,6 +4755,17 @@ func (fs *fileStore) enforceMsgLimit() {
return
}
for nmsgs := fs.state.Msgs; nmsgs > uint64(fs.cfg.MaxMsgs); nmsgs = fs.state.Msgs {
// If the first block can be removed fully, purge it entirely without needing to walk sequences.
if len(fs.blks) > 0 {
fmb := fs.blks[0]
fmb.mu.RLock()
msgs := fmb.msgs
fmb.mu.RUnlock()
if nmsgs-msgs > uint64(fs.cfg.MaxMsgs) {
fs.purgeMsgBlock(fmb)
continue
}
}
if removed, err := fs.deleteFirstMsg(); err != nil || !removed {
fs.rebuildFirst()
return
@@ -4773,6 +4783,17 @@ func (fs *fileStore) enforceBytesLimit() {
return
}
for bs := fs.state.Bytes; bs > uint64(fs.cfg.MaxBytes); bs = fs.state.Bytes {
// If the first block can be removed fully, purge it entirely without needing to walk sequences.
if len(fs.blks) > 0 {
fmb := fs.blks[0]
fmb.mu.RLock()
bytes := fmb.bytes
fmb.mu.RUnlock()
if bs-bytes > uint64(fs.cfg.MaxBytes) {
fs.purgeMsgBlock(fmb)
continue
}
}
if removed, err := fs.deleteFirstMsg(); err != nil || !removed {
fs.rebuildFirst()
return
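// Worked example of the fast path above (illustrative numbers): with
// state.Bytes = 150MB, MaxBytes = 100MB and a 40MB first block,
// 150-40 = 110 > 100, so the whole block is purged in one step; on the next
// pass 110-40 = 70 <= 100, so enforcement falls back to deleteFirstMsg()
// and only the final partial block is walked sequence by sequence.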
@@ -6373,15 +6394,21 @@ func (mb *msgBlock) writeMsgRecordLocked(rl, seq uint64, subj string, mhdr, msg
// Only update index and do accounting if not a delete tombstone.
if seq&tbit == 0 {
last := atomic.LoadUint64(&mb.last.seq)
// Accounting, do this before stripping ebit, it is ebit aware.
mb.updateAccounting(seq, ts, rl)
// Strip ebit if set.
seq = seq &^ ebit
if mb.cache.fseq == 0 {
mb.cache.fseq = seq
// If we have a hole due to skipping many messages, fill it.
if len(mb.cache.idx) > 0 && last+1 < seq {
for dseq := last + 1; dseq < seq; dseq++ {
mb.cache.idx = append(mb.cache.idx, dbit)
}
}
// Write index
mb.cache.idx = append(mb.cache.idx, uint32(index)|cbit)
if mb.cache.idx = append(mb.cache.idx, uint32(index)|cbit); len(mb.cache.idx) == 1 {
mb.cache.fseq = seq
}
} else {
// Make sure to account for tombstones in rbytes.
mb.rbytes += rl
@@ -7075,10 +7102,6 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
}
// Add to our index.
idx = append(idx, index)
// Adjust if we guessed wrong.
if seq != 0 && seq < fseq {
fseq = seq
}
// Make sure our dmap has this entry if it was erased.
if erased && dms == 0 && seq != 0 {
@@ -7121,8 +7144,8 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
// earlier loop if we've run out of block file to look at, but should
// be easily noticed because the seq will be below the last seq from
// the index.
if seq > 0 && seq < mbLastSeq {
for dseq := seq; dseq < mbLastSeq; dseq++ {
if last > 0 && last+1 >= mbFirstSeq && last+1 <= mbLastSeq {
for dseq := last + 1; dseq <= mbLastSeq; dseq++ {
idx = append(idx, dbit)
if dms == 0 {
mb.dmap.Insert(dseq)
@@ -7642,7 +7665,7 @@ func (mb *msgBlock) cacheLookupEx(seq uint64, sm *StoreMsg, doCopy bool) (*Store
buf := mb.cache.buf[li:]
// We use the high bit to denote we have already checked the checksum.
var hh hash.Hash64
var hh *highwayhash.Digest64
if !hashChecked {
hh = mb.hh // This will force the hash check in msgFromBuf.
}
@@ -7741,7 +7764,7 @@ func (fs *fileStore) msgForSeqLocked(seq uint64, sm *StoreMsg, needFSLock bool)
// Internal function to return msg parts from a raw buffer.
// Raw buffer will be copied into sm.
// Lock should be held.
func (mb *msgBlock) msgFromBuf(buf []byte, sm *StoreMsg, hh hash.Hash64) (*StoreMsg, error) {
func (mb *msgBlock) msgFromBuf(buf []byte, sm *StoreMsg, hh *highwayhash.Digest64) (*StoreMsg, error) {
return mb.msgFromBufEx(buf, sm, hh, true)
}
@@ -7749,14 +7772,14 @@ func (mb *msgBlock) msgFromBuf(buf []byte, sm *StoreMsg, hh hash.Hash64) (*Store
// Raw buffer will NOT be copied into sm.
// Only use for internal use, any message that is passed to upper layers should use mb.msgFromBuf.
// Lock should be held.
func (mb *msgBlock) msgFromBufNoCopy(buf []byte, sm *StoreMsg, hh hash.Hash64) (*StoreMsg, error) {
func (mb *msgBlock) msgFromBufNoCopy(buf []byte, sm *StoreMsg, hh *highwayhash.Digest64) (*StoreMsg, error) {
return mb.msgFromBufEx(buf, sm, hh, false)
}
// Internal function to return msg parts from a raw buffer.
// copy boolean will determine if we make a copy or not.
// Lock should be held.
func (mb *msgBlock) msgFromBufEx(buf []byte, sm *StoreMsg, hh hash.Hash64, doCopy bool) (*StoreMsg, error) {
func (mb *msgBlock) msgFromBufEx(buf []byte, sm *StoreMsg, hh *highwayhash.Digest64, doCopy bool) (*StoreMsg, error) {
if len(buf) < emptyRecordLen {
return nil, errBadMsg{mb.mfn, "record too short"}
}
@@ -9347,6 +9370,25 @@ func (fs *fileStore) forceRemoveMsgBlock(mb *msgBlock) {
fs.removeMsgBlockFromList(mb)
}
// Purges and removes the msgBlock from the store.
// Lock should be held.
func (fs *fileStore) purgeMsgBlock(mb *msgBlock) {
mb.mu.Lock()
// Update top level accounting.
msgs, bytes := mb.msgs, mb.bytes
if msgs > fs.state.Msgs {
msgs = fs.state.Msgs
}
if bytes > fs.state.Bytes {
bytes = fs.state.Bytes
}
fs.state.Msgs -= msgs
fs.state.Bytes -= bytes
fs.removeMsgBlock(mb)
mb.mu.Unlock()
fs.selectNextFirst()
}
// Called by purge to simply get rid of the cache and close our fds.
// Lock should not be held.
func (mb *msgBlock) dirtyClose() {
@@ -9872,12 +9914,17 @@ func timestampNormalized(t time.Time) int64 {
// writeFullState will proceed to write the full meta state only if doing so is not complex and time-consuming.
// Since this is for quick recovery it is optional and should not block/stall normal operations.
func (fs *fileStore) writeFullState() error {
return fs._writeFullState(false)
return fs._writeFullState(false, true)
}
// forceWriteFullState will proceed to write the full meta state. This should only be called by stop()
// forceWriteFullState will proceed to write the full meta state.
func (fs *fileStore) forceWriteFullState() error {
return fs._writeFullState(true)
return fs._writeFullState(true, true)
}
// forceWriteFullStateLocked will proceed to write the full meta state. This should only be called by stop()
func (fs *fileStore) forceWriteFullStateLocked() error {
return fs._writeFullState(true, false)
}
// This will write the full binary state for the stream.
@@ -9887,11 +9934,22 @@ func (fs *fileStore) forceWriteFullState() error {
// 2. PSIM - Per Subject Index Map - Tracks first and last blocks with subjects present.
// 3. MBs - Index, Bytes, First and Last Sequence and Timestamps, and the deleted map (avl.seqset).
// 4. Last block index and hash of record inclusive to this stream state.
func (fs *fileStore) _writeFullState(force bool) error {
func (fs *fileStore) _writeFullState(force, needLock bool) error {
fsLock := func() {
if needLock {
fs.mu.Lock()
}
}
fsUnlock := func() {
if needLock {
fs.mu.Unlock()
}
}
start := time.Now()
fs.mu.Lock()
fsLock()
if fs.closed || fs.dirty == 0 {
fs.mu.Unlock()
fsUnlock()
return nil
}
@@ -9910,7 +9968,7 @@ func (fs *fileStore) _writeFullState(force bool) error {
numDeleted = int((fs.state.LastSeq - fs.state.FirstSeq + 1) - fs.state.Msgs)
}
if numSubjects > numThreshold || numDeleted > numThreshold {
fs.mu.Unlock()
fsUnlock()
return errStateTooBig
}
}
@@ -10018,13 +10076,15 @@ func (fs *fileStore) _writeFullState(force bool) error {
// Encrypt if needed.
if fs.prf != nil {
if err := fs.setupAEK(); err != nil {
fs.mu.Unlock()
fsUnlock()
return err
}
nonce := make([]byte, fs.aek.NonceSize(), fs.aek.NonceSize()+len(buf)+fs.aek.Overhead())
if n, err := rand.Read(nonce); err != nil {
fsUnlock()
return err
} else if n != len(nonce) {
fsUnlock()
return fmt.Errorf("not enough nonce bytes read (%d != %d)", n, len(nonce))
}
buf = fs.aek.Seal(nonce, nonce, buf, nil)
@@ -10041,13 +10101,17 @@ func (fs *fileStore) _writeFullState(force bool) error {
statesEqual := trackingStatesEqual(&fs.state, &mstate)
// Release lock.
fs.mu.Unlock()
fsUnlock()
// Check consistency here.
if !statesEqual {
fs.warn("Stream state encountered internal inconsistency on write")
// Rebuild our fs state from the mb state.
fs.rebuildState(nil)
if needLock {
fs.rebuildState(nil)
} else {
fs.rebuildStateLocked(nil)
}
return errCorruptState
}
@@ -10072,14 +10136,14 @@ func (fs *fileStore) _writeFullState(force bool) error {
// Update dirty if successful.
if err == nil {
fs.mu.Lock()
fsLock()
fs.dirty -= priorDirty
fs.mu.Unlock()
fsUnlock()
}
// Attempt to write both files, an error in one should not prevent the other from being written.
ttlErr := fs.writeTTLState()
schedErr := fs.writeMsgSchedulingState()
ttlErr := fs.writeTTLState(needLock)
schedErr := fs.writeMsgSchedulingState(needLock)
if ttlErr != nil {
return ttlErr
} else if schedErr != nil {
@@ -10088,30 +10152,42 @@ func (fs *fileStore) _writeFullState(force bool) error {
return nil
}
func (fs *fileStore) writeTTLState() error {
fs.mu.RLock()
func (fs *fileStore) writeTTLState(needLock bool) error {
if needLock {
fs.mu.RLock()
}
if fs.ttls == nil {
fs.mu.RUnlock()
if needLock {
fs.mu.RUnlock()
}
return nil
}
fn := filepath.Join(fs.fcfg.StoreDir, msgDir, ttlStreamStateFile)
// Must be lseq+1 to identify up to which sequence the TTLs are valid.
buf := fs.ttls.Encode(fs.state.LastSeq + 1)
fs.mu.RUnlock()
if needLock {
fs.mu.RUnlock()
}
return fs.writeFileWithOptionalSync(fn, buf, defaultFilePerms)
}
func (fs *fileStore) writeMsgSchedulingState() error {
fs.mu.RLock()
func (fs *fileStore) writeMsgSchedulingState(needLock bool) error {
if needLock {
fs.mu.RLock()
}
if fs.scheduling == nil {
fs.mu.RUnlock()
if needLock {
fs.mu.RUnlock()
}
return nil
}
fn := filepath.Join(fs.fcfg.StoreDir, msgDir, msgSchedulingStreamStateFile)
// Must be lseq+1 to identify up to which sequence the schedules are valid.
buf := fs.scheduling.encode(fs.state.LastSeq + 1)
fs.mu.RUnlock()
if needLock {
fs.mu.RUnlock()
}
return fs.writeFileWithOptionalSync(fn, buf, defaultFilePerms)
}
@@ -10129,18 +10205,10 @@ func (fs *fileStore) stop(delete, writeState bool) error {
return ErrStoreClosed
}
// Mark as closing. Do before releasing the lock to writeFullState
// Mark as closing. Do before releasing the lock to wait on the state flush loop
// so we don't end up with this function running more than once.
fs.closing = true
if writeState {
fs.checkAndFlushLastBlock()
}
fs.closeAllMsgBlocks(false)
fs.cancelSyncTimer()
fs.cancelAgeChk()
// Release the state flusher loop.
if fs.qch != nil {
close(fs.qch)
@@ -10152,9 +10220,18 @@ func (fs *fileStore) stop(delete, writeState bool) error {
fsld := fs.fsld
fs.mu.Unlock()
<-fsld
// Write full state if needed. If not dirty this is a no-op.
fs.forceWriteFullState()
fs.mu.Lock()
fs.checkAndFlushLastBlock()
}
fs.closeAllMsgBlocks(false)
fs.cancelSyncTimer()
fs.cancelAgeChk()
if writeState {
// Write full state if needed. If not dirty this is a no-op.
fs.forceWriteFullStateLocked()
}
// Mark as closed. Last message block needs to be cleared after
@@ -10248,7 +10325,8 @@ func (fs *fileStore) streamSnapshot(w io.WriteCloser, includeConsumers bool, err
hh := fs.hh
hh.Reset()
hh.Write(meta)
sum := []byte(hex.EncodeToString(fs.hh.Sum(nil)))
var hb [highwayhash.Size64]byte
sum := []byte(hex.EncodeToString(fs.hh.Sum(hb[:0])))
fs.mu.Unlock()
// Meta first.
@@ -10351,7 +10429,8 @@ func (fs *fileStore) streamSnapshot(w io.WriteCloser, includeConsumers bool, err
}
o.hh.Reset()
o.hh.Write(meta)
sum := []byte(hex.EncodeToString(o.hh.Sum(nil)))
var hb [highwayhash.Size64]byte
sum := []byte(hex.EncodeToString(o.hh.Sum(hb[:0])))
// We can have the running state directly encoded now.
state, err := o.encodeState()
@@ -10568,7 +10647,7 @@ type consumerFileStore struct {
name string
odir string
ifn string
hh hash.Hash64
hh *highwayhash.Digest64
state ConsumerState
fch chan struct{}
qch chan struct{}
@@ -10611,7 +10690,7 @@ func (fs *fileStore) ConsumerStore(name string, cfg *ConsumerConfig) (ConsumerSt
ifn: filepath.Join(odir, consumerState),
}
key := sha256.Sum256([]byte(fs.cfg.Name + "/" + name))
hh, err := highwayhash.New64(key[:])
hh, err := highwayhash.NewDigest64(key[:])
if err != nil {
return nil, fmt.Errorf("could not create hash: %v", err)
}
@@ -11246,7 +11325,8 @@ func (cfs *consumerFileStore) writeConsumerMeta() error {
}
cfs.hh.Reset()
cfs.hh.Write(b)
checksum := hex.EncodeToString(cfs.hh.Sum(nil))
var hb [highwayhash.Size64]byte
checksum := hex.EncodeToString(cfs.hh.Sum(hb[:0]))
sum := filepath.Join(cfs.odir, JetStreamMetaFileSum)
err = cfs.fs.writeFileWithOptionalSync(sum, []byte(checksum), defaultFilePerms)
@@ -11630,14 +11710,14 @@ func (fs *fileStore) RemoveConsumer(o ConsumerStore) error {
// Deprecated: stream templates are deprecated and will be removed in a future version.
type templateFileStore struct {
dir string
hh hash.Hash64
hh *highwayhash.Digest64
}
// Deprecated: stream templates are deprecated and will be removed in a future version.
func newTemplateFileStore(storeDir string) *templateFileStore {
tdir := filepath.Join(storeDir, tmplsDir)
key := sha256.Sum256([]byte("templates"))
hh, err := highwayhash.New64(key[:])
hh, err := highwayhash.NewDigest64(key[:])
if err != nil {
return nil
}
@@ -11666,7 +11746,8 @@ func (ts *templateFileStore) Store(t *streamTemplate) error {
// FIXME(dlc) - Do checksum
ts.hh.Reset()
ts.hh.Write(b)
checksum := hex.EncodeToString(ts.hh.Sum(nil))
var hb [highwayhash.Size64]byte
checksum := hex.EncodeToString(ts.hh.Sum(hb[:0]))
sum := filepath.Join(dir, JetStreamMetaFileSum)
if err := os.WriteFile(sum, []byte(checksum), defaultFilePerms); err != nil {
return err
+22 -5
View File
@@ -2561,11 +2561,18 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr
return false
}
// Copy off original pa in case it changes.
pa := c.pa
mt, _ := c.isMsgTraceEnabled()
if mt != nil {
pa := c.pa
// We are going to re-point "pa" at our (possibly modified) c.pa below,
// so keep a separate copy of the original c.pa to restore on exit.
cpa := c.pa
msg = mt.setOriginAccountHeaderIfNeeded(c, acc, msg)
defer func() { c.pa = pa }()
defer func() { c.pa = cpa }()
// Update pa with our current c.pa state.
pa = c.pa
}
var (
@@ -2579,6 +2586,7 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr
didDeliver bool
prodIsMQTT = c.isMqtt()
dlvMsgs int64
dlvExtraSz int64
)
// Get a subscription from the pool
@@ -2676,8 +2684,11 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr
}
}
// Assume original message
dmsg := msg
if mt != nil {
msg = mt.setHopHeader(c, msg)
// If trace is enabled, we need to set the hop header per gateway.
dmsg = mt.setHopHeader(c, dmsg)
}
// Setup the message header.
@@ -2727,16 +2738,22 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr
sub.nm, sub.max = 0, 0
sub.client = gwc
sub.subject = subject
if c.deliverMsg(prodIsMQTT, sub, acc, subject, mreply, mh, msg, false) {
if c.deliverMsg(prodIsMQTT, sub, acc, subject, mreply, mh, dmsg, false) {
// We don't count internal deliveries so count only if sub.icb is nil
if sub.icb == nil {
dlvMsgs++
dlvExtraSz += int64(len(dmsg) - len(msg))
}
didDeliver = true
}
// If we set the header reset the origin pub args.
if mt != nil {
c.pa = pa
}
}
if dlvMsgs > 0 {
totalBytes := dlvMsgs * int64(len(msg))
totalBytes := dlvMsgs*int64(len(msg)) + dlvExtraSz
// For non MQTT producers, remove the CR_LF * number of messages
if !prodIsMQTT {
totalBytes -= dlvMsgs * int64(LEN_CR_LF)
+3 -24
View File
@@ -15,6 +15,7 @@ package gsl
import (
"errors"
"strings"
"sync"
"unsafe"
@@ -87,24 +88,13 @@ func NewSublist[T comparable]() *GenericSublist[T] {
// Insert adds a subscription into the sublist
func (s *GenericSublist[T]) Insert(subject string, value T) error {
tsa := [32]string{}
tokens := tsa[:0]
start := 0
for i := 0; i < len(subject); i++ {
if subject[i] == btsep {
tokens = append(tokens, subject[start:i])
start = i + 1
}
}
tokens = append(tokens, subject[start:])
s.Lock()
var sfwc bool
var n *node[T]
l := s.root
for _, t := range tokens {
for t := range strings.SplitSeq(subject, tsep) {
lt := len(t)
if lt == 0 || sfwc {
s.Unlock()
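// Illustration: strings.SplitSeq (Go 1.24) yields tokens lazily as an
// iterator, replacing the manual byte-scan above without allocating a
// []string per subject. Minimal sketch:
//
//	for t := range strings.SplitSeq("foo.bar.baz", ".") {
//		fmt.Println(t) // foo, bar, baz
//	}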
@@ -312,17 +302,6 @@ type lnt[T comparable] struct {
// Raw low level remove, can do batches with lock held outside.
func (s *GenericSublist[T]) remove(subject string, value T, shouldLock bool) error {
tsa := [32]string{}
tokens := tsa[:0]
start := 0
for i := 0; i < len(subject); i++ {
if subject[i] == btsep {
tokens = append(tokens, subject[start:i])
start = i + 1
}
}
tokens = append(tokens, subject[start:])
if shouldLock {
s.Lock()
defer s.Unlock()
@@ -336,7 +315,7 @@ func (s *GenericSublist[T]) remove(subject string, value T, shouldLock bool) err
var lnts [32]lnt[T]
levels := lnts[:0]
for _, t := range tokens {
for t := range strings.SplitSeq(subject, tsep) {
lt := len(t)
if lt == 0 || sfwc {
return ErrInvalidSubject
+191 -175
View File
@@ -194,6 +194,11 @@ func (s *Server) EnableJetStream(config *JetStreamConfig) error {
}
s.Noticef("Starting JetStream")
start := time.Now()
defer func() {
s.Noticef("Took %s to start JetStream", time.Since(start))
}()
if config == nil || config.MaxMemory <= 0 || config.MaxStore <= 0 {
var storeDir, domain, uniqueTag string
var maxStore, maxMem int64
@@ -686,6 +691,11 @@ func (s *Server) DisableJetStream() error {
}
func (s *Server) enableJetStreamAccounts() error {
// Reuse the same task workers across all accounts, so that we don't explode
// with a large number of goroutines on multi-account systems.
tq := parallelTaskQueue(len(dios))
defer close(tq)
// If we have no configured accounts setup then setup imports on global account.
if s.globalAccountOnly() {
gacc := s.GlobalAccount()
@@ -694,10 +704,10 @@ func (s *Server) enableJetStreamAccounts() error {
gacc.jsLimits = defaultJSAccountTiers
}
gacc.mu.Unlock()
if err := s.configJetStream(gacc); err != nil {
if err := s.configJetStream(gacc, tq); err != nil {
return err
}
} else if err := s.configAllJetStreamAccounts(); err != nil {
} else if err := s.configAllJetStreamAccounts(tq); err != nil {
return fmt.Errorf("Error enabling jetstream on configured accounts: %v", err)
}
return nil
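// Sketch of a compatible worker pool (an assumption — the upstream
// parallelTaskQueue implementation is not shown in this diff): a channel of
// funcs drained by a fixed set of workers, stopped when the caller closes it.
//
//	func parallelTaskQueueSketch(workers int) chan<- func() {
//		tq := make(chan func())
//		for i := 0; i < workers; i++ {
//			go func() {
//				for task := range tq {
//					task()
//				}
//			}()
//		}
//		return tq
//	}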
@@ -761,7 +771,7 @@ func (a *Account) enableJetStreamInfoServiceImportOnly() error {
return a.enableAllJetStreamServiceImportsAndMappings()
}
func (s *Server) configJetStream(acc *Account) error {
func (s *Server) configJetStream(acc *Account, tq chan<- func()) error {
if acc == nil {
return nil
}
@@ -778,7 +788,7 @@ func (s *Server) configJetStream(acc *Account) error {
return err
}
} else {
if err := acc.EnableJetStream(jsLimits); err != nil {
if err := acc.EnableJetStream(jsLimits, tq); err != nil {
return err
}
if s.gateway.enabled {
@@ -799,7 +809,7 @@ func (s *Server) configJetStream(acc *Account) error {
}
// configAllJetStreamAccounts walk all configured accounts and turn on jetstream if requested.
func (s *Server) configAllJetStreamAccounts() error {
func (s *Server) configAllJetStreamAccounts(tq chan<- func()) error {
// Check to see if system account has been enabled. We could arrive here via reload and
// a non-default system account.
s.checkJetStreamExports()
@@ -839,7 +849,7 @@ func (s *Server) configAllJetStreamAccounts() error {
// Process any jetstream enabled accounts here. These will be accounts we are
// already aware of at startup etc.
for _, acc := range jsAccounts {
if err := s.configJetStream(acc); err != nil {
if err := s.configJetStream(acc, tq); err != nil {
return err
}
}
@@ -852,7 +862,7 @@ func (s *Server) configAllJetStreamAccounts() error {
// Only load up ones not already loaded since they are processed above.
if _, ok := accounts.Load(accName); !ok {
if acc, err := s.lookupAccount(accName); err != nil && acc != nil {
if err := s.configJetStream(acc); err != nil {
if err := s.configJetStream(acc, tq); err != nil {
return err
}
}
@@ -1013,11 +1023,11 @@ func (s *Server) shutdownJetStream() {
js.accounts = nil
var qch chan struct{}
var stopped chan struct{}
if cc := js.cluster; cc != nil {
if cc.qch != nil {
qch = cc.qch
cc.qch = nil
qch, stopped = cc.qch, cc.stopped
cc.qch, cc.stopped = nil, nil
}
js.stopUpdatesSub()
if cc.c != nil {
@@ -1034,14 +1044,11 @@ func (s *Server) shutdownJetStream() {
// We will wait for a bit for it to close.
// Do this without the lock.
if qch != nil {
close(qch) // Must be close() to signal *all* listeners
select {
case qch <- struct{}{}:
select {
case <-qch:
case <-time.After(2 * time.Second):
s.Warnf("Did not receive signal for successful shutdown of cluster routine")
}
default:
case <-stopped:
case <-time.After(10 * time.Second):
s.Warnf("Did not receive signal for successful shutdown of cluster routine")
}
}
}
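// Illustration of the handshake adopted above: close(qch) broadcasts shutdown
// to every listener (the old single send could wake only one), while the
// monitor goroutine closes a dedicated "stopped" channel on exit so shutdown
// can wait for it with a timeout:
//
//	quit, stopped := make(chan struct{}), make(chan struct{})
//	go func() { defer close(stopped); <-quit /* run until asked to quit */ }()
//	close(quit)
//	select {
//	case <-stopped:
//	case <-time.After(10 * time.Second):
//		// warn, as above
//	}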
@@ -1100,7 +1107,7 @@ func (a *Account) assignJetStreamLimits(limits map[string]JetStreamAccountLimits
// EnableJetStream will enable JetStream on this account with the defined limits.
// This is a helper for JetStreamEnableAccount.
func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) error {
func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits, tq chan<- func()) error {
a.mu.RLock()
s := a.srv
a.mu.RUnlock()
@@ -1211,7 +1218,7 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
tdir := filepath.Join(jsa.storeDir, tmplsDir)
if stat, err := os.Stat(tdir); err == nil && stat.IsDir() {
key := sha256.Sum256([]byte("templates"))
hh, err := highwayhash.New64(key[:])
hh, err := highwayhash.NewDigest64(key[:])
if err != nil {
return err
}
@@ -1235,7 +1242,8 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
}
hh.Reset()
hh.Write(buf)
checksum := hex.EncodeToString(hh.Sum(nil))
var hb [highwayhash.Size64]byte
checksum := hex.EncodeToString(hh.Sum(hb[:0]))
if checksum != string(sum) {
s.Warnf(" StreamTemplate checksums do not match %q vs %q", sum, checksum)
continue
@@ -1253,33 +1261,142 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
}
}
// Collect consumers, do after all streams.
type ce struct {
mset *stream
odir string
}
var consumers []*ce
// Collect any interest policy streams to check for
// https://github.com/nats-io/nats-server/issues/3612
var ipstreams []*stream
// Remember if we should be encrypted and what cipher we think we should use.
encrypted := s.getOpts().JetStreamKey != _EMPTY_
plaintext := true
sc := s.getOpts().JetStreamCipher
doConsumers := func(mset *stream, odir string) {
ofis, _ := os.ReadDir(odir)
if len(ofis) > 0 {
s.Noticef(" Recovering %d consumers for stream - '%s > %s'", len(ofis), mset.accName(), mset.name())
}
for _, ofi := range ofis {
metafile := filepath.Join(odir, ofi.Name(), JetStreamMetaFile)
metasum := filepath.Join(odir, ofi.Name(), JetStreamMetaFileSum)
if _, err := os.Stat(metafile); os.IsNotExist(err) {
s.Warnf(" Missing consumer metafile %q", metafile)
continue
}
buf, err := os.ReadFile(metafile)
if err != nil {
s.Warnf(" Error reading consumer metafile %q: %v", metafile, err)
continue
}
if _, err := os.Stat(metasum); os.IsNotExist(err) {
s.Warnf(" Missing consumer checksum for %q", metasum)
continue
}
// Check if we are encrypted.
if key, err := os.ReadFile(filepath.Join(odir, ofi.Name(), JetStreamMetaFileKey)); err == nil {
s.Debugf(" Consumer metafile is encrypted, reading encrypted keyfile")
// Decode the buffer before proceeding.
ctxName := mset.name() + tsep + ofi.Name()
nbuf, _, err := s.decryptMeta(sc, key, buf, a.Name, ctxName)
if err != nil {
s.Warnf(" Error decrypting our consumer metafile: %v", err)
continue
}
buf = nbuf
}
var cfg FileConsumerInfo
decoder := json.NewDecoder(bytes.NewReader(buf))
decoder.DisallowUnknownFields()
strictErr := decoder.Decode(&cfg)
if strictErr != nil {
cfg = FileConsumerInfo{}
if err := json.Unmarshal(buf, &cfg); err != nil {
s.Warnf(" Error unmarshalling consumer metafile %q: %v", metafile, err)
continue
}
}
if supported := supportsRequiredApiLevel(cfg.Metadata); !supported || strictErr != nil {
var offlineReason string
if !supported {
apiLevel := getRequiredApiLevel(cfg.Metadata)
if strictErr != nil {
offlineReason = fmt.Sprintf("unsupported - config error: %s", strings.TrimPrefix(strictErr.Error(), "json: "))
} else {
offlineReason = fmt.Sprintf("unsupported - required API level: %s, current API level: %d", apiLevel, JSApiLevel)
}
s.Warnf(" Detected unsupported consumer '%s > %s > %s': %s", a.Name, mset.name(), cfg.Name, offlineReason)
} else {
offlineReason = fmt.Sprintf("decoding error: %v", strictErr)
s.Warnf(" Error unmarshalling consumer metafile %q: %v", metafile, strictErr)
}
singleServerMode := !s.JetStreamIsClustered() && s.standAloneMode()
if singleServerMode {
if !mset.closed.Load() {
s.Warnf(" Stopping unsupported stream '%s > %s'", a.Name, mset.name())
mset.mu.Lock()
mset.offlineReason = fmt.Sprintf("stopped - unsupported consumer %q", cfg.Name)
mset.mu.Unlock()
mset.stop(false, false)
}
// Fake a consumer, so we can respond to API requests as single-server.
o := &consumer{
mset: mset,
js: s.getJetStream(),
acc: a,
srv: s,
cfg: cfg.ConsumerConfig,
active: false,
stream: mset.name(),
name: cfg.Name,
dseq: 1,
sseq: 1,
created: time.Now().UTC(),
closed: true,
offlineReason: offlineReason,
}
if !cfg.Created.IsZero() {
o.created = cfg.Created
}
mset.mu.Lock()
mset.setConsumer(o)
mset.mu.Unlock()
}
continue
}
isEphemeral := !isDurableConsumer(&cfg.ConsumerConfig)
if isEphemeral {
// This is an ephemeral consumer and this could fail on restart until
// the consumer can reconnect. We will create it as a durable and switch it.
cfg.ConsumerConfig.Durable = ofi.Name()
}
obs, err := mset.addConsumerWithAssignment(&cfg.ConsumerConfig, _EMPTY_, nil, true, ActionCreateOrUpdate, false)
if err != nil {
s.Warnf(" Error adding consumer %q: %v", cfg.Name, err)
continue
}
if isEphemeral {
obs.switchToEphemeral()
}
if !cfg.Created.IsZero() {
obs.setCreatedTime(cfg.Created)
}
if err != nil {
s.Warnf(" Error restoring consumer %q state: %v", cfg.Name, err)
}
}
}
// Now recover the streams.
fis, _ := os.ReadDir(sdir)
for _, fi := range fis {
doStream := func(fi os.DirEntry) error {
plaintext := true
mdir := filepath.Join(sdir, fi.Name())
// Check for partially deleted streams. They are marked with "." prefix.
if strings.HasPrefix(fi.Name(), tsep) {
go os.RemoveAll(mdir)
continue
return nil
}
key := sha256.Sum256([]byte(fi.Name()))
hh, err := highwayhash.New64(key[:])
hh, err := highwayhash.NewDigest64(key[:])
if err != nil {
return err
}
@@ -1287,27 +1404,28 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
metasum := filepath.Join(mdir, JetStreamMetaFileSum)
if _, err := os.Stat(metafile); os.IsNotExist(err) {
s.Warnf(" Missing stream metafile for %q", metafile)
continue
return nil
}
buf, err := os.ReadFile(metafile)
if err != nil {
s.Warnf(" Error reading metafile %q: %v", metafile, err)
continue
return nil
}
if _, err := os.Stat(metasum); os.IsNotExist(err) {
s.Warnf(" Missing stream checksum file %q", metasum)
continue
return nil
}
sum, err := os.ReadFile(metasum)
if err != nil {
s.Warnf(" Error reading Stream metafile checksum %q: %v", metasum, err)
continue
return nil
}
hh.Write(buf)
checksum := hex.EncodeToString(hh.Sum(nil))
var hb [highwayhash.Size64]byte
checksum := hex.EncodeToString(hh.Sum(hb[:0]))
if checksum != string(sum) {
s.Warnf(" Stream metafile %q: checksums do not match %q vs %q", metafile, sum, checksum)
continue
return nil
}
// Track if we are converting ciphers.
@@ -1320,14 +1438,14 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
s.Debugf(" Stream metafile is encrypted, reading encrypted keyfile")
if len(keyBuf) < minMetaKeySize {
s.Warnf(" Bad stream encryption key length of %d", len(keyBuf))
continue
return nil
}
// Decode the buffer before proceeding.
var nbuf []byte
nbuf, convertingCiphers, err = s.decryptMeta(sc, keyBuf, buf, a.Name, fi.Name())
if err != nil {
s.Warnf(" Error decrypting our stream metafile: %v", err)
continue
return nil
}
buf = nbuf
plaintext = false
@@ -1341,7 +1459,7 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
cfg = FileStreamInfo{}
if err := json.Unmarshal(buf, &cfg); err != nil {
s.Warnf(" Error unmarshalling stream metafile %q: %v", metafile, err)
continue
return nil
}
}
if supported := supportsRequiredApiLevel(cfg.Metadata); !supported || strictErr != nil {
@@ -1384,13 +1502,16 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
// Now do the consumers.
odir := filepath.Join(sdir, fi.Name(), consumerDir)
consumers = append(consumers, &ce{mset, odir})
doConsumers(mset, odir)
}
continue
return nil
}
if cfg.Template != _EMPTY_ {
if err := jsa.addStreamNameToTemplate(cfg.Template, cfg.Name); err != nil {
jsa.mu.Lock()
err := jsa.addStreamNameToTemplate(cfg.Template, cfg.Name)
jsa.mu.Unlock()
if err != nil {
s.Warnf(" Error adding stream %q to template %q: %v", cfg.Name, cfg.Template, err)
}
}
@@ -1415,7 +1536,7 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
}
}
if hadSubjErr {
continue
return nil
}
// The other possible bug is assigning subjects to mirrors, so check for that and patch as well.
@@ -1449,7 +1570,7 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
s.Warnf(" Error replacing meta keyfile for stream %q: %v", cfg.Name, err)
}
}
continue
return nil
}
if !cfg.Created.IsZero() {
mset.setCreatedTime(cfg.Created)
@@ -1514,146 +1635,41 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro
s.Noticef(" Restored %s messages for stream '%s > %s' in %v",
comma(int64(state.Msgs)), mset.accName(), mset.name(), time.Since(rt).Round(time.Millisecond))
// Now do the consumers.
odir := filepath.Join(sdir, fi.Name(), consumerDir)
doConsumers(mset, odir)
// Collect to check for dangling messages.
// TODO(dlc) - Can be removed eventually.
if cfg.StreamConfig.Retention == InterestPolicy {
ipstreams = append(ipstreams, mset)
mset.checkForOrphanMsgs()
mset.checkConsumerReplication()
}
// Now do the consumers.
odir := filepath.Join(sdir, fi.Name(), consumerDir)
consumers = append(consumers, &ce{mset, odir})
return nil
}
for _, e := range consumers {
ofis, _ := os.ReadDir(e.odir)
if len(ofis) > 0 {
s.Noticef(" Recovering %d consumers for stream - '%s > %s'", len(ofis), e.mset.accName(), e.mset.name())
if tq != nil {
// If a parallelTaskQueue was provided then use that for concurrency.
var wg sync.WaitGroup
wg.Add(len(fis))
for _, fi := range fis {
tq <- func() {
doStream(fi)
wg.Done()
}
}
for _, ofi := range ofis {
metafile := filepath.Join(e.odir, ofi.Name(), JetStreamMetaFile)
metasum := filepath.Join(e.odir, ofi.Name(), JetStreamMetaFileSum)
if _, err := os.Stat(metafile); os.IsNotExist(err) {
s.Warnf(" Missing consumer metafile %q", metafile)
continue
}
buf, err := os.ReadFile(metafile)
if err != nil {
s.Warnf(" Error reading consumer metafile %q: %v", metafile, err)
continue
}
if _, err := os.Stat(metasum); os.IsNotExist(err) {
s.Warnf(" Missing consumer checksum for %q", metasum)
continue
}
// Check if we are encrypted.
if key, err := os.ReadFile(filepath.Join(e.odir, ofi.Name(), JetStreamMetaFileKey)); err == nil {
s.Debugf(" Consumer metafile is encrypted, reading encrypted keyfile")
// Decode the buffer before proceeding.
ctxName := e.mset.name() + tsep + ofi.Name()
nbuf, _, err := s.decryptMeta(sc, key, buf, a.Name, ctxName)
if err != nil {
s.Warnf(" Error decrypting our consumer metafile: %v", err)
continue
}
buf = nbuf
}
var cfg FileConsumerInfo
decoder := json.NewDecoder(bytes.NewReader(buf))
decoder.DisallowUnknownFields()
strictErr := decoder.Decode(&cfg)
if strictErr != nil {
cfg = FileConsumerInfo{}
if err := json.Unmarshal(buf, &cfg); err != nil {
s.Warnf(" Error unmarshalling consumer metafile %q: %v", metafile, err)
continue
}
}
if supported := supportsRequiredApiLevel(cfg.Metadata); !supported || strictErr != nil {
var offlineReason string
if !supported {
apiLevel := getRequiredApiLevel(cfg.Metadata)
if strictErr != nil {
offlineReason = fmt.Sprintf("unsupported - config error: %s", strings.TrimPrefix(strictErr.Error(), "json: "))
} else {
offlineReason = fmt.Sprintf("unsupported - required API level: %s, current API level: %d", apiLevel, JSApiLevel)
}
s.Warnf(" Detected unsupported consumer '%s > %s > %s': %s", a.Name, e.mset.name(), cfg.Name, offlineReason)
} else {
offlineReason = fmt.Sprintf("decoding error: %v", strictErr)
s.Warnf(" Error unmarshalling consumer metafile %q: %v", metafile, strictErr)
}
singleServerMode := !s.JetStreamIsClustered() && s.standAloneMode()
if singleServerMode {
if !e.mset.closed.Load() {
s.Warnf(" Stopping unsupported stream '%s > %s'", a.Name, e.mset.name())
e.mset.mu.Lock()
e.mset.offlineReason = fmt.Sprintf("stopped - unsupported consumer %q", cfg.Name)
e.mset.mu.Unlock()
e.mset.stop(false, false)
}
// Fake a consumer, so we can respond to API requests as single-server.
o := &consumer{
mset: e.mset,
js: s.getJetStream(),
acc: a,
srv: s,
cfg: cfg.ConsumerConfig,
active: false,
stream: e.mset.name(),
name: cfg.Name,
dseq: 1,
sseq: 1,
created: time.Now().UTC(),
closed: true,
offlineReason: offlineReason,
}
if !cfg.Created.IsZero() {
o.created = cfg.Created
}
e.mset.mu.Lock()
e.mset.setConsumer(o)
e.mset.mu.Unlock()
}
continue
}
isEphemeral := !isDurableConsumer(&cfg.ConsumerConfig)
if isEphemeral {
// This is an ephemeral consumer and this could fail on restart until
// the consumer can reconnect. We will create it as a durable and switch it.
cfg.ConsumerConfig.Durable = ofi.Name()
}
obs, err := e.mset.addConsumerWithAssignment(&cfg.ConsumerConfig, _EMPTY_, nil, true, ActionCreateOrUpdate, false)
if err != nil {
s.Warnf(" Error adding consumer %q: %v", cfg.Name, err)
continue
}
if isEphemeral {
obs.switchToEphemeral()
}
if !cfg.Created.IsZero() {
obs.setCreatedTime(cfg.Created)
}
if err != nil {
s.Warnf(" Error restoring consumer %q state: %v", cfg.Name, err)
}
wg.Wait()
} else {
// No parallelTaskQueue provided, do inline as before.
for _, fi := range fis {
doStream(fi)
}
}
// Make sure to cleanup any old remaining snapshots.
os.RemoveAll(filepath.Join(jsa.storeDir, snapsDir))
// Check interest policy streams for auto cleanup.
for _, mset := range ipstreams {
mset.checkForOrphanMsgs()
mset.checkConsumerReplication()
}
s.Debugf("JetStream state for account %q recovered", a.Name)
return nil
+174 -102
View File
@@ -70,6 +70,11 @@ type jetStreamCluster struct {
peerStreamCancelMove *subscription
// To pop out the monitorCluster before the raft layer.
qch chan struct{}
// To notify others that monitorCluster has actually stopped.
stopped chan struct{}
// Track last meta snapshot time and duration for monitoring.
lastMetaSnapTime int64 // Unix nanoseconds
lastMetaSnapDuration int64 // Duration in nanoseconds
}
// Used to track inflight stream add requests to properly re-use same group and sync subject.
@@ -638,12 +643,12 @@ func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) error {
case !mset.isMonitorRunning():
return errors.New("monitor goroutine not running")
case !node.Healthy():
return errors.New("group node unhealthy")
case mset.isCatchingUp():
return errors.New("stream catching up")
case !node.Healthy():
return errors.New("group node unhealthy")
default:
return nil
}
@@ -896,6 +901,9 @@ func (js *jetStream) setupMetaGroup() error {
}
if cfg.Observer {
s.Noticef("Turning JetStream metadata controller Observer Mode on")
s.Noticef("In cases where the JetStream domain is not intended to be extended through a SYS account leaf node connection")
s.Noticef("and waiting for leader election until first contact is not acceptable,")
s.Noticef(`manually disable Observer Mode by setting the JetStream Option "extension_hint: %s"`, jsNoExtend)
}
} else {
s.Noticef("JetStream cluster recovering state")
@@ -909,7 +917,7 @@ func (js *jetStream) setupMetaGroup() error {
cfg.Observer = false
case extUndetermined:
s.Noticef("Turning JetStream metadata controller Observer Mode on - no previous contact")
s.Noticef("In cases where JetStream will not be extended")
s.Noticef("In cases where the JetStream domain is not intended to be extended through a SYS account leaf node connection")
s.Noticef("and waiting for leader election until first contact is not acceptable,")
s.Noticef(`manually disable Observer Mode by setting the JetStream Option "extension_hint: %s"`, jsNoExtend)
}
@@ -948,6 +956,7 @@ func (js *jetStream) setupMetaGroup() error {
s: s,
c: c,
qch: make(chan struct{}),
stopped: make(chan struct{}),
}
atomic.StoreInt32(&js.clustered, 1)
c.registerWithAccount(sysAcc)
@@ -1184,6 +1193,16 @@ func (js *jetStream) clusterQuitC() chan struct{} {
return nil
}
// Return the cluster stopped chan.
func (js *jetStream) clusterStoppedC() chan struct{} {
js.mu.RLock()
defer js.mu.RUnlock()
if js.cluster != nil {
return js.cluster.stopped
}
return nil
}
// Mark that the meta layer is recovering.
func (js *jetStream) setMetaRecovering() {
js.mu.Lock()
@@ -1217,6 +1236,52 @@ type recoveryUpdates struct {
updateConsumers map[string]map[string]*consumerAssignment
}
func (ru *recoveryUpdates) removeStream(sa *streamAssignment) {
key := sa.recoveryKey()
ru.removeStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.updateStreams, key)
delete(ru.updateConsumers, key)
delete(ru.removeConsumers, key)
}
func (ru *recoveryUpdates) addStream(sa *streamAssignment) {
key := sa.recoveryKey()
ru.addStreams[key] = sa
delete(ru.removeStreams, key)
}
func (ru *recoveryUpdates) updateStream(sa *streamAssignment) {
key := sa.recoveryKey()
ru.updateStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.removeStreams, key)
}
func (ru *recoveryUpdates) removeConsumer(ca *consumerAssignment) {
key := ca.recoveryKey()
skey := ca.streamRecoveryKey()
if _, ok := ru.removeConsumers[skey]; !ok {
ru.removeConsumers[skey] = map[string]*consumerAssignment{}
}
ru.removeConsumers[skey][key] = ca
if consumers, ok := ru.updateConsumers[skey]; ok {
delete(consumers, key)
}
}
func (ru *recoveryUpdates) addOrUpdateConsumer(ca *consumerAssignment) {
key := ca.recoveryKey()
skey := ca.streamRecoveryKey()
if consumers, ok := ru.removeConsumers[skey]; ok {
delete(consumers, key)
}
if _, ok := ru.updateConsumers[skey]; !ok {
ru.updateConsumers[skey] = map[string]*consumerAssignment{}
}
ru.updateConsumers[skey][key] = ca
}
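// Taken together, the helpers above compact the recovery log down to its net
// effect: removing a stream cancels any pending add/update for it (and its
// pending consumer changes), while adding or updating a consumer cancels a
// pending removal of that consumer, so replay applies each assignment once.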
// Called after recovery of the cluster on startup to check for any orphans.
// Streams and consumers are recovered from disk, and the meta layer's mappings
// should clean them up, but under crash scenarios there could be orphans.
@@ -1294,9 +1359,10 @@ func (js *jetStream) checkForOrphans() {
func (js *jetStream) monitorCluster() {
s, n := js.server(), js.getMetaGroup()
qch, rqch, lch, aq := js.clusterQuitC(), n.QuitC(), n.LeadChangeC(), n.ApplyQ()
qch, stopped, rqch, lch, aq := js.clusterQuitC(), js.clusterStoppedC(), n.QuitC(), n.LeadChangeC(), n.ApplyQ()
defer s.grWG.Done()
defer close(stopped)
s.Debugf("Starting metadata monitor")
defer s.Debugf("Exiting metadata monitor")
@@ -1341,13 +1407,21 @@ func (js *jetStream) monitorCluster() {
js.setMetaRecovering()
// Snapshotting function.
doSnapshot := func() {
doSnapshot := func(force bool) {
// Suppress during recovery.
if js.isMetaRecovering() {
return
}
// For the meta layer we want to snapshot when asked if we need one or have any entries that we can compact.
if ne, _ := n.Size(); ne > 0 || n.NeedSnapshot() {
// Look up what the threshold is for compaction. Re-reading from config here as it is reloadable.
js.srv.optsMu.RLock()
ethresh := js.srv.opts.JetStreamMetaCompact
szthresh := js.srv.opts.JetStreamMetaCompactSize
js.srv.optsMu.RUnlock()
// Work out our criteria for snapshotting.
byEntries, bySize := ethresh > 0, szthresh > 0
byNeither := !byEntries && !bySize
// For the meta layer we want to snapshot when over the above threshold (which could be 0 by default).
if ne, nsz := n.Size(); force || byNeither || (byEntries && ne > ethresh) || (bySize && nsz > szthresh) || n.NeedSnapshot() {
snap, err := js.metaSnapshot()
if err != nil {
s.Warnf("Error generating JetStream cluster snapshot: %v", err)
@@ -1376,17 +1450,15 @@ func (js *jetStream) monitorCluster() {
select {
case <-s.quitCh:
// Server shutting down, but we might receive this before qch, so try to snapshot.
doSnapshot()
doSnapshot(false)
return
case <-rqch:
// Clean signal from shutdown routine so do best effort attempt to snapshot meta layer.
doSnapshot()
doSnapshot(false)
return
case <-qch:
// Clean signal from shutdown routine so do best effort attempt to snapshot meta layer.
doSnapshot()
// Return the signal back since shutdown will be waiting.
close(qch)
doSnapshot(false)
return
case <-aq.ch:
ces := aq.pop()
@@ -1420,6 +1492,8 @@ func (js *jetStream) monitorCluster() {
// Clear.
ru = nil
s.Debugf("Recovered JetStream cluster metadata")
// Snapshot now so we start with freshly compacted log.
doSnapshot(true)
oc = time.AfterFunc(30*time.Second, js.checkForOrphans)
// Do a health check here as well.
go checkHealth()
@@ -1432,9 +1506,9 @@ func (js *jetStream) monitorCluster() {
_, nb = n.Applied(ce.Index)
}
if js.hasPeerEntries(ce.Entries) || (didSnap && !isLeader) {
doSnapshot()
doSnapshot(true)
} else if nb > compactSizeMin && time.Since(lastSnapTime) > minSnapDelta {
doSnapshot()
doSnapshot(false)
}
} else {
s.Warnf("Error applying JetStream cluster entries: %v", err)
@@ -1450,11 +1524,11 @@ func (js *jetStream) monitorCluster() {
s.sendInternalMsgLocked(serverStatsPingReqSubj, _EMPTY_, nil, nil)
// Install a snapshot as we become leader.
js.checkClusterSize()
doSnapshot()
doSnapshot(false)
}
case <-t.C:
doSnapshot()
doSnapshot(false)
// Periodically check the cluster size.
if n.Leader() {
js.checkClusterSize()
@@ -1608,15 +1682,23 @@ func (js *jetStream) metaSnapshot() ([]byte, error) {
return nil, err
}
// Track how long it took to compress the JSON
// Track how long it took to compress the JSON.
cstart := time.Now()
snap := s2.Encode(nil, b)
cend := time.Since(cstart)
took := time.Since(start)
if took := time.Since(start); took > time.Second {
if took > time.Second {
s.rateLimitFormatWarnf("Metalayer snapshot took %.3fs (streams: %d, consumers: %d, marshal: %.3fs, s2: %.3fs, uncompressed: %d, compressed: %d)",
took.Seconds(), nsa, nca, mend.Seconds(), cend.Seconds(), len(b), len(snap))
}
// Track in jsz monitoring as well.
if cc != nil {
atomic.StoreInt64(&cc.lastMetaSnapTime, start.UnixNano())
atomic.StoreInt64(&cc.lastMetaSnapDuration, int64(took))
}
return snap, nil
}
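// Aside: the s2.Encode call above uses the S2 block format; a minimal
// round-trip with the same API (klauspost/compress/s2), shown standalone:
package main

import (
    "fmt"

    "github.com/klauspost/compress/s2"
)

func main() {
    b := []byte(`{"streams":[],"consumers":[]}`)
    snap := s2.Encode(nil, b) // compress into a fresh buffer
    back, err := s2.Decode(nil, snap)
    fmt.Println(err, len(b), len(snap), string(back) == string(b))
}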
@@ -1705,25 +1787,32 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
for _, sa := range saDel {
js.setStreamAssignmentRecovering(sa)
if isRecovering {
key := sa.recoveryKey()
ru.removeStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.updateStreams, key)
delete(ru.updateConsumers, key)
delete(ru.removeConsumers, key)
ru.removeStream(sa)
} else {
js.processStreamRemoval(sa)
}
}
// Now do add for the streams. Also add in all consumers.
for _, sa := range saAdd {
consumers := sa.consumers
js.setStreamAssignmentRecovering(sa)
js.processStreamAssignment(sa)
if isRecovering {
// Since we're recovering and storing up changes, we'll need to clear out these consumers.
// Some might be removed later in the recovery, so we must not remember them here.
sa.consumers = nil
ru.addStream(sa)
} else {
js.processStreamAssignment(sa)
}
// We can simply process the consumers.
for _, ca := range sa.consumers {
for _, ca := range consumers {
js.setConsumerAssignmentRecovering(ca)
js.processConsumerAssignment(ca)
if isRecovering {
ru.addOrUpdateConsumer(ca)
} else {
js.processConsumerAssignment(ca)
}
}
}
@@ -1732,10 +1821,7 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
for _, sa := range saChk {
js.setStreamAssignmentRecovering(sa)
if isRecovering {
key := sa.recoveryKey()
ru.updateStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.removeStreams, key)
ru.updateStream(sa)
} else {
js.processUpdateStreamAssignment(sa)
}
@@ -1745,15 +1831,7 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
for _, ca := range caDel {
js.setConsumerAssignmentRecovering(ca)
if isRecovering {
key := ca.recoveryKey()
skey := ca.streamRecoveryKey()
if _, ok := ru.removeConsumers[skey]; !ok {
ru.removeConsumers[skey] = map[string]*consumerAssignment{}
}
ru.removeConsumers[skey][key] = ca
if consumers, ok := ru.updateConsumers[skey]; ok {
delete(consumers, key)
}
ru.removeConsumer(ca)
} else {
js.processConsumerRemoval(ca)
}
@@ -1761,15 +1839,7 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
for _, ca := range caAdd {
js.setConsumerAssignmentRecovering(ca)
if isRecovering {
key := ca.recoveryKey()
skey := ca.streamRecoveryKey()
if consumers, ok := ru.removeConsumers[skey]; ok {
delete(consumers, key)
}
if _, ok := ru.updateConsumers[skey]; !ok {
ru.updateConsumers[skey] = map[string]*consumerAssignment{}
}
ru.updateConsumers[skey][key] = ca
ru.addOrUpdateConsumer(ca)
} else {
js.processConsumerAssignment(ca)
}
@@ -2037,9 +2107,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
}
if isRecovering {
js.setStreamAssignmentRecovering(sa)
key := sa.recoveryKey()
ru.addStreams[key] = sa
delete(ru.removeStreams, key)
ru.addStream(sa)
} else {
js.processStreamAssignment(sa)
}
@@ -2051,12 +2119,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
}
if isRecovering {
js.setStreamAssignmentRecovering(sa)
key := sa.recoveryKey()
ru.removeStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.updateStreams, key)
delete(ru.updateConsumers, key)
delete(ru.removeConsumers, key)
ru.removeStream(sa)
} else {
js.processStreamRemoval(sa)
}
@@ -2068,15 +2131,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
}
if isRecovering {
js.setConsumerAssignmentRecovering(ca)
key := ca.recoveryKey()
skey := ca.streamRecoveryKey()
if consumers, ok := ru.removeConsumers[skey]; ok {
delete(consumers, key)
}
if _, ok := ru.updateConsumers[skey]; !ok {
ru.updateConsumers[skey] = map[string]*consumerAssignment{}
}
ru.updateConsumers[skey][key] = ca
ru.addOrUpdateConsumer(ca)
} else {
js.processConsumerAssignment(ca)
}
@@ -2108,15 +2163,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
}
if isRecovering {
js.setConsumerAssignmentRecovering(ca)
key := ca.recoveryKey()
skey := ca.streamRecoveryKey()
if _, ok := ru.removeConsumers[skey]; !ok {
ru.removeConsumers[skey] = map[string]*consumerAssignment{}
}
ru.removeConsumers[skey][key] = ca
if consumers, ok := ru.updateConsumers[skey]; ok {
delete(consumers, key)
}
ru.removeConsumer(ca)
} else {
js.processConsumerRemoval(ca)
}
@@ -2128,10 +2175,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
}
if isRecovering {
js.setStreamAssignmentRecovering(sa)
key := sa.recoveryKey()
ru.updateStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.removeStreams, key)
ru.updateStream(sa)
} else {
js.processUpdateStreamAssignment(sa)
}
@@ -2614,11 +2658,19 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
return
case <-mqch:
// Clean signal from shutdown routine so do best effort attempt to snapshot.
doSnapshot()
// Don't snapshot if not shutting down; the monitor goroutine could be going
// away on a scale down or a remove, for example.
if s.isShuttingDown() {
doSnapshot()
}
return
case <-qch:
// Clean signal from shutdown routine so do best effort attempt to snapshot.
doSnapshot()
// Don't snapshot if not shutting down; the Raft node could be going away on a
// scale down or a remove, for example.
if s.isShuttingDown() {
doSnapshot()
}
return
case <-aq.ch:
var ne, nb uint64
@@ -2713,6 +2765,9 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
}
case isLeader = <-lch:
// Process our leader change.
js.processStreamLeaderChange(mset, isLeader)
if isLeader {
if mset != nil && n != nil && sendSnapshot && !isRecovering {
// If we *are* recovering at the time then this will get done when the apply queue
@@ -2729,14 +2784,10 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
}
// Always cancel if this was running.
stopDirectMonitoring()
} else if !n.Leaderless() {
js.setStreamAssignmentRecovering(sa)
}
// Process our leader change.
js.processStreamLeaderChange(mset, isLeader)
// We may receive a leader change after the stream assignment, which would cancel
// our close monitoring of this stream. So re-assess our state here as well.
// Or the old leader is no longer part of the set and transferred leadership
@@ -4019,7 +4070,7 @@ func (js *jetStream) processStreamAssignment(sa *streamAssignment) {
js.mu.Unlock()
// Need to stop the stream, we can't keep running with an old config.
acc, err := s.LookupAccount(accName)
acc, err := s.lookupOrFetchAccount(accName, isMember)
if err != nil {
return
}
@@ -4033,7 +4084,7 @@ func (js *jetStream) processStreamAssignment(sa *streamAssignment) {
}
js.mu.Unlock()
acc, err := s.LookupAccount(accName)
acc, err := s.lookupOrFetchAccount(accName, isMember)
if err != nil {
ll := fmt.Sprintf("Account [%s] lookup for stream create failed: %v", accName, err)
if isMember {
@@ -4148,7 +4199,7 @@ func (js *jetStream) processUpdateStreamAssignment(sa *streamAssignment) {
js.mu.Unlock()
// Need to stop the stream, we can't keep running with an old config.
acc, err := s.LookupAccount(accName)
acc, err := s.lookupOrFetchAccount(accName, isMember)
if err != nil {
return
}
@@ -4162,9 +4213,14 @@ func (js *jetStream) processUpdateStreamAssignment(sa *streamAssignment) {
}
js.mu.Unlock()
acc, err := s.LookupAccount(accName)
acc, err := s.lookupOrFetchAccount(accName, isMember)
if err != nil {
s.Warnf("Update Stream Account %s, error on lookup: %v", accName, err)
ll := fmt.Sprintf("Update Stream Account %s, error on lookup: %v", accName, err)
if isMember {
s.Warnf(ll)
} else {
s.Debugf(ll)
}
return
}
@@ -4837,7 +4893,7 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) {
// Be conservative by protecting the whole stream, even if just one consumer is unsupported.
// This ensures it's safe, even with Interest-based retention where it would otherwise
// continue accepting but dropping messages.
acc, err := s.LookupAccount(accName)
acc, err := s.lookupOrFetchAccount(accName, isMember)
if err != nil {
return
}
@@ -4851,7 +4907,7 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) {
}
js.mu.Unlock()
acc, err := s.LookupAccount(accName)
acc, err := s.lookupOrFetchAccount(accName, isMember)
if err != nil {
ll := fmt.Sprintf("Account [%s] lookup for consumer create failed: %v", accName, err)
if isMember {
@@ -4993,7 +5049,7 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state
acc, err := s.LookupAccount(accName)
if err != nil {
s.Warnf("JetStream cluster failed to lookup axccount %q: %v", accName, err)
s.Warnf("JetStream cluster failed to lookup account %q: %v", accName, err)
return
}
@@ -5512,11 +5568,19 @@ func (js *jetStream) monitorConsumer(o *consumer, ca *consumerAssignment) {
return
case <-mqch:
// Clean signal from shutdown routine so do best effort attempt to snapshot.
doSnapshot(false)
// Don't snapshot if not shutting down; the monitor goroutine could be going
// away on a scale down or a remove, for example.
if s.isShuttingDown() {
doSnapshot(false)
}
return
case <-qch:
// Clean signal from shutdown routine so do best effort attempt to snapshot.
doSnapshot(false)
// Don't snapshot if not shutting down; the Raft node could be going away on a
// scale down or a remove, for example.
if s.isShuttingDown() {
doSnapshot(false)
}
return
case <-aq.ch:
ces := aq.pop()
@@ -8756,6 +8820,13 @@ func (mset *stream) stateSnapshotLocked() []byte {
}
// Older v1 version with deleted as a sorted []uint64.
// For a stream with millions or billions of interior deletes, this will be huge.
// Now that all server versions 2.10.+ support binary snapshots, we should never fall back.
assert.Unreachable("Legacy JSON stream snapshot used", map[string]any{
"stream": mset.cfg.Name,
"account": mset.acc.Name,
})
state := mset.store.State()
snap := &streamSnapshot{
Msgs: state.Msgs,
@@ -9881,6 +9952,7 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
}
}
start := time.Now()
mset.setCatchupPeer(sreq.Peer, last-seq)
var spb int
@@ -9889,7 +9961,7 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
sendNextBatchAndContinue := func(qch chan struct{}) bool {
// Check if we know we will not enter the loop because we are done.
if seq > last {
s.Noticef("Catchup for stream '%s > %s' complete", mset.account(), mset.name())
s.Noticef("Catchup for stream '%s > %s' complete (took %v)", mset.account(), mset.name(), time.Since(start))
// EOF
s.sendInternalMsgLocked(sendSubject, _EMPTY_, nil, nil)
return false
@@ -9958,7 +10030,7 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
// See if we should use LoadNextMsg instead of walking sequence by sequence when there are many more interior deletes than messages.
// Only makes sense with delete range capabilities.
useLoadNext := drOk && (uint64(state.NumDeleted) > 10*state.Msgs)
useLoadNext := drOk && (uint64(state.NumDeleted) > 2*state.Msgs || state.NumDeleted > 1_000_000)
var smv StoreMsg
for ; seq <= last && atomic.LoadInt64(&outb) <= maxOutBytes && atomic.LoadInt32(&outm) <= maxOutMsgs && s.gcbBelowMax(); seq++ {
@@ -9998,8 +10070,8 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
// The snapshot has a larger last sequence than we have. This could be due to a truncation
// when trying to recover after corruption, still not 100% sure. Could be off by 1 too somehow,
// but tested a ton of those with no success.
s.Warnf("Catchup for stream '%s > %s' completed, but requested sequence %d was larger than current state: %+v",
mset.account(), mset.name(), seq, state)
s.Warnf("Catchup for stream '%s > %s' completed (took %v), but requested sequence %d was larger than current state: %+v",
mset.account(), mset.name(), time.Since(start), seq, state)
// Try our best to redo our invalidated snapshot as well.
if n := mset.raftNode(); n != nil {
if snap := mset.stateSnapshot(); snap != nil {
@@ -10045,7 +10117,7 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
if drOk && dr.First > 0 {
sendDR()
}
s.Noticef("Catchup for stream '%s > %s' complete", mset.account(), mset.name())
s.Noticef("Catchup for stream '%s > %s' complete (took %v)", mset.account(), mset.name(), time.Since(start))
// EOF
s.sendInternalMsgLocked(sendSubject, _EMPTY_, nil, nil)
return false
+9 -3
View File
@@ -1049,17 +1049,22 @@ func (c *client) sendLeafConnect(clusterName string, headers bool) error {
// In addition, and this is to allow auth callout, set user/password or
// token if applicable.
if userInfo := c.leaf.remote.curURL.User; userInfo != nil {
// For backward compatibility, if only username is provided, set both
// Token and User, not just Token.
cinfo.User = userInfo.Username()
var ok bool
cinfo.Pass, ok = userInfo.Password()
// For backward compatibility, if only username is provided, set both
// Token and User, not just Token.
if !ok {
cinfo.Token = cinfo.User
}
} else if c.leaf.remote.username != _EMPTY_ {
cinfo.User = c.leaf.remote.username
cinfo.Pass = c.leaf.remote.password
// For backward compatibility, if only username is provided, set both
// Token and User, not just Token.
if cinfo.Pass == _EMPTY_ {
cinfo.Token = cinfo.User
}
}
b, err := json.Marshal(cinfo)
if err != nil {
@@ -2421,7 +2426,8 @@ func (s *Server) initLeafNodeSmapAndSendSubs(c *client) {
// updateInterestForAccountOnGateway called from gateway code when processing RS+ and RS-.
func (s *Server) updateInterestForAccountOnGateway(accName string, sub *subscription, delta int32) {
acc, err := s.LookupAccount(accName)
// Since we're in the gateway's readLoop, and we would otherwise block, don't allow fetching.
acc, err := s.lookupOrFetchAccount(accName, false)
if acc == nil || err != nil {
s.Debugf("No or bad account for %q, failed to update interest from gateway", accName)
return
+90 -27
View File
@@ -1244,6 +1244,7 @@ type Varz struct {
JetStream JetStreamVarz `json:"jetstream,omitempty"` // JetStream is the JetStream state
TLSTimeout float64 `json:"tls_timeout"` // TLSTimeout is how long TLS operations have to complete
WriteDeadline time.Duration `json:"write_deadline"` // WriteDeadline is the maximum time writes to sockets have to complete
WriteTimeout string `json:"write_timeout,omitempty"` // WriteTimeout is the closure policy for write deadline errors
Start time.Time `json:"start"` // Start is time when the server was started
Now time.Time `json:"now"` // Now is the current time of the server
Uptime string `json:"uptime"` // Uptime is how long the server has been running
@@ -1290,15 +1291,17 @@ type JetStreamVarz struct {
// ClusterOptsVarz contains monitoring cluster information
type ClusterOptsVarz struct {
Name string `json:"name,omitempty"` // Name is the configured cluster name
Host string `json:"addr,omitempty"` // Host is the host the cluster listens on for connections
Port int `json:"cluster_port,omitempty"` // Port is the port the cluster listens on for connections
AuthTimeout float64 `json:"auth_timeout,omitempty"` // AuthTimeout is the time cluster connections have to complete authentication
URLs []string `json:"urls,omitempty"` // URLs is the list of cluster URLs
TLSTimeout float64 `json:"tls_timeout,omitempty"` // TLSTimeout is how long TLS operations have to complete
TLSRequired bool `json:"tls_required,omitempty"` // TLSRequired indicates if TLS is required for connections
TLSVerify bool `json:"tls_verify,omitempty"` // TLSVerify indicates if full verification of TLS connections is performed
PoolSize int `json:"pool_size,omitempty"` // PoolSize is the configured route connection pool size
Name string `json:"name,omitempty"` // Name is the configured cluster name
Host string `json:"addr,omitempty"` // Host is the host the cluster listens on for connections
Port int `json:"cluster_port,omitempty"` // Port is the port the cluster listens on for connections
AuthTimeout float64 `json:"auth_timeout,omitempty"` // AuthTimeout is the time cluster connections have to complete authentication
URLs []string `json:"urls,omitempty"` // URLs is the list of cluster URLs
TLSTimeout float64 `json:"tls_timeout,omitempty"` // TLSTimeout is how long TLS operations have to complete
TLSRequired bool `json:"tls_required,omitempty"` // TLSRequired indicates if TLS is required for connections
TLSVerify bool `json:"tls_verify,omitempty"` // TLSVerify indicates if full verification of TLS connections is performed
PoolSize int `json:"pool_size,omitempty"` // PoolSize is the configured route connection pool size
WriteDeadline time.Duration `json:"write_deadline,omitempty"` // WriteDeadline is the maximum time writes to sockets have to complete
WriteTimeout string `json:"write_timeout,omitempty"` // WriteTimeout is the closure policy for write deadline errors
}
// GatewayOptsVarz contains monitoring gateway information
@@ -1314,6 +1317,8 @@ type GatewayOptsVarz struct {
ConnectRetries int `json:"connect_retries,omitempty"` // ConnectRetries is how many connection attempts the route will make
Gateways []RemoteGatewayOptsVarz `json:"gateways,omitempty"` // Gateways is state of configured gateway remotes
RejectUnknown bool `json:"reject_unknown,omitempty"` // RejectUnknown indicates if unknown cluster connections will be rejected
WriteDeadline time.Duration `json:"write_deadline,omitempty"` // WriteDeadline is the maximum time writes to sockets have to complete
WriteTimeout string `json:"write_timeout,omitempty"` // WriteTimeout is the closure policy for write deadline errors
}
// RemoteGatewayOptsVarz contains monitoring remote gateway information
@@ -1333,6 +1338,8 @@ type LeafNodeOptsVarz struct {
TLSVerify bool `json:"tls_verify,omitempty"` // TLSVerify indicates if full verification of TLS connections is performed
Remotes []RemoteLeafOptsVarz `json:"remotes,omitempty"` // Remotes is state of configured Leafnode remotes
TLSOCSPPeerVerify bool `json:"tls_ocsp_peer_verify,omitempty"` // TLSOCSPPeerVerify indicates if OCSP verification will be performed
WriteDeadline time.Duration `json:"write_deadline,omitempty"` // WriteDeadline is the maximum time writes to sockets have to complete
WriteTimeout string `json:"write_timeout,omitempty"` // WriteTimeout is the closure policy for write deadline errors
}
// DenyRules Contains lists of subjects not allowed to be imported/exported
@@ -1501,7 +1508,8 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) {
<a href=.%s>LeafNodes<span class="endpoint"> %s</span></a>
<a href=.%s>Gateways<span class="endpoint"> %s</span></a>
<a href=.%s>Raft Groups<span class="endpoint"> %s</span></a>
<a href=.%s class=last>Health Probe<span class="endpoint"> %s</span></a>
<a href=.%s>Health Probe<span class="endpoint"> %s</span></a>
<a href=.%s class=last>Expvar<span class="endpoint"> %s</span></a>
<a href=https://docs.nats.io/running-a-nats-service/nats_admin/monitoring class="help">Help</a>
</body>
</html>`,
@@ -1518,6 +1526,7 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) {
s.basePath(GatewayzPath), GatewayzPath,
s.basePath(RaftzPath), RaftzPath,
s.basePath(HealthzPath), HealthzPath,
s.basePath(ExpvarzPath), ExpvarzPath,
)
}
@@ -1599,14 +1608,16 @@ func (s *Server) createVarz(pcpu float64, rss int64) *Varz {
HTTPBasePath: opts.HTTPBasePath,
HTTPSPort: opts.HTTPSPort,
Cluster: ClusterOptsVarz{
Name: info.Cluster,
Host: c.Host,
Port: c.Port,
AuthTimeout: c.AuthTimeout,
TLSTimeout: c.TLSTimeout,
TLSRequired: clustTlsReq,
TLSVerify: clustTlsReq,
PoolSize: opts.Cluster.PoolSize,
Name: info.Cluster,
Host: c.Host,
Port: c.Port,
AuthTimeout: c.AuthTimeout,
TLSTimeout: c.TLSTimeout,
TLSRequired: clustTlsReq,
TLSVerify: clustTlsReq,
PoolSize: opts.Cluster.PoolSize,
WriteDeadline: opts.Cluster.WriteDeadline,
WriteTimeout: opts.Cluster.WriteTimeout.String(),
},
Gateway: GatewayOptsVarz{
Name: gw.Name,
@@ -1620,6 +1631,8 @@ func (s *Server) createVarz(pcpu float64, rss int64) *Varz {
ConnectRetries: gw.ConnectRetries,
Gateways: []RemoteGatewayOptsVarz{},
RejectUnknown: gw.RejectUnknown,
WriteDeadline: gw.WriteDeadline,
WriteTimeout: gw.WriteTimeout.String(),
},
LeafNode: LeafNodeOptsVarz{
Host: ln.Host,
@@ -1630,6 +1643,8 @@ func (s *Server) createVarz(pcpu float64, rss int64) *Varz {
TLSVerify: leafTlsVerify,
TLSOCSPPeerVerify: leafTlsOCSPPeerVerify,
Remotes: []RemoteLeafOptsVarz{},
WriteDeadline: ln.WriteDeadline,
WriteTimeout: ln.WriteTimeout.String(),
},
MQTT: MQTTOptsVarz{
Host: mqtt.Host,
@@ -1746,6 +1761,7 @@ func (s *Server) updateVarzConfigReloadableFields(v *Varz) {
v.MaxPending = opts.MaxPending
v.TLSTimeout = opts.TLSTimeout
v.WriteDeadline = opts.WriteDeadline
v.WriteTimeout = opts.WriteTimeout.String()
v.ConfigLoadTime = s.configTime.UTC()
v.ConfigDigest = opts.configDigest
v.Tags = opts.Tags
@@ -2886,6 +2902,7 @@ type JSzOptions struct {
Accounts bool `json:"accounts,omitempty"`
Streams bool `json:"streams,omitempty"`
Consumer bool `json:"consumer,omitempty"`
DirectConsumer bool `json:"direct_consumer,omitempty"`
Config bool `json:"config,omitempty"`
LeaderOnly bool `json:"leader_only,omitempty"`
Offset int `json:"offset,omitempty"`
@@ -2934,6 +2951,7 @@ type StreamDetail struct {
Config *StreamConfig `json:"config,omitempty"`
State StreamState `json:"state,omitempty"`
Consumer []*ConsumerInfo `json:"consumer_detail,omitempty"`
DirectConsumer []*ConsumerInfo `json:"direct_consumer_detail,omitempty"`
Mirror *StreamSourceInfo `json:"mirror,omitempty"`
Sources []*StreamSourceInfo `json:"sources,omitempty"`
RaftGroup string `json:"stream_raft_group,omitempty"`
@@ -2953,14 +2971,23 @@ type AccountDetail struct {
Streams []StreamDetail `json:"stream_detail,omitempty"`
}
// MetaSnapshotStats shows information about meta snapshots.
type MetaSnapshotStats struct {
PendingEntries uint64 `json:"pending_entries"` // PendingEntries is the count of pending entries in the meta layer
PendingSize uint64 `json:"pending_size"` // PendingSize is the size in bytes of pending entries in the meta layer
LastTime time.Time `json:"last_time,omitempty"` // LastTime is when the last meta snapshot was taken
LastDuration time.Duration `json:"last_duration,omitempty"` // LastDuration is how long the last meta snapshot took
}
// MetaClusterInfo shows information about the meta group.
type MetaClusterInfo struct {
Name string `json:"name,omitempty"` // Name is the name of the cluster
Leader string `json:"leader,omitempty"` // Leader is the server name of the cluster leader
Peer string `json:"peer,omitempty"` // Peer is unique ID of the leader
Replicas []*PeerInfo `json:"replicas,omitempty"` // Replicas is a list of known peers
Size int `json:"cluster_size"` // Size is the known size of the cluster
Pending int `json:"pending"` // Pending is how many RAFT messages are not yet processed
Name string `json:"name,omitempty"` // Name is the name of the cluster
Leader string `json:"leader,omitempty"` // Leader is the server name of the cluster leader
Peer string `json:"peer,omitempty"` // Peer is unique ID of the leader
Replicas []*PeerInfo `json:"replicas,omitempty"` // Replicas is a list of known peers
Size int `json:"cluster_size"` // Size is the known size of the cluster
Pending int `json:"pending"` // Pending is how many RAFT messages are not yet processed
Snapshot *MetaSnapshotStats `json:"snapshot"` // Snapshot contains meta snapshot statistics
}
// JSInfo has detailed information on JetStream.
@@ -2982,7 +3009,7 @@ type JSInfo struct {
Total int `json:"total"`
}
func (s *Server) accountDetail(jsa *jsAccount, optStreams, optConsumers, optCfg, optRaft, optStreamLeader bool) *AccountDetail {
func (s *Server) accountDetail(jsa *jsAccount, optStreams, optConsumers, optDirectConsumers, optCfg, optRaft, optStreamLeader bool) *AccountDetail {
jsa.mu.RLock()
acc := jsa.account
name := acc.GetName()
@@ -3064,6 +3091,18 @@ func (s *Server) accountDetail(jsa *jsAccount, optStreams, optConsumers, optCfg,
}
}
}
if optDirectConsumers {
for _, consumer := range stream.getDirectConsumers() {
cInfo := consumer.info()
if cInfo == nil {
continue
}
if !optCfg {
cInfo.Config = nil
}
sdet.DirectConsumer = append(sdet.DirectConsumer, cInfo)
}
}
}
detail.Streams = append(detail.Streams, sdet)
}
@@ -3087,7 +3126,7 @@ func (s *Server) JszAccount(opts *JSzOptions) (*AccountDetail, error) {
if !ok {
return nil, fmt.Errorf("account %q not jetstream enabled", acc)
}
return s.accountDetail(jsa, opts.Streams, opts.Consumer, opts.Config, opts.RaftGroups, opts.StreamLeaderOnly), nil
return s.accountDetail(jsa, opts.Streams, opts.Consumer, opts.DirectConsumer, opts.Config, opts.RaftGroups, opts.StreamLeaderOnly), nil
}
// helper to get cluster info from node via dummy group
@@ -3165,6 +3204,7 @@ func (s *Server) Jsz(opts *JSzOptions) (*JSInfo, error) {
if mg := js.getMetaGroup(); mg != nil {
if ci := s.raftNodeToClusterInfo(mg); ci != nil {
entries, bytes := mg.Size()
jsi.Meta = &MetaClusterInfo{Name: ci.Name, Leader: ci.Leader, Peer: getHash(ci.Leader), Size: mg.ClusterSize()}
if isLeader {
jsi.Meta.Replicas = ci.Replicas
@@ -3172,6 +3212,24 @@ func (s *Server) Jsz(opts *JSzOptions) (*JSInfo, error) {
if ipq := s.jsAPIRoutedReqs; ipq != nil {
jsi.Meta.Pending = ipq.len()
}
// Add meta snapshot stats
jsi.Meta.Snapshot = &MetaSnapshotStats{
PendingEntries: entries,
PendingSize: bytes,
}
js.mu.RLock()
cluster := js.cluster
js.mu.RUnlock()
if cluster != nil {
timeNanos := atomic.LoadInt64(&cluster.lastMetaSnapTime)
durationNanos := atomic.LoadInt64(&cluster.lastMetaSnapDuration)
if timeNanos > 0 {
jsi.Meta.Snapshot.LastTime = time.Unix(0, timeNanos).UTC()
}
if durationNanos > 0 {
jsi.Meta.Snapshot.LastDuration = time.Duration(durationNanos)
}
}
}
}
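// Aside: the writer (metaSnapshot) and this reader never hold a common lock;
// the two counters are shared purely through atomics. The pattern in isolation:
package main

import (
    "fmt"
    "sync/atomic"
    "time"
)

func main() {
    var lastSnapTime, lastSnapDuration int64

    start := time.Now() // writer side, after taking a snapshot
    atomic.StoreInt64(&lastSnapTime, start.UnixNano())
    atomic.StoreInt64(&lastSnapDuration, int64(time.Since(start)))

    if ts := atomic.LoadInt64(&lastSnapTime); ts > 0 { // reader side (monitoring)
        fmt.Println(time.Unix(0, ts).UTC(), time.Duration(atomic.LoadInt64(&lastSnapDuration)))
    }
}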
@@ -3236,7 +3294,7 @@ func (s *Server) Jsz(opts *JSzOptions) (*JSInfo, error) {
jsi.AccountDetails = make([]*AccountDetail, 0, len(accounts))
for _, jsa := range accounts {
detail := s.accountDetail(jsa, opts.Streams, opts.Consumer, opts.Config, opts.RaftGroups, opts.StreamLeaderOnly)
detail := s.accountDetail(jsa, opts.Streams, opts.Consumer, opts.DirectConsumer, opts.Config, opts.RaftGroups, opts.StreamLeaderOnly)
jsi.AccountDetails = append(jsi.AccountDetails, detail)
}
}
@@ -3261,6 +3319,10 @@ func (s *Server) HandleJsz(w http.ResponseWriter, r *http.Request) {
if err != nil {
return
}
directConsumers, err := decodeBool(w, r, "direct-consumers")
if err != nil {
return
}
config, err := decodeBool(w, r, "config")
if err != nil {
return
@@ -3292,6 +3354,7 @@ func (s *Server) HandleJsz(w http.ResponseWriter, r *http.Request) {
Accounts: accounts,
Streams: streams,
Consumer: consumers,
DirectConsumer: directConsumers,
Config: config,
LeaderOnly: leader,
Offset: offset,
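// Aside: client-side view of the new flag. A sketch assuming a local server
// with monitoring enabled on port 8222; the endpoint path and parameter names
// come from the handler above:
package main

import (
    "fmt"
    "io"
    "net/http"
)

func main() {
    resp, err := http.Get("http://127.0.0.1:8222/jsz?accounts=true&streams=true&direct-consumers=true")
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(string(body)) // direct_consumer_detail is populated per stream
}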
+93 -45
View File
@@ -62,29 +62,30 @@ type PinnedCertSet map[string]struct{}
// NOTE: This structure is no longer used for monitoring endpoints
// and json tags are deprecated and may be removed in the future.
type ClusterOpts struct {
Name string `json:"-"`
Host string `json:"addr,omitempty"`
Port int `json:"cluster_port,omitempty"`
Username string `json:"-"`
Password string `json:"-"`
AuthTimeout float64 `json:"auth_timeout,omitempty"`
Permissions *RoutePermissions `json:"-"`
TLSTimeout float64 `json:"-"`
TLSConfig *tls.Config `json:"-"`
TLSMap bool `json:"-"`
TLSCheckKnownURLs bool `json:"-"`
TLSPinnedCerts PinnedCertSet `json:"-"`
ListenStr string `json:"-"`
Advertise string `json:"-"`
NoAdvertise bool `json:"-"`
ConnectRetries int `json:"-"`
ConnectBackoff bool `json:"-"`
PoolSize int `json:"-"`
PinnedAccounts []string `json:"-"`
Compression CompressionOpts `json:"-"`
PingInterval time.Duration `json:"-"`
MaxPingsOut int `json:"-"`
WriteDeadline time.Duration `json:"-"`
Name string `json:"-"`
Host string `json:"addr,omitempty"`
Port int `json:"cluster_port,omitempty"`
Username string `json:"-"`
Password string `json:"-"`
AuthTimeout float64 `json:"auth_timeout,omitempty"`
Permissions *RoutePermissions `json:"-"`
TLSTimeout float64 `json:"-"`
TLSConfig *tls.Config `json:"-"`
TLSMap bool `json:"-"`
TLSCheckKnownURLs bool `json:"-"`
TLSPinnedCerts PinnedCertSet `json:"-"`
ListenStr string `json:"-"`
Advertise string `json:"-"`
NoAdvertise bool `json:"-"`
ConnectRetries int `json:"-"`
ConnectBackoff bool `json:"-"`
PoolSize int `json:"-"`
PinnedAccounts []string `json:"-"`
Compression CompressionOpts `json:"-"`
PingInterval time.Duration `json:"-"`
MaxPingsOut int `json:"-"`
WriteDeadline time.Duration `json:"-"`
WriteTimeout WriteTimeoutPolicy `json:"-"`
// Not exported (used in tests)
resolver netResolver
@@ -128,6 +129,7 @@ type GatewayOpts struct {
Gateways []*RemoteGatewayOpts `json:"gateways,omitempty"`
RejectUnknown bool `json:"reject_unknown,omitempty"` // config got renamed to reject_unknown_cluster
WriteDeadline time.Duration `json:"-"`
WriteTimeout WriteTimeoutPolicy `json:"-"`
// Not exported, for tests.
resolver netResolver
@@ -174,11 +176,12 @@ type LeafNodeOpts struct {
// to start before falling back to previous behavior of sending the
// INFO protocol first. It allows for a mix of newer remote leafnodes
// that can require a TLS handshake first, and older that can't.
TLSHandshakeFirstFallback time.Duration `json:"-"`
Advertise string `json:"-"`
NoAdvertise bool `json:"-"`
ReconnectInterval time.Duration `json:"-"`
WriteDeadline time.Duration `json:"-"`
TLSHandshakeFirstFallback time.Duration `json:"-"`
Advertise string `json:"-"`
NoAdvertise bool `json:"-"`
ReconnectInterval time.Duration `json:"-"`
WriteDeadline time.Duration `json:"-"`
WriteTimeout WriteTimeoutPolicy `json:"-"`
// Compression options
Compression CompressionOpts `json:"-"`
@@ -353,6 +356,7 @@ type Options struct {
Username string `json:"-"`
Password string `json:"-"`
ProxyRequired bool `json:"-"`
ProxyProtocol bool `json:"-"`
Authorization string `json:"-"`
AuthCallout *AuthCallout `json:"-"`
PingInterval time.Duration `json:"ping_interval"`
@@ -383,6 +387,8 @@ type Options struct {
JetStreamTpm JSTpmOpts
JetStreamMaxCatchup int64
JetStreamRequestQueueLimit int64
JetStreamMetaCompact uint64
JetStreamMetaCompactSize uint64
StreamMaxBufferedMsgs int `json:"-"`
StreamMaxBufferedSize int64 `json:"-"`
StoreDir string `json:"-"`
@@ -423,12 +429,13 @@ type Options struct {
// to start before falling back to previous behavior of sending the
// INFO protocol first. It allows for a mix of newer clients that can
// require a TLS handshake first, and older clients that can't.
TLSHandshakeFirstFallback time.Duration `json:"-"`
AllowNonTLS bool `json:"-"`
WriteDeadline time.Duration `json:"-"`
MaxClosedClients int `json:"-"`
LameDuckDuration time.Duration `json:"-"`
LameDuckGracePeriod time.Duration `json:"-"`
TLSHandshakeFirstFallback time.Duration `json:"-"`
AllowNonTLS bool `json:"-"`
WriteDeadline time.Duration `json:"-"`
WriteTimeout WriteTimeoutPolicy `json:"-"`
MaxClosedClients int `json:"-"`
LameDuckDuration time.Duration `json:"-"`
LameDuckGracePeriod time.Duration `json:"-"`
// MaxTracedMsgLen is the maximum printable length for traced messages.
MaxTracedMsgLen int `json:"-"`
@@ -1253,6 +1260,8 @@ func (o *Options) processConfigFileLine(k string, v any, errors *[]error, warnin
o.MaxPayload = int32(v.(int64))
case "max_pending":
o.MaxPending = v.(int64)
case "proxy_protocol":
o.ProxyProtocol = v.(bool)
case "max_connections", "max_conn":
o.MaxConn = int(v.(int64))
case "max_traced_msg_len":
@@ -1347,6 +1356,8 @@ func (o *Options) processConfigFileLine(k string, v any, errors *[]error, warnin
o.AllowNonTLS = v.(bool)
case "write_deadline":
o.WriteDeadline = parseDuration("write_deadline", tk, v, errors, warnings)
case "write_timeout":
o.WriteTimeout = parseWriteDeadlinePolicy(tk, v.(string), errors)
case "lame_duck_duration":
dur, err := time.ParseDuration(v.(string))
if err != nil {
@@ -1828,6 +1839,21 @@ func parseDuration(field string, tk token, v any, errors *[]error, warnings *[]e
}
}
func parseWriteDeadlinePolicy(tk token, v string, errors *[]error) WriteTimeoutPolicy {
switch v {
case "default":
return WriteTimeoutPolicyDefault
case "close":
return WriteTimeoutPolicyClose
case "retry":
return WriteTimeoutPolicyRetry
default:
err := &configErr{tk, "write_timeout must be 'default', 'close' or 'retry'"}
*errors = append(*errors, err)
return WriteTimeoutPolicyDefault
}
}
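// Aside: write_timeout is recognized at the top level and inside the cluster,
// gateway and leafnodes blocks (see the hunks below). The same mapping without
// the config-token plumbing, as a sketch within this package (fmt is assumed
// imported here):
func writeTimeoutPolicyFromString(v string) (WriteTimeoutPolicy, error) {
    switch v {
    case "default":
        return WriteTimeoutPolicyDefault, nil
    case "close":
        return WriteTimeoutPolicyClose, nil
    case "retry":
        return WriteTimeoutPolicyRetry, nil
    }
    return WriteTimeoutPolicyDefault, fmt.Errorf("write_timeout must be 'default', 'close' or 'retry'")
}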
func trackExplicitVal(pm *map[string]bool, name string, val bool) {
m := *pm
if m == nil {
@@ -2004,6 +2030,8 @@ func parseCluster(v any, opts *Options, errors *[]error, warnings *[]error) erro
opts.Cluster.MaxPingsOut = int(mv.(int64))
case "write_deadline":
opts.Cluster.WriteDeadline = parseDuration("write_deadline", tk, mv, errors, warnings)
case "write_timeout":
opts.Cluster.WriteTimeout = parseWriteDeadlinePolicy(tk, mv.(string), errors)
default:
if !tk.IsUsedVariable() {
err := &unknownConfigFieldErr{
@@ -2194,6 +2222,8 @@ func parseGateway(v any, o *Options, errors *[]error, warnings *[]error) error {
o.Gateway.RejectUnknown = mv.(bool)
case "write_deadline":
o.Gateway.WriteDeadline = parseDuration("write_deadline", tk, mv, errors, warnings)
case "write_timeout":
o.Gateway.WriteTimeout = parseWriteDeadlinePolicy(tk, mv.(string), errors)
default:
if !tk.IsUsedVariable() {
err := &unknownConfigFieldErr{
@@ -2603,6 +2633,21 @@ func parseJetStream(v any, opts *Options, errors *[]error, warnings *[]error) er
return &configErr{tk, fmt.Sprintf("Expected a parseable size for %q, got %v", mk, mv)}
}
opts.JetStreamRequestQueueLimit = lim
case "meta_compact":
thres, ok := mv.(int64)
if !ok || thres < 0 {
return &configErr{tk, fmt.Sprintf("Expected an absolute size for %q, got %v", mk, mv)}
}
opts.JetStreamMetaCompact = uint64(thres)
case "meta_compact_size":
s, err := getStorageSize(mv)
if err != nil {
return &configErr{tk, fmt.Sprintf("%s %s", strings.ToLower(mk), err)}
}
if s < 0 {
return &configErr{tk, fmt.Sprintf("Expected an absolute size for %q, got %v", mk, mv)}
}
opts.JetStreamMetaCompactSize = uint64(s)
default:
if !tk.IsUsedVariable() {
err := &unknownConfigFieldErr{
@@ -2719,6 +2764,8 @@ func parseLeafNodes(v any, opts *Options, errors *[]error, warnings *[]error) er
opts.LeafNode.IsolateLeafnodeInterest = mv.(bool)
case "write_deadline":
opts.LeafNode.WriteDeadline = parseDuration("write_deadline", tk, mv, errors, warnings)
case "write_timeout":
opts.LeafNode.WriteTimeout = parseWriteDeadlinePolicy(tk, mv.(string), errors)
default:
if !tk.IsUsedVariable() {
err := &unknownConfigFieldErr{
@@ -2889,6 +2936,7 @@ func parseRemoteLeafNodes(v any, errors *[]error, warnings *[]error) ([]*RemoteL
continue
}
remote := &RemoteLeafOpts{}
var proxyToken token
for k, v := range rm {
tk, v = unwrapValue(v, &lt)
switch strings.ToLower(k) {
@@ -3022,7 +3070,7 @@ func parseRemoteLeafNodes(v any, errors *[]error, warnings *[]error) ([]*RemoteL
continue
}
// Capture the token for the "proxy" field itself, before the map iteration
proxyToken := tk
proxyToken = tk
for pk, pv := range proxyMap {
tk, pv = unwrapValue(pv, &lt)
switch strings.ToLower(pk) {
@@ -3047,16 +3095,6 @@ func parseRemoteLeafNodes(v any, errors *[]error, warnings *[]error) ([]*RemoteL
}
}
}
// Use the saved proxy token for validation errors, not the last field token
if warns, err := validateLeafNodeProxyOptions(remote); err != nil {
*errors = append(*errors, &configErr{proxyToken, err.Error()})
continue
} else {
// Add any warnings about proxy configuration
for _, warn := range warns {
*warnings = append(*warnings, &configErr{proxyToken, warn})
}
}
default:
if !tk.IsUsedVariable() {
err := &unknownConfigFieldErr{
@@ -3070,6 +3108,16 @@ func parseRemoteLeafNodes(v any, errors *[]error, warnings *[]error) ([]*RemoteL
}
}
}
// Use the saved proxy token for validation errors, not the last field token
if warns, err := validateLeafNodeProxyOptions(remote); err != nil {
*errors = append(*errors, &configErr{proxyToken, err.Error()})
continue
} else {
// Add any warnings about proxy configuration
for _, warn := range warns {
*warnings = append(*warnings, &configErr{proxyToken, warn})
}
}
remotes = append(remotes, remote)
}
return remotes, nil
+28 -38
View File
@@ -19,7 +19,6 @@ import (
"encoding/binary"
"errors"
"fmt"
"hash"
"math"
"math/rand"
"net"
@@ -153,7 +152,7 @@ type raft struct {
state atomic.Int32 // RaftState
leaderState atomic.Bool // Is in (complete) leader state.
leaderSince atomic.Pointer[time.Time] // How long since becoming leader.
hh hash.Hash64 // Highwayhash, used for snapshots
hh *highwayhash.Digest64 // Highwayhash, used for snapshots
snapfile string // Snapshot filename
csz int // Cluster size
@@ -447,7 +446,7 @@ func (s *Server) initRaftNode(accName string, cfg *RaftConfig, labels pprofLabel
// Set up the highwayhash for the snapshots.
key := sha256.Sum256([]byte(n.group))
n.hh, _ = highwayhash.New64(key[:])
n.hh, _ = highwayhash.NewDigest64(key[:])
// If we have a term and vote file (tav.idx on the filesystem) then read in
// what we think the term and vote was. It's possible these are out of date
@@ -1118,11 +1117,11 @@ func (n *raft) ResumeApply() {
func (n *raft) DrainAndReplaySnapshot() bool {
n.Lock()
defer n.Unlock()
n.warn("Draining and replaying snapshot")
snap, err := n.loadLastSnapshot()
if err != nil {
return false
}
n.warn("Draining and replaying snapshot")
n.pauseApplyLocked()
n.apply.drain()
n.commit = snap.lastIndex
@@ -1225,7 +1224,8 @@ func (n *raft) encodeSnapshot(snap *snapshot) []byte {
// Now do the hash for the end.
n.hh.Reset()
n.hh.Write(buf[:wi])
checksum := n.hh.Sum(nil)
var hb [highwayhash.Size64]byte
checksum := n.hh.Sum(hb[:0])
copy(buf[wi:], checksum)
wi += len(checksum)
return buf[:wi]
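// Aside: the concrete *highwayhash.Digest64 lets Sum append into a caller's
// stack buffer, avoiding the per-snapshot allocation of the old hash.Hash64
// path. The pattern standalone, assuming the Digest64 API from the bumped
// minio/highwayhash:
package main

import (
    "crypto/sha256"
    "fmt"

    "github.com/minio/highwayhash"
)

func main() {
    key := sha256.Sum256([]byte("some-raft-group"))
    hh, err := highwayhash.NewDigest64(key[:])
    if err != nil {
        panic(err)
    }
    hh.Write([]byte("snapshot payload"))
    var hb [highwayhash.Size64]byte
    fmt.Printf("%x\n", hh.Sum(hb[:0])) // appends the 8-byte sum into hb
}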
@@ -1450,7 +1450,8 @@ func (n *raft) loadLastSnapshot() (*snapshot, error) {
lchk := buf[hoff:]
n.hh.Reset()
n.hh.Write(buf[:hoff])
if !bytes.Equal(lchk[:], n.hh.Sum(nil)) {
var hb [highwayhash.Size64]byte
if !bytes.Equal(lchk[:], n.hh.Sum(hb[:0])) {
n.warn("Snapshot corrupt, checksums did not match")
os.Remove(n.snapfile)
n.snapfile = _EMPTY_
@@ -2652,9 +2653,6 @@ func (n *raft) runAsLeader() {
n.unsubscribe(rpsub)
n.Unlock()
}()
// To send out our initial peer state.
n.sendPeerState()
n.Unlock()
hb := time.NewTicker(hbInterval)
@@ -3072,6 +3070,15 @@ func (n *raft) applyCommit(index uint64) error {
committed = append(committed, newEntry(EntrySnapshot, e.Data))
case EntrySnapshot:
committed = append(committed, e)
// If we have no snapshot, install the leader's snapshot as our own.
if len(ae.entries) == 1 && n.snapfile == _EMPTY_ && ae.commit > 0 {
n.installSnapshot(&snapshot{
lastTerm: ae.pterm,
lastIndex: ae.commit,
peerstate: encodePeerState(&peerState{n.peerNames(), n.csz, n.extSt}),
data: e.Data,
})
}
case EntryPeerState:
if n.State() != Leader {
if ps, err := decodePeerState(e.Data); err == nil {
@@ -3477,6 +3484,7 @@ func (n *raft) resetWAL() {
// Lock should be held
func (n *raft) updateLeader(newLeader string) {
wasLeader := n.leader == n.id
n.leader = newLeader
n.hasleader.Store(newLeader != _EMPTY_)
if !n.pleader.Load() && newLeader != noLeader {
@@ -3493,9 +3501,9 @@ func (n *raft) updateLeader(newLeader string) {
}
}
// Reset last seen timestamps.
// If we're the leader we track everyone, and don't reset.
// If we are (or were) the leader we track(ed) everyone, and don't reset.
// But if we're a follower we only track the leader, and reset all others.
if newLeader != n.id {
if newLeader != n.id && !wasLeader {
for peer, ps := range n.peers {
if peer == newLeader {
continue
@@ -4627,37 +4635,19 @@ func (n *raft) switchToLeader() {
}
n.Lock()
defer n.Unlock()
n.debug("Switching to leader")
// Check if we have items pending as we are taking over.
sendHB := n.pindex > n.commit
n.lxfer = false
n.updateLeader(n.id)
leadChange := n.switchState(Leader)
n.switchState(Leader)
if leadChange {
// Wait for messages to be applied if we've stored more, otherwise signal immediately.
// It's important to wait signaling we're leader if we're not up-to-date yet, as that
// would mean we're in a consistent state compared with the previous leader.
if n.pindex > n.applied {
n.aflr = n.pindex
} else {
// We know we have applied all entries in our log and can signal immediately.
// For sanity reset applied floor back down to 0, so we aren't able to signal twice.
n.aflr = 0
if !n.leaderState.Swap(true) {
// Only update timestamp if leader state actually changed.
nowts := time.Now().UTC()
n.leaderSince.Store(&nowts)
}
n.updateLeadChange(true)
}
}
n.Unlock()
if sendHB {
n.sendHeartbeat()
}
// To send out our initial peer state.
// In our implementation this is equivalent to sending a NOOP-entry upon becoming leader.
// Wait for this message (and potentially more) to be applied.
// It's important to wait signaling we're leader if we're not up-to-date yet, as that
// would mean we're in a consistent state compared with the previous leader.
n.sendPeerState()
n.aflr = n.pindex
}
+4 -2
View File
@@ -1257,9 +1257,9 @@ func imposeOrder(value any) error {
slices.SortFunc(value.Gateways, func(i, j *RemoteGatewayOpts) int { return cmp.Compare(i.Name, j.Name) })
case WebsocketOpts:
slices.Sort(value.AllowedOrigins)
case string, bool, uint8, uint16, int, int32, int64, time.Duration, float64, nil, LeafNodeOpts, ClusterOpts, *tls.Config, PinnedCertSet,
case string, bool, uint8, uint16, uint64, int, int32, int64, time.Duration, float64, nil, LeafNodeOpts, ClusterOpts, *tls.Config, PinnedCertSet,
*URLAccResolver, *MemAccResolver, *DirAccResolver, *CacheDirAccResolver, Authentication, MQTTOpts, jwt.TagList,
*OCSPConfig, map[string]string, JSLimitOpts, StoreCipher, *OCSPResponseCacheConfig, *ProxiesConfig:
*OCSPConfig, map[string]string, JSLimitOpts, StoreCipher, *OCSPResponseCacheConfig, *ProxiesConfig, WriteTimeoutPolicy:
// explicitly skipped types
case *AuthCallout:
case JSTpmOpts:
@@ -1659,6 +1659,8 @@ func (s *Server) diffOptions(newOpts *Options) ([]option, error) {
return nil, fmt.Errorf("config reload not supported for jetstream max memory and store")
}
}
case "jetstreammetacompact", "jetstreammetacompactsize":
// Allowed at runtime but monitorCluster looks at s.opts directly, so no further work needed here.
case "websocket":
// Similar to gateways
tmpOld := oldValue.(WebsocketOpts)
+15 -3
View File
@@ -1031,7 +1031,7 @@ func (s *Server) sendAsyncInfoToClients(regCli, wsCli bool) {
c.flags.isSet(firstPongSent) {
// sendInfo takes care of checking if the connection is still
// valid or not, so don't duplicate tests here.
c.enqueueProto(c.generateClientInfoJSON(info))
c.enqueueProto(c.generateClientInfoJSON(info, true))
}
c.mu.Unlock()
}
@@ -2346,8 +2346,20 @@ func (s *Server) addRoute(c *client, didSolicit, sendDelayedInfo bool, gossipMod
if doOnce {
// Check to be consistent and future-proof, but it will be the same domain.
if s.sameDomain(info.Domain) {
s.nodeToInfo.Store(rHash,
nodeInfo{rn, s.info.Version, s.info.Cluster, info.Domain, id, nil, nil, nil, false, info.JetStream, false, false})
s.nodeToInfo.Store(rHash, nodeInfo{
name: rn,
version: s.info.Version,
cluster: s.info.Cluster,
domain: info.Domain,
id: id,
tags: nil,
cfg: nil,
stats: nil,
offline: false,
js: info.JetStream,
binarySnapshots: true, // Updated default to true. Versions 2.10.0+ support it.
accountNRG: false,
})
}
}
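// Aside: switching nodeInfo to a keyed literal is what makes the new
// binarySnapshots default visible; positional literals silently shift meaning
// when fields are added or reordered. A toy illustration:
package main

import "fmt"

type info struct {
    name            string
    js              bool
    binarySnapshots bool
}

func main() {
    a := info{"n1", true, true}                             // positional: breaks if fields move
    b := info{name: "n1", js: true, binarySnapshots: true}  // keyed: order-proof, rest zeroed
    fmt.Println(a == b)                                     // true
}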
+102 -17
View File
@@ -44,6 +44,8 @@ import (
// Allow dynamic profiling.
_ "net/http/pprof"
"expvar"
"github.com/klauspost/compress/s2"
"github.com/nats-io/jwt/v2"
"github.com/nats-io/nats-server/v2/logger"
@@ -841,15 +843,18 @@ func NewServer(opts *Options) (*Server, error) {
if opts.JetStream {
ourNode := getHash(serverName)
s.nodeToInfo.Store(ourNode, nodeInfo{
serverName,
VERSION,
opts.Cluster.Name,
opts.JetStreamDomain,
info.ID,
opts.Tags,
&JetStreamConfig{MaxMemory: opts.JetStreamMaxMemory, MaxStore: opts.JetStreamMaxStore, CompressOK: true},
nil,
false, true, true, true,
name: serverName,
version: VERSION,
cluster: opts.Cluster.Name,
domain: opts.JetStreamDomain,
id: info.ID,
tags: opts.Tags,
cfg: &JetStreamConfig{MaxMemory: opts.JetStreamMaxMemory, MaxStore: opts.JetStreamMaxStore, CompressOK: true},
stats: nil,
offline: false,
js: true,
binarySnapshots: true,
accountNRG: true,
})
}
@@ -1076,8 +1081,8 @@ func (s *Server) serverName() string {
return s.getOpts().ServerName
}
// ClientURL returns the URL used to connect clients. Helpful in testing
// when we designate a random client port (-1).
// ClientURL returns the URL used to connect clients.
// Helpful in tests and with in-process servers using a random client port (-1).
func (s *Server) ClientURL() string {
// FIXME(dlc) - should we add in user and pass if defined single?
opts := s.getOpts()
@@ -1090,6 +1095,19 @@ func (s *Server) ClientURL() string {
return u.String()
}
// WebsocketURL returns the URL used to connect websocket clients.
// Helpful in tests and with in-process servers using a random websocket port (-1).
func (s *Server) WebsocketURL() string {
opts := s.getOpts()
var u url.URL
u.Scheme = "ws"
if opts.Websocket.TLSConfig != nil {
u.Scheme = "wss"
}
u.Host = net.JoinHostPort(opts.Websocket.Host, fmt.Sprintf("%d", opts.Websocket.Port))
return u.String()
}
func validateCluster(o *Options) error {
if o.Cluster.Name != _EMPTY_ && strings.Contains(o.Cluster.Name, " ") {
return ErrClusterNameHasSpaces
@@ -2049,6 +2067,13 @@ func (s *Server) setRouteInfo(acc *Account) {
// associated with an account name.
// Lock MUST NOT be held upon entry.
func (s *Server) lookupAccount(name string) (*Account, error) {
return s.lookupOrFetchAccount(name, true)
}
// lookupOrFetchAccount is a function to return the account structure
// associated with an account name.
// Lock MUST NOT be held upon entry.
func (s *Server) lookupOrFetchAccount(name string, fetch bool) (*Account, error) {
var acc *Account
if v, ok := s.accounts.Load(name); ok {
acc = v.(*Account)
@@ -2058,7 +2083,7 @@ func (s *Server) lookupAccount(name string) (*Account, error) {
// return the latest information from the resolver.
if acc.IsExpired() {
s.Debugf("Requested account [%s] has expired", name)
if s.AccountResolver() != nil {
if s.AccountResolver() != nil && fetch {
if err := s.updateAccount(acc); err != nil {
// This error could mask expired, so just return expired here.
return nil, ErrAccountExpired
@@ -2070,7 +2095,7 @@ func (s *Server) lookupAccount(name string) (*Account, error) {
return acc, nil
}
// If we have a resolver see if it can fetch the account.
if s.AccountResolver() == nil {
if s.AccountResolver() == nil || !fetch {
return nil, ErrMissingAccount
}
return s.fetchAccount(name)
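// Aside: the fetch flag guards any resolver round-trip, so cache-only callers
// (such as read loops) can never block on network I/O. The shape of it as a
// standalone analog, with an illustrative store instead of the server's types:
package main

import (
    "errors"
    "fmt"
    "sync"
)

var errMissing = errors.New("missing account")

type store struct {
    mu    sync.RWMutex
    cache map[string]string
    fetch func(string) (string, error) // may block on network I/O
}

func (s *store) lookup(name string, allowFetch bool) (string, error) {
    s.mu.RLock()
    v, ok := s.cache[name]
    s.mu.RUnlock()
    if ok {
        return v, nil
    }
    if !allowFetch {
        return "", errMissing // cache-only callers bail out here
    }
    return s.fetch(name)
}

func main() {
    s := &store{cache: map[string]string{}, fetch: func(n string) (string, error) { return n + "-claims", nil }}
    fmt.Println(s.lookup("ACME", false)) // "" missing account
    fmt.Println(s.lookup("ACME", true))  // ACME-claims <nil>
}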
@@ -2781,6 +2806,11 @@ func (s *Server) AcceptLoop(clr chan struct{}) {
s.Noticef("Listening for client connections on %s",
net.JoinHostPort(opts.Host, strconv.Itoa(l.Addr().(*net.TCPAddr).Port)))
// Alert if PROXY protocol is enabled
if opts.ProxyProtocol {
s.Noticef("PROXY protocol enabled for client connections")
}
// Alert of TLS enabled.
if opts.TLSConfig != nil {
s.Noticef("TLS required for client connections")
@@ -3017,6 +3047,7 @@ const (
HealthzPath = "/healthz"
IPQueuesPath = "/ipqueuesz"
RaftzPath = "/raftz"
ExpvarzPath = "/debug/vars"
)
func (s *Server) basePath(p string) string {
@@ -3135,6 +3166,8 @@ func (s *Server) startMonitoring(secure bool) error {
mux.HandleFunc(s.basePath(IPQueuesPath), s.HandleIPQueuesz)
// Raftz
mux.HandleFunc(s.basePath(RaftzPath), s.HandleRaftz)
// Expvarz
mux.Handle(s.basePath(ExpvarzPath), expvar.Handler())
// Do not set a WriteTimeout because it could cause cURL/browser
// to return empty response or unable to display page if the
@@ -3307,8 +3340,11 @@ func (s *Server) createClientEx(conn net.Conn, inProcess bool) *client {
}
// Decide if we are going to require TLS or not and generate INFO json.
// If we have ProxyProtocol enabled then we won't include the client
// IP in the initial INFO, as that would leak the proxy IP itself.
// In that case we'll send another INFO after the client introduces itself.
tlsRequired := info.TLSRequired
infoBytes := c.generateClientInfoJSON(info)
infoBytes := c.generateClientInfoJSON(info, !opts.ProxyProtocol)
// Send our information, except if TLS and TLSHandshakeFirst is requested.
if !tlsFirst {
@@ -3379,7 +3415,7 @@ func (s *Server) createClientEx(conn net.Conn, inProcess bool) *client {
// different than the current value and regenerate infoBytes.
if orgInfoTLSReq != info.TLSRequired {
info.TLSRequired = orgInfoTLSReq
infoBytes = c.generateClientInfoJSON(info)
infoBytes = c.generateClientInfoJSON(info, !opts.ProxyProtocol)
}
c.sendProtoNow(infoBytes)
// Set the boolean to false for the rest of the function.
@@ -3392,7 +3428,7 @@ func (s *Server) createClientEx(conn net.Conn, inProcess bool) *client {
// one the client wants. We'll always allow this for in-process
// connections.
if !isClosed && !tlsFirst && opts.TLSConfig != nil && (inProcess || opts.AllowNonTLS) {
pre = make([]byte, 4)
pre = make([]byte, 6) // Minimum 6 bytes for proxy proto in next step.
c.nc.SetReadDeadline(time.Now().Add(secondsToDuration(opts.TLSTimeout)))
n, _ := io.ReadFull(c.nc, pre[:])
c.nc.SetReadDeadline(time.Time{})
@@ -3404,6 +3440,55 @@ func (s *Server) createClientEx(conn net.Conn, inProcess bool) *client {
}
}
// Check for proxy protocol if enabled.
if !isClosed && !tlsRequired && opts.ProxyProtocol {
if len(pre) == 0 {
// There has been no pre-read yet; do one now so we can work out
// whether the client is trying to negotiate PROXY.
pre = make([]byte, 6)
c.nc.SetReadDeadline(time.Now().Add(proxyProtoReadTimeout))
n, _ := io.ReadFull(c.nc, pre)
c.nc.SetReadDeadline(time.Time{})
pre = pre[:n]
}
conn = &tlsMixConn{conn, bytes.NewBuffer(pre)}
addr, err := readProxyProtoHeader(conn)
if err != nil && err != errProxyProtoUnrecognized {
// err != errProxyProtoUnrecognized implies that we detected a proxy
// protocol header but we failed to parse it, so don't continue.
c.mu.Unlock()
s.Warnf("Error reading PROXY protocol header from %s: %v", conn.RemoteAddr(), err)
c.closeConnection(ProtocolViolation)
return nil
}
// If addr is nil, it was a LOCAL/UNKNOWN command (health check)
// Use the connection as-is
if addr != nil {
c.nc = &proxyConn{
Conn: conn,
remoteAddr: addr,
}
// These were set already by initClient, override them.
c.host = addr.srcIP.String()
c.port = addr.srcPort
}
// At this point, err is either:
// - nil => we parsed the proxy protocol header successfully
// - errProxyProtoUnrecognized => we didn't detect proxy protocol at all
// We only clear the pre-read if we successfully read the protocol header
// so that the next step doesn't re-read it. Otherwise we have to assume
// that it's a non-proxied connection and we want the pre-read to remain
// for the next step.
if err == nil {
pre = nil
}
// Because we have ProxyProtocol enabled, our earlier INFO message didn't
// include the client_ip. If we need to send it again then we will include
// it, but sending it here immediately can confuse clients who have just
// PING'd.
infoBytes = c.generateClientInfoJSON(info, true)
}
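// Aside: what the v1 text preamble looks like on the wire. The server's
// readProxyProtoHeader is the real parser (including the LOCAL/UNKNOWN
// health-check case noted above); this toy only splits a well-formed line:
package main

import (
    "bufio"
    "fmt"
    "strings"
)

func main() {
    wire := "PROXY TCP4 203.0.113.7 10.0.0.1 51234 4222\r\nCONNECT {}\r\n"
    r := bufio.NewReader(strings.NewReader(wire))
    hdr, _ := r.ReadString('\n')
    f := strings.Fields(hdr) // ["PROXY" "TCP4" srcIP dstIP srcPort dstPort]
    fmt.Println("original client:", f[2]+":"+f[4])
}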
// Check for TLS
if !isClosed && tlsRequired {
if s.connRateCounter != nil && !s.connRateCounter.allow() {
@@ -4688,7 +4773,7 @@ func (s *Server) LDMClientByID(id uint64) error {
// sendInfo takes care of checking if the connection is still
// valid or not, so don't duplicate tests here.
c.Debugf("Sending Lame Duck Mode info to client")
c.enqueueProto(c.generateClientInfoJSON(info))
c.enqueueProto(c.generateClientInfoJSON(info, true))
return nil
} else {
return errors.New("client does not support Lame Duck Mode or is not ready to receive the notification")
+53 -15
View File
@@ -2012,6 +2012,18 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
}
}
// Check the subject transform if any
if cfg.SubjectTransform != nil {
if cfg.SubjectTransform.Source != _EMPTY_ && !IsValidSubject(cfg.SubjectTransform.Source) {
return StreamConfig{}, NewJSStreamTransformInvalidSourceError(fmt.Errorf("%w %s", ErrBadSubject, cfg.SubjectTransform.Source))
}
err := ValidateMapping(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination)
if err != nil {
return StreamConfig{}, NewJSStreamTransformInvalidDestinationError(err)
}
}
// If we have a republish directive check if we can create a transform here.
if cfg.RePublish != nil {
// Check to make sure source is a valid subset of the subjects we have.
@@ -2023,6 +2035,18 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
}
cfg.RePublish.Source = fwcs
}
// A RePublish from '>' to '>' could be used, normally this would form a cycle with the stream subjects.
// But if this aligns to a different subject based on the transform, we allow it still.
// The RePublish will be implicit based on the transform, but only if the transform's source
// is the only stream subject.
if cfg.RePublish.Destination == fwcs && cfg.RePublish.Source == fwcs && cfg.SubjectTransform != nil &&
len(cfg.Subjects) == 1 && cfg.SubjectTransform.Source == cfg.Subjects[0] {
if pedantic {
return StreamConfig{}, NewJSPedanticError(fmt.Errorf("implicit republish based on subject transform"))
}
// RePublish all messages with the transformed subject.
cfg.RePublish.Source, cfg.RePublish.Destination = cfg.SubjectTransform.Destination, cfg.SubjectTransform.Destination
}
var formsCycle bool
for _, subj := range cfg.Subjects {
if SubjectsCollide(cfg.RePublish.Destination, subj) {
@@ -2038,18 +2062,6 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
}
}
// Check the subject transform if any
if cfg.SubjectTransform != nil {
if cfg.SubjectTransform.Source != _EMPTY_ && !IsValidSubject(cfg.SubjectTransform.Source) {
return StreamConfig{}, NewJSStreamTransformInvalidSourceError(fmt.Errorf("%w %s", ErrBadSubject, cfg.SubjectTransform.Source))
}
err := ValidateMapping(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination)
if err != nil {
return StreamConfig{}, NewJSStreamTransformInvalidDestinationError(err)
}
}
// Remove placement if it's an empty object.
if cfg.Placement != nil && reflect.DeepEqual(cfg.Placement, &Placement{}) {
cfg.Placement = nil
@@ -5287,8 +5299,8 @@ func (mset *stream) getDirectRequest(req *JSApiMsgGetRequest, reply string) {
// If batch was requested send EOB.
if isBatchRequest {
// Update if the stream's lasts sequence has moved past our validThrough.
if mset.lastSeq() > validThrough {
// Update if the stream's last sequence has moved past our validThrough.
if mset.lseq > validThrough {
np, _ = store.NumPending(seq, req.NextFor, false)
}
hdr := fmt.Appendf(nil, eob, np, lseq)
@@ -6507,7 +6519,7 @@ func (mset *stream) processJetStreamBatchMsg(batchId, subject, reply string, hdr
}
// Reject unsupported headers.
if getExpectedLastMsgId(hdr) != _EMPTY_ {
if getExpectedLastMsgId(bhdr) != _EMPTY_ {
return errorOnUnsupported(seq, JSExpectedLastMsgId)
}
@@ -7172,6 +7184,20 @@ func (mset *stream) getPublicConsumers() []*consumer {
return obs
}
// This returns all consumers that are DIRECT.
func (mset *stream) getDirectConsumers() []*consumer {
mset.clsMu.RLock()
defer mset.clsMu.RUnlock()
var obs []*consumer
for _, o := range mset.cList {
if o.cfg.Direct {
obs = append(obs, o)
}
}
return obs
}
// 2 minutes plus up to 30s jitter.
const (
defaultCheckInterestStateT = 2 * time.Minute
@@ -7593,7 +7619,19 @@ func (mset *stream) ackMsg(o *consumer, seq uint64) bool {
// Only propose message deletion to the stream if we're consumer leader, otherwise all followers would also propose.
// We must be the consumer leader, since we know for sure we've stored the message and don't register as pre-ack.
if o != nil && !o.IsLeader() {
// Currently, interest-based streams can race on "no interest": consumer creates/updates go over
// the meta layer while published messages go over the stream layer, so some servers may have
// stored an initial set of messages that gained new interest while others have not. To get the
// stream back in sync, we allow moving the first sequence up.
// TODO(mvv): later on only the stream leader should determine "no interest"
interestRaiseFirst := mset.cfg.Retention == InterestPolicy && seq == state.FirstSeq
mset.mu.Unlock()
if interestRaiseFirst {
if _, err := store.RemoveMsg(seq); err == ErrStoreEOF {
// This should not happen, but being pedantic.
mset.registerPreAckLock(o, seq)
}
}
// Must still mark as removal if follower. If we become leader later, we must be able to retry the proposal.
return true
}
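// Editorial sketch (not part of this change): one possible walk-through of
// the follower path above, with illustrative numbers. Interest-based
// stream, state.FirstSeq == 7, and a non-leader consumer acks seq 7:
//
//    interestRaiseFirst == true    // InterestPolicy && seq == FirstSeq
//    store.RemoveMsg(7)            // raises the first sequence to 8 locally
//    // ErrStoreEOF would mean seq 7 was never stored on this server, so the
//    // ack is remembered via registerPreAckLock and applied once it is.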
+4 -3
@@ -19,6 +19,7 @@ import (
"math"
"math/rand"
"regexp"
"slices"
"strconv"
"strings"
)
@@ -378,7 +379,7 @@ func transformTokenize(subject string) string {
// We need to make the appropriate markers for the wildcards etc.
i := 1
var nda []string
for _, token := range strings.Split(subject, tsep) {
for token := range strings.SplitSeq(subject, tsep) {
if token == pwcs {
nda = append(nda, fmt.Sprintf("$%d", i))
i++
@@ -399,7 +400,7 @@ func transformUntokenize(subject string) (string, []string) {
var phs []string
var nda []string
for _, token := range strings.Split(subject, tsep) {
for token := range strings.SplitSeq(subject, tsep) {
if args := getMappingFunctionArgs(wildcardMappingFunctionRegEx, token); (len(token) > 1 && token[0] == '$' && token[1] >= '1' && token[1] <= '9') || (len(args) == 1 && args[0] != _EMPTY_) {
phs = append(phs, token)
nda = append(nda, pwcs)
@@ -439,7 +440,7 @@ func (tr *subjectTransform) Match(subject string) (string, error) {
tts := tokenizeSubject(subject)
// TODO(jnm): optimization -> not sure this is actually needed but was there in initial code
if !isValidLiteralSubject(tts) {
if !isValidLiteralSubject(slices.Values(tts)) {
return _EMPTY_, ErrBadSubject
}
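// Editorial sketch (not part of this change): slices.Values (Go 1.23+)
// adapts the existing []string tokens to the iter.Seq[string] parameter
// isValidLiteralSubject now takes, without copying:
//
//    tts := []string{"foo", "bar"}
//    ok := isValidLiteralSubject(slices.Values(tts))                  // true
//    ok = isValidLiteralSubject(slices.Values([]string{"foo", "*"}))  // false: wildcard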
+8 -29
@@ -16,6 +16,7 @@ package server
import (
"bytes"
"errors"
"iter"
"strings"
"sync"
"sync/atomic"
@@ -357,16 +358,6 @@ func (s *Sublist) chkForRemoveNotification(subject, queue string) {
func (s *Sublist) Insert(sub *subscription) error {
// Copy the subject since we hold on to it and it might be part of a large byte slice.
subject := string(sub.subject)
tsa := [32]string{}
tokens := tsa[:0]
start := 0
for i := 0; i < len(subject); i++ {
if subject[i] == btsep {
tokens = append(tokens, subject[start:i])
start = i + 1
}
}
tokens = append(tokens, subject[start:])
s.Lock()
@@ -374,7 +365,7 @@ func (s *Sublist) Insert(sub *subscription) error {
var n *node
l := s.root
for _, t := range tokens {
for t := range strings.SplitSeq(subject, tsep) {
lt := len(t)
if lt == 0 || sfwc {
s.Unlock()
@@ -851,16 +842,6 @@ type lnt struct {
// Raw low-level remove; can do batches with the lock held outside.
func (s *Sublist) remove(sub *subscription, shouldLock bool, doCacheUpdates bool) error {
subject := string(sub.subject)
tsa := [32]string{}
tokens := tsa[:0]
start := 0
for i := 0; i < len(subject); i++ {
if subject[i] == btsep {
tokens = append(tokens, subject[start:i])
start = i + 1
}
}
tokens = append(tokens, subject[start:])
if shouldLock {
s.Lock()
@@ -875,7 +856,7 @@ func (s *Sublist) remove(sub *subscription, shouldLock bool, doCacheUpdates bool
var lnts [32]lnt
levels := lnts[:0]
for _, t := range tokens {
for t := range strings.SplitSeq(subject, tsep) {
lt := len(t)
if lt == 0 || sfwc {
return ErrInvalidSubject
@@ -1230,8 +1211,7 @@ func isValidSubject(subject string, checkRunes bool) bool {
}
}
sfwc := false
tokens := strings.Split(subject, tsep)
for _, t := range tokens {
for t := range strings.SplitSeq(subject, tsep) {
length := len(t)
if length == 0 || sfwc {
return false
@@ -1254,12 +1234,12 @@ func isValidSubject(subject string, checkRunes bool) bool {
// IsValidLiteralSubject returns true if a subject is valid and literal (no wildcards), false otherwise
func IsValidLiteralSubject(subject string) bool {
return isValidLiteralSubject(strings.Split(subject, tsep))
return isValidLiteralSubject(strings.SplitSeq(subject, tsep))
}
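// Editorial sketch (not part of this change): strings.SplitSeq (Go 1.24+)
// yields tokens lazily, so the wrapper no longer allocates a token slice.
// Expected behavior, unchanged by the refactor:
//
//    IsValidLiteralSubject("orders.eu.created") // true
//    IsValidLiteralSubject("orders.*.created")  // false: wildcard token
//    IsValidLiteralSubject("orders..created")   // false: empty token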
// isValidLiteralSubject returns true if the tokens are valid and literal (no wildcards), false otherwise
func isValidLiteralSubject(tokens []string) bool {
func isValidLiteralSubject(tokens []string) bool {
for _, t := range tokens {
func isValidLiteralSubject(tokens iter.Seq[string]) bool {
for t := range tokens {
if len(t) == 0 {
return false
}
@@ -1279,9 +1259,8 @@ func ValidateMapping(src string, dest string) error {
if dest == _EMPTY_ {
return nil
}
subjectTokens := strings.Split(dest, tsep)
sfwc := false
for _, t := range subjectTokens {
for t := range strings.SplitSeq(dest, tsep) {
length := len(t)
if length == 0 || sfwc {
return &mappingDestinationErr{t, ErrInvalidMappingDestinationSubject}
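// Editorial sketch (not part of this change): illustrative inputs for the
// validation above, using the $N placeholder syntax handled by
// transformUntokenize; the subjects themselves are assumptions.
//
//    ValidateMapping("orders.*", "events.$1") // nil: $1 consumes the wildcard
//    ValidateMapping("orders.*", _EMPTY_)     // nil: empty destination allowed
//    ValidateMapping("orders.*", "events..x") // mappingDestinationErr: empty token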
+23
@@ -23,6 +23,7 @@ import (
"net"
"net/url"
"reflect"
"runtime"
"strconv"
"strings"
"time"
@@ -340,3 +341,25 @@ func generateInfoJSON(info *Info) []byte {
pcs := [][]byte{[]byte("INFO"), b, []byte(CR_LF)}
return bytes.Join(pcs, []byte(" "))
}
// parallelTaskQueue starts a number of goroutines and returns a channel
// which functions can be sent to for queued parallel execution. The
// goroutines will stop running when the returned channel is closed and
// all queued tasks have completed. The passed-in mp sets the worker count:
// a value <= 0 defaults to GOMAXPROCS, and a positive value is raised to
// at least GOMAXPROCS.
func parallelTaskQueue(mp int) chan<- func() {
if rmp := runtime.GOMAXPROCS(-1); mp <= 0 {
mp = rmp
} else {
mp = max(rmp, mp)
}
tq := make(chan func(), mp)
for range mp {
go func() {
for fn := range tq {
fn()
}
}()
}
return tq
}
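// Editorial sketch (not part of this change): typical use of the queue.
// jobs is an illustrative []func(); requires "sync" for the WaitGroup.
//
//    tq := parallelTaskQueue(0) // <= 0 => GOMAXPROCS workers
//    var wg sync.WaitGroup
//    for _, job := range jobs {
//        wg.Add(1)
//        tq <- func() { defer wg.Done(); job() }
//    }
//    wg.Wait()
//    close(tq) // workers exit once the queue drains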
+1 -1
@@ -1294,7 +1294,7 @@ func (s *Server) createWSClient(conn net.Conn, ws *websocket) *client {
}
c.initClient()
c.Debugf("Client connection created")
c.sendProtoNow(c.generateClientInfoJSON(info))
c.sendProtoNow(c.generateClientInfoJSON(info, true))
c.mu.Unlock()
s.mu.Lock()
+4 -4
@@ -868,8 +868,8 @@ github.com/justinas/alice
# github.com/kevinburke/ssh_config v1.2.0
## explicit
github.com/kevinburke/ssh_config
# github.com/klauspost/compress v1.18.0
## explicit; go 1.22
# github.com/klauspost/compress v1.18.1
## explicit; go 1.23
github.com/klauspost/compress
github.com/klauspost/compress/flate
github.com/klauspost/compress/fse
@@ -1068,7 +1068,7 @@ github.com/mileusna/useragent
# github.com/minio/crc64nvme v1.1.0
## explicit; go 1.22
github.com/minio/crc64nvme
# github.com/minio/highwayhash v1.0.3
# github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76
## explicit; go 1.15
github.com/minio/highwayhash
# github.com/minio/md5-simd v1.1.2
@@ -1150,7 +1150,7 @@ github.com/munnerz/goautoneg
# github.com/nats-io/jwt/v2 v2.8.0
## explicit; go 1.23.0
github.com/nats-io/jwt/v2
# github.com/nats-io/nats-server/v2 v2.12.1
# github.com/nats-io/nats-server/v2 v2.12.2
## explicit; go 1.24.0
github.com/nats-io/nats-server/v2/conf
github.com/nats-io/nats-server/v2/internal/fastrand