mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-05-25 14:39:00 -05:00
build(deps): bump github.com/nats-io/nats-server/v2
Bumps [github.com/nats-io/nats-server/v2](https://github.com/nats-io/nats-server) from 2.12.4 to 2.12.5.
- [Release notes](https://github.com/nats-io/nats-server/releases)
- [Changelog](https://github.com/nats-io/nats-server/blob/main/RELEASES.md)
- [Commits](https://github.com/nats-io/nats-server/compare/v2.12.4...v2.12.5)

---
updated-dependencies:
- dependency-name: github.com/nats-io/nats-server/v2
  dependency-version: 2.12.5
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
committed by
Ralf Haferkamp
parent
25b0c75375
commit
aebf15fe81
+1
-1
@@ -3,7 +3,7 @@ package internal
|
||||
// --------------------------------------------------------------------------------
|
||||
// Versions
|
||||
// --------------------------------------------------------------------------------
|
||||
const SDK_Version = "0.5.0"
|
||||
const SDK_Version = "0.6.0"
|
||||
const Protocol_Version = "1.1.0"
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
+15
-8
@@ -7,7 +7,7 @@ This package provides various compression algorithms.
|
||||
* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a drop-in replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib).
|
||||
* [snappy](https://github.com/klauspost/compress/tree/master/snappy) is a drop-in replacement for `github.com/golang/snappy` offering better compression and concurrent streams.
|
||||
* [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding.
|
||||
* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently.
|
||||
* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped/zstd HTTP requests efficiently.
|
||||
* [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation.
|
||||
|
||||
[](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories)
|
||||
@@ -26,8 +26,14 @@ This package will support the current Go version and 2 versions back.
|
||||
Use the links above for more information on each.
|
||||
|
||||
# changelog
|
||||
* Jan 16th, 2026 [1.18.3](https://github.com/klauspost/compress/releases/tag/v1.18.3)
|
||||
* Downstream CVE-2025-61728. See [golang/go#77102](https://github.com/golang/go/issues/77102).
|
||||
|
||||
* Oct 20, 2025 - [1.18.1](https://github.com/klauspost/compress/releases/tag/v1.18.1)
|
||||
* Dec 1st, 2025 - [1.18.2](https://github.com/klauspost/compress/releases/tag/v1.18.2)
|
||||
* flate: Fix invalid encoding on level 9 with single value input in https://github.com/klauspost/compress/pull/1115
|
||||
* flate: reduce stateless allocations by @RXamzin in https://github.com/klauspost/compress/pull/1106
|
||||
|
||||
* Oct 20, 2025 - [1.18.1](https://github.com/klauspost/compress/releases/tag/v1.18.1) - RETRACTED
|
||||
* zstd: Add simple zstd EncodeTo/DecodeTo functions https://github.com/klauspost/compress/pull/1079
|
||||
* zstd: Fix incorrect buffer size in dictionary encodes https://github.com/klauspost/compress/pull/1059
|
||||
* s2: check for cap, not len of buffer in EncodeBetter/Best by @vdarulis in https://github.com/klauspost/compress/pull/1080
|
||||
@@ -603,7 +609,7 @@ While the release has been extensively tested, it is recommended to testing when
|
||||
|
||||
# deflate usage
|
||||
|
||||
The packages are drop-in replacements for standard libraries. Simply replace the import path to use them:
|
||||
The packages are drop-in replacements for standard library [deflate](https://godoc.org/github.com/klauspost/compress/flate), [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip), and [zlib](https://godoc.org/github.com/klauspost/compress/zlib). Simply replace the import path to use them:
|
||||
|
||||
Typical speed is about 2x of the standard library packages.
|
||||
|
||||
@@ -614,17 +620,15 @@ Typical speed is about 2x of the standard library packages.
|
||||
| `archive/zip` | `github.com/klauspost/compress/zip` | [zip](https://pkg.go.dev/github.com/klauspost/compress/zip?tab=doc) |
|
||||
| `compress/flate` | `github.com/klauspost/compress/flate` | [flate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc) |
|
||||
|
||||
* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib).
|
||||
You may also be interested in [pgzip](https://github.com/klauspost/pgzip), a drop-in replacement for gzip that supports multithreaded compression on big files, and in the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages.
|
||||
|
||||
You may also be interested in [pgzip](https://github.com/klauspost/pgzip), which is a drop in replacement for gzip, which support multithreaded compression on big files and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages.
|
||||
|
||||
The packages contains the same as the standard library, so you can use the godoc for that: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/), [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/).
|
||||
The packages implement the same API as the standard library, so you can use the original godoc documentation: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/), [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/).
|
||||
|
||||
Currently there is only minor speedup on decompression (mostly CRC32 calculation).
|
||||
|
||||
Memory usage is typically 1MB for a Writer. stdlib is in the same range.
|
||||
If you expect to have a lot of concurrently allocated Writers consider using
|
||||
the stateless compress described below.
|
||||
the stateless compression described below.
|
||||
|
||||
For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing).
|
||||
|
||||
@@ -684,3 +688,6 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
|
||||
|
||||
This code is licensed under the same conditions as the original Go code. See LICENSE file.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+19
-9
@@ -39,9 +39,6 @@ type Decoder struct {
|
||||
|
||||
frame *frameDec
|
||||
|
||||
// Custom dictionaries.
|
||||
dicts map[uint32]*dict
|
||||
|
||||
// streamWg is the waitgroup for all streams
|
||||
streamWg sync.WaitGroup
|
||||
}
|
||||
@@ -101,12 +98,10 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
|
||||
d.current.err = ErrDecoderNilInput
|
||||
}
|
||||
|
||||
// Transfer option dicts.
|
||||
d.dicts = make(map[uint32]*dict, len(d.o.dicts))
|
||||
for _, dc := range d.o.dicts {
|
||||
d.dicts[dc.id] = dc
|
||||
// Initialize dict map if needed.
|
||||
if d.o.dicts == nil {
|
||||
d.o.dicts = make(map[uint32]*dict)
|
||||
}
|
||||
d.o.dicts = nil
|
||||
|
||||
// Create decoders
|
||||
d.decoders = make(chan *blockDec, d.o.concurrent)
|
||||
@@ -238,6 +233,21 @@ func (d *Decoder) Reset(r io.Reader) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ResetWithOptions will reset the decoder and apply the given options
|
||||
// for the next stream or DecodeAll operation.
|
||||
// Options are applied on top of the existing options.
|
||||
// Some options cannot be changed on reset and will return an error.
|
||||
func (d *Decoder) ResetWithOptions(r io.Reader, opts ...DOption) error {
|
||||
d.o.resetOpt = true
|
||||
defer func() { d.o.resetOpt = false }()
|
||||
for _, o := range opts {
|
||||
if err := o(&d.o); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return d.Reset(r)
|
||||
}
|
||||
|
||||
// drainOutput will drain the output until errEndOfStream is sent.
|
||||
func (d *Decoder) drainOutput() {
|
||||
if d.current.cancel != nil {
|
||||
@@ -930,7 +940,7 @@ decodeStream:
|
||||
}
|
||||
|
||||
func (d *Decoder) setDict(frame *frameDec) (err error) {
|
||||
dict, ok := d.dicts[frame.DictionaryID]
|
||||
dict, ok := d.o.dicts[frame.DictionaryID]
|
||||
if ok {
|
||||
if debugDecoder {
|
||||
println("setting dict", frame.DictionaryID)
|
||||
|
||||
+52
-8
@@ -20,10 +20,11 @@ type decoderOptions struct {
|
||||
concurrent int
|
||||
maxDecodedSize uint64
|
||||
maxWindowSize uint64
|
||||
dicts []*dict
|
||||
dicts map[uint32]*dict
|
||||
ignoreChecksum bool
|
||||
limitToCap bool
|
||||
decodeBufsBelow int
|
||||
resetOpt bool
|
||||
}
|
||||
|
||||
func (o *decoderOptions) setDefault() {
|
||||
@@ -42,8 +43,15 @@ func (o *decoderOptions) setDefault() {
|
||||
|
||||
// WithDecoderLowmem will set whether to use a lower amount of memory,
|
||||
// but possibly have to allocate more while running.
|
||||
// Cannot be changed with ResetWithOptions.
|
||||
func WithDecoderLowmem(b bool) DOption {
|
||||
return func(o *decoderOptions) error { o.lowMem = b; return nil }
|
||||
return func(o *decoderOptions) error {
|
||||
if o.resetOpt && b != o.lowMem {
|
||||
return errors.New("WithDecoderLowmem cannot be changed on Reset")
|
||||
}
|
||||
o.lowMem = b
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecoderConcurrency sets the number of created decoders.
|
||||
@@ -53,18 +61,23 @@ func WithDecoderLowmem(b bool) DOption {
|
||||
// inflight blocks.
|
||||
// When decoding streams and setting maximum to 1,
|
||||
// no async decoding will be done.
|
||||
// The value supplied must be at least 0.
|
||||
// When a value of 0 is provided GOMAXPROCS will be used.
|
||||
// By default this will be set to 4 or GOMAXPROCS, whatever is lower.
|
||||
// Cannot be changed with ResetWithOptions.
|
||||
func WithDecoderConcurrency(n int) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if n < 0 {
|
||||
return errors.New("concurrency must be at least 1")
|
||||
return errors.New("concurrency must be at least 0")
|
||||
}
|
||||
newVal := n
|
||||
if n == 0 {
|
||||
o.concurrent = runtime.GOMAXPROCS(0)
|
||||
} else {
|
||||
o.concurrent = n
|
||||
newVal = runtime.GOMAXPROCS(0)
|
||||
}
|
||||
if o.resetOpt && newVal != o.concurrent {
|
||||
return errors.New("WithDecoderConcurrency cannot be changed on Reset")
|
||||
}
|
||||
o.concurrent = newVal
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -73,6 +86,7 @@ func WithDecoderConcurrency(n int) DOption {
|
||||
// non-streaming operations or maximum window size for streaming operations.
|
||||
// This can be used to control memory usage of potentially hostile content.
|
||||
// Maximum is 1 << 63 bytes. Default is 64GiB.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithDecoderMaxMemory(n uint64) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if n == 0 {
|
||||
@@ -92,16 +106,20 @@ func WithDecoderMaxMemory(n uint64) DOption {
|
||||
// "zstd --train" from the Zstandard reference implementation.
|
||||
//
|
||||
// If several dictionaries with the same ID are provided, the last one will be used.
|
||||
// Can be changed with ResetWithOptions.
|
||||
//
|
||||
// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
|
||||
func WithDecoderDicts(dicts ...[]byte) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if o.dicts == nil {
|
||||
o.dicts = make(map[uint32]*dict)
|
||||
}
|
||||
for _, b := range dicts {
|
||||
d, err := loadDict(b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.dicts = append(o.dicts, d)
|
||||
o.dicts[d.id] = d
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -109,12 +127,16 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
|
||||
|
||||
// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
|
||||
// The slice content can be arbitrary data.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithDecoderDictRaw(id uint32, content []byte) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
|
||||
return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
|
||||
}
|
||||
o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
|
||||
if o.dicts == nil {
|
||||
o.dicts = make(map[uint32]*dict)
|
||||
}
|
||||
o.dicts[id] = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -124,6 +146,7 @@ func WithDecoderDictRaw(id uint32, content []byte) DOption {
|
||||
// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.
|
||||
// If WithDecoderMaxMemory is set to a lower value, that will be used.
|
||||
// Default is 512MB, Maximum is ~3.75 TB as per zstandard spec.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithDecoderMaxWindow(size uint64) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if size < MinWindowSize {
|
||||
@@ -141,6 +164,7 @@ func WithDecoderMaxWindow(size uint64) DOption {
|
||||
// or any size set in WithDecoderMaxMemory.
|
||||
// This can be used to limit decoding to a specific maximum output size.
|
||||
// Disabled by default.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithDecodeAllCapLimit(b bool) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
o.limitToCap = b
|
||||
@@ -153,17 +177,37 @@ func WithDecodeAllCapLimit(b bool) DOption {
|
||||
// This typically uses less allocations but will have the full decompressed object in memory.
|
||||
// Note that DecodeAllCapLimit will disable this, as well as giving a size of 0 or less.
|
||||
// Default is 128KiB.
|
||||
// Cannot be changed with ResetWithOptions.
|
||||
func WithDecodeBuffersBelow(size int) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if o.resetOpt && size != o.decodeBufsBelow {
|
||||
return errors.New("WithDecodeBuffersBelow cannot be changed on Reset")
|
||||
}
|
||||
o.decodeBufsBelow = size
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// IgnoreChecksum allows to forcibly ignore checksum checking.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func IgnoreChecksum(b bool) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
o.ignoreChecksum = b
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecoderDictDelete removes dictionaries by ID.
|
||||
// If no ids are passed, all dictionaries are deleted.
|
||||
// Should be used with ResetWithOptions.
|
||||
func WithDecoderDictDelete(ids ...uint32) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if len(ids) == 0 {
|
||||
clear(o.dicts)
|
||||
}
|
||||
for _, id := range ids {
|
||||
delete(o.dicts, id)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
+16
@@ -131,6 +131,22 @@ func (e *Encoder) Reset(w io.Writer) {
|
||||
s.frameContentSize = 0
|
||||
}
|
||||
|
||||
// ResetWithOptions will re-initialize the writer and apply the given options
|
||||
// as a new, independent stream.
|
||||
// Options are applied on top of the existing options.
|
||||
// Some options cannot be changed on reset and will return an error.
|
||||
func (e *Encoder) ResetWithOptions(w io.Writer, opts ...EOption) error {
|
||||
e.o.resetOpt = true
|
||||
defer func() { e.o.resetOpt = false }()
|
||||
for _, o := range opts {
|
||||
if err := o(&e.o); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
e.Reset(w)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ResetContentSize will reset and set a content size for the next stream.
|
||||
// If the bytes written does not match the size given an error will be returned
|
||||
// when calling Close().
|
||||
|
||||
+41
-3
@@ -14,6 +14,7 @@ type EOption func(*encoderOptions) error
|
||||
|
||||
// options retains accumulated state of multiple options.
|
||||
type encoderOptions struct {
|
||||
resetOpt bool
|
||||
concurrent int
|
||||
level EncoderLevel
|
||||
single *bool
|
||||
@@ -71,19 +72,28 @@ func (o encoderOptions) encoder() encoder {
|
||||
|
||||
// WithEncoderCRC will add CRC value to output.
|
||||
// Output will be 4 bytes larger.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithEncoderCRC(b bool) EOption {
|
||||
return func(o *encoderOptions) error { o.crc = b; return nil }
|
||||
}
|
||||
|
||||
// WithEncoderConcurrency will set the concurrency,
|
||||
// meaning the maximum number of encoders to run concurrently.
|
||||
// The value supplied must be at least 1.
|
||||
// The value supplied must be at least 0.
|
||||
// When a value of 0 is provided GOMAXPROCS will be used.
|
||||
// For streams, setting a value of 1 will disable async compression.
|
||||
// By default this will be set to GOMAXPROCS.
|
||||
// Cannot be changed with ResetWithOptions.
|
||||
func WithEncoderConcurrency(n int) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
if n <= 0 {
|
||||
return fmt.Errorf("concurrency must be at least 1")
|
||||
if n < 0 {
|
||||
return errors.New("concurrency must at least 0")
|
||||
}
|
||||
if n == 0 {
|
||||
n = runtime.GOMAXPROCS(0)
|
||||
}
|
||||
if o.resetOpt && n != o.concurrent {
|
||||
return errors.New("WithEncoderConcurrency cannot be changed on Reset")
|
||||
}
|
||||
o.concurrent = n
|
||||
return nil
|
||||
@@ -95,6 +105,7 @@ func WithEncoderConcurrency(n int) EOption {
|
||||
// A larger value will enable better compression but allocate more memory and,
|
||||
// for above-default values, take considerably longer.
|
||||
// The default value is determined by the compression level and max 8MB.
|
||||
// Cannot be changed with ResetWithOptions.
|
||||
func WithWindowSize(n int) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
switch {
|
||||
@@ -105,6 +116,9 @@ func WithWindowSize(n int) EOption {
|
||||
case (n & (n - 1)) != 0:
|
||||
return errors.New("window size must be a power of 2")
|
||||
}
|
||||
if o.resetOpt && n != o.windowSize {
|
||||
return errors.New("WithWindowSize cannot be changed on Reset")
|
||||
}
|
||||
|
||||
o.windowSize = n
|
||||
o.customWindow = true
|
||||
@@ -122,6 +136,7 @@ func WithWindowSize(n int) EOption {
|
||||
// n must be > 0 and <= 1GB, 1<<30 bytes.
|
||||
// The padded area will be filled with data from crypto/rand.Reader.
|
||||
// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithEncoderPadding(n int) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
if n <= 0 {
|
||||
@@ -215,12 +230,16 @@ func (e EncoderLevel) String() string {
|
||||
}
|
||||
|
||||
// WithEncoderLevel specifies a predefined compression level.
|
||||
// Cannot be changed with ResetWithOptions.
|
||||
func WithEncoderLevel(l EncoderLevel) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
switch {
|
||||
case l <= speedNotSet || l >= speedLast:
|
||||
return fmt.Errorf("unknown encoder level")
|
||||
}
|
||||
if o.resetOpt && l != o.level {
|
||||
return errors.New("WithEncoderLevel cannot be changed on Reset")
|
||||
}
|
||||
o.level = l
|
||||
if !o.customWindow {
|
||||
switch o.level {
|
||||
@@ -248,6 +267,7 @@ func WithEncoderLevel(l EncoderLevel) EOption {
|
||||
// WithZeroFrames will encode 0 length input as full frames.
|
||||
// This can be needed for compatibility with zstandard usage,
|
||||
// but is not needed for this package.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithZeroFrames(b bool) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
o.fullZero = b
|
||||
@@ -259,6 +279,7 @@ func WithZeroFrames(b bool) EOption {
|
||||
// Disabling this will skip incompressible data faster, but in cases with no matches but
|
||||
// skewed character distribution compression is lost.
|
||||
// Default value depends on the compression level selected.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithAllLitEntropyCompression(b bool) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
o.customALEntropy = true
|
||||
@@ -270,6 +291,7 @@ func WithAllLitEntropyCompression(b bool) EOption {
|
||||
// WithNoEntropyCompression will always skip entropy compression of literals.
|
||||
// This can be useful if content has matches, but unlikely to benefit from entropy
|
||||
// compression. Usually the slight speed improvement is not worth enabling this.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithNoEntropyCompression(b bool) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
o.noEntropy = b
|
||||
@@ -287,6 +309,7 @@ func WithNoEntropyCompression(b bool) EOption {
|
||||
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
|
||||
// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
|
||||
// This setting has no effect on streamed encodes.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithSingleSegment(b bool) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
o.single = &b
|
||||
@@ -298,8 +321,12 @@ func WithSingleSegment(b bool) EOption {
|
||||
// slower encoding speed.
|
||||
// This will not change the window size which is the primary function for reducing
|
||||
// memory usage. See WithWindowSize.
|
||||
// Cannot be changed with ResetWithOptions.
|
||||
func WithLowerEncoderMem(b bool) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
if o.resetOpt && b != o.lowMem {
|
||||
return errors.New("WithLowerEncoderMem cannot be changed on Reset")
|
||||
}
|
||||
o.lowMem = b
|
||||
return nil
|
||||
}
|
||||
@@ -311,6 +338,7 @@ func WithLowerEncoderMem(b bool) EOption {
|
||||
// "zstd --train" from the Zstandard reference implementation.
|
||||
//
|
||||
// The encoder *may* choose to use no dictionary instead for certain payloads.
|
||||
// Can be changed with ResetWithOptions.
|
||||
//
|
||||
// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
|
||||
func WithEncoderDict(dict []byte) EOption {
|
||||
@@ -328,6 +356,7 @@ func WithEncoderDict(dict []byte) EOption {
|
||||
//
|
||||
// The slice content may contain arbitrary data. It will be used as an initial
|
||||
// history.
|
||||
// Can be changed with ResetWithOptions.
|
||||
func WithEncoderDictRaw(id uint32, content []byte) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
|
||||
@@ -337,3 +366,12 @@ func WithEncoderDictRaw(id uint32, content []byte) EOption {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithEncoderDictDelete clears the dictionary, so no dictionary will be used.
|
||||
// Should be used with ResetWithOptions.
|
||||
func WithEncoderDictDelete() EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
o.dict = nil
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
+10
-4
@@ -138,6 +138,12 @@ type sconns struct {
|
||||
leafs int32
|
||||
}
|
||||
|
||||
// clampInt64ToInt32 safely converts an int64 limit to int32,
|
||||
// clamping values to the [math.MinInt32, math.MaxInt32] range.
|
||||
func clampInt64ToInt32(v int64) int32 {
|
||||
return int32(max(math.MinInt32, min(math.MaxInt32, v)))
|
||||
}
|
||||
|
||||
// Import stream mapping struct
|
||||
type streamImport struct {
|
||||
acc *Account
|
||||
@@ -3716,10 +3722,10 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim
|
||||
|
||||
// Now do limits if they are present.
|
||||
a.mu.Lock()
|
||||
a.msubs = int32(ac.Limits.Subs)
|
||||
a.mpay = int32(ac.Limits.Payload)
|
||||
a.mconns = int32(ac.Limits.Conn)
|
||||
a.mleafs = int32(ac.Limits.LeafNodeConn)
|
||||
a.msubs = clampInt64ToInt32(ac.Limits.Subs)
|
||||
a.mpay = clampInt64ToInt32(ac.Limits.Payload)
|
||||
a.mconns = clampInt64ToInt32(ac.Limits.Conn)
|
||||
a.mleafs = clampInt64ToInt32(ac.Limits.LeafNodeConn)
|
||||
a.disallowBearer = ac.Limits.DisallowBearer
|
||||
// Check for any revocations
|
||||
if len(ac.Revocations) > 0 {
|
||||
|
||||
+20
-4
@@ -421,7 +421,9 @@ func (c *client) matchesPinnedCert(tlsPinnedCerts PinnedCertSet) bool {
|
||||
}
|
||||
|
||||
var (
|
||||
mustacheRE = regexp.MustCompile(`{{2}([^}]+)}{2}`)
|
||||
mustacheRE = regexp.MustCompile(`{{2}([^}]+)}{2}`)
|
||||
maxPermTemplateSubjectExpansions = 4096
|
||||
errPermTemplateExpansionLimit error = fmt.Errorf("template expansion exceeds limit")
|
||||
)
|
||||
|
||||
func processUserPermissionsTemplate(lim jwt.UserPermissionLimits, ujwt *jwt.UserClaims, acc *Account) (jwt.UserPermissionLimits, error) {
|
||||
@@ -456,11 +458,11 @@ func processUserPermissionsTemplate(lim jwt.UserPermissionLimits, ujwt *jwt.User
|
||||
return p
|
||||
}
|
||||
isTag := func(op string) []string {
|
||||
if strings.EqualFold("tag(", op[:4]) && strings.HasSuffix(op, ")") {
|
||||
if len(op) >= 4 && strings.EqualFold("tag(", op[:4]) && strings.HasSuffix(op, ")") {
|
||||
v := strings.TrimPrefix(op, "tag(")
|
||||
v = strings.TrimSuffix(v, ")")
|
||||
return []string{"tag", v}
|
||||
} else if strings.EqualFold("account-tag(", op[:12]) && strings.HasSuffix(op, ")") {
|
||||
} else if len(op) >= 12 && strings.EqualFold("account-tag(", op[:12]) && strings.HasSuffix(op, ")") {
|
||||
v := strings.TrimPrefix(op, "account-tag(")
|
||||
v = strings.TrimSuffix(v, ")")
|
||||
return []string{"account-tag", v}
|
||||
@@ -529,7 +531,7 @@ func processUserPermissionsTemplate(lim jwt.UserPermissionLimits, ujwt *jwt.User
|
||||
// generate an invalid subject?
|
||||
values[tokenNum] = []string{" "}
|
||||
}
|
||||
} else if failOnBadSubject {
|
||||
} else {
|
||||
return nil, fmt.Errorf("template operation in %q: %q is not defined", list[i], op)
|
||||
}
|
||||
}
|
||||
@@ -544,6 +546,20 @@ func processUserPermissionsTemplate(lim jwt.UserPermissionLimits, ujwt *jwt.User
|
||||
return nil, fmt.Errorf("generated invalid subject")
|
||||
}
|
||||
} else {
|
||||
expCount := 1
|
||||
for _, v := range values {
|
||||
if len(v) == 0 {
|
||||
expCount = 0
|
||||
break
|
||||
}
|
||||
if expCount > maxPermTemplateSubjectExpansions/len(v) {
|
||||
return nil, fmt.Errorf("%w: %d", errPermTemplateExpansionLimit, maxPermTemplateSubjectExpansions)
|
||||
}
|
||||
expCount *= len(v)
|
||||
}
|
||||
if len(emittedList) > maxPermTemplateSubjectExpansions-expCount {
|
||||
return nil, fmt.Errorf("%w: %d", errPermTemplateExpansionLimit, maxPermTemplateSubjectExpansions)
|
||||
}
|
||||
a := nArrayCartesianProduct(values...)
|
||||
for _, aa := range a {
|
||||
subj := list[i]
|
||||
|
||||
+15
-5
@@ -32,6 +32,14 @@ const (
|
||||
AuthRequestXKeyHeader = "Nats-Server-Xkey"
|
||||
)
|
||||
|
||||
func titleCase(m string) string {
|
||||
r := []rune(m)
|
||||
if len(r) == 0 {
|
||||
return _EMPTY_
|
||||
}
|
||||
return string(append([]rune{unicode.ToUpper(r[0])}, r[1:]...))
|
||||
}
|
||||
|
||||
// Process a callout on this client's behalf.
|
||||
func (s *Server) processClientOrLeafCallout(c *client, opts *Options, proxyRequired, trustedProxy bool) (authorized bool, errStr string) {
|
||||
isOperatorMode := len(opts.TrustedKeys) > 0
|
||||
@@ -50,6 +58,13 @@ func (s *Server) processClientOrLeafCallout(c *client, opts *Options, proxyRequi
|
||||
} else {
|
||||
acc = c.acc
|
||||
}
|
||||
if acc == nil {
|
||||
// FIX for https://github.com/nats-io/nats-server/issues/7841
|
||||
// hand rolled creds on leafnode became crasher here
|
||||
errStr = fmt.Sprintf("%s not mapped to a callout account", c.kindString())
|
||||
s.Warnf(errStr)
|
||||
return false, errStr
|
||||
}
|
||||
|
||||
// Check if we have been requested to encrypt.
|
||||
var xkp nkeys.KeyPair
|
||||
@@ -234,11 +249,6 @@ func (s *Server) processClientOrLeafCallout(c *client, opts *Options, proxyRequi
|
||||
}
|
||||
|
||||
processReply := func(_ *subscription, rc *client, racc *Account, subject, reply string, rmsg []byte) {
|
||||
titleCase := func(m string) string {
|
||||
r := []rune(m)
|
||||
return string(append([]rune{unicode.ToUpper(r[0])}, r[1:]...))
|
||||
}
|
||||
|
||||
arc, err := decodeResponse(rc, rmsg, racc)
|
||||
if err != nil {
|
||||
c.authViolation()
|
||||
|
||||
+8
-6
@@ -928,14 +928,14 @@ func (c *client) applyAccountLimits() {
|
||||
c.msubs = jwt.NoLimit
|
||||
if c.opts.JWT != _EMPTY_ { // user jwt implies account
|
||||
if uc, _ := jwt.DecodeUserClaims(c.opts.JWT); uc != nil {
|
||||
atomic.StoreInt32(&c.mpay, int32(uc.Limits.Payload))
|
||||
c.msubs = int32(uc.Limits.Subs)
|
||||
atomic.StoreInt32(&c.mpay, clampInt64ToInt32(uc.Limits.Payload))
|
||||
c.msubs = clampInt64ToInt32(uc.Limits.Subs)
|
||||
if uc.IssuerAccount != _EMPTY_ && uc.IssuerAccount != uc.Issuer {
|
||||
if scope, ok := c.acc.signingKeys[uc.Issuer]; ok {
|
||||
if userScope, ok := scope.(*jwt.UserScope); ok {
|
||||
// if signing key disappeared or changed and we don't get here, the client will be disconnected
|
||||
c.mpay = int32(userScope.Template.Limits.Payload)
|
||||
c.msubs = int32(userScope.Template.Limits.Subs)
|
||||
c.mpay = clampInt64ToInt32(userScope.Template.Limits.Payload)
|
||||
c.msubs = clampInt64ToInt32(userScope.Template.Limits.Subs)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1686,9 +1686,11 @@ func (c *client) flushOutbound() bool {
|
||||
|
||||
cw.Reset(&bb)
|
||||
for _, buf := range collapsed {
|
||||
if _, err = cw.Write(buf); err != nil {
|
||||
break
|
||||
if err == nil {
|
||||
_, err = cw.Write(buf)
|
||||
}
|
||||
// Return always after consumed or error.
|
||||
nbPoolPut(buf)
|
||||
}
|
||||
if err == nil {
|
||||
err = cw.Close()
|
||||
|
||||
+1
-1
@@ -66,7 +66,7 @@ func init() {
|
||||
|
||||
const (
|
||||
// VERSION is the current version for the server.
|
||||
VERSION = "2.12.4"
|
||||
VERSION = "2.12.5"
|
||||
|
||||
// PROTO is the currently supported protocol.
|
||||
// 0 was the original
|
||||
|
||||
+88
-62
@@ -42,7 +42,6 @@ import (
|
||||
const (
|
||||
JSPullRequestPendingMsgs = "Nats-Pending-Messages"
|
||||
JSPullRequestPendingBytes = "Nats-Pending-Bytes"
|
||||
JSPullRequestWrongPinID = "NATS/1.0 423 Nats-Wrong-Pin-Id\r\n\r\n"
|
||||
JSPullRequestNatsPinId = "Nats-Pin-Id"
|
||||
)
|
||||
|
||||
@@ -512,7 +511,7 @@ type consumer struct {
|
||||
// Details described in ADR-42.
|
||||
|
||||
// currentPinId is the current nuid for the pinned consumer.
|
||||
// If the Consumer is running in `PriorityPinnedClient` mode, server will
|
||||
// If the Consumer is running in `PriorityPinnedClient` mode, server will
|
||||
// pick up a new nuid and assign it to first pending pull request.
|
||||
currentPinId string
|
||||
/// pinnedTtl is the remaining time before the current PinId expires.
|
||||
@@ -825,7 +824,7 @@ func checkConsumerCfg(
|
||||
return NewJSStreamInvalidConfigError(ErrBadSubject)
|
||||
}
|
||||
for inner, ssubject := range subjectFilters {
|
||||
if inner != outer && SubjectsCollide(subject, ssubject) {
|
||||
if inner != outer && subjectIsSubsetMatch(subject, ssubject) {
|
||||
return NewJSConsumerOverlappingSubjectFiltersError()
|
||||
}
|
||||
}
|
||||
@@ -1059,17 +1058,22 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri
|
||||
return nil, NewJSConsumerDoesNotExistError()
|
||||
}
|
||||
|
||||
// Check for any limits, if the config for the consumer sets a limit we check against that
|
||||
// but if not we use the value from account limits, if account limits is more restrictive
|
||||
// than stream config we prefer the account limits to handle cases where account limits are
|
||||
// updated during the lifecycle of the stream
|
||||
maxc := cfg.MaxConsumers
|
||||
if maxc <= 0 || (selectedLimits.MaxConsumers > 0 && selectedLimits.MaxConsumers < maxc) {
|
||||
maxc = selectedLimits.MaxConsumers
|
||||
}
|
||||
if maxc > 0 && mset.numPublicConsumers() >= maxc {
|
||||
mset.mu.Unlock()
|
||||
return nil, NewJSMaximumConsumersLimitError()
|
||||
// If we're clustered we've already done this check, only do this if we're a standalone server.
|
||||
// But if we're standalone, only enforce if we're not recovering, since the MaxConsumers could've
|
||||
// been updated while we already had more consumers on disk.
|
||||
if !s.JetStreamIsClustered() && s.standAloneMode() && !isRecovering {
|
||||
// Check for any limits, if the config for the consumer sets a limit we check against that
|
||||
// but if not we use the value from account limits, if account limits is more restrictive
|
||||
// than stream config we prefer the account limits to handle cases where account limits are
|
||||
// updated during the lifecycle of the stream
|
||||
maxc := cfg.MaxConsumers
|
||||
if maxc <= 0 || (selectedLimits.MaxConsumers > 0 && selectedLimits.MaxConsumers < maxc) {
|
||||
maxc = selectedLimits.MaxConsumers
|
||||
}
|
||||
if maxc > 0 && mset.numPublicConsumers() >= maxc {
|
||||
mset.mu.Unlock()
|
||||
return nil, NewJSMaximumConsumersLimitError()
|
||||
}
|
||||
}
|
||||
|
||||
// Check on stream type conflicts with WorkQueues.
|
||||
@@ -1215,14 +1219,13 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri
|
||||
|
||||
// If we have multiple filter subjects, create a sublist which we will use
|
||||
// in calling store.LoadNextMsgMulti.
|
||||
if len(o.cfg.FilterSubjects) > 0 {
|
||||
o.filters = gsl.NewSublist[struct{}]()
|
||||
for _, filter := range o.cfg.FilterSubjects {
|
||||
o.filters.Insert(filter, struct{}{})
|
||||
}
|
||||
} else {
|
||||
// Make sure this is nil otherwise.
|
||||
if len(o.subjf) <= 1 {
|
||||
o.filters = nil
|
||||
} else {
|
||||
o.filters = gsl.NewSublist[struct{}]()
|
||||
for _, filter := range o.subjf {
|
||||
o.filters.Insert(filter.subject, struct{}{})
|
||||
}
|
||||
}
|
||||
|
||||
if o.store != nil && o.store.HasState() {
|
||||
@@ -1402,8 +1405,12 @@ func (o *consumer) monitorQuitC() <-chan struct{} {
|
||||
if o == nil {
|
||||
return nil
|
||||
}
|
||||
o.mu.RLock()
|
||||
defer o.mu.RUnlock()
|
||||
o.mu.Lock()
|
||||
defer o.mu.Unlock()
|
||||
// Recreate if a prior monitor routine was stopped.
|
||||
if o.mqch == nil {
|
||||
o.mqch = make(chan struct{})
|
||||
}
|
||||
return o.mqch
|
||||
}
|
||||
|
||||
@@ -1686,6 +1693,7 @@ func (o *consumer) setLeader(isLeader bool) {
|
||||
} else if o.srv.gateway.enabled {
|
||||
stopAndClearTimer(&o.gwdtmr)
|
||||
}
|
||||
o.unassignPinId()
|
||||
// If we were the leader make sure to drain queued up acks.
|
||||
if wasLeader {
|
||||
o.ackMsgs.drain()
|
||||
@@ -2045,6 +2053,7 @@ func (o *consumer) deleteNotActive() {
|
||||
if o.srv != nil {
|
||||
qch = o.srv.quitCh
|
||||
}
|
||||
oqch := o.qch
|
||||
o.mu.Unlock()
|
||||
if js != nil {
|
||||
cqch = js.clusterQuitC()
|
||||
@@ -2093,6 +2102,9 @@ func (o *consumer) deleteNotActive() {
|
||||
return
|
||||
case <-cqch:
|
||||
return
|
||||
case <-oqch:
|
||||
// The consumer has stopped already, likely by an earlier delete proposal being applied.
|
||||
return
|
||||
}
|
||||
js.mu.RLock()
|
||||
if js.shuttingDown {
|
||||
@@ -2840,14 +2852,10 @@ func (o *consumer) releaseAnyPendingRequests(isAssigned bool) {
|
||||
if o.mset == nil || o.outq == nil || o.waiting.len() == 0 {
|
||||
return
|
||||
}
|
||||
var hdr []byte
|
||||
if !isAssigned {
|
||||
hdr = []byte("NATS/1.0 409 Consumer Deleted\r\n\r\n")
|
||||
}
|
||||
|
||||
wq := o.waiting
|
||||
for wr := wq.head; wr != nil; {
|
||||
if hdr != nil {
|
||||
if !isAssigned {
|
||||
hdr := []byte("NATS/1.0 409 Consumer Deleted\r\n\r\n")
|
||||
o.outq.send(newJSPubMsg(wr.reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
|
||||
}
|
||||
next := wr.next
|
||||
@@ -3051,6 +3059,11 @@ func (o *consumer) setStoreState(state *ConsumerState) error {
|
||||
err := o.store.Update(state)
|
||||
if err == nil {
|
||||
o.applyState(state)
|
||||
} else if err == ErrStoreOldUpdate {
|
||||
// Our store already has a newer state, which is normal during recovery
|
||||
// when the consumer was loaded from disk before the meta snapshot state
|
||||
// was applied.
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -3915,7 +3928,12 @@ func (o *consumer) setPinnedTimer(priorityGroup string) {
|
||||
} else {
|
||||
o.pinnedTtl = time.AfterFunc(o.cfg.PinnedTTL, func() {
|
||||
o.mu.Lock()
|
||||
o.currentPinId = _EMPTY_
|
||||
// Skip if already unset.
|
||||
if o.currentPinId == _EMPTY_ {
|
||||
o.mu.Unlock()
|
||||
return
|
||||
}
|
||||
o.unassignPinId()
|
||||
o.sendUnpinnedAdvisoryLocked(priorityGroup, "timeout")
|
||||
o.mu.Unlock()
|
||||
o.signalNewMessages()
|
||||
@@ -3923,6 +3941,28 @@ func (o *consumer) setPinnedTimer(priorityGroup string) {
|
||||
}
|
||||
}
|
||||
|
||||
// Lock should be held.
|
||||
func (o *consumer) assignNewPinId(wr *waitingRequest) {
|
||||
if wr.priorityGroup == nil || wr.priorityGroup.Group == _EMPTY_ {
|
||||
return
|
||||
}
|
||||
o.currentPinId = nuid.Next()
|
||||
o.pinnedTS = time.Now().UTC()
|
||||
wr.priorityGroup.Id = o.currentPinId
|
||||
o.setPinnedTimer(wr.priorityGroup.Group)
|
||||
o.sendPinnedAdvisoryLocked(wr.priorityGroup.Group)
|
||||
}
|
||||
|
||||
// Lock should be held.
|
||||
func (o *consumer) unassignPinId() {
|
||||
o.currentPinId = _EMPTY_
|
||||
o.pinnedTS = time.Time{}
|
||||
if o.pinnedTtl != nil {
|
||||
o.pinnedTtl.Stop()
|
||||
o.pinnedTtl = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Return next waiting request. This will check for expirations but not noWait or interest.
|
||||
// That will be handled by processWaiting.
|
||||
// Lock should be held.
|
||||
@@ -3933,11 +3973,6 @@ func (o *consumer) nextWaiting(sz int) *waitingRequest {
|
||||
|
||||
// Check if server needs to assign a new pin id.
|
||||
needNewPin := o.currentPinId == _EMPTY_ && o.cfg.PriorityPolicy == PriorityPinnedClient
|
||||
// As long as we support only one priority group, we can capture that group here and reuse it.
|
||||
var priorityGroup string
|
||||
if len(o.cfg.PriorityGroups) > 0 {
|
||||
priorityGroup = o.cfg.PriorityGroups[0]
|
||||
}
|
||||
|
||||
numCycled := 0
|
||||
for wr := o.waiting.peek(); !o.waiting.isEmpty(); wr = o.waiting.peek() {
|
||||
@@ -3971,15 +4006,12 @@ func (o *consumer) nextWaiting(sz int) *waitingRequest {
|
||||
if wr.expires.IsZero() || time.Now().Before(wr.expires) {
|
||||
if needNewPin {
|
||||
if wr.priorityGroup.Id == _EMPTY_ {
|
||||
o.currentPinId = nuid.Next()
|
||||
o.pinnedTS = time.Now().UTC()
|
||||
wr.priorityGroup.Id = o.currentPinId
|
||||
o.setPinnedTimer(priorityGroup)
|
||||
|
||||
o.assignNewPinId(wr)
|
||||
} else {
|
||||
// There is pin id set, but not a matching one. Send a notification to the client and remove the request.
|
||||
// Probably this is the old pin id.
|
||||
o.outq.send(newJSPubMsg(wr.reply, _EMPTY_, _EMPTY_, []byte(JSPullRequestWrongPinID), nil, nil, 0))
|
||||
hdr := fmt.Appendf(nil, "NATS/1.0 423 Nats-Wrong-Pin-Id\r\n%s: %d\r\n%s: %d\r\n\r\n", JSPullRequestPendingMsgs, wr.n, JSPullRequestPendingBytes, wr.b)
|
||||
o.outq.send(newJSPubMsg(wr.reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
|
||||
o.waiting.removeCurrent()
|
||||
if o.node != nil {
|
||||
o.removeClusterPendingRequest(wr.reply)
|
||||
@@ -4000,7 +4032,8 @@ func (o *consumer) nextWaiting(sz int) *waitingRequest {
|
||||
continue
|
||||
} else {
|
||||
// There is pin id set, but not a matching one. Send a notification to the client and remove the request.
|
||||
o.outq.send(newJSPubMsg(wr.reply, _EMPTY_, _EMPTY_, []byte(JSPullRequestWrongPinID), nil, nil, 0))
|
||||
hdr := fmt.Appendf(nil, "NATS/1.0 423 Nats-Wrong-Pin-Id\r\n%s: %d\r\n%s: %d\r\n\r\n", JSPullRequestPendingMsgs, wr.n, JSPullRequestPendingBytes, wr.b)
|
||||
o.outq.send(newJSPubMsg(wr.reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
|
||||
o.waiting.removeCurrent()
|
||||
if o.node != nil {
|
||||
o.removeClusterPendingRequest(wr.reply)
|
||||
@@ -4012,9 +4045,13 @@ func (o *consumer) nextWaiting(sz int) *waitingRequest {
|
||||
|
||||
if o.cfg.PriorityPolicy == PriorityOverflow {
|
||||
if wr.priorityGroup != nil &&
|
||||
// If both limits are zero we don't cycle and the request will be fulfilled.
|
||||
(wr.priorityGroup.MinPending > 0 || wr.priorityGroup.MinAckPending > 0) &&
|
||||
// We need to check o.npc+1, because before calling nextWaiting, we do o.npc--
|
||||
(wr.priorityGroup.MinPending > 0 && wr.priorityGroup.MinPending > o.npc+1 ||
|
||||
wr.priorityGroup.MinAckPending > 0 && wr.priorityGroup.MinAckPending > int64(len(o.pending))) {
|
||||
// If one OR the other limit is exceeded, we want to fulfill the request.
|
||||
// This is an inverted check. For clarity, we check the positive condition and negate.
|
||||
!((wr.priorityGroup.MinPending > 0 && wr.priorityGroup.MinPending <= o.npc+1) ||
|
||||
(wr.priorityGroup.MinAckPending > 0 && wr.priorityGroup.MinAckPending <= int64(len(o.pending)))) {
|
||||
o.waiting.cycle()
|
||||
numCycled++
|
||||
// We're done cycling through the requests.
|
||||
@@ -4025,19 +4062,10 @@ func (o *consumer) nextWaiting(sz int) *waitingRequest {
|
||||
}
|
||||
}
|
||||
if wr.acc.sl.HasInterest(wr.interest) {
|
||||
if needNewPin {
|
||||
o.sendPinnedAdvisoryLocked(priorityGroup)
|
||||
}
|
||||
return o.waiting.popOrPopAndRequeue(o.cfg.PriorityPolicy)
|
||||
} else if time.Since(wr.received) < defaultGatewayRecentSubExpiration && (o.srv.leafNodeEnabled || o.srv.gateway.enabled) {
|
||||
if needNewPin {
|
||||
o.sendPinnedAdvisoryLocked(priorityGroup)
|
||||
}
|
||||
return o.waiting.popOrPopAndRequeue(o.cfg.PriorityPolicy)
|
||||
} else if o.srv.gateway.enabled && o.srv.hasGatewayInterest(wr.acc.Name, wr.interest) {
|
||||
if needNewPin {
|
||||
o.sendPinnedAdvisoryLocked(priorityGroup)
|
||||
}
|
||||
return o.waiting.popOrPopAndRequeue(o.cfg.PriorityPolicy)
|
||||
}
|
||||
} else {
|
||||
@@ -4195,15 +4223,7 @@ func (o *consumer) processNextMsgRequest(reply string, msg []byte) {
|
||||
sendErr(400, "Bad Request - Priority Group missing")
|
||||
return
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, group := range o.cfg.PriorityGroups {
|
||||
if group == priorityGroup.Group {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
if !slices.Contains(o.cfg.PriorityGroups, priorityGroup.Group) {
|
||||
sendErr(400, "Bad Request - Invalid Priority Group")
|
||||
return
|
||||
}
|
||||
@@ -4441,6 +4461,8 @@ func (o *consumer) getNextMsg() (*jsPubMsg, uint64, error) {
|
||||
// scheduled for redelivery, but it has been removed from the stream.
|
||||
// o.processTerm is called in a goroutine so could run after we get here.
|
||||
// That will correct the pending state and delivery/ack floors, so just skip here.
|
||||
pmsg.returnToPool()
|
||||
pmsg = nil
|
||||
continue
|
||||
}
|
||||
return pmsg, dc, err
|
||||
@@ -4468,6 +4490,7 @@ func (o *consumer) getNextMsg() (*jsPubMsg, uint64, error) {
|
||||
sm, err := o.mset.store.LoadMsg(seq, &pmsg.StoreMsg)
|
||||
if sm == nil || err != nil {
|
||||
pmsg.returnToPool()
|
||||
pmsg = nil
|
||||
}
|
||||
o.sseq++
|
||||
return pmsg, 1, err
|
||||
@@ -4979,6 +5002,7 @@ func (o *consumer) loopAndGatherMsgs(qch chan struct{}) {
|
||||
o.addToRedeliverQueue(pmsg.seq)
|
||||
}
|
||||
pmsg.returnToPool()
|
||||
pmsg = nil
|
||||
goto waitForMsgs
|
||||
}
|
||||
|
||||
@@ -4989,6 +5013,7 @@ func (o *consumer) loopAndGatherMsgs(qch chan struct{}) {
|
||||
select {
|
||||
case <-qch:
|
||||
pmsg.returnToPool()
|
||||
pmsg = nil
|
||||
return
|
||||
case <-time.After(delay):
|
||||
}
|
||||
@@ -5009,6 +5034,7 @@ func (o *consumer) loopAndGatherMsgs(qch chan struct{}) {
|
||||
select {
|
||||
case <-qch:
|
||||
pmsg.returnToPool()
|
||||
pmsg = nil
|
||||
return
|
||||
case <-time.After(delay):
|
||||
}
|
||||
|
||||
+3
@@ -251,6 +251,9 @@ type mappingDestinationErr struct {
|
||||
}
|
||||
|
||||
func (e *mappingDestinationErr) Error() string {
|
||||
if e.token == _EMPTY_ {
|
||||
return e.err.Error()
|
||||
}
|
||||
return fmt.Sprintf("%s in %s", e.err, e.token)
|
||||
}
|
||||
|
||||
|
||||
+191
-119
@@ -508,28 +508,39 @@ func newFileStoreWithCreated(fcfg FileStoreConfig, cfg StreamConfig, created tim
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fs.mu.Lock()
|
||||
// Check if our prior state remembers a last sequence past where we can see.
|
||||
// Unless we're async flushing, in which case this can happen if some blocks weren't flushed.
|
||||
if prior.LastSeq > fs.state.LastSeq && !fs.fcfg.AsyncFlush {
|
||||
if mb, err := fs.newMsgBlockForWrite(); err != nil {
|
||||
fs.mu.Unlock()
|
||||
return nil, err
|
||||
} else if err = mb.writeTombstone(prior.LastSeq, prior.LastTime.UnixNano()); err != nil {
|
||||
fs.mu.Unlock()
|
||||
return nil, err
|
||||
}
|
||||
fs.state.LastSeq, fs.state.LastTime = prior.LastSeq, prior.LastTime
|
||||
if fs.state.Msgs == 0 {
|
||||
fs.state.FirstSeq = fs.state.LastSeq + 1
|
||||
fs.state.FirstTime = time.Time{}
|
||||
}
|
||||
if fs.ld != nil {
|
||||
if _, err := fs.newMsgBlockForWrite(); err == nil {
|
||||
if err = fs.writeTombstone(prior.LastSeq, prior.LastTime.UnixNano()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
// Since we recovered here, make sure to kick ourselves to write out our stream state.
|
||||
fs.dirty++
|
||||
fs.mu.Unlock()
|
||||
}
|
||||
|
||||
// Lock during the remainder of the recovery.
|
||||
fs.mu.Lock()
|
||||
// Use defer to ensure the lock is released if any of the enforcement operations
|
||||
// run into issues to avoid potential deadlocks on exit.
|
||||
unlocked := false
|
||||
defer func() {
|
||||
if !unlocked {
|
||||
fs.mu.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
// See if we can bring back our TTL timed hash wheel state from disk.
|
||||
if cfg.AllowMsgTTL {
|
||||
if err = fs.recoverTTLState(); err != nil && !os.IsNotExist(err) {
|
||||
@@ -552,9 +563,6 @@ func newFileStoreWithCreated(fcfg FileStoreConfig, cfg StreamConfig, created tim
|
||||
}
|
||||
}()
|
||||
|
||||
// Lock while we do enforcements and removals.
|
||||
fs.mu.Lock()
|
||||
|
||||
// Check if we have any left over tombstones to process.
|
||||
if len(fs.tombs) > 0 {
|
||||
for _, seq := range fs.tombs {
|
||||
@@ -586,6 +594,7 @@ func newFileStoreWithCreated(fcfg FileStoreConfig, cfg StreamConfig, created tim
|
||||
// Grab first sequence for check below while we have lock.
|
||||
firstSeq := fs.state.FirstSeq
|
||||
fs.mu.Unlock()
|
||||
unlocked = true
|
||||
|
||||
// If the stream has an initial sequence number then make sure we
|
||||
// have purged up until that point. We will do this only if the
|
||||
@@ -1158,10 +1167,14 @@ func (fs *fileStore) recoverMsgBlock(index uint32) (*msgBlock, error) {
|
||||
var lchk [8]byte
|
||||
if mb.rbytes >= checksumSize {
|
||||
if mb.bek != nil {
|
||||
if buf, _ := mb.loadBlock(nil); len(buf) >= checksumSize {
|
||||
// We pass nil, so get a buf from the block pool, we'll need to recycle it afterward.
|
||||
buf, _ := mb.loadBlock(nil)
|
||||
if len(buf) >= checksumSize {
|
||||
mb.bek.XORKeyStream(buf, buf)
|
||||
copy(lchk[0:], buf[len(buf)-checksumSize:])
|
||||
}
|
||||
// We can recycle it now.
|
||||
recycleMsgBlockBuf(buf)
|
||||
} else {
|
||||
file.ReadAt(lchk[:], int64(mb.rbytes)-checksumSize)
|
||||
}
|
||||
@@ -1576,15 +1589,15 @@ func (mb *msgBlock) rebuildStateFromBufLocked(buf []byte, allowTruncate bool) (*
|
||||
rl, slen := le.Uint32(hdr[0:]), int(le.Uint16(hdr[20:]))
|
||||
|
||||
hasHeaders := rl&hbit != 0
|
||||
var ttl int64
|
||||
if mb.fs.ttls != nil && len(hdr) > 0 {
|
||||
ttl, _ = getMessageTTL(hdr)
|
||||
}
|
||||
// Clear any headers bit that could be set.
|
||||
rl &^= hbit
|
||||
shlen := slen
|
||||
if hasHeaders {
|
||||
shlen += 4
|
||||
}
|
||||
dlen := int(rl) - msgHdrSize
|
||||
// Do some quick sanity checks here.
|
||||
if dlen < 0 || slen > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
|
||||
if dlen < 0 || shlen > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
|
||||
truncate(index)
|
||||
return gatherLost(lbuf - index), tombstones, errBadMsg{mb.mfn, fmt.Sprintf("sanity check failed (dlen %d slen %d rl %d index %d lbuf %d)", dlen, slen, rl, index, lbuf)}
|
||||
}
|
||||
@@ -1661,21 +1674,6 @@ func (mb *msgBlock) rebuildStateFromBufLocked(buf []byte, allowTruncate bool) (*
|
||||
if !mb.dmap.Exists(seq) {
|
||||
mb.msgs++
|
||||
mb.bytes += uint64(rl)
|
||||
if ttl > 0 {
|
||||
if mb.fs.ttls != nil {
|
||||
expires := time.Duration(ts) + (time.Second * time.Duration(ttl))
|
||||
mb.fs.ttls.Add(seq, int64(expires))
|
||||
}
|
||||
// Need to count these regardless as we might want to enable TTLs
|
||||
// later via UpdateConfig.
|
||||
mb.ttls++
|
||||
}
|
||||
if mb.fs.scheduling != nil {
|
||||
if schedule, ok := getMessageSchedule(hdr); ok && !schedule.IsZero() {
|
||||
mb.fs.scheduling.add(seq, string(subj), schedule.UnixNano())
|
||||
mb.schedules++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
updateLast(seq, ts)
|
||||
@@ -1694,7 +1692,7 @@ func (mb *msgBlock) rebuildStateFromBufLocked(buf []byte, allowTruncate bool) (*
|
||||
} else if fseq == 0 && maxTombstoneSeq > 0 {
|
||||
atomic.StoreUint64(&mb.first.seq, maxTombstoneSeq+1)
|
||||
mb.first.ts = 0
|
||||
if mb.last.seq == 0 {
|
||||
if lseq := atomic.LoadUint64(&mb.last.seq); lseq == 0 {
|
||||
atomic.StoreUint64(&mb.last.seq, maxTombstoneSeq)
|
||||
mb.last.ts = maxTombstoneTs
|
||||
}
|
||||
@@ -1726,13 +1724,15 @@ func (fs *fileStore) debug(format string, args ...any) {
|
||||
|
||||
// Track local state but ignore timestamps here.
|
||||
func updateTrackingState(state *StreamState, mb *msgBlock) {
|
||||
first := atomic.LoadUint64(&mb.first.seq)
|
||||
last := atomic.LoadUint64(&mb.last.seq)
|
||||
if state.FirstSeq == 0 {
|
||||
state.FirstSeq = mb.first.seq
|
||||
} else if mb.first.seq < state.FirstSeq && mb.first.ts != 0 {
|
||||
state.FirstSeq = mb.first.seq
|
||||
state.FirstSeq = first
|
||||
} else if first < state.FirstSeq && mb.first.ts != 0 {
|
||||
state.FirstSeq = first
|
||||
}
|
||||
if mb.last.seq > state.LastSeq {
|
||||
state.LastSeq = mb.last.seq
|
||||
if last > state.LastSeq {
|
||||
state.LastSeq = last
|
||||
}
|
||||
state.Msgs += mb.msgs
|
||||
state.Bytes += mb.bytes
|
||||
@@ -2305,9 +2305,11 @@ func (fs *fileStore) recoverMsgs() error {
|
||||
if mb, err := fs.recoverMsgBlock(uint32(index)); err == nil && mb != nil {
|
||||
// This is a truncate block with possibly no index. If the OS got shutdown
|
||||
// out from underneath of us this is possible.
|
||||
if mb.first.seq == 0 {
|
||||
mb.mu.Lock()
|
||||
if atomic.LoadUint64(&mb.first.seq) == 0 {
|
||||
mb.dirtyCloseWithRemove(true)
|
||||
fs.removeMsgBlockFromList(mb)
|
||||
mb.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
// If the stream is empty, reset the first/last sequences so these can
|
||||
@@ -2337,6 +2339,14 @@ func (fs *fileStore) recoverMsgs() error {
|
||||
}
|
||||
fs.state.Msgs += mb.msgs
|
||||
fs.state.Bytes += mb.bytes
|
||||
// If the block is empty, correct the sequences to be aligned with the current filestore state.
|
||||
if mb.msgs == 0 {
|
||||
atomic.StoreUint64(&mb.first.seq, fs.state.LastSeq+1)
|
||||
mb.first.ts = 0
|
||||
atomic.StoreUint64(&mb.last.seq, fs.state.LastSeq)
|
||||
mb.last.ts = fs.state.LastTime.UnixNano()
|
||||
}
|
||||
mb.mu.Unlock()
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
@@ -2565,6 +2575,7 @@ func (fs *fileStore) expireMsgsOnRecover() error {
|
||||
fs.selectNextFirst()
|
||||
|
||||
// Check if we have no messages and blocks left.
|
||||
|
||||
if fs.lmb == nil && last.seq != 0 {
|
||||
if lmb, _ := fs.newMsgBlockForWrite(); lmb != nil {
|
||||
fs.writeTombstone(last.seq, last.ts)
|
||||
@@ -3524,6 +3535,11 @@ func (fs *fileStore) allLastSeqsLocked() ([]uint64, error) {
|
||||
mb.fss.IterFast(func(bsubj []byte, ss *SimpleState) bool {
|
||||
// Check if already been processed and accounted.
|
||||
if _, ok := subs[string(bsubj)]; !ok {
|
||||
// Check if we need to recalculate. We only care about the last sequence.
|
||||
if ss.lastNeedsUpdate {
|
||||
// mb is already loaded into the cache so should be fast-ish.
|
||||
mb.recalculateForSubj(bytesToString(bsubj), ss)
|
||||
}
|
||||
seqs = append(seqs, ss.Last)
|
||||
subs[string(bsubj)] = struct{}{}
|
||||
}
|
||||
@@ -3550,6 +3566,7 @@ func (fs *fileStore) filterIsAll(filters []string) bool {
|
||||
}
|
||||
// Sort so we can compare.
|
||||
slices.Sort(filters)
|
||||
slices.Sort(fs.cfg.Subjects)
|
||||
for i, subj := range filters {
|
||||
if !subjectIsSubsetMatch(fs.cfg.Subjects[i], subj) {
|
||||
return false
|
||||
@@ -4024,7 +4041,11 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
|
||||
mb.mu.Unlock()
|
||||
}
|
||||
// Make final adjustment.
|
||||
total -= adjust
|
||||
if adjust > total {
|
||||
total = 0
|
||||
} else {
|
||||
total -= adjust
|
||||
}
|
||||
|
||||
return total, validThrough, nil
|
||||
}
|
||||
@@ -4116,7 +4137,7 @@ func (fs *fileStore) NumPendingMulti(sseq uint64, sl *gsl.SimpleSublist, lastPer
|
||||
var shouldExpire bool
|
||||
var updateLLTS bool
|
||||
// We need to walk this block to correct accounting from above.
|
||||
if sseq > mb.first.seq {
|
||||
if sseq > atomic.LoadUint64(&mb.first.seq) {
|
||||
// Track the ones we add back in case more than one.
|
||||
seen := make(map[string]bool)
|
||||
// We need to discount the total by subjects seen before sseq, but also add them right back in if they are >= sseq for this blk.
|
||||
@@ -4358,7 +4379,11 @@ func (fs *fileStore) NumPendingMulti(sseq uint64, sl *gsl.SimpleSublist, lastPer
|
||||
mb.mu.Unlock()
|
||||
}
|
||||
// Make final adjustment.
|
||||
total -= adjust
|
||||
if adjust > total {
|
||||
total = 0
|
||||
} else {
|
||||
total -= adjust
|
||||
}
|
||||
|
||||
return total, validThrough, nil
|
||||
}
|
||||
@@ -4854,11 +4879,15 @@ func (fs *fileStore) SkipMsgs(seq uint64, num uint64) error {
|
||||
const maxDeletes = 64 * 1024
|
||||
mb := fs.lmb
|
||||
|
||||
var msgs uint64
|
||||
numDeletes := int(num)
|
||||
if mb != nil {
|
||||
mb.mu.RLock()
|
||||
numDeletes += mb.dmap.Size()
|
||||
msgs = mb.msgs
|
||||
mb.mu.RUnlock()
|
||||
}
|
||||
if mb == nil || numDeletes > maxDeletes && mb.msgs > 0 || mb.msgs > 0 && mb.blkSize()+emptyRecordLen > fs.fcfg.BlockSize {
|
||||
if mb == nil || numDeletes > maxDeletes && msgs > 0 || msgs > 0 && mb.blkSize()+emptyRecordLen > fs.fcfg.BlockSize {
|
||||
var err error
|
||||
if mb, err = fs.newMsgBlockForWrite(); err != nil {
|
||||
return err
|
||||
@@ -4969,18 +4998,18 @@ func (fs *fileStore) firstSeqForSubj(subj string) (uint64, error) {
|
||||
|
||||
bsubj := stringToBytes(subj)
|
||||
if ss, ok := mb.fss.Find(bsubj); ok && ss != nil {
|
||||
// Adjust first if it was not where we thought it should be.
|
||||
if i != start {
|
||||
if info, ok := fs.psim.Find(bsubj); ok {
|
||||
info.fblk = i
|
||||
}
|
||||
}
|
||||
if ss.firstNeedsUpdate || ss.lastNeedsUpdate {
|
||||
mb.recalculateForSubj(subj, ss)
|
||||
}
|
||||
mb.mu.Unlock()
|
||||
// Re-acquire fs lock
|
||||
fs.mu.Lock()
|
||||
// Adjust first if it was not where we thought it should be.
|
||||
if i != start {
|
||||
if info, ok := fs.psim.Find(bsubj); ok {
|
||||
info.fblk = i
|
||||
}
|
||||
}
|
||||
return ss.First, nil
|
||||
}
|
||||
// If we did not find it and we loaded this msgBlock try to expire as long as not the last.
|
||||
@@ -5300,18 +5329,15 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
}
|
||||
}
|
||||
|
||||
var smv StoreMsg
|
||||
var sm *StoreMsg
|
||||
var err error
|
||||
if secure {
|
||||
// For a secure erase we can't use NoCopy, as eraseMsg will overwrite the
|
||||
// cache and we won't be able to access sm.subj etc anymore later on.
|
||||
sm, err = mb.cacheLookup(seq, &smv)
|
||||
} else {
|
||||
// For a non-secure erase it's fine to use NoCopy, as the cache won't change
|
||||
// from underneath us.
|
||||
sm, err = mb.cacheLookupNoCopy(seq, &smv)
|
||||
}
|
||||
var (
|
||||
smv StoreMsg
|
||||
subj string
|
||||
ts int64
|
||||
lhdr, lmsg int
|
||||
ttl int64
|
||||
)
|
||||
// We don't use a copy as long as that's possible. When unlocking mb or erasing, we'll copy the subject.
|
||||
sm, err := mb.cacheLookupNoCopy(seq, &smv)
|
||||
if err != nil {
|
||||
finishedWithCache()
|
||||
mb.mu.Unlock()
|
||||
@@ -5321,6 +5347,12 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
err = nil
|
||||
}
|
||||
return false, err
|
||||
} else if sm != nil {
|
||||
subj = sm.subj
|
||||
ts = sm.ts
|
||||
lhdr = len(sm.hdr)
|
||||
lmsg = len(sm.msg)
|
||||
ttl, _ = getMessageTTL(sm.hdr)
|
||||
}
|
||||
|
||||
// Check if we need to write a deleted record tombstone.
|
||||
@@ -5328,6 +5360,8 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
// when the last block is empty.
|
||||
// If not via limits and not empty (empty writes tombstone below if last) write tombstone.
|
||||
if !viaLimits && !isEmpty && sm != nil {
|
||||
// Need to copy the subject since we unlock and re-acquire, and the cache could change.
|
||||
subj = copyString(subj)
|
||||
mb.mu.Unlock() // Only safe way to checkLastBlock is to unlock here...
|
||||
lmb, err := fs.checkLastBlock(emptyRecordLen)
|
||||
if err != nil {
|
||||
@@ -5335,7 +5369,7 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
fsUnlock()
|
||||
return false, err
|
||||
}
|
||||
if err := lmb.writeTombstone(sm.seq, sm.ts); err != nil {
|
||||
if err := lmb.writeTombstone(seq, ts); err != nil {
|
||||
finishedWithCache()
|
||||
fsUnlock()
|
||||
return false, err
|
||||
@@ -5344,7 +5378,7 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
}
|
||||
|
||||
// Grab size
|
||||
msz := fileStoreMsgSize(sm.subj, sm.hdr, sm.msg)
|
||||
msz := fileStoreMsgSizeRaw(len(subj), lhdr, lmsg)
|
||||
|
||||
// Set cache timestamp for last remove.
|
||||
mb.lrts = ats.AccessTime()
|
||||
@@ -5359,6 +5393,9 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
fsUnlock()
|
||||
return false, err
|
||||
}
|
||||
// Need to copy the subject, as eraseMsg will overwrite the cache and we won't
|
||||
// be able to access sm.subj anymore later on.
|
||||
subj = copyString(subj)
|
||||
if err := mb.eraseMsg(seq, int(ri), int(msz), isLastBlock); err != nil {
|
||||
finishedWithCache()
|
||||
mb.mu.Unlock()
|
||||
@@ -5397,13 +5434,11 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
mb.ensurePerSubjectInfoLoaded()
|
||||
|
||||
// If we are tracking multiple subjects here make sure we update that accounting.
|
||||
mb.removeSeqPerSubject(sm.subj, seq)
|
||||
fs.removePerSubject(sm.subj)
|
||||
if fs.ttls != nil {
|
||||
if ttl, err := getMessageTTL(sm.hdr); err == nil {
|
||||
expires := time.Duration(sm.ts) + (time.Second * time.Duration(ttl))
|
||||
fs.ttls.Remove(seq, int64(expires))
|
||||
}
|
||||
mb.removeSeqPerSubject(subj, seq)
|
||||
fs.removePerSubject(subj)
|
||||
if fs.ttls != nil && ttl > 0 {
|
||||
expires := time.Duration(ts) + (time.Second * time.Duration(ttl))
|
||||
fs.ttls.Remove(seq, int64(expires))
|
||||
}
|
||||
|
||||
if fifo {
|
||||
@@ -5461,7 +5496,7 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) (
|
||||
fs.mu.Unlock()
|
||||
// Storage updates.
|
||||
delta := int64(msz)
|
||||
cb(-1, -delta, seq, sm.subj)
|
||||
cb(-1, -delta, seq, subj)
|
||||
|
||||
if !needFSLock {
|
||||
fs.mu.Lock()
|
||||
@@ -5532,11 +5567,16 @@ func (mb *msgBlock) compactWithFloor(floor uint64) error {
|
||||
}
|
||||
hdr := buf[index : index+msgHdrSize]
|
||||
rl, slen := le.Uint32(hdr[0:]), int(le.Uint16(hdr[20:]))
|
||||
hasHeaders := rl&hbit != 0
|
||||
// Clear any headers bit that could be set.
|
||||
rl &^= hbit
|
||||
shlen := slen
|
||||
if hasHeaders {
|
||||
shlen += 4
|
||||
}
|
||||
dlen := int(rl) - msgHdrSize
|
||||
// Do some quick sanity checks here.
|
||||
if dlen < 0 || slen > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
|
||||
if dlen < 0 || shlen > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
|
||||
return fmt.Errorf("sanity check failed")
|
||||
}
|
||||
// Only need to process non-deleted messages.
|
||||
@@ -5574,15 +5614,16 @@ func (mb *msgBlock) compactWithFloor(floor uint64) error {
|
||||
|
||||
// Handle compression
|
||||
if mb.cmp != NoCompression && len(nbuf) > 0 {
|
||||
cbuf, err := mb.cmp.Compress(nbuf)
|
||||
if err != nil {
|
||||
originalSize := len(nbuf)
|
||||
var err error
|
||||
if nbuf, err = mb.cmp.Compress(nbuf); err != nil {
|
||||
return err
|
||||
}
|
||||
meta := &CompressionInfo{
|
||||
Algorithm: mb.cmp,
|
||||
OriginalSize: uint64(len(nbuf)),
|
||||
OriginalSize: uint64(originalSize),
|
||||
}
|
||||
nbuf = append(meta.MarshalMetadata(), cbuf...)
|
||||
nbuf = append(meta.MarshalMetadata(), nbuf...)
|
||||
}
|
||||
|
||||
// Check for encryption.
|
||||
@@ -5916,10 +5957,10 @@ func (mb *msgBlock) truncate(tseq uint64, ts int64) (nmsgs, nbytes uint64, err e
|
||||
}
|
||||
}
|
||||
|
||||
// If the block is compressed then we have to load it into memory
|
||||
// and decompress it, truncate it and then write it back out.
|
||||
// If the block is compressed/encrypted then we have to load it into memory
|
||||
// and decompress/decrypt it, truncate it and then write it back out.
|
||||
// Otherwise, truncate the file itself and close the descriptor.
|
||||
if mb.cmp != NoCompression {
|
||||
if mb.cmp != NoCompression || mb.bek != nil {
|
||||
buf, err := mb.loadBlock(nil)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("failed to load block from disk: %w", err)
|
||||
@@ -5931,7 +5972,7 @@ func (mb *msgBlock) truncate(tseq uint64, ts int64) (nmsgs, nbytes uint64, err e
|
||||
return 0, 0, fmt.Errorf("failed to decompress block: %w", err)
|
||||
}
|
||||
buf = buf[:eof]
|
||||
copy(mb.lchk[0:], buf[:len(buf)-checksumSize])
|
||||
copy(mb.lchk[0:], buf[len(buf)-checksumSize:])
|
||||
// We did decompress but don't recompress the truncated buffer here since we're the last block
|
||||
// and would otherwise have compressed data and allow to write uncompressed data in the same block.
|
||||
if err = mb.atomicOverwriteFile(buf, false); err != nil {
|
||||
@@ -6974,6 +7015,7 @@ func (mb *msgBlock) atomicOverwriteFile(buf []byte, allowCompress bool) error {
|
||||
// The original buffer at this point is uncompressed, so we will now compress
|
||||
// it if needed. Note that if the selected algorithm is NoCompression, the
|
||||
// Compress function will just return the input buffer unmodified.
|
||||
originalSize := len(buf)
|
||||
if buf, err = alg.Compress(buf); err != nil {
|
||||
return errorCleanup(fmt.Errorf("failed to compress block: %w", err))
|
||||
}
|
||||
@@ -6983,7 +7025,7 @@ func (mb *msgBlock) atomicOverwriteFile(buf []byte, allowCompress bool) error {
|
||||
// writing metadata.
|
||||
meta := &CompressionInfo{
|
||||
Algorithm: alg,
|
||||
OriginalSize: uint64(len(buf)),
|
||||
OriginalSize: uint64(originalSize),
|
||||
}
|
||||
buf = append(meta.MarshalMetadata(), buf...)
|
||||
}
|
||||
@@ -7331,7 +7373,7 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
|
||||
|
||||
lbuf := uint32(len(buf))
|
||||
var seq, ttls, schedules uint64
|
||||
var sm StoreMsg // Used for finding TTL headers
|
||||
var sm StoreMsg // Used for finding headers
|
||||
|
||||
// To ensure the sequence keeps moving up. As well as confirming our index
|
||||
// is aligned with the mb's first and last sequence.
|
||||
@@ -7346,13 +7388,16 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
|
||||
rl, slen := le.Uint32(hdr[0:]), int(le.Uint16(hdr[20:]))
|
||||
seq = le.Uint64(hdr[4:])
|
||||
|
||||
// Clear any headers bit that could be set.
|
||||
hasHeaders := rl&hbit != 0
|
||||
// Clear any headers bit that could be set.
|
||||
rl &^= hbit
|
||||
shlen := slen
|
||||
if hasHeaders {
|
||||
shlen += 4
|
||||
}
|
||||
dlen := int(rl) - msgHdrSize
|
||||
|
||||
// Do some quick sanity checks here.
|
||||
if dlen < 0 || slen > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
|
||||
if dlen < 0 || shlen > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
|
||||
mb.fs.warn("indexCacheBuf corrupt record state in %s: dlen %d slen %d index %d rl %d lbuf %d", mb.mfn, dlen, slen, index, rl, lbuf)
|
||||
// This means something is off.
|
||||
// TODO(dlc) - Add into bad list?
|
||||
@@ -8089,8 +8134,13 @@ func (mb *msgBlock) msgFromBufEx(buf []byte, sm *StoreMsg, hh *highwayhash.Diges
|
||||
rl &^= hbit // clear header bit
|
||||
dlen := int(rl) - msgHdrSize
|
||||
slen := int(le.Uint16(hdr[20:]))
|
||||
|
||||
shlen := slen
|
||||
if hasHeaders {
|
||||
shlen += 4
|
||||
}
|
||||
// Simple sanity check.
|
||||
if dlen < 0 || slen > (dlen-recordHashSize) || dlen > int(rl) || int(rl) > len(buf) || rl > rlBadThresh {
|
||||
if dlen < 0 || shlen > (dlen-recordHashSize) || dlen > int(rl) || int(rl) > len(buf) || rl > rlBadThresh {
|
||||
return nil, errBadMsg{mb.mfn, fmt.Sprintf("sanity check failed (dlen %d slen %d rl %d buf %d)", dlen, slen, rl, buf)}
|
||||
}
|
||||
data := buf[msgHdrSize : msgHdrSize+dlen]
|
||||
@@ -8192,7 +8242,8 @@ func (fs *fileStore) SubjectForSeq(seq uint64) (string, error) {
|
||||
fs.mu.RUnlock()
|
||||
if mb != nil {
|
||||
if sm, _, _ := mb.fetchMsgNoCopy(seq, &smv); sm != nil {
|
||||
return sm.subj, nil
|
||||
// Copy the subject, as it's used elsewhere, and the backing cache could be reused in the meantime.
|
||||
return copyString(sm.subj), nil
|
||||
}
|
||||
}
|
||||
return _EMPTY_, ErrStoreMsgNotFound
|
||||
@@ -8265,6 +8316,11 @@ func (fs *fileStore) loadLast(subj string, sm *StoreMsg) (lsm *StoreMsg, err err
|
||||
// Optimize if subject is not a wildcard.
|
||||
if !wc {
|
||||
if ss, ok := mb.fss.Find(stringToBytes(subj)); ok && ss != nil {
|
||||
// Check if we need to recalculate. We only care about the last sequence.
|
||||
if ss.lastNeedsUpdate {
|
||||
// mb is already loaded into the cache so should be fast-ish.
|
||||
mb.recalculateForSubj(subj, ss)
|
||||
}
|
||||
l = ss.Last
|
||||
}
|
||||
}
|
||||
@@ -8824,7 +8880,9 @@ func (fs *fileStore) cacheLoads() uint64 {
|
||||
var tl uint64
|
||||
fs.mu.RLock()
|
||||
for _, mb := range fs.blks {
|
||||
mb.mu.RLock()
|
||||
tl += mb.cloads
|
||||
mb.mu.RUnlock()
|
||||
}
|
||||
fs.mu.RUnlock()
|
||||
return tl
|
||||
@@ -8835,7 +8893,7 @@ func (fs *fileStore) cacheSize() uint64 {
|
||||
var sz uint64
|
||||
fs.mu.RLock()
|
||||
for _, mb := range fs.blks {
|
||||
mb.mu.RLock()
|
||||
mb.mu.Lock()
|
||||
var needsCleanup bool
|
||||
if mb.cache == nil {
|
||||
mb.cache = mb.ecache.Value()
|
||||
@@ -8847,7 +8905,7 @@ func (fs *fileStore) cacheSize() uint64 {
|
||||
if needsCleanup {
|
||||
mb.finishedWithCache()
|
||||
}
|
||||
mb.mu.RUnlock()
|
||||
mb.mu.Unlock()
|
||||
}
|
||||
fs.mu.RUnlock()
|
||||
return sz
|
||||
@@ -8946,6 +9004,7 @@ func (fs *fileStore) PurgeEx(subject string, sequence, keep uint64) (purged uint
|
||||
if mb.cacheNotLoaded() {
|
||||
if err := mb.loadMsgsWithLock(); err != nil {
|
||||
mb.mu.Unlock()
|
||||
fs.mu.Unlock()
|
||||
return 0, err
|
||||
}
|
||||
shouldExpire = true
|
||||
@@ -9049,6 +9108,7 @@ func (fs *fileStore) PurgeEx(subject string, sequence, keep uint64) (purged uint
|
||||
if len(tombs) > 0 {
|
||||
for _, tomb := range tombs {
|
||||
if err := fs.writeTombstoneNoFlush(tomb.seq, tomb.ts); err != nil {
|
||||
fs.mu.Unlock()
|
||||
return purged, err
|
||||
}
|
||||
}
|
||||
@@ -9368,6 +9428,7 @@ SKIP:
|
||||
if len(tombs) > 0 {
|
||||
for _, tomb := range tombs {
|
||||
if err := fs.writeTombstoneNoFlush(tomb.seq, tomb.ts); err != nil {
|
||||
fs.mu.Unlock()
|
||||
return purged, err
|
||||
}
|
||||
}
|
||||
@@ -9558,6 +9619,7 @@ func (fs *fileStore) Truncate(seq uint64) error {
|
||||
// If we end up not needing to write tombstones, this block will be cleaned up at the end.
|
||||
tmb, err := fs.newMsgBlockForWrite()
|
||||
if err != nil {
|
||||
fs.mu.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -10159,6 +10221,29 @@ func (fs *fileStore) populateGlobalPerSubjectInfo(mb *msgBlock) {
|
||||
})
|
||||
}
|
||||
|
||||
// Calls os.RemoveAll on the given `dir` directory, but if an error occurs,
|
||||
// retries up to one second. If that still fails, returns the last error
|
||||
// that os.RemoveAll returned.
|
||||
func removeAllWithRetry(dir string) error {
|
||||
<-dios
|
||||
err := os.RemoveAll(dir)
|
||||
dios <- struct{}{}
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
ttl := time.Now().Add(time.Second)
|
||||
for time.Now().Before(ttl) {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
<-dios
|
||||
err = os.RemoveAll(dir)
|
||||
dios <- struct{}{}
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Close the message block.
|
||||
func (mb *msgBlock) close(sync bool) {
|
||||
if mb == nil {
|
||||
@@ -10261,28 +10346,12 @@ func (fs *fileStore) Delete(inline bool) error {
|
||||
}
|
||||
// Do this in separate Go routine in case lots of blocks.
|
||||
// Purge above protects us as does the removal of meta artifacts above.
|
||||
removeDir := func() {
|
||||
<-dios
|
||||
err := os.RemoveAll(ndir)
|
||||
dios <- struct{}{}
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
ttl := time.Now().Add(time.Second)
|
||||
for time.Now().Before(ttl) {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
<-dios
|
||||
err = os.RemoveAll(ndir)
|
||||
dios <- struct{}{}
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
if inline {
|
||||
removeDir()
|
||||
if err := removeAllWithRetry(ndir); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
go removeDir()
|
||||
go removeAllWithRetry(ndir)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -10837,18 +10906,18 @@ func (fs *fileStore) streamSnapshot(w io.WriteCloser, includeConsumers bool, err
|
||||
const minLen = 32
|
||||
sfn := filepath.Join(fs.fcfg.StoreDir, msgDir, streamStreamStateFile)
|
||||
if buf, err := os.ReadFile(sfn); err == nil && len(buf) >= minLen {
|
||||
fs.mu.Lock()
|
||||
if fs.aek != nil {
|
||||
ns := fs.aek.NonceSize()
|
||||
buf, err = fs.aek.Open(nil, buf[:ns], buf[ns:len(buf)-highwayhash.Size64], nil)
|
||||
if err == nil {
|
||||
// Redo hash checksum at end on plaintext.
|
||||
fs.mu.Lock()
|
||||
hh.Reset()
|
||||
hh.Write(buf)
|
||||
buf = fs.hh.Sum(buf)
|
||||
fs.mu.Unlock()
|
||||
}
|
||||
}
|
||||
fs.mu.Unlock()
|
||||
if err == nil && writeFile(msgPre+streamStreamStateFile, buf) != nil {
|
||||
return
|
||||
}
|
||||
@@ -11639,7 +11708,7 @@ func (o *consumerFileStore) Update(state *ConsumerState) error {
|
||||
|
||||
// Check to see if this is an outdated update.
|
||||
if state.Delivered.Consumer < o.state.Delivered.Consumer || state.AckFloor.Stream < o.state.AckFloor.Stream {
|
||||
return fmt.Errorf("old update ignored")
|
||||
return ErrStoreOldUpdate
|
||||
}
|
||||
|
||||
o.state.Delivered = state.Delivered
|
||||
@@ -11703,6 +11772,7 @@ func (o *consumerFileStore) writeState(buf []byte) error {
|
||||
if o.aek != nil {
|
||||
var err error
|
||||
if buf, err = o.encryptState(buf); err != nil {
|
||||
o.mu.Unlock()
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -12068,6 +12138,7 @@ func (o *consumerFileStore) Stop() error {
|
||||
if buf, err = o.encodeState(); err == nil && len(buf) > 0 {
|
||||
if o.aek != nil {
|
||||
if buf, err = o.encryptState(buf); err != nil {
|
||||
o.mu.Unlock()
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -12122,7 +12193,6 @@ func (o *consumerFileStore) delete(streamDeleted bool) error {
|
||||
o.qch = nil
|
||||
}
|
||||
|
||||
var err error
|
||||
odir := o.odir
|
||||
o.odir = _EMPTY_
|
||||
o.closed = true
|
||||
@@ -12131,16 +12201,18 @@ func (o *consumerFileStore) delete(streamDeleted bool) error {
|
||||
|
||||
// If our stream was not deleted this will remove the directories.
|
||||
if odir != _EMPTY_ && !streamDeleted {
|
||||
<-dios
|
||||
err = os.RemoveAll(odir)
|
||||
dios <- struct{}{}
|
||||
if err := removeAllWithRetry(odir); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if !streamDeleted {
|
||||
fs.RemoveConsumer(o)
|
||||
if err := fs.RemoveConsumer(o); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
return nil
|
||||
}
|
||||
|
||||
func (fs *fileStore) AddConsumer(o ConsumerStore) error {
|
||||
|
||||
+4
-8
@@ -1563,7 +1563,7 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits, tq c
|
||||
}
|
||||
|
||||
// Add in the stream.
|
||||
mset, err := a.addStream(&cfg.StreamConfig)
|
||||
mset, err := a.recoverStream(&cfg.StreamConfig)
|
||||
if err != nil {
|
||||
s.Warnf(" Error recreating stream %q: %v", cfg.Name, err)
|
||||
// If we removed a keyfile from above make sure to put it back.
|
||||
@@ -2365,8 +2365,10 @@ func tierName(replicas int) string {
|
||||
}
|
||||
|
||||
func isSameTier(cfgA, cfgB *StreamConfig) bool {
|
||||
a := max(1, cfgA.Replicas)
|
||||
b := max(1, cfgB.Replicas)
|
||||
// TODO (mh) this is where we could select based off a placement tag as well "qos:tier"
|
||||
return cfgA.Replicas == cfgB.Replicas
|
||||
return a == b
|
||||
}
|
||||
|
||||
func (jsa *jsAccount) jetStreamAndClustered() (*jetStream, bool) {
|
||||
@@ -2441,17 +2443,11 @@ func (jsa *jsAccount) wouldExceedLimits(storeType StorageType, tierName string,
|
||||
// Since tiers are flat we need to scale limit up by replicas when checking.
|
||||
if storeType == MemoryStorage {
|
||||
totalMem := inUse.total.mem + (int64(memStoreMsgSize(subj, hdr, msg)) * r)
|
||||
if selectedLimits.MemoryMaxStreamBytes > 0 && totalMem > selectedLimits.MemoryMaxStreamBytes*lr {
|
||||
return true, nil
|
||||
}
|
||||
if selectedLimits.MaxMemory >= 0 && totalMem > selectedLimits.MaxMemory*lr {
|
||||
return true, nil
|
||||
}
|
||||
} else {
|
||||
totalStore := inUse.total.store + (int64(fileStoreMsgSize(subj, hdr, msg)) * r)
|
||||
if selectedLimits.StoreMaxStreamBytes > 0 && totalStore > selectedLimits.StoreMaxStreamBytes*lr {
|
||||
return true, nil
|
||||
}
|
||||
if selectedLimits.MaxStore >= 0 && totalStore > selectedLimits.MaxStore*lr {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
+81
-77
@@ -24,7 +24,6 @@ import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -563,9 +562,17 @@ type JSApiStreamSnapshotRequest struct {
|
||||
DeliverSubject string `json:"deliver_subject"`
|
||||
// Do not include consumers in the snapshot.
|
||||
NoConsumers bool `json:"no_consumers,omitempty"`
|
||||
// Optional chunk size preference.
|
||||
// Best to just let server select.
|
||||
// Optional chunk size preference. Defaults to 128KB,
|
||||
// automatically clamped to within the range 1KB to 1MB.
|
||||
// A smaller chunk size means more in-flight messages
|
||||
// and more acks needed. Links with good throughput
|
||||
// but high latency may need to increase this.
|
||||
ChunkSize int `json:"chunk_size,omitempty"`
|
||||
// Optional window size preference. Defaults to 8MB,
|
||||
// automatically clamped to within the range 1KB to 32MB.
|
||||
// Very slow connections may need to reduce this to
|
||||
// avoid slow consumer issues.
|
||||
WindowSize int `json:"window_size,omitempty"`
|
||||
// Check all message's checksums prior to snapshot.
|
||||
CheckMsgs bool `json:"jsck,omitempty"`
|
||||
}
|
||||
@@ -1578,23 +1585,19 @@ func (s *Server) jsonResponse(v any) string {
|
||||
|
||||
// Read lock must be held
|
||||
func (jsa *jsAccount) tieredReservation(tier string, cfg *StreamConfig) int64 {
|
||||
reservation := int64(0)
|
||||
if tier == _EMPTY_ {
|
||||
for _, sa := range jsa.streams {
|
||||
if sa.cfg.MaxBytes > 0 {
|
||||
if sa.cfg.Storage == cfg.Storage && sa.cfg.Name != cfg.Name {
|
||||
reservation += (int64(sa.cfg.Replicas) * sa.cfg.MaxBytes)
|
||||
}
|
||||
}
|
||||
var reservation int64
|
||||
for _, sa := range jsa.streams {
|
||||
// Don't count the stream toward the limit if it already exists.
|
||||
if sa.cfg.Name == cfg.Name {
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
for _, sa := range jsa.streams {
|
||||
if sa.cfg.Replicas == cfg.Replicas {
|
||||
if sa.cfg.MaxBytes > 0 {
|
||||
if isSameTier(&sa.cfg, cfg) && sa.cfg.Name != cfg.Name {
|
||||
reservation += (int64(sa.cfg.Replicas) * sa.cfg.MaxBytes)
|
||||
}
|
||||
}
|
||||
if (tier == _EMPTY_ || isSameTier(&sa.cfg, cfg)) && sa.cfg.MaxBytes > 0 && sa.cfg.Storage == cfg.Storage {
|
||||
// If tier is empty, all storage is flat and we should adjust for replicas.
|
||||
// Otherwise if tiered, storage replication already taken into consideration.
|
||||
if tier == _EMPTY_ && sa.cfg.Replicas > 1 {
|
||||
reservation += (int64(sa.cfg.Replicas) * sa.cfg.MaxBytes)
|
||||
} else {
|
||||
reservation += sa.cfg.MaxBytes
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2576,7 +2579,7 @@ func (s *Server) jsStreamRemovePeerRequest(sub *subscription, c *client, _ *Acco
|
||||
}
|
||||
|
||||
js.mu.RLock()
|
||||
isLeader, sa := cc.isLeader(), js.streamAssignment(acc.Name, name)
|
||||
isLeader, sa := cc.isLeader(), js.streamAssignmentOrInflight(acc.Name, name)
|
||||
js.mu.RUnlock()
|
||||
|
||||
// Make sure we are meta leader.
|
||||
@@ -3135,22 +3138,23 @@ func (s *Server) jsLeaderAccountPurgeRequest(sub *subscription, c *client, _ *Ac
|
||||
return
|
||||
}
|
||||
|
||||
js.mu.RLock()
|
||||
js.mu.Lock()
|
||||
ns, nc := 0, 0
|
||||
streams, hasAccount := cc.streams[accName]
|
||||
for _, osa := range streams {
|
||||
for _, oca := range osa.consumers {
|
||||
oca.deleted = true
|
||||
for osa := range js.streamAssignmentsOrInflightSeq(accName) {
|
||||
for oca := range js.consumerAssignmentsOrInflightSeq(accName, osa.Config.Name) {
|
||||
ca := &consumerAssignment{Group: oca.Group, Stream: oca.Stream, Name: oca.Name, Config: oca.Config, Subject: subject, Client: oca.Client, Created: oca.Created}
|
||||
meta.Propose(encodeDeleteConsumerAssignment(ca))
|
||||
cc.trackInflightConsumerProposal(accName, osa.Config.Name, ca, true)
|
||||
nc++
|
||||
}
|
||||
sa := &streamAssignment{Group: osa.Group, Config: osa.Config, Subject: subject, Client: osa.Client, Created: osa.Created}
|
||||
meta.Propose(encodeDeleteStreamAssignment(sa))
|
||||
cc.trackInflightStreamProposal(accName, sa, true)
|
||||
ns++
|
||||
}
|
||||
js.mu.RUnlock()
|
||||
js.mu.Unlock()
|
||||
|
||||
hasAccount := ns > 0
|
||||
s.Noticef("Purge request for account %s (streams: %d, consumer: %d, hasAccount: %t)", accName, ns, nc, hasAccount)
|
||||
|
||||
resp.Initiated = true
|
||||
@@ -3830,9 +3834,10 @@ func (s *Server) jsConsumerUnpinRequest(sub *subscription, c *client, _ *Account
|
||||
}
|
||||
|
||||
o.mu.Lock()
|
||||
o.currentPinId = _EMPTY_
|
||||
o.unassignPinId()
|
||||
o.sendUnpinnedAdvisoryLocked(req.Group, "admin")
|
||||
o.mu.Unlock()
|
||||
o.signalNewMessages()
|
||||
s.sendAPIResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(resp))
|
||||
}
|
||||
|
||||
@@ -4398,20 +4403,36 @@ func (s *Server) jsStreamSnapshotRequest(sub *subscription, c *client, _ *Accoun
|
||||
}
|
||||
|
||||
// Default chunk size for now.
|
||||
const defaultSnapshotChunkSize = 128 * 1024
|
||||
const defaultSnapshotWindowSize = 8 * 1024 * 1024 // 8MB
|
||||
const defaultSnapshotChunkSize = 128 * 1024 // 128KiB
|
||||
const defaultSnapshotWindowSize = 8 * 1024 * 1024 // 8MiB
|
||||
const defaultSnapshotAckTimeout = 5 * time.Second
|
||||
|
||||
var snapshotAckTimeout = defaultSnapshotAckTimeout
|
||||
|
||||
// streamSnapshot will stream out our snapshot to the reply subject.
|
||||
func (s *Server) streamSnapshot(acc *Account, mset *stream, sr *SnapshotResult, req *JSApiStreamSnapshotRequest) {
|
||||
chunkSize := req.ChunkSize
|
||||
chunkSize, wndSize := req.ChunkSize, req.WindowSize
|
||||
if chunkSize == 0 {
|
||||
chunkSize = defaultSnapshotChunkSize
|
||||
}
|
||||
if wndSize == 0 {
|
||||
wndSize = defaultSnapshotWindowSize
|
||||
}
|
||||
chunkSize = min(max(1024, chunkSize), 1024*1024) // Clamp within 1KiB to 1MiB
|
||||
wndSize = min(max(1024, wndSize), 32*1024*1024) // Clamp within 1KiB to 32MiB
|
||||
wndSize = max(wndSize, chunkSize) // Guarantee at least one chunk
|
||||
maxInflight := wndSize / chunkSize // Between 1 and 32,768
|
||||
|
||||
// Setup for the chunk stream.
|
||||
reply := req.DeliverSubject
|
||||
r := sr.Reader
|
||||
defer r.Close()
|
||||
|
||||
// In case we run into an error, this allows subscription callbacks
|
||||
// to not sit and block endlessly.
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
|
||||
// Check interest for the snapshot deliver subject.
|
||||
inch := make(chan bool, 1)
|
||||
acc.sl.RegisterNotification(req.DeliverSubject, inch)
|
||||
@@ -4425,78 +4446,59 @@ func (s *Server) streamSnapshot(acc *Account, mset *stream, sr *SnapshotResult,
|
||||
}
|
||||
}
|
||||
|
||||
// Create our ack flow handler.
|
||||
// This is very simple for now.
|
||||
ackSize := defaultSnapshotWindowSize / chunkSize
|
||||
if ackSize < 8 {
|
||||
ackSize = 8
|
||||
} else if ackSize > 8*1024 {
|
||||
ackSize = 8 * 1024
|
||||
// One slot per chunk. Each chunk read takes a slot, each ack will
|
||||
// replace it. Smooths out in-flight number of chunks.
|
||||
slots := make(chan struct{}, maxInflight)
|
||||
for range maxInflight {
|
||||
slots <- struct{}{}
|
||||
}
|
||||
acks := make(chan struct{}, ackSize)
|
||||
acks <- struct{}{}
|
||||
|
||||
// Track bytes outstanding.
|
||||
var out int32
|
||||
|
||||
// We will place sequence number and size of chunk sent in the reply.
|
||||
ackSubj := fmt.Sprintf(jsSnapshotAckT, mset.name(), nuid.Next())
|
||||
ackSub, _ := mset.subscribeInternal(ackSubj+".>", func(_ *subscription, _ *client, _ *Account, subject, _ string, _ []byte) {
|
||||
cs, _ := strconv.Atoi(tokenAt(subject, 6))
|
||||
// This is very crude and simple, but ok for now.
|
||||
// This only matters when sending multiple chunks.
|
||||
if atomic.AddInt32(&out, int32(-cs)) < defaultSnapshotWindowSize {
|
||||
select {
|
||||
case acks <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
select {
|
||||
case slots <- struct{}{}:
|
||||
case <-done:
|
||||
}
|
||||
})
|
||||
defer mset.unsubscribe(ackSub)
|
||||
|
||||
// TODO(dlc) - Add in NATS-Chunked-Sequence header
|
||||
var hdr []byte
|
||||
chunk := make([]byte, chunkSize)
|
||||
for index := 1; ; index++ {
|
||||
chunk := make([]byte, chunkSize)
|
||||
n, err := r.Read(chunk)
|
||||
chunk = chunk[:n]
|
||||
select {
|
||||
case <-slots:
|
||||
// A slot has become available.
|
||||
case <-inch:
|
||||
// The receiver appears to have gone away.
|
||||
hdr = []byte("NATS/1.0 408 No Interest\r\n\r\n")
|
||||
goto done
|
||||
case err := <-sr.errCh:
|
||||
// The snapshotting goroutine has failed for some reason.
|
||||
hdr = []byte(fmt.Sprintf("NATS/1.0 500 %s\r\n\r\n", err))
|
||||
goto done
|
||||
case <-time.After(snapshotAckTimeout):
|
||||
// It's taking a very long time for the receiver to send us acks,
|
||||
// they have probably stalled or there is high loss on the link.
|
||||
hdr = []byte("NATS/1.0 408 No Flow Response\r\n\r\n")
|
||||
goto done
|
||||
}
|
||||
n, err := io.ReadFull(r, chunk)
|
||||
chunk := chunk[:n]
|
||||
if err != nil {
|
||||
if n > 0 {
|
||||
mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, nil, chunk, nil, 0))
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// Wait on acks for flow control if past our window size.
|
||||
// Wait up to 10ms for now if no acks received.
|
||||
if atomic.LoadInt32(&out) > defaultSnapshotWindowSize {
|
||||
select {
|
||||
case <-acks:
|
||||
// ok to proceed.
|
||||
case <-inch:
|
||||
// Lost interest
|
||||
hdr = []byte("NATS/1.0 408 No Interest\r\n\r\n")
|
||||
goto done
|
||||
case <-time.After(2 * time.Second):
|
||||
hdr = []byte("NATS/1.0 408 No Flow Response\r\n\r\n")
|
||||
goto done
|
||||
}
|
||||
}
|
||||
ackReply := fmt.Sprintf("%s.%d.%d", ackSubj, len(chunk), index)
|
||||
if hdr == nil {
|
||||
hdr = []byte("NATS/1.0 204\r\n\r\n")
|
||||
}
|
||||
mset.outq.send(newJSPubMsg(reply, _EMPTY_, ackReply, nil, chunk, nil, 0))
|
||||
atomic.AddInt32(&out, int32(len(chunk)))
|
||||
}
|
||||
|
||||
if err := <-sr.errCh; err != _EMPTY_ {
|
||||
hdr = []byte(fmt.Sprintf("NATS/1.0 500 %s\r\n\r\n", err))
|
||||
}
|
||||
|
||||
done:
|
||||
// Send last EOF
|
||||
// TODO(dlc) - place hash in header
|
||||
mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
|
||||
}
|
||||
|
||||
@@ -4639,6 +4641,8 @@ func (s *Server) jsConsumerCreateRequest(sub *subscription, c *client, a *Accoun
|
||||
s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp))
|
||||
return
|
||||
}
|
||||
// Durable, so we need to honor the name.
|
||||
req.Config.Name = consumerName
|
||||
}
|
||||
// If new style and durable set make sure they match.
|
||||
if rt == ccNew {
|
||||
|
||||
+999
-441
File diff suppressed because it is too large
Load Diff
+1
-1
@@ -17,7 +17,7 @@ import "strconv"
|
||||
|
||||
const (
|
||||
// JSApiLevel is the maximum supported JetStream API level for this server.
|
||||
JSApiLevel int = 2
|
||||
JSApiLevel int = 3
|
||||
|
||||
JSRequiredLevelMetadataKey = "_nats.req.level"
|
||||
JSServerVersionMetadataKey = "_nats.ver"
|
||||
|
||||
+19
-5
@@ -2760,6 +2760,14 @@ func (c *client) processLeafSub(argo []byte) (err error) {
|
||||
}
|
||||
|
||||
acc := c.acc
|
||||
// Guard against LS+ arriving before CONNECT has been processed, which
|
||||
// can happen when compression is enabled.
|
||||
if acc == nil {
|
||||
c.mu.Unlock()
|
||||
c.sendErr("Authorization Violation")
|
||||
c.closeConnection(ProtocolViolation)
|
||||
return nil
|
||||
}
|
||||
// Check if we have a loop.
|
||||
ldsPrefix := bytes.HasPrefix(sub.subject, []byte(leafNodeLoopDetectionSubjectPrefix))
|
||||
|
||||
@@ -2876,7 +2884,6 @@ func (c *client) processLeafUnsub(arg []byte) error {
|
||||
// Indicate any activity, so pub and sub or unsubs.
|
||||
c.in.subs++
|
||||
|
||||
acc := c.acc
|
||||
srv := c.srv
|
||||
|
||||
c.mu.Lock()
|
||||
@@ -2885,6 +2892,15 @@ func (c *client) processLeafUnsub(arg []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
acc := c.acc
|
||||
// Guard against LS- arriving before CONNECT has been processed.
|
||||
if acc == nil {
|
||||
c.mu.Unlock()
|
||||
c.sendErr("Authorization Violation")
|
||||
c.closeConnection(ProtocolViolation)
|
||||
return nil
|
||||
}
|
||||
|
||||
spoke := c.isSpokeLeafNode()
|
||||
// We store local subs by account and subject and optionally queue name.
|
||||
// LS- will have the arg exactly as the key.
|
||||
@@ -2916,8 +2932,7 @@ func (c *client) processLeafUnsub(arg []byte) error {
|
||||
|
||||
func (c *client) processLeafHeaderMsgArgs(arg []byte) error {
|
||||
// Unroll splitArgs to avoid runtime/heap issues
|
||||
a := [MAX_MSG_ARGS][]byte{}
|
||||
args := a[:0]
|
||||
args := c.argsa[:0]
|
||||
start := -1
|
||||
for i, b := range arg {
|
||||
switch b {
|
||||
@@ -3000,8 +3015,7 @@ func (c *client) processLeafHeaderMsgArgs(arg []byte) error {
|
||||
|
||||
func (c *client) processLeafMsgArgs(arg []byte) error {
|
||||
// Unroll splitArgs to avoid runtime/heap issues
|
||||
a := [MAX_MSG_ARGS][]byte{}
|
||||
args := a[:0]
|
||||
args := c.argsa[:0]
|
||||
start := -1
|
||||
for i, b := range arg {
|
||||
switch b {
|
||||
|
||||
+100
-37
@@ -786,6 +786,10 @@ func (ms *memStore) allLastSeqsLocked() ([]uint64, error) {
|
||||
|
||||
seqs := make([]uint64, 0, ms.fss.Size())
|
||||
ms.fss.IterFast(func(subj []byte, ss *SimpleState) bool {
|
||||
// Check if we need to recalculate. We only care about the last sequence.
|
||||
if ss.lastNeedsUpdate {
|
||||
ms.recalculateForSubj(bytesToString(subj), ss)
|
||||
}
|
||||
seqs = append(seqs, ss.Last)
|
||||
return true
|
||||
})
|
||||
@@ -803,6 +807,7 @@ func (ms *memStore) filterIsAll(filters []string) bool {
|
||||
}
|
||||
// Sort so we can compare.
|
||||
slices.Sort(filters)
|
||||
slices.Sort(ms.cfg.Subjects)
|
||||
for i, subj := range filters {
|
||||
if !subjectIsSubsetMatch(ms.cfg.Subjects[i], subj) {
|
||||
return false
|
||||
@@ -814,8 +819,8 @@ func (ms *memStore) filterIsAll(filters []string) bool {
|
||||
// MultiLastSeqs will return a sorted list of sequences that match all subjects presented in filters.
|
||||
// We will not exceed the maxSeq, which if 0 becomes the store's last sequence.
|
||||
func (ms *memStore) MultiLastSeqs(filters []string, maxSeq uint64, maxAllowed int) ([]uint64, error) {
|
||||
ms.mu.RLock()
|
||||
defer ms.mu.RUnlock()
|
||||
ms.mu.Lock()
|
||||
defer ms.mu.Unlock()
|
||||
|
||||
if len(ms.msgs) == 0 {
|
||||
return nil, nil
|
||||
@@ -843,6 +848,9 @@ func (ms *memStore) MultiLastSeqs(filters []string, maxSeq uint64, maxAllowed in
|
||||
|
||||
for _, filter := range filters {
|
||||
ms.fss.Match(stringToBytes(filter), func(subj []byte, ss *SimpleState) {
|
||||
if ss.lastNeedsUpdate {
|
||||
ms.recalculateForSubj(bytesToString(subj), ss)
|
||||
}
|
||||
if ss.Last <= maxSeq {
|
||||
addIfNotDupe(ss.Last)
|
||||
} else if ss.Msgs > 1 {
|
||||
@@ -1664,7 +1672,8 @@ func (ms *memStore) SubjectForSeq(seq uint64) (string, error) {
|
||||
return _EMPTY_, ErrStoreMsgNotFound
|
||||
}
|
||||
if sm, ok := ms.msgs[seq]; ok {
|
||||
return sm.subj, nil
|
||||
// Copy the subject, as it's used elsewhere, and we've released the lock in the meantime.
|
||||
return copyString(sm.subj), nil
|
||||
}
|
||||
return _EMPTY_, ErrStoreMsgNotFound
|
||||
}
|
||||
@@ -1716,6 +1725,10 @@ func (ms *memStore) LoadLastMsg(subject string, smp *StoreMsg) (*StoreMsg, error
|
||||
} else if subjectIsLiteral(subject) {
|
||||
var ss *SimpleState
|
||||
if ss, ok = ms.fss.Find(stringToBytes(subject)); ok && ss.Msgs > 0 {
|
||||
// Check if we need to recalculate. We only care about the last sequence.
|
||||
if ss.lastNeedsUpdate {
|
||||
ms.recalculateForSubj(subject, ss)
|
||||
}
|
||||
sm, ok = ms.msgs[ss.Last]
|
||||
}
|
||||
} else if ss := ms.filteredStateLocked(1, subject, true); ss.Msgs > 0 {
|
||||
@@ -1774,6 +1787,78 @@ func (ms *memStore) LoadNextMsg(filter string, wc bool, start uint64, smp *Store
|
||||
return ms.loadNextMsgLocked(filter, wc, start, smp)
|
||||
}
|
||||
|
||||
// Find sequence bounds matching a wildcard filter from ms.fss.
|
||||
// Returns (first, last, true) if there is at least one matching
|
||||
// subject at or after start (start <= first <= last).
|
||||
// Returns (0, 0, false) if the subject does not exist or has no
|
||||
// messages at or after start.
|
||||
// Lock should be held.
|
||||
func (ms *memStore) nextWildcardMatchLocked(filter string, start uint64) (uint64, uint64, bool) {
|
||||
found := false
|
||||
first, last := ms.state.LastSeq, uint64(0)
|
||||
ms.fss.MatchUntil(stringToBytes(filter), func(subj []byte, ss *SimpleState) bool {
|
||||
ms.recalculateForSubj(string(subj), ss)
|
||||
|
||||
// Skip matches that are below our starting sequence
|
||||
if start > ss.Last {
|
||||
return true
|
||||
}
|
||||
|
||||
// A match was found, adjust the bounds accordingly
|
||||
found = true
|
||||
if ss.First < first {
|
||||
first = ss.First
|
||||
}
|
||||
if ss.Last > last {
|
||||
last = ss.Last
|
||||
}
|
||||
|
||||
// If first > start, there may be more matches between
|
||||
// start and first, in which case we keep searching.
|
||||
// If not, we have a match between start and last, we
|
||||
// can break out of the search.
|
||||
// This could be further optimized: if first and start
|
||||
// are "close", we could just extend the linear search,
|
||||
// especially if we know that the remaining ms.fss to
|
||||
// explore is large.
|
||||
return first > start
|
||||
})
|
||||
if !found {
|
||||
return 0, 0, false
|
||||
}
|
||||
return max(first, start), last, found
|
||||
}
|
||||
|
||||
// Find sequence bounds matching a literal filter from ms.fss.
|
||||
// Returns (first, last, true) if there is a matching literal
|
||||
// subject at or after start (start <= first <= last).
|
||||
// Returns (0, 0, false) if the subject does not exist or has no
|
||||
// messages at or after start.
|
||||
// Lock should be held.
|
||||
func (ms *memStore) nextLiteralMatchLocked(filter string, start uint64) (uint64, uint64, bool) {
|
||||
ss, ok := ms.fss.Find(stringToBytes(filter))
|
||||
if !ok {
|
||||
return 0, 0, false
|
||||
}
|
||||
ms.recalculateForSubj(filter, ss)
|
||||
if start > ss.Last {
|
||||
return 0, 0, false
|
||||
}
|
||||
return max(start, ss.First), ss.Last, true
|
||||
}
|
||||
|
||||
// Returns true if LoadNextMsg should perform a linear scan,
|
||||
// false if it should use the subject tree to try to reduce
|
||||
// the search space.
|
||||
// Lock should be held.
|
||||
func (ms *memStore) shouldLinearScan(filter string, wc bool, start uint64) bool {
|
||||
// Skip scan of ms.fss if number of messages in the block are less than
|
||||
// 1/2 the number of subjects in ms.fss. Or we have a wc and lots of fss entries.
|
||||
const linearScanMaxFSS = 256
|
||||
isAll := filter == fwcs
|
||||
return isAll || 2*int(ms.state.LastSeq-start) < ms.fss.Size() || (wc && ms.fss.Size() > linearScanMaxFSS)
|
||||
}
|
||||
|
||||
// Lock should be held.
|
||||
func (ms *memStore) loadNextMsgLocked(filter string, wc bool, start uint64, smp *StoreMsg) (*StoreMsg, uint64, error) {
|
||||
if start < ms.state.FirstSeq {
|
||||
@@ -1790,46 +1875,24 @@ func (ms *memStore) loadNextMsgLocked(filter string, wc bool, start uint64, smp
|
||||
}
|
||||
isAll := filter == fwcs
|
||||
|
||||
// Skip scan of ms.fss if number of messages in the block are less than
|
||||
// 1/2 the number of subjects in ms.fss. Or we have a wc and lots of fss entries.
|
||||
const linearScanMaxFSS = 256
|
||||
doLinearScan := isAll || 2*int(ms.state.LastSeq-start) < ms.fss.Size() || (wc && ms.fss.Size() > linearScanMaxFSS)
|
||||
|
||||
// Initial setup.
|
||||
fseq, lseq := start, ms.state.LastSeq
|
||||
|
||||
if !doLinearScan {
|
||||
subs := []string{filter}
|
||||
if wc || isAll {
|
||||
subs = subs[:0]
|
||||
ms.fss.Match(stringToBytes(filter), func(subj []byte, val *SimpleState) {
|
||||
subs = append(subs, string(subj))
|
||||
})
|
||||
if !ms.shouldLinearScan(filter, wc, start) {
|
||||
var found bool
|
||||
if wc {
|
||||
fseq, lseq, found = ms.nextWildcardMatchLocked(filter, start)
|
||||
} else {
|
||||
fseq, lseq, found = ms.nextLiteralMatchLocked(filter, start)
|
||||
}
|
||||
fseq, lseq = ms.state.LastSeq, uint64(0)
|
||||
for _, subj := range subs {
|
||||
ss, ok := ms.fss.Find(stringToBytes(subj))
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if ss.firstNeedsUpdate || ss.lastNeedsUpdate {
|
||||
ms.recalculateForSubj(subj, ss)
|
||||
}
|
||||
if ss.First < fseq {
|
||||
fseq = ss.First
|
||||
}
|
||||
if ss.Last > lseq {
|
||||
lseq = ss.Last
|
||||
}
|
||||
}
|
||||
if fseq < start {
|
||||
fseq = start
|
||||
if !found {
|
||||
return nil, ms.state.LastSeq, ErrStoreEOF
|
||||
}
|
||||
}
|
||||
|
||||
eq := subjectsEqual
|
||||
if wc {
|
||||
eq = subjectIsSubsetMatch
|
||||
eq = matchLiteral
|
||||
}
|
||||
|
||||
for nseq := fseq; nseq <= lseq; nseq++ {
|
||||
@@ -2113,8 +2176,8 @@ func (ms *memStore) FastState(state *StreamState) {
|
||||
}
|
||||
|
||||
func (ms *memStore) State() StreamState {
|
||||
ms.mu.RLock()
|
||||
defer ms.mu.RUnlock()
|
||||
ms.mu.Lock()
|
||||
defer ms.mu.Unlock()
|
||||
|
||||
state := ms.state
|
||||
state.Consumers = ms.consumers
|
||||
@@ -2336,7 +2399,7 @@ func (o *consumerMemStore) Update(state *ConsumerState) error {
|
||||
|
||||
// Check to see if this is an outdated update.
|
||||
if state.Delivered.Consumer < o.state.Delivered.Consumer || state.AckFloor.Stream < o.state.AckFloor.Stream {
|
||||
return fmt.Errorf("old update ignored")
|
||||
return ErrStoreOldUpdate
|
||||
}
|
||||
|
||||
o.state.Delivered = state.Delivered
|
||||
|
||||
+4
-4
@@ -1,4 +1,4 @@
|
||||
// Copyright 2020-2025 The NATS Authors
|
||||
// Copyright 2020-2026 The NATS Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -585,7 +585,7 @@ func (s *Server) createMQTTClient(conn net.Conn, ws *websocket) *client {
|
||||
return c
|
||||
}
|
||||
|
||||
if opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn {
|
||||
if opts.MaxConn < 0 || (opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn) {
|
||||
s.mu.Unlock()
|
||||
c.maxConnExceeded()
|
||||
return nil
|
||||
@@ -4774,9 +4774,9 @@ func (c *client) mqttParseSubsOrUnsubs(r *mqttReader, b byte, pl int, sub bool)
|
||||
if rf := b & 0xf; rf != expectedFlag {
|
||||
return 0, nil, fmt.Errorf("wrong %ssubscribe reserved flags: %x", action, rf)
|
||||
}
|
||||
pi, err := r.readUint16("packet identifier")
|
||||
pi, err := mqttParsePIPacket(r)
|
||||
if err != nil {
|
||||
return 0, nil, fmt.Errorf("reading packet identifier: %v", err)
|
||||
return 0, nil, err
|
||||
}
|
||||
end := r.pos + (pl - 2)
|
||||
var filters []*mqttFilter
|
||||
|
||||
+6
-1
@@ -389,6 +389,7 @@ type Options struct {
|
||||
JetStreamRequestQueueLimit int64
|
||||
JetStreamMetaCompact uint64
|
||||
JetStreamMetaCompactSize uint64
|
||||
JetStreamMetaCompactSync bool
|
||||
StreamMaxBufferedMsgs int `json:"-"`
|
||||
StreamMaxBufferedSize int64 `json:"-"`
|
||||
StoreDir string `json:"-"`
|
||||
@@ -1268,7 +1269,9 @@ func (o *Options) processConfigFileLine(k string, v any, errors *[]error, warnin
|
||||
case "proxy_protocol":
|
||||
o.ProxyProtocol = v.(bool)
|
||||
case "max_connections", "max_conn":
|
||||
o.MaxConn = int(v.(int64))
|
||||
if o.MaxConn = int(v.(int64)); o.MaxConn == 0 {
|
||||
o.MaxConn = -1
|
||||
}
|
||||
case "max_traced_msg_len":
|
||||
o.MaxTracedMsgLen = int(v.(int64))
|
||||
case "max_subscriptions", "max_subs":
|
||||
@@ -2653,6 +2656,8 @@ func parseJetStream(v any, opts *Options, errors *[]error, warnings *[]error) er
|
||||
return &configErr{tk, fmt.Sprintf("Expected an absolute size for %q, got %v", mk, mv)}
|
||||
}
|
||||
opts.JetStreamMetaCompactSize = uint64(s)
|
||||
case "meta_compact_sync":
|
||||
opts.JetStreamMetaCompactSync = mv.(bool)
|
||||
default:
|
||||
if !tk.IsUsedVariable() {
|
||||
err := &unknownConfigFieldErr{
|
||||
|
||||
+1
@@ -32,6 +32,7 @@ type parseState struct {
|
||||
msgBuf []byte
|
||||
header http.Header // access via getHeader
|
||||
scratch [MAX_CONTROL_LINE_SIZE]byte
|
||||
argsa [MAX_HMSG_ARGS + 1][]byte // pre-allocated args array to avoid per-call heap escape
|
||||
}
|
||||
|
||||
type pubArg struct {
|
||||
|
||||
+383
-141
@@ -19,6 +19,7 @@ import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"iter"
|
||||
"math"
|
||||
"math/rand"
|
||||
"net"
|
||||
@@ -40,7 +41,8 @@ type RaftNode interface {
|
||||
Propose(entry []byte) error
|
||||
ProposeMulti(entries []*Entry) error
|
||||
ForwardProposal(entry []byte) error
|
||||
InstallSnapshot(snap []byte) error
|
||||
InstallSnapshot(snap []byte, force bool) error
|
||||
CreateSnapshotCheckpoint(force bool) (RaftNodeCheckpoint, error)
|
||||
SendSnapshot(snap []byte) error
|
||||
NeedSnapshot() bool
|
||||
Applied(index uint64) (entries uint64, bytes uint64)
|
||||
@@ -89,6 +91,17 @@ type RaftNode interface {
|
||||
GetTrafficAccountName() string
|
||||
}
|
||||
|
||||
// RaftNodeCheckpoint is used as an alternative to a direct InstallSnapshot.
|
||||
// A checkpoint is created from CreateSnapshotCheckpoint and allows installing snapshots asynchronously,
|
||||
// as well as loading the last snapshot or entries between the last snapshot and the one we're about to create.
|
||||
// Abort can be called to cancel the snapshot installation at any time, or InstallSnapshot to install it.
|
||||
type RaftNodeCheckpoint interface {
|
||||
// LoadLastSnapshot returns the data of the previously installed snapshot.
LoadLastSnapshot() (snap []byte, err error)
|
||||
// AppendEntriesSeq iterates over the log entries this checkpoint's snapshot will cover.
// Iteration ends after the first non-nil error is yielded.
AppendEntriesSeq() iter.Seq2[*appendEntry, error]
|
||||
// Abort cancels the checkpoint without installing anything.
Abort()
|
||||
// InstallSnapshot installs data as the snapshot for this checkpoint and
// reports how many bytes the log shrank by as a result of compaction.
InstallSnapshot(data []byte) (uint64, error)
|
||||
}
|
||||
|
||||
type WAL interface {
|
||||
Type() StorageType
|
||||
StoreMsg(subj string, hdr, msg []byte, ttl int64) (uint64, int64, error)
|
||||
@@ -179,6 +192,8 @@ type raft struct {
|
||||
applied uint64 // Index of the most recently applied commit
|
||||
papplied uint64 // First sequence of our log, matches when we last installed a snapshot.
|
||||
|
||||
membChangeIndex uint64 // Index of uncommitted membership change entry (0 means no change in progress)
|
||||
|
||||
aflr uint64 // Index when to signal initial messages have been applied after becoming leader. 0 means signaling is disabled.
|
||||
|
||||
leader string // The ID of the leader
|
||||
@@ -231,8 +246,8 @@ type raft struct {
|
||||
observer bool // The node is observing, i.e. not able to become leader
|
||||
initializing bool // The node is new, and "empty log" checks can be temporarily relaxed.
|
||||
scaleUp bool // The node is part of a scale up, puts us in observer mode until the log contains data.
|
||||
membChanging bool // There is a membership change proposal in progress
|
||||
deleted bool // If the node was deleted.
|
||||
snapshotting bool // Snapshot is in progress.
|
||||
}
|
||||
|
||||
type proposedEntry struct {
|
||||
@@ -240,7 +255,7 @@ type proposedEntry struct {
|
||||
reply string // Optional, to respond once proposal handled
|
||||
}
|
||||
|
||||
// cacthupState structure that holds our subscription, and catchup term and index
|
||||
// catchupState structure that holds our subscription, and catchup term and index
|
||||
// as well as starting term and index and how many updates we have seen.
|
||||
type catchupState struct {
|
||||
sub *subscription // Subscription that catchup messages will arrive on
|
||||
@@ -249,6 +264,7 @@ type catchupState struct {
|
||||
pterm uint64 // Starting term
|
||||
pindex uint64 // Starting index
|
||||
active time.Time // Last time we received a message for this catchup
|
||||
signal bool // Whether the EntryCatchup signal was sent.
|
||||
}
|
||||
|
||||
// lps holds peer state of last time and last index replicated.
|
||||
@@ -311,6 +327,8 @@ var (
|
||||
errNodeRemoved = errors.New("raft: peer was removed")
|
||||
errBadSnapName = errors.New("raft: snapshot name could not be parsed")
|
||||
errNoSnapAvailable = errors.New("raft: no snapshot available")
|
||||
errSnapInProgress = errors.New("raft: snapshot is already in progress")
|
||||
errSnapAborted = errors.New("raft: snapshot was aborted")
|
||||
errCatchupsRunning = errors.New("raft: snapshot can not be installed while catchups running")
|
||||
errSnapshotCorrupt = errors.New("raft: snapshot corrupt")
|
||||
errTooManyPrefs = errors.New("raft: stepdown requires at most one preferred new leader")
|
||||
@@ -390,6 +408,19 @@ func (s *Server) bootstrapRaftNode(cfg *RaftConfig, knownPeers []string, allPeer
|
||||
|
||||
// initRaftNode will initialize the raft node, to be used by startRaftNode or when testing to not run the Go routine.
|
||||
func (s *Server) initRaftNode(accName string, cfg *RaftConfig, labels pprofLabels) (*raft, error) {
|
||||
restorePeerState := func(n *raft) error {
|
||||
ps, err := readPeerState(cfg.Store)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ps == nil {
|
||||
return errNoPeerState
|
||||
}
|
||||
n.processPeerState(ps)
|
||||
n.extSt = ps.domainExt
|
||||
return nil
|
||||
}
|
||||
|
||||
if cfg == nil {
|
||||
return nil, errNilCfg
|
||||
}
|
||||
@@ -401,15 +432,6 @@ func (s *Server) initRaftNode(accName string, cfg *RaftConfig, labels pprofLabel
|
||||
hash := s.sys.shash
|
||||
s.mu.RUnlock()
|
||||
|
||||
// Do this here to process error quicker.
|
||||
ps, err := readPeerState(cfg.Store)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ps == nil {
|
||||
return nil, errNoPeerState
|
||||
}
|
||||
|
||||
qpfx := fmt.Sprintf("[ACC:%s] RAFT '%s' ", accName, cfg.Name)
|
||||
n := &raft{
|
||||
created: time.Now(),
|
||||
@@ -419,8 +441,6 @@ func (s *Server) initRaftNode(accName string, cfg *RaftConfig, labels pprofLabel
|
||||
wal: cfg.Log,
|
||||
wtype: cfg.Log.Type(),
|
||||
track: cfg.Track,
|
||||
csz: ps.clusterSize,
|
||||
qn: ps.clusterSize/2 + 1,
|
||||
peers: make(map[string]*lps),
|
||||
acks: make(map[uint64]map[string]struct{}),
|
||||
pae: make(map[uint64]*appendEntry),
|
||||
@@ -436,7 +456,6 @@ func (s *Server) initRaftNode(accName string, cfg *RaftConfig, labels pprofLabel
|
||||
accName: accName,
|
||||
leadc: make(chan bool, 32),
|
||||
observer: cfg.Observer,
|
||||
extSt: ps.domainExt,
|
||||
}
|
||||
|
||||
// Setup our internal subscriptions for proposals, votes and append entries.
|
||||
@@ -473,6 +492,15 @@ func (s *Server) initRaftNode(accName string, cfg *RaftConfig, labels pprofLabel
|
||||
n.setupLastSnapshot()
|
||||
}
|
||||
|
||||
// We may have restored the peer state from the
|
||||
// snapshot above. If not, we restore peers from
|
||||
// the peer state file.
|
||||
if len(n.peers) == 0 {
|
||||
if err := restorePeerState(n); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure that the snapshots directory exists.
|
||||
if err := os.MkdirAll(filepath.Join(n.sd, snapshotsDir), defaultDirPerms); err != nil {
|
||||
return nil, fmt.Errorf("could not create snapshots directory - %v", err)
|
||||
@@ -532,18 +560,7 @@ func (s *Server) initRaftNode(accName string, cfg *RaftConfig, labels pprofLabel
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure to track ourselves.
|
||||
n.peers[n.id] = &lps{time.Now(), 0, true}
|
||||
|
||||
// Track known peers
|
||||
for _, peer := range ps.knownPeers {
|
||||
if peer != n.id {
|
||||
// Set these to 0 to start but mark as known peer.
|
||||
n.peers[peer] = &lps{time.Time{}, 0, true}
|
||||
}
|
||||
}
|
||||
|
||||
n.debug("Started")
|
||||
n.debug("Started (cluster size %d, quorum %d)", n.csz, n.qn)
|
||||
|
||||
// Check if we need to start in observer mode due to lame duck status.
|
||||
// This will stop us from taking on the leader role when we're about to
|
||||
@@ -926,24 +943,23 @@ func (n *raft) ForwardProposal(entry []byte) error {
|
||||
|
||||
// ProposeAddPeer is called to add a peer to the group.
|
||||
func (n *raft) ProposeAddPeer(peer string) error {
|
||||
n.Lock()
|
||||
n.RLock()
|
||||
// Check state under lock, we might not be leader anymore.
|
||||
if n.State() != Leader {
|
||||
n.Unlock()
|
||||
n.RUnlock()
|
||||
return errNotLeader
|
||||
}
|
||||
// Error if we had a previous write error.
|
||||
if werr := n.werr; werr != nil {
|
||||
n.Unlock()
|
||||
n.RUnlock()
|
||||
return werr
|
||||
}
|
||||
if n.membChanging {
|
||||
n.Unlock()
|
||||
if n.membChangeIndex > 0 {
|
||||
n.RUnlock()
|
||||
return errMembershipChange
|
||||
}
|
||||
prop := n.prop
|
||||
n.membChanging = true
|
||||
n.Unlock()
|
||||
n.RUnlock()
|
||||
|
||||
prop.push(newProposedEntry(newEntry(EntryAddPeer, []byte(peer)), _EMPTY_))
|
||||
return nil
|
||||
@@ -951,36 +967,35 @@ func (n *raft) ProposeAddPeer(peer string) error {
|
||||
|
||||
// ProposeRemovePeer is called to remove a peer from the group.
|
||||
func (n *raft) ProposeRemovePeer(peer string) error {
|
||||
n.Lock()
|
||||
n.RLock()
|
||||
|
||||
// Error if we had a previous write error.
|
||||
if werr := n.werr; werr != nil {
|
||||
n.Unlock()
|
||||
n.RUnlock()
|
||||
return werr
|
||||
}
|
||||
|
||||
if n.State() != Leader {
|
||||
subj := n.rpsubj
|
||||
n.Unlock()
|
||||
n.RUnlock()
|
||||
|
||||
// Forward the proposal to the leader
|
||||
n.sendRPC(subj, _EMPTY_, []byte(peer))
|
||||
return nil
|
||||
}
|
||||
|
||||
if n.membChanging {
|
||||
n.Unlock()
|
||||
if n.membChangeIndex > 0 {
|
||||
n.RUnlock()
|
||||
return errMembershipChange
|
||||
}
|
||||
|
||||
if len(n.peers) <= 1 {
|
||||
n.Unlock()
|
||||
n.RUnlock()
|
||||
return errRemoveLastNode
|
||||
}
|
||||
|
||||
prop := n.prop
|
||||
n.membChanging = true
|
||||
n.Unlock()
|
||||
n.RUnlock()
|
||||
|
||||
prop.push(newProposedEntry(newEntry(EntryRemovePeer, []byte(peer)), _EMPTY_))
|
||||
return nil
|
||||
@@ -989,7 +1004,7 @@ func (n *raft) ProposeRemovePeer(peer string) error {
|
||||
func (n *raft) MembershipChangeInProgress() bool {
|
||||
n.RLock()
|
||||
defer n.RUnlock()
|
||||
return n.membChanging
|
||||
return n.membChangeIndex > 0
|
||||
}
|
||||
|
||||
// ClusterSize reports back the total cluster size.
|
||||
@@ -1260,51 +1275,35 @@ func (n *raft) SendSnapshot(data []byte) error {
|
||||
// Used to install a snapshot for the given term and applied index. This will release
|
||||
// all of the log entries up to and including index. This should not be called with
|
||||
// entries that have been applied to the FSM but have not been applied to the raft state.
|
||||
func (n *raft) InstallSnapshot(data []byte) error {
|
||||
if n.State() == Closed {
|
||||
return errNodeClosed
|
||||
}
|
||||
|
||||
func (n *raft) InstallSnapshot(data []byte, force bool) error {
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
|
||||
// If a write error has occurred already then stop here.
|
||||
if werr := n.werr; werr != nil {
|
||||
return werr
|
||||
c, err := n.createSnapshotCheckpointLocked(force)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check that a catchup isn't already taking place. If it is then we won't
|
||||
// allow installing snapshots until it is done.
|
||||
if len(n.progress) > 0 || n.paused {
|
||||
return errCatchupsRunning
|
||||
}
|
||||
|
||||
if n.applied == 0 {
|
||||
n.debug("Not snapshotting as there are no applied entries")
|
||||
return errNoSnapAvailable
|
||||
}
|
||||
|
||||
var term uint64
|
||||
if ae, _ := n.loadEntry(n.applied); ae != nil {
|
||||
term = ae.term
|
||||
} else {
|
||||
n.debug("Not snapshotting as entry %d is not available", n.applied)
|
||||
return errNoSnapAvailable
|
||||
}
|
||||
|
||||
n.debug("Installing snapshot of %d bytes [%d:%d]", len(data), term, n.applied)
|
||||
|
||||
return n.installSnapshot(&snapshot{
|
||||
lastTerm: term,
|
||||
lastIndex: n.applied,
|
||||
peerstate: encodePeerState(&peerState{n.peerNames(), n.csz, n.extSt}),
|
||||
c.n.debug("Installing snapshot of %d bytes [%d:%d]", len(data), c.term, c.applied)
|
||||
snap := &snapshot{
|
||||
lastTerm: c.term,
|
||||
lastIndex: c.applied,
|
||||
peerstate: c.peerstate,
|
||||
data: data,
|
||||
})
|
||||
}
|
||||
return c.n.installSnapshot(snap)
|
||||
}
|
||||
|
||||
// Install the snapshot.
|
||||
// Lock should be held.
|
||||
func (n *raft) installSnapshot(snap *snapshot) error {
|
||||
// Always reset, regardless of success or error.
|
||||
// This is done even though this doesn't come from a checkpoint. We do this so we can
|
||||
// interrupt/abort an asynchronously running snapshot (if it exists). Ensures the upper layer
|
||||
// can't overwrite a snapshot that we installed here with an old asynchronously created one.
|
||||
defer func() {
|
||||
n.snapshotting = false
|
||||
}()
|
||||
|
||||
snapDir := filepath.Join(n.sd, snapshotsDir)
|
||||
sn := fmt.Sprintf(snapFileT, snap.lastTerm, snap.lastIndex)
|
||||
sfile := filepath.Join(snapDir, sn)
|
||||
@@ -1333,6 +1332,202 @@ func (n *raft) installSnapshot(snap *snapshot) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateSnapshotCheckpoint creates a checkpoint to allow installing a snapshot asynchronously.
|
||||
// Caller MUST make sure it only ever has one checkpoint handle at most, and either installs or
|
||||
// aborts the checkpoint.
|
||||
// See also: RaftNodeCheckpoint
|
||||
func (n *raft) CreateSnapshotCheckpoint(force bool) (RaftNodeCheckpoint, error) {
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
return n.createSnapshotCheckpointLocked(force)
|
||||
}
|
||||
|
||||
func (n *raft) createSnapshotCheckpointLocked(force bool) (*checkpoint, error) {
|
||||
if n.State() == Closed {
|
||||
return nil, errNodeClosed
|
||||
}
|
||||
if n.snapshotting {
|
||||
return nil, errSnapInProgress
|
||||
}
|
||||
|
||||
// If a write error has occurred already then stop here.
|
||||
if werr := n.werr; werr != nil {
|
||||
return nil, werr
|
||||
}
|
||||
|
||||
// Check that a catchup isn't already taking place. If it is then we won't
|
||||
// allow installing snapshots until it is done.
|
||||
// Unless we're forced to snapshot. We might have been catching up a peer for
|
||||
// a long period, and this protects our log size from growing indefinitely.
|
||||
if !force && len(n.progress) > 0 {
|
||||
return nil, errCatchupsRunning
|
||||
}
|
||||
|
||||
if n.applied == 0 {
|
||||
n.debug("Not snapshotting as there are no applied entries")
|
||||
return nil, errNoSnapAvailable
|
||||
}
|
||||
|
||||
var term uint64
|
||||
if ae, _ := n.loadEntry(n.applied); ae != nil {
|
||||
term = ae.term
|
||||
ae.returnToPool()
|
||||
} else {
|
||||
n.debug("Not snapshotting as entry %d is not available", n.applied)
|
||||
return nil, errNoSnapAvailable
|
||||
}
|
||||
|
||||
// Snapshot the current peer state for the current applied index, we'll need it in the snapshot.
|
||||
peerstate := encodePeerState(&peerState{n.peerNames(), n.csz, n.extSt})
|
||||
snapDir := filepath.Join(n.sd, snapshotsDir)
|
||||
snapFile := filepath.Join(snapDir, fmt.Sprintf(snapFileT, term, n.applied))
|
||||
|
||||
n.snapshotting = true
|
||||
c := &checkpoint{
|
||||
n: n,
|
||||
term: term,
|
||||
applied: n.applied,
|
||||
papplied: n.papplied,
|
||||
snapFile: snapFile,
|
||||
peerstate: peerstate,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// checkpoint is the handle handed out by CreateSnapshotCheckpoint. It holds the
// values captured from the raft node when the checkpoint was created, so that a
// snapshot for that exact applied index can be installed later on.
type checkpoint struct {
|
||||
n *raft // Reference to the RaftNode.
|
||||
term uint64 // The term of the entry at applied.
|
||||
applied uint64 // What applied value the snapshot will represent and what the log can be compacted to.
|
||||
papplied uint64 // Previous applied value of the previous snapshot.
|
||||
snapFile string // Where the snapshot should be installed.
|
||||
peerstate []byte // Encoded peerstate generated when creating this checkpoint.
|
||||
}
|
||||
|
||||
// LoadLastSnapshot loads the last snapshot from disk when using a RaftNodeCheckpoint.
|
||||
func (c *checkpoint) LoadLastSnapshot() ([]byte, error) {
|
||||
c.n.Lock()
|
||||
defer c.n.Unlock()
|
||||
if !c.n.snapshotting {
|
||||
// The checkpoint can be aborted at any time, don't continue if that happened.
|
||||
return nil, errSnapAborted
|
||||
}
|
||||
snap, err := c.n.loadLastSnapshot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if snap.lastIndex != c.papplied {
|
||||
// Another snapshot was installed in the meantime. This invalidates our checkpoint.
|
||||
return nil, errors.New("snapshot index mismatch")
|
||||
}
|
||||
return snap.data, nil
|
||||
}
|
||||
|
||||
// AppendEntriesSeq allows iterating over entries that can be compacted as part of a snapshot.
|
||||
func (c *checkpoint) AppendEntriesSeq() iter.Seq2[*appendEntry, error] {
|
||||
return func(yield func(*appendEntry, error) bool) {
|
||||
for index := c.papplied + 1; index <= c.applied; index++ {
|
||||
c.n.Lock()
|
||||
if !c.n.snapshotting {
|
||||
c.n.Unlock()
|
||||
// The checkpoint can be aborted at any time, don't continue if that happened.
|
||||
yield(nil, errSnapAborted)
|
||||
return
|
||||
}
|
||||
// Load entry and yield to the caller while unlocked.
|
||||
ae, err := c.n.loadEntry(index)
|
||||
c.n.Unlock()
|
||||
if err != nil {
|
||||
yield(nil, err)
|
||||
return
|
||||
}
|
||||
yield(ae, nil)
|
||||
ae.returnToPool()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Abort can be called to cancel the snapshot installation at any time.
|
||||
func (c *checkpoint) Abort() {
|
||||
c.n.Lock()
|
||||
defer c.n.Unlock()
|
||||
c.n.snapshotting = false
|
||||
}
|
||||
|
||||
// InstallSnapshot allows asynchronous installation of a snapshot by unlocking when
|
||||
// performing operations that don't strictly need to be locked. When the lock is re-acquired
|
||||
// n.snapshotting will be checked to ensure we're still meant to.
|
||||
// Async snapshots can only be used when using CreateSnapshotCheckpoint.
|
||||
// Lock should be held.
|
||||
func (c *checkpoint) InstallSnapshot(data []byte) (uint64, error) {
|
||||
n := c.n
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
if !n.snapshotting {
|
||||
// The checkpoint can be aborted at any time, don't continue if that happened.
|
||||
return 0, errSnapAborted
|
||||
}
|
||||
|
||||
// Always reset, regardless of success or error.
|
||||
defer func() {
|
||||
n.snapshotting = false
|
||||
}()
|
||||
|
||||
n.debug("Installing snapshot of %d bytes [%d:%d]", len(data), c.term, c.applied)
|
||||
snap := &snapshot{
|
||||
lastTerm: c.term,
|
||||
lastIndex: c.applied,
|
||||
peerstate: c.peerstate,
|
||||
data: data,
|
||||
}
|
||||
encoded := n.encodeSnapshot(snap)
|
||||
|
||||
// Unlock while writing.
|
||||
n.Unlock()
|
||||
err := writeFileWithSync(c.snapFile, encoded, defaultFilePerms)
|
||||
n.Lock()
|
||||
if err != nil {
|
||||
// We could set write err here, but if this is a temporary situation, too many open files etc.
|
||||
// we want to retry and snapshots are not fatal.
|
||||
return 0, err
|
||||
} else if !n.snapshotting {
|
||||
// The checkpoint can be aborted at any time, don't continue if that happened.
|
||||
return 0, errSnapAborted
|
||||
}
|
||||
|
||||
// Delete our previous snapshot file if it exists.
|
||||
if n.snapfile != _EMPTY_ && n.snapfile != c.snapFile {
|
||||
os.Remove(n.snapfile)
|
||||
}
|
||||
// Remember our latest snapshot file.
|
||||
n.snapfile = c.snapFile
|
||||
|
||||
// Unlock while compacting.
|
||||
n.Unlock()
|
||||
_, err = n.wal.Compact(snap.lastIndex + 1)
|
||||
n.Lock()
|
||||
if err != nil {
|
||||
n.setWriteErrLocked(err)
|
||||
return 0, err
|
||||
} else if !n.snapshotting {
|
||||
// The checkpoint can be aborted at any time, don't continue if that happened.
|
||||
return 0, errSnapAborted
|
||||
}
|
||||
|
||||
compacted := n.bytes
|
||||
var state StreamState
|
||||
n.wal.FastState(&state)
|
||||
n.papplied = snap.lastIndex
|
||||
n.bytes = state.Bytes
|
||||
|
||||
// Expose compacted size.
|
||||
if n.bytes > compacted {
|
||||
compacted = 0
|
||||
} else {
|
||||
compacted -= n.bytes
|
||||
}
|
||||
return compacted, nil
|
||||
}
|
||||
|
||||
// NeedSnapshot returns true if it is necessary to try to install a snapshot, i.e.
|
||||
// after we have finished recovering/replaying at startup, on a regular interval or
|
||||
// as a part of cleaning up when shutting down.
|
||||
@@ -1430,6 +1625,14 @@ func (n *raft) setupLastSnapshot() {
|
||||
// Applied will move up when the snapshot is actually applied.
|
||||
n.commit = snap.lastIndex
|
||||
n.papplied = snap.lastIndex
|
||||
// Restore the peerState
|
||||
ps, err := decodePeerState(snap.peerstate)
|
||||
if err == nil {
|
||||
n.processPeerState(ps)
|
||||
}
|
||||
n.processPeerState(ps)
|
||||
n.extSt = ps.domainExt
|
||||
|
||||
n.apply.push(newCommittedEntry(n.commit, []*Entry{{EntrySnapshot, snap.data}}))
|
||||
if _, err := n.wal.Compact(snap.lastIndex + 1); err != nil {
|
||||
n.setWriteErrLocked(err)
|
||||
@@ -1606,6 +1809,10 @@ func (n *raft) isCurrent(includeForwardProgress bool) bool {
|
||||
n.Unlock()
|
||||
time.Sleep(time.Millisecond)
|
||||
n.Lock()
|
||||
if n.State() == Closed {
|
||||
n.debug("Node closed during health check, returning not current")
|
||||
return false
|
||||
}
|
||||
if n.commit-n.applied < startDelta {
|
||||
// The gap is getting smaller, so we're making forward progress.
|
||||
clearBehindState()
|
||||
@@ -1844,13 +2051,34 @@ func (n *raft) Peers() []*Peer {
|
||||
|
||||
var peers []*Peer
|
||||
for id, ps := range n.peers {
|
||||
var current bool
|
||||
var lag uint64
|
||||
if n.commit > ps.li {
|
||||
lag = n.commit - ps.li
|
||||
if id == n.id {
|
||||
// We are current and have no lag when compared with ourselves.
|
||||
current = true
|
||||
} else if n.id == n.leader {
|
||||
// We are the leader, we know how many entries this replica has persisted.
|
||||
// Lag is determined by how many entries we have quorum on in our log that haven't yet
|
||||
// been persisted on the replica. They are current if there's no lag.
|
||||
// This will show all peers that are part of quorum as "current".
|
||||
if n.commit > ps.li {
|
||||
lag = n.commit - ps.li
|
||||
}
|
||||
current = lag == 0
|
||||
} else if id == n.leader {
|
||||
// This peer is the leader, we don't know our lag, but we can report
|
||||
// on whether we've seen the leader recently.
|
||||
okInterval := hbInterval * 2
|
||||
current = time.Since(ps.ts) <= okInterval
|
||||
} else {
|
||||
// The remaining condition is another follower that we're not in contact with.
|
||||
// We intentionally leave current and lag as empty.
|
||||
current, lag = false, 0
|
||||
}
|
||||
|
||||
p := &Peer{
|
||||
ID: id,
|
||||
Current: id == n.leader || ps.li >= n.applied,
|
||||
Current: current,
|
||||
Last: ps.ts,
|
||||
Lag: lag,
|
||||
}
|
||||
@@ -2192,7 +2420,7 @@ func (n *raft) setObserverLocked(isObserver bool, extSt extensionState) {
|
||||
// If we're leaving observer state then reset the election timer or
|
||||
// we might end up waiting for up to the observerModeInterval.
|
||||
if wasObserver && !isObserver {
|
||||
n.resetElect(randCampaignTimeout())
|
||||
n.resetElect(randElectionTimeout())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2610,18 +2838,23 @@ func (n *raft) handleForwardedRemovePeerProposal(sub *subscription, c *client, _
|
||||
|
||||
n.RLock()
|
||||
// Check state under lock, we might not be leader anymore.
|
||||
if n.State() != Leader {
|
||||
if n.State() != Leader || !n.leaderState.Load() {
|
||||
n.debug("Ignoring forwarded peer removal proposal, not leader")
|
||||
n.RUnlock()
|
||||
return
|
||||
}
|
||||
prop, werr := n.prop, n.werr
|
||||
n.RUnlock()
|
||||
|
||||
// Ignore if we have had a write error previous.
|
||||
if werr != nil {
|
||||
// Error if we had a previous write error.
|
||||
if werr := n.werr; werr != nil {
|
||||
n.RUnlock()
|
||||
return
|
||||
}
|
||||
if n.membChangeIndex > 0 {
|
||||
n.debug("Ignoring forwarded peer removal proposal, membership changing")
|
||||
n.RUnlock()
|
||||
return
|
||||
}
|
||||
prop := n.prop
|
||||
n.RUnlock()
|
||||
|
||||
// Need to copy since this is underlying client/route buffer.
|
||||
peer := copyBytes(msg)
|
||||
@@ -2635,7 +2868,7 @@ func (n *raft) handleForwardedProposal(sub *subscription, c *client, _ *Account,
|
||||
|
||||
n.RLock()
|
||||
// Check state under lock, we might not be leader anymore.
|
||||
if n.State() != Leader {
|
||||
if n.State() != Leader || !n.leaderState.Load() {
|
||||
n.debug("Ignoring forwarded proposal, not leader")
|
||||
n.RUnlock()
|
||||
return
|
||||
@@ -2697,14 +2930,18 @@ func (n *raft) sendMembershipChange(e *Entry) bool {
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
|
||||
// Only makes sense to call this with entries that change membership
|
||||
if !e.ChangesMembership() {
|
||||
// Only makes sense to call this with entries that change membership.
|
||||
// Also, ignore if we're already changing membership.
|
||||
if !e.ChangesMembership() || n.membChangeIndex > 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Set to the index where we will store the membership change.
|
||||
// It needs to be before we send, since if we're cluster size 1 we try to commit immediately.
|
||||
n.membChangeIndex = n.pindex + 1
|
||||
err := n.sendAppendEntryLocked([]*Entry{e}, true)
|
||||
if err != nil {
|
||||
n.membChanging = false
|
||||
n.membChangeIndex = 0
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -2722,24 +2959,6 @@ func (n *raft) sendMembershipChange(e *Entry) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// logContainsUncommittedMembershipChange returns true if the
|
||||
// log contains uncommitted entries that change membership.
|
||||
// Lock should be held.
|
||||
func (n *raft) logContainsUncommittedMembershipChange() (bool, error) {
|
||||
for i := n.commit + 1; i <= n.pindex; i++ {
|
||||
ae, err := n.loadEntry(i)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if len(ae.entries) > 0 && ae.entries[0].ChangesMembership() {
|
||||
ae.returnToPool()
|
||||
return true, nil
|
||||
}
|
||||
ae.returnToPool()
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (n *raft) runAsLeader() {
|
||||
if n.State() == Closed {
|
||||
return
|
||||
@@ -2748,22 +2967,6 @@ func (n *raft) runAsLeader() {
|
||||
n.Lock()
|
||||
psubj, rpsubj := n.psubj, n.rpsubj
|
||||
|
||||
// Check if there are any uncommitted membership changes.
|
||||
// If so, we need to make sure we don't propose any new
|
||||
// ones until those are committed.
|
||||
found, err := n.logContainsUncommittedMembershipChange()
|
||||
if err != nil {
|
||||
n.warn("Error while looking for membership changes in WAL: %v", err)
|
||||
n.stepdownLocked(noLeader)
|
||||
n.Unlock()
|
||||
return
|
||||
|
||||
}
|
||||
if found {
|
||||
n.membChanging = true
|
||||
n.debug("Log contains uncommitted membership change")
|
||||
}
|
||||
|
||||
// For forwarded proposals, both normal and remove peer proposals.
|
||||
fsub, err := n.subscribe(psubj, n.handleForwardedProposal)
|
||||
if err != nil {
|
||||
@@ -3138,9 +3341,8 @@ func (n *raft) catchupFollower(ar *appendEntryResponse) {
|
||||
indexUpdates := newIPQueue[uint64](n.s, fmt.Sprintf("[ACC:%s] RAFT '%s' indexUpdates", n.accName, n.group))
|
||||
indexUpdates.push(ae.pindex)
|
||||
n.progress[ar.peer] = indexUpdates
|
||||
n.Unlock()
|
||||
|
||||
n.wg.Add(1)
|
||||
n.Unlock()
|
||||
n.s.startGoRoutine(func() {
|
||||
defer n.wg.Done()
|
||||
n.runCatchup(ar, indexUpdates)
|
||||
@@ -3245,7 +3447,7 @@ func (n *raft) applyCommit(index uint64) error {
|
||||
committed = append(committed, e)
|
||||
|
||||
// We are done with this membership change
|
||||
n.membChanging = false
|
||||
n.membChangeIndex = 0
|
||||
|
||||
case EntryRemovePeer:
|
||||
peer := string(e.Data)
|
||||
@@ -3260,7 +3462,7 @@ func (n *raft) applyCommit(index uint64) error {
|
||||
committed = append(committed, e)
|
||||
|
||||
// We are done with this membership change
|
||||
n.membChanging = false
|
||||
n.membChangeIndex = 0
|
||||
|
||||
// If this is us and we are the leader signal the caller
|
||||
// to attempt to stepdown.
|
||||
@@ -3502,9 +3704,8 @@ func (n *raft) cancelCatchup() {
|
||||
|
||||
if n.catchup != nil && n.catchup.sub != nil {
|
||||
n.unsubscribe(n.catchup.sub)
|
||||
// Send nil entry to signal the upper layers we are done catching up.
|
||||
n.apply.push(nil)
|
||||
}
|
||||
n.cancelCatchupSignal()
|
||||
n.catchup = nil
|
||||
}
|
||||
|
||||
@@ -3531,9 +3732,6 @@ func (n *raft) createCatchup(ae *appendEntry) string {
|
||||
// Cleanup any old ones.
|
||||
if n.catchup != nil && n.catchup.sub != nil {
|
||||
n.unsubscribe(n.catchup.sub)
|
||||
} else {
|
||||
// Signal to the upper layer that the following entries are catchup entries, up until the nil guard.
|
||||
n.apply.push(newCommittedEntry(0, []*Entry{{EntryCatchup, nil}}))
|
||||
}
|
||||
// Snapshot term and index.
|
||||
n.catchup = &catchupState{
|
||||
@@ -3546,10 +3744,28 @@ func (n *raft) createCatchup(ae *appendEntry) string {
|
||||
inbox := n.newCatchupInbox()
|
||||
sub, _ := n.subscribe(inbox, n.handleAppendEntry)
|
||||
n.catchup.sub = sub
|
||||
|
||||
return inbox
|
||||
}
|
||||
|
||||
// Lock should be held.
|
||||
func (n *raft) sendCatchupSignal() {
|
||||
if n.catchup == nil || n.catchup.signal {
|
||||
return
|
||||
}
|
||||
n.catchup.signal = true
|
||||
// Signal to the upper layer that the following entries are catchup entries, up until the nil guard.
|
||||
n.apply.push(newCommittedEntry(0, []*Entry{{EntryCatchup, nil}}))
|
||||
}
|
||||
|
||||
// Lock should be held.
|
||||
func (n *raft) cancelCatchupSignal() {
|
||||
if n.catchup == nil || !n.catchup.signal {
|
||||
return
|
||||
}
|
||||
// Send nil entry to signal the upper layers we are done catching up.
|
||||
n.apply.push(nil)
|
||||
}
|
||||
|
||||
// Truncate our WAL and reset.
|
||||
// Lock should be held.
|
||||
func (n *raft) truncateWAL(term, index uint64) {
|
||||
@@ -3591,9 +3807,6 @@ func (n *raft) truncateWAL(term, index uint64) {
|
||||
if n.applied > n.processed {
|
||||
n.applied = n.processed
|
||||
}
|
||||
if n.papplied > n.applied {
|
||||
n.papplied = n.applied
|
||||
}
|
||||
// Refresh bytes count after truncate.
|
||||
var state StreamState
|
||||
n.wal.FastState(&state)
|
||||
@@ -3607,6 +3820,11 @@ func (n *raft) truncateWAL(term, index uint64) {
|
||||
}
|
||||
// Set after we know we have truncated properly.
|
||||
n.pterm, n.pindex = term, index
|
||||
|
||||
// Check if we're truncating an uncommitted membership change.
|
||||
if n.membChangeIndex > 0 && n.membChangeIndex > index {
|
||||
n.membChangeIndex = 0
|
||||
}
|
||||
}
|
||||
|
||||
// Reset our WAL. This is equivalent to truncating all data from the log.
|
||||
@@ -3633,14 +3851,17 @@ func (n *raft) updateLeader(newLeader string) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Reset last seen timestamps.
|
||||
// Reset last seen timestamps and indices.
|
||||
// If we are (or were) the leader we track(ed) everyone, and don't reset.
|
||||
// But if we're a follower we only track the leader, and reset all others.
|
||||
if newLeader != n.id && !wasLeader {
|
||||
for peer, ps := range n.peers {
|
||||
// Always reset last replicated index.
|
||||
ps.li = 0
|
||||
if peer == newLeader {
|
||||
continue
|
||||
}
|
||||
// Only reset the last seen timestamp if this peer is not the leader.
|
||||
ps.ts = time.Time{}
|
||||
}
|
||||
}
|
||||
@@ -3900,6 +4121,7 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
|
||||
}
|
||||
|
||||
// Inherit state from appendEntry with the leader's snapshot.
|
||||
hadPreviousSnapshot := n.snapfile != _EMPTY_
|
||||
n.pindex = ae.pindex
|
||||
n.pterm = ae.pterm
|
||||
n.commit = ae.pindex
|
||||
@@ -3918,8 +4140,18 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
|
||||
}
|
||||
n.resetInitializing()
|
||||
|
||||
if !hadPreviousSnapshot {
|
||||
// If the first snapshot we install is received from another server, then we immediately signal
|
||||
// to the upper-layer it can coalesce catchup entries.
|
||||
n.sendCatchupSignal()
|
||||
}
|
||||
// Now send snapshot to upper levels. Only send the snapshot, not the peerstate entry.
|
||||
n.apply.push(newCommittedEntry(n.commit, ae.entries[:1]))
|
||||
if hadPreviousSnapshot {
|
||||
// Signal catchup only after we've sent the snapshot. That ensures the upper-layer processes the snapshot
|
||||
// as-is and can only coalesce other catchup entries after this one.
|
||||
n.sendCatchupSignal()
|
||||
}
|
||||
n.Unlock()
|
||||
return
|
||||
}
|
||||
@@ -3975,6 +4207,9 @@ CONTINUE:
|
||||
}
|
||||
}
|
||||
case EntryAddPeer:
|
||||
// When receiving or restoring, mark membership as changing.
|
||||
// Set to the index where this entry was stored (pindex is now this entry's index)
|
||||
n.membChangeIndex = n.pindex
|
||||
if newPeer := string(e.Data); len(newPeer) == idLen {
|
||||
// Track directly, but wait for commit to be official
|
||||
if _, ok := n.peers[newPeer]; !ok {
|
||||
@@ -3983,6 +4218,10 @@ CONTINUE:
|
||||
// Store our peer in our global peer map for all peers.
|
||||
peers.LoadOrStore(newPeer, newPeer)
|
||||
}
|
||||
case EntryRemovePeer:
|
||||
// When receiving or restoring, mark membership as changing.
|
||||
// Set to the index where this entry was stored (pindex is now this entry's index)
|
||||
n.membChangeIndex = n.pindex
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3992,6 +4231,10 @@ CONTINUE:
|
||||
|
||||
// Apply anything we need here.
|
||||
if aeCommit > n.commit {
|
||||
// If we're catching up, we might need to signal that it's okay to potentially coalesce entries from here.
|
||||
if catchingUp {
|
||||
n.sendCatchupSignal()
|
||||
}
|
||||
if n.paused {
|
||||
n.hcommit = aeCommit
|
||||
n.debug("Paused, not applying %d", aeCommit)
|
||||
@@ -4746,7 +4989,6 @@ func (n *raft) switchToFollowerLocked(leader string) {
|
||||
n.leaderState.Store(false)
|
||||
n.leaderSince.Store(nil)
|
||||
n.lxfer = false
|
||||
n.membChanging = false
|
||||
|
||||
// Reset acks, we can't assume acks from a previous term are still valid in another term.
|
||||
if len(n.acks) > 0 {
|
||||
|
||||
+8
-9
@@ -564,21 +564,20 @@ type maxConnOption struct {
|
||||
// below the limit if necessary.
|
||||
func (m *maxConnOption) Apply(server *Server) {
|
||||
server.mu.Lock()
|
||||
var (
|
||||
clients = make([]*client, len(server.clients))
|
||||
i = 0
|
||||
)
|
||||
clients := make([]*client, 0, len(server.clients))
|
||||
// Map iteration is random, which allows us to close random connections.
|
||||
for _, client := range server.clients {
|
||||
clients[i] = client
|
||||
i++
|
||||
if isInternalClient(client.kind) {
|
||||
continue
|
||||
}
|
||||
clients = append(clients, client)
|
||||
}
|
||||
server.mu.Unlock()
|
||||
|
||||
if m.newValue > 0 && len(clients) > m.newValue {
|
||||
if newc := max(0, m.newValue); len(clients) > newc {
|
||||
// Close connections until we are within the limit.
|
||||
var (
|
||||
numClose = len(clients) - m.newValue
|
||||
numClose = len(clients) - newc
|
||||
closed = 0
|
||||
)
|
||||
for _, client := range clients {
|
||||
@@ -1659,7 +1658,7 @@ func (s *Server) diffOptions(newOpts *Options) ([]option, error) {
|
||||
return nil, fmt.Errorf("config reload not supported for jetstream max memory and store")
|
||||
}
|
||||
}
|
||||
case "jetstreammetacompact", "jetstreammetacompactsize":
|
||||
case "jetstreammetacompact", "jetstreammetacompactsize", "jetstreammetacompactsync":
|
||||
// Allowed at runtime but monitorCluster looks at s.opts directly, so no further work needed here.
|
||||
case "websocket":
|
||||
// Similar to gateways
|
||||
|
||||
+3
-6
@@ -181,8 +181,7 @@ func (c *client) processAccountUnsub(arg []byte) {
|
||||
// we have an origin cluster and we force header semantics.
|
||||
func (c *client) processRoutedOriginClusterMsgArgs(arg []byte) error {
|
||||
// Unroll splitArgs to avoid runtime/heap issues
|
||||
a := [MAX_HMSG_ARGS + 1][]byte{}
|
||||
args := a[:0]
|
||||
args := c.argsa[:0]
|
||||
start := -1
|
||||
for i, b := range arg {
|
||||
switch b {
|
||||
@@ -280,8 +279,7 @@ func (c *client) processRoutedOriginClusterMsgArgs(arg []byte) error {
|
||||
// Process an inbound HMSG specification from the remote route.
|
||||
func (c *client) processRoutedHeaderMsgArgs(arg []byte) error {
|
||||
// Unroll splitArgs to avoid runtime/heap issues
|
||||
a := [MAX_HMSG_ARGS][]byte{}
|
||||
args := a[:0]
|
||||
args := c.argsa[:0]
|
||||
var an []byte
|
||||
if c.kind == ROUTER {
|
||||
if an = c.route.accName; len(an) > 0 {
|
||||
@@ -377,8 +375,7 @@ func (c *client) processRoutedHeaderMsgArgs(arg []byte) error {
|
||||
// Process an inbound RMSG or LMSG specification from the remote route.
|
||||
func (c *client) processRoutedMsgArgs(arg []byte) error {
|
||||
// Unroll splitArgs to avoid runtime/heap issues
|
||||
a := [MAX_RMSG_ARGS][]byte{}
|
||||
args := a[:0]
|
||||
args := c.argsa[:0]
|
||||
var an []byte
|
||||
if c.kind == ROUTER {
|
||||
if an = c.route.accName; len(an) > 0 {
|
||||
|
||||
+1
-1
@@ -3377,7 +3377,7 @@ func (s *Server) createClientEx(conn net.Conn, inProcess bool) *client {
|
||||
|
||||
// If there is a max connections specified, check that adding
|
||||
// this new client would not push us over the max
|
||||
if opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn {
|
||||
if opts.MaxConn < 0 || (opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn) {
|
||||
s.mu.Unlock()
|
||||
c.maxConnExceeded()
|
||||
return nil
|
||||
|
||||
+2
@@ -65,6 +65,8 @@ var (
|
||||
ErrCorruptStreamState = errors.New("stream state snapshot is corrupt")
|
||||
// ErrTooManyResults is returned when a request has too many matching results.
|
||||
ErrTooManyResults = errors.New("too many matching results for request")
|
||||
// ErrStoreOldUpdate is returned when a consumer update is older than the current state.
|
||||
ErrStoreOldUpdate = errors.New("old update ignored")
|
||||
)
|
||||
|
||||
// StoreMsg is the stored message format for messages that are retained by the Store layer.
|
||||
|
||||
+88
-100
@@ -428,7 +428,8 @@ type stream struct {
|
||||
cisrun atomic.Bool // Indicates one checkInterestState is already running.
|
||||
|
||||
// Mirror
|
||||
mirror *sourceInfo
|
||||
mirror *sourceInfo
|
||||
mirrorConsumerSetup *time.Timer
|
||||
|
||||
// Sources
|
||||
sources map[string]*sourceInfo
|
||||
@@ -619,19 +620,24 @@ const StreamMaxReplicas = 5
|
||||
|
||||
// AddStream adds a stream for the given account.
|
||||
func (a *Account) addStream(config *StreamConfig) (*stream, error) {
|
||||
return a.addStreamWithAssignment(config, nil, nil, false)
|
||||
return a.addStreamWithAssignment(config, nil, nil, false, false)
|
||||
}
|
||||
|
||||
// recoverStream recovers a stream from disk for the given account.
|
||||
func (a *Account) recoverStream(config *StreamConfig) (*stream, error) {
|
||||
return a.addStreamWithAssignment(config, nil, nil, false, true)
|
||||
}
|
||||
|
||||
// addStreamWithStore adds a stream for the given account with custom store config options.
|
||||
func (a *Account) addStreamWithStore(config *StreamConfig, fsConfig *FileStoreConfig) (*stream, error) {
|
||||
return a.addStreamWithAssignment(config, fsConfig, nil, false)
|
||||
return a.addStreamWithAssignment(config, fsConfig, nil, false, false)
|
||||
}
|
||||
|
||||
func (a *Account) addStreamPedantic(config *StreamConfig, pedantic bool) (*stream, error) {
|
||||
return a.addStreamWithAssignment(config, nil, nil, pedantic)
|
||||
return a.addStreamWithAssignment(config, nil, nil, pedantic, false)
|
||||
}
|
||||
|
||||
func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileStoreConfig, sa *streamAssignment, pedantic bool) (*stream, error) {
|
||||
func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileStoreConfig, sa *streamAssignment, pedantic, recovering bool) (*stream, error) {
|
||||
s, jsa, err := a.checkForJetStream()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -678,6 +684,7 @@ func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileSt
|
||||
}()
|
||||
}
|
||||
|
||||
// Note that isClustered will be false during recovery, even if we're part of a cluster. It shouldn't be used then.
|
||||
js, isClustered := jsa.jetStreamAndClustered()
|
||||
jsa.mu.Lock()
|
||||
if mset, ok := jsa.streams[cfg.Name]; ok {
|
||||
@@ -707,25 +714,30 @@ func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileSt
|
||||
jsa.usageMu.RLock()
|
||||
selected, tier, hasTier := jsa.selectLimits(cfg.Replicas)
|
||||
jsa.usageMu.RUnlock()
|
||||
reserved := int64(0)
|
||||
if !isClustered {
|
||||
reserved = jsa.tieredReservation(tier, cfg)
|
||||
}
|
||||
jsa.mu.Unlock()
|
||||
|
||||
if !hasTier {
|
||||
jsa.mu.Unlock()
|
||||
return nil, NewJSNoLimitsError()
|
||||
}
|
||||
js.mu.RLock()
|
||||
if isClustered {
|
||||
_, reserved = tieredStreamAndReservationCount(js.cluster.streams[a.Name], tier, cfg)
|
||||
}
|
||||
if err := js.checkAllLimits(&selected, cfg, reserved, 0); err != nil {
|
||||
|
||||
// Skip if we're recovering.
|
||||
if !recovering {
|
||||
reserved := int64(0)
|
||||
if !isClustered {
|
||||
reserved = jsa.tieredReservation(tier, cfg)
|
||||
}
|
||||
jsa.mu.Unlock()
|
||||
js.mu.RLock()
|
||||
if isClustered {
|
||||
_, reserved = js.tieredStreamAndReservationCount(a.Name, tier, cfg)
|
||||
}
|
||||
if err := js.checkAllLimits(&selected, cfg, reserved, 0); err != nil {
|
||||
js.mu.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
js.mu.RUnlock()
|
||||
return nil, err
|
||||
jsa.mu.Lock()
|
||||
}
|
||||
js.mu.RUnlock()
|
||||
jsa.mu.Lock()
|
||||
// Check for template ownership if present.
|
||||
if cfg.Template != _EMPTY_ && jsa.account != nil {
|
||||
if !jsa.checkTemplateOwnership(cfg.Template, cfg.Name) {
|
||||
@@ -790,11 +802,6 @@ func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileSt
|
||||
return nil, NewJSStreamSubjectOverlapError()
|
||||
}
|
||||
|
||||
if !hasTier {
|
||||
jsa.mu.Unlock()
|
||||
return nil, fmt.Errorf("no applicable tier found")
|
||||
}
|
||||
|
||||
// Setup the internal clients.
|
||||
c := s.createInternalJetStreamClient()
|
||||
ic := s.createInternalJetStreamClient()
|
||||
@@ -1088,8 +1095,12 @@ func (mset *stream) monitorQuitC() <-chan struct{} {
|
||||
if mset == nil {
|
||||
return nil
|
||||
}
|
||||
mset.mu.RLock()
|
||||
defer mset.mu.RUnlock()
|
||||
mset.mu.Lock()
|
||||
defer mset.mu.Unlock()
|
||||
// Recreate if a prior monitor routine was stopped.
|
||||
if mset.mqch == nil {
|
||||
mset.mqch = make(chan struct{})
|
||||
}
|
||||
return mset.mqch
|
||||
}
|
||||
|
||||
@@ -1830,6 +1841,30 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
|
||||
} else {
|
||||
return StreamConfig{}, NewJSSourceDuplicateDetectedError()
|
||||
}
|
||||
|
||||
if src.FilterSubject != _EMPTY_ && len(src.SubjectTransforms) != 0 {
|
||||
return StreamConfig{}, NewJSSourceMultipleFiltersNotAllowedError()
|
||||
}
|
||||
|
||||
for _, tr := range src.SubjectTransforms {
|
||||
if tr.Source != _EMPTY_ && !IsValidSubject(tr.Source) {
|
||||
return StreamConfig{}, NewJSSourceInvalidSubjectFilterError(fmt.Errorf("%w %s", ErrBadSubject, tr.Source))
|
||||
}
|
||||
err := ValidateMapping(tr.Source, tr.Destination)
|
||||
if err != nil {
|
||||
return StreamConfig{}, NewJSSourceInvalidTransformDestinationError(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Check subject filters overlap.
|
||||
for outer, tr := range src.SubjectTransforms {
|
||||
for inner, innertr := range src.SubjectTransforms {
|
||||
if inner != outer && subjectIsSubsetMatch(tr.Source, innertr.Source) {
|
||||
return StreamConfig{}, NewJSSourceOverlappingSubjectFiltersError()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do not perform checks if External is provided, as it could lead to
|
||||
// checking against itself (if sourced stream name is the same on different JetStream)
|
||||
if src.External == nil {
|
||||
@@ -1842,30 +1877,6 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
|
||||
return StreamConfig{}, NewJSSourceMaxMessageSizeTooBigError()
|
||||
}
|
||||
}
|
||||
|
||||
if src.FilterSubject != _EMPTY_ && len(src.SubjectTransforms) != 0 {
|
||||
return StreamConfig{}, NewJSSourceMultipleFiltersNotAllowedError()
|
||||
}
|
||||
|
||||
for _, tr := range src.SubjectTransforms {
|
||||
if tr.Source != _EMPTY_ && !IsValidSubject(tr.Source) {
|
||||
return StreamConfig{}, NewJSSourceInvalidSubjectFilterError(fmt.Errorf("%w %s", ErrBadSubject, tr.Source))
|
||||
}
|
||||
|
||||
err := ValidateMapping(tr.Source, tr.Destination)
|
||||
if err != nil {
|
||||
return StreamConfig{}, NewJSSourceInvalidTransformDestinationError(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Check subject filters overlap.
|
||||
for outer, tr := range src.SubjectTransforms {
|
||||
for inner, innertr := range src.SubjectTransforms {
|
||||
if inner != outer && subjectIsSubsetMatch(tr.Source, innertr.Source) {
|
||||
return StreamConfig{}, NewJSSourceOverlappingSubjectFiltersError()
|
||||
}
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
if src.External.DeliverPrefix != _EMPTY_ {
|
||||
@@ -1957,7 +1968,7 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
|
||||
// Check for literal duplication of subject interest in config
|
||||
// and no overlap with any JS or SYS API subject space.
|
||||
dset := make(map[string]struct{}, len(cfg.Subjects))
|
||||
for _, subj := range cfg.Subjects {
|
||||
for i, subj := range cfg.Subjects {
|
||||
// Make sure the subject is valid. Check this first.
|
||||
if !IsValidSubject(subj) {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("invalid subject"))
|
||||
@@ -1991,6 +2002,13 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now check if we have multiple subjects that we do not overlap ourselves
|
||||
// which would cause duplicate entries (assuming no MsgID).
|
||||
for _, tsubj := range cfg.Subjects[i+1:] {
|
||||
if SubjectsCollide(tsubj, subj) {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subject %q overlaps with %q", subj, tsubj))
|
||||
}
|
||||
}
|
||||
// Mark for duplicate check.
|
||||
dset[subj] = struct{}{}
|
||||
}
|
||||
@@ -2008,18 +2026,6 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo
|
||||
return StreamConfig{}, NewJSStreamMaxStreamBytesExceededError()
|
||||
}
|
||||
|
||||
// Now check if we have multiple subjects they we do not overlap ourselves
|
||||
// which would cause duplicate entries (assuming no MsgID).
|
||||
if len(cfg.Subjects) > 1 {
|
||||
for _, subj := range cfg.Subjects {
|
||||
for _, tsubj := range cfg.Subjects {
|
||||
if tsubj != subj && SubjectsCollide(tsubj, subj) {
|
||||
return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subject %q overlaps with %q", subj, tsubj))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check the subject transform if any
|
||||
if cfg.SubjectTransform != nil {
|
||||
if cfg.SubjectTransform.Source != _EMPTY_ && !IsValidSubject(cfg.SubjectTransform.Source) {
|
||||
@@ -2110,10 +2116,6 @@ func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server, pedan
|
||||
if cfg.Name != old.Name {
|
||||
return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration name must match original"))
|
||||
}
|
||||
// Can't change MaxConsumers for now.
|
||||
if cfg.MaxConsumers != old.MaxConsumers {
|
||||
return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change MaxConsumers"))
|
||||
}
|
||||
// Can't change storage types.
|
||||
if cfg.Storage != old.Storage {
|
||||
return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change storage type"))
|
||||
@@ -2230,7 +2232,7 @@ func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server, pedan
|
||||
js.mu.RLock()
|
||||
defer js.mu.RUnlock()
|
||||
if isClustered {
|
||||
_, reserved = tieredStreamAndReservationCount(js.cluster.streams[acc.Name], tier, &cfg)
|
||||
_, reserved = js.tieredStreamAndReservationCount(acc.Name, tier, &cfg)
|
||||
}
|
||||
// reservation does not account for this stream, hence add the old value
|
||||
if tier == _EMPTY_ && old.Replicas > 1 {
|
||||
@@ -3119,7 +3121,8 @@ func (mset *stream) scheduleSetupMirrorConsumerRetry() {
|
||||
// Add some jitter.
|
||||
next += time.Duration(rand.Intn(int(100*time.Millisecond))) + 100*time.Millisecond
|
||||
|
||||
time.AfterFunc(next, func() {
|
||||
stopAndClearTimer(&mset.mirrorConsumerSetup)
|
||||
mset.mirrorConsumerSetup = time.AfterFunc(next, func() {
|
||||
mset.mu.Lock()
|
||||
mset.setupMirrorConsumer()
|
||||
mset.mu.Unlock()
|
||||
@@ -5795,7 +5798,7 @@ func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte,
|
||||
mset.ddMu.Unlock()
|
||||
if seq > 0 {
|
||||
if canRespond {
|
||||
response := append(pubAck, strconv.FormatUint(dde.seq, 10)...)
|
||||
response := append(pubAck, strconv.FormatUint(seq, 10)...)
|
||||
response = append(response, ",\"duplicate\": true}"...)
|
||||
outq.sendMsg(reply, response)
|
||||
}
|
||||
@@ -6753,36 +6756,31 @@ type jsPubMsg struct {
|
||||
o *consumer
|
||||
}
|
||||
|
||||
var jsPubMsgPool sync.Pool
|
||||
var jsPubMsgPool = sync.Pool{
|
||||
New: func() any {
|
||||
return &jsPubMsg{}
|
||||
},
|
||||
}
|
||||
|
||||
func newJSPubMsg(dsubj, subj, reply string, hdr, msg []byte, o *consumer, seq uint64) *jsPubMsg {
|
||||
var m *jsPubMsg
|
||||
var buf []byte
|
||||
pm := jsPubMsgPool.Get()
|
||||
if pm != nil {
|
||||
m = pm.(*jsPubMsg)
|
||||
buf = m.buf[:0]
|
||||
if hdr != nil {
|
||||
hdr = append(m.hdr[:0], hdr...)
|
||||
}
|
||||
} else {
|
||||
m = new(jsPubMsg)
|
||||
m := getJSPubMsgFromPool()
|
||||
if m.buf == nil {
|
||||
m.buf = make([]byte, 0, len(hdr)+len(msg))
|
||||
}
|
||||
buf := append(m.buf[:0], hdr...)
|
||||
buf = append(buf, msg...)
|
||||
hdr = buf[:len(hdr):len(hdr)]
|
||||
msg = buf[len(hdr):]
|
||||
// When getting something from a pool it is critical that all fields are
|
||||
// initialized. Doing this way guarantees that if someone adds a field to
|
||||
// the structure, the compiler will fail the build if this line is not updated.
|
||||
(*m) = jsPubMsg{dsubj, reply, StoreMsg{subj, hdr, msg, buf, seq, 0}, o}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// Gets a jsPubMsg from the pool.
|
||||
func getJSPubMsgFromPool() *jsPubMsg {
|
||||
pm := jsPubMsgPool.Get()
|
||||
if pm != nil {
|
||||
return pm.(*jsPubMsg)
|
||||
}
|
||||
return new(jsPubMsg)
|
||||
return jsPubMsgPool.Get().(*jsPubMsg)
|
||||
}
|
||||
|
||||
func (pm *jsPubMsg) returnToPool() {
|
||||
@@ -6793,9 +6791,6 @@ func (pm *jsPubMsg) returnToPool() {
|
||||
if len(pm.buf) > 0 {
|
||||
pm.buf = pm.buf[:0]
|
||||
}
|
||||
if len(pm.hdr) > 0 {
|
||||
pm.hdr = pm.hdr[:0]
|
||||
}
|
||||
jsPubMsgPool.Put(pm)
|
||||
}
|
||||
|
||||
@@ -7691,15 +7686,8 @@ func (mset *stream) ackMsg(o *consumer, seq uint64) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
var shouldRemove bool
|
||||
switch mset.cfg.Retention {
|
||||
case WorkQueuePolicy:
|
||||
// Normally we just remove a message when its ack'd here but if we have direct consumers
|
||||
// from sources and/or mirrors we need to make sure they have delivered the msg.
|
||||
shouldRemove = mset.directs <= 0 || mset.noInterest(seq, o)
|
||||
case InterestPolicy:
|
||||
shouldRemove = mset.noInterest(seq, o)
|
||||
}
|
||||
// If there's no interest left on this message for all consumers, we can remove it.
|
||||
shouldRemove := mset.noInterest(seq, nil)
|
||||
|
||||
// If nothing else to do.
|
||||
if !shouldRemove {
|
||||
@@ -7810,7 +7798,7 @@ func (a *Account) RestoreStream(ncfg *StreamConfig, r io.Reader) (*stream, error
|
||||
if hasTier {
|
||||
if isClustered {
|
||||
js.mu.RLock()
|
||||
_, reserved = tieredStreamAndReservationCount(js.cluster.streams[a.Name], tier, &cfg)
|
||||
_, reserved = js.tieredStreamAndReservationCount(a.Name, tier, &cfg)
|
||||
js.mu.RUnlock()
|
||||
} else {
|
||||
reserved = jsa.tieredReservation(tier, &cfg)
|
||||
|
||||
+42
-12
@@ -124,7 +124,25 @@ func (t *SubjectTree[T]) Match(filter []byte, cb func(subject []byte, val *T)) {
|
||||
var raw [16][]byte
|
||||
parts := genParts(filter, raw[:0])
|
||||
var _pre [256]byte
|
||||
t.match(t.root, parts, _pre[:0], cb)
|
||||
t.match(t.root, parts, _pre[:0], func(subject []byte, val *T) bool {
|
||||
cb(subject, val)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// MatchUntil will match against a subject that can have wildcards and invoke
|
||||
// the callback func for each matched value.
|
||||
// Returning false from the callback will stop matching immediately.
|
||||
// Returns true if matching ran to completion, false if callback stopped it early.
|
||||
func (t *SubjectTree[T]) MatchUntil(filter []byte, cb func(subject []byte, val *T) bool) bool {
|
||||
if t == nil || t.root == nil || len(filter) == 0 || cb == nil {
|
||||
return true
|
||||
}
|
||||
// We need to break this up into chunks based on wildcards, either pwc '*' or fwc '>'.
|
||||
var raw [16][]byte
|
||||
parts := genParts(filter, raw[:0])
|
||||
var _pre [256]byte
|
||||
return t.match(t.root, parts, _pre[:0], cb)
|
||||
}
|
||||
|
||||
// IterOrdered will walk all entries in the SubjectTree lexicographically. The callback can return false to terminate the walk.
|
||||
@@ -296,7 +314,8 @@ func (t *SubjectTree[T]) delete(np *node, subject []byte, si int) (*T, bool) {
|
||||
|
||||
// Internal function which can be called recursively to match all leaf nodes to a given filter subject which
|
||||
// once here has been decomposed to parts. These parts only care about wildcards, both pwc and fwc.
|
||||
func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subject []byte, val *T)) {
|
||||
// Returns false if the callback requested to stop matching.
|
||||
func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subject []byte, val *T) bool) bool {
|
||||
// Capture if we are sitting on a terminal fwc.
|
||||
var hasFWC bool
|
||||
if lp := len(parts); lp > 0 && len(parts[lp-1]) > 0 && parts[lp-1][0] == fwc {
|
||||
@@ -307,15 +326,17 @@ func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subje
|
||||
nparts, matched := n.matchParts(parts)
|
||||
// Check if we did not match.
|
||||
if !matched {
|
||||
return
|
||||
return true
|
||||
}
|
||||
// We have matched here. If we are a leaf and have exhausted all parts, or we have a FWC, fire the callback.
|
||||
if n.isLeaf() {
|
||||
if len(nparts) == 0 || (hasFWC && len(nparts) == 1) {
|
||||
ln := n.(*leaf[T])
|
||||
cb(append(pre, ln.suffix...), &ln.value)
|
||||
if !cb(append(pre, ln.suffix...), &ln.value) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return
|
||||
return true
|
||||
}
|
||||
// We have normal nodes here.
|
||||
// We need to append our prefix
|
||||
@@ -343,17 +364,23 @@ func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subje
|
||||
if cn.isLeaf() {
|
||||
ln := cn.(*leaf[T])
|
||||
if len(ln.suffix) == 0 {
|
||||
cb(append(pre, ln.suffix...), &ln.value)
|
||||
if !cb(append(pre, ln.suffix...), &ln.value) {
|
||||
return false
|
||||
}
|
||||
} else if hasTermPWC && bytes.IndexByte(ln.suffix, tsep) < 0 {
|
||||
cb(append(pre, ln.suffix...), &ln.value)
|
||||
if !cb(append(pre, ln.suffix...), &ln.value) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
} else if hasTermPWC {
|
||||
// We have terminal pwc so call into match again with the child node.
|
||||
t.match(cn, nparts, pre, cb)
|
||||
if !t.match(cn, nparts, pre, cb) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
// Return regardless.
|
||||
return
|
||||
return true
|
||||
}
|
||||
// If we are sitting on a terminal fwc, put back and continue.
|
||||
if hasFWC && len(nparts) == 0 {
|
||||
@@ -370,18 +397,21 @@ func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subje
|
||||
// to see if we match further down.
|
||||
for _, cn := range n.children() {
|
||||
if cn != nil {
|
||||
t.match(cn, nparts, pre, cb)
|
||||
if !t.match(cn, nparts, pre, cb) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
return true
|
||||
}
|
||||
// Here we have normal traversal, so find the next child.
|
||||
nn := n.findChild(p)
|
||||
if nn == nil {
|
||||
return
|
||||
return true
|
||||
}
|
||||
n, parts = *nn, nparts
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Internal iter function to walk nodes in lexicographical order.
|
||||
|
||||
+5
-4
@@ -1327,8 +1327,9 @@ func SubjectsCollide(subj1, subj2 string) bool {
|
||||
if subj1 == subj2 {
|
||||
return true
|
||||
}
|
||||
toks1 := strings.Split(subj1, tsep)
|
||||
toks2 := strings.Split(subj2, tsep)
|
||||
tsa, tsb := [32]string{}, [32]string{}
|
||||
toks1 := tokenizeSubjectIntoSlice(tsa[:0], subj1)
|
||||
toks2 := tokenizeSubjectIntoSlice(tsb[:0], subj2)
|
||||
pwc1, fwc1 := analyzeTokens(toks1)
|
||||
pwc2, fwc2 := analyzeTokens(toks2)
|
||||
// if both literal just string compare.
|
||||
@@ -1338,9 +1339,9 @@ func SubjectsCollide(subj1, subj2 string) bool {
|
||||
}
|
||||
// So one or both have wildcards. If one is literal then we can do subset matching.
|
||||
if l1 && !l2 {
|
||||
return isSubsetMatch(toks1, subj2)
|
||||
return isSubsetMatchTokenized(toks1, toks2)
|
||||
} else if l2 && !l1 {
|
||||
return isSubsetMatch(toks2, subj1)
|
||||
return isSubsetMatchTokenized(toks2, toks1)
|
||||
}
|
||||
// Both have wildcards.
|
||||
// If they only have partials then the lengths must match.
|
||||
|
||||
+1
-1
@@ -165,7 +165,7 @@ func urlsAreEqual(u1, u2 *url.URL) bool {
|
||||
// e.g. comma(834142) -> 834,142
|
||||
//
|
||||
// This function was copied from the github.com/dustin/go-humanize
|
||||
// package and is Copyright Dustin Sallings <dustin@spy.net>
|
||||
// package (MIT License) and is Copyright Dustin Sallings <dustin@spy.net>
|
||||
func comma(v int64) string {
|
||||
sign := ""
|
||||
|
||||
|
||||
+116
-26
@@ -128,7 +128,7 @@ type srvWebsocket struct {
|
||||
server *http.Server
|
||||
listener net.Listener
|
||||
listenerErr error
|
||||
allowedOrigins map[string]*allowedOrigin // host will be the key
|
||||
allowedOrigins map[string][]*allowedOrigin // host will be the key
|
||||
sameOrigin bool
|
||||
connectURLs []string
|
||||
connectURLsMap refCountedUrlSet
|
||||
@@ -154,7 +154,7 @@ type wsUpgradeResult struct {
|
||||
}
|
||||
|
||||
type wsReadInfo struct {
|
||||
rem int
|
||||
rem uint64
|
||||
fs bool
|
||||
ff bool
|
||||
fc bool
|
||||
@@ -163,31 +163,42 @@ type wsReadInfo struct {
|
||||
mkey [4]byte
|
||||
cbufs [][]byte
|
||||
coff int
|
||||
csz uint64
|
||||
}
|
||||
|
||||
func (r *wsReadInfo) init() {
|
||||
r.fs, r.ff = true, true
|
||||
}
|
||||
|
||||
func (r *wsReadInfo) resetCompressedState() {
|
||||
r.fs = true
|
||||
r.ff = true
|
||||
r.fc = false
|
||||
r.rem = 0
|
||||
r.cbufs = nil
|
||||
r.coff = 0
|
||||
r.csz = 0
|
||||
}
|
||||
|
||||
// Returns a slice containing `needed` bytes from the given buffer `buf`
|
||||
// starting at position `pos`, and possibly read from the given reader `r`.
|
||||
// When bytes are present in `buf`, the `pos` is incremented by the number
|
||||
// of bytes found up to `needed` and the new position is returned. If not
|
||||
// enough bytes are found, the bytes found in `buf` are copied to the returned
|
||||
// slice and the remaining bytes are read from `r`.
|
||||
func wsGet(r io.Reader, buf []byte, pos, needed int) ([]byte, int, error) {
|
||||
avail := len(buf) - pos
|
||||
func wsGet(r io.Reader, buf []byte, pos, needed uint64) ([]byte, uint64, error) {
|
||||
avail := uint64(len(buf)) - pos
|
||||
if avail >= needed {
|
||||
return buf[pos : pos+needed], pos + needed, nil
|
||||
}
|
||||
b := make([]byte, needed)
|
||||
start := copy(b, buf[pos:])
|
||||
start := uint64(copy(b, buf[pos:]))
|
||||
for start != needed {
|
||||
n, err := r.Read(b[start:cap(b)])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
start += n
|
||||
start += uint64(n)
|
||||
}
|
||||
return b, pos + avail, nil
|
||||
}
|
||||
@@ -210,16 +221,25 @@ func (c *client) wsRead(r *wsReadInfo, ior io.Reader, buf []byte) ([][]byte, err
|
||||
bufs [][]byte
|
||||
tmpBuf []byte
|
||||
err error
|
||||
pos int
|
||||
max = len(buf)
|
||||
pos uint64
|
||||
max = uint64(len(buf))
|
||||
mpay = int(atomic.LoadInt32(&c.mpay))
|
||||
)
|
||||
if mpay <= 0 {
|
||||
mpay = MAX_PAYLOAD_SIZE
|
||||
}
|
||||
for pos != max {
|
||||
if r.fs {
|
||||
b0 := buf[pos]
|
||||
frameType := wsOpCode(b0 & 0xF)
|
||||
final := b0&wsFinalBit != 0
|
||||
compressed := b0&wsRsv1Bit != 0
|
||||
if b0&(wsRsv2Bit|wsRsv3Bit) != 0 {
|
||||
return bufs, c.wsHandleProtocolError("RSV2 and RSV3 must be clear")
|
||||
}
|
||||
if compressed && !c.ws.compress {
|
||||
return bufs, c.wsHandleProtocolError("compressed frame received without negotiated permessage-deflate")
|
||||
}
|
||||
pos++
|
||||
|
||||
tmpBuf, pos, err = wsGet(ior, buf, pos, 1)
|
||||
@@ -235,7 +255,7 @@ func (c *client) wsRead(r *wsReadInfo, ior io.Reader, buf []byte) ([][]byte, err
|
||||
}
|
||||
|
||||
// Store size in case it is < 125
|
||||
r.rem = int(b1 & 0x7F)
|
||||
r.rem = uint64(b1 & 0x7F)
|
||||
|
||||
switch frameType {
|
||||
case wsPingMessage, wsPongMessage, wsCloseMessage:
|
||||
@@ -247,6 +267,9 @@ func (c *client) wsRead(r *wsReadInfo, ior io.Reader, buf []byte) ([][]byte, err
|
||||
if !final {
|
||||
return bufs, c.wsHandleProtocolError("control frame does not have final bit set")
|
||||
}
|
||||
if compressed {
|
||||
return bufs, c.wsHandleProtocolError("control frame must not be compressed")
|
||||
}
|
||||
case wsTextMessage, wsBinaryMessage:
|
||||
if !r.ff {
|
||||
return bufs, c.wsHandleProtocolError("new message started before final frame for previous message was received")
|
||||
@@ -269,13 +292,15 @@ func (c *client) wsRead(r *wsReadInfo, ior io.Reader, buf []byte) ([][]byte, err
|
||||
if err != nil {
|
||||
return bufs, err
|
||||
}
|
||||
r.rem = int(binary.BigEndian.Uint16(tmpBuf))
|
||||
r.rem = uint64(binary.BigEndian.Uint16(tmpBuf))
|
||||
case 127:
|
||||
tmpBuf, pos, err = wsGet(ior, buf, pos, 8)
|
||||
if err != nil {
|
||||
return bufs, err
|
||||
}
|
||||
r.rem = int(binary.BigEndian.Uint64(tmpBuf))
|
||||
if r.rem = binary.BigEndian.Uint64(tmpBuf); r.rem&(uint64(1)<<63) != 0 {
|
||||
return bufs, c.wsHandleProtocolError("invalid 64-bit payload length")
|
||||
}
|
||||
}
|
||||
|
||||
if r.mask {
|
||||
@@ -302,7 +327,7 @@ func (c *client) wsRead(r *wsReadInfo, ior io.Reader, buf []byte) ([][]byte, err
|
||||
}
|
||||
if pos < max {
|
||||
var b []byte
|
||||
var n int
|
||||
var n uint64
|
||||
|
||||
n = r.rem
|
||||
if pos+n > max {
|
||||
@@ -320,14 +345,20 @@ func (c *client) wsRead(r *wsReadInfo, ior io.Reader, buf []byte) ([][]byte, err
|
||||
if r.fc {
|
||||
// Assume that we may have continuation frames or not the full payload.
|
||||
addToBufs = false
|
||||
if r.csz+uint64(len(b)) > uint64(mpay) {
|
||||
r.resetCompressedState()
|
||||
return bufs, ErrMaxPayload
|
||||
}
|
||||
// Make a copy of the buffer before adding it to the list
|
||||
// of compressed fragments.
|
||||
r.cbufs = append(r.cbufs, append([]byte(nil), b...))
|
||||
r.csz += uint64(len(b))
|
||||
// When we have the final frame and we have read the full payload,
|
||||
// we can decompress it.
|
||||
if r.ff && r.rem == 0 {
|
||||
b, err = r.decompress(mpay)
|
||||
if err != nil {
|
||||
r.resetCompressedState()
|
||||
return bufs, err
|
||||
}
|
||||
r.fc = false
|
||||
@@ -391,6 +422,9 @@ func (r *wsReadInfo) nextCBuf() []byte {
|
||||
}
|
||||
|
||||
func (r *wsReadInfo) ReadByte() (byte, error) {
|
||||
for len(r.cbufs) > 0 && len(r.cbufs[0]) == 0 {
|
||||
r.nextCBuf()
|
||||
}
|
||||
if len(r.cbufs) == 0 {
|
||||
return 0, io.EOF
|
||||
}
|
||||
@@ -436,13 +470,15 @@ func (r *wsReadInfo) decompress(mpay int) ([]byte, error) {
|
||||
decompressorPool.Put(d)
|
||||
// Now reset the compressed buffers list.
|
||||
r.cbufs = nil
|
||||
r.coff = 0
|
||||
r.csz = 0
|
||||
return b, err
|
||||
}
|
||||
|
||||
// Handles the PING, PONG and CLOSE websocket control frames.
|
||||
//
|
||||
// Client lock MUST NOT be held on entry.
|
||||
func (c *client) wsHandleControlFrame(r *wsReadInfo, frameType wsOpCode, nc io.Reader, buf []byte, pos int) (int, error) {
|
||||
func (c *client) wsHandleControlFrame(r *wsReadInfo, frameType wsOpCode, nc io.Reader, buf []byte, pos uint64) (uint64, error) {
|
||||
var payload []byte
|
||||
var err error
|
||||
|
||||
@@ -461,6 +497,9 @@ func (c *client) wsHandleControlFrame(r *wsReadInfo, frameType wsOpCode, nc io.R
|
||||
status := wsCloseStatusNoStatusReceived
|
||||
var body string
|
||||
lp := len(payload)
|
||||
if lp == 1 {
|
||||
return pos, c.wsHandleProtocolError("close frame payload cannot be 1 byte")
|
||||
}
|
||||
// If there is a payload, the status is represented as a 2-byte
|
||||
// unsigned integer (in network byte order). Then, there may be an
|
||||
// optional body.
|
||||
@@ -468,6 +507,9 @@ func (c *client) wsHandleControlFrame(r *wsReadInfo, frameType wsOpCode, nc io.R
|
||||
if hasStatus {
|
||||
// Decode the status
|
||||
status = int(binary.BigEndian.Uint16(payload[:wsCloseSatusSize]))
|
||||
if !wsIsValidCloseStatus(status) {
|
||||
return pos, c.wsHandleProtocolError(fmt.Sprintf("invalid close status code %v", status))
|
||||
}
|
||||
// Now if there is a body, capture it and make sure this is a valid UTF-8.
|
||||
if hasBody {
|
||||
body = string(payload[wsCloseSatusSize:])
|
||||
@@ -704,6 +746,21 @@ func (c *client) wsHandleProtocolError(message string) error {
|
||||
return errors.New(message)
|
||||
}
|
||||
|
||||
// wsIsValidCloseStatus reports whether `code` is accepted as a close status.
// It returns false for the explicitly listed codes, for any code below 1000
// or at/above 5000, and for the 1016-2999 range; it returns true otherwise.
func wsIsValidCloseStatus(code int) bool {
|
||||
// These specific codes are rejected even though they fall in the
// otherwise accepted numeric range.
switch code {
|
||||
case wsCloseStatusNoStatusReceived, 1004, 1006, wsCloseStatusTLSHandshake:
|
||||
return false
|
||||
}
|
||||
// Only codes in [1000, 5000) can be valid.
if code < 1000 || code >= 5000 {
|
||||
return false
|
||||
}
|
||||
// 1016-2999 are currently reserved.
|
||||
if code >= 1016 && code <= 2999 {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Create a close message with the given `status` and `body`.
|
||||
// If the `body` is more than the maximum allows control frame payload size,
|
||||
// it is truncated and "..." is added at the end (as a hint that message
|
||||
@@ -763,6 +820,10 @@ func (s *Server) wsUpgrade(w http.ResponseWriter, r *http.Request) (*wsUpgradeRe
|
||||
if key == _EMPTY_ {
|
||||
return nil, wsReturnHTTPError(w, r, http.StatusBadRequest, "key missing")
|
||||
}
|
||||
decoded, err := base64.StdEncoding.DecodeString(key)
|
||||
if err != nil || len(decoded) != 16 {
|
||||
return nil, wsReturnHTTPError(w, r, http.StatusBadRequest, "invalid websocket key")
|
||||
}
|
||||
// Point 6.
|
||||
if !wsHeaderContains(r.Header, "Sec-Websocket-Version", "13") {
|
||||
return nil, wsReturnHTTPError(w, r, http.StatusBadRequest, "invalid version")
|
||||
@@ -784,7 +845,10 @@ func (s *Server) wsUpgrade(w http.ResponseWriter, r *http.Request) (*wsUpgradeRe
|
||||
// We will do masking if asked (unless we reject for tests)
|
||||
noMasking := r.Header.Get(wsNoMaskingHeader) == wsNoMaskingValue && !wsTestRejectNoMasking
|
||||
|
||||
h := w.(http.Hijacker)
|
||||
h, ok := w.(http.Hijacker)
|
||||
if !ok {
|
||||
return nil, wsReturnHTTPError(w, r, http.StatusBadRequest, "websocket upgrade not supported")
|
||||
}
|
||||
conn, brw, err := h.Hijack()
|
||||
if err != nil {
|
||||
if conn != nil {
|
||||
@@ -832,9 +896,11 @@ func (s *Server) wsUpgrade(w http.ResponseWriter, r *http.Request) (*wsUpgradeRe
|
||||
|
||||
// Check for X-Forwarded-For header
|
||||
if cips, ok := r.Header[wsXForwardedForHeader]; ok {
|
||||
cip := cips[0]
|
||||
if net.ParseIP(cip) != nil {
|
||||
ws.clientIP = cip
|
||||
if len(cips) > 0 {
|
||||
cip := cips[0]
|
||||
if net.ParseIP(cip) != nil {
|
||||
ws.clientIP = cip
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -964,7 +1030,11 @@ func (w *srvWebsocket) checkOrigin(r *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if oh != rh || op != rp {
|
||||
rs := "http"
|
||||
if r.TLS != nil {
|
||||
rs = "https"
|
||||
}
|
||||
if oh != rh || op != rp || !strings.EqualFold(u.Scheme, rs) {
|
||||
return errors.New("not same origin")
|
||||
}
|
||||
// I guess it is possible to have cases where one wants to check
|
||||
@@ -973,9 +1043,16 @@ func (w *srvWebsocket) checkOrigin(r *http.Request) error {
|
||||
}
|
||||
if !listEmpty {
|
||||
w.mu.RLock()
|
||||
ao := w.allowedOrigins[oh]
|
||||
origins := w.allowedOrigins[oh]
|
||||
w.mu.RUnlock()
|
||||
if ao == nil || u.Scheme != ao.scheme || op != ao.port {
|
||||
var allowed bool
|
||||
for _, ao := range origins {
|
||||
if u.Scheme == ao.scheme && op == ao.port {
|
||||
allowed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !allowed {
|
||||
return errors.New("not in the allowed list")
|
||||
}
|
||||
}
|
||||
@@ -1029,7 +1106,17 @@ func validateWebsocketOptions(o *Options) error {
|
||||
}
|
||||
// Make sure that allowed origins, if specified, can be parsed.
|
||||
for _, ao := range wo.AllowedOrigins {
|
||||
if _, err := url.Parse(ao); err != nil {
|
||||
u, err := url.ParseRequestURI(ao)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to parse allowed origin: %v", err)
|
||||
}
|
||||
if u.Scheme != "http" && u.Scheme != "https" {
|
||||
return fmt.Errorf("unable to parse allowed origin %q: allowed origins must be absolute URLs with http or https scheme", ao)
|
||||
}
|
||||
if u.Host == _EMPTY_ {
|
||||
return fmt.Errorf("unable to parse allowed origin %q: host is required", ao)
|
||||
}
|
||||
if _, _, err := wsGetHostAndPort(u.Scheme == "https", u.Host); err != nil {
|
||||
return fmt.Errorf("unable to parse allowed origin: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -1101,9 +1188,9 @@ func (s *Server) wsSetOriginOptions(o *WebsocketOpts) {
|
||||
}
|
||||
h, p, _ := wsGetHostAndPort(u.Scheme == "https", u.Host)
|
||||
if ws.allowedOrigins == nil {
|
||||
ws.allowedOrigins = make(map[string]*allowedOrigin, len(o.AllowedOrigins))
|
||||
ws.allowedOrigins = make(map[string][]*allowedOrigin, len(o.AllowedOrigins))
|
||||
}
|
||||
ws.allowedOrigins[h] = &allowedOrigin{scheme: u.Scheme, port: p}
|
||||
ws.allowedOrigins[h] = append(ws.allowedOrigins[h], &allowedOrigin{scheme: u.Scheme, port: p})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1324,7 +1411,7 @@ func (s *Server) createWSClient(conn net.Conn, ws *websocket) *client {
|
||||
return c
|
||||
}
|
||||
|
||||
if opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn {
|
||||
if opts.MaxConn < 0 || (opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn) {
|
||||
s.mu.Unlock()
|
||||
c.maxConnExceeded()
|
||||
return nil
|
||||
@@ -1402,7 +1489,7 @@ func (c *client) wsCollapsePtoNB() (net.Buffers, int64) {
|
||||
cp.Reset(buf)
|
||||
}
|
||||
var csz int
|
||||
for _, b := range nb {
|
||||
for i, b := range nb {
|
||||
for len(b) > 0 {
|
||||
n, err := cp.Write(b)
|
||||
if err != nil {
|
||||
@@ -1414,7 +1501,10 @@ func (c *client) wsCollapsePtoNB() (net.Buffers, int64) {
|
||||
}
|
||||
b = b[n:]
|
||||
}
|
||||
nbPoolPut(b) // No longer needed as contents written to compressor.
|
||||
// Use original slice since capacity will change to zero
|
||||
// in the loop after consuming the buffer, which will make
|
||||
// nbPoolPut discard it.
|
||||
nbPoolPut(nb[i])
|
||||
}
|
||||
if err := cp.Flush(); err != nil {
|
||||
c.Errorf("Error during compression: %v", err)
|
||||
|
||||
+4
-2
@@ -1,3 +1,4 @@
|
||||
version: 2
|
||||
project_name: nkeys
|
||||
release:
|
||||
github:
|
||||
@@ -41,7 +42,8 @@ archives:
|
||||
- name_template: '{{ .ProjectName }}-v{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm
|
||||
}}v{{ .Arm }}{{ end }}'
|
||||
wrap_in_directory: true
|
||||
format: zip
|
||||
formats:
|
||||
- zip
|
||||
files:
|
||||
- README.md
|
||||
- LICENSE
|
||||
@@ -50,7 +52,7 @@ checksum:
|
||||
name_template: '{{ .ProjectName }}-v{{ .Version }}-checksums.txt'
|
||||
|
||||
snapshot:
|
||||
name_template: 'dev'
|
||||
version_template: dev
|
||||
|
||||
nfpms:
|
||||
- file_name_template: '{{ .ProjectName }}-v{{ .Version }}-{{ .Arch }}{{ if .Arm
|
||||
|
||||
Reference in New Issue
Block a user