From f0b304d49237c7a9f3658083fd29309bd8335d72 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 16:10:02 +0000 Subject: [PATCH] build(deps): bump github.com/gabriel-vasile/mimetype from 1.4.8 to 1.4.9 Bumps [github.com/gabriel-vasile/mimetype](https://github.com/gabriel-vasile/mimetype) from 1.4.8 to 1.4.9. - [Release notes](https://github.com/gabriel-vasile/mimetype/releases) - [Commits](https://github.com/gabriel-vasile/mimetype/compare/v1.4.8...v1.4.9) --- updated-dependencies: - dependency-name: github.com/gabriel-vasile/mimetype dependency-version: 1.4.9 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 +- .../gabriel-vasile/mimetype/.golangci.yml | 5 + .../mimetype/internal/json/json.go | 567 ------------------ .../mimetype/internal/json/parser.go | 464 ++++++++++++++ .../mimetype/internal/magic/archive.go | 2 +- .../mimetype/internal/magic/binary.go | 6 +- .../mimetype/internal/magic/geo.go | 14 +- .../mimetype/internal/magic/text.go | 164 ++--- .../mimetype/supported_mimes.md | 3 +- .../gabriel-vasile/mimetype/tree.go | 5 +- vendor/modules.txt | 4 +- 12 files changed, 536 insertions(+), 704 deletions(-) create mode 100644 vendor/github.com/gabriel-vasile/mimetype/.golangci.yml delete mode 100644 vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go create mode 100644 vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go diff --git a/go.mod b/go.mod index 386941c72f..7e5d758b70 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8 github.com/dutchcoders/go-clamd v0.0.0-20170520113014-b970184f4d9e github.com/egirna/icap-client v0.1.1 - github.com/gabriel-vasile/mimetype v1.4.8 + github.com/gabriel-vasile/mimetype v1.4.9 github.com/ggwhite/go-masker v1.1.0 github.com/go-chi/chi/v5 v5.2.1 github.com/go-chi/render v1.0.3 diff --git a/go.sum b/go.sum index 2cab5801a2..397d83a4e8 100644 --- a/go.sum +++ b/go.sum @@ -317,8 +317,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= -github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= +github.com/gabriel-vasile/mimetype v1.4.9 h1:5k+WDwEsD9eTLL8Tz3L0VnmVh9QxGjRmjBvAG7U/oYY= +github.com/gabriel-vasile/mimetype v1.4.9/go.mod h1:WnSQhFKJuBlRyLiKohA/2DtIlPFAbguNaG7QCHcyGok= github.com/gdexlab/go-render v1.0.1 h1:rxqB3vo5s4n1kF0ySmoNeSPRYkEsyHgln4jFIQY7v0U= github.com/gdexlab/go-render v1.0.1/go.mod h1:wRi5nW2qfjiGj4mPukH4UV0IknS1cHD4VgFTmJX5JzM= github.com/getkin/kin-openapi v0.13.0/go.mod h1:WGRs2ZMM1Q8LR1QBEwUxC6RJEfaBcD0s+pcEVXFuAjw= diff --git a/vendor/github.com/gabriel-vasile/mimetype/.golangci.yml b/vendor/github.com/gabriel-vasile/mimetype/.golangci.yml new file mode 100644 index 0000000000..f2058ccc57 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/.golangci.yml @@ -0,0 +1,5 @@ +version: "2" +linters: + exclusions: + presets: + - std-error-handling diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go b/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go deleted file mode 100644 index 5b2ecee443..0000000000 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go +++ /dev/null @@ -1,567 +0,0 @@ -// Copyright (c) 2009 The Go Authors. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Package json provides a JSON value parser state machine. -// This package is almost entirely copied from the Go stdlib. -// Changes made to it permit users of the package to tell -// if some slice of bytes is a valid beginning of a json string. -package json - -import ( - "fmt" - "sync" -) - -type ( - scanStatus int -) - -const ( - parseObjectKey = iota // parsing object key (before colon) - parseObjectValue // parsing object value (after colon) - parseArrayValue // parsing array value - - scanContinue scanStatus = iota // uninteresting byte - scanBeginLiteral // end implied by next result != scanContinue - scanBeginObject // begin object - scanObjectKey // just finished object key (string) - scanObjectValue // just finished non-last object value - scanEndObject // end object (implies scanObjectValue if possible) - scanBeginArray // begin array - scanArrayValue // just finished array value - scanEndArray // end array (implies scanArrayValue if possible) - scanSkipSpace // space byte; can skip; known to be last "continue" result - scanEnd // top-level value ended *before* this byte; known to be first "stop" result - scanError // hit an error, scanner.err. - - // This limits the max nesting depth to prevent stack overflow. - // This is permitted by https://tools.ietf.org/html/rfc7159#section-9 - maxNestingDepth = 10000 -) - -type ( - scanner struct { - step func(*scanner, byte) scanStatus - parseState []int - endTop bool - err error - index int - } -) - -var scannerPool = sync.Pool{ - New: func() any { - return &scanner{} - }, -} - -func newScanner() *scanner { - s := scannerPool.Get().(*scanner) - s.reset() - return s -} - -func freeScanner(s *scanner) { - // Avoid hanging on to too much memory in extreme cases. - if len(s.parseState) > 1024 { - s.parseState = nil - } - scannerPool.Put(s) -} - -// Scan returns the number of bytes scanned and if there was any error -// in trying to reach the end of data. -func Scan(data []byte) (int, error) { - s := newScanner() - defer freeScanner(s) - _ = checkValid(data, s) - return s.index, s.err -} - -// checkValid verifies that data is valid JSON-encoded data. -// scan is passed in for use by checkValid to avoid an allocation. -func checkValid(data []byte, scan *scanner) error { - for _, c := range data { - scan.index++ - if scan.step(scan, c) == scanError { - return scan.err - } - } - if scan.eof() == scanError { - return scan.err - } - return nil -} - -func isSpace(c byte) bool { - return c == ' ' || c == '\t' || c == '\r' || c == '\n' -} - -func (s *scanner) reset() { - s.step = stateBeginValue - s.parseState = s.parseState[0:0] - s.err = nil - s.endTop = false - s.index = 0 -} - -// eof tells the scanner that the end of input has been reached. -// It returns a scan status just as s.step does. -func (s *scanner) eof() scanStatus { - if s.err != nil { - return scanError - } - if s.endTop { - return scanEnd - } - s.step(s, ' ') - if s.endTop { - return scanEnd - } - if s.err == nil { - s.err = fmt.Errorf("unexpected end of JSON input") - } - return scanError -} - -// pushParseState pushes a new parse state p onto the parse stack. -// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned. -func (s *scanner) pushParseState(c byte, newParseState int, successState scanStatus) scanStatus { - s.parseState = append(s.parseState, newParseState) - if len(s.parseState) <= maxNestingDepth { - return successState - } - return s.error(c, "exceeded max depth") -} - -// popParseState pops a parse state (already obtained) off the stack -// and updates s.step accordingly. -func (s *scanner) popParseState() { - n := len(s.parseState) - 1 - s.parseState = s.parseState[0:n] - if n == 0 { - s.step = stateEndTop - s.endTop = true - } else { - s.step = stateEndValue - } -} - -// stateBeginValueOrEmpty is the state after reading `[`. -func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus { - if c <= ' ' && isSpace(c) { - return scanSkipSpace - } - if c == ']' { - return stateEndValue(s, c) - } - return stateBeginValue(s, c) -} - -// stateBeginValue is the state at the beginning of the input. -func stateBeginValue(s *scanner, c byte) scanStatus { - if c <= ' ' && isSpace(c) { - return scanSkipSpace - } - switch c { - case '{': - s.step = stateBeginStringOrEmpty - return s.pushParseState(c, parseObjectKey, scanBeginObject) - case '[': - s.step = stateBeginValueOrEmpty - return s.pushParseState(c, parseArrayValue, scanBeginArray) - case '"': - s.step = stateInString - return scanBeginLiteral - case '-': - s.step = stateNeg - return scanBeginLiteral - case '0': // beginning of 0.123 - s.step = state0 - return scanBeginLiteral - case 't': // beginning of true - s.step = stateT - return scanBeginLiteral - case 'f': // beginning of false - s.step = stateF - return scanBeginLiteral - case 'n': // beginning of null - s.step = stateN - return scanBeginLiteral - } - if '1' <= c && c <= '9' { // beginning of 1234.5 - s.step = state1 - return scanBeginLiteral - } - return s.error(c, "looking for beginning of value") -} - -// stateBeginStringOrEmpty is the state after reading `{`. -func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus { - if c <= ' ' && isSpace(c) { - return scanSkipSpace - } - if c == '}' { - n := len(s.parseState) - s.parseState[n-1] = parseObjectValue - return stateEndValue(s, c) - } - return stateBeginString(s, c) -} - -// stateBeginString is the state after reading `{"key": value,`. -func stateBeginString(s *scanner, c byte) scanStatus { - if c <= ' ' && isSpace(c) { - return scanSkipSpace - } - if c == '"' { - s.step = stateInString - return scanBeginLiteral - } - return s.error(c, "looking for beginning of object key string") -} - -// stateEndValue is the state after completing a value, -// such as after reading `{}` or `true` or `["x"`. -func stateEndValue(s *scanner, c byte) scanStatus { - n := len(s.parseState) - if n == 0 { - // Completed top-level before the current byte. - s.step = stateEndTop - s.endTop = true - return stateEndTop(s, c) - } - if c <= ' ' && isSpace(c) { - s.step = stateEndValue - return scanSkipSpace - } - ps := s.parseState[n-1] - switch ps { - case parseObjectKey: - if c == ':' { - s.parseState[n-1] = parseObjectValue - s.step = stateBeginValue - return scanObjectKey - } - return s.error(c, "after object key") - case parseObjectValue: - if c == ',' { - s.parseState[n-1] = parseObjectKey - s.step = stateBeginString - return scanObjectValue - } - if c == '}' { - s.popParseState() - return scanEndObject - } - return s.error(c, "after object key:value pair") - case parseArrayValue: - if c == ',' { - s.step = stateBeginValue - return scanArrayValue - } - if c == ']' { - s.popParseState() - return scanEndArray - } - return s.error(c, "after array element") - } - return s.error(c, "") -} - -// stateEndTop is the state after finishing the top-level value, -// such as after reading `{}` or `[1,2,3]`. -// Only space characters should be seen now. -func stateEndTop(s *scanner, c byte) scanStatus { - if c != ' ' && c != '\t' && c != '\r' && c != '\n' { - // Complain about non-space byte on next call. - s.error(c, "after top-level value") - } - return scanEnd -} - -// stateInString is the state after reading `"`. -func stateInString(s *scanner, c byte) scanStatus { - if c == '"' { - s.step = stateEndValue - return scanContinue - } - if c == '\\' { - s.step = stateInStringEsc - return scanContinue - } - if c < 0x20 { - return s.error(c, "in string literal") - } - return scanContinue -} - -// stateInStringEsc is the state after reading `"\` during a quoted string. -func stateInStringEsc(s *scanner, c byte) scanStatus { - switch c { - case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': - s.step = stateInString - return scanContinue - case 'u': - s.step = stateInStringEscU - return scanContinue - } - return s.error(c, "in string escape code") -} - -// stateInStringEscU is the state after reading `"\u` during a quoted string. -func stateInStringEscU(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU1 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. -func stateInStringEscU1(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU12 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. -func stateInStringEscU12(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU123 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. -func stateInStringEscU123(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInString - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateNeg is the state after reading `-` during a number. -func stateNeg(s *scanner, c byte) scanStatus { - if c == '0' { - s.step = state0 - return scanContinue - } - if '1' <= c && c <= '9' { - s.step = state1 - return scanContinue - } - return s.error(c, "in numeric literal") -} - -// state1 is the state after reading a non-zero integer during a number, -// such as after reading `1` or `100` but not `0`. -func state1(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' { - s.step = state1 - return scanContinue - } - return state0(s, c) -} - -// state0 is the state after reading `0` during a number. -func state0(s *scanner, c byte) scanStatus { - if c == '.' { - s.step = stateDot - return scanContinue - } - if c == 'e' || c == 'E' { - s.step = stateE - return scanContinue - } - return stateEndValue(s, c) -} - -// stateDot is the state after reading the integer and decimal point in a number, -// such as after reading `1.`. -func stateDot(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' { - s.step = stateDot0 - return scanContinue - } - return s.error(c, "after decimal point in numeric literal") -} - -// stateDot0 is the state after reading the integer, decimal point, and subsequent -// digits of a number, such as after reading `3.14`. -func stateDot0(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' { - return scanContinue - } - if c == 'e' || c == 'E' { - s.step = stateE - return scanContinue - } - return stateEndValue(s, c) -} - -// stateE is the state after reading the mantissa and e in a number, -// such as after reading `314e` or `0.314e`. -func stateE(s *scanner, c byte) scanStatus { - if c == '+' || c == '-' { - s.step = stateESign - return scanContinue - } - return stateESign(s, c) -} - -// stateESign is the state after reading the mantissa, e, and sign in a number, -// such as after reading `314e-` or `0.314e+`. -func stateESign(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' { - s.step = stateE0 - return scanContinue - } - return s.error(c, "in exponent of numeric literal") -} - -// stateE0 is the state after reading the mantissa, e, optional sign, -// and at least one digit of the exponent in a number, -// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. -func stateE0(s *scanner, c byte) scanStatus { - if '0' <= c && c <= '9' { - return scanContinue - } - return stateEndValue(s, c) -} - -// stateT is the state after reading `t`. -func stateT(s *scanner, c byte) scanStatus { - if c == 'r' { - s.step = stateTr - return scanContinue - } - return s.error(c, "in literal true (expecting 'r')") -} - -// stateTr is the state after reading `tr`. -func stateTr(s *scanner, c byte) scanStatus { - if c == 'u' { - s.step = stateTru - return scanContinue - } - return s.error(c, "in literal true (expecting 'u')") -} - -// stateTru is the state after reading `tru`. -func stateTru(s *scanner, c byte) scanStatus { - if c == 'e' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal true (expecting 'e')") -} - -// stateF is the state after reading `f`. -func stateF(s *scanner, c byte) scanStatus { - if c == 'a' { - s.step = stateFa - return scanContinue - } - return s.error(c, "in literal false (expecting 'a')") -} - -// stateFa is the state after reading `fa`. -func stateFa(s *scanner, c byte) scanStatus { - if c == 'l' { - s.step = stateFal - return scanContinue - } - return s.error(c, "in literal false (expecting 'l')") -} - -// stateFal is the state after reading `fal`. -func stateFal(s *scanner, c byte) scanStatus { - if c == 's' { - s.step = stateFals - return scanContinue - } - return s.error(c, "in literal false (expecting 's')") -} - -// stateFals is the state after reading `fals`. -func stateFals(s *scanner, c byte) scanStatus { - if c == 'e' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal false (expecting 'e')") -} - -// stateN is the state after reading `n`. -func stateN(s *scanner, c byte) scanStatus { - if c == 'u' { - s.step = stateNu - return scanContinue - } - return s.error(c, "in literal null (expecting 'u')") -} - -// stateNu is the state after reading `nu`. -func stateNu(s *scanner, c byte) scanStatus { - if c == 'l' { - s.step = stateNul - return scanContinue - } - return s.error(c, "in literal null (expecting 'l')") -} - -// stateNul is the state after reading `nul`. -func stateNul(s *scanner, c byte) scanStatus { - if c == 'l' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal null (expecting 'l')") -} - -// stateError is the state after reaching a syntax error, -// such as after reading `[1}` or `5.1.2`. -func stateError(s *scanner, c byte) scanStatus { - return scanError -} - -// error records an error and switches to the error state. -func (s *scanner) error(c byte, context string) scanStatus { - s.step = stateError - s.err = fmt.Errorf("invalid character <<%c>> %s", c, context) - return scanError -} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go b/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go new file mode 100644 index 0000000000..fd8dd52028 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go @@ -0,0 +1,464 @@ +package json + +import ( + "bytes" + "sync" +) + +const ( + QueryNone = "json" + QueryGeo = "geo" + QueryHAR = "har" + QueryGLTF = "gltf" + maxRecursion = 4096 +) + +var queries = map[string][]query{ + QueryNone: nil, + QueryGeo: {{ + SearchPath: [][]byte{[]byte("type")}, + SearchVals: [][]byte{ + []byte(`"Feature"`), + []byte(`"FeatureCollection"`), + []byte(`"Point"`), + []byte(`"LineString"`), + []byte(`"Polygon"`), + []byte(`"MultiPoint"`), + []byte(`"MultiLineString"`), + []byte(`"MultiPolygon"`), + []byte(`"GeometryCollection"`), + }, + }}, + QueryHAR: {{ + SearchPath: [][]byte{[]byte("log"), []byte("version")}, + }, { + SearchPath: [][]byte{[]byte("log"), []byte("creator")}, + }, { + SearchPath: [][]byte{[]byte("log"), []byte("entries")}, + }}, + QueryGLTF: {{ + SearchPath: [][]byte{[]byte("asset"), []byte("version")}, + SearchVals: [][]byte{[]byte(`"1.0"`), []byte(`"2.0"`)}, + }}, +} + +var parserPool = sync.Pool{ + New: func() any { + return &parserState{maxRecursion: maxRecursion} + }, +} + +// parserState holds the state of JSON parsing. The number of inspected bytes, +// the current path inside the JSON object, etc. +type parserState struct { + // ib represents the number of inspected bytes. + // Because mimetype limits itself to only reading the header of the file, + // it means sometimes the input JSON can be truncated. In that case, we want + // to still detect it as JSON, even if it's invalid/truncated. + // When ib == len(input) it means the JSON was valid (at least the header). + ib int + maxRecursion int + // currPath keeps a track of the JSON keys parsed up. + // It works only for JSON objects. JSON arrays are ignored + // mainly because the functionality is not needed. + currPath [][]byte + // firstToken stores the first JSON token encountered in input. + // TODO: performance would be better if we would stop parsing as soon + // as we see that first token is not what we are interested in. + firstToken int + // querySatisfied is true if both path and value of any queries passed to + // consumeAny are satisfied. + querySatisfied bool +} + +// query holds information about a combination of {"key": "val"} that we're trying +// to search for inside the JSON. +type query struct { + // SearchPath represents the whole path to look for inside the JSON. + // ex: [][]byte{[]byte("foo"), []byte("bar")} matches {"foo": {"bar": "baz"}} + SearchPath [][]byte + // SearchVals represents values to look for when the SearchPath is found. + // Each SearchVal element is tried until one of them matches (logical OR.) + SearchVals [][]byte +} + +func eq(path1, path2 [][]byte) bool { + if len(path1) != len(path2) { + return false + } + for i := range path1 { + if !bytes.Equal(path1[i], path2[i]) { + return false + } + } + return true +} + +// LooksLikeObjectOrArray reports if first non white space character from raw +// is either { or [. Parsing raw as JSON is a heavy operation. When receiving some +// text input we can skip parsing if the input does not even look like JSON. +func LooksLikeObjectOrArray(raw []byte) bool { + for i := range raw { + if isSpace(raw[i]) { + continue + } + return raw[i] == '{' || raw[i] == '[' + } + + return false +} + +// Parse will take out a parser from the pool depending on queryType and tries +// to parse raw bytes as JSON. +func Parse(queryType string, raw []byte) (parsed, inspected, firstToken int, querySatisfied bool) { + p := parserPool.Get().(*parserState) + defer func() { + // Avoid hanging on to too much memory in extreme input cases. + if len(p.currPath) > 128 { + p.currPath = nil + } + parserPool.Put(p) + }() + p.reset() + + qs := queries[queryType] + got := p.consumeAny(raw, qs, 0) + return got, p.ib, p.firstToken, p.querySatisfied +} + +func (p *parserState) reset() { + p.ib = 0 + p.currPath = p.currPath[0:0] + p.firstToken = TokInvalid + p.querySatisfied = false +} + +func (p *parserState) consumeSpace(b []byte) (n int) { + for len(b) > 0 && isSpace(b[0]) { + b = b[1:] + n++ + p.ib++ + } + return n +} + +func (p *parserState) consumeConst(b, cnst []byte) int { + lb := len(b) + for i, c := range cnst { + if lb > i && b[i] == c { + p.ib++ + } else { + return 0 + } + } + return len(cnst) +} + +func (p *parserState) consumeString(b []byte) (n int) { + var c byte + for len(b[n:]) > 0 { + c, n = b[n], n+1 + p.ib++ + switch c { + case '\\': + if len(b[n:]) == 0 { + return 0 + } + switch b[n] { + case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': + n++ + p.ib++ + continue + case 'u': + n++ + p.ib++ + for j := 0; j < 4 && len(b[n:]) > 0; j++ { + if !isXDigit(b[n]) { + return 0 + } + n++ + p.ib++ + } + continue + default: + return 0 + } + case '"': + return n + default: + continue + } + } + return 0 +} + +func (p *parserState) consumeNumber(b []byte) (n int) { + got := false + var i int + + if len(b) == 0 { + goto out + } + if b[0] == '-' { + b, i = b[1:], i+1 + p.ib++ + } + + for len(b) > 0 { + if !isDigit(b[0]) { + break + } + got = true + b, i = b[1:], i+1 + p.ib++ + } + if len(b) == 0 { + goto out + } + if b[0] == '.' { + b, i = b[1:], i+1 + p.ib++ + } + for len(b) > 0 { + if !isDigit(b[0]) { + break + } + got = true + b, i = b[1:], i+1 + p.ib++ + } + if len(b) == 0 { + goto out + } + if got && (b[0] == 'e' || b[0] == 'E') { + b, i = b[1:], i+1 + p.ib++ + got = false + if len(b) == 0 { + goto out + } + if b[0] == '+' || b[0] == '-' { + b, i = b[1:], i+1 + p.ib++ + } + for len(b) > 0 { + if !isDigit(b[0]) { + break + } + got = true + b, i = b[1:], i+1 + p.ib++ + } + } +out: + if got { + return i + } + return 0 +} + +func (p *parserState) consumeArray(b []byte, qs []query, lvl int) (n int) { + p.currPath = append(p.currPath, []byte{'['}) + if len(b) == 0 { + return 0 + } + + for n < len(b) { + n += p.consumeSpace(b[n:]) + if len(b[n:]) == 0 { + return 0 + } + if b[n] == ']' { + p.ib++ + p.currPath = p.currPath[:len(p.currPath)-1] + return n + 1 + } + innerParsed := p.consumeAny(b[n:], qs, lvl) + if innerParsed == 0 { + return 0 + } + n += innerParsed + if len(b[n:]) == 0 { + return 0 + } + switch b[n] { + case ',': + n += 1 + p.ib++ + continue + case ']': + p.ib++ + return n + 1 + default: + return 0 + } + } + return 0 +} + +func queryPathMatch(qs []query, path [][]byte) int { + for i := range qs { + if eq(qs[i].SearchPath, path) { + return i + } + } + return -1 +} + +func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) { + for n < len(b) { + n += p.consumeSpace(b[n:]) + if len(b[n:]) == 0 { + return 0 + } + if b[n] == '}' { + p.ib++ + return n + 1 + } + if b[n] != '"' { + return 0 + } else { + n += 1 + p.ib++ + } + // queryMatched stores the index of the query satisfying the current path. + queryMatched := -1 + if keyLen := p.consumeString(b[n:]); keyLen == 0 { + return 0 + } else { + p.currPath = append(p.currPath, b[n:n+keyLen-1]) + if !p.querySatisfied { + queryMatched = queryPathMatch(qs, p.currPath) + } + n += keyLen + } + n += p.consumeSpace(b[n:]) + if len(b[n:]) == 0 { + return 0 + } + if b[n] != ':' { + return 0 + } else { + n += 1 + p.ib++ + } + n += p.consumeSpace(b[n:]) + if len(b[n:]) == 0 { + return 0 + } + + if valLen := p.consumeAny(b[n:], qs, lvl); valLen == 0 { + return 0 + } else { + if queryMatched != -1 { + q := qs[queryMatched] + if len(q.SearchVals) == 0 { + p.querySatisfied = true + } + for _, val := range q.SearchVals { + if bytes.Equal(val, bytes.TrimSpace(b[n:n+valLen])) { + p.querySatisfied = true + } + } + } + n += valLen + } + if len(b[n:]) == 0 { + return 0 + } + switch b[n] { + case ',': + p.currPath = p.currPath[:len(p.currPath)-1] + n++ + p.ib++ + continue + case '}': + p.currPath = p.currPath[:len(p.currPath)-1] + p.ib++ + return n + 1 + default: + return 0 + } + } + return 0 +} + +func (p *parserState) consumeAny(b []byte, qs []query, lvl int) (n int) { + // Avoid too much recursion. + if p.maxRecursion != 0 && lvl > p.maxRecursion { + return 0 + } + n += p.consumeSpace(b) + if len(b[n:]) == 0 { + return 0 + } + + var t, rv int + switch b[n] { + case '"': + n++ + p.ib++ + rv = p.consumeString(b[n:]) + t = TokString + case '[': + n++ + p.ib++ + rv = p.consumeArray(b[n:], qs, lvl+1) + t = TokArray + case '{': + n++ + p.ib++ + rv = p.consumeObject(b[n:], qs, lvl+1) + t = TokObject + case 't': + rv = p.consumeConst(b[n:], []byte("true")) + t = TokTrue + case 'f': + rv = p.consumeConst(b[n:], []byte("false")) + t = TokFalse + case 'n': + rv = p.consumeConst(b[n:], []byte("null")) + t = TokNull + default: + rv = p.consumeNumber(b[n:]) + t = TokNumber + } + if lvl == 0 { + p.firstToken = t + } + if len(qs) == 0 { + p.querySatisfied = true + } + if rv <= 0 { + return n + } + n += rv + n += p.consumeSpace(b[n:]) + return n +} + +func isSpace(c byte) bool { + return c == ' ' || c == '\t' || c == '\r' || c == '\n' +} +func isDigit(c byte) bool { + return '0' <= c && c <= '9' +} + +func isXDigit(c byte) bool { + if isDigit(c) { + return true + } + return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') +} + +const ( + TokInvalid = 0 + TokNull = 1 << iota + TokTrue + TokFalse + TokNumber + TokString + TokArray + TokObject + TokComma +) diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go index 068d00f79a..dd7f2417c6 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go @@ -137,7 +137,7 @@ func tarParseOctal(b []byte) int64 { if b == 0 { break } - if !(b >= '0' && b <= '7') { + if b < '0' || b > '7' { return -1 } ret = (ret << 3) | int64(b-'0') diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go index 7697320180..70599b3420 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go @@ -71,7 +71,7 @@ func Dbf(raw []byte, limit uint32) bool { } // 3rd and 4th bytes contain the last update month and day of month. - if !(0 < raw[2] && raw[2] < 13 && 0 < raw[3] && raw[3] < 32) { + if raw[2] == 0 || raw[2] > 12 || raw[3] == 0 || raw[3] > 31 { return false } @@ -153,7 +153,7 @@ func Marc(raw []byte, limit uint32) bool { return bytes.Contains(raw[:min(2048, len(raw))], []byte{0x1E}) } -// Glb matches a glTF model format file. +// GLB matches a glTF model format file. // GLB is the binary file format representation of 3D models saved in // the GL transmission Format (glTF). // GLB uses little endian and its header structure is as follows: @@ -168,7 +168,7 @@ func Marc(raw []byte, limit uint32) bool { // // [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html // [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf-binary -var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"), +var GLB = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"), []byte("\x67\x6C\x54\x46\x01\x00\x00\x00")) // TzIf matches a Time Zone Information Format (TZif) file. diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/geo.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/geo.go index f077e16724..cade91f18c 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/geo.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/geo.go @@ -12,13 +12,13 @@ func Shp(raw []byte, limit uint32) bool { return false } - if !(binary.BigEndian.Uint32(raw[0:4]) == 9994 && - binary.BigEndian.Uint32(raw[4:8]) == 0 && - binary.BigEndian.Uint32(raw[8:12]) == 0 && - binary.BigEndian.Uint32(raw[12:16]) == 0 && - binary.BigEndian.Uint32(raw[16:20]) == 0 && - binary.BigEndian.Uint32(raw[20:24]) == 0 && - binary.LittleEndian.Uint32(raw[28:32]) == 1000) { + if binary.BigEndian.Uint32(raw[0:4]) != 9994 || + binary.BigEndian.Uint32(raw[4:8]) != 0 || + binary.BigEndian.Uint32(raw[8:12]) != 0 || + binary.BigEndian.Uint32(raw[12:16]) != 0 || + binary.BigEndian.Uint32(raw[16:20]) != 0 || + binary.BigEndian.Uint32(raw[20:24]) != 0 || + binary.LittleEndian.Uint32(raw[28:32]) != 1000 { return false } diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go index cf6446397f..8178e4707c 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go @@ -2,7 +2,6 @@ package magic import ( "bytes" - "strings" "time" "github.com/gabriel-vasile/mimetype/internal/charset" @@ -154,145 +153,75 @@ func Php(raw []byte, limit uint32) bool { // JSON matches a JavaScript Object Notation file. func JSON(raw []byte, limit uint32) bool { - raw = trimLWS(raw) // #175 A single JSON string, number or bool is not considered JSON. // JSON objects and arrays are reported as JSON. - if len(raw) < 2 || (raw[0] != '[' && raw[0] != '{') { - return false - } - parsed, err := json.Scan(raw) - // If the full file content was provided, check there is no error. - if limit == 0 || len(raw) < int(limit) { - return err == nil - } - - // If a section of the file was provided, check if all of it was parsed. - return parsed == len(raw) && len(raw) > 0 + return jsonHelper(raw, limit, json.QueryNone, json.TokObject|json.TokArray) } // GeoJSON matches a RFC 7946 GeoJSON file. // // GeoJSON detection implies searching for key:value pairs like: `"type": "Feature"` // in the input. -// BUG(gabriel-vasile): The "type" key should be searched for in the root object. func GeoJSON(raw []byte, limit uint32) bool { - raw = trimLWS(raw) - if len(raw) == 0 { + return jsonHelper(raw, limit, json.QueryGeo, json.TokObject) +} + +// HAR matches a HAR Spec file. +// Spec: http://www.softwareishard.com/blog/har-12-spec/ +func HAR(raw []byte, limit uint32) bool { + return jsonHelper(raw, limit, json.QueryHAR, json.TokObject) +} + +// GLTF matches a GL Transmission Format (JSON) file. +// Visit [glTF specification] and [IANA glTF entry] for more details. +// +// [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html +// [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf+json +func GLTF(raw []byte, limit uint32) bool { + return jsonHelper(raw, limit, json.QueryGLTF, json.TokObject) +} + +func jsonHelper(raw []byte, limit uint32, q string, wantTok int) bool { + if !json.LooksLikeObjectOrArray(raw) { return false } - // GeoJSON is always a JSON object, not a JSON array or any other JSON value. - if raw[0] != '{' { + lraw := len(raw) + parsed, inspected, firstToken, querySatisfied := json.Parse(q, raw) + if !querySatisfied || firstToken&wantTok == 0 { return false } - s := []byte(`"type"`) - si, sl := bytes.Index(raw, s), len(s) - - if si == -1 { - return false + // If the full file content was provided, check that the whole input was parsed. + if limit == 0 || lraw < int(limit) { + return parsed == lraw } - // If the "type" string is the suffix of the input, - // there is no need to search for the value of the key. - if si+sl == len(raw) { - return false - } - // Skip the "type" part. - raw = raw[si+sl:] - // Skip any whitespace before the colon. - raw = trimLWS(raw) - // Check for colon. - if len(raw) == 0 || raw[0] != ':' { - return false - } - // Skip any whitespace after the colon. - raw = trimLWS(raw[1:]) - - geoJSONTypes := [][]byte{ - []byte(`"Feature"`), - []byte(`"FeatureCollection"`), - []byte(`"Point"`), - []byte(`"LineString"`), - []byte(`"Polygon"`), - []byte(`"MultiPoint"`), - []byte(`"MultiLineString"`), - []byte(`"MultiPolygon"`), - []byte(`"GeometryCollection"`), - } - for _, t := range geoJSONTypes { - if bytes.HasPrefix(raw, t) { - return true - } - } - - return false + // If a section of the file was provided, check if all of it was inspected. + // In other words, check that if there was a problem parsing, that problem + // occured at the last byte in the input. + return inspected == lraw && lraw > 0 } // NdJSON matches a Newline delimited JSON file. All complete lines from raw // must be valid JSON documents meaning they contain one of the valid JSON data // types. func NdJSON(raw []byte, limit uint32) bool { - lCount, hasObjOrArr := 0, false + lCount, objOrArr := 0, 0 raw = dropLastLine(raw, limit) var l []byte for len(raw) != 0 { l, raw = scanLine(raw) - // Empty lines are allowed in NDJSON. - if l = trimRWS(trimLWS(l)); len(l) == 0 { - continue - } - _, err := json.Scan(l) - if err != nil { + _, inspected, firstToken, _ := json.Parse(json.QueryNone, l) + if len(l) != inspected { return false } - if l[0] == '[' || l[0] == '{' { - hasObjOrArr = true + if firstToken == json.TokArray || firstToken == json.TokObject { + objOrArr++ } lCount++ } - return lCount > 1 && hasObjOrArr -} - -// HAR matches a HAR Spec file. -// Spec: http://www.softwareishard.com/blog/har-12-spec/ -func HAR(raw []byte, limit uint32) bool { - s := []byte(`"log"`) - si, sl := bytes.Index(raw, s), len(s) - - if si == -1 { - return false - } - - // If the "log" string is the suffix of the input, - // there is no need to search for the value of the key. - if si+sl == len(raw) { - return false - } - // Skip the "log" part. - raw = raw[si+sl:] - // Skip any whitespace before the colon. - raw = trimLWS(raw) - // Check for colon. - if len(raw) == 0 || raw[0] != ':' { - return false - } - // Skip any whitespace after the colon. - raw = trimLWS(raw[1:]) - - harJSONTypes := [][]byte{ - []byte(`"version"`), - []byte(`"creator"`), - []byte(`"entries"`), - } - for _, t := range harJSONTypes { - si := bytes.Index(raw, t) - if si > -1 { - return true - } - } - - return false + return lCount > 1 && objOrArr > 0 } // Svg matches a SVG file. @@ -305,32 +234,31 @@ func Srt(raw []byte, _ uint32) bool { line, raw := scanLine(raw) // First line must be 1. - if string(line) != "1" { + if len(line) != 1 || line[0] != '1' { return false } line, raw = scanLine(raw) - secondLine := string(line) - // Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits secondLine + // Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits second line // length to exactly 29 characters. - if len(secondLine) != 29 { + if len(line) != 29 { return false } // Decimal separator of fractional seconds in the timestamps must be a // comma, not a period. - if strings.Contains(secondLine, ".") { + if bytes.IndexByte(line, '.') != -1 { return false } - // Second line must be a time range. - ts := strings.Split(secondLine, " --> ") - if len(ts) != 2 { + sep := []byte(" --> ") + i := bytes.Index(line, sep) + if i == -1 { return false } const layout = "15:04:05,000" - t0, err := time.Parse(layout, ts[0]) + t0, err := time.Parse(layout, string(line[:i])) if err != nil { return false } - t1, err := time.Parse(layout, ts[1]) + t1, err := time.Parse(layout, string(line[i+len(sep):])) if err != nil { return false } diff --git a/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md b/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md index f9bf03cba6..6f45bfbb64 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md +++ b/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md @@ -1,4 +1,4 @@ -## 178 Supported MIME types +## 179 Supported MIME types This file is automatically generated when running tests. Do not edit manually. Extension | MIME type | Aliases @@ -171,6 +171,7 @@ Extension | MIME type | Aliases **.json** | application/json | - **.geojson** | application/geo+json | - **.har** | application/json | - +**.gltf** | model/gltf+json | - **.ndjson** | application/x-ndjson | - **.rtf** | text/rtf | application/rtf **.srt** | application/x-subrip | application/x-srt, text/x-srt diff --git a/vendor/github.com/gabriel-vasile/mimetype/tree.go b/vendor/github.com/gabriel-vasile/mimetype/tree.go index b5f5662277..63a2093a4b 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/tree.go +++ b/vendor/github.com/gabriel-vasile/mimetype/tree.go @@ -83,7 +83,7 @@ var ( text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt) xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2). alias("application/xml") - json = newMIME("application/json", ".json", magic.JSON, geoJSON, har) + json = newMIME("application/json", ".json", magic.JSON, geoJSON, har, gltf) har = newMIME("application/json", ".har", magic.HAR) csv = newMIME("text/csv", ".csv", magic.Csv) tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv) @@ -262,7 +262,8 @@ var ( pat = newMIME("image/x-gimp-pat", ".pat", magic.Pat) gbr = newMIME("image/x-gimp-gbr", ".gbr", magic.Gbr) xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", magic.Xfdf) - glb = newMIME("model/gltf-binary", ".glb", magic.Glb) + glb = newMIME("model/gltf-binary", ".glb", magic.GLB) + gltf = newMIME("model/gltf+json", ".gltf", magic.GLTF) jxr = newMIME("image/jxr", ".jxr", magic.Jxr).alias("image/vnd.ms-photo") parquet = newMIME("application/vnd.apache.parquet", ".parquet", magic.Par1). alias("application/x-parquet") diff --git a/vendor/modules.txt b/vendor/modules.txt index c20682b20e..9119683ad4 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -417,8 +417,8 @@ github.com/felixge/httpsnoop ## explicit; go 1.17 github.com/fsnotify/fsnotify github.com/fsnotify/fsnotify/internal -# github.com/gabriel-vasile/mimetype v1.4.8 -## explicit; go 1.20 +# github.com/gabriel-vasile/mimetype v1.4.9 +## explicit; go 1.23.0 github.com/gabriel-vasile/mimetype github.com/gabriel-vasile/mimetype/internal/charset github.com/gabriel-vasile/mimetype/internal/json