Merge pull request #744 from opencloud-eu/dependabot/go_modules/github.com/gabriel-vasile/mimetype-1.4.9

build(deps): bump github.com/gabriel-vasile/mimetype from 1.4.8 to 1.4.9
Authored by Ralf Haferkamp on 2025-04-29 10:38:40 +02:00; committed by GitHub.
12 changed files with 536 additions and 704 deletions
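
For context, the bumped dependency is the content-sniffing library used for MIME type detection; a minimal usage sketch of its public API (not part of this diff, shown only to frame the changes below):

// Minimal usage sketch, not part of this pull request.
package main

import (
	"fmt"

	"github.com/gabriel-vasile/mimetype"
)

func main() {
	// Detect sniffs at most the configured read limit (3072 bytes by default),
	// which is why the JSON matchers changed below must cope with truncated input.
	data := []byte(`{"name": "opencloud", "tags": ["go"]}`)
	mt := mimetype.Detect(data)
	fmt.Println(mt.String(), mt.Extension()) // application/json .json
}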

go.mod

@@ -19,7 +19,7 @@ require (
github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8
github.com/dutchcoders/go-clamd v0.0.0-20170520113014-b970184f4d9e
github.com/egirna/icap-client v0.1.1
github.com/gabriel-vasile/mimetype v1.4.8
github.com/gabriel-vasile/mimetype v1.4.9
github.com/ggwhite/go-masker v1.1.0
github.com/go-chi/chi/v5 v5.2.1
github.com/go-chi/render v1.0.3

go.sum

@@ -317,8 +317,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
github.com/gabriel-vasile/mimetype v1.4.9 h1:5k+WDwEsD9eTLL8Tz3L0VnmVh9QxGjRmjBvAG7U/oYY=
github.com/gabriel-vasile/mimetype v1.4.9/go.mod h1:WnSQhFKJuBlRyLiKohA/2DtIlPFAbguNaG7QCHcyGok=
github.com/gdexlab/go-render v1.0.1 h1:rxqB3vo5s4n1kF0ySmoNeSPRYkEsyHgln4jFIQY7v0U=
github.com/gdexlab/go-render v1.0.1/go.mod h1:wRi5nW2qfjiGj4mPukH4UV0IknS1cHD4VgFTmJX5JzM=
github.com/getkin/kin-openapi v0.13.0/go.mod h1:WGRs2ZMM1Q8LR1QBEwUxC6RJEfaBcD0s+pcEVXFuAjw=


@@ -0,0 +1,5 @@
version: "2"
linters:
exclusions:
presets:
- std-error-handling


@@ -1,567 +0,0 @@
// Copyright (c) 2009 The Go Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Package json provides a JSON value parser state machine.
// This package is almost entirely copied from the Go stdlib.
// Changes made to it permit users of the package to tell
// if some slice of bytes is a valid beginning of a json string.
package json
import (
"fmt"
"sync"
)
type (
scanStatus int
)
const (
parseObjectKey = iota // parsing object key (before colon)
parseObjectValue // parsing object value (after colon)
parseArrayValue // parsing array value
scanContinue scanStatus = iota // uninteresting byte
scanBeginLiteral // end implied by next result != scanContinue
scanBeginObject // begin object
scanObjectKey // just finished object key (string)
scanObjectValue // just finished non-last object value
scanEndObject // end object (implies scanObjectValue if possible)
scanBeginArray // begin array
scanArrayValue // just finished array value
scanEndArray // end array (implies scanArrayValue if possible)
scanSkipSpace // space byte; can skip; known to be last "continue" result
scanEnd // top-level value ended *before* this byte; known to be first "stop" result
scanError // hit an error, scanner.err.
// This limits the max nesting depth to prevent stack overflow.
// This is permitted by https://tools.ietf.org/html/rfc7159#section-9
maxNestingDepth = 10000
)
type (
scanner struct {
step func(*scanner, byte) scanStatus
parseState []int
endTop bool
err error
index int
}
)
var scannerPool = sync.Pool{
New: func() any {
return &scanner{}
},
}
func newScanner() *scanner {
s := scannerPool.Get().(*scanner)
s.reset()
return s
}
func freeScanner(s *scanner) {
// Avoid hanging on to too much memory in extreme cases.
if len(s.parseState) > 1024 {
s.parseState = nil
}
scannerPool.Put(s)
}
// Scan returns the number of bytes scanned and if there was any error
// in trying to reach the end of data.
func Scan(data []byte) (int, error) {
s := newScanner()
defer freeScanner(s)
_ = checkValid(data, s)
return s.index, s.err
}
// checkValid verifies that data is valid JSON-encoded data.
// scan is passed in for use by checkValid to avoid an allocation.
func checkValid(data []byte, scan *scanner) error {
for _, c := range data {
scan.index++
if scan.step(scan, c) == scanError {
return scan.err
}
}
if scan.eof() == scanError {
return scan.err
}
return nil
}
func isSpace(c byte) bool {
return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
func (s *scanner) reset() {
s.step = stateBeginValue
s.parseState = s.parseState[0:0]
s.err = nil
s.endTop = false
s.index = 0
}
// eof tells the scanner that the end of input has been reached.
// It returns a scan status just as s.step does.
func (s *scanner) eof() scanStatus {
if s.err != nil {
return scanError
}
if s.endTop {
return scanEnd
}
s.step(s, ' ')
if s.endTop {
return scanEnd
}
if s.err == nil {
s.err = fmt.Errorf("unexpected end of JSON input")
}
return scanError
}
// pushParseState pushes a new parse state p onto the parse stack.
// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned.
func (s *scanner) pushParseState(c byte, newParseState int, successState scanStatus) scanStatus {
s.parseState = append(s.parseState, newParseState)
if len(s.parseState) <= maxNestingDepth {
return successState
}
return s.error(c, "exceeded max depth")
}
// popParseState pops a parse state (already obtained) off the stack
// and updates s.step accordingly.
func (s *scanner) popParseState() {
n := len(s.parseState) - 1
s.parseState = s.parseState[0:n]
if n == 0 {
s.step = stateEndTop
s.endTop = true
} else {
s.step = stateEndValue
}
}
// stateBeginValueOrEmpty is the state after reading `[`.
func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == ']' {
return stateEndValue(s, c)
}
return stateBeginValue(s, c)
}
// stateBeginValue is the state at the beginning of the input.
func stateBeginValue(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
switch c {
case '{':
s.step = stateBeginStringOrEmpty
return s.pushParseState(c, parseObjectKey, scanBeginObject)
case '[':
s.step = stateBeginValueOrEmpty
return s.pushParseState(c, parseArrayValue, scanBeginArray)
case '"':
s.step = stateInString
return scanBeginLiteral
case '-':
s.step = stateNeg
return scanBeginLiteral
case '0': // beginning of 0.123
s.step = state0
return scanBeginLiteral
case 't': // beginning of true
s.step = stateT
return scanBeginLiteral
case 'f': // beginning of false
s.step = stateF
return scanBeginLiteral
case 'n': // beginning of null
s.step = stateN
return scanBeginLiteral
}
if '1' <= c && c <= '9' { // beginning of 1234.5
s.step = state1
return scanBeginLiteral
}
return s.error(c, "looking for beginning of value")
}
// stateBeginStringOrEmpty is the state after reading `{`.
func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == '}' {
n := len(s.parseState)
s.parseState[n-1] = parseObjectValue
return stateEndValue(s, c)
}
return stateBeginString(s, c)
}
// stateBeginString is the state after reading `{"key": value,`.
func stateBeginString(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == '"' {
s.step = stateInString
return scanBeginLiteral
}
return s.error(c, "looking for beginning of object key string")
}
// stateEndValue is the state after completing a value,
// such as after reading `{}` or `true` or `["x"`.
func stateEndValue(s *scanner, c byte) scanStatus {
n := len(s.parseState)
if n == 0 {
// Completed top-level before the current byte.
s.step = stateEndTop
s.endTop = true
return stateEndTop(s, c)
}
if c <= ' ' && isSpace(c) {
s.step = stateEndValue
return scanSkipSpace
}
ps := s.parseState[n-1]
switch ps {
case parseObjectKey:
if c == ':' {
s.parseState[n-1] = parseObjectValue
s.step = stateBeginValue
return scanObjectKey
}
return s.error(c, "after object key")
case parseObjectValue:
if c == ',' {
s.parseState[n-1] = parseObjectKey
s.step = stateBeginString
return scanObjectValue
}
if c == '}' {
s.popParseState()
return scanEndObject
}
return s.error(c, "after object key:value pair")
case parseArrayValue:
if c == ',' {
s.step = stateBeginValue
return scanArrayValue
}
if c == ']' {
s.popParseState()
return scanEndArray
}
return s.error(c, "after array element")
}
return s.error(c, "")
}
// stateEndTop is the state after finishing the top-level value,
// such as after reading `{}` or `[1,2,3]`.
// Only space characters should be seen now.
func stateEndTop(s *scanner, c byte) scanStatus {
if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
// Complain about non-space byte on next call.
s.error(c, "after top-level value")
}
return scanEnd
}
// stateInString is the state after reading `"`.
func stateInString(s *scanner, c byte) scanStatus {
if c == '"' {
s.step = stateEndValue
return scanContinue
}
if c == '\\' {
s.step = stateInStringEsc
return scanContinue
}
if c < 0x20 {
return s.error(c, "in string literal")
}
return scanContinue
}
// stateInStringEsc is the state after reading `"\` during a quoted string.
func stateInStringEsc(s *scanner, c byte) scanStatus {
switch c {
case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
s.step = stateInString
return scanContinue
case 'u':
s.step = stateInStringEscU
return scanContinue
}
return s.error(c, "in string escape code")
}
// stateInStringEscU is the state after reading `"\u` during a quoted string.
func stateInStringEscU(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU1
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
func stateInStringEscU1(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU12
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
func stateInStringEscU12(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU123
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
func stateInStringEscU123(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInString
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateNeg is the state after reading `-` during a number.
func stateNeg(s *scanner, c byte) scanStatus {
if c == '0' {
s.step = state0
return scanContinue
}
if '1' <= c && c <= '9' {
s.step = state1
return scanContinue
}
return s.error(c, "in numeric literal")
}
// state1 is the state after reading a non-zero integer during a number,
// such as after reading `1` or `100` but not `0`.
func state1(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = state1
return scanContinue
}
return state0(s, c)
}
// state0 is the state after reading `0` during a number.
func state0(s *scanner, c byte) scanStatus {
if c == '.' {
s.step = stateDot
return scanContinue
}
if c == 'e' || c == 'E' {
s.step = stateE
return scanContinue
}
return stateEndValue(s, c)
}
// stateDot is the state after reading the integer and decimal point in a number,
// such as after reading `1.`.
func stateDot(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = stateDot0
return scanContinue
}
return s.error(c, "after decimal point in numeric literal")
}
// stateDot0 is the state after reading the integer, decimal point, and subsequent
// digits of a number, such as after reading `3.14`.
func stateDot0(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
return scanContinue
}
if c == 'e' || c == 'E' {
s.step = stateE
return scanContinue
}
return stateEndValue(s, c)
}
// stateE is the state after reading the mantissa and e in a number,
// such as after reading `314e` or `0.314e`.
func stateE(s *scanner, c byte) scanStatus {
if c == '+' || c == '-' {
s.step = stateESign
return scanContinue
}
return stateESign(s, c)
}
// stateESign is the state after reading the mantissa, e, and sign in a number,
// such as after reading `314e-` or `0.314e+`.
func stateESign(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = stateE0
return scanContinue
}
return s.error(c, "in exponent of numeric literal")
}
// stateE0 is the state after reading the mantissa, e, optional sign,
// and at least one digit of the exponent in a number,
// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
func stateE0(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
return scanContinue
}
return stateEndValue(s, c)
}
// stateT is the state after reading `t`.
func stateT(s *scanner, c byte) scanStatus {
if c == 'r' {
s.step = stateTr
return scanContinue
}
return s.error(c, "in literal true (expecting 'r')")
}
// stateTr is the state after reading `tr`.
func stateTr(s *scanner, c byte) scanStatus {
if c == 'u' {
s.step = stateTru
return scanContinue
}
return s.error(c, "in literal true (expecting 'u')")
}
// stateTru is the state after reading `tru`.
func stateTru(s *scanner, c byte) scanStatus {
if c == 'e' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal true (expecting 'e')")
}
// stateF is the state after reading `f`.
func stateF(s *scanner, c byte) scanStatus {
if c == 'a' {
s.step = stateFa
return scanContinue
}
return s.error(c, "in literal false (expecting 'a')")
}
// stateFa is the state after reading `fa`.
func stateFa(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateFal
return scanContinue
}
return s.error(c, "in literal false (expecting 'l')")
}
// stateFal is the state after reading `fal`.
func stateFal(s *scanner, c byte) scanStatus {
if c == 's' {
s.step = stateFals
return scanContinue
}
return s.error(c, "in literal false (expecting 's')")
}
// stateFals is the state after reading `fals`.
func stateFals(s *scanner, c byte) scanStatus {
if c == 'e' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal false (expecting 'e')")
}
// stateN is the state after reading `n`.
func stateN(s *scanner, c byte) scanStatus {
if c == 'u' {
s.step = stateNu
return scanContinue
}
return s.error(c, "in literal null (expecting 'u')")
}
// stateNu is the state after reading `nu`.
func stateNu(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateNul
return scanContinue
}
return s.error(c, "in literal null (expecting 'l')")
}
// stateNul is the state after reading `nul`.
func stateNul(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal null (expecting 'l')")
}
// stateError is the state after reaching a syntax error,
// such as after reading `[1}` or `5.1.2`.
func stateError(s *scanner, c byte) scanStatus {
return scanError
}
// error records an error and switches to the error state.
func (s *scanner) error(c byte, context string) scanStatus {
s.step = stateError
s.err = fmt.Errorf("invalid character <<%c>> %s", c, context)
return scanError
}
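
For reference, the file above is the vendored internal JSON scanner that mimetype 1.4.9 removes. Its single entry point, json.Scan, was consumed by the old matchers in magic/text.go (see the hunk later in this diff) roughly as in the sketch below; the package lives under internal/, so this is illustrative only and the helper name is invented:

// Illustrative sketch of how the removed scanner was used, not part of this diff.
// json refers to github.com/gabriel-vasile/mimetype/internal/json.
func looksLikeJSON(raw []byte, limit uint32) bool {
	parsed, err := json.Scan(raw)
	if limit == 0 || len(raw) < int(limit) {
		// The full file content was provided: it must parse without error.
		return err == nil
	}
	// Only a header of the file was provided: every supplied byte must have
	// been consumed, even if the document is truncated mid-value.
	return parsed == len(raw) && len(raw) > 0
}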


@@ -0,0 +1,464 @@
package json
import (
"bytes"
"sync"
)
const (
QueryNone = "json"
QueryGeo = "geo"
QueryHAR = "har"
QueryGLTF = "gltf"
maxRecursion = 4096
)
var queries = map[string][]query{
QueryNone: nil,
QueryGeo: {{
SearchPath: [][]byte{[]byte("type")},
SearchVals: [][]byte{
[]byte(`"Feature"`),
[]byte(`"FeatureCollection"`),
[]byte(`"Point"`),
[]byte(`"LineString"`),
[]byte(`"Polygon"`),
[]byte(`"MultiPoint"`),
[]byte(`"MultiLineString"`),
[]byte(`"MultiPolygon"`),
[]byte(`"GeometryCollection"`),
},
}},
QueryHAR: {{
SearchPath: [][]byte{[]byte("log"), []byte("version")},
}, {
SearchPath: [][]byte{[]byte("log"), []byte("creator")},
}, {
SearchPath: [][]byte{[]byte("log"), []byte("entries")},
}},
QueryGLTF: {{
SearchPath: [][]byte{[]byte("asset"), []byte("version")},
SearchVals: [][]byte{[]byte(`"1.0"`), []byte(`"2.0"`)},
}},
}
var parserPool = sync.Pool{
New: func() any {
return &parserState{maxRecursion: maxRecursion}
},
}
// parserState holds the state of JSON parsing. The number of inspected bytes,
// the current path inside the JSON object, etc.
type parserState struct {
// ib represents the number of inspected bytes.
// Because mimetype limits itself to only reading the header of the file,
// it means sometimes the input JSON can be truncated. In that case, we want
// to still detect it as JSON, even if it's invalid/truncated.
// When ib == len(input) it means the JSON was valid (at least the header).
ib int
maxRecursion int
// currPath keeps a track of the JSON keys parsed up.
// It works only for JSON objects. JSON arrays are ignored
// mainly because the functionality is not needed.
currPath [][]byte
// firstToken stores the first JSON token encountered in input.
// TODO: performance would be better if we would stop parsing as soon
// as we see that first token is not what we are interested in.
firstToken int
// querySatisfied is true if both path and value of any queries passed to
// consumeAny are satisfied.
querySatisfied bool
}
// query holds information about a combination of {"key": "val"} that we're trying
// to search for inside the JSON.
type query struct {
// SearchPath represents the whole path to look for inside the JSON.
// ex: [][]byte{[]byte("foo"), []byte("bar")} matches {"foo": {"bar": "baz"}}
SearchPath [][]byte
// SearchVals represents values to look for when the SearchPath is found.
// Each SearchVal element is tried until one of them matches (logical OR.)
SearchVals [][]byte
}
func eq(path1, path2 [][]byte) bool {
if len(path1) != len(path2) {
return false
}
for i := range path1 {
if !bytes.Equal(path1[i], path2[i]) {
return false
}
}
return true
}
// LooksLikeObjectOrArray reports if first non white space character from raw
// is either { or [. Parsing raw as JSON is a heavy operation. When receiving some
// text input we can skip parsing if the input does not even look like JSON.
func LooksLikeObjectOrArray(raw []byte) bool {
for i := range raw {
if isSpace(raw[i]) {
continue
}
return raw[i] == '{' || raw[i] == '['
}
return false
}
// Parse will take out a parser from the pool depending on queryType and tries
// to parse raw bytes as JSON.
func Parse(queryType string, raw []byte) (parsed, inspected, firstToken int, querySatisfied bool) {
p := parserPool.Get().(*parserState)
defer func() {
// Avoid hanging on to too much memory in extreme input cases.
if len(p.currPath) > 128 {
p.currPath = nil
}
parserPool.Put(p)
}()
p.reset()
qs := queries[queryType]
got := p.consumeAny(raw, qs, 0)
return got, p.ib, p.firstToken, p.querySatisfied
}
func (p *parserState) reset() {
p.ib = 0
p.currPath = p.currPath[0:0]
p.firstToken = TokInvalid
p.querySatisfied = false
}
func (p *parserState) consumeSpace(b []byte) (n int) {
for len(b) > 0 && isSpace(b[0]) {
b = b[1:]
n++
p.ib++
}
return n
}
func (p *parserState) consumeConst(b, cnst []byte) int {
lb := len(b)
for i, c := range cnst {
if lb > i && b[i] == c {
p.ib++
} else {
return 0
}
}
return len(cnst)
}
func (p *parserState) consumeString(b []byte) (n int) {
var c byte
for len(b[n:]) > 0 {
c, n = b[n], n+1
p.ib++
switch c {
case '\\':
if len(b[n:]) == 0 {
return 0
}
switch b[n] {
case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
n++
p.ib++
continue
case 'u':
n++
p.ib++
for j := 0; j < 4 && len(b[n:]) > 0; j++ {
if !isXDigit(b[n]) {
return 0
}
n++
p.ib++
}
continue
default:
return 0
}
case '"':
return n
default:
continue
}
}
return 0
}
func (p *parserState) consumeNumber(b []byte) (n int) {
got := false
var i int
if len(b) == 0 {
goto out
}
if b[0] == '-' {
b, i = b[1:], i+1
p.ib++
}
for len(b) > 0 {
if !isDigit(b[0]) {
break
}
got = true
b, i = b[1:], i+1
p.ib++
}
if len(b) == 0 {
goto out
}
if b[0] == '.' {
b, i = b[1:], i+1
p.ib++
}
for len(b) > 0 {
if !isDigit(b[0]) {
break
}
got = true
b, i = b[1:], i+1
p.ib++
}
if len(b) == 0 {
goto out
}
if got && (b[0] == 'e' || b[0] == 'E') {
b, i = b[1:], i+1
p.ib++
got = false
if len(b) == 0 {
goto out
}
if b[0] == '+' || b[0] == '-' {
b, i = b[1:], i+1
p.ib++
}
for len(b) > 0 {
if !isDigit(b[0]) {
break
}
got = true
b, i = b[1:], i+1
p.ib++
}
}
out:
if got {
return i
}
return 0
}
func (p *parserState) consumeArray(b []byte, qs []query, lvl int) (n int) {
p.currPath = append(p.currPath, []byte{'['})
if len(b) == 0 {
return 0
}
for n < len(b) {
n += p.consumeSpace(b[n:])
if len(b[n:]) == 0 {
return 0
}
if b[n] == ']' {
p.ib++
p.currPath = p.currPath[:len(p.currPath)-1]
return n + 1
}
innerParsed := p.consumeAny(b[n:], qs, lvl)
if innerParsed == 0 {
return 0
}
n += innerParsed
if len(b[n:]) == 0 {
return 0
}
switch b[n] {
case ',':
n += 1
p.ib++
continue
case ']':
p.ib++
return n + 1
default:
return 0
}
}
return 0
}
func queryPathMatch(qs []query, path [][]byte) int {
for i := range qs {
if eq(qs[i].SearchPath, path) {
return i
}
}
return -1
}
func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) {
for n < len(b) {
n += p.consumeSpace(b[n:])
if len(b[n:]) == 0 {
return 0
}
if b[n] == '}' {
p.ib++
return n + 1
}
if b[n] != '"' {
return 0
} else {
n += 1
p.ib++
}
// queryMatched stores the index of the query satisfying the current path.
queryMatched := -1
if keyLen := p.consumeString(b[n:]); keyLen == 0 {
return 0
} else {
p.currPath = append(p.currPath, b[n:n+keyLen-1])
if !p.querySatisfied {
queryMatched = queryPathMatch(qs, p.currPath)
}
n += keyLen
}
n += p.consumeSpace(b[n:])
if len(b[n:]) == 0 {
return 0
}
if b[n] != ':' {
return 0
} else {
n += 1
p.ib++
}
n += p.consumeSpace(b[n:])
if len(b[n:]) == 0 {
return 0
}
if valLen := p.consumeAny(b[n:], qs, lvl); valLen == 0 {
return 0
} else {
if queryMatched != -1 {
q := qs[queryMatched]
if len(q.SearchVals) == 0 {
p.querySatisfied = true
}
for _, val := range q.SearchVals {
if bytes.Equal(val, bytes.TrimSpace(b[n:n+valLen])) {
p.querySatisfied = true
}
}
}
n += valLen
}
if len(b[n:]) == 0 {
return 0
}
switch b[n] {
case ',':
p.currPath = p.currPath[:len(p.currPath)-1]
n++
p.ib++
continue
case '}':
p.currPath = p.currPath[:len(p.currPath)-1]
p.ib++
return n + 1
default:
return 0
}
}
return 0
}
func (p *parserState) consumeAny(b []byte, qs []query, lvl int) (n int) {
// Avoid too much recursion.
if p.maxRecursion != 0 && lvl > p.maxRecursion {
return 0
}
n += p.consumeSpace(b)
if len(b[n:]) == 0 {
return 0
}
var t, rv int
switch b[n] {
case '"':
n++
p.ib++
rv = p.consumeString(b[n:])
t = TokString
case '[':
n++
p.ib++
rv = p.consumeArray(b[n:], qs, lvl+1)
t = TokArray
case '{':
n++
p.ib++
rv = p.consumeObject(b[n:], qs, lvl+1)
t = TokObject
case 't':
rv = p.consumeConst(b[n:], []byte("true"))
t = TokTrue
case 'f':
rv = p.consumeConst(b[n:], []byte("false"))
t = TokFalse
case 'n':
rv = p.consumeConst(b[n:], []byte("null"))
t = TokNull
default:
rv = p.consumeNumber(b[n:])
t = TokNumber
}
if lvl == 0 {
p.firstToken = t
}
if len(qs) == 0 {
p.querySatisfied = true
}
if rv <= 0 {
return n
}
n += rv
n += p.consumeSpace(b[n:])
return n
}
func isSpace(c byte) bool {
return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
func isDigit(c byte) bool {
return '0' <= c && c <= '9'
}
func isXDigit(c byte) bool {
if isDigit(c) {
return true
}
return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
}
const (
TokInvalid = 0
TokNull = 1 << iota
TokTrue
TokFalse
TokNumber
TokString
TokArray
TokObject
TokComma
)
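
The Parse entry point above returns everything a matcher needs to decide both validity and document shape; a short sketch of how a hypothetical caller inside this internal package might use it (Parse, QueryGeo and the Tok* constants are the ones declared above; the helper name is invented):

// Hypothetical helper inside the internal json package, for illustration only.
func looksLikeGeoJSON(raw []byte) bool {
	parsed, _, firstToken, querySatisfied := Parse(QueryGeo, raw)
	return querySatisfied && // a "type" key holding one of the GeoJSON values was found
		firstToken&TokObject != 0 && // the top-level value is an object
		parsed == len(raw) // the whole input was consumed without error
}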


@@ -137,7 +137,7 @@ func tarParseOctal(b []byte) int64 {
if b == 0 {
break
}
if !(b >= '0' && b <= '7') {
if b < '0' || b > '7' {
return -1
}
ret = (ret << 3) | int64(b-'0')


@@ -71,7 +71,7 @@ func Dbf(raw []byte, limit uint32) bool {
}
// 3rd and 4th bytes contain the last update month and day of month.
if !(0 < raw[2] && raw[2] < 13 && 0 < raw[3] && raw[3] < 32) {
if raw[2] == 0 || raw[2] > 12 || raw[3] == 0 || raw[3] > 31 {
return false
}
@@ -153,7 +153,7 @@ func Marc(raw []byte, limit uint32) bool {
return bytes.Contains(raw[:min(2048, len(raw))], []byte{0x1E})
}
// Glb matches a glTF model format file.
// GLB matches a glTF model format file.
// GLB is the binary file format representation of 3D models saved in
// the GL transmission Format (glTF).
// GLB uses little endian and its header structure is as follows:
@@ -168,7 +168,7 @@ func Marc(raw []byte, limit uint32) bool {
//
// [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html
// [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf-binary
var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
var GLB = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
[]byte("\x67\x6C\x54\x46\x01\x00\x00\x00"))
// TzIf matches a Time Zone Information Format (TZif) file.
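
The GLB matcher above keys on the literal bytes "glTF" followed by a little-endian version of 2 or 1. For the header structure the comment refers to, the glTF 2.0 spec defines a 12-byte header (magic, version, total length); a hypothetical decoder, added here only for illustration, might read it like this:

// Hypothetical GLB header decode, not part of this diff; uses encoding/binary.
func readGLBHeader(raw []byte) (version, length uint32, ok bool) {
	if len(raw) < 12 || string(raw[0:4]) != "glTF" {
		return 0, 0, false
	}
	version = binary.LittleEndian.Uint32(raw[4:8]) // 1 or 2
	length = binary.LittleEndian.Uint32(raw[8:12]) // total file size in bytes
	return version, length, true
}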


@@ -12,13 +12,13 @@ func Shp(raw []byte, limit uint32) bool {
return false
}
if !(binary.BigEndian.Uint32(raw[0:4]) == 9994 &&
binary.BigEndian.Uint32(raw[4:8]) == 0 &&
binary.BigEndian.Uint32(raw[8:12]) == 0 &&
binary.BigEndian.Uint32(raw[12:16]) == 0 &&
binary.BigEndian.Uint32(raw[16:20]) == 0 &&
binary.BigEndian.Uint32(raw[20:24]) == 0 &&
binary.LittleEndian.Uint32(raw[28:32]) == 1000) {
if binary.BigEndian.Uint32(raw[0:4]) != 9994 ||
binary.BigEndian.Uint32(raw[4:8]) != 0 ||
binary.BigEndian.Uint32(raw[8:12]) != 0 ||
binary.BigEndian.Uint32(raw[12:16]) != 0 ||
binary.BigEndian.Uint32(raw[16:20]) != 0 ||
binary.BigEndian.Uint32(raw[20:24]) != 0 ||
binary.LittleEndian.Uint32(raw[28:32]) != 1000 {
return false
}


@@ -2,7 +2,6 @@ package magic
import (
"bytes"
"strings"
"time"
"github.com/gabriel-vasile/mimetype/internal/charset"
@@ -154,145 +153,75 @@ func Php(raw []byte, limit uint32) bool {
// JSON matches a JavaScript Object Notation file.
func JSON(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
// #175 A single JSON string, number or bool is not considered JSON.
// JSON objects and arrays are reported as JSON.
if len(raw) < 2 || (raw[0] != '[' && raw[0] != '{') {
return false
}
parsed, err := json.Scan(raw)
// If the full file content was provided, check there is no error.
if limit == 0 || len(raw) < int(limit) {
return err == nil
}
// If a section of the file was provided, check if all of it was parsed.
return parsed == len(raw) && len(raw) > 0
return jsonHelper(raw, limit, json.QueryNone, json.TokObject|json.TokArray)
}
// GeoJSON matches a RFC 7946 GeoJSON file.
//
// GeoJSON detection implies searching for key:value pairs like: `"type": "Feature"`
// in the input.
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
func GeoJSON(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
if len(raw) == 0 {
return jsonHelper(raw, limit, json.QueryGeo, json.TokObject)
}
// HAR matches a HAR Spec file.
// Spec: http://www.softwareishard.com/blog/har-12-spec/
func HAR(raw []byte, limit uint32) bool {
return jsonHelper(raw, limit, json.QueryHAR, json.TokObject)
}
// GLTF matches a GL Transmission Format (JSON) file.
// Visit [glTF specification] and [IANA glTF entry] for more details.
//
// [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html
// [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf+json
func GLTF(raw []byte, limit uint32) bool {
return jsonHelper(raw, limit, json.QueryGLTF, json.TokObject)
}
func jsonHelper(raw []byte, limit uint32, q string, wantTok int) bool {
if !json.LooksLikeObjectOrArray(raw) {
return false
}
// GeoJSON is always a JSON object, not a JSON array or any other JSON value.
if raw[0] != '{' {
lraw := len(raw)
parsed, inspected, firstToken, querySatisfied := json.Parse(q, raw)
if !querySatisfied || firstToken&wantTok == 0 {
return false
}
s := []byte(`"type"`)
si, sl := bytes.Index(raw, s), len(s)
if si == -1 {
return false
// If the full file content was provided, check that the whole input was parsed.
if limit == 0 || lraw < int(limit) {
return parsed == lraw
}
// If the "type" string is the suffix of the input,
// there is no need to search for the value of the key.
if si+sl == len(raw) {
return false
}
// Skip the "type" part.
raw = raw[si+sl:]
// Skip any whitespace before the colon.
raw = trimLWS(raw)
// Check for colon.
if len(raw) == 0 || raw[0] != ':' {
return false
}
// Skip any whitespace after the colon.
raw = trimLWS(raw[1:])
geoJSONTypes := [][]byte{
[]byte(`"Feature"`),
[]byte(`"FeatureCollection"`),
[]byte(`"Point"`),
[]byte(`"LineString"`),
[]byte(`"Polygon"`),
[]byte(`"MultiPoint"`),
[]byte(`"MultiLineString"`),
[]byte(`"MultiPolygon"`),
[]byte(`"GeometryCollection"`),
}
for _, t := range geoJSONTypes {
if bytes.HasPrefix(raw, t) {
return true
}
}
return false
// If a section of the file was provided, check if all of it was inspected.
// In other words, check that if there was a problem parsing, that problem
// occured at the last byte in the input.
return inspected == lraw && lraw > 0
}
// NdJSON matches a Newline delimited JSON file. All complete lines from raw
// must be valid JSON documents meaning they contain one of the valid JSON data
// types.
func NdJSON(raw []byte, limit uint32) bool {
lCount, hasObjOrArr := 0, false
lCount, objOrArr := 0, 0
raw = dropLastLine(raw, limit)
var l []byte
for len(raw) != 0 {
l, raw = scanLine(raw)
// Empty lines are allowed in NDJSON.
if l = trimRWS(trimLWS(l)); len(l) == 0 {
continue
}
_, err := json.Scan(l)
if err != nil {
_, inspected, firstToken, _ := json.Parse(json.QueryNone, l)
if len(l) != inspected {
return false
}
if l[0] == '[' || l[0] == '{' {
hasObjOrArr = true
if firstToken == json.TokArray || firstToken == json.TokObject {
objOrArr++
}
lCount++
}
return lCount > 1 && hasObjOrArr
}
// HAR matches a HAR Spec file.
// Spec: http://www.softwareishard.com/blog/har-12-spec/
func HAR(raw []byte, limit uint32) bool {
s := []byte(`"log"`)
si, sl := bytes.Index(raw, s), len(s)
if si == -1 {
return false
}
// If the "log" string is the suffix of the input,
// there is no need to search for the value of the key.
if si+sl == len(raw) {
return false
}
// Skip the "log" part.
raw = raw[si+sl:]
// Skip any whitespace before the colon.
raw = trimLWS(raw)
// Check for colon.
if len(raw) == 0 || raw[0] != ':' {
return false
}
// Skip any whitespace after the colon.
raw = trimLWS(raw[1:])
harJSONTypes := [][]byte{
[]byte(`"version"`),
[]byte(`"creator"`),
[]byte(`"entries"`),
}
for _, t := range harJSONTypes {
si := bytes.Index(raw, t)
if si > -1 {
return true
}
}
return false
return lCount > 1 && objOrArr > 0
}
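// Editor's note (illustrative, not part of this diff): with the counters above,
// input such as
//
//	{"a": 1}
//	{"b": 2}
//
// gives lCount == 2 and objOrArr == 2, so it is reported as NDJSON, while a
// single JSON object fails the lCount > 1 check, as it did before this change.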
// Svg matches a SVG file.
@@ -305,32 +234,31 @@ func Srt(raw []byte, _ uint32) bool {
line, raw := scanLine(raw)
// First line must be 1.
if string(line) != "1" {
if len(line) != 1 || line[0] != '1' {
return false
}
line, raw = scanLine(raw)
secondLine := string(line)
// Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits secondLine
// Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits second line
// length to exactly 29 characters.
if len(secondLine) != 29 {
if len(line) != 29 {
return false
}
// Decimal separator of fractional seconds in the timestamps must be a
// comma, not a period.
if strings.Contains(secondLine, ".") {
if bytes.IndexByte(line, '.') != -1 {
return false
}
// Second line must be a time range.
ts := strings.Split(secondLine, " --> ")
if len(ts) != 2 {
sep := []byte(" --> ")
i := bytes.Index(line, sep)
if i == -1 {
return false
}
const layout = "15:04:05,000"
t0, err := time.Parse(layout, ts[0])
t0, err := time.Parse(layout, string(line[:i]))
if err != nil {
return false
}
t1, err := time.Parse(layout, ts[1])
t1, err := time.Parse(layout, string(line[i+len(sep):]))
if err != nil {
return false
}
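
Taken together, the text.go changes route JSON, GeoJSON, HAR, and the new glTF detection through the shared jsonHelper. Through the public API the effect might look like the sketch below (illustrative; results inferred from the matchers shown above):

// Illustrative sketch, not part of this diff; GLTF requires asset.version "1.0" or "2.0".
package main

import (
	"fmt"

	"github.com/gabriel-vasile/mimetype"
)

func main() {
	gltf := []byte(`{"asset": {"version": "2.0"}, "scenes": []}`)
	fmt.Println(mimetype.Detect(gltf).Is("model/gltf+json")) // expected: true

	plain := []byte(`{"asset": "not a glTF document"}`)
	fmt.Println(mimetype.Detect(plain).Is("application/json")) // expected: true
}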


@@ -1,4 +1,4 @@
## 178 Supported MIME types
## 179 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.
Extension | MIME type | Aliases
@@ -171,6 +171,7 @@ Extension | MIME type | Aliases
**.json** | application/json | -
**.geojson** | application/geo+json | -
**.har** | application/json | -
**.gltf** | model/gltf+json | -
**.ndjson** | application/x-ndjson | -
**.rtf** | text/rtf | application/rtf
**.srt** | application/x-subrip | application/x-srt, text/x-srt


@@ -83,7 +83,7 @@ var (
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt)
xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2).
alias("application/xml")
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har)
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har, gltf)
har = newMIME("application/json", ".har", magic.HAR)
csv = newMIME("text/csv", ".csv", magic.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv)
@@ -262,7 +262,8 @@ var (
pat = newMIME("image/x-gimp-pat", ".pat", magic.Pat)
gbr = newMIME("image/x-gimp-gbr", ".gbr", magic.Gbr)
xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", magic.Xfdf)
glb = newMIME("model/gltf-binary", ".glb", magic.Glb)
glb = newMIME("model/gltf-binary", ".glb", magic.GLB)
gltf = newMIME("model/gltf+json", ".gltf", magic.GLTF)
jxr = newMIME("image/jxr", ".jxr", magic.Jxr).alias("image/vnd.ms-photo")
parquet = newMIME("application/vnd.apache.parquet", ".parquet", magic.Par1).
alias("application/x-parquet")

vendor/modules.txt

@@ -417,8 +417,8 @@ github.com/felixge/httpsnoop
## explicit; go 1.17
github.com/fsnotify/fsnotify
github.com/fsnotify/fsnotify/internal
# github.com/gabriel-vasile/mimetype v1.4.8
## explicit; go 1.20
# github.com/gabriel-vasile/mimetype v1.4.9
## explicit; go 1.23.0
github.com/gabriel-vasile/mimetype
github.com/gabriel-vasile/mimetype/internal/charset
github.com/gabriel-vasile/mimetype/internal/json