diff --git a/go.mod b/go.mod
index 351e20ccf..e59b4c3a7 100644
--- a/go.mod
+++ b/go.mod
@@ -19,7 +19,7 @@ require (
github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8
github.com/dutchcoders/go-clamd v0.0.0-20170520113014-b970184f4d9e
github.com/egirna/icap-client v0.1.1
- github.com/gabriel-vasile/mimetype v1.4.9
+ github.com/gabriel-vasile/mimetype v1.4.10
github.com/ggwhite/go-masker v1.1.0
github.com/go-chi/chi/v5 v5.2.2
github.com/go-chi/render v1.0.3
diff --git a/go.sum b/go.sum
index 8cfd3fb26..04eddbc68 100644
--- a/go.sum
+++ b/go.sum
@@ -349,8 +349,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
-github.com/gabriel-vasile/mimetype v1.4.9 h1:5k+WDwEsD9eTLL8Tz3L0VnmVh9QxGjRmjBvAG7U/oYY=
-github.com/gabriel-vasile/mimetype v1.4.9/go.mod h1:WnSQhFKJuBlRyLiKohA/2DtIlPFAbguNaG7QCHcyGok=
+github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0=
+github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
github.com/gdexlab/go-render v1.0.1 h1:rxqB3vo5s4n1kF0ySmoNeSPRYkEsyHgln4jFIQY7v0U=
github.com/gdexlab/go-render v1.0.1/go.mod h1:wRi5nW2qfjiGj4mPukH4UV0IknS1cHD4VgFTmJX5JzM=
github.com/getkin/kin-openapi v0.13.0/go.mod h1:WGRs2ZMM1Q8LR1QBEwUxC6RJEfaBcD0s+pcEVXFuAjw=
diff --git a/vendor/github.com/gabriel-vasile/mimetype/CODE_OF_CONDUCT.md b/vendor/github.com/gabriel-vasile/mimetype/CODE_OF_CONDUCT.md
deleted file mode 100644
index 8479cd87d..000000000
--- a/vendor/github.com/gabriel-vasile/mimetype/CODE_OF_CONDUCT.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# Contributor Covenant Code of Conduct
-
-## Our Pledge
-
-In the interest of fostering an open and welcoming environment, we as
-contributors and maintainers pledge to making participation in our project and
-our community a harassment-free experience for everyone, regardless of age, body
-size, disability, ethnicity, sex characteristics, gender identity and expression,
-level of experience, education, socio-economic status, nationality, personal
-appearance, race, religion, or sexual identity and orientation.
-
-## Our Standards
-
-Examples of behavior that contributes to creating a positive environment
-include:
-
-* Using welcoming and inclusive language
-* Being respectful of differing viewpoints and experiences
-* Gracefully accepting constructive criticism
-* Focusing on what is best for the community
-* Showing empathy towards other community members
-
-Examples of unacceptable behavior by participants include:
-
-* The use of sexualized language or imagery and unwelcome sexual attention or
- advances
-* Trolling, insulting/derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others' private information, such as a physical or electronic
- address, without explicit permission
-* Other conduct which could reasonably be considered inappropriate in a
- professional setting
-
-## Our Responsibilities
-
-Project maintainers are responsible for clarifying the standards of acceptable
-behavior and are expected to take appropriate and fair corrective action in
-response to any instances of unacceptable behavior.
-
-Project maintainers have the right and responsibility to remove, edit, or
-reject comments, commits, code, wiki edits, issues, and other contributions
-that are not aligned to this Code of Conduct, or to ban temporarily or
-permanently any contributor for other behaviors that they deem inappropriate,
-threatening, offensive, or harmful.
-
-## Scope
-
-This Code of Conduct applies both within project spaces and in public spaces
-when an individual is representing the project or its community. Examples of
-representing a project or community include using an official project e-mail
-address, posting via an official social media account, or acting as an appointed
-representative at an online or offline event. Representation of a project may be
-further defined and clarified by project maintainers.
-
-## Enforcement
-
-Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by contacting the project team at vasile.gabriel@email.com. All
-complaints will be reviewed and investigated and will result in a response that
-is deemed necessary and appropriate to the circumstances. The project team is
-obligated to maintain confidentiality with regard to the reporter of an incident.
-Further details of specific enforcement policies may be posted separately.
-
-Project maintainers who do not follow or enforce the Code of Conduct in good
-faith may face temporary or permanent repercussions as determined by other
-members of the project's leadership.
-
-## Attribution
-
-This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
-available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
-
-[homepage]: https://www.contributor-covenant.org
-
-For answers to common questions about this code of conduct, see
-https://www.contributor-covenant.org/faq
diff --git a/vendor/github.com/gabriel-vasile/mimetype/CONTRIBUTING.md b/vendor/github.com/gabriel-vasile/mimetype/CONTRIBUTING.md
deleted file mode 100644
index 56ae4e57c..000000000
--- a/vendor/github.com/gabriel-vasile/mimetype/CONTRIBUTING.md
+++ /dev/null
@@ -1,12 +0,0 @@
-## Contribute
-Contributions to **mimetype** are welcome. If you find an issue and you consider
-contributing, you can use the [Github issues tracker](https://github.com/gabriel-vasile/mimetype/issues)
-in order to report it, or better yet, open a pull request.
-
-Code contributions must respect these rules:
- - code must be test covered
- - code must be formatted using gofmt tool
- - exported names must be documented
-
-**Important**: By submitting a pull request, you agree to allow the project
-owner to license your work under the same license as that used by the project.
diff --git a/vendor/github.com/gabriel-vasile/mimetype/README.md b/vendor/github.com/gabriel-vasile/mimetype/README.md
index aa88b4bda..f28f56c9b 100644
--- a/vendor/github.com/gabriel-vasile/mimetype/README.md
+++ b/vendor/github.com/gabriel-vasile/mimetype/README.md
@@ -27,6 +27,7 @@
- possibility to [extend](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#example-package-Extend) with other file formats
- common file formats are prioritized
- [text vs. binary files differentiation](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#example-package-TextVsBinary)
+- no external dependencies
- safe for concurrent usage
## Install
@@ -45,8 +46,7 @@ fmt.Println(mtype.String(), mtype.Extension())
```
See the [runnable Go Playground examples](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#pkg-overview).
-## Usage'
-Only use libraries like **mimetype** as a last resort. Content type detection
+Caution: only use libraries like **mimetype** as a last resort. Content type detection
using magic numbers is slow, inaccurate, and non-standard. Most of the time
protocols have methods for specifying such metadata; e.g., `Content-Type` header
in HTTP and SMTP.
@@ -67,6 +67,18 @@ mimetype.DetectFile("file.doc")
If increasing the limit does not help, please
[open an issue](https://github.com/gabriel-vasile/mimetype/issues/new?assignees=&labels=&template=mismatched-mime-type-detected.md&title=).
+## Tests
+In addition to unit tests,
+[mimetype_tests](https://github.com/gabriel-vasile/mimetype_tests) compares the
+library with the [Unix file utility](https://en.wikipedia.org/wiki/File_(command))
+for around 50 000 sample files. Check the latest comparison results
+[here](https://github.com/gabriel-vasile/mimetype_tests/actions).
+
+## Benchmarks
+Benchmarks for each file format are run whenever a PR is opened. The results can
+be seen on the [workflows page](https://github.com/gabriel-vasile/mimetype/actions/workflows/benchmark.yml).
+Performance improvements are welcome but correctness is prioritized.
+
## Structure
**mimetype** uses a hierarchical structure to keep the MIME type detection logic.
This reduces the number of calls needed for detecting the file type. The reason
@@ -84,19 +96,8 @@ or from a [file](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#DetectFil
-## Performance
-Thanks to the hierarchical structure, searching for common formats first,
-and limiting itself to file headers, **mimetype** matches the performance of
-stdlib `http.DetectContentType` while outperforming the alternative package.
-
-```bash
- mimetype http.DetectContentType filetype
-BenchmarkMatchTar-24 250 ns/op 400 ns/op 3778 ns/op
-BenchmarkMatchZip-24 524 ns/op 351 ns/op 4884 ns/op
-BenchmarkMatchJpeg-24 103 ns/op 228 ns/op 839 ns/op
-BenchmarkMatchGif-24 139 ns/op 202 ns/op 751 ns/op
-BenchmarkMatchPng-24 165 ns/op 221 ns/op 1176 ns/op
-```
-
## Contributing
-See [CONTRIBUTING.md](CONTRIBUTING.md).
+Contributions are unexpected but welcome. When submitting a PR for detection of
+a new file format, please make sure to add a record to the list of testcases
+from [mimetype_test.go](mimetype_test.go). For complex files a record can be added
+in the [testdata](testdata) directory.
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/charset/charset.go b/vendor/github.com/gabriel-vasile/mimetype/internal/charset/charset.go
index 0647f730e..8c5a05e4d 100644
--- a/vendor/github.com/gabriel-vasile/mimetype/internal/charset/charset.go
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/charset/charset.go
@@ -2,11 +2,10 @@ package charset
import (
"bytes"
- "encoding/xml"
- "strings"
"unicode/utf8"
- "golang.org/x/net/html"
+ "github.com/gabriel-vasile/mimetype/internal/markup"
+ "github.com/gabriel-vasile/mimetype/internal/scan"
)
const (
@@ -141,20 +140,31 @@ func FromXML(content []byte) string {
}
return FromPlain(content)
}
-func fromXML(content []byte) string {
- content = trimLWS(content)
- dec := xml.NewDecoder(bytes.NewReader(content))
- rawT, err := dec.RawToken()
- if err != nil {
- return ""
+func fromXML(s scan.Bytes) string {
+ xml := []byte(" 0 && line[n-1] == '\r' {
+ return line[:n-1], false // drop \r at end of line
+ }
+
+ // This line is problematic. The logic from CountFields comes from
+ // encoding/csv.Reader which relies on mutating the input bytes.
+ // https://github.com/golang/go/blob/b3251514531123d7fd007682389bce7428d159a0/src/encoding/csv/reader.go#L275-L279
+ // To avoid mutating the input, we return cutShort. #680
+ if n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
+ return line[:n-2], true
+ }
+ return line, false
+}
+
+// CountFields reads the next non-empty, non-comment CSV line and counts the
+// fields it contains. A quoted field may continue over several physical
+// lines; those lines are consumed as part of the same call.
+//
+// collectIndexes makes CountFields also return the offsets at which CSV fields
+// start. Offsets are measured from the start of the physical line currently
+// being scanned, so they restart whenever a quoted field pulls in another
+// line. These indexes are used to test the correctness against the
+// encoding/csv parser. When collectIndexes is false the returned slice is
+// empty.
+//
+// hasMore is true whenever at least one field was counted. When readLine
+// yields an empty line and no input is left, the next read returns
+// (0, nil, false).
+func (r *Parser) CountFields(collectIndexes bool) (fields int, fieldPos []int, hasMore bool) {
+ finished := false
+ var line scan.Bytes
+ cutShort := false
+ // Skip over empty lines and comment lines until a line with content is found.
+ for {
+ line, cutShort = r.readLine()
+ // finished was set by the previous iteration: nothing was left to read.
+ if finished {
+ return 0, nil, false
+ }
+ finished = len(r.s) == 0 && len(line) == 0
+ // True when line is empty or consists of a single "\n".
+ if len(line) == lengthNL(line) {
+ line = nil
+ continue // Skip empty lines.
+ }
+ if len(line) > 0 && line[0] == r.comment {
+ line = nil
+ continue
+ }
+ break
+ }
+
+ indexes := []int{}
+ // originalLine is kept only to compute field offsets as
+ // len(originalLine)-len(line) while line is consumed from the front.
+ originalLine := line
+parseField:
+ for {
+ if len(line) == 0 || line[0] != '"' { // non-quoted string field
+ fields++
+ if collectIndexes {
+ indexes = append(indexes, len(originalLine)-len(line))
+ }
+ i := bytes.IndexByte(line, r.comma)
+ if i >= 0 {
+ line.Advance(i + 1) // 1 to get over ending comma
+ continue parseField
+ }
+ // No more separators: this was the last field of the line.
+ break parseField
+ } else { // Quoted string field.
+ if collectIndexes {
+ indexes = append(indexes, len(originalLine)-len(line))
+ }
+ line.Advance(1) // get over starting quote
+ for {
+ i := bytes.IndexByte(line, '"')
+ if i >= 0 {
+ line.Advance(i + 1) // 1 for ending quote
+ // Decide what the quote just consumed means by looking at the next byte.
+ // If no case matches, scanning simply continues with the next quote.
+ switch rn := line.Peek(); {
+ case rn == '"':
+ // Doubled quote: stays inside the quoted field.
+ line.Advance(1)
+ case rn == r.comma:
+ // Closing quote followed by a separator: the field is complete.
+ line.Advance(1)
+ fields++
+ continue parseField
+ case lengthNL(line) == len(line):
+ // Closing quote at the end of the line (nothing or just "\n" left).
+ fields++
+ break parseField
+ }
+ } else if len(line) > 0 || cutShort {
+ // No closing quote on this physical line: the field continues on the next one.
+ line, cutShort = r.readLine()
+ originalLine = line
+ } else {
+ // Nothing left to scan while still inside quotes: count the field and stop.
+ fields++
+ break parseField
+ }
+ }
+ }
+ }
+
+ return fields, indexes, fields != 0
+}
+
+// lengthNL reports the number of bytes taken by a trailing \n in b:
+// 1 when b ends with '\n', 0 otherwise (including when b is empty).
+func lengthNL(b []byte) int {
+ if len(b) > 0 && b[len(b)-1] == '\n' {
+ return 1
+ }
+ return 0
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go b/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go
index fd8dd5202..4bc861743 100644
--- a/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go
@@ -258,7 +258,7 @@ out:
}
func (p *parserState) consumeArray(b []byte, qs []query, lvl int) (n int) {
- p.currPath = append(p.currPath, []byte{'['})
+ p.appendPath([]byte{'['}, qs)
if len(b) == 0 {
return 0
}
@@ -270,7 +270,7 @@ func (p *parserState) consumeArray(b []byte, qs []query, lvl int) (n int) {
}
if b[n] == ']' {
p.ib++
- p.currPath = p.currPath[:len(p.currPath)-1]
+ p.popLastPath(qs)
return n + 1
}
innerParsed := p.consumeAny(b[n:], qs, lvl)
@@ -305,6 +305,20 @@ func queryPathMatch(qs []query, path [][]byte) int {
return -1
}
+// appendPath appends a path fragment to p.currPath, but only when qs is not
+// empty. If we don't need query functionality (just checking if a JSON is
+// valid), then we can skip keeping track of the path we're currently in.
+func (p *parserState) appendPath(path []byte, qs []query) {
+ if len(qs) != 0 {
+ p.currPath = append(p.currPath, path)
+ }
+}
+// popLastPath removes the last fragment from p.currPath, but only when qs is
+// not empty. With an empty qs nothing is tracked, so there is nothing to remove.
+func (p *parserState) popLastPath(qs []query) {
+ if len(qs) != 0 {
+ p.currPath = p.currPath[:len(p.currPath)-1]
+ }
+}
+
func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) {
for n < len(b) {
n += p.consumeSpace(b[n:])
@@ -326,7 +340,7 @@ func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) {
if keyLen := p.consumeString(b[n:]); keyLen == 0 {
return 0
} else {
- p.currPath = append(p.currPath, b[n:n+keyLen-1])
+ p.appendPath(b[n:n+keyLen-1], qs)
if !p.querySatisfied {
queryMatched = queryPathMatch(qs, p.currPath)
}
@@ -368,12 +382,12 @@ func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) {
}
switch b[n] {
case ',':
- p.currPath = p.currPath[:len(p.currPath)-1]
+ p.popLastPath(qs)
n++
p.ib++
continue
case '}':
- p.currPath = p.currPath[:len(p.currPath)-1]
+ p.popLastPath(qs)
p.ib++
return n + 1
default:
@@ -388,6 +402,9 @@ func (p *parserState) consumeAny(b []byte, qs []query, lvl int) (n int) {
if p.maxRecursion != 0 && lvl > p.maxRecursion {
return 0
}
+ if len(qs) == 0 {
+ p.querySatisfied = true
+ }
n += p.consumeSpace(b)
if len(b[n:]) == 0 {
return 0
@@ -426,9 +443,6 @@ func (p *parserState) consumeAny(b []byte, qs []query, lvl int) (n int) {
if lvl == 0 {
p.firstToken = t
}
- if len(qs) == 0 {
- p.querySatisfied = true
- }
if rv <= 0 {
return n
}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/document.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/document.go
index b3b26d5a1..7f9308db3 100644
--- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/document.go
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/document.go
@@ -1,18 +1,11 @@
package magic
-import "bytes"
+import (
+ "bytes"
+ "encoding/binary"
+)
var (
- // Pdf matches a Portable Document Format file.
- // https://github.com/file/file/blob/11010cc805546a3e35597e67e1129a481aed40e8/magic/Magdir/pdf
- Pdf = prefix(
- // usual pdf signature
- []byte("%PDF-"),
- // new-line prefixed signature
- []byte("\012%PDF-"),
- // UTF-8 BOM prefixed signature
- []byte("\xef\xbb\xbf%PDF-"),
- )
// Fdf matches a Forms Data Format file.
Fdf = prefix([]byte("%FDF"))
// Mobi matches a Mobi file.
@@ -21,8 +14,18 @@ var (
Lit = prefix([]byte("ITOLITLS"))
)
+// PDF matches a Portable Document Format file.
+// The %PDF- header should be the first thing inside the file but many
+// implementations don't follow the rule. The PDF spec at Appendix H says the
+// signature can be prepended by anything.
+// https://bugs.astron.com/view.php?id=446
+//
+// The second parameter is ignored.
+func PDF(raw []byte, _ uint32) bool {
+ // Search only the first 1024 bytes (or all of raw when it is shorter).
+ raw = raw[:min(len(raw), 1024)]
+ return bytes.Contains(raw, []byte("%PDF-"))
+}
+
// DjVu matches a DjVu file.
-func DjVu(raw []byte, limit uint32) bool {
+func DjVu(raw []byte, _ uint32) bool {
if len(raw) < 12 {
return false
}
@@ -36,7 +39,7 @@ func DjVu(raw []byte, limit uint32) bool {
}
// P7s matches an .p7s signature File (PEM, Base64).
-func P7s(raw []byte, limit uint32) bool {
+func P7s(raw []byte, _ uint32) bool {
// Check for PEM Encoding.
if bytes.HasPrefix(raw, []byte("-----BEGIN PKCS7")) {
return true
@@ -60,3 +63,21 @@ func P7s(raw []byte, limit uint32) bool {
return false
}
+
+// Lotus123 matches a Lotus 1-2-3 spreadsheet document.
+// The first four bytes are read as a big-endian word and one of two layouts
+// is accepted depending on its value. The second parameter is ignored.
+func Lotus123(raw []byte, _ uint32) bool {
+ // At least 21 bytes are required so that raw[20] below is in range.
+ if len(raw) <= 20 {
+ return false
+ }
+ version := binary.BigEndian.Uint32(raw)
+ if version == 0x00000200 {
+ // NOTE(review): the meaning of bytes 6 and 7 is not visible here; confirm against the format description.
+ return raw[6] != 0 && raw[7] == 0
+ }
+
+ // NOTE(review): byte 20 must be in 1..31; the field it represents is not visible here.
+ return version == 0x00001a00 && raw[20] > 0 && raw[20] < 32
+}
+
+// CHM matches a Microsoft Compiled HTML Help file.
+// The input must start with "ITSF" followed by the fixed bytes
+// 03 00 00 00 60 00 00 00. The second parameter is ignored.
+func CHM(raw []byte, _ uint32) bool {
+ return bytes.HasPrefix(raw, []byte("ITSF\003\000\000\000\x60\000\000\000"))
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go
index a34c60984..5fe435b99 100644
--- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go
@@ -4,6 +4,8 @@ package magic
import (
"bytes"
"fmt"
+
+ "github.com/gabriel-vasile/mimetype/internal/scan"
)
type (
@@ -74,12 +76,13 @@ func ciCheck(sig, raw []byte) bool {
// matches the raw input.
func xml(sigs ...xmlSig) Detector {
return func(raw []byte, limit uint32) bool {
- raw = trimLWS(raw)
- if len(raw) == 0 {
+ b := scan.Bytes(raw)
+ b.TrimLWS()
+ if len(b) == 0 {
return false
}
for _, s := range sigs {
- if xmlCheck(s, raw) {
+ if xmlCheck(s, b) {
return true
}
}
@@ -104,19 +107,19 @@ func xmlCheck(sig xmlSig, raw []byte) bool {
// matches the raw input.
func markup(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
- if bytes.HasPrefix(raw, []byte{0xEF, 0xBB, 0xBF}) {
+ b := scan.Bytes(raw)
+ if bytes.HasPrefix(b, []byte{0xEF, 0xBB, 0xBF}) {
// We skip the UTF-8 BOM if present to ensure we correctly
// process any leading whitespace. The presence of the BOM
// is taken into account during charset detection in charset.go.
- raw = trimLWS(raw[3:])
- } else {
- raw = trimLWS(raw)
+ b.Advance(3)
}
- if len(raw) == 0 {
+ b.TrimLWS()
+ if len(b) == 0 {
return false
}
for _, s := range sigs {
- if markupCheck(s, raw) {
+ if markupCheck(s, b) {
return true
}
}
@@ -139,7 +142,7 @@ func markupCheck(sig, raw []byte) bool {
}
}
// Next byte must be space or right angle bracket.
- if db := raw[len(sig)]; db != ' ' && db != '>' {
+ if db := raw[len(sig)]; !scan.ByteIsWS(db) && db != '>' {
return false
}
@@ -183,8 +186,10 @@ func newXMLSig(localName, xmlns string) xmlSig {
// /usr/bin/env is the interpreter, php is the first and only argument.
func shebang(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
+ b := scan.Bytes(raw)
+ line := b.Line()
for _, s := range sigs {
- if shebangCheck(s, firstLine(raw)) {
+ if shebangCheck(s, line) {
return true
}
}
@@ -192,7 +197,7 @@ func shebang(sigs ...[]byte) Detector {
}
}
-func shebangCheck(sig, raw []byte) bool {
+func shebangCheck(sig []byte, raw scan.Bytes) bool {
if len(raw) < len(sig)+2 {
return false
}
@@ -200,52 +205,8 @@ func shebangCheck(sig, raw []byte) bool {
return false
}
- return bytes.Equal(trimLWS(trimRWS(raw[2:])), sig)
-}
-
-// trimLWS trims whitespace from beginning of the input.
-func trimLWS(in []byte) []byte {
- firstNonWS := 0
- for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
- }
-
- return in[firstNonWS:]
-}
-
-// trimRWS trims whitespace from the end of the input.
-func trimRWS(in []byte) []byte {
- lastNonWS := len(in) - 1
- for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
- }
-
- return in[:lastNonWS+1]
-}
-
-func firstLine(in []byte) []byte {
- lineEnd := 0
- for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ {
- }
-
- return in[:lineEnd]
-}
-
-func isWS(b byte) bool {
- return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
-}
-
-func min(a, b int) int {
- if a < b {
- return a
- }
- return b
-}
-
-type readBuf []byte
-
-func (b *readBuf) advance(n int) bool {
- if n < 0 || len(*b) < n {
- return false
- }
- *b = (*b)[n:]
- return true
+ raw.Advance(2) // skip #! we checked above
+ raw.TrimLWS()
+ raw.TrimRWS()
+ return bytes.Equal(raw, sig)
}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go
index 7d60e22e2..c912823e9 100644
--- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go
@@ -7,17 +7,34 @@ import (
// Xlsx matches a Microsoft Excel 2007 file.
func Xlsx(raw []byte, limit uint32) bool {
- return zipContains(raw, []byte("xl/"), true)
+ return msoxml(raw, zipEntries{{
+ name: []byte("xl/"),
+ dir: true,
+ }}, 100)
}
// Docx matches a Microsoft Word 2007 file.
func Docx(raw []byte, limit uint32) bool {
- return zipContains(raw, []byte("word/"), true)
+ return msoxml(raw, zipEntries{{
+ name: []byte("word/"),
+ dir: true,
+ }}, 100)
}
// Pptx matches a Microsoft PowerPoint 2007 file.
func Pptx(raw []byte, limit uint32) bool {
- return zipContains(raw, []byte("ppt/"), true)
+ return msoxml(raw, zipEntries{{
+ name: []byte("ppt/"),
+ dir: true,
+ }}, 100)
+}
+
+// Visio matches a Microsoft Visio 2013+ file.
+// Detection is delegated to msoxml, looking for a "visio/" directory entry.
+// The limit parameter is not used.
+// NOTE(review): the meaning of the trailing 100 is defined by msoxml, which is
+// not visible here; confirm against its definition.
+func Visio(raw []byte, limit uint32) bool {
+ return msoxml(raw, zipEntries{{
+ name: []byte("visio/"),
+ dir: true,
+ }}, 100)
+}
// Ole matches an Open Linking and Embedding file.
@@ -157,6 +174,14 @@ func Msi(raw []byte, limit uint32) bool {
})
}
+// One matches a Microsoft OneNote file.
+// The input must start with the fixed 16-byte signature below. The limit
+// parameter is not used.
+func One(raw []byte, limit uint32) bool {
+ return bytes.HasPrefix(raw, []byte{
+ 0xe4, 0x52, 0x5c, 0x7b, 0x8c, 0xd8, 0xa7, 0x4d,
+ 0xae, 0xb1, 0x53, 0x78, 0xd0, 0x29, 0x96, 0xd3,
+ })
+}
+
// Helper to match by a specific CLSID of a compound file.
//
// http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/netpbm.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/netpbm.go
new file mode 100644
index 000000000..4baa25767
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/netpbm.go
@@ -0,0 +1,111 @@
+package magic
+
+import (
+ "bytes"
+ "strconv"
+
+ "github.com/gabriel-vasile/mimetype/internal/scan"
+)
+
+// NetPBM matches a Netpbm Portable BitMap ASCII/Binary file.
+// The input must start with "P1\n" or "P4\n"; the remaining checks are done
+// by netp. The second parameter is ignored.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPBM(raw []byte, _ uint32) bool {
+ return netp(raw, "P1\n", "P4\n")
+}
+
+// NetPGM matches a Netpbm Portable GrayMap ASCII/Binary file.
+// The input must start with "P2\n" or "P5\n"; the remaining checks are done
+// by netp. The second parameter is ignored.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPGM(raw []byte, _ uint32) bool {
+ return netp(raw, "P2\n", "P5\n")
+}
+
+// NetPPM matches a Netpbm Portable PixMap ASCII/Binary file.
+// The input must start with "P3\n" or "P6\n"; the remaining checks are done
+// by netp. The second parameter is ignored.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPPM(raw []byte, _ uint32) bool {
+ return netp(raw, "P3\n", "P6\n")
+}
+
+// NetPAM matches a Netpbm Portable Arbitrary Map file.
+// The input must start with "P7\n". Up to 128 lines are then scanned for the
+// WIDTH, HEIGHT, DEPTH and MAXVAL header lines; the result is true only if
+// all four were seen by the time an ENDHDR line is reached. The second
+// parameter is ignored.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPAM(raw []byte, _ uint32) bool {
+ if !bytes.HasPrefix(raw, []byte("P7\n")) {
+ return false
+ }
+ // One flag per header keyword: WIDTH, HEIGHT, DEPTH, MAXVAL and ENDHDR.
+ w, h, d, m, e := false, false, false, false, false
+ s := scan.Bytes(raw)
+ var l scan.Bytes
+ // Read line by line.
+ for i := 0; i < 128; i++ {
+ l = s.Line()
+ // If the line is empty or a comment, skip.
+ if len(l) == 0 || l.Peek() == '#' {
+ // Nothing left to read and ENDHDR was never seen.
+ if len(s) == 0 {
+ return false
+ }
+ continue
+ } else if bytes.HasPrefix(l, []byte("TUPLTYPE")) {
+ continue
+ } else if bytes.HasPrefix(l, []byte("WIDTH ")) {
+ w = true
+ } else if bytes.HasPrefix(l, []byte("HEIGHT ")) {
+ h = true
+ } else if bytes.HasPrefix(l, []byte("DEPTH ")) {
+ d = true
+ } else if bytes.HasPrefix(l, []byte("MAXVAL ")) {
+ m = true
+ } else if bytes.HasPrefix(l, []byte("ENDHDR")) {
+ e = true
+ }
+ // Once ENDHDR is reached, report whether all four required header
+ // lines (WIDTH, HEIGHT, DEPTH and MAXVAL) were seen.
+ if e {
+ return w && h && d && m
+ }
+ }
+ // ENDHDR was not found within the first 128 lines.
+ return false
+}
+
+// netp reports whether s starts with one of prefixes and then, after optional
+// comment lines and leading whitespace, contains two positive base-10
+// integers separated by whitespace (the image width and height).
+// At most 128 lines are examined while looking for the line with the sizes.
+func netp(s scan.Bytes, prefixes ...string) bool {
+ // No early exit: if several prefixes match, the last one is kept.
+ foundPrefix := ""
+ for _, p := range prefixes {
+ if bytes.HasPrefix(s, []byte(p)) {
+ foundPrefix = p
+ }
+ }
+ if foundPrefix == "" {
+ return false
+ }
+ s.Advance(len(foundPrefix)) // jump over P1, P2, P3, etc.
+
+ var l scan.Bytes
+ // Read line by line.
+ for i := 0; i < 128; i++ {
+ l = s.Line()
+ // If the line is a comment, skip.
+ if l.Peek() == '#' {
+ continue
+ }
+ // If line has leading whitespace, then skip over whitespace.
+ for scan.ByteIsWS(l.Peek()) {
+ l.Advance(1)
+ }
+ // Stop at the first line with content, or when the input is exhausted.
+ if len(s) == 0 || len(l) > 0 {
+ break
+ }
+ }
+
+ // At this point l should be the two integers denoting the size of the matrix.
+ // NOTE(review): PopUntil presumably returns the bytes before the first ASCII space; confirm in package scan.
+ width := l.PopUntil(scan.ASCIISpaces...)
+ for scan.ByteIsWS(l.Peek()) {
+ l.Advance(1)
+ }
+ height := l.PopUntil(scan.ASCIISpaces...)
+
+ // Both values must parse as integers and be strictly positive.
+ w, errw := strconv.ParseInt(string(width), 10, 64)
+ h, errh := strconv.ParseInt(string(height), 10, 64)
+ return errw == nil && errh == nil && w > 0 && h > 0
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go
index 8178e4707..1841ee871 100644
--- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go
@@ -6,6 +6,8 @@ import (
"github.com/gabriel-vasile/mimetype/internal/charset"
"github.com/gabriel-vasile/mimetype/internal/json"
+ mkup "github.com/gabriel-vasile/mimetype/internal/markup"
+ "github.com/gabriel-vasile/mimetype/internal/scan"
)
var (
@@ -27,6 +29,7 @@ var (
[]byte("
+//
+func svgWithoutXMLDeclaration(s scan.Bytes) bool {
+ for scan.ByteIsWS(s.Peek()) {
+ s.Advance(1)
+ }
+ for mkup.SkipAComment(&s) {
+ }
+ if !bytes.HasPrefix(s, []byte("