groupware: add email HTML sanitization

* sanitize email text/html body parts using bluemonday

 * deps(groupware):
   - new dependency: github.com/microcosm-cc/bluemonday
   - transitive dependencies:
     - github.com/aymerick/douceur
     - github.com/gorilla/css
This commit is contained in:
Pascal Bleser
2025-10-17 17:18:48 +02:00
parent 3c386dfd1d
commit 9d4ec051e7
25 changed files with 6537 additions and 19 deletions

3
go.mod
View File

@@ -142,6 +142,7 @@ require (
github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op // indirect
github.com/armon/go-radix v1.0.0 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bitly/go-simplejson v0.5.0 // indirect
github.com/bits-and-blooms/bitset v1.22.0 // indirect
@@ -256,6 +257,7 @@ require (
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/renameio/v2 v2.0.1 // indirect
github.com/gookit/goutil v0.7.1 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/gorilla/handlers v1.5.1 // indirect
github.com/gorilla/schema v1.4.1 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
@@ -301,6 +303,7 @@ require (
github.com/mattn/go-sqlite3 v1.14.33 // indirect
github.com/maxymania/go-system v0.0.0-20170110133659-647cc364bf0b // indirect
github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103 // indirect
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/miekg/dns v1.1.57 // indirect
github.com/mileusna/useragent v1.3.5 // indirect
github.com/minio/crc64nvme v1.1.1 // indirect

6
go.sum
View File

@@ -138,6 +138,8 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkY
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/aws/aws-sdk-go v1.37.27/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/bbalet/stopwords v1.0.0 h1:0TnGycCtY0zZi4ltKoOGRFIlZHv0WqpoIGUsObjztfo=
github.com/bbalet/stopwords v1.0.0/go.mod h1:sAWrQoDMfqARGIn4s6dp7OW7ISrshUD8IP2q3KoqPjc=
github.com/beevik/etree v1.6.0 h1:u8Kwy8pp9D9XeITj2Z0XtA5qqZEmtJtuXZRQi+j03eE=
@@ -632,6 +634,8 @@ github.com/gophercloud/gophercloud v0.16.0/go.mod h1:wRtmUelyIIv3CSSDI47aUwbs075
github.com/gophercloud/utils v0.0.0-20210216074907-f6de111f2eae/go.mod h1:wx8HMD8oQD0Ryhz6+6ykq75PJ79iPyEqYHfwZ4l7OsA=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
@@ -876,6 +880,8 @@ github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103 h1:Z/i1e+gTZrmcGeZy
github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103/go.mod h1:o9YPB5aGP8ob35Vy6+vyq3P3bWe7NQWzf+JLiXCiMaE=
github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE=
github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A=
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/miekg/dns v1.1.40/go.mod h1:KNUDUusw/aVsxyTYZM1oqvCicbwhgbNgztCETuNZ7xM=
github.com/miekg/dns v1.1.57 h1:Jzi7ApEIzwEPLHWRcafCN9LZSBbqQpxjt/wpgvg7wcM=

View File

@@ -11,6 +11,7 @@ import (
"time"
"github.com/go-chi/chi/v5"
"github.com/microcosm-cc/bluemonday"
"github.com/rs/zerolog"
"github.com/opencloud-eu/opencloud/pkg/jmap"
@@ -77,12 +78,12 @@ func (g *Groupware) GetAllEmailsInMailbox(w http.ResponseWriter, r *http.Request
logger := log.From(req.logger.With().Str(HeaderSince, log.SafeString(since)).Str(logAccountId, log.SafeString(accountId)))
emails, sessionState, lang, jerr := g.jmap.GetMailboxChanges(accountId, req.session, req.ctx, logger, req.language(), mailboxId, since, true, g.maxBodyValueBytes, maxChanges)
changes, sessionState, lang, jerr := g.jmap.GetMailboxChanges(accountId, req.session, req.ctx, logger, req.language(), mailboxId, since, true, g.maxBodyValueBytes, maxChanges)
if jerr != nil {
return req.errorResponseFromJmap(jerr)
}
return etagResponse(emails, sessionState, emails.State, lang)
return etagResponse(changes, sessionState, changes.State, lang)
})
} else {
g.respond(w, r, func(req Request) Response {
@@ -119,7 +120,15 @@ func (g *Groupware) GetAllEmailsInMailbox(w http.ResponseWriter, r *http.Request
return req.errorResponseFromJmap(jerr)
}
return etagResponse(emails, sessionState, emails.State, lang)
safe := jmap.Emails{
Emails: g.sanitizeEmails(emails.Emails),
Total: emails.Total,
Limit: emails.Limit,
Offset: emails.Offset,
State: emails.State,
}
return etagResponse(safe, sessionState, emails.State, lang)
})
}
}
@@ -147,7 +156,7 @@ func (g *Groupware) GetEmailsById(w http.ResponseWriter, r *http.Request) {
if len(emails.Emails) < 1 {
return notFoundResponse(sessionState)
} else {
return etagResponse(emails.Emails[0], sessionState, emails.State, lang)
return etagResponse(g.sanitizeEmail(emails.Emails[0]), sessionState, emails.State, lang)
}
} else {
logger := log.From(l.Array("ids", log.SafeStringArray(ids)))
@@ -158,7 +167,7 @@ func (g *Groupware) GetEmailsById(w http.ResponseWriter, r *http.Request) {
if len(emails.Emails) < 1 {
return notFoundResponse(sessionState)
} else {
return etagResponse(emails.Emails, sessionState, emails.State, lang)
return etagResponse(g.sanitizeEmails(emails.Emails), sessionState, emails.State, lang)
}
}
})
@@ -203,7 +212,7 @@ func (g *Groupware) GetEmailAttachments(w http.ResponseWriter, r *http.Request)
if len(emails.Emails) < 1 {
return notFoundResponse(sessionState)
}
email := emails.Emails[0]
email := g.sanitizeEmail(emails.Emails[0])
return etagResponse(email.Attachments, sessionState, emails.State, lang)
})
} else {
@@ -229,7 +238,7 @@ func (g *Groupware) GetEmailAttachments(w http.ResponseWriter, r *http.Request)
return nil
}
email := emails.Emails[0]
email := g.sanitizeEmail(emails.Emails[0])
var attachment *jmap.EmailBodyPart = nil
for _, part := range email.Attachments {
if attachmentSelector(part) {
@@ -302,12 +311,12 @@ func (g *Groupware) getEmailsSince(w http.ResponseWriter, r *http.Request, since
logger := log.From(l)
emails, sessionState, lang, jerr := g.jmap.GetEmailsSince(accountId, req.session, req.ctx, logger, req.language(), since, true, g.maxBodyValueBytes, maxChanges)
changes, sessionState, lang, jerr := g.jmap.GetEmailsSince(accountId, req.session, req.ctx, logger, req.language(), since, true, g.maxBodyValueBytes, maxChanges)
if jerr != nil {
return req.errorResponseFromJmap(jerr)
}
return etagResponse(emails, sessionState, emails.State, lang)
return etagResponse(changes, sessionState, changes.State, lang)
})
}
@@ -518,8 +527,6 @@ func (g *Groupware) searchEmails(w http.ResponseWriter, r *http.Request) {
}
logger = log.From(logger.With().Str(logAccountId, log.SafeString(accountId)))
g.jmap.QueryEmails([]string{accountId}, filter, req.session, req.ctx, logger, req.language(), offset, limit, fetchBodies, g.maxBodyValueBytes)
resultsByAccount, sessionState, lang, jerr := g.jmap.QueryEmailsWithSnippets([]string{accountId}, filter, req.session, req.ctx, logger, req.language(), offset, limit, fetchBodies, g.maxBodyValueBytes)
if jerr != nil {
return req.errorResponseFromJmap(jerr)
@@ -542,7 +549,7 @@ func (g *Groupware) searchEmails(w http.ResponseWriter, r *http.Request) {
}
flattened[i] = EmailWithSnippets{
// AccountId: accountId,
Email: result.Email,
Email: g.sanitizeEmail(result.Email),
Snippets: snippets,
}
}
@@ -653,7 +660,7 @@ func (g *Groupware) GetEmailsForAllAccounts(w http.ResponseWriter, r *http.Reque
})
flattened[i] = EmailWithSnippets{
AccountId: accountId,
Email: result.Email,
Email: g.sanitizeEmail(result.Email),
Snippets: snippets,
}
}
@@ -701,7 +708,7 @@ func (g *Groupware) GetEmailsForAllAccounts(w http.ResponseWriter, r *http.Reque
i := 0
for _, list := range resultsByAccountId {
for _, e := range list.Emails {
flattened[i] = e
flattened[i] = g.sanitizeEmail(e)
i++
}
}
@@ -1196,7 +1203,7 @@ type AboutEmailResponse struct {
Language jmap.Language `json:"lang"`
}
func relatedEmails(email jmap.Email, beacon time.Time, days uint) jmap.EmailFilterElement {
func relatedEmailsFilter(email jmap.Email, beacon time.Time, days uint) jmap.EmailFilterElement {
filters := []jmap.EmailFilterElement{}
for _, from := range email.From {
if from.Email != "" {
@@ -1283,7 +1290,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
beacon := email.ReceivedAt // TODO configurable: either relative to when the email was received, or relative to now
//beacon := time.Now()
filter := relatedEmails(email, beacon, days)
filter := relatedEmailsFilter(email, beacon, days)
// bgctx, _ := context.WithTimeout(context.Background(), time.Duration(30)*time.Second) // TODO configurable
bgctx := context.Background()
@@ -1298,7 +1305,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
l.Error().Err(jerr).Msgf("failed to query %v emails", RelationTypeSameSender)
} else {
req.observe(g.metrics.EmailSameSenderDuration.WithLabelValues(req.session.JmapEndpoint), duration.Seconds())
related := filterEmails(results.Emails, email)
related := g.sanitizeEmails(filterEmails(results.Emails, email))
l.Trace().Msgf("'%v' found %v other emails", RelationTypeSameSender, len(related))
if len(related) > 0 {
req.push(RelationEntityEmail, AboutEmailsEvent{Id: reqId, Emails: related, Source: RelationTypeSameSender, Language: lang})
@@ -1316,7 +1323,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
l.Error().Err(jerr).Msgf("failed to list %v emails", RelationTypeSameThread)
} else {
req.observe(g.metrics.EmailSameThreadDuration.WithLabelValues(req.session.JmapEndpoint), duration.Seconds())
related := filterEmails(emails, email)
related := g.sanitizeEmails(filterEmails(emails, email))
l.Trace().Msgf("'%v' found %v other emails", RelationTypeSameThread, len(related))
if len(related) > 0 {
req.push(RelationEntityEmail, AboutEmailsEvent{Id: reqId, Emails: related, Source: RelationTypeSameThread, Language: lang})
@@ -1325,7 +1332,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
})
return etagResponse(AboutEmailResponse{
Email: email,
Email: g.sanitizeEmail(email),
RequestId: reqId,
}, sessionState, emails.State, lang)
})
@@ -1703,3 +1710,45 @@ func squashQueryState[V any](all map[string]V, mapper func(V) jmap.State) jmap.S
}
return jmap.State(strings.Join(parts, ","))
}
// sanitizationPolicy is the shared bluemonday policy that sanitizeEmail applies
// to the values of text/html body parts. It is built once, from bluemonday's
// UGCPolicy preset.
var sanitizationPolicy = bluemonday.UGCPolicy()
// sanitizeEmail returns a copy of source in which the value of every text/html
// body part referenced by HtmlBody or TextBody has been run through
// sanitizationPolicy, and in which the Size of those parts is the byte length
// of the sanitized value.
//
// When sanitization is disabled (g.sanitize is false), source is returned
// unchanged.
//
// The BodyValues map of the caller is never written to: source is a copy of
// the struct, but a map is shared between struct copies, so the map is cloned
// before the first sanitized value is stored in it.
func (g *Groupware) sanitizeEmail(source jmap.Email) jmap.Email {
	if !g.sanitize {
		return source
	}

	// Sanitized size by part id: the same part may be referenced by both
	// HtmlBody and TextBody and must only be sanitized once.
	sanitizedSizes := map[string]int{}
	cloned := false

	for _, ref := range []*[]jmap.EmailBodyPart{&source.HtmlBody, &source.TextBody} {
		newBody := make([]jmap.EmailBodyPart, len(*ref))
		for i, p := range *ref {
			// Media types are case-insensitive: "Text/HTML" must not slip
			// through unsanitized.
			if strings.EqualFold(p.Type, "text/html") {
				if size, done := sanitizedSizes[p.PartId]; done {
					p.Size = size
				} else if part, ok := source.BodyValues[p.PartId]; ok {
					if !cloned {
						// Detach from the caller's map before modifying it.
						values := make(map[string]jmap.EmailBodyValue, len(source.BodyValues))
						for id, value := range source.BodyValues {
							values[id] = value
						}
						source.BodyValues = values
						cloned = true
					}
					part.Value = sanitizationPolicy.Sanitize(part.Value)
					source.BodyValues[p.PartId] = part
					size = len(part.Value)
					sanitizedSizes[p.PartId] = size
					p.Size = size
				}
			}
			newBody[i] = p
		}
		*ref = newBody
	}
	return source
}
// sanitizeEmails applies sanitizeEmail to every email of source and returns
// the results in a new slice, in the same order. When sanitization is disabled,
// source itself is returned.
func (g *Groupware) sanitizeEmails(source []jmap.Email) []jmap.Email {
	if g.sanitize {
		sanitized := make([]jmap.Email, 0, len(source))
		for idx := range source {
			sanitized = append(sanitized, g.sanitizeEmail(source[idx]))
		}
		return sanitized
	}
	return source
}

View File

@@ -90,6 +90,7 @@ type Groupware struct {
logger *log.Logger
defaultEmailLimit uint
maxBodyValueBytes uint
sanitize bool
// Caches successful and failed Sessions by the username.
sessionCache sessionCache
jmap *jmap.Client
@@ -192,6 +193,8 @@ func NewGroupware(config *config.Config, logger *log.Logger, mux *chi.Mux, prome
insecureTls := true // TODO make configurable
sanitize := true // TODO make configurable
m := metrics.New(prometheusRegistry, logger)
// TODO add timeouts and other meaningful configuration settings for the HTTP client
@@ -339,6 +342,7 @@ func NewGroupware(config *config.Config, logger *log.Logger, mux *chi.Mux, prome
jmap: &jmapClient,
defaultEmailLimit: defaultEmailLimit,
maxBodyValueBytes: maxBodyValueBytes,
sanitize: sanitize,
eventChannel: eventChannel,
jobsChannel: jobsChannel,
jobCounter: atomic.Uint64{},

View File

@@ -0,0 +1,34 @@
package groupware
import (
"testing"
"github.com/opencloud-eu/opencloud/pkg/jmap"
"github.com/stretchr/testify/require"
)
// TestSanitizeEmail checks that an unsafe attribute is removed from a text/html
// body value and that the size of the body part is updated to the length of
// the sanitized value.
func TestSanitizeEmail(t *testing.T) {
	const partId = "koze92I1"

	dirty := jmap.EmailBodyValue{
		Value: `<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
	}
	htmlPart := jmap.EmailBodyPart{
		PartId: partId,
		Type:   "text/html",
		Size:   65,
	}
	input := jmap.Email{
		Subject:    "test",
		BodyValues: map[string]jmap.EmailBodyValue{partId: dirty},
		HtmlBody:   []jmap.EmailBodyPart{htmlPart},
	}

	groupware := &Groupware{sanitize: true}
	sanitized := groupware.sanitizeEmail(input)

	require.Equal(t, `<a href="http://www.google.com" rel="nofollow">Google</a>`, sanitized.BodyValues[partId].Value)
	require.Equal(t, 57, sanitized.HtmlBody[0].Size)
}

22
vendor/github.com/aymerick/douceur/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2015 Aymerick JEHANNE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

60
vendor/github.com/aymerick/douceur/css/declaration.go generated vendored Normal file
View File

@@ -0,0 +1,60 @@
package css
import "fmt"
// Declaration represents a parsed style property
type Declaration struct {
	Property  string
	Value     string
	Important bool
}

// NewDeclaration instantiates a new, empty Declaration
func NewDeclaration() *Declaration {
	return new(Declaration)
}

// String returns the string representation of the Declaration, including the
// !important part when it is set
func (decl *Declaration) String() string {
	return decl.StringWithImportant(true)
}

// StringWithImportant returns the string representation of the Declaration;
// the !important part is only rendered when option is true and the
// Declaration is flagged as important
func (decl *Declaration) StringWithImportant(option bool) string {
	terminator := ";"
	if decl.Important && option {
		terminator = " !important;"
	}
	return fmt.Sprintf("%s: %s%s", decl.Property, decl.Value, terminator)
}

// Equal returns true if both Declarations have the same property, value and
// important flag
func (decl *Declaration) Equal(other *Declaration) bool {
	if decl.Property != other.Property {
		return false
	}
	if decl.Value != other.Value {
		return false
	}
	return decl.Important == other.Important
}
//
// DeclarationsByProperty
//
// DeclarationsByProperty represents style declarations that can be sorted by
// their property name
type DeclarationsByProperty []*Declaration

// Len implements sort.Interface
func (d DeclarationsByProperty) Len() int {
	count := len(d)
	return count
}

// Swap implements sort.Interface
func (d DeclarationsByProperty) Swap(i, j int) {
	first, second := d[i], d[j]
	d[i] = second
	d[j] = first
}

// Less implements sort.Interface, ordering by property name
func (d DeclarationsByProperty) Less(i, j int) bool {
	left, right := d[i], d[j]
	return left.Property < right.Property
}

230
vendor/github.com/aymerick/douceur/css/rule.go generated vendored Normal file
View File

@@ -0,0 +1,230 @@
package css
import (
"fmt"
"strings"
)
const (
	// indentSpace is the number of spaces used per embedding level when a
	// rule is rendered as a string
	indentSpace = 2
)

// RuleKind represents a Rule kind
type RuleKind int

// Rule kinds
const (
	// QualifiedRule is a rule made of selectors and a block of declarations
	QualifiedRule RuleKind = iota
	// AtRule is a rule introduced by an at-keyword (eg: "@media")
	AtRule
)

// At Rules that have Rules inside their block instead of Declarations
var atRulesWithRulesBlock = []string{
	"@document", "@font-feature-values", "@keyframes", "@media", "@supports",
}
// Rule represents a parsed CSS rule, which is either a qualified rule
// (selectors and declarations) or an at rule (see RuleKind)
type Rule struct {
	Kind RuleKind
	// At Rule name (eg: "@media")
	Name string
	// Raw prelude
	Prelude string
	// Qualified Rule selectors parsed from prelude
	Selectors []string
	// Style properties
	Declarations []*Declaration
	// At Rule embedded rules
	Rules []*Rule
	// Current rule embedding level, used to indent the rule when it is
	// rendered as a string
	EmbedLevel int
}
// NewRule instantiates a new Rule of the given kind
func NewRule(kind RuleKind) *Rule {
	rule := new(Rule)
	rule.Kind = kind
	return rule
}
// String returns the string representation of the rule kind
func (kind RuleKind) String() string {
	if kind == QualifiedRule {
		return "Qualified Rule"
	}
	if kind == AtRule {
		return "At Rule"
	}
	// unknown kind
	return "WAT"
}
// EmbedsRules returns true if this rule is an At Rule whose block contains
// other rules instead of declarations
func (rule *Rule) EmbedsRules() bool {
	if rule.Kind != AtRule {
		return false
	}
	for idx := range atRulesWithRulesBlock {
		if atRulesWithRulesBlock[idx] == rule.Name {
			return true
		}
	}
	return false
}
// Equal returns true if both rules are equal: same kind, prelude and name,
// and pairwise equal selectors, declarations and embedded rules
func (rule *Rule) Equal(other *Rule) bool {
	switch {
	case rule.Kind != other.Kind,
		rule.Prelude != other.Prelude,
		rule.Name != other.Name:
		return false
	case len(rule.Selectors) != len(other.Selectors),
		len(rule.Declarations) != len(other.Declarations),
		len(rule.Rules) != len(other.Rules):
		return false
	}

	for idx := range rule.Selectors {
		if rule.Selectors[idx] != other.Selectors[idx] {
			return false
		}
	}
	for idx, declaration := range rule.Declarations {
		if !declaration.Equal(other.Declarations[idx]) {
			return false
		}
	}
	for idx, nested := range rule.Rules {
		if !nested.Equal(other.Rules[idx]) {
			return false
		}
	}
	return true
}
// Diff returns a list of human readable descriptions of the differences
// between both rules; the list is empty when no difference was found
func (rule *Rule) Diff(other *Rule) []string {
	diffs := []string{}
	note := func(format string, args ...interface{}) {
		diffs = append(diffs, fmt.Sprintf(format, args...))
	}

	if rule.Kind != other.Kind {
		note("Kind: %s | %s", rule.Kind.String(), other.Kind.String())
	}
	if rule.Prelude != other.Prelude {
		note("Prelude: \"%s\" | \"%s\"", rule.Prelude, other.Prelude)
	}
	if rule.Name != other.Name {
		note("Name: \"%s\" | \"%s\"", rule.Name, other.Name)
	}

	// element-wise comparisons are only done when the lengths match
	if len(rule.Selectors) == len(other.Selectors) {
		for idx, mine := range rule.Selectors {
			if theirs := other.Selectors[idx]; mine != theirs {
				note("Selector: \"%s\" | \"%s\"", mine, theirs)
			}
		}
	} else {
		note("Selectors: %v | %v", strings.Join(rule.Selectors, ", "), strings.Join(other.Selectors, ", "))
	}

	if len(rule.Declarations) == len(other.Declarations) {
		for idx, mine := range rule.Declarations {
			if theirs := other.Declarations[idx]; !mine.Equal(theirs) {
				note("Declaration: \"%s\" | \"%s\"", mine.String(), theirs.String())
			}
		}
	} else {
		note("Declarations Nb: %d | %d", len(rule.Declarations), len(other.Declarations))
	}

	if len(rule.Rules) == len(other.Rules) {
		for idx, mine := range rule.Rules {
			if theirs := other.Rules[idx]; !mine.Equal(theirs) {
				note("Rule: \"%s\" | \"%s\"", mine.String(), theirs.String())
			}
		}
	} else {
		note("Rules Nb: %d | %d", len(rule.Rules), len(other.Rules))
	}

	return diffs
}
// String returns the string representation of a rule: its selectors (for a
// qualified rule) or its name and prelude (for an at rule), followed by ";"
// when the rule has neither declarations nor embedded rules, or by an indented
// block otherwise
func (rule *Rule) String() string {
	var head string
	if rule.Kind == QualifiedRule {
		head = strings.Join(rule.Selectors, ", ")
	} else {
		// AtRule
		head = rule.Name
		if rule.Prelude != "" {
			if head != "" {
				head += " "
			}
			head += rule.Prelude
		}
	}

	if len(rule.Declarations) == 0 && len(rule.Rules) == 0 {
		return head + ";"
	}

	text := head + " {\n"
	padding := rule.indent()
	if rule.EmbedsRules() {
		for _, nested := range rule.Rules {
			text += padding + nested.String() + "\n"
		}
	} else {
		for _, declaration := range rule.Declarations {
			text += padding + declaration.String() + "\n"
		}
	}
	return text + rule.indentEndBlock() + "}"
}
// indent returns the indentation spaces for the declarations and rules inside
// the block of this rule
func (rule *Rule) indent() string {
	width := (rule.EmbedLevel + 1) * indentSpace
	if width <= 0 {
		return ""
	}
	return strings.Repeat(" ", width)
}
// indentEndBlock returns the indentation spaces for the character that ends
// the block of this rule
func (rule *Rule) indentEndBlock() string {
	width := rule.EmbedLevel * indentSpace
	if width <= 0 {
		return ""
	}
	return strings.Repeat(" ", width)
}

25
vendor/github.com/aymerick/douceur/css/stylesheet.go generated vendored Normal file
View File

@@ -0,0 +1,25 @@
package css
// Stylesheet represents a parsed stylesheet
type Stylesheet struct {
	Rules []*Rule
}

// NewStylesheet instantiates a new, empty Stylesheet
func NewStylesheet() *Stylesheet {
	return new(Stylesheet)
}

// String returns the string representation of the Stylesheet: the string
// representations of its rules, separated by newlines
func (sheet *Stylesheet) String() string {
	text := ""
	for idx := range sheet.Rules {
		if text != "" {
			text += "\n"
		}
		text += sheet.Rules[idx].String()
	}
	return text
}

409
vendor/github.com/aymerick/douceur/parser/parser.go generated vendored Normal file
View File

@@ -0,0 +1,409 @@
package parser
import (
"errors"
"fmt"
"regexp"
"strings"
"github.com/gorilla/css/scanner"
"github.com/aymerick/douceur/css"
)
const (
	// importantSuffixRegexp matches a trailing, case-insensitive "!important"
	// marker together with the whitespace around it
	importantSuffixRegexp = `(?i)\s*!important\s*$`
)

// importantRegexp is the compiled form of importantSuffixRegexp
var importantRegexp = regexp.MustCompile(importantSuffixRegexp)

// Parser represents a CSS parser
type Parser struct {
	// Tokenizer
	scan *scanner.Scanner
	// Tokens parsed but not consumed yet
	tokens []*scanner.Token
	// Rule embedding level
	embedLevel int
}
// NewParser instantiates a new parser that reads its tokens from txt
func NewParser(txt string) *Parser {
	parser := new(Parser)
	parser.scan = scanner.New(txt)
	return parser
}
// Parse parses a whole stylesheet. When parsing fails, the returned stylesheet
// is nil.
func Parse(text string) (*css.Stylesheet, error) {
	sheet, err := NewParser(text).ParseStylesheet()
	if err == nil {
		return sheet, nil
	}
	return nil, err
}
// ParseDeclarations parses CSS declarations. When parsing fails, the returned
// list is nil.
func ParseDeclarations(text string) ([]*css.Declaration, error) {
	declarations, err := NewParser(text).ParseDeclarations()
	if err == nil {
		return declarations, nil
	}
	return nil, err
}
// ParseStylesheet parses a stylesheet: an optional BOM followed by a list of
// rules. On error, the stylesheet is returned without rules.
func (parser *Parser) ParseStylesheet() (*css.Stylesheet, error) {
	sheet := css.NewStylesheet()

	// Parse BOM
	_, err := parser.parseBOM()
	if err == nil {
		// Parse list of rules
		var rules []*css.Rule
		if rules, err = parser.ParseRules(); err == nil {
			sheet.Rules = rules
		}
	}
	return sheet, err
}
// ParseRules parses a list of rules. When the next token is "{", the rules are
// parsed as a block that ends with the matching "}", one embedding level
// deeper; otherwise a "}" is reported as an error.
func (parser *Parser) ParseRules() ([]*css.Rule, error) {
	rules := []*css.Rule{}

	inBlock := parser.tokenChar("{")
	if inBlock {
		// parsing a block of rules
		parser.embedLevel++
		parser.shiftToken()
	}

scan:
	for parser.tokenParsable() {
		switch {
		case parser.tokenIgnorable():
			parser.shiftToken()
		case parser.tokenChar("}"):
			if !inBlock {
				errMsg := fmt.Sprintf("Unexpected } character: %s", parser.nextToken().String())
				return rules, errors.New(errMsg)
			}
			parser.shiftToken()
			parser.embedLevel--
			// finished
			break scan
		default:
			rule, err := parser.ParseRule()
			if err != nil {
				return rules, err
			}
			rule.EmbedLevel = parser.embedLevel
			rules = append(rules, rule)
		}
	}

	return rules, parser.err()
}
// ParseRule parses a rule: an At Rule when the next token is an at-keyword,
// a Qualified Rule otherwise
func (parser *Parser) ParseRule() (*css.Rule, error) {
	if !parser.tokenAtKeyword() {
		return parser.parseQualifiedRule()
	}
	return parser.parseAtRule()
}
// ParseDeclarations parses a list of declarations, optionally enclosed in a
// "{" ... "}" block
func (parser *Parser) ParseDeclarations() ([]*css.Declaration, error) {
	declarations := []*css.Declaration{}

	if parser.tokenChar("{") {
		parser.shiftToken()
	}

scan:
	for parser.tokenParsable() {
		switch {
		case parser.tokenIgnorable():
			parser.shiftToken()
		case parser.tokenChar("}"):
			// end of block
			parser.shiftToken()
			break scan
		default:
			declaration, err := parser.ParseDeclaration()
			if err != nil {
				return declarations, err
			}
			declarations = append(declarations, declaration)
		}
	}

	return declarations, parser.err()
}
// ParseDeclaration parses a single declaration ("property: value").
//
// The declaration ends at a ";" (which is consumed), at a "}" (which is left
// for the caller) or at the end of the input. The value of a declaration that
// ends at the end of the input is kept: a final declaration does not need a
// terminating ";".
//
// An error is returned when a ";" or "}" is found before any property name,
// or when the tokenizer reports an error.
func (parser *Parser) ParseDeclaration() (*css.Declaration, error) {
	result := css.NewDeclaration()
	curValue := ""
	terminated := false

	// finish extracts the "!important" flag from the text collected so far
	// and stores the remaining text as the value of the declaration
	finish := func() {
		if importantRegexp.MatchString(curValue) {
			result.Important = true
			curValue = importantRegexp.ReplaceAllString(curValue, "")
		}
		result.Value = strings.TrimSpace(curValue)
	}

	for parser.tokenParsable() {
		if parser.tokenChar(":") {
			result.Property = strings.TrimSpace(curValue)
			curValue = ""
			parser.shiftToken()
		} else if parser.tokenChar(";") || parser.tokenChar("}") {
			if result.Property == "" {
				errMsg := fmt.Sprintf("Unexpected ; character: %s", parser.nextToken().String())
				return result, errors.New(errMsg)
			}
			finish()
			if parser.tokenChar(";") {
				parser.shiftToken()
			}
			// finished
			terminated = true
			break
		} else {
			token := parser.shiftToken()
			curValue += token.Value
		}
	}

	// The input ended without a terminating ";" or "}": the value collected
	// so far still belongs to the declaration and must not be dropped.
	if !terminated && result.Property != "" && parser.tokenEOF() {
		finish()
	}

	return result, parser.err()
}
// parseAtRule parses an At Rule: its name, an optional prelude, and then
// either a terminating ";" or a block of rules or declarations
func (parser *Parser) parseAtRule() (*css.Rule, error) {
	// parse rule name (eg: "@import")
	atKeyword := parser.shiftToken()

	rule := css.NewRule(css.AtRule)
	rule.Name = atKeyword.Value

scan:
	for parser.tokenParsable() {
		switch {
		case parser.tokenChar(";"):
			parser.shiftToken()
			// finished
			break scan
		case parser.tokenChar("{"):
			if rule.EmbedsRules() {
				// parse rules block
				nested, err := parser.ParseRules()
				if err != nil {
					return rule, err
				}
				rule.Rules = nested
			} else {
				// parse declarations block
				declarations, err := parser.ParseDeclarations()
				if err != nil {
					return rule, err
				}
				rule.Declarations = declarations
			}
			// finished
			break scan
		default:
			// parse prelude
			prelude, err := parser.parsePrelude()
			if err != nil {
				return rule, err
			}
			rule.Prelude = prelude
		}
	}

	return rule, parser.err()
}
// parseQualifiedRule parses a Qualified Rule: a prelude made of comma
// separated selectors, followed by a block of declarations.
//
// An error is returned when the block starts before any prelude was read,
// when a ";" is found where the prelude or the block is expected, or when the
// tokenizer reports an error.
func (parser *Parser) parseQualifiedRule() (*css.Rule, error) {
	result := css.NewRule(css.QualifiedRule)

	for parser.tokenParsable() {
		if parser.tokenChar("{") {
			if result.Prelude == "" {
				errMsg := fmt.Sprintf("Unexpected { character: %s", parser.nextToken().String())
				return result, errors.New(errMsg)
			}
			// parse declarations block
			declarations, err := parser.ParseDeclarations()
			if err != nil {
				return result, err
			}
			result.Declarations = declarations
			// finished
			break
		} else if parser.tokenChar(";") {
			// parsePrelude stops at ";" without consuming it, so carrying on
			// would call it over and over on the same token and never
			// terminate: a ";" cannot be part of a qualified rule here
			errMsg := fmt.Sprintf("Unexpected ; character: %s", parser.nextToken().String())
			return result, errors.New(errMsg)
		} else {
			// parse prelude
			prelude, err := parser.parsePrelude()
			if err != nil {
				return result, err
			}
			result.Prelude = prelude
		}
	}

	result.Selectors = strings.Split(result.Prelude, ",")
	for i, sel := range result.Selectors {
		result.Selectors[i] = strings.TrimSpace(sel)
	}

	return result, parser.err()
}
// parsePrelude parses a Rule prelude: it consumes tokens up to, but not
// including, the next ";" or "{" and returns their text without the
// surrounding whitespace
func (parser *Parser) parsePrelude() (string, error) {
	var raw strings.Builder

	for parser.tokenParsable() {
		if parser.tokenEndOfPrelude() {
			break
		}
		raw.WriteString(parser.shiftToken().Value)
	}

	prelude := strings.TrimSpace(raw.String())
	return prelude, parser.err()
}
// parseBOM consumes the next token if it is a BOM, and returns whether it did
func (parser *Parser) parseBOM() (bool, error) {
	if parser.nextToken().Type != scanner.TokenBOM {
		return false, parser.err()
	}
	parser.shiftToken()
	return true, nil
}
// nextToken returns the next token without removing it from the tokens
// buffer; the token is fetched from the tokenizer if the buffer is empty
func (parser *Parser) nextToken() *scanner.Token {
	if len(parser.tokens) > 0 {
		return parser.tokens[0]
	}
	// fetch next token and queue it
	fetched := parser.scan.Next()
	parser.tokens = append(parser.tokens, fetched)
	return parser.tokens[0]
}
// shiftToken returns the next token and removes it from the tokens buffer.
// The buffer must not be empty.
func (parser *Parser) shiftToken() *scanner.Token {
	head := parser.tokens[0]
	parser.tokens = parser.tokens[1:]
	return head
}
// err returns the tokenizer error when the next token is an error token, or
// nil otherwise
func (parser *Parser) err() error {
	if !parser.tokenError() {
		return nil
	}
	return fmt.Errorf("Tokenizer error: %s", parser.nextToken().String())
}
// tokenError returns true if the next token is an error token
func (parser *Parser) tokenError() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenError
}

// tokenEOF returns true if the next token is the end of the input
func (parser *Parser) tokenEOF() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenEOF
}

// tokenWS returns true if the next token is a whitespace
func (parser *Parser) tokenWS() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenS
}

// tokenComment returns true if the next token is a comment
func (parser *Parser) tokenComment() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenComment
}
// tokenCDOorCDC returns true if the next token is a CDO or a CDC
func (parser *Parser) tokenCDOorCDC() bool {
	kind := parser.nextToken().Type
	return kind == scanner.TokenCDO || kind == scanner.TokenCDC
}

// tokenIgnorable returns true if the next token is ignorable: a whitespace,
// a comment, a CDO or a CDC
func (parser *Parser) tokenIgnorable() bool {
	switch {
	case parser.tokenWS(), parser.tokenComment(), parser.tokenCDOorCDC():
		return true
	}
	return false
}

// tokenParsable returns true if the next token is parsable, i.e. neither the
// end of the input nor an error
func (parser *Parser) tokenParsable() bool {
	if parser.tokenEOF() {
		return false
	}
	return !parser.tokenError()
}
// tokenAtKeyword returns true if the next token is an At Rule keyword
func (parser *Parser) tokenAtKeyword() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenAtKeyword
}

// tokenChar returns true if the next token is the given character
func (parser *Parser) tokenChar(value string) bool {
	next := parser.nextToken()
	if next.Type != scanner.TokenChar {
		return false
	}
	return next.Value == value
}

// tokenEndOfPrelude returns true if the next token marks the end of a
// prelude: ";" or "{"
func (parser *Parser) tokenEndOfPrelude() bool {
	if parser.tokenChar(";") {
		return true
	}
	return parser.tokenChar("{")
}

28
vendor/github.com/gorilla/css/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,28 @@
Copyright (c) 2023 The Gorilla Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

33
vendor/github.com/gorilla/css/scanner/doc.go generated vendored Normal file
View File

@@ -0,0 +1,33 @@
// Copyright 2012 The Gorilla Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package scanner generates tokens for a CSS3 input.
It follows the CSS3 specification located at:
http://www.w3.org/TR/css3-syntax/
To use it, create a new scanner for a given CSS string and call Next() until
the token returned has type TokenEOF or TokenError:
s := scanner.New(myCSS)
for {
token := s.Next()
if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError {
break
}
// Do something with the token...
}
Following the CSS3 specification, an error can only occur when the scanner
finds an unclosed quote or unclosed comment. In these cases the text becomes
"untokenizable". Everything else is tokenizable and it is up to a parser
to make sense of the token stream (or ignore nonsensical token sequences).
Note: the scanner doesn't perform lexical analysis or, in other words, it
doesn't care about the token context. It is intended to be used by a
lexer or parser.
*/
package scanner

360
vendor/github.com/gorilla/css/scanner/scanner.go generated vendored Normal file
View File

@@ -0,0 +1,360 @@
// Copyright 2012 The Gorilla Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package scanner
import (
"fmt"
"regexp"
"strings"
"unicode"
"unicode/utf8"
)
// tokenType identifies the type of lexical tokens.
//
// The type itself is unexported; its values are the exported Token*
// constants declared in this file.
type tokenType int
// String returns the name registered for the token type in tokenNames.
// A type with no entry in that table yields the empty string.
func (t tokenType) String() string {
	if name, known := tokenNames[t]; known {
		return name
	}
	return ""
}
// Token represents a token and the corresponding string.
type Token struct {
// Type is the kind of token (one of the Token* constants).
Type tokenType
// Value is the matched input text; for TokenError it holds the error
// message and for TokenEOF it is empty.
Value string
// Line is the scanner row at which the token starts (rows start at 1).
Line int
// Column is the scanner column at which the token starts (columns start at 1).
Column int
}
// String returns a string representation of the token in the form
//
//	TYPE (line: L, column: C): "value"
//
// Values longer than 10 runes are cut to their first 10 runes and followed
// by "...".
func (t *Token) String() string {
	// %.10q truncates the value by runes, so the length test has to count
	// runes as well. Counting bytes would append "..." to short multi-byte
	// values that were not shortened at all.
	if utf8.RuneCountInString(t.Value) > 10 {
		return fmt.Sprintf("%s (line: %d, column: %d): %.10q...",
			t.Type, t.Line, t.Column, t.Value)
	}
	return fmt.Sprintf("%s (line: %d, column: %d): %q",
		t.Type, t.Line, t.Column, t.Value)
}
// All tokens -----------------------------------------------------------------
// The complete list of tokens in CSS3.
//
// Values are assigned by iota in declaration order, so TokenError is the zero
// value of tokenType and reordering the entries changes every numeric value.
const (
// Scanner flags.
// TokenError is returned by Next for an unclosed quotation mark or comment.
TokenError tokenType = iota
// TokenEOF is returned by Next once the whole input has been consumed.
TokenEOF
// From now on, only tokens from the CSS specification.
TokenIdent
TokenAtKeyword
TokenString
TokenHash
TokenNumber
TokenPercentage
TokenDimension
TokenURI
TokenUnicodeRange
TokenCDO
TokenCDC
TokenS
TokenComment
TokenFunction
TokenIncludes
TokenDashMatch
TokenPrefixMatch
TokenSuffixMatch
TokenSubstringMatch
TokenChar
TokenBOM
)
// tokenNames maps tokenType's to their names. Used for conversion to string.
//
// tokenType.String looks names up here; a type that is missing from the map
// is rendered as the empty string.
var tokenNames = map[tokenType]string{
TokenError: "error",
TokenEOF: "EOF",
TokenIdent: "IDENT",
TokenAtKeyword: "ATKEYWORD",
TokenString: "STRING",
TokenHash: "HASH",
TokenNumber: "NUMBER",
TokenPercentage: "PERCENTAGE",
TokenDimension: "DIMENSION",
TokenURI: "URI",
TokenUnicodeRange: "UNICODE-RANGE",
TokenCDO: "CDO",
TokenCDC: "CDC",
TokenS: "S",
TokenComment: "COMMENT",
TokenFunction: "FUNCTION",
TokenIncludes: "INCLUDES",
TokenDashMatch: "DASHMATCH",
TokenPrefixMatch: "PREFIXMATCH",
TokenSuffixMatch: "SUFFIXMATCH",
TokenSubstringMatch: "SUBSTRINGMATCH",
TokenChar: "CHAR",
TokenBOM: "BOM",
}
// Macros and productions -----------------------------------------------------
// http://www.w3.org/TR/css3-syntax/#tokenization
//
// macroRegexp matches a macro reference of the form {name}, where name is one
// or more lowercase letters. Repetition counts such as {1,6} contain digits
// and therefore are not mistaken for macro references.
var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`)
// macros maps macro names to patterns to be expanded.
//
// A pattern may refer to another macro as {name}; init() substitutes these
// references repeatedly until none are left, wrapping every substituted
// pattern in a non-capturing group.
var macros = map[string]string{
// must be escaped: `\.+*?()|[]{}^$`
"ident": `-?{nmstart}{nmchar}*`,
"name": `{nmchar}+`,
"nmstart": `[a-zA-Z_]|{nonascii}|{escape}`,
"nonascii": "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
"unicode": `\\[0-9a-fA-F]{1,6}{wc}?`,
"escape": "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
"nmchar": `[a-zA-Z0-9_-]|{nonascii}|{escape}`,
"num": `[0-9]*\.[0-9]+|[0-9]+`,
"string": `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
"stringchar": `{urlchar}|[ ]|\\{nl}`,
"nl": `[\n\r\f]|\r\n`,
"w": `{wc}*`,
"wc": `[\t\n\f\r ]`,
// urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
// ASCII characters range = `[\u0020-\u007e]`
// Skip space \u0020 = `[\u0021-\u007e]`
// Skip quotation mark \u0022 = `[\u0021\u0023-\u007e]`
// Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
// Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d-\u007e]`
// Finally, the left square bracket (\u005b) and right (\u005d) need escaping themselves
// inside the character class, hence the backslash written before each of them below.
"urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
}
// productions maps the list of tokens to patterns to be expanded.
//
// init() expands the macro references in each pattern and compiles the
// result, anchored at the start of the input, into matchers. Token types
// whose entries are commented out get no compiled matcher; Next recognizes
// them by literal prefix comparison or as the fallback Char token.
var productions = map[tokenType]string{
// Unused regexps (matched using other methods) are commented out.
TokenIdent: `{ident}`,
TokenAtKeyword: `@{ident}`,
TokenString: `{string}`,
TokenHash: `#{name}`,
TokenNumber: `{num}`,
TokenPercentage: `{num}%`,
TokenDimension: `{num}{ident}`,
TokenURI: `url\({w}(?:{string}|{urlchar}*?){w}\)`,
TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`,
//TokenCDO: `<!--`,
TokenCDC: `-->`,
TokenS: `{wc}+`,
TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`,
TokenFunction: `{ident}\(`,
//TokenIncludes: `~=`,
//TokenDashMatch: `\|=`,
//TokenPrefixMatch: `\^=`,
//TokenSuffixMatch: `\$=`,
//TokenSubstringMatch: `\*=`,
//TokenChar: `[^"']`,
//TokenBOM: "\uFEFF",
}
// matchers maps the list of tokens to compiled regular expressions.
//
// The map is filled on init() using the macros and productions defined in
// the CSS specification.
//
// It receives exactly one entry per key of productions, so token types that
// have no production have no matcher here.
var matchers = map[tokenType]*regexp.Regexp{}
// matchOrder is the order to test regexps when first-char shortcuts
// can't be used.
//
// Next returns the first entry whose matcher matches, so the order is
// significant: patterns that extend another pattern must come before it.
// TokenURI and TokenFunction precede TokenIdent because both begin with an
// identifier, and TokenDimension and TokenPercentage precede TokenNumber
// because both begin with a number.
var matchOrder = []tokenType{
TokenURI,
TokenFunction,
TokenUnicodeRange,
TokenIdent,
TokenDimension,
TokenPercentage,
TokenNumber,
TokenCDC,
}
// init fills matchers: every production has its {macro} references expanded
// and is then compiled into a regexp anchored at the start of the input.
func init() {
	// expand turns one "{name}" reference into the named macro's pattern,
	// wrapped in a non-capturing group. An unknown name expands to "(?:)".
	expand := func(ref string) string {
		name := ref[1 : len(ref)-1]
		return "(?:" + macros[name] + ")"
	}
	for tok, pattern := range productions {
		expanded := pattern
		// Macros may reference other macros, so keep substituting until no
		// reference is left.
		for {
			if !macroRegexp.MatchString(expanded) {
				break
			}
			expanded = macroRegexp.ReplaceAllStringFunc(expanded, expand)
		}
		matchers[tok] = regexp.MustCompile("^(?:" + expanded + ")")
	}
}
// Scanner --------------------------------------------------------------------

// New returns a new CSS scanner for the given input.
//
// Before scanning, the input is preprocessed as described in
// https://www.w3.org/TR/css-syntax-3/#input-preprocessing: "\r\n", "\r" and
// "\f" each become "\n", and U+0000 becomes U+FFFD. The scanner starts at
// row 1, column 1.
func New(input string) *Scanner {
	// Applied in this order so that "\r\n" collapses to a single newline
	// before lone carriage returns are handled.
	for _, rule := range [...][2]string{
		{"\r\n", "\n"},
		{"\r", "\n"},
		{"\f", "\n"},
		{"\u0000", "\ufffd"},
	} {
		input = strings.Replace(input, rule[0], rule[1], -1)
	}
	sc := Scanner{input: input, row: 1, col: 1}
	return &sc
}
// Scanner scans an input and emits tokens following the CSS3 specification.
type Scanner struct {
// input is the text being scanned, as preprocessed by New.
input string
// pos is the byte offset into input of the next unread character.
pos int
// row is the current line number; New starts it at 1.
row int
// col is the current column, counted in runes; New starts it at 1.
col int
// err holds the TokenEOF or TokenError token once one has been produced;
// from then on Next keeps returning it.
err *Token
}
// Next returns the next token from the input.
//
// At the end of the input the token type is TokenEOF.
//
// If the input can't be tokenized the token type is TokenError. This occurs
// in case of unclosed quotation marks or comments.
//
// TokenEOF and TokenError are sticky: once either has been produced it is
// remembered and every later call returns that same token.
func (s *Scanner) Next() *Token {
	if s.err != nil {
		return s.err
	}
	if s.pos >= len(s.input) {
		s.err = &Token{TokenEOF, "", s.row, s.col}
		return s.err
	}
	if s.pos == 0 {
		// Test BOM only once, at the beginning of the file.
		if strings.HasPrefix(s.input, "\uFEFF") {
			// The BOM is three bytes but a single rune, so it goes through
			// emitToken, which advances the column by runes, not bytes.
			return s.emitToken(TokenBOM, "\uFEFF")
		}
	}
	// There's a lot we can guess based on the first byte so we'll take a
	// shortcut before testing multiple regexps.
	input := s.input[s.pos:]
	switch input[0] {
	case '\t', '\n', ' ':
		// Whitespace.
		return s.emitToken(TokenS, matchers[TokenS].FindString(input))
	case '.':
		// Dot is too common to not have a quick check.
		// We'll test if this is a Char; if it is followed by a number it is a
		// dimension/percentage/number, and this will be matched later.
		if len(input) > 1 && !unicode.IsDigit(rune(input[1])) {
			return s.emitSimple(TokenChar, ".")
		}
	case '#':
		// Another common one: Hash or Char.
		if match := matchers[TokenHash].FindString(input); match != "" {
			return s.emitToken(TokenHash, match)
		}
		return s.emitSimple(TokenChar, "#")
	case '@':
		// Another common one: AtKeyword or Char.
		if match := matchers[TokenAtKeyword].FindString(input); match != "" {
			// An identifier may contain non-ASCII runes and escapes that
			// end in a newline, so the position must be updated by
			// emitToken rather than the ASCII-only emitSimple.
			return s.emitToken(TokenAtKeyword, match)
		}
		return s.emitSimple(TokenChar, "@")
	case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}':
		// More common chars.
		return s.emitSimple(TokenChar, string(input[0]))
	case '"', '\'':
		// String or error.
		match := matchers[TokenString].FindString(input)
		if match != "" {
			return s.emitToken(TokenString, match)
		}
		s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
		return s.err
	case '/':
		// Comment, error or Char.
		if len(input) > 1 && input[1] == '*' {
			if match := matchers[TokenComment].FindString(input); match != "" {
				return s.emitToken(TokenComment, match)
			}
			s.err = &Token{TokenError, "unclosed comment", s.row, s.col}
			return s.err
		}
		return s.emitSimple(TokenChar, "/")
	case '~':
		// Includes or Char.
		return s.emitPrefixOrChar(TokenIncludes, "~=")
	case '|':
		// DashMatch or Char.
		return s.emitPrefixOrChar(TokenDashMatch, "|=")
	case '^':
		// PrefixMatch or Char.
		return s.emitPrefixOrChar(TokenPrefixMatch, "^=")
	case '$':
		// SuffixMatch or Char.
		return s.emitPrefixOrChar(TokenSuffixMatch, "$=")
	case '*':
		// SubstringMatch or Char.
		return s.emitPrefixOrChar(TokenSubstringMatch, "*=")
	case '<':
		// CDO or Char.
		return s.emitPrefixOrChar(TokenCDO, "<!--")
	}
	// Test all regexps, in order.
	for _, token := range matchOrder {
		if match := matchers[token].FindString(input); match != "" {
			return s.emitToken(token, match)
		}
	}
	// We already handled unclosed quotation marks and comments,
	// so this can only be a Char.
	r, width := utf8.DecodeRuneInString(input)
	token := &Token{TokenChar, string(r), s.row, s.col}
	// One rune was consumed: the column is a rune index and moves by one,
	// while the byte offset moves by the rune's encoded width.
	s.col++
	s.pos += width
	return token
}
// updatePosition advances the scanner past the consumed text: the byte
// offset moves by the text's byte length, while row and column are adjusted
// according to the newlines and runes it contains.
func (s *Scanner) updatePosition(text string) {
	if lastNL := strings.LastIndex(text, "\n"); lastNL < 0 {
		// Still on the same line: move right by the number of runes.
		s.col += utf8.RuneCountInString(text)
	} else {
		s.row += strings.Count(text, "\n")
		// Counting the newline itself together with the runes after it
		// gives the 1-based column of the next character.
		s.col = utf8.RuneCountInString(text[lastNL:])
	}
	s.pos += len(text) // pos is a byte index, unlike the rune-based col
}
// emitToken builds a Token of type t with value v, stamped with the position
// the scanner had before v was consumed, and advances the scanner past v.
func (s *Scanner) emitToken(t tokenType, v string) *Token {
	line, column := s.row, s.col
	s.updatePosition(v)
	return &Token{Type: t, Value: v, Line: line, Column: column}
}
// emitSimple builds a Token of type t with value v at the current position
// and advances both the column and the byte offset by len(v).
//
// Callers must only pass ASCII text without newlines; under that assumption
// the byte length equals the number of columns consumed.
func (s *Scanner) emitSimple(t tokenType, v string) *Token {
	width := len(v)
	emitted := Token{Type: t, Value: v, Line: s.row, Column: s.col}
	s.col, s.pos = s.col+width, s.pos+width
	return &emitted
}
// emitPrefixOrChar emits a token of type t when the unread input starts with
// prefix; otherwise it emits a Char token holding only the first character
// of prefix.
//
// The prefix must consist of ASCII characters only and contain no newline.
func (s *Scanner) emitPrefixOrChar(t tokenType, prefix string) *Token {
	rest := s.input[s.pos:]
	if !strings.HasPrefix(rest, prefix) {
		return s.emitSimple(TokenChar, string(prefix[0]))
	}
	return s.emitSimple(t, prefix)
}

View File

@@ -0,0 +1,50 @@
# Contributing to bluemonday
Third-party patches are essential for keeping bluemonday secure and offering the features developers want. However there are a few guidelines that we need contributors to follow so that we can maintain the quality of work that developers who use bluemonday expect.
## Getting Started
* Make sure you have a [Github account](https://github.com/signup/free)
## Guidelines
1. Do not vendor dependencies. Vendoring is a project problem, not a package problem.
2. I do not care about spelling mistakes or whitespace and I do not believe that you should either. PRs therefore must be functional in their nature or be substantial and impactful if documentation or examples.
3. This module does not participate in hacktober, please make your contributions meaningful.
## Submitting an Issue
* Submit a ticket for your issue, assuming one does not already exist
* Clearly describe the issue including the steps to reproduce (with sample input and output) if it is a bug
If you are reporting a security flaw, you may expect that we will provide the code to fix it for you. Otherwise you may want to submit a pull request to ensure the resolution is applied sooner rather than later:
* Fork the repository on Github
* Issue a pull request containing code to resolve the issue
## Submitting a Pull Request
* Submit a ticket for your issue, assuming one does not already exist
* Describe the reason for the pull request and if applicable show some example inputs and outputs to demonstrate what the patch does
* Fork the repository on Github
* Before submitting the pull request you should
1. Include tests for your patch, 1 test should encapsulate the entire patch and should refer to the Github issue
1. If you have added new exposed/public functionality, you should ensure it is documented appropriately
1. If you have added new exposed/public functionality, you should consider demonstrating how to use it within one of the helpers or shipped policies if appropriate or within a test if modifying a helper or policy is not appropriate
1. Run all of the tests `go test -v ./...` and ensure all tests pass
1. Run gofmt `go fmt ./...`
1. Run vet `go vet ./...` and resolve any issues
* When submitting the pull request you should
1. Note the issue(s) it resolves, i.e. `Closes #6` in the pull request comment to close issue #6 when the pull request is accepted
Once you have submitted a pull request, we *may* merge it without changes. If we have any comments or feedback, or need you to make changes to your pull request we will update the Github pull request or the associated issue. We expect responses from you within two weeks, and we may close the pull request if there is no activity.
### Contributor Licence Agreement
We haven't gone for the formal "Sign a Contributor Licence Agreement" thing that projects like [puppet](https://cla.puppetlabs.com/), [Mojito](https://developer.yahoo.com/cocktails/mojito/cla/) and companies like [Google](http://code.google.com/legal/individual-cla-v1.0.html) are using.
But we do need to know that we can accept and merge your contributions, so for now the act of contributing a pull request should be considered equivalent to agreeing to a contributor licence agreement, specifically:
* You accept that the act of submitting code to the bluemonday project is to grant a copyright licence to the project that is perpetual, worldwide, non-exclusive, no-charge, royalty free and irrevocable.
* You accept that all who comply with the licence of the project (BSD 3-clause) are permitted to use your contributions to the project.
* You accept, and by submitting code do declare, that you have the legal right to grant such a licence to the project and that each of the contributions is your own original creation.

8
vendor/github.com/microcosm-cc/bluemonday/CREDITS.md generated vendored Normal file
View File

@@ -0,0 +1,8 @@
1. John Graham-Cumming http://jgc.org/
1. Mohammad Gufran https://github.com/Gufran
1. Steven Gutzwiller https://github.com/StevenGutzwiller
1. Andrew Krasichkov @buglloc https://github.com/buglloc
1. Mike Samuel mikesamuel@gmail.com
1. Dmitri Shuralyov shurcooL@gmail.com
1. opennota https://github.com/opennota https://gitlab.com/opennota
1. Tom Anthony https://www.tomanthony.co.uk/

28
vendor/github.com/microcosm-cc/bluemonday/LICENSE.md generated vendored Normal file
View File

@@ -0,0 +1,28 @@
Copyright (c) 2014, David Kitchen <david@buro9.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the organisation (Microcosm) nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

386
vendor/github.com/microcosm-cc/bluemonday/README.md generated vendored Normal file
View File

@@ -0,0 +1,386 @@
# bluemonday [![GoDoc](https://godoc.org/github.com/microcosm-cc/bluemonday?status.png)](https://godoc.org/github.com/microcosm-cc/bluemonday) [![Sourcegraph](https://sourcegraph.com/github.com/microcosm-cc/bluemonday/-/badge.svg)](https://sourcegraph.com/github.com/microcosm-cc/bluemonday?badge)
bluemonday is a HTML sanitizer implemented in Go. It is fast and highly configurable.
bluemonday takes untrusted user generated content as an input, and will return HTML that has been sanitised against an allowlist of approved HTML elements and attributes so that you can safely include the content in your web page.
If you accept user generated content, and your server uses Go, you **need** bluemonday.
The default policy for user generated content (`bluemonday.UGCPolicy().Sanitize()`) turns this:
```html
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World
```
Into a harmless:
```html
Hello World
```
And it turns this:
```html
<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>
```
Into this:
```html
XSS
```
Whilst still allowing this:
```html
<a href="http://www.google.com/">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
```
To pass through mostly unaltered (it gained a rel="nofollow" which is a good thing for user generated content):
```html
<a href="http://www.google.com/" rel="nofollow">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
```
It protects sites from [XSS](http://en.wikipedia.org/wiki/Cross-site_scripting) attacks. There are many [vectors for an XSS attack](https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet) and the best way to mitigate the risk is to sanitize user input against a known safe list of HTML elements and attributes.
You should **always** run bluemonday **after** any other processing.
If you use [blackfriday](https://github.com/russross/blackfriday) or [Pandoc](http://johnmacfarlane.net/pandoc/) then bluemonday should be run after these steps. This ensures that no insecure HTML is introduced later in your process.
bluemonday is heavily inspired by both the [OWASP Java HTML Sanitizer](https://code.google.com/p/owasp-java-html-sanitizer/) and the [HTML Purifier](http://htmlpurifier.org/).
## Technical Summary
Allowlist based, you need to either build a policy describing the HTML elements and attributes to permit (and the `regexp` patterns of attributes), or use one of the supplied policies representing good defaults.
The policy containing the allowlist is applied using a fast non-validating, forward only, token-based parser implemented in the [Go net/html library](https://godoc.org/golang.org/x/net/html) by the core Go team.
We expect to be supplied with well-formatted HTML (closing elements for every applicable open element, nested correctly) and so we do not focus on repairing badly nested or incomplete HTML. We focus on simply ensuring that whatever elements do exist are described in the policy allowlist and that attributes and links are safe for use on your web page. [GIGO](http://en.wikipedia.org/wiki/Garbage_in,_garbage_out) does apply and if you feed it bad HTML bluemonday is not tasked with figuring out how to make it good again.
## Is it production ready?
*Yes*
We are using bluemonday in production having migrated from the widely used and heavily field tested OWASP Java HTML Sanitizer.
We are passing our extensive test suite (including AntiSamy tests as well as tests for any issues raised). Check for any [unresolved issues](https://github.com/microcosm-cc/bluemonday/issues?page=1&state=open) to see whether anything may be a blocker for you.
We invite pull requests and issues to help us ensure we are offering comprehensive protection against various attacks via user generated content.
## Usage
Install using `go get github.com/microcosm-cc/bluemonday`
Then call it:
```go
package main
import (
"fmt"
"github.com/microcosm-cc/bluemonday"
)
func main() {
// Do this once for each unique policy, and use the policy for the life of the program
// Policy creation/editing is not safe to use in multiple goroutines
p := bluemonday.UGCPolicy()
// The policy can then be used to sanitize lots of input and it is safe to use the policy in multiple goroutines
html := p.Sanitize(
`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
)
// Output:
// <a href="http://www.google.com" rel="nofollow">Google</a>
fmt.Println(html)
}
```
We offer three ways to call Sanitize:
```go
p.Sanitize(string) string
p.SanitizeBytes([]byte) []byte
p.SanitizeReader(io.Reader) bytes.Buffer
```
If you are obsessed about performance, `p.SanitizeReader(r).Bytes()` will return a `[]byte` without performing any unnecessary casting of the inputs or outputs. Though the difference is so negligible you should never need to care.
You can build your own policies:
```go
package main
import (
"fmt"
"github.com/microcosm-cc/bluemonday"
)
func main() {
p := bluemonday.NewPolicy()
// Require URLs to be parseable by net/url.Parse and either:
// mailto: http:// or https://
p.AllowStandardURLs()
// We only allow <p> and <a href="">
p.AllowAttrs("href").OnElements("a")
p.AllowElements("p")
html := p.Sanitize(
`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
)
// Output:
// <a href="http://www.google.com">Google</a>
fmt.Println(html)
}
```
We ship two default policies:
1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
2. `bluemonday.UGCPolicy()` which allows a broad selection of HTML elements and attributes that are safe for user generated content. Note that this policy does *not* allow iframes, object, embed, styles, script, etc. An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.
## Policy Building
The essence of building a policy is to determine which HTML elements and attributes are considered safe for your scenario. OWASP provide an [XSS prevention cheat sheet](https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet) to help explain the risks, but essentially:
1. Avoid anything other than the standard HTML elements
1. Avoid `script`, `style`, `iframe`, `object`, `embed`, `base` elements that allow code to be executed by the client or third party content to be included that can execute code
1. Avoid anything other than plain HTML attributes with values matched to a regexp
Basically, you should be able to describe what HTML is fine for your scenario. If you do not have confidence that you can describe your policy please consider using one of the shipped policies such as `bluemonday.UGCPolicy()`.
To create a new policy:
```go
p := bluemonday.NewPolicy()
```
To add elements to a policy either add just the elements:
```go
p.AllowElements("b", "strong")
```
Or using a regex:
_Note: if an element is added by name as shown above, any matching regex will be ignored_
It is also recommended to ensure multiple patterns don't overlap as order of execution is not guaranteed and can result in some rules being missed.
```go
p.AllowElementsMatching(regexp.MustCompile(`^my-element-`))
```
Or add elements as a virtue of adding an attribute:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("nowrap").OnElements("td", "th")
```
Again, this also supports a regex pattern match alternative:
```go
p.AllowAttrs("nowrap").OnElementsMatching(regexp.MustCompile(`^my-element-`))
```
Attributes can either be added to all elements:
```go
p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally()
```
Or attributes can be added to specific elements:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("value").OnElements("li")
```
It is **always** recommended that an attribute be made to match a pattern. XSS in HTML attributes is very easy otherwise:
```go
// \p{L} matches unicode letters, \p{N} matches unicode numbers
p.AllowAttrs("title").Matching(regexp.MustCompile(`[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&]*`)).Globally()
```
You can stop at any time and call .Sanitize():
```go
// string htmlIn passed in from a HTTP POST
htmlOut := p.Sanitize(htmlIn)
```
And you can take any existing policy and extend it:
```go
p := bluemonday.UGCPolicy()
p.AllowElements("fieldset", "select", "option")
```
### Inline CSS
Although it's possible to handle inline CSS using `AllowAttrs` with a `Matching` rule, writing a single monolithic regular expression to safely process all inline CSS which you wish to allow is not a trivial task. Instead of attempting to do so, you can allow the `style` attribute on whichever element(s) you desire and use style policies to control and sanitize inline styles.
It is strongly recommended that you use `Matching` (with a suitable regular expression)
`MatchingEnum`, or `MatchingHandler` to ensure each style matches your needs,
but default handlers are supplied for most widely used styles.
Similar to attributes, you can allow specific CSS properties to be set inline:
```go
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'color' property with valid RGB(A) hex values only (on any element allowed a 'style' attribute)
p.AllowStyles("color").Matching(regexp.MustCompile("(?i)^#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$")).Globally()
```
Additionally, you can allow a CSS property to be set only to an allowed value:
```go
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
// on 'span' elements only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span")
```
Or you can specify elements based on a regex pattern match:
```go
p.AllowAttrs("style").OnElementsMatching(regexp.MustCompile(`^my-element-`))
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
// on elements whose names match the pattern only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElementsMatching(regexp.MustCompile(`^my-element-`))
```
If you need more specific checking, you can create a handler that takes in a string and returns a bool to
validate the values for a given property. The string parameter has been
converted to lowercase and unicode code points have been converted.
```go
myHandler := func(value string) bool{
// Validate your input here
return true
}
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'color' property with values validated by the handler (on any element allowed a 'style' attribute)
p.AllowStyles("color").MatchingHandler(myHandler).Globally()
```
### Links
Links are difficult beasts to sanitise safely and also one of the biggest attack vectors for malicious content.
It is possible to do this:
```go
p.AllowAttrs("href").Matching(regexp.MustCompile(`(?i)mailto|https?`)).OnElements("a")
```
But that will not protect you as the regular expression is insufficient in this case to have prevented a malformed value doing something unexpected.
We provide some additional global options for safely working with links.
`RequireParseableURLs` will ensure that URLs are parseable by Go's `net/url` package:
```go
p.RequireParseableURLs(true)
```
If you have enabled parseable URLs then the following option will `AllowRelativeURLs`. By default this is disabled (bluemonday is an allowlist tool... you need to explicitly tell us to permit things) and when disabled it will prevent all local and scheme relative URLs (i.e. `href="localpage.html"`, `href="../home.html"` and even `href="//www.google.com"` are relative):
```go
p.AllowRelativeURLs(true)
```
If you have enabled parseable URLs then you can allow the schemes (commonly called protocol when thinking of `http` and `https`) that are permitted. Bear in mind that allowing relative URLs in the above option will allow for a blank scheme:
```go
p.AllowURLSchemes("mailto", "http", "https")
```
Regardless of whether you have enabled parseable URLs, you can force all URLs to have a rel="nofollow" attribute. This will be added if it does not exist, but only when the `href` is valid:
```go
// This applies to "a" "area" "link" elements that have a "href" attribute
p.RequireNoFollowOnLinks(true)
```
Similarly, you can force all URLs to have "noreferrer" in their rel attribute.
```go
// This applies to "a" "area" "link" elements that have a "href" attribute
p.RequireNoReferrerOnLinks(true)
```
We provide a convenience method that applies all of the above, but you will still need to allow the linkable elements for the URL rules to be applied to:
```go
p.AllowStandardURLs()
p.AllowAttrs("cite").OnElements("blockquote", "q")
p.AllowAttrs("href").OnElements("a", "area")
p.AllowAttrs("src").OnElements("img")
```
An additional complexity regarding links is the data URI as defined in [RFC2397](http://tools.ietf.org/html/rfc2397). The data URI allows for images to be served inline using this format:
```html
<img src="data:image/webp;base64,UklGRh4AAABXRUJQVlA4TBEAAAAvAAAAAAfQ//73v/+BiOh/AAA=">
```
We have provided a helper to verify the mimetype followed by base64 content of data URIs links:
```go
p.AllowDataURIImages()
```
That helper will enable GIF, JPEG, PNG and WEBP images.
It should be noted that there is a potential [security](http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/) [risk](https://capec.mitre.org/data/definitions/244.html) with the use of data URI links. You should only enable data URI links if you already trust the content.
We also have some features to help deal with user generated content:
```go
p.AddTargetBlankToFullyQualifiedLinks(true)
```
This will ensure that anchor `<a href="" />` links that are fully qualified (the href destination includes a host name) will get `target="_blank"` added to them.
Additionally any link that has `target="_blank"` after the policy has been applied will also have the `rel` attribute adjusted to add `noopener`. This means a link may start like `<a href="//host/path"/>` and will end up as `<a href="//host/path" rel="noopener" target="_blank">`. It is important to note that the addition of `noopener` is a security feature and not an issue. There is an unfortunate feature to browsers that a browser window opened as a result of `target="_blank"` can still control the opener (your web page) and this protects against that. The background to this can be found here: [https://dev.to/ben/the-targetblank-vulnerability-by-example](https://dev.to/ben/the-targetblank-vulnerability-by-example)
### Policy Building Helpers
We also bundle some helpers to simplify policy building:
```go
// Permits the "dir", "id", "lang", "title" attributes globally
p.AllowStandardAttributes()
// Permits the "img" element and its standard attributes
p.AllowImages()
// Permits ordered and unordered lists, and also definition lists
p.AllowLists()
// Permits HTML tables and all applicable elements and non-styling attributes
p.AllowTables()
```
### Invalid Instructions
The following are invalid:
```go
// This does not say where the attributes are allowed, you need to add
// .Globally() or .OnElements(...)
// This will be ignored without error.
p.AllowAttrs("value")
// This does not say where the attributes are allowed, you need to add
// .Globally() or .OnElements(...)
// This will be ignored without error.
p.AllowAttrs(
"type",
).Matching(
regexp.MustCompile("(?i)^(circle|disc|square|a|A|i|I|1)$"),
)
```
Both examples exhibit the same issue, they declare attributes but do not then specify whether they are allowed globally or only on specific elements (and which elements). Attributes belong to one or more elements, and the policy needs to declare this.
## Limitations
We are not yet including any tools to help allow and sanitize CSS. Which means that unless you wish to do the heavy lifting in a single regular expression (inadvisable), **you should not allow the "style" attribute anywhere**.
In the same theme, both `<script>` and `<style>` are considered harmful. These elements (and their content) will not be rendered by default, and require you to explicitly set `p.AllowUnsafe(true)`. You should be aware that allowing these elements defeats the purpose of using a HTML sanitizer, as you would be explicitly allowing both JavaScript (and any plainly written XSS) and CSS (which can modify a DOM to insert JS). Additionally, limitations in this library mean it is not aware of whether HTML is validly structured, and that can allow these elements to bypass some of the safety mechanisms built into the [WhatWG HTML parser standard](https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect).
It is not the job of bluemonday to fix your bad HTML, it is merely the job of bluemonday to prevent malicious HTML getting through. If you have mismatched HTML elements, or non-conforming nesting of elements, those will remain. But if you have well-structured HTML bluemonday will not break it.
## TODO
* Investigate whether devs want to blacklist elements and attributes. This would allow devs to take an existing policy (such as the `bluemonday.UGCPolicy()` ) that encapsulates 90% of what they're looking for but does more than they need, and to remove the extra things they do not want to make it 100% what they want
* Investigate whether devs want a validating HTML mode, in which the HTML elements are not just transformed into a balanced tree (every start tag has a closing tag at the correct depth) but also that elements and character data appear only in their allowed context (i.e. that a `table` element isn't a descendant of a `caption`, that `colgroup`, `thead`, `tbody`, `tfoot` and `tr` are permitted, and that character data is not permitted)
## Long term goals
1. Open the code to adversarial peer review similar to the [Attack Review Ground Rules](https://code.google.com/p/owasp-java-html-sanitizer/wiki/AttackReviewGroundRules)
1. Raise funds and pay for an external security review

13
vendor/github.com/microcosm-cc/bluemonday/SECURITY.md generated vendored Normal file
View File

@@ -0,0 +1,13 @@
# Security Policy
## Supported Versions
Latest tag and tip are supported.
Changes are not backported, please verify any issue against the latest tag and tip.
## Reporting a Vulnerability
Report vulnerabilities either via [GitHub's private reporting flow](https://github.com/microcosm-cc/bluemonday/security/advisories/new) or via email to the security@ alias of geomys.org.
There is no bug bounty program but security issues will be taken seriously and resolved as soon as possible.

2016
vendor/github.com/microcosm-cc/bluemonday/css/handlers.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

104
vendor/github.com/microcosm-cc/bluemonday/doc.go generated vendored Normal file
View File

@@ -0,0 +1,104 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
Package bluemonday provides a way of describing an allowlist of HTML elements
and attributes as a policy, and for that policy to be applied to untrusted
strings from users that may contain markup. All elements and attributes not on
the allowlist will be stripped.
The default bluemonday.UGCPolicy().Sanitize() turns this:
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World
Into the more harmless:
Hello World
And it turns this:
<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>
Into this:
XSS
Whilst still allowing this:
<a href="http://www.google.com/">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
To pass through mostly unaltered (it gained a rel="nofollow"):
<a href="http://www.google.com/" rel="nofollow">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
The primary purpose of bluemonday is to take potentially unsafe user generated
content (from things like Markdown, HTML WYSIWYG tools, etc) and make it safe
for you to put on your website.
It protects sites against XSS (http://en.wikipedia.org/wiki/Cross-site_scripting)
and other malicious content that a user interface may deliver. There are many
vectors for an XSS attack (https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet)
and the safest thing to do is to sanitize user input against a known safe list
of HTML elements and attributes.
Note: You should always run bluemonday after any other processing.
If you use blackfriday (https://github.com/russross/blackfriday) or
Pandoc (http://johnmacfarlane.net/pandoc/) then bluemonday should be run after
these steps. This ensures that no insecure HTML is introduced later in your
process.
bluemonday is heavily inspired by both the OWASP Java HTML Sanitizer
(https://code.google.com/p/owasp-java-html-sanitizer/) and the HTML Purifier
(http://htmlpurifier.org/).
We ship two default policies, one is bluemonday.StrictPolicy() and can be
thought of as equivalent to stripping all HTML elements and their attributes as
it has nothing on its allowlist.
The other is bluemonday.UGCPolicy() and allows a broad selection of HTML
elements and attributes that are safe for user generated content. Note that
this policy does not allow iframes, object, embed, styles, script, etc.
The essence of building a policy is to determine which HTML elements and
attributes are considered safe for your scenario. OWASP provide an XSS
prevention cheat sheet ( https://www.google.com/search?q=xss+prevention+cheat+sheet )
to help explain the risks, but essentially:
1. Avoid allowing anything other than plain HTML elements
2. Avoid allowing `script`, `style`, `iframe`, `object`, `embed`, `base`
elements
3. Avoid allowing anything other than plain HTML elements with simple
values that you can match to a regexp
*/
package bluemonday

300
vendor/github.com/microcosm-cc/bluemonday/helpers.go generated vendored Normal file
View File

@@ -0,0 +1,300 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import (
"encoding/base64"
"net/url"
"regexp"
)
// A selection of regular expressions that can be used as .Matching() rules on
// HTML attributes.
var (
	// CellAlign handles the `align` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
	CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)

	// CellVerticalAlign handles the `valign` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
	CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)

	// Direction handles the `dir` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
	Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)

	// ImageAlign handles the `align` attribute on the `img` tag
	// http://www.w3.org/MarkUp/Test/Img/imgtest.html
	ImageAlign = regexp.MustCompile(
		`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
	)

	// Integer describes whole positive integers (including 0) used in places
	// like td.colspan
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
	Integer = regexp.MustCompile(`^[0-9]+$`)

	// ISO8601 according to the W3 group is only a subset of the ISO8601
	// standard: http://www.w3.org/TR/NOTE-datetime
	//
	// Used in places like time.datetime
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
	//
	// Matches patterns:
	//  Year:
	//     YYYY (eg 1997)
	//  Year and month:
	//     YYYY-MM (eg 1997-07)
	//  Complete date:
	//     YYYY-MM-DD (eg 1997-07-16)
	//  Complete date plus hours and minutes:
	//     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
	//  Complete date plus hours, minutes and seconds:
	//     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
	//  Complete date plus hours, minutes, seconds and a decimal fraction of a
	//  second
	//     YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
	//
	// The date and time may be separated by either "T" or a single space, and
	// the fraction may have between one and six digits.
	//
	// The fraction separator is an escaped, literal "." so that only a decimal
	// point introduces the fraction; an unescaped "." would let any character
	// stand in its place (e.g. "19:20:30x45").
	ISO8601 = regexp.MustCompile(
		`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(\.[0-9]{1,6})` +
			`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
	)

	// ListType encapsulates the common value as well as the latest spec
	// values for lists
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
	ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)

	// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
	// `class` which both contain space delimited lists of data tokens
	// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
	// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
	SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)

	// Number is a double value used on HTML5 meter and progress elements
	// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
	Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)

	// NumberOrPercent is used predominantly as units of measurement in width
	// and height attributes
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
	NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)

	// Paragraph of text in an attribute such as *.'title', img.alt, etc
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
	// Note that we are not allowing chars that could close tags like '>'
	Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)

	// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
	// prefix of data URIs that contain common web image formats (gif, jpeg,
	// png, svg+xml and webp), always followed by the ";base64," marker.
	//
	// This is not exported as it's not useful by itself, and only has value
	// within the AllowDataURIImages func
	dataURIImagePrefix = regexp.MustCompile(
		`^image/(gif|jpeg|png|svg\+xml|webp);base64,`,
	)
)
// AllowStandardURLs is a convenience helper that applies the usual URL rules
// in a single call: URLs must be parseable, may be relative, and otherwise
// must use one of the "mailto", "http" or "https" schemes. It also switches
// on rel="nofollow" for the linking elements "a", "area" and "link" (if you
// have allowed those elements).
func (p *Policy) AllowStandardURLs() {
	// Anything net/url.Parse() cannot parse is rejected.
	p.RequireParseableURLs(true)

	// URLs for which url.IsAbs() is false are accepted.
	p.AllowRelativeURLs(true)

	// Only the most common URL schemes are accepted.
	commonSchemes := []string{"mailto", "http", "https"}
	p.AllowURLSchemes(commonSchemes...)

	// Linking elements ("a", "area", "link") get rel="nofollow" added when it
	// is not already present.
	p.RequireNoFollowOnLinks(true)
}
// AllowStandardAttributes enables the "id" and "title" attributes, plus the
// language specific attributes "dir" and "lang", on every element that the
// policy allows.
func (p *Policy) AllowStandardAttributes() {
	// "dir" and "lang" are both permitted, as language attributes affect
	// charsets and the direction of text.
	p.AllowAttrs("dir").Matching(Direction).Globally()

	// NOTE(review): this pattern is not anchored with ^...$, so (assuming
	// Matching performs an ordinary regexp match) any value that merely
	// contains two consecutive ASCII letters is accepted. Anchoring it needs a
	// decision on which language tag forms (e.g. "en-US") must stay valid.
	langPattern := regexp.MustCompile(`[a-zA-Z]{2,20}`)
	p.AllowAttrs("lang").Matching(langPattern).Globally()

	// "id" is permitted, largely because some HTML elements need it to work
	// well ("dfn" is one example). The risk is that JavaScript and CSS in
	// your web page may then pick out the wrong elements, so make sure your
	// selectors are accurate.
	//
	// NOTE(review): this pattern is also unanchored, so a value only needs to
	// contain one of the listed characters somewhere to be accepted.
	idPattern := regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`)
	p.AllowAttrs("id").Matching(idPattern).Globally()

	// "title" is permitted because it improves accessibility.
	p.AllowAttrs("title").Matching(Paragraph).Globally()
}
// AllowStyling currently does one thing: it permits the "class" attribute on
// all elements, provided the value is a list of space separated tokens.
//
// Note: When bluemonday ships a CSS parser and we can safely sanitise that,
// this will also allow sanitized styling of elements via the style attribute.
func (p *Policy) AllowStyling() {
	// "class" is allowed everywhere, restricted to SpaceSeparatedTokens.
	classAttr := p.AllowAttrs("class").Matching(SpaceSeparatedTokens)
	classAttr.Globally()
}
// AllowImages permits the "img" element together with a handful of popular
// attributes ("align", "alt", "height", "width" and "src"), and applies the
// standard URL rules so that URL values must be parseable. Data URI images
// are not enabled by this helper; call AllowDataURIImages() as well if you
// need them.
func (p *Policy) AllowImages() {
	const img = "img"

	// Presentational and descriptive attributes, each constrained by a regexp.
	p.AllowAttrs("align").Matching(ImageAlign).OnElements(img)
	p.AllowAttrs("alt").Matching(Paragraph).OnElements(img)
	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements(img)

	// The image source is governed by the standard URL rules.
	p.AllowStandardURLs()
	p.AllowAttrs("src").OnElements(img)
}
// AllowDataURIImages permits the use of inline images defined in RFC2397
// http://tools.ietf.org/html/rfc2397
// http://en.wikipedia.org/wiki/Data_URI_scheme
//
// Images must have a mimetype accepted by dataURIImagePrefix:
//
//	image/gif
//	image/jpeg
//	image/png
//	image/svg+xml
//	image/webp
//
// and the payload after the ";base64," marker must be valid standard base64.
// Data URIs that carry a query string or a fragment are rejected.
//
// NOTE: There is a potential security risk to allowing data URIs and you should
// only permit them on content you already trust.
// http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/
// https://capec.mitre.org/data/definitions/244.html
func (p *Policy) AllowDataURIImages() {
	// URLs must be parseable by net/url.Parse()
	p.RequireParseableURLs(true)

	// Supply a function to validate images contained within data URI. The
	// parameter is named u (not url) so that it does not shadow the net/url
	// package inside the closure.
	p.AllowURLSchemeWithCustomPolicy(
		"data",
		func(u *url.URL) bool {
			// A query string or fragment is never accepted on a data URI.
			if u.RawQuery != "" || u.Fragment != "" {
				return false
			}

			// The opaque part must start with an allowed
			// "image/<type>;base64," prefix.
			prefix := dataURIImagePrefix.FindString(u.Opaque)
			if prefix == "" {
				return false
			}

			// Everything after the prefix must decode as standard base64.
			_, err := base64.StdEncoding.DecodeString(u.Opaque[len(prefix):])
			return err == nil
		},
	)
}
// AllowLists enables ordered and unordered lists, as well as definition
// lists.
func (p *Policy) AllowLists() {
	// List containers: "ol" and "ul", with a constrained "type" attribute.
	p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul")

	// List items: "li", with constrained "type" and integer "value"
	// attributes.
	const item = "li"
	p.AllowAttrs("type").Matching(ListType).OnElements(item)
	p.AllowAttrs("value").Matching(Integer).OnElements(item)

	// Definition lists: "dl", "dt" and "dd", with no attributes of their own.
	p.AllowElements("dl", "dt", "dd")
}
// AllowTables will enable a rich set of elements and attributes to describe
// HTML tables: "table", "caption", "col", "colgroup", "thead", "tbody",
// "tfoot", "tr", "td" and "th", each with non-styling attributes whose values
// are constrained by a regexp.
func (p *Policy) AllowTables() {
	// "table" is permitted
	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table")
	p.AllowAttrs("summary").Matching(Paragraph).OnElements("table")

	// "caption" is permitted
	p.AllowElements("caption")

	// "col" "colgroup" are permitted
	p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup")
	p.AllowAttrs("height", "width").Matching(
		NumberOrPercent,
	).OnElements("col", "colgroup")
	p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col")
	p.AllowAttrs("valign").Matching(
		CellVerticalAlign,
	).OnElements("col", "colgroup")

	// "thead" "tr" are permitted
	p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr")
	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr")

	// "td" "th" are permitted
	p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th")
	p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th")
	p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th")
	p.AllowAttrs("headers").Matching(
		SpaceSeparatedTokens,
	).OnElements("td", "th")
	p.AllowAttrs("height", "width").Matching(
		NumberOrPercent,
	).OnElements("td", "th")
	// "scope" must be exactly one of row, col, rowgroup or colgroup. The
	// pattern is anchored so that a value merely containing "row" or "col"
	// is not accepted.
	p.AllowAttrs(
		"scope",
	).Matching(
		regexp.MustCompile(`(?i)^(?:row|col)(?:group)?$`),
	).OnElements("td", "th")
	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th")
	// "nowrap" is a boolean-style attribute: it is either blank or the value
	// "nowrap". An unanchored `|nowrap` alternation has an empty branch that
	// matches every string, so the pattern is anchored and grouped.
	p.AllowAttrs("nowrap").Matching(
		regexp.MustCompile(`(?i)^(|nowrap)$`),
	).OnElements("td", "th")

	// "tbody" "tfoot"
	p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot")
	p.AllowAttrs("valign").Matching(
		CellVerticalAlign,
	).OnElements("tbody", "tfoot")
}
// AllowIFrames permits the "sandbox" attribute on "iframe" elements and
// forwards vals to RequireSandboxOnIFrame.
//
// NOTE(review): presumably vals is the set of sandbox tokens that may remain
// on an iframe — confirm against RequireSandboxOnIFrame.
func (p *Policy) AllowIFrames(vals ...SandboxValue) {
	sandboxAttr := p.AllowAttrs("sandbox")
	sandboxAttr.OnElements("iframe")

	p.RequireSandboxOnIFrame(vals...)
}

253
vendor/github.com/microcosm-cc/bluemonday/policies.go generated vendored Normal file
View File

@@ -0,0 +1,253 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import (
"regexp"
)
// StrictPolicy returns a freshly created policy with nothing on its
// allowlist, which will effectively strip all HTML elements and their
// attributes from a document.
func StrictPolicy() *Policy {
	empty := NewPolicy()
	return empty
}
// StripTagsPolicy returns the same policy as StrictPolicy.
//
// Deprecated: Use StrictPolicy instead.
func StripTagsPolicy() *Policy {
	strict := StrictPolicy()
	return strict
}
// UGCPolicy returns a policy aimed at user generated content that is a result
// of HTML WYSIWYG tools and Markdown conversions.
//
// This is expected to be a fairly rich document where as much markup as
// possible should be retained. Markdown permits raw HTML so we are basically
// providing a policy to sanitise HTML5 documents safely but with the
// least intrusion on the formatting expectations of the user.
//
// The returned policy is built from NewPolicy() on every call, combining the
// AllowStandardAttributes, AllowStandardURLs, AllowLists, AllowTables and
// AllowImages helpers with the element and attribute rules listed below.
func UGCPolicy() *Policy {
p := NewPolicy()
///////////////////////
// Global attributes //
///////////////////////
// "class" is not permitted as we are not allowing users to style their own
// content
p.AllowStandardAttributes()
//////////////////////////////
// Global URL format policy //
//////////////////////////////
p.AllowStandardURLs()
////////////////////////////////
// Declarations and structure //
////////////////////////////////
// "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
// expecting user generated content to be a fragment of HTML and not a full
// document.
//////////////////////////
// Sectioning root tags //
//////////////////////////
// "article" and "aside" are permitted and take no attributes
p.AllowElements("article", "aside")
// "body" is not permitted as we are expecting user generated content to be a fragment
// of HTML and not a full document.
// "details" is permitted, including the "open" attribute which can either
// be blank or the value "open".
p.AllowAttrs(
"open",
).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
// "fieldset" is not permitted as we are not allowing forms to be created.
// "figure" is permitted and takes no attributes
p.AllowElements("figure")
// "nav" is not permitted as it is assumed that the site (and not the user)
// has defined navigation elements
// "section" is permitted and takes no attributes
p.AllowElements("section")
// "summary" is permitted and takes no attributes
p.AllowElements("summary")
//////////////////////////
// Headings and footers //
//////////////////////////
// "footer" is not permitted as we expect user content to be a fragment and
// not structural to this extent
// "h1" through "h6" are permitted and take no attributes
p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
// "header" is not permitted as we expect user content to be a fragment and
// not structural to this extent
// "hgroup" is permitted and takes no attributes
p.AllowElements("hgroup")
/////////////////////////////////////
// Content grouping and separating //
/////////////////////////////////////
// "blockquote" is permitted, including the "cite" attribute which must be
// a standard URL.
p.AllowAttrs("cite").OnElements("blockquote")
// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
p.AllowElements("br", "div", "hr", "p", "span", "wbr")
///////////
// Links //
///////////
// "a" is permitted
p.AllowAttrs("href").OnElements("a")
// "area" is permitted along with the attributes that make image maps work:
// "name" on "map", "usemap" on "img", and "alt", "coords", "href", "rel" and
// "shape" on "area".
p.AllowAttrs("name").Matching(
regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
).OnElements("map")
p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
p.AllowAttrs("coords").Matching(
regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
).OnElements("area")
p.AllowAttrs("href").OnElements("area")
p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
p.AllowAttrs("shape").Matching(
regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
).OnElements("area")
p.AllowAttrs("usemap").Matching(
regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
).OnElements("img")
// "link" is not permitted
/////////////////////
// Phrase elements //
/////////////////////
// The following are all inline phrasing elements
p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
// "q" is permitted and "cite" is a URL and handled by URL policies
p.AllowAttrs("cite").OnElements("q")
// "time" is permitted
p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
////////////////////
// Style elements //
////////////////////
// block and inline elements that impart no semantic meaning but style the
// document
p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
// "style" is not permitted as we are not yet sanitising CSS and it is an
// XSS attack vector
//////////////////////
// HTML5 Formatting //
//////////////////////
// "bdi" "bdo" are permitted
p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
// "rp" "rt" "ruby" are permitted
p.AllowElements("rp", "rt", "ruby")
///////////////////////////
// HTML5 Change tracking //
///////////////////////////
// "del" "ins" are permitted
// NOTE(review): unlike "cite" on "blockquote" and "q" above, "cite" here is
// additionally constrained by the Paragraph regexp — confirm this is intended.
p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
///////////
// Lists //
///////////
p.AllowLists()
////////////
// Tables //
////////////
p.AllowTables()
///////////
// Forms //
///////////
// By and large, forms are not permitted. However there are some form
// elements that can be used to present data, and we do permit those
//
// "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
// "textarea" "optgroup" "option" are all not permitted
// "meter" is permitted
p.AllowAttrs(
"value",
"min",
"max",
"low",
"high",
"optimum",
).Matching(Number).OnElements("meter")
// "progress" is permitted
p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
//////////////////////
// Embedded content //
//////////////////////
// Vast majority not permitted
// "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
// "video" are all not permitted
// "img" is the one embedded element enabled, via the AllowImages helper.
p.AllowImages()
return p
}

990
vendor/github.com/microcosm-cc/bluemonday/policy.go generated vendored Normal file
View File

@@ -0,0 +1,990 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
//TODO sgutzwiller create map of styles to default handlers
//TODO sgutzwiller create handlers for various attributes
import (
"net/url"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday/css"
)
// Policy encapsulates the allowlist of HTML elements and attributes that will
// be applied to the sanitised HTML.
//
// You should use bluemonday.NewPolicy() to create a blank policy as the
// unexported fields contain maps that need to be initialized.
type Policy struct {
// Declares whether the maps have been initialized, used as a cheap check to
// ensure that those using Policy{} directly won't cause nil pointer
// exceptions
initialized bool
// If true then we add spaces when stripping tags, specifically the closing
// tag is replaced by a space character.
addSpaces bool
// When true, add rel="nofollow" to HTML a, area, and link tags
requireNoFollow bool
// When true, add rel="nofollow" to HTML a, area, and link tags
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"
requireNoFollowFullyQualifiedLinks bool
// When true, add rel="noreferrer" to HTML a, area, and link tags
requireNoReferrer bool
// When true, add rel="noreferrer" to HTML a, area, and link tags
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"
requireNoReferrerFullyQualifiedLinks bool
// When true, add crossorigin="anonymous" to HTML audio, img, link, script, and video tags
requireCrossOriginAnonymous bool
// When true, add and filter sandbox attribute on iframe tags
requireSandboxOnIFrame map[string]bool
// When true add target="_blank" to fully qualified links
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"
addTargetBlankToFullyQualifiedLinks bool
// When true, URLs must be parseable by "net/url" url.Parse()
requireParseableURLs bool
// When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
allowRelativeURLs bool
// When true, allow data attributes.
allowDataAttributes bool
// When true, allow comments.
allowComments bool
// map[htmlElementName]map[htmlAttributeName][]attrPolicy
elsAndAttrs map[string]map[string][]attrPolicy
// elsMatchingAndAttrs stores regex based element matches along with attributes
elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy
// map[htmlAttributeName][]attrPolicy
globalAttrs map[string][]attrPolicy
// map[htmlElementName]map[cssPropertyName][]stylePolicy
elsAndStyles map[string]map[string][]stylePolicy
// map[regex]map[cssPropertyName][]stylePolicy
elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy
// map[cssPropertyName][]stylePolicy
globalStyles map[string][]stylePolicy
// If urlPolicy is nil, all URLs with matching schema are allowed.
// Otherwise, only the URLs with matching schema and urlPolicy(url)
// returning true are allowed.
allowURLSchemes map[string][]urlPolicy
// These regexps are used to match allowed URL schemes, for example
// if one would want to allow all URL schemes, they would add `.+`.
// However pay attention as this can lead to XSS being rendered thus
// defeating the purpose of using a HTML sanitizer.
// The regexps are only considered if a schema was not explicitly
// handled by `AllowURLSchemes` or `AllowURLSchemeWithCustomPolicy`.
allowURLSchemeRegexps []*regexp.Regexp
// If srcRewriter is not nil, it is used to rewrite the src attribute
// of tags that download resources, such as <img> and <script>.
// It requires that the URL is parsable by "net/url" url.Parse().
srcRewriter urlRewriter
// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
//
// However some elements are valid and have strong layout meaning without
// any attributes, i.e. <table>. To prevent those being removed we maintain
// a list of elements that are allowed to have no attributes and that will
// be maintained in the output HTML.
setOfElementsAllowedWithoutAttrs map[string]struct{}
// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
//
// However some elements are valid and have strong layout meaning without
// any attributes, i.e. <table>.
//
// In this case, any element matching a regular expression will be accepted without
// attributes added.
setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp
setOfElementsToSkipContent map[string]struct{}
// Permits fundamentally unsafe elements.
//
// If false (default) then elements such as `style` and `script` will not be
// permitted even if declared in a policy. These elements when combined with
// untrusted input cannot be safely handled by bluemonday at this point in
// time.
//
// If true then `style` and `script` would be permitted by bluemonday if a
// policy declares them. However this is not recommended under any circumstance
// and can lead to XSS being rendered thus defeating the purpose of using a
// HTML sanitizer.
allowUnsafe bool
}
// attrPolicy is the rule recorded for one allowed attribute. The policy keeps
// slices of these per element, per element pattern, and globally.
type attrPolicy struct {
// optional pattern to match, when not nil the regexp needs to match
// otherwise the attribute is removed
regexp *regexp.Regexp
}
// stylePolicy is the rule recorded for one allowed CSS property. The style
// policy builders in this file populate exactly one of handler, enum or regexp
// on each stylePolicy they create.
type stylePolicy struct {
// handler to validate
handler func(string) bool
// optional pattern to match, when not nil the regexp needs to match
// otherwise the property is removed
regexp *regexp.Regexp
// optional list of allowed property values, for properties which
// have a defined list of allowed values; property will be removed
// if the value is not allowed
enum []string
}
// attrPolicyBuilder collects the parts of an attribute rule until it is bound
// to the policy by OnElements, OnElementsMatching or Globally.
type attrPolicyBuilder struct {
// p is the policy the finished rule is written into.
p *Policy
// attrNames are the attribute names the rule covers; AllowAttrs stores them
// lower-cased.
attrNames []string
// regexp is the optional value pattern set by Matching; nil means no pattern.
regexp *regexp.Regexp
// allowEmpty is set by AllowNoAttrs; when true, binding to elements also
// records those elements as allowed without any attributes.
allowEmpty bool
}
// stylePolicyBuilder collects the parts of a CSS property rule until it is
// bound to the policy by OnElements, OnElementsMatching or Globally. When the
// rule is bound, handler takes precedence over enum, and enum over regexp.
type stylePolicyBuilder struct {
// p is the policy the finished rule is written into.
p *Policy
// propertyNames are the CSS property names the rule covers; AllowStyles
// stores them lower-cased.
propertyNames []string
// regexp is the optional value pattern set by Matching.
regexp *regexp.Regexp
// enum is the optional list of permitted values set by MatchingEnum.
enum []string
// handler is the optional validation callback set by MatchingHandler.
handler func(string) bool
}
// urlPolicy reports whether a parsed URL is allowed. Policies are registered
// per scheme in Policy.allowURLSchemes.
type urlPolicy func(url *url.URL) (allowUrl bool)
// urlRewriter receives a parsed URL by pointer and returns nothing, so any
// rewrite has to be made by modifying the URL it is handed. It is the type of
// Policy.srcRewriter.
type urlRewriter func(*url.URL)
// SandboxValue identifies one token of the iframe sandbox attribute. Values are
// passed to RequireSandboxOnIFrame, which maps each one to its "allow-..."
// token string.
type SandboxValue int64
// Sandbox tokens accepted by RequireSandboxOnIFrame. The numeric values are
// assigned by iota in declaration order, starting at zero.
const (
SandboxAllowDownloads SandboxValue = iota
SandboxAllowDownloadsWithoutUserActivation
SandboxAllowForms
SandboxAllowModals
SandboxAllowOrientationLock
SandboxAllowPointerLock
SandboxAllowPopups
SandboxAllowPopupsToEscapeSandbox
SandboxAllowPresentation
SandboxAllowSameOrigin
SandboxAllowScripts
SandboxAllowStorageAccessByUserActivation
SandboxAllowTopNavigation
SandboxAllowTopNavigationByUserActivation
)
// init allocates every map and slice that the policy builders write to.
// It only does work the first time it is called on a Policy: once the
// initialized flag is set, later calls return immediately, so entries added
// by earlier builder calls are never discarded.
func (p *Policy) init() {
	if p.initialized {
		return
	}

	// Attribute rules: per element, per element pattern, and global.
	p.elsAndAttrs = map[string]map[string][]attrPolicy{}
	p.elsMatchingAndAttrs = map[*regexp.Regexp]map[string][]attrPolicy{}
	p.globalAttrs = map[string][]attrPolicy{}

	// Style rules, in the same three scopes.
	p.elsAndStyles = map[string]map[string][]stylePolicy{}
	p.elsMatchingAndStyles = map[*regexp.Regexp]map[string][]stylePolicy{}
	p.globalStyles = map[string][]stylePolicy{}

	// URL scheme allowlists.
	p.allowURLSchemes = map[string][]urlPolicy{}
	p.allowURLSchemeRegexps = []*regexp.Regexp{}

	// Element sets.
	p.setOfElementsAllowedWithoutAttrs = map[string]struct{}{}
	p.setOfElementsToSkipContent = map[string]struct{}{}

	p.initialized = true
}
// NewPolicy returns an empty policy: no elements, attributes, styles or URL
// schemes are allowed yet. Only the built-in defaults are loaded, namely the
// set of elements that are valid without attributes and the set of elements
// whose content is dropped when the element itself is not allowed.
//
// This is the recommended starting point; follow it with AllowAttrs() and/or
// AllowElements() to build the allowlist.
func NewPolicy() *Policy {
	p := new(Policy)
	p.addDefaultElementsWithoutAttrs()
	p.addDefaultSkipElementContent()
	return p
}
// AllowAttrs starts an attribute rule for the given HTML attribute names and
// returns a builder on which the value pattern and the scope can be set.
// Names are stored lower-cased.
//
// Nothing is added to the policy until the builder is bound with Globally()
// or OnElements(...).
func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
	p.init()

	builder := &attrPolicyBuilder{p: p}
	for i := range attrNames {
		builder.attrNames = append(builder.attrNames, strings.ToLower(attrNames[i]))
	}
	return builder
}
// AllowDataAttributes permits every data attribute. Their names are chosen by
// the page author, so they cannot be allowlisted one by one.
//
// NOTE: the attribute values are not sanitized. This is a 'caveat emptor'
// option: whoever enables it must understand how the application consuming
// the sanitized HTML treats these values. For example, if the application
// reads a link from a data attribute and opens it automatically, the author
// of an HTML fragment gains the ability to open a malicious destination.
// Use with care!
func (p *Policy) AllowDataAttributes() {
	p.allowDataAttributes = true
}
// AllowComments allows comments.
//
// Only the standard HTML comment <!-- --> is allowed by this, which includes
// its use for conditional comments as per
// https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
//
// CDATA XML comments are not permitted: the x/net/html package this code
// depends on does not handle them fully, see
// https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . Unless
// x/net/html changes, a CDATA comment supplied while comments are allowed is
// treated as a plain HTML comment, as described in the x/net/html
// documentation.
func (p *Policy) AllowComments() {
	p.allowComments = true
}
// AllowNoAttrs starts a rule stating that the target elements are valid
// without any attributes. The returned builder has no attribute names.
//
// The rule only takes effect once it is bound with OnElements(...) or
// OnElementsMatching(...).
func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
	p.init()
	return &attrPolicyBuilder{p: p, allowEmpty: true}
}
// AllowNoAttrs marks the rule being built so that, in addition to the
// attributes already named on the builder, the target elements are accepted
// with no attributes at all. It returns the same builder for chaining.
//
// The rule only takes effect once it is bound with OnElements(...) or
// OnElementsMatching(...).
func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
	abp.allowEmpty = true
	return abp
}
// Matching sets the regular expression that attribute values must satisfy
// under the rule being built, replacing any pattern set earlier. It returns
// the same builder for chaining.
func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
	abp.regexp = regex
	return abp
}
// OnElements binds the attribute rule to each of the named HTML elements and
// returns the updated policy. Element names are lower-cased first.
//
// For every element, one rule carrying the builder's pattern (possibly nil)
// is appended under each attribute name. If the builder allows empty
// attributes, the element is also recorded as valid without attributes and is
// registered even when the builder names no attributes.
func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
	policy := abp.p
	rule := attrPolicy{regexp: abp.regexp}

	for _, name := range elements {
		name = strings.ToLower(name)

		// The element is only registered when there is something to record
		// for it: at least one attribute, or permission to be empty.
		attrs, known := policy.elsAndAttrs[name]
		if !known && (len(abp.attrNames) > 0 || abp.allowEmpty) {
			attrs = make(map[string][]attrPolicy)
			policy.elsAndAttrs[name] = attrs
		}

		for _, attr := range abp.attrNames {
			attrs[attr] = append(attrs[attr], rule)
		}

		if abp.allowEmpty {
			policy.setOfElementsAllowedWithoutAttrs[name] = struct{}{}
		}
	}

	return policy
}
// OnElementsMatching binds the attribute rule to every element whose name
// matches regex and returns the updated policy.
//
// One rule carrying the builder's pattern (possibly nil) is appended under
// each attribute name. If the builder allows empty attributes, regex is also
// appended to the list of element patterns accepted without attributes, and
// the pattern is registered even when the builder names no attributes.
func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
	policy := abp.p
	rule := attrPolicy{regexp: abp.regexp}

	// The pattern is only registered when there is something to record for
	// it: at least one attribute, or permission to be empty.
	attrs, known := policy.elsMatchingAndAttrs[regex]
	if !known && (len(abp.attrNames) > 0 || abp.allowEmpty) {
		attrs = make(map[string][]attrPolicy)
		policy.elsMatchingAndAttrs[regex] = attrs
	}

	for _, attr := range abp.attrNames {
		attrs[attr] = append(attrs[attr], rule)
	}

	if abp.allowEmpty {
		policy.setOfElementsMatchingAllowedWithoutAttrs = append(policy.setOfElementsMatchingAllowedWithoutAttrs, regex)
	}

	return policy
}
// Globally binds the attribute rule to all HTML elements and returns the
// updated policy. One rule carrying the builder's pattern (possibly nil) is
// appended to the global list of each attribute name.
func (abp *attrPolicyBuilder) Globally() *Policy {
	rule := attrPolicy{regexp: abp.regexp}
	global := abp.p.globalAttrs

	for _, name := range abp.attrNames {
		// append creates the list on first use, so no pre-seeding is needed.
		global[name] = append(global[name], rule)
	}
	return abp.p
}
// AllowStyles starts a style rule for the given CSS property names and
// returns a builder on which the validation strategy and the scope can be
// set. Names are stored lower-cased.
//
// Nothing is added to the policy until the builder is bound with Globally()
// or OnElements(...).
func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
	p.init()

	builder := &stylePolicyBuilder{p: p}
	for i := range propertyNames {
		builder.propertyNames = append(builder.propertyNames, strings.ToLower(propertyNames[i]))
	}
	return builder
}
// Matching sets the regular expression that property values must satisfy
// under the style rule being built, replacing any pattern set earlier. It
// returns the same builder for chaining.
//
// When the rule is bound, the pattern is only used if neither a handler nor a
// non-empty enum has been set on the builder.
func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {
	spb.regexp = regex
	return spb
}
// MatchingEnum sets the list of values that are permitted for the properties
// of the style rule being built, replacing any list set earlier. It returns
// the same builder for chaining.
//
// The values are copied. A caller that passes an existing slice with
// MatchingEnum(values...) hands over that very slice, so without the copy a
// later change to values would silently change the policy.
//
// When the rule is bound, the list is only used if no handler has been set on
// the builder; an empty list counts as not set.
func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {
	spb.enum = append([]string(nil), enum...)
	return spb
}
// MatchingHandler sets the callback used to validate property values under
// the style rule being built, replacing any handler set earlier. It returns
// the same builder for chaining.
//
// When the rule is bound, a non-nil handler takes precedence over an enum or
// a pattern set on the same builder.
func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {
	spb.handler = handler
	return spb
}
// OnElements binds the style rule to each of the named HTML elements and
// returns the updated policy. Element names are lower-cased first.
//
// Each appended rule uses exactly one validation strategy, chosen in this
// order: the builder's handler, its non-empty enum, its pattern, and finally
// the default handler that the css package provides for the property.
func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
	policy := spb.p

	for _, name := range elements {
		name = strings.ToLower(name)

		for _, property := range spb.propertyNames {
			if _, known := policy.elsAndStyles[name]; !known {
				policy.elsAndStyles[name] = make(map[string][]stylePolicy)
			}

			var rule stylePolicy
			switch {
			case spb.handler != nil:
				rule.handler = spb.handler
			case len(spb.enum) > 0:
				rule.enum = spb.enum
			case spb.regexp != nil:
				rule.regexp = spb.regexp
			default:
				rule.handler = css.GetDefaultHandler(property)
			}

			policy.elsAndStyles[name][property] = append(policy.elsAndStyles[name][property], rule)
		}
	}

	return policy
}
// OnElementsMatching binds the style rule to every HTML element whose name
// matches regex and returns the updated policy.
//
// Each appended rule uses exactly one validation strategy, chosen in this
// order: the builder's handler, its non-empty enum, its pattern, and finally
// the default handler that the css package provides for the property.
func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
	policy := spb.p

	for _, property := range spb.propertyNames {
		if _, known := policy.elsMatchingAndStyles[regex]; !known {
			policy.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
		}

		var rule stylePolicy
		switch {
		case spb.handler != nil:
			rule.handler = spb.handler
		case len(spb.enum) > 0:
			rule.enum = spb.enum
		case spb.regexp != nil:
			rule.regexp = spb.regexp
		default:
			rule.handler = css.GetDefaultHandler(property)
		}

		policy.elsMatchingAndStyles[regex][property] = append(policy.elsMatchingAndStyles[regex][property], rule)
	}

	return policy
}
// Globally binds the style rule to all HTML elements and returns the updated
// policy.
//
// Each appended rule uses exactly one validation strategy, chosen in this
// order: the builder's handler, its non-empty enum, its pattern, and finally
// the default handler that the css package provides for the property.
func (spb *stylePolicyBuilder) Globally() *Policy {
	global := spb.p.globalStyles

	for _, property := range spb.propertyNames {
		var rule stylePolicy
		switch {
		case spb.handler != nil:
			rule.handler = spb.handler
		case len(spb.enum) > 0:
			rule.enum = spb.enum
		case spb.regexp != nil:
			rule.regexp = spb.regexp
		default:
			rule.handler = css.GetDefaultHandler(property)
		}

		// append creates the list on first use, so no pre-seeding is needed.
		global[property] = append(global[property], rule)
	}

	return spb.p
}
// AllowElements adds the named HTML elements to the allowlist without
// attaching any attribute rule to them, i.e. the elements are permitted
// sans-attributes. Names are lower-cased. An element that is already
// registered keeps its existing attribute rules.
func (p *Policy) AllowElements(names ...string) *Policy {
	p.init()

	for _, name := range names {
		key := strings.ToLower(name)
		if _, registered := p.elsAndAttrs[key]; registered {
			continue
		}
		p.elsAndAttrs[key] = make(map[string][]attrPolicy)
	}
	return p
}
// AllowElementsMatching adds an element-name pattern to the allowlist:
// elements whose name matches regex are permitted. A pattern that is already
// registered keeps its existing attribute rules.
func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
	p.init()

	if _, registered := p.elsMatchingAndAttrs[regex]; registered {
		return p
	}
	p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
	return p
}
// AllowURLSchemesMatching adds a pattern to the list of regular expressions
// used to allow URL schemes.
func (p *Policy) AllowURLSchemesMatching(r *regexp.Regexp) *Policy {
	patterns := append(p.allowURLSchemeRegexps, r)
	p.allowURLSchemeRegexps = patterns
	return p
}
// RewriteSrc registers fn to rewrite the src attribute of tags that download
// resources (e.g. <img>, <script>, <iframe>).
//
// The typical use case: if the content being sanitized is untrusted, then so
// is the content it inlines. To avoid serving that content from the same
// domain the main content appears on, it is good practice to proxy it through
// an additional domain name. The web client then treats the inline content as
// third party to the main content, which gives browser isolation around it.
//
// For example, a web mail provider like fastmail.com shows the email text
// (user generated content) on fastmail.com, but renders inline attachments
// and content from fastmailusercontent.com. Proxying the external content on
// a different domain makes the browser's domain security model kick in. Note
// that this only applies to differences below the suffix (as per the public
// suffix list).
//
// This prevents the inline content from setting cookies on the main domain,
// and thus prevents content on the main domain from reading those cookies.
func (p *Policy) RewriteSrc(fn urlRewriter) *Policy {
	p.srcRewriter = fn
	return p
}
// RequireNoFollowOnLinks controls whether all a, area and link tags get a
// rel="nofollow" added to them if one does not already exist.
//
// Note: this depends on parseable URLs, so RequireParseableURLs is switched
// on by every call, whatever the value of require.
func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
	p.requireParseableURLs = true
	p.requireNoFollow = require
	return p
}
// RequireNoFollowOnFullyQualifiedLinks controls whether a, area and link tags
// that point to a non-local destination (i.e. the URL starts with a protocol
// and has a host) get a rel="nofollow" added to them if one does not already
// exist.
//
// Note: this depends on parseable URLs, so RequireParseableURLs is switched
// on by every call, whatever the value of require.
func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
	p.requireParseableURLs = true
	p.requireNoFollowFullyQualifiedLinks = require
	return p
}
// RequireNoReferrerOnLinks controls whether all a, area and link tags get a
// rel="noreferrer" added to them if one does not already exist.
//
// Note: this depends on parseable URLs, so RequireParseableURLs is switched
// on by every call, whatever the value of require.
func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy {
	p.requireParseableURLs = true
	p.requireNoReferrer = require
	return p
}
// RequireNoReferrerOnFullyQualifiedLinks controls whether a, area and link
// tags that point to a non-local destination (i.e. the URL starts with a
// protocol and has a host) get a rel="noreferrer" added to them if one does
// not already exist.
//
// Note: this depends on parseable URLs, so RequireParseableURLs is switched
// on by every call, whatever the value of require.
func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy {
	p.requireParseableURLs = true
	p.requireNoReferrerFullyQualifiedLinks = require
	return p
}
// RequireCrossOriginAnonymous controls whether all audio, img, link, script
// and video tags get a crossorigin="anonymous" added to them if one does not
// already exist. It returns the policy for chaining.
func (p *Policy) RequireCrossOriginAnonymous(require bool) *Policy {
	p.requireCrossOriginAnonymous = require
	return p
}
// AddTargetBlankToFullyQualifiedLinks controls whether a, area and link tags
// that point to a non-local destination (i.e. the URL starts with a protocol
// and has a host) get a target="_blank" added to them if one does not already
// exist.
//
// Note: this depends on parseable URLs, so RequireParseableURLs is switched
// on by every call, whatever the value of require.
func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {
	p.requireParseableURLs = true
	p.addTargetBlankToFullyQualifiedLinks = require
	return p
}
// RequireParseableURLs controls whether every URL must be parseable by
// "net/url" url.Parse(). It returns the policy for chaining.
//
// This applies to:
//   - a.href
//   - area.href
//   - blockquote.cite
//   - img.src
//   - link.href
//   - script.src
func (p *Policy) RequireParseableURLs(require bool) *Policy {
	p.requireParseableURLs = require
	return p
}
// AllowRelativeURLs switches RequireParseableURLs on and then controls
// whether URLs that are parseable, carry no schema information and for which
// url.IsAbs() returns false are permitted. This permits local URLs.
//
// RequireParseableURLs is enabled by every call, whatever the value of
// require.
func (p *Policy) AllowRelativeURLs(require bool) *Policy {
	p.RequireParseableURLs(true).allowRelativeURLs = require
	return p
}
// AllowURLSchemes adds URL schemes to the allowlist and switches
// RequireParseableURLs on. Scheme names are lower-cased.
//
// Each scheme is stored with a nil policy list, which means all URLs with
// that scheme are allowed; custom policies registered earlier for the same
// scheme are replaced.
//
// Example: p.AllowURLSchemes("mailto", "http", "https")
func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
	p.init()
	p.RequireParseableURLs(true)

	for i := range schemes {
		p.allowURLSchemes[strings.ToLower(schemes[i])] = nil
	}
	return p
}
// AllowURLSchemeWithCustomPolicy adds a URL scheme together with a custom URL
// policy to the allowlist and switches RequireParseableURLs on. The scheme
// name is lower-cased. Only URLs with a matching scheme for which
// urlPolicy(url) returns true will be allowed.
//
// The policy is appended, so several policies can be registered for the same
// scheme by calling this method repeatedly.
//
// NOTE(review): a nil urlPolicy is appended as-is; confirm that the sanitizer
// tolerates nil entries before relying on that.
func (p *Policy) AllowURLSchemeWithCustomPolicy(
	scheme string,
	urlPolicy func(url *url.URL) (allowUrl bool),
) *Policy {
	p.init()
	p.RequireParseableURLs(true)

	key := strings.ToLower(scheme)
	p.allowURLSchemes[key] = append(p.allowURLSchemes[key], urlPolicy)
	return p
}
// RequireSandboxOnIFrame will result in all iframe tags having a sandbox
// attribute. Any sandbox values not specified here will be filtered from the
// generated HTML.
//
// Each call replaces the previously configured set with a fresh one built
// from vals. Values that are not one of the SandboxAllow... constants are
// ignored.
func (p *Policy) RequireSandboxOnIFrame(vals ...SandboxValue) {
	// Token strings indexed by their SandboxValue constant.
	tokens := [...]string{
		SandboxAllowDownloads:                      "allow-downloads",
		SandboxAllowDownloadsWithoutUserActivation: "allow-downloads-without-user-activation",
		SandboxAllowForms:                          "allow-forms",
		SandboxAllowModals:                         "allow-modals",
		SandboxAllowOrientationLock:                "allow-orientation-lock",
		SandboxAllowPointerLock:                    "allow-pointer-lock",
		SandboxAllowPopups:                         "allow-popups",
		SandboxAllowPopupsToEscapeSandbox:          "allow-popups-to-escape-sandbox",
		SandboxAllowPresentation:                   "allow-presentation",
		SandboxAllowSameOrigin:                     "allow-same-origin",
		SandboxAllowScripts:                        "allow-scripts",
		SandboxAllowStorageAccessByUserActivation:  "allow-storage-access-by-user-activation",
		SandboxAllowTopNavigation:                  "allow-top-navigation",
		SandboxAllowTopNavigationByUserActivation:  "allow-top-navigation-by-user-activation",
	}

	allowed := make(map[string]bool)
	for _, v := range vals {
		if v < 0 || v >= SandboxValue(len(tokens)) {
			continue // not a known sandbox value
		}
		allowed[tokens[v]] = true
	}
	p.requireSandboxOnIFrame = allowed
}
// AddSpaceWhenStrippingTag states whether a single space " " is added when a
// tag that is not allowed by the policy is removed.
//
// This is useful when tags are stripped from dense markup, where the value of
// whitespace may otherwise be lost.
//
// For example, "<p>Hello</p><p>World</p>" would be sanitized to "HelloWorld"
// with the default value of false. Setting AddSpaceWhenStrippingTag to true
// sanitizes it to " Hello World " instead, which retains the intent of the
// text.
func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {
	p.addSpaces = allow
	return p
}
// SkipElementsContent adds HTML elements to the set of elements that are
// removed together with their content. Names are lower-cased; adding an
// element that is already in the set has no effect.
func (p *Policy) SkipElementsContent(names ...string) *Policy {
	p.init()

	for _, name := range names {
		p.setOfElementsToSkipContent[strings.ToLower(name)] = struct{}{}
	}
	return p
}
// AllowElementsContent marks HTML elements whose content should be retained
// after the tag itself is removed, by taking them out of the set of elements
// whose content is skipped. Names are lower-cased; names that are not in the
// set are ignored.
func (p *Policy) AllowElementsContent(names ...string) *Policy {
	p.init()

	skipped := p.setOfElementsToSkipContent
	for i := range names {
		delete(skipped, strings.ToLower(names[i]))
	}
	return p
}
// AllowUnsafe controls whether fundamentally unsafe elements are permitted.
//
// With false (the default), elements such as `style` and `script` are not
// permitted even if a policy declares them. Combined with untrusted input,
// these elements cannot be safely handled by bluemonday at this point in
// time.
//
// With true, `style` and `script` are permitted if a policy declares them.
// This is not recommended under any circumstance: it can lead to XSS being
// rendered, which defeats the purpose of using an HTML sanitizer.
func (p *Policy) AllowUnsafe(allowUnsafe bool) *Policy {
	p.init()
	p.allowUnsafe = allowUnsafe
	return p
}
// addDefaultElementsWithoutAttrs records the HTML elements that are known to
// be valid without any attributes in the policy's internal set.
// For example <table> is valid on its own, whereas <bdo> is not listed
// because its "dir" attribute is mandatory.
func (p *Policy) addDefaultElementsWithoutAttrs() {
	p.init()

	for _, name := range [...]string{
		"abbr", "acronym", "address", "article", "aside", "audio",
		"b", "bdi", "blockquote", "body", "br", "button",
		"canvas", "caption", "center", "cite", "code", "col", "colgroup",
		"datalist", "dd", "del", "details", "dfn", "div", "dl", "dt",
		"em",
		"fieldset", "figcaption", "figure", "footer",
		"h1", "h2", "h3", "h4", "h5", "h6",
		"head", "header", "hgroup", "hr", "html",
		"i", "ins",
		"kbd",
		"li",
		"mark", "marquee",
		"nav",
		"ol", "optgroup", "option",
		"p", "picture", "pre",
		"q",
		"rp", "rt", "ruby",
		"s", "samp", "script", "section", "select", "small", "span",
		"strike", "strong", "style", "sub", "summary", "sup", "svg",
		"table", "tbody", "td", "textarea", "tfoot", "th", "thead",
		"title", "time", "tr", "tt",
		"u", "ul",
		"var", "video",
		"wbr",
	} {
		p.setOfElementsAllowedWithoutAttrs[name] = struct{}{}
	}
}
// addDefaultSkipElementContent records the HTML elements whose character
// content should not be rendered when the element itself is not allowed.
// This is character data that the end user would not normally see: if a
// <script> tag is excluded, then neither the JavaScript nor anything else
// should be rendered until the closing </script> tag is encountered.
func (p *Policy) addDefaultSkipElementContent() {
	p.init()

	for _, name := range [...]string{
		"frame", "frameset", "iframe",
		"noembed", "noframes", "noscript", "nostyle",
		"object",
		"script", "style",
		"title",
	} {
		p.setOfElementsToSkipContent[name] = struct{}{}
	}
}

1096
vendor/github.com/microcosm-cc/bluemonday/sanitize.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

11
vendor/modules.txt vendored
View File

@@ -108,6 +108,10 @@ github.com/armon/go-radix
# github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2
## explicit; go 1.13
github.com/asaskevich/govalidator
# github.com/aymerick/douceur v0.2.0
## explicit
github.com/aymerick/douceur/css
github.com/aymerick/douceur/parser
# github.com/bbalet/stopwords v1.0.0
## explicit
github.com/bbalet/stopwords
@@ -837,6 +841,9 @@ github.com/gookit/goutil/x/encodes
github.com/gookit/goutil/x/goinfo
github.com/gookit/goutil/x/stdio
github.com/gookit/goutil/x/termenv
# github.com/gorilla/css v1.0.1
## explicit; go 1.20
github.com/gorilla/css/scanner
# github.com/gorilla/handlers v1.5.1
## explicit; go 1.14
github.com/gorilla/handlers
@@ -1121,6 +1128,10 @@ github.com/maxymania/go-system/syscall_x
# github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103
## explicit
github.com/mendsley/gojwk
# github.com/microcosm-cc/bluemonday v1.0.27
## explicit; go 1.19
github.com/microcosm-cc/bluemonday
github.com/microcosm-cc/bluemonday/css
# github.com/miekg/dns v1.1.57
## explicit; go 1.19
github.com/miekg/dns