Files
opencloud/services/search/pkg/query/kql/dictionary_gen.go
Florian Schade ed0dbce978 enhancement: Keyword Query Language (KQL) search syntax support (#7043)
* feat(search): introduce search query package

With the increasing complexity of how we organize our resources, the search must also be able to find them using entity properties.

The query package provides the necessary functionality to do this.

This makes it possible to search for resources via KQL, the microsoft spec is largely covered and can be used for this.

In the current state, the legacy query language is still used, in a future update this will be deprecated and KQL will become the standard
2023-08-28 16:41:36 +02:00

1952 lines
45 KiB
Go

// Code generated by pigeon; DO NOT EDIT.
package kql
import (
"bytes"
"errors"
"fmt"
"io"
"math"
"os"
"sort"
"strconv"
"strings"
"sync"
"unicode"
"unicode/utf8"
)
var g = &grammar{
rules: []*rule{
{
name: "AST",
pos: position{line: 9, col: 1, offset: 143},
expr: &actionExpr{
pos: position{line: 10, col: 5, offset: 154},
run: (*parser).callonAST1,
expr: &seqExpr{
pos: position{line: 10, col: 5, offset: 154},
exprs: []any{
&ruleRefExpr{
pos: position{line: 10, col: 5, offset: 154},
name: "_",
},
&labeledExpr{
pos: position{line: 10, col: 7, offset: 156},
label: "nodes",
expr: &ruleRefExpr{
pos: position{line: 10, col: 13, offset: 162},
name: "Nodes",
},
},
&ruleRefExpr{
pos: position{line: 10, col: 19, offset: 168},
name: "_",
},
},
},
},
},
{
name: "Nodes",
pos: position{line: 14, col: 1, offset: 225},
expr: &actionExpr{
pos: position{line: 15, col: 5, offset: 238},
run: (*parser).callonNodes1,
expr: &labeledExpr{
pos: position{line: 15, col: 5, offset: 238},
label: "n",
expr: &oneOrMoreExpr{
pos: position{line: 15, col: 7, offset: 240},
expr: &seqExpr{
pos: position{line: 16, col: 9, offset: 250},
exprs: []any{
&ruleRefExpr{
pos: position{line: 16, col: 9, offset: 250},
name: "_",
},
&choiceExpr{
pos: position{line: 18, col: 13, offset: 274},
alternatives: []any{
&ruleRefExpr{
pos: position{line: 18, col: 13, offset: 274},
name: "GroupNode",
},
&ruleRefExpr{
pos: position{line: 19, col: 13, offset: 298},
name: "PropertyRestrictionNodes",
},
&ruleRefExpr{
pos: position{line: 20, col: 13, offset: 337},
name: "BooleanOperatorNode",
},
&ruleRefExpr{
pos: position{line: 21, col: 13, offset: 371},
name: "FreeTextKeywordNodes",
},
},
},
&ruleRefExpr{
pos: position{line: 23, col: 9, offset: 410},
name: "_",
},
},
},
},
},
},
},
{
name: "GroupNode",
pos: position{line: 32, col: 1, offset: 581},
expr: &actionExpr{
pos: position{line: 33, col: 5, offset: 598},
run: (*parser).callonGroupNode1,
expr: &seqExpr{
pos: position{line: 33, col: 5, offset: 598},
exprs: []any{
&labeledExpr{
pos: position{line: 33, col: 5, offset: 598},
label: "k",
expr: &zeroOrOneExpr{
pos: position{line: 33, col: 7, offset: 600},
expr: &oneOrMoreExpr{
pos: position{line: 33, col: 8, offset: 601},
expr: &ruleRefExpr{
pos: position{line: 33, col: 8, offset: 601},
name: "Char",
},
},
},
},
&zeroOrOneExpr{
pos: position{line: 33, col: 16, offset: 609},
expr: &choiceExpr{
pos: position{line: 33, col: 17, offset: 610},
alternatives: []any{
&ruleRefExpr{
pos: position{line: 33, col: 17, offset: 610},
name: "ColonOperator",
},
&ruleRefExpr{
pos: position{line: 33, col: 33, offset: 626},
name: "EqualOperator",
},
},
},
},
&litMatcher{
pos: position{line: 33, col: 49, offset: 642},
val: "(",
ignoreCase: false,
want: "\"(\"",
},
&labeledExpr{
pos: position{line: 33, col: 53, offset: 646},
label: "v",
expr: &ruleRefExpr{
pos: position{line: 33, col: 55, offset: 648},
name: "Nodes",
},
},
&litMatcher{
pos: position{line: 33, col: 61, offset: 654},
val: ")",
ignoreCase: false,
want: "\")\"",
},
},
},
},
},
{
name: "PropertyRestrictionNodes",
pos: position{line: 41, col: 1, offset: 858},
expr: &choiceExpr{
pos: position{line: 42, col: 5, offset: 890},
alternatives: []any{
&ruleRefExpr{
pos: position{line: 42, col: 5, offset: 890},
name: "YesNoPropertyRestrictionNode",
},
&ruleRefExpr{
pos: position{line: 43, col: 5, offset: 925},
name: "TextPropertyRestrictionNode",
},
},
},
},
{
name: "YesNoPropertyRestrictionNode",
pos: position{line: 45, col: 1, offset: 954},
expr: &actionExpr{
pos: position{line: 46, col: 5, offset: 990},
run: (*parser).callonYesNoPropertyRestrictionNode1,
expr: &seqExpr{
pos: position{line: 46, col: 5, offset: 990},
exprs: []any{
&labeledExpr{
pos: position{line: 46, col: 5, offset: 990},
label: "k",
expr: &oneOrMoreExpr{
pos: position{line: 46, col: 7, offset: 992},
expr: &ruleRefExpr{
pos: position{line: 46, col: 7, offset: 992},
name: "Char",
},
},
},
&choiceExpr{
pos: position{line: 46, col: 14, offset: 999},
alternatives: []any{
&ruleRefExpr{
pos: position{line: 46, col: 14, offset: 999},
name: "ColonOperator",
},
&ruleRefExpr{
pos: position{line: 46, col: 30, offset: 1015},
name: "EqualOperator",
},
},
},
&labeledExpr{
pos: position{line: 46, col: 45, offset: 1030},
label: "v",
expr: &choiceExpr{
pos: position{line: 46, col: 48, offset: 1033},
alternatives: []any{
&litMatcher{
pos: position{line: 46, col: 48, offset: 1033},
val: "true",
ignoreCase: false,
want: "\"true\"",
},
&litMatcher{
pos: position{line: 46, col: 57, offset: 1042},
val: "false",
ignoreCase: false,
want: "\"false\"",
},
},
},
},
},
},
},
},
{
name: "TextPropertyRestrictionNode",
pos: position{line: 50, col: 1, offset: 1112},
expr: &actionExpr{
pos: position{line: 51, col: 5, offset: 1147},
run: (*parser).callonTextPropertyRestrictionNode1,
expr: &seqExpr{
pos: position{line: 51, col: 5, offset: 1147},
exprs: []any{
&labeledExpr{
pos: position{line: 51, col: 5, offset: 1147},
label: "k",
expr: &oneOrMoreExpr{
pos: position{line: 51, col: 7, offset: 1149},
expr: &ruleRefExpr{
pos: position{line: 51, col: 7, offset: 1149},
name: "Char",
},
},
},
&choiceExpr{
pos: position{line: 51, col: 14, offset: 1156},
alternatives: []any{
&ruleRefExpr{
pos: position{line: 51, col: 14, offset: 1156},
name: "ColonOperator",
},
&ruleRefExpr{
pos: position{line: 51, col: 30, offset: 1172},
name: "EqualOperator",
},
},
},
&labeledExpr{
pos: position{line: 51, col: 45, offset: 1187},
label: "v",
expr: &choiceExpr{
pos: position{line: 51, col: 48, offset: 1190},
alternatives: []any{
&ruleRefExpr{
pos: position{line: 51, col: 48, offset: 1190},
name: "String",
},
&oneOrMoreExpr{
pos: position{line: 51, col: 57, offset: 1199},
expr: &charClassMatcher{
pos: position{line: 51, col: 57, offset: 1199},
val: "[^ ()]",
chars: []rune{' ', '(', ')'},
ignoreCase: false,
inverted: true,
},
},
},
},
},
},
},
},
},
{
name: "FreeTextKeywordNodes",
pos: position{line: 60, col: 1, offset: 1406},
expr: &choiceExpr{
pos: position{line: 61, col: 5, offset: 1434},
alternatives: []any{
&ruleRefExpr{
pos: position{line: 61, col: 5, offset: 1434},
name: "PhraseNode",
},
&ruleRefExpr{
pos: position{line: 62, col: 5, offset: 1451},
name: "WordNode",
},
},
},
},
{
name: "PhraseNode",
pos: position{line: 64, col: 1, offset: 1461},
expr: &actionExpr{
pos: position{line: 65, col: 6, offset: 1480},
run: (*parser).callonPhraseNode1,
expr: &seqExpr{
pos: position{line: 65, col: 6, offset: 1480},
exprs: []any{
&zeroOrOneExpr{
pos: position{line: 65, col: 6, offset: 1480},
expr: &ruleRefExpr{
pos: position{line: 65, col: 6, offset: 1480},
name: "ColonOperator",
},
},
&ruleRefExpr{
pos: position{line: 65, col: 21, offset: 1495},
name: "_",
},
&labeledExpr{
pos: position{line: 65, col: 23, offset: 1497},
label: "v",
expr: &ruleRefExpr{
pos: position{line: 65, col: 25, offset: 1499},
name: "String",
},
},
&ruleRefExpr{
pos: position{line: 65, col: 32, offset: 1506},
name: "_",
},
&zeroOrOneExpr{
pos: position{line: 65, col: 34, offset: 1508},
expr: &ruleRefExpr{
pos: position{line: 65, col: 34, offset: 1508},
name: "ColonOperator",
},
},
},
},
},
},
{
name: "WordNode",
pos: position{line: 69, col: 1, offset: 1585},
expr: &actionExpr{
pos: position{line: 70, col: 6, offset: 1602},
run: (*parser).callonWordNode1,
expr: &seqExpr{
pos: position{line: 70, col: 6, offset: 1602},
exprs: []any{
&zeroOrOneExpr{
pos: position{line: 70, col: 6, offset: 1602},
expr: &ruleRefExpr{
pos: position{line: 70, col: 6, offset: 1602},
name: "ColonOperator",
},
},
&ruleRefExpr{
pos: position{line: 70, col: 21, offset: 1617},
name: "_",
},
&labeledExpr{
pos: position{line: 70, col: 23, offset: 1619},
label: "v",
expr: &oneOrMoreExpr{
pos: position{line: 70, col: 25, offset: 1621},
expr: &charClassMatcher{
pos: position{line: 70, col: 25, offset: 1621},
val: "[^ :()]",
chars: []rune{' ', ':', '(', ')'},
ignoreCase: false,
inverted: true,
},
},
},
&ruleRefExpr{
pos: position{line: 70, col: 34, offset: 1630},
name: "_",
},
&zeroOrOneExpr{
pos: position{line: 70, col: 36, offset: 1632},
expr: &ruleRefExpr{
pos: position{line: 70, col: 36, offset: 1632},
name: "ColonOperator",
},
},
},
},
},
},
{
name: "BooleanOperatorNode",
pos: position{line: 78, col: 1, offset: 1837},
expr: &actionExpr{
pos: position{line: 79, col: 5, offset: 1864},
run: (*parser).callonBooleanOperatorNode1,
expr: &choiceExpr{
pos: position{line: 79, col: 6, offset: 1865},
alternatives: []any{
&litMatcher{
pos: position{line: 79, col: 6, offset: 1865},
val: "AND",
ignoreCase: false,
want: "\"AND\"",
},
&litMatcher{
pos: position{line: 79, col: 14, offset: 1873},
val: "OR",
ignoreCase: false,
want: "\"OR\"",
},
&litMatcher{
pos: position{line: 79, col: 21, offset: 1880},
val: "NOT",
ignoreCase: false,
want: "\"NOT\"",
},
},
},
},
},
{
name: "ColonOperator",
pos: position{line: 83, col: 1, offset: 1944},
expr: &actionExpr{
pos: position{line: 84, col: 5, offset: 1965},
run: (*parser).callonColonOperator1,
expr: &litMatcher{
pos: position{line: 84, col: 5, offset: 1965},
val: ":",
ignoreCase: false,
want: "\":\"",
},
},
},
{
name: "EqualOperator",
pos: position{line: 88, col: 1, offset: 2005},
expr: &actionExpr{
pos: position{line: 89, col: 5, offset: 2026},
run: (*parser).callonEqualOperator1,
expr: &litMatcher{
pos: position{line: 89, col: 5, offset: 2026},
val: "=",
ignoreCase: false,
want: "\"=\"",
},
},
},
{
name: "Char",
pos: position{line: 97, col: 1, offset: 2189},
expr: &actionExpr{
pos: position{line: 98, col: 5, offset: 2201},
run: (*parser).callonChar1,
expr: &charClassMatcher{
pos: position{line: 98, col: 5, offset: 2201},
val: "[A-Za-z]",
ranges: []rune{'A', 'Z', 'a', 'z'},
ignoreCase: false,
inverted: false,
},
},
},
{
name: "String",
pos: position{line: 102, col: 1, offset: 2246},
expr: &actionExpr{
pos: position{line: 103, col: 5, offset: 2260},
run: (*parser).callonString1,
expr: &seqExpr{
pos: position{line: 103, col: 5, offset: 2260},
exprs: []any{
&litMatcher{
pos: position{line: 103, col: 5, offset: 2260},
val: "\"",
ignoreCase: false,
want: "\"\\\"\"",
},
&labeledExpr{
pos: position{line: 103, col: 9, offset: 2264},
label: "v",
expr: &zeroOrMoreExpr{
pos: position{line: 103, col: 11, offset: 2266},
expr: &charClassMatcher{
pos: position{line: 103, col: 11, offset: 2266},
val: "[^\"]",
chars: []rune{'"'},
ignoreCase: false,
inverted: true,
},
},
},
&litMatcher{
pos: position{line: 103, col: 17, offset: 2272},
val: "\"",
ignoreCase: false,
want: "\"\\\"\"",
},
},
},
},
},
{
name: "_",
pos: position{line: 107, col: 1, offset: 2307},
expr: &zeroOrMoreExpr{
pos: position{line: 108, col: 5, offset: 2316},
expr: &charClassMatcher{
pos: position{line: 108, col: 5, offset: 2316},
val: "[ \\t]",
chars: []rune{' ', '\t'},
ignoreCase: false,
inverted: false,
},
},
},
},
}
func (c *current) onAST1(nodes any) (any, error) {
return buildAST(nodes, c.text, c.pos)
}
func (p *parser) callonAST1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onAST1(stack["nodes"])
}
func (c *current) onNodes1(n any) (any, error) {
return buildNodes(n)
}
func (p *parser) callonNodes1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onNodes1(stack["n"])
}
func (c *current) onGroupNode1(k, v any) (any, error) {
return buildGroupNode(k, v, c.text, c.pos)
}
func (p *parser) callonGroupNode1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onGroupNode1(stack["k"], stack["v"])
}
func (c *current) onYesNoPropertyRestrictionNode1(k, v any) (any, error) {
return buildBooleanNode(k, v, c.text, c.pos)
}
func (p *parser) callonYesNoPropertyRestrictionNode1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onYesNoPropertyRestrictionNode1(stack["k"], stack["v"])
}
func (c *current) onTextPropertyRestrictionNode1(k, v any) (any, error) {
return buildStringNode(k, v, c.text, c.pos)
}
func (p *parser) callonTextPropertyRestrictionNode1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onTextPropertyRestrictionNode1(stack["k"], stack["v"])
}
func (c *current) onPhraseNode1(v any) (any, error) {
return buildStringNode("", v, c.text, c.pos)
}
func (p *parser) callonPhraseNode1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onPhraseNode1(stack["v"])
}
func (c *current) onWordNode1(v any) (any, error) {
return buildStringNode("", v, c.text, c.pos)
}
func (p *parser) callonWordNode1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onWordNode1(stack["v"])
}
func (c *current) onBooleanOperatorNode1() (any, error) {
return buildOperatorNode(c.text, c.pos)
}
func (p *parser) callonBooleanOperatorNode1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onBooleanOperatorNode1()
}
func (c *current) onColonOperator1() (any, error) {
return c.text, nil
}
func (p *parser) callonColonOperator1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onColonOperator1()
}
func (c *current) onEqualOperator1() (any, error) {
return c.text, nil
}
func (p *parser) callonEqualOperator1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onEqualOperator1()
}
func (c *current) onChar1() (any, error) {
return c.text, nil
}
func (p *parser) callonChar1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onChar1()
}
func (c *current) onString1(v any) (any, error) {
return v, nil
}
func (p *parser) callonString1() (any, error) {
stack := p.vstack[len(p.vstack)-1]
_ = stack
return p.cur.onString1(stack["v"])
}
var (
// errNoRule is returned when the grammar to parse has no rule.
errNoRule = errors.New("grammar has no rule")
// errInvalidEntrypoint is returned when the specified entrypoint rule
// does not exit.
errInvalidEntrypoint = errors.New("invalid entrypoint")
// errInvalidEncoding is returned when the source is not properly
// utf8-encoded.
errInvalidEncoding = errors.New("invalid encoding")
// errMaxExprCnt is used to signal that the maximum number of
// expressions have been parsed.
errMaxExprCnt = errors.New("max number of expresssions parsed")
)
// Option is a function that can set an option on the parser. It returns
// the previous setting as an Option.
type Option func(*parser) Option
// MaxExpressions creates an Option to stop parsing after the provided
// number of expressions have been parsed, if the value is 0 then the parser will
// parse for as many steps as needed (possibly an infinite number).
//
// The default for maxExprCnt is 0.
func MaxExpressions(maxExprCnt uint64) Option {
return func(p *parser) Option {
oldMaxExprCnt := p.maxExprCnt
p.maxExprCnt = maxExprCnt
return MaxExpressions(oldMaxExprCnt)
}
}
// Entrypoint creates an Option to set the rule name to use as entrypoint.
// The rule name must have been specified in the -alternate-entrypoints
// if generating the parser with the -optimize-grammar flag, otherwise
// it may have been optimized out. Passing an empty string sets the
// entrypoint to the first rule in the grammar.
//
// The default is to start parsing at the first rule in the grammar.
func Entrypoint(ruleName string) Option {
return func(p *parser) Option {
oldEntrypoint := p.entrypoint
p.entrypoint = ruleName
if ruleName == "" {
p.entrypoint = g.rules[0].name
}
return Entrypoint(oldEntrypoint)
}
}
// Statistics adds a user provided Stats struct to the parser to allow
// the user to process the results after the parsing has finished.
// Also the key for the "no match" counter is set.
//
// Example usage:
//
// input := "input"
// stats := Stats{}
// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match"))
// if err != nil {
// log.Panicln(err)
// }
// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ")
// if err != nil {
// log.Panicln(err)
// }
// fmt.Println(string(b))
func Statistics(stats *Stats, choiceNoMatch string) Option {
return func(p *parser) Option {
oldStats := p.Stats
p.Stats = stats
oldChoiceNoMatch := p.choiceNoMatch
p.choiceNoMatch = choiceNoMatch
if p.Stats.ChoiceAltCnt == nil {
p.Stats.ChoiceAltCnt = make(map[string]map[string]int)
}
return Statistics(oldStats, oldChoiceNoMatch)
}
}
// Debug creates an Option to set the debug flag to b. When set to true,
// debugging information is printed to stdout while parsing.
//
// The default is false.
func Debug(b bool) Option {
return func(p *parser) Option {
old := p.debug
p.debug = b
return Debug(old)
}
}
// Memoize creates an Option to set the memoize flag to b. When set to true,
// the parser will cache all results so each expression is evaluated only
// once. This guarantees linear parsing time even for pathological cases,
// at the expense of more memory and slower times for typical cases.
//
// The default is false.
func Memoize(b bool) Option {
return func(p *parser) Option {
old := p.memoize
p.memoize = b
return Memoize(old)
}
}
// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes.
// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD)
// by character class matchers and is matched by the any matcher.
// The returned matched value, c.text and c.offset are NOT affected.
//
// The default is false.
func AllowInvalidUTF8(b bool) Option {
return func(p *parser) Option {
old := p.allowInvalidUTF8
p.allowInvalidUTF8 = b
return AllowInvalidUTF8(old)
}
}
// Recover creates an Option to set the recover flag to b. When set to
// true, this causes the parser to recover from panics and convert it
// to an error. Setting it to false can be useful while debugging to
// access the full stack trace.
//
// The default is true.
func Recover(b bool) Option {
return func(p *parser) Option {
old := p.recover
p.recover = b
return Recover(old)
}
}
// GlobalStore creates an Option to set a key to a certain value in
// the globalStore.
func GlobalStore(key string, value any) Option {
return func(p *parser) Option {
old := p.cur.globalStore[key]
p.cur.globalStore[key] = value
return GlobalStore(key, old)
}
}
// InitState creates an Option to set a key to a certain value in
// the global "state" store.
func InitState(key string, value any) Option {
return func(p *parser) Option {
old := p.cur.state[key]
p.cur.state[key] = value
return InitState(key, old)
}
}
// ParseFile parses the file identified by filename.
func ParseFile(filename string, opts ...Option) (i any, err error) {
f, err := os.Open(filename)
if err != nil {
return nil, err
}
defer func() {
if closeErr := f.Close(); closeErr != nil {
err = closeErr
}
}()
return ParseReader(filename, f, opts...)
}
// ParseReader parses the data from r using filename as information in the
// error messages.
func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) {
b, err := io.ReadAll(r)
if err != nil {
return nil, err
}
return Parse(filename, b, opts...)
}
// Parse parses the data from b using filename as information in the
// error messages.
func Parse(filename string, b []byte, opts ...Option) (any, error) {
return newParser(filename, b, opts...).parse(g)
}
// position records a position in the text.
type position struct {
line, col, offset int
}
func (p position) String() string {
return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]"
}
// savepoint stores all state required to go back to this point in the
// parser.
type savepoint struct {
position
rn rune
w int
}
type current struct {
pos position // start position of the match
text []byte // raw text of the match
// state is a store for arbitrary key,value pairs that the user wants to be
// tied to the backtracking of the parser.
// This is always rolled back if a parsing rule fails.
state storeDict
// globalStore is a general store for the user to store arbitrary key-value
// pairs that they need to manage and that they do not want tied to the
// backtracking of the parser. This is only modified by the user and never
// rolled back by the parser. It is always up to the user to keep this in a
// consistent state.
globalStore storeDict
}
type storeDict map[string]any
// the AST types...
type grammar struct {
pos position
rules []*rule
}
type rule struct {
pos position
name string
displayName string
expr any
}
type choiceExpr struct {
pos position
alternatives []any
}
type actionExpr struct {
pos position
expr any
run func(*parser) (any, error)
}
type recoveryExpr struct {
pos position
expr any
recoverExpr any
failureLabel []string
}
type seqExpr struct {
pos position
exprs []any
}
type throwExpr struct {
pos position
label string
}
type labeledExpr struct {
pos position
label string
expr any
}
type expr struct {
pos position
expr any
}
type (
andExpr expr
notExpr expr
zeroOrOneExpr expr
zeroOrMoreExpr expr
oneOrMoreExpr expr
)
type ruleRefExpr struct {
pos position
name string
}
type stateCodeExpr struct {
pos position
run func(*parser) error
}
type andCodeExpr struct {
pos position
run func(*parser) (bool, error)
}
type notCodeExpr struct {
pos position
run func(*parser) (bool, error)
}
type litMatcher struct {
pos position
val string
ignoreCase bool
want string
}
type charClassMatcher struct {
pos position
val string
basicLatinChars [128]bool
chars []rune
ranges []rune
classes []*unicode.RangeTable
ignoreCase bool
inverted bool
}
type anyMatcher position
// errList cumulates the errors found by the parser.
type errList []error
func (e *errList) add(err error) {
*e = append(*e, err)
}
func (e errList) err() error {
if len(e) == 0 {
return nil
}
e.dedupe()
return e
}
func (e *errList) dedupe() {
var cleaned []error
set := make(map[string]bool)
for _, err := range *e {
if msg := err.Error(); !set[msg] {
set[msg] = true
cleaned = append(cleaned, err)
}
}
*e = cleaned
}
func (e errList) Error() string {
switch len(e) {
case 0:
return ""
case 1:
return e[0].Error()
default:
var buf bytes.Buffer
for i, err := range e {
if i > 0 {
buf.WriteRune('\n')
}
buf.WriteString(err.Error())
}
return buf.String()
}
}
// parserError wraps an error with a prefix indicating the rule in which
// the error occurred. The original error is stored in the Inner field.
type parserError struct {
Inner error
pos position
prefix string
expected []string
}
// Error returns the error message.
func (p *parserError) Error() string {
return p.prefix + ": " + p.Inner.Error()
}
// newParser creates a parser with the specified input source and options.
func newParser(filename string, b []byte, opts ...Option) *parser {
stats := Stats{
ChoiceAltCnt: make(map[string]map[string]int),
}
p := &parser{
filename: filename,
errs: new(errList),
data: b,
pt: savepoint{position: position{line: 1}},
recover: true,
cur: current{
state: make(storeDict),
globalStore: make(storeDict),
},
maxFailPos: position{col: 1, line: 1},
maxFailExpected: make([]string, 0, 20),
Stats: &stats,
// start rule is rule [0] unless an alternate entrypoint is specified
entrypoint: g.rules[0].name,
}
p.setOptions(opts)
if p.maxExprCnt == 0 {
p.maxExprCnt = math.MaxUint64
}
return p
}
// setOptions applies the options to the parser.
func (p *parser) setOptions(opts []Option) {
for _, opt := range opts {
opt(p)
}
}
type resultTuple struct {
v any
b bool
end savepoint
}
const choiceNoMatch = -1
// Stats stores some statistics, gathered during parsing
type Stats struct {
// ExprCnt counts the number of expressions processed during parsing
// This value is compared to the maximum number of expressions allowed
// (set by the MaxExpressions option).
ExprCnt uint64
// ChoiceAltCnt is used to count for each ordered choice expression,
// which alternative is used how may times.
// These numbers allow to optimize the order of the ordered choice expression
// to increase the performance of the parser
//
// The outer key of ChoiceAltCnt is composed of the name of the rule as well
// as the line and the column of the ordered choice.
// The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative.
// For each alternative the number of matches are counted. If an ordered choice does not
// match, a special counter is incremented. The name of this counter is set with
// the parser option Statistics.
// For an alternative to be included in ChoiceAltCnt, it has to match at least once.
ChoiceAltCnt map[string]map[string]int
}
type parser struct {
filename string
pt savepoint
cur current
data []byte
errs *errList
depth int
recover bool
debug bool
memoize bool
// memoization table for the packrat algorithm:
// map[offset in source] map[expression or rule] {value, match}
memo map[int]map[any]resultTuple
// rules table, maps the rule identifier to the rule node
rules map[string]*rule
// variables stack, map of label to value
vstack []map[string]any
// rule stack, allows identification of the current rule in errors
rstack []*rule
// parse fail
maxFailPos position
maxFailExpected []string
maxFailInvertExpected bool
// max number of expressions to be parsed
maxExprCnt uint64
// entrypoint for the parser
entrypoint string
allowInvalidUTF8 bool
*Stats
choiceNoMatch string
// recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse
recoveryStack []map[string]any
}
// push a variable set on the vstack.
func (p *parser) pushV() {
if cap(p.vstack) == len(p.vstack) {
// create new empty slot in the stack
p.vstack = append(p.vstack, nil)
} else {
// slice to 1 more
p.vstack = p.vstack[:len(p.vstack)+1]
}
// get the last args set
m := p.vstack[len(p.vstack)-1]
if m != nil && len(m) == 0 {
// empty map, all good
return
}
m = make(map[string]any)
p.vstack[len(p.vstack)-1] = m
}
// pop a variable set from the vstack.
func (p *parser) popV() {
// if the map is not empty, clear it
m := p.vstack[len(p.vstack)-1]
if len(m) > 0 {
// GC that map
p.vstack[len(p.vstack)-1] = nil
}
p.vstack = p.vstack[:len(p.vstack)-1]
}
// push a recovery expression with its labels to the recoveryStack
func (p *parser) pushRecovery(labels []string, expr any) {
if cap(p.recoveryStack) == len(p.recoveryStack) {
// create new empty slot in the stack
p.recoveryStack = append(p.recoveryStack, nil)
} else {
// slice to 1 more
p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1]
}
m := make(map[string]any, len(labels))
for _, fl := range labels {
m[fl] = expr
}
p.recoveryStack[len(p.recoveryStack)-1] = m
}
// pop a recovery expression from the recoveryStack
func (p *parser) popRecovery() {
// GC that map
p.recoveryStack[len(p.recoveryStack)-1] = nil
p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1]
}
func (p *parser) print(prefix, s string) string {
if !p.debug {
return s
}
fmt.Printf("%s %d:%d:%d: %s [%#U]\n",
prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn)
return s
}
func (p *parser) in(s string) string {
p.depth++
return p.print(strings.Repeat(" ", p.depth)+">", s)
}
func (p *parser) out(s string) string {
p.depth--
return p.print(strings.Repeat(" ", p.depth)+"<", s)
}
func (p *parser) addErr(err error) {
p.addErrAt(err, p.pt.position, []string{})
}
func (p *parser) addErrAt(err error, pos position, expected []string) {
var buf bytes.Buffer
if p.filename != "" {
buf.WriteString(p.filename)
}
if buf.Len() > 0 {
buf.WriteString(":")
}
buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset))
if len(p.rstack) > 0 {
if buf.Len() > 0 {
buf.WriteString(": ")
}
rule := p.rstack[len(p.rstack)-1]
if rule.displayName != "" {
buf.WriteString("rule " + rule.displayName)
} else {
buf.WriteString("rule " + rule.name)
}
}
pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected}
p.errs.add(pe)
}
func (p *parser) failAt(fail bool, pos position, want string) {
// process fail if parsing fails and not inverted or parsing succeeds and invert is set
if fail == p.maxFailInvertExpected {
if pos.offset < p.maxFailPos.offset {
return
}
if pos.offset > p.maxFailPos.offset {
p.maxFailPos = pos
p.maxFailExpected = p.maxFailExpected[:0]
}
if p.maxFailInvertExpected {
want = "!" + want
}
p.maxFailExpected = append(p.maxFailExpected, want)
}
}
// read advances the parser to the next rune.
func (p *parser) read() {
p.pt.offset += p.pt.w
rn, n := utf8.DecodeRune(p.data[p.pt.offset:])
p.pt.rn = rn
p.pt.w = n
p.pt.col++
if rn == '\n' {
p.pt.line++
p.pt.col = 0
}
if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune
if !p.allowInvalidUTF8 {
p.addErr(errInvalidEncoding)
}
}
}
// restore parser position to the savepoint pt.
func (p *parser) restore(pt savepoint) {
if p.debug {
defer p.out(p.in("restore"))
}
if pt.offset == p.pt.offset {
return
}
p.pt = pt
}
// Cloner is implemented by any value that has a Clone method, which returns a
// copy of the value. This is mainly used for types which are not passed by
// value (e.g map, slice, chan) or structs that contain such types.
//
// This is used in conjunction with the global state feature to create proper
// copies of the state to allow the parser to properly restore the state in
// the case of backtracking.
type Cloner interface {
Clone() any
}
var statePool = &sync.Pool{
New: func() any { return make(storeDict) },
}
func (sd storeDict) Discard() {
for k := range sd {
delete(sd, k)
}
statePool.Put(sd)
}
// clone and return parser current state.
func (p *parser) cloneState() storeDict {
if p.debug {
defer p.out(p.in("cloneState"))
}
state := statePool.Get().(storeDict)
for k, v := range p.cur.state {
if c, ok := v.(Cloner); ok {
state[k] = c.Clone()
} else {
state[k] = v
}
}
return state
}
// restore parser current state to the state storeDict.
// every restoreState should applied only one time for every cloned state
func (p *parser) restoreState(state storeDict) {
if p.debug {
defer p.out(p.in("restoreState"))
}
p.cur.state.Discard()
p.cur.state = state
}
// get the slice of bytes from the savepoint start to the current position.
func (p *parser) sliceFrom(start savepoint) []byte {
return p.data[start.position.offset:p.pt.position.offset]
}
func (p *parser) getMemoized(node any) (resultTuple, bool) {
if len(p.memo) == 0 {
return resultTuple{}, false
}
m := p.memo[p.pt.offset]
if len(m) == 0 {
return resultTuple{}, false
}
res, ok := m[node]
return res, ok
}
func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) {
if p.memo == nil {
p.memo = make(map[int]map[any]resultTuple)
}
m := p.memo[pt.offset]
if m == nil {
m = make(map[any]resultTuple)
p.memo[pt.offset] = m
}
m[node] = tuple
}
func (p *parser) buildRulesTable(g *grammar) {
p.rules = make(map[string]*rule, len(g.rules))
for _, r := range g.rules {
p.rules[r.name] = r
}
}
func (p *parser) parse(g *grammar) (val any, err error) {
if len(g.rules) == 0 {
p.addErr(errNoRule)
return nil, p.errs.err()
}
// TODO : not super critical but this could be generated
p.buildRulesTable(g)
if p.recover {
// panic can be used in action code to stop parsing immediately
// and return the panic as an error.
defer func() {
if e := recover(); e != nil {
if p.debug {
defer p.out(p.in("panic handler"))
}
val = nil
switch e := e.(type) {
case error:
p.addErr(e)
default:
p.addErr(fmt.Errorf("%v", e))
}
err = p.errs.err()
}
}()
}
startRule, ok := p.rules[p.entrypoint]
if !ok {
p.addErr(errInvalidEntrypoint)
return nil, p.errs.err()
}
p.read() // advance to first rune
val, ok = p.parseRule(startRule)
if !ok {
if len(*p.errs) == 0 {
// If parsing fails, but no errors have been recorded, the expected values
// for the farthest parser position are returned as error.
maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected))
for _, v := range p.maxFailExpected {
maxFailExpectedMap[v] = struct{}{}
}
expected := make([]string, 0, len(maxFailExpectedMap))
eof := false
if _, ok := maxFailExpectedMap["!."]; ok {
delete(maxFailExpectedMap, "!.")
eof = true
}
for k := range maxFailExpectedMap {
expected = append(expected, k)
}
sort.Strings(expected)
if eof {
expected = append(expected, "EOF")
}
p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected)
}
return nil, p.errs.err()
}
return val, p.errs.err()
}
func listJoin(list []string, sep string, lastSep string) string {
switch len(list) {
case 0:
return ""
case 1:
return list[0]
default:
return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1]
}
}
func (p *parser) parseRule(rule *rule) (any, bool) {
if p.debug {
defer p.out(p.in("parseRule " + rule.name))
}
if p.memoize {
res, ok := p.getMemoized(rule)
if ok {
p.restore(res.end)
return res.v, res.b
}
}
start := p.pt
p.rstack = append(p.rstack, rule)
p.pushV()
val, ok := p.parseExpr(rule.expr)
p.popV()
p.rstack = p.rstack[:len(p.rstack)-1]
if ok && p.debug {
p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start)))
}
if p.memoize {
p.setMemoized(start, rule, resultTuple{val, ok, p.pt})
}
return val, ok
}
func (p *parser) parseExpr(expr any) (any, bool) {
var pt savepoint
if p.memoize {
res, ok := p.getMemoized(expr)
if ok {
p.restore(res.end)
return res.v, res.b
}
pt = p.pt
}
p.ExprCnt++
if p.ExprCnt > p.maxExprCnt {
panic(errMaxExprCnt)
}
var val any
var ok bool
switch expr := expr.(type) {
case *actionExpr:
val, ok = p.parseActionExpr(expr)
case *andCodeExpr:
val, ok = p.parseAndCodeExpr(expr)
case *andExpr:
val, ok = p.parseAndExpr(expr)
case *anyMatcher:
val, ok = p.parseAnyMatcher(expr)
case *charClassMatcher:
val, ok = p.parseCharClassMatcher(expr)
case *choiceExpr:
val, ok = p.parseChoiceExpr(expr)
case *labeledExpr:
val, ok = p.parseLabeledExpr(expr)
case *litMatcher:
val, ok = p.parseLitMatcher(expr)
case *notCodeExpr:
val, ok = p.parseNotCodeExpr(expr)
case *notExpr:
val, ok = p.parseNotExpr(expr)
case *oneOrMoreExpr:
val, ok = p.parseOneOrMoreExpr(expr)
case *recoveryExpr:
val, ok = p.parseRecoveryExpr(expr)
case *ruleRefExpr:
val, ok = p.parseRuleRefExpr(expr)
case *seqExpr:
val, ok = p.parseSeqExpr(expr)
case *stateCodeExpr:
val, ok = p.parseStateCodeExpr(expr)
case *throwExpr:
val, ok = p.parseThrowExpr(expr)
case *zeroOrMoreExpr:
val, ok = p.parseZeroOrMoreExpr(expr)
case *zeroOrOneExpr:
val, ok = p.parseZeroOrOneExpr(expr)
default:
panic(fmt.Sprintf("unknown expression type %T", expr))
}
if p.memoize {
p.setMemoized(pt, expr, resultTuple{val, ok, p.pt})
}
return val, ok
}
func (p *parser) parseActionExpr(act *actionExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseActionExpr"))
}
start := p.pt
val, ok := p.parseExpr(act.expr)
if ok {
p.cur.pos = start.position
p.cur.text = p.sliceFrom(start)
state := p.cloneState()
actVal, err := act.run(p)
if err != nil {
p.addErrAt(err, start.position, []string{})
}
p.restoreState(state)
val = actVal
}
if ok && p.debug {
p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start)))
}
return val, ok
}
func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseAndCodeExpr"))
}
state := p.cloneState()
ok, err := and.run(p)
if err != nil {
p.addErr(err)
}
p.restoreState(state)
return nil, ok
}
func (p *parser) parseAndExpr(and *andExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseAndExpr"))
}
pt := p.pt
state := p.cloneState()
p.pushV()
_, ok := p.parseExpr(and.expr)
p.popV()
p.restoreState(state)
p.restore(pt)
return nil, ok
}
func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) {
if p.debug {
defer p.out(p.in("parseAnyMatcher"))
}
if p.pt.rn == utf8.RuneError && p.pt.w == 0 {
// EOF - see utf8.DecodeRune
p.failAt(false, p.pt.position, ".")
return nil, false
}
start := p.pt
p.read()
p.failAt(true, start.position, ".")
return p.sliceFrom(start), true
}
func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) {
if p.debug {
defer p.out(p.in("parseCharClassMatcher"))
}
cur := p.pt.rn
start := p.pt
// can't match EOF
if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune
p.failAt(false, start.position, chr.val)
return nil, false
}
if chr.ignoreCase {
cur = unicode.ToLower(cur)
}
// try to match in the list of available chars
for _, rn := range chr.chars {
if rn == cur {
if chr.inverted {
p.failAt(false, start.position, chr.val)
return nil, false
}
p.read()
p.failAt(true, start.position, chr.val)
return p.sliceFrom(start), true
}
}
// try to match in the list of ranges
for i := 0; i < len(chr.ranges); i += 2 {
if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] {
if chr.inverted {
p.failAt(false, start.position, chr.val)
return nil, false
}
p.read()
p.failAt(true, start.position, chr.val)
return p.sliceFrom(start), true
}
}
// try to match in the list of Unicode classes
for _, cl := range chr.classes {
if unicode.Is(cl, cur) {
if chr.inverted {
p.failAt(false, start.position, chr.val)
return nil, false
}
p.read()
p.failAt(true, start.position, chr.val)
return p.sliceFrom(start), true
}
}
if chr.inverted {
p.read()
p.failAt(true, start.position, chr.val)
return p.sliceFrom(start), true
}
p.failAt(false, start.position, chr.val)
return nil, false
}
func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) {
choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col)
m := p.ChoiceAltCnt[choiceIdent]
if m == nil {
m = make(map[string]int)
p.ChoiceAltCnt[choiceIdent] = m
}
// We increment altI by 1, so the keys do not start at 0
alt := strconv.Itoa(altI + 1)
if altI == choiceNoMatch {
alt = p.choiceNoMatch
}
m[alt]++
}
func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseChoiceExpr"))
}
for altI, alt := range ch.alternatives {
// dummy assignment to prevent compile error if optimized
_ = altI
state := p.cloneState()
p.pushV()
val, ok := p.parseExpr(alt)
p.popV()
if ok {
p.incChoiceAltCnt(ch, altI)
return val, ok
}
p.restoreState(state)
}
p.incChoiceAltCnt(ch, choiceNoMatch)
return nil, false
}
func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseLabeledExpr"))
}
p.pushV()
val, ok := p.parseExpr(lab.expr)
p.popV()
if ok && lab.label != "" {
m := p.vstack[len(p.vstack)-1]
m[lab.label] = val
}
return val, ok
}
func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) {
if p.debug {
defer p.out(p.in("parseLitMatcher"))
}
start := p.pt
for _, want := range lit.val {
cur := p.pt.rn
if lit.ignoreCase {
cur = unicode.ToLower(cur)
}
if cur != want {
p.failAt(false, start.position, lit.want)
p.restore(start)
return nil, false
}
p.read()
}
p.failAt(true, start.position, lit.want)
return p.sliceFrom(start), true
}
func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseNotCodeExpr"))
}
state := p.cloneState()
ok, err := not.run(p)
if err != nil {
p.addErr(err)
}
p.restoreState(state)
return nil, !ok
}
func (p *parser) parseNotExpr(not *notExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseNotExpr"))
}
pt := p.pt
state := p.cloneState()
p.pushV()
p.maxFailInvertExpected = !p.maxFailInvertExpected
_, ok := p.parseExpr(not.expr)
p.maxFailInvertExpected = !p.maxFailInvertExpected
p.popV()
p.restoreState(state)
p.restore(pt)
return nil, !ok
}
func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseOneOrMoreExpr"))
}
var vals []any
for {
p.pushV()
val, ok := p.parseExpr(expr.expr)
p.popV()
if !ok {
if len(vals) == 0 {
// did not match once, no match
return nil, false
}
return vals, true
}
vals = append(vals, val)
}
}
func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")"))
}
p.pushRecovery(recover.failureLabel, recover.recoverExpr)
val, ok := p.parseExpr(recover.expr)
p.popRecovery()
return val, ok
}
func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseRuleRefExpr " + ref.name))
}
if ref.name == "" {
panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos))
}
rule := p.rules[ref.name]
if rule == nil {
p.addErr(fmt.Errorf("undefined rule: %s", ref.name))
return nil, false
}
return p.parseRule(rule)
}
func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseSeqExpr"))
}
vals := make([]any, 0, len(seq.exprs))
pt := p.pt
state := p.cloneState()
for _, expr := range seq.exprs {
val, ok := p.parseExpr(expr)
if !ok {
p.restoreState(state)
p.restore(pt)
return nil, false
}
vals = append(vals, val)
}
return vals, true
}
func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseStateCodeExpr"))
}
err := state.run(p)
if err != nil {
p.addErr(err)
}
return nil, true
}
func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseThrowExpr"))
}
for i := len(p.recoveryStack) - 1; i >= 0; i-- {
if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok {
if val, ok := p.parseExpr(recoverExpr); ok {
return val, ok
}
}
}
return nil, false
}
func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseZeroOrMoreExpr"))
}
var vals []any
for {
p.pushV()
val, ok := p.parseExpr(expr.expr)
p.popV()
if !ok {
return vals, true
}
vals = append(vals, val)
}
}
func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) {
if p.debug {
defer p.out(p.in("parseZeroOrOneExpr"))
}
p.pushV()
val, _ := p.parseExpr(expr.expr)
p.popV()
// whether it matched or not, consider it a match
return val, true
}