mirror of
https://github.com/dolthub/dolt.git
synced 2026-04-29 03:06:35 -05:00
Add parser for Noms Types (#3277)
At this point it only parses the type syntax. Towards #1466
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
// Copyright 2017 Attic Labs, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, version 2.0:
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
package nomdl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"text/scanner"
|
||||
)
|
||||
|
||||
// lexer wraps a text/scanner Scanner and adds a single token of lookahead
// plus helpers that raise syntax errors (as panics) on unexpected input.
type lexer struct {
	scanner *scanner.Scanner
	// peekToken is the token buffered by peek. It is only meaningful while
	// hasPeek is true.
	peekToken rune
	// hasPeek reports whether peekToken holds a buffered token. A separate
	// flag is required because 0 is a legitimate token value: the scanner
	// returns it for a NUL byte in the input, so 0 cannot double as the
	// "nothing buffered" marker.
	hasPeek bool
}

// next consumes and returns the next token, preferring a token previously
// buffered by peek over scanning a new one.
func (lex *lexer) next() rune {
	if lex.hasPeek {
		tok := lex.peekToken
		lex.peekToken = 0
		lex.hasPeek = false
		return tok
	}

	return lex.scanner.Scan()
}

// peek returns the next token without consuming it. Repeated calls return
// the same token until next is called.
func (lex *lexer) peek() rune {
	if !lex.hasPeek {
		lex.peekToken = lex.scanner.Scan()
		lex.hasPeek = true
	}
	return lex.peekToken
}

// pos returns the scanner's current position. It panics when a token is
// buffered by peek, because the scanner has then already advanced past the
// token the caller last consumed.
func (lex *lexer) pos() scanner.Position {
	if lex.hasPeek {
		panic("Cannot use pos after peek")
	}
	return lex.scanner.Pos()
}

// tokenText returns the text of the most recently scanned token. It panics
// when a token is buffered by peek, because the scanner's text then belongs
// to the buffered token rather than the one last consumed.
func (lex *lexer) tokenText() string {
	if lex.hasPeek {
		panic("Cannot use tokenText after peek")
	}
	return lex.scanner.TokenText()
}

// eat consumes the next token, raises a syntax error if it is not expected,
// and returns it.
func (lex *lexer) eat(expected rune) rune {
	tok := lex.next()
	lex.check(expected, tok)
	return tok
}

// eatIf consumes the next token only if it equals expected and reports
// whether it did so.
func (lex *lexer) eatIf(expected rune) bool {
	if lex.peek() != expected {
		return false
	}
	lex.next()
	return true
}

// check raises a syntax error if actual differs from expected.
func (lex *lexer) check(expected, actual rune) {
	if actual != expected {
		lex.tokenMismatch(expected, actual)
	}
}

// tokenMismatch raises a syntax error naming both the token that was found
// and the token that was expected, located at the current position.
func (lex *lexer) tokenMismatch(expected, actual rune) {
	raiseSyntaxError(fmt.Sprintf("Unexpected token %s, expected %s", scanner.TokenString(actual), scanner.TokenString(expected)), lex.pos())
}

// unexpectedToken raises a syntax error naming the token that was found,
// located at the current position.
func (lex *lexer) unexpectedToken(actual rune) {
	raiseSyntaxError(fmt.Sprintf("Unexpected token %s", scanner.TokenString(actual)), lex.pos())
}

// raiseSyntaxError panics with a syntaxError carrying msg and pos. The panic
// is meant to be recovered by catchSyntaxError.
func raiseSyntaxError(msg string, pos scanner.Position) {
	panic(syntaxError{
		msg: msg,
		pos: pos,
	})
}

// syntaxError describes a parse failure and where it occurred.
type syntaxError struct {
	msg string
	pos scanner.Position
}

// Error formats the error as "<message>, <position>".
func (e syntaxError) Error() string {
	return fmt.Sprintf("%s, %s", e.msg, e.pos)
}

// catchSyntaxError runs f and converts a syntaxError panic raised by it into
// a returned error. Any other panic value is re-panicked unchanged. It
// returns nil when f completes without panicking.
func catchSyntaxError(f func()) (errRes error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		se, ok := r.(syntaxError)
		if !ok {
			// Not ours: propagate to the caller.
			panic(r)
		}
		errRes = se
	}()

	f()
	return nil
}
|
||||
@@ -0,0 +1,231 @@
|
||||
// Copyright 2017 Attic Labs, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, version 2.0:
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
package nomdl
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/scanner"
|
||||
|
||||
"github.com/attic-labs/noms/go/d"
|
||||
"github.com/attic-labs/noms/go/types"
|
||||
)
|
||||
|
||||
// Parser provides ways to parse Noms types.
|
||||
type Parser struct {
|
||||
lex *lexer
|
||||
}
|
||||
|
||||
// ParserOptions carries the optional settings accepted by New.
type ParserOptions struct {
	// Filename names the input being parsed. New copies it onto the
	// scanner, so it appears in reported positions.
	Filename string
}
|
||||
|
||||
// New creates a new Parser.
|
||||
func New(r io.Reader, options ParserOptions) *Parser {
|
||||
s := scanner.Scanner{}
|
||||
s.Filename = options.Filename
|
||||
s.Mode = scanner.ScanIdents | scanner.ScanComments | scanner.SkipComments
|
||||
s.Init(r)
|
||||
lex := lexer{scanner: &s}
|
||||
return &Parser{&lex}
|
||||
}
|
||||
|
||||
// ParseType parses a string describing a Noms type.
|
||||
func ParseType(code string) (typ *types.Type, err error) {
|
||||
p := New(strings.NewReader(code), ParserOptions{})
|
||||
err = catchSyntaxError(func() {
|
||||
typ = p.parseType()
|
||||
p.ensureAtEnd()
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// MustParseType parses a string describing a Noms type and panics if there
|
||||
// is an error.
|
||||
func MustParseType(code string) *types.Type {
|
||||
typ, err := ParseType(code)
|
||||
d.PanicIfError(err)
|
||||
return typ
|
||||
}
|
||||
|
||||
func (p *Parser) ensureAtEnd() {
|
||||
p.lex.eat(scanner.EOF)
|
||||
}
|
||||
|
||||
// Type :
|
||||
// TypeWithoutUnion (`|` TypeWithoutUnion)*
|
||||
//
|
||||
// TypeWithoutUnion :
|
||||
// `Blob`
|
||||
// `Bool`
|
||||
// `Number`
|
||||
// `String`
|
||||
// `Type`
|
||||
// `Value`
|
||||
// CycleType
|
||||
// ListType
|
||||
// MapType
|
||||
// RefType
|
||||
// SetType
|
||||
// StructType
|
||||
//
|
||||
// CycleType :
|
||||
// `Cycle` `<` uint32 `>`
|
||||
//
|
||||
// ListType :
|
||||
// `List` `<` Type? `>`
|
||||
//
|
||||
// MapType :
|
||||
// `Map` `<` (Type `,` Type)? `>`
|
||||
//
|
||||
// RefType :
|
||||
// `Ref` `<` Type `>`
|
||||
//
|
||||
// SetType :
|
||||
// `Set` `<` Type? `>`
|
||||
//
|
||||
// StructType :
|
||||
// `struct` StructName? `{` StructFields? `}`
|
||||
//
|
||||
// StructFields :
|
||||
// StructField
|
||||
// StructField `,` StructFields?
|
||||
//
|
||||
// StructName :
|
||||
// Ident
|
||||
//
|
||||
// StructField :
|
||||
// StructFieldName `:` Type
|
||||
//
|
||||
// StructFieldName :
|
||||
// Ident
|
||||
|
||||
func (p *Parser) parseType() *types.Type {
|
||||
t := p.parseTypeWithoutUnion()
|
||||
tok := p.lex.peek()
|
||||
if tok != '|' {
|
||||
return t
|
||||
}
|
||||
unionTypes := []*types.Type{t}
|
||||
|
||||
for {
|
||||
tok = p.lex.peek()
|
||||
if tok == '|' {
|
||||
p.lex.next()
|
||||
} else {
|
||||
break
|
||||
}
|
||||
unionTypes = append(unionTypes, p.parseTypeWithoutUnion())
|
||||
}
|
||||
return types.MakeUnionType(unionTypes...)
|
||||
}
|
||||
|
||||
func (p *Parser) parseTypeWithoutUnion() *types.Type {
|
||||
tok := p.lex.next()
|
||||
switch tok {
|
||||
case scanner.Ident:
|
||||
switch p.lex.tokenText() {
|
||||
case "Bool":
|
||||
return types.BoolType
|
||||
case "Blob":
|
||||
return types.BlobType
|
||||
case "Number":
|
||||
return types.NumberType
|
||||
case "String":
|
||||
return types.StringType
|
||||
case "Type":
|
||||
return types.TypeType
|
||||
case "Value":
|
||||
return types.ValueType
|
||||
case "struct":
|
||||
return p.parseStructType()
|
||||
case "Map":
|
||||
return p.parseMapType()
|
||||
case "List":
|
||||
elemType := p.parseSingleElemType(true)
|
||||
return types.MakeListType(elemType)
|
||||
case "Set":
|
||||
elemType := p.parseSingleElemType(true)
|
||||
return types.MakeSetType(elemType)
|
||||
case "Ref":
|
||||
elemType := p.parseSingleElemType(false)
|
||||
return types.MakeRefType(elemType)
|
||||
case "Cycle":
|
||||
return p.parseCycleType()
|
||||
}
|
||||
}
|
||||
p.lex.unexpectedToken(tok)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parser) parseStructType() *types.Type {
|
||||
tok := p.lex.next()
|
||||
name := ""
|
||||
if tok == scanner.Ident {
|
||||
name = p.lex.tokenText()
|
||||
p.lex.eat('{')
|
||||
} else {
|
||||
p.lex.check('{', tok)
|
||||
}
|
||||
fields := types.FieldMap{}
|
||||
|
||||
for p.lex.peek() != '}' {
|
||||
p.lex.eat(scanner.Ident)
|
||||
|
||||
fieldName := p.lex.tokenText()
|
||||
p.lex.eat(':')
|
||||
typ := p.parseType()
|
||||
fields[fieldName] = typ
|
||||
|
||||
if p.lex.eatIf(',') {
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
p.lex.eat('}')
|
||||
return types.MakeStructTypeFromFields(name, fields)
|
||||
}
|
||||
|
||||
func (p *Parser) parseSingleElemType(allowEmptyUnion bool) *types.Type {
|
||||
p.lex.eat('<')
|
||||
if allowEmptyUnion && p.lex.eatIf('>') {
|
||||
return types.MakeUnionType()
|
||||
}
|
||||
elemType := p.parseType()
|
||||
p.lex.eat('>')
|
||||
return elemType
|
||||
}
|
||||
|
||||
func (p *Parser) parseCycleType() *types.Type {
|
||||
p.lex.eat('<')
|
||||
tok := p.lex.eat(scanner.Int)
|
||||
s, err := strconv.ParseUint(p.lex.tokenText(), 10, 32)
|
||||
if err != nil {
|
||||
p.lex.unexpectedToken(tok)
|
||||
return nil
|
||||
}
|
||||
p.lex.eat('>')
|
||||
return types.MakeCycleType(uint32(s))
|
||||
}
|
||||
|
||||
func (p *Parser) parseMapType() *types.Type {
|
||||
var keyType, valueType *types.Type
|
||||
p.lex.eat('<')
|
||||
|
||||
if p.lex.eatIf('>') {
|
||||
keyType = types.MakeUnionType()
|
||||
valueType = keyType
|
||||
} else {
|
||||
keyType = p.parseType()
|
||||
p.lex.eat(',')
|
||||
valueType = p.parseType()
|
||||
p.lex.eat('>')
|
||||
}
|
||||
return types.MakeMapType(keyType, valueType)
|
||||
}
|
||||
@@ -0,0 +1,195 @@
|
||||
// Copyright 2017 Attic Labs, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, version 2.0:
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
package nomdl
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/attic-labs/noms/go/types"
|
||||
"github.com/attic-labs/testify/assert"
|
||||
)
|
||||
|
||||
func assertParseType(t *testing.T, code string, expected *types.Type) {
|
||||
t.Run(code, func(t *testing.T) {
|
||||
actual, err := ParseType(code)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, expected.Equals(actual), "Expected: %s, Actual: %s", expected.Describe(), actual.Describe())
|
||||
})
|
||||
}
|
||||
|
||||
func assertParseError(t *testing.T, code, msg string) {
|
||||
t.Run(code, func(t *testing.T) {
|
||||
p := New(strings.NewReader(code), ParserOptions{
|
||||
Filename: "example",
|
||||
})
|
||||
err := catchSyntaxError(func() {
|
||||
typ := p.parseType()
|
||||
assert.Nil(t, typ)
|
||||
})
|
||||
if assert.Error(t, err) {
|
||||
assert.Equal(t, msg, err.Error())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestSimpleTypes(t *testing.T) {
|
||||
assertParseType(t, "Blob", types.BlobType)
|
||||
assertParseType(t, "Bool", types.BoolType)
|
||||
assertParseType(t, "Number", types.NumberType)
|
||||
assertParseType(t, "String", types.StringType)
|
||||
assertParseType(t, "Value", types.ValueType)
|
||||
assertParseType(t, "Type", types.TypeType)
|
||||
}
|
||||
|
||||
func TestWhitespace(t *testing.T) {
|
||||
for _, r := range " \t\n\r" {
|
||||
assertParseType(t, string(r)+"Blob", types.BlobType)
|
||||
assertParseType(t, "Blob"+string(r), types.BlobType)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComments(t *testing.T) {
|
||||
assertParseType(t, "/* */Blob", types.BlobType)
|
||||
assertParseType(t, "Blob/* */", types.BlobType)
|
||||
assertParseType(t, "Blob//", types.BlobType)
|
||||
assertParseType(t, "//\nBlob", types.BlobType)
|
||||
}
|
||||
|
||||
func TestCompoundTypes(t *testing.T) {
|
||||
assertParseType(t, "List<>", types.MakeListType(types.MakeUnionType()))
|
||||
assertParseType(t, "List<Bool>", types.MakeListType(types.BoolType))
|
||||
assertParseError(t, "List<Bool, Number>", `Unexpected token ",", expected ">", example:1:11`)
|
||||
assertParseError(t, "List<Bool", `Unexpected token EOF, expected ">", example:1:10`)
|
||||
assertParseError(t, "List<", `Unexpected token EOF, example:1:6`)
|
||||
assertParseError(t, "List", `Unexpected token EOF, expected "<", example:1:5`)
|
||||
|
||||
assertParseType(t, "Set<>", types.MakeSetType(types.MakeUnionType()))
|
||||
assertParseType(t, "Set<Bool>", types.MakeSetType(types.BoolType))
|
||||
assertParseError(t, "Set<Bool, Number>", `Unexpected token ",", expected ">", example:1:10`)
|
||||
assertParseError(t, "Set<Bool", `Unexpected token EOF, expected ">", example:1:9`)
|
||||
assertParseError(t, "Set<", `Unexpected token EOF, example:1:5`)
|
||||
assertParseError(t, "Set", `Unexpected token EOF, expected "<", example:1:4`)
|
||||
|
||||
assertParseError(t, "Ref<>", `Unexpected token ">", example:1:6`)
|
||||
assertParseType(t, "Ref<Bool>", types.MakeRefType(types.BoolType))
|
||||
assertParseError(t, "Ref<Number, Bool>", `Unexpected token ",", expected ">", example:1:12`)
|
||||
assertParseError(t, "Ref<Number", `Unexpected token EOF, expected ">", example:1:11`)
|
||||
assertParseError(t, "Ref<", `Unexpected token EOF, example:1:5`)
|
||||
assertParseError(t, "Ref", `Unexpected token EOF, expected "<", example:1:4`)
|
||||
|
||||
assertParseType(t, "Cycle<42>", types.MakeCycleType(42))
|
||||
assertParseError(t, "Cycle<-123>", `Unexpected token "-", expected Int, example:1:8`)
|
||||
assertParseError(t, "Cycle<12.3>", `Unexpected token Float, expected Int, example:1:11`)
|
||||
assertParseError(t, "Cycle<>", `Unexpected token ">", expected Int, example:1:8`)
|
||||
assertParseError(t, "Cycle<", `Unexpected token EOF, expected Int, example:1:7`)
|
||||
assertParseError(t, "Cycle", `Unexpected token EOF, expected "<", example:1:6`)
|
||||
|
||||
assertParseType(t, "Map<>", types.MakeMapType(types.MakeUnionType(), types.MakeUnionType()))
|
||||
assertParseType(t, "Map<Bool, String>", types.MakeMapType(types.BoolType, types.StringType))
|
||||
assertParseError(t, "Map<Bool,>", `Unexpected token ">", example:1:11`)
|
||||
assertParseError(t, "Map<,Bool>", `Unexpected token ",", example:1:6`)
|
||||
assertParseError(t, "Map<,>", `Unexpected token ",", example:1:6`)
|
||||
assertParseError(t, "Map<Bool,Bool", `Unexpected token EOF, expected ">", example:1:14`)
|
||||
assertParseError(t, "Map<Bool,", `Unexpected token EOF, example:1:10`)
|
||||
assertParseError(t, "Map<Bool", `Unexpected token EOF, expected ",", example:1:9`)
|
||||
assertParseError(t, "Map<", `Unexpected token EOF, example:1:5`)
|
||||
assertParseError(t, "Map", `Unexpected token EOF, expected "<", example:1:4`)
|
||||
}
|
||||
|
||||
// TestStructTypes checks struct type parsing: anonymous and named structs,
// fields with and without a trailing comma, and a nested struct must parse
// to the expected types; malformed struct syntax must report the expected
// syntax-error message and position.
// NOTE(review): the expected positions depend on the exact whitespace inside
// the raw string literals — keep those literals untouched when editing.
func TestStructTypes(t *testing.T) {
|
||||
assertParseType(t, "struct {}", types.MakeStructTypeFromFields("", types.FieldMap{}))
|
||||
assertParseType(t, "struct S {}", types.MakeStructTypeFromFields("S", types.FieldMap{}))
|
||||
|
||||
assertParseType(t, `struct S {
|
||||
x: Number
|
||||
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
|
||||
"x": types.NumberType,
|
||||
}))
|
||||
|
||||
assertParseType(t, `struct S {
|
||||
x: Number,
|
||||
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
|
||||
"x": types.NumberType,
|
||||
}))
|
||||
|
||||
assertParseType(t, `struct S {
|
||||
x: Number,
|
||||
y: String
|
||||
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
|
||||
"x": types.NumberType,
|
||||
"y": types.StringType,
|
||||
}))
|
||||
|
||||
assertParseType(t, `struct S {
|
||||
x: Number,
|
||||
y: String,
|
||||
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
|
||||
"x": types.NumberType,
|
||||
"y": types.StringType,
|
||||
}))
|
||||
|
||||
assertParseType(t, `struct S {
|
||||
x: Number,
|
||||
y: struct {
|
||||
z: String,
|
||||
},
|
||||
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
|
||||
"x": types.NumberType,
|
||||
"y": types.MakeStructTypeFromFields("", types.FieldMap{
|
||||
"z": types.StringType,
|
||||
}),
|
||||
}))
|
||||
|
||||
assertParseError(t, `struct S {
|
||||
x: Number
|
||||
y: String
|
||||
}`, `Unexpected token Ident, expected "}", example:3:11`)
|
||||
|
||||
assertParseError(t, `struct S {,}`, `Unexpected token ",", expected Ident, example:1:12`)
|
||||
assertParseError(t, `struct S {`, `Unexpected token EOF, expected Ident, example:1:11`)
|
||||
assertParseError(t, `struct S { x }`, `Unexpected token "}", expected ":", example:1:15`)
|
||||
assertParseError(t, `struct S { x`, `Unexpected token EOF, expected ":", example:1:13`)
|
||||
assertParseError(t, `struct S { x: }`, `Unexpected token "}", example:1:16`)
|
||||
assertParseError(t, `struct S { x: `, `Unexpected token EOF, example:1:15`)
|
||||
assertParseError(t, `struct S { x: Bool`, `Unexpected token EOF, expected "}", example:1:19`)
|
||||
assertParseError(t, `struct S { x: Bool,`, `Unexpected token EOF, expected Ident, example:1:20`)
|
||||
assertParseError(t, `struct S { x: Bool,,`, `Unexpected token ",", expected Ident, example:1:21`)
|
||||
|
||||
assertParseError(t, `struct S {`, `Unexpected token EOF, expected Ident, example:1:11`)
|
||||
assertParseError(t, `struct S `, `Unexpected token EOF, expected "{", example:1:10`)
|
||||
assertParseError(t, `struct {`, `Unexpected token EOF, expected Ident, example:1:9`)
|
||||
assertParseError(t, `struct`, `Unexpected token EOF, expected "{", example:1:7`)
|
||||
}
|
||||
|
||||
// TestUnionTypes checks `|`-separated unions at the top level, inside List
// and Map type arguments, and as struct field types, and checks that a
// dangling or doubled `|` and an empty input report the expected syntax
// errors.
func TestUnionTypes(t *testing.T) {
|
||||
assertParseType(t, "Blob | Bool", types.MakeUnionType(types.BlobType, types.BoolType))
|
||||
assertParseType(t, "Bool | Number | String", types.MakeUnionType(types.BoolType, types.NumberType, types.StringType))
|
||||
assertParseType(t, "List<Bool | Number>", types.MakeListType(types.MakeUnionType(types.BoolType, types.NumberType)))
|
||||
assertParseType(t, "Map<Bool | Number, Bool | Number>",
|
||||
types.MakeMapType(
|
||||
types.MakeUnionType(types.BoolType, types.NumberType),
|
||||
types.MakeUnionType(types.BoolType, types.NumberType),
|
||||
),
|
||||
)
|
||||
assertParseType(t, `struct S {
|
||||
x: Number | Bool
|
||||
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
|
||||
"x": types.MakeUnionType(types.BoolType, types.NumberType),
|
||||
}))
|
||||
assertParseType(t, `struct S {
|
||||
x: Number | Bool,
|
||||
y: String
|
||||
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
|
||||
"x": types.MakeUnionType(types.BoolType, types.NumberType),
|
||||
"y": types.StringType,
|
||||
}))
|
||||
|
||||
assertParseError(t, "Bool |", "Unexpected token EOF, example:1:7")
|
||||
assertParseError(t, "Bool | Number |", "Unexpected token EOF, example:1:16")
|
||||
assertParseError(t, "Bool | | ", `Unexpected token "|", example:1:9`)
|
||||
assertParseError(t, "", `Unexpected token EOF, example:1:1`)
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user