Add parser for Noms Types (#3277)

At this point it only parses the type syntax.

Towards #1466
This commit is contained in:
Erik Arvidsson
2017-03-22 10:26:48 -07:00
committed by GitHub
parent dadaf99b3c
commit c48bd954f9
3 changed files with 534 additions and 0 deletions
+108
View File
@@ -0,0 +1,108 @@
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package nomdl
import (
"fmt"
"text/scanner"
)
// lexer wraps a text/scanner.Scanner and adds a single token of lookahead.
type lexer struct {
	scanner *scanner.Scanner

	// peekToken is the token read ahead by peek. It is only meaningful while
	// hasPeek is true.
	peekToken rune

	// hasPeek records whether peekToken holds a token that has been scanned
	// but not yet consumed by next. A separate flag is needed because 0 is a
	// value Scan can return (a NUL character in the input), so it cannot
	// double as the "nothing peeked" marker.
	hasPeek bool
}

// next consumes and returns the next token: the pending lookahead token if
// there is one, otherwise a freshly scanned token.
func (lex *lexer) next() rune {
	if lex.hasPeek {
		lex.hasPeek = false
		return lex.peekToken
	}
	return lex.scanner.Scan()
}

// peek returns the next token without consuming it. Repeated calls return the
// same token until next is called.
func (lex *lexer) peek() rune {
	if !lex.hasPeek {
		lex.peekToken = lex.scanner.Scan()
		lex.hasPeek = true
	}
	return lex.peekToken
}

// pos returns the scanner position. It panics while a lookahead token is
// pending, because the scanner has then already advanced past the token the
// caller is asking about.
func (lex *lexer) pos() scanner.Position {
	if lex.hasPeek {
		panic("Cannot use pos after peek")
	}
	return lex.scanner.Pos()
}

// tokenText returns the text of the most recently scanned token. It panics
// while a lookahead token is pending, for the same reason as pos.
func (lex *lexer) tokenText() string {
	if lex.hasPeek {
		panic("Cannot use tokenText after peek")
	}
	return lex.scanner.TokenText()
}

// eat consumes the next token and raises a syntax error unless it equals
// expected. The consumed token is returned.
func (lex *lexer) eat(expected rune) rune {
	tok := lex.next()
	lex.check(expected, tok)
	return tok
}

// eatIf consumes the next token and returns true if it equals expected;
// otherwise the token is left pending as lookahead and false is returned.
func (lex *lexer) eatIf(expected rune) bool {
	if lex.peek() != expected {
		return false
	}
	lex.next()
	return true
}

// check raises a syntax error if actual differs from expected.
func (lex *lexer) check(expected, actual rune) {
	if actual != expected {
		lex.tokenMismatch(expected, actual)
	}
}

// tokenMismatch raises a syntax error naming both the token that was found
// and the one that was expected.
func (lex *lexer) tokenMismatch(expected, actual rune) {
	raiseSyntaxError(fmt.Sprintf("Unexpected token %s, expected %s", scanner.TokenString(actual), scanner.TokenString(expected)), lex.pos())
}

// unexpectedToken raises a syntax error for a token that is not valid at the
// current position.
func (lex *lexer) unexpectedToken(actual rune) {
	raiseSyntaxError(fmt.Sprintf("Unexpected token %s", scanner.TokenString(actual)), lex.pos())
}
// raiseSyntaxError aborts parsing by panicking with a syntaxError that
// carries msg and pos. The panic is meant to be recovered by
// catchSyntaxError.
func raiseSyntaxError(msg string, pos scanner.Position) {
	failure := syntaxError{msg: msg, pos: pos}
	panic(failure)
}
type syntaxError struct {
msg string
pos scanner.Position
}
func (e syntaxError) Error() string {
return fmt.Sprintf("%s, %s", e.msg, e.pos)
}
// catchSyntaxError runs f and converts a syntaxError panic raised inside it
// into an ordinary error return. Any other panic value is re-raised
// unchanged. It returns nil when f completes without panicking.
func catchSyntaxError(f func()) (errRes error) {
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		se, isSyntaxError := recovered.(syntaxError)
		if !isSyntaxError {
			// Not ours to handle: let it keep unwinding.
			panic(recovered)
		}
		errRes = se
	}()
	f()
	return nil
}
+231
View File
@@ -0,0 +1,231 @@
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package nomdl
import (
"io"
"strconv"
"strings"
"text/scanner"
"github.com/attic-labs/noms/go/d"
"github.com/attic-labs/noms/go/types"
)
// Parser provides ways to parse Noms types.
// Instances are created with New; all parsing goes through the wrapped lexer.
type Parser struct {
// lex supplies the token stream consumed by the parse methods.
lex *lexer
}
// ParserOptions allows passing options into New.
type ParserOptions struct {
// Filename is the name of the file we are currently parsing.
// New copies it onto the scanner, so it shows up in the positions
// reported by syntax errors.
Filename string
}
// New creates a new Parser that reads Noms type syntax from r.
//
// options.Filename is stored on the underlying scanner so that it appears in
// the positions reported with syntax errors.
func New(r io.Reader, options ParserOptions) *Parser {
	s := scanner.Scanner{}
	// Init resets Mode, Error and Whitespace to their defaults, so the
	// scanner must be configured after it, not before.
	s.Init(r)
	s.Filename = options.Filename
	// Identifiers name the types, integer literals are needed for Cycle<N>,
	// and floats are scanned so that input such as "12.3" is reported as a
	// single Float token. Comments are recognized and skipped.
	s.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats | scanner.ScanComments | scanner.SkipComments
	lex := lexer{scanner: &s}
	return &Parser{&lex}
}
// ParseType parses a string describing a Noms type.
//
// The whole string must be consumed by a single type. On a syntax error the
// returned type is nil and err describes the problem; panics that are not
// syntax errors are not intercepted.
func ParseType(code string) (typ *types.Type, err error) {
	p := New(strings.NewReader(code), ParserOptions{})
	err = catchSyntaxError(func() {
		typ = p.parseType()
		p.ensureAtEnd()
	})
	if err != nil {
		// A type may already have been parsed before trailing input
		// triggered the error; do not hand back a partial result.
		return nil, err
	}
	return typ, nil
}
// MustParseType is the panicking variant of ParseType: it returns the parsed
// type, and hands any parse error to d.PanicIfError instead of returning it.
func MustParseType(code string) *types.Type {
	parsed, parseErr := ParseType(code)
	d.PanicIfError(parseErr)
	return parsed
}
// ensureAtEnd consumes the next token and raises a syntax error unless it is
// EOF, i.e. unless the input has been fully consumed.
func (p *Parser) ensureAtEnd() {
	tok := p.lex.next()
	p.lex.check(scanner.EOF, tok)
}
// Type :
// TypeWithoutUnion (`|` TypeWithoutUnion)*
//
// TypeWithoutUnion :
// `Blob`
// `Bool`
// `Number`
// `String`
// `Type`
// `Value`
// CycleType
// ListType
// MapType
// RefType
// SetType
// StructType
//
// CycleType :
// `Cycle` `<` uint32 `>`
//
// ListType :
// `List` `<` Type? `>`
//
// MapType :
// `Map` `<` (Type `,` Type)? `>`
//
// RefType :
// `Ref` `<` Type `>`
//
// SetType :
// `Set` `<` Type? `>`
//
// StructType :
// `struct` StructName? `{` StructFields? `}`
//
// StructFields :
// StructField
// StructField `,` StructFields?
//
// StructName :
// Ident
//
// StructField :
// StructFieldName `:` Type
//
// StructFieldName :
// Ident
func (p *Parser) parseType() *types.Type {
	first := p.parseTypeWithoutUnion()
	if p.lex.peek() != '|' {
		// A lone type is returned as-is rather than wrapped in a union.
		return first
	}

	// Collect every `|`-separated member, then build the union once.
	members := []*types.Type{first}
	for p.lex.eatIf('|') {
		members = append(members, p.parseTypeWithoutUnion())
	}
	return types.MakeUnionType(members...)
}
// parseTypeWithoutUnion parses one non-union type: a primitive type name or a
// Cycle, List, Map, Ref, Set or struct type. Any token other than one of the
// recognized identifiers raises an "Unexpected token" syntax error.
func (p *Parser) parseTypeWithoutUnion() *types.Type {
	tok := p.lex.next()
	if tok != scanner.Ident {
		p.lex.unexpectedToken(tok)
		return nil
	}

	switch p.lex.tokenText() {
	// Primitive types.
	case "Blob":
		return types.BlobType
	case "Bool":
		return types.BoolType
	case "Number":
		return types.NumberType
	case "String":
		return types.StringType
	case "Type":
		return types.TypeType
	case "Value":
		return types.ValueType

	// Parameterized and composite types.
	case "Cycle":
		return p.parseCycleType()
	case "List":
		// List<> is allowed and yields an empty-union element type.
		return types.MakeListType(p.parseSingleElemType(true))
	case "Map":
		return p.parseMapType()
	case "Ref":
		// Ref requires an explicit element type.
		return types.MakeRefType(p.parseSingleElemType(false))
	case "Set":
		// Set<> is allowed and yields an empty-union element type.
		return types.MakeSetType(p.parseSingleElemType(true))
	case "struct":
		return p.parseStructType()

	default:
		// An identifier that does not name a type.
		p.lex.unexpectedToken(tok)
		return nil
	}
}
// parseStructType parses the remainder of a struct type after the `struct`
// keyword: an optional name, then `{`, a comma-separated list of
// `field: Type` entries (a trailing comma is accepted), then `}`.
// A field name that appears more than once overwrites the earlier entry.
func (p *Parser) parseStructType() *types.Type {
	var name string
	if tok := p.lex.next(); tok == scanner.Ident {
		name = p.lex.tokenText()
		p.lex.eat('{')
	} else {
		// Anonymous struct: the token just read must itself be the brace.
		p.lex.check('{', tok)
	}

	fields := types.FieldMap{}
	for p.lex.peek() != '}' {
		p.lex.eat(scanner.Ident)
		fieldName := p.lex.tokenText()
		p.lex.eat(':')
		fields[fieldName] = p.parseType()
		if !p.lex.eatIf(',') {
			// No separator, so the field list must end here.
			break
		}
	}
	p.lex.eat('}')
	return types.MakeStructTypeFromFields(name, fields)
}
// parseSingleElemType parses `<` Type `>` and returns the element type.
// When allowEmptyUnion is true, an empty `<>` is also accepted and yields an
// empty union type; otherwise a type is required between the brackets.
func (p *Parser) parseSingleElemType(allowEmptyUnion bool) *types.Type {
	p.lex.eat('<')
	if allowEmptyUnion {
		if p.lex.eatIf('>') {
			return types.MakeUnionType()
		}
	}
	inner := p.parseType()
	p.lex.eat('>')
	return inner
}
// parseCycleType parses `<` Int `>` following the `Cycle` keyword and returns
// the corresponding cycle type. An integer token whose text is not a base-10
// value that fits in 32 bits is reported as an unexpected token.
func (p *Parser) parseCycleType() *types.Type {
	p.lex.eat('<')
	tok := p.lex.eat(scanner.Int)
	if level, err := strconv.ParseUint(p.lex.tokenText(), 10, 32); err == nil {
		p.lex.eat('>')
		return types.MakeCycleType(uint32(level))
	}
	p.lex.unexpectedToken(tok)
	return nil
}
// parseMapType parses the type arguments following the `Map` keyword: either
// an empty `<>`, which uses one empty union type for both key and value, or
// `<` Type `,` Type `>`.
func (p *Parser) parseMapType() *types.Type {
	p.lex.eat('<')
	if p.lex.eatIf('>') {
		empty := types.MakeUnionType()
		return types.MakeMapType(empty, empty)
	}

	key := p.parseType()
	p.lex.eat(',')
	value := p.parseType()
	p.lex.eat('>')
	return types.MakeMapType(key, value)
}
+195
View File
@@ -0,0 +1,195 @@
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package nomdl
import (
"strings"
"testing"
"github.com/attic-labs/noms/go/types"
"github.com/attic-labs/testify/assert"
)
// assertParseType runs a subtest named after code which checks that
// ParseType(code) succeeds and yields a type equal to expected.
func assertParseType(t *testing.T, code string, expected *types.Type) {
	t.Run(code, func(t *testing.T) {
		actual, err := ParseType(code)
		if !assert.NoError(t, err) {
			// Without a parsed type, comparing or describing it would
			// dereference nil and hide the real failure behind a panic.
			return
		}
		assert.True(t, expected.Equals(actual), "Expected: %s, Actual: %s", expected.Describe(), actual.Describe())
	})
}
func assertParseError(t *testing.T, code, msg string) {
t.Run(code, func(t *testing.T) {
p := New(strings.NewReader(code), ParserOptions{
Filename: "example",
})
err := catchSyntaxError(func() {
typ := p.parseType()
assert.Nil(t, typ)
})
if assert.Error(t, err) {
assert.Equal(t, msg, err.Error())
}
})
}
func TestSimpleTypes(t *testing.T) {
assertParseType(t, "Blob", types.BlobType)
assertParseType(t, "Bool", types.BoolType)
assertParseType(t, "Number", types.NumberType)
assertParseType(t, "String", types.StringType)
assertParseType(t, "Value", types.ValueType)
assertParseType(t, "Type", types.TypeType)
}
// TestWhitespace checks that a single leading or trailing space, tab, newline
// or carriage return does not affect parsing.
func TestWhitespace(t *testing.T) {
	for _, ws := range []string{" ", "\t", "\n", "\r"} {
		assertParseType(t, ws+"Blob", types.BlobType)
		assertParseType(t, "Blob"+ws, types.BlobType)
	}
}
// TestComments checks that block and line comments before or after a type
// are ignored.
func TestComments(t *testing.T) {
	inputs := []string{
		"/* */Blob",
		"Blob/* */",
		"Blob//",
		"//\nBlob",
	}
	for _, code := range inputs {
		assertParseType(t, code, types.BlobType)
	}
}
func TestCompoundTypes(t *testing.T) {
assertParseType(t, "List<>", types.MakeListType(types.MakeUnionType()))
assertParseType(t, "List<Bool>", types.MakeListType(types.BoolType))
assertParseError(t, "List<Bool, Number>", `Unexpected token ",", expected ">", example:1:11`)
assertParseError(t, "List<Bool", `Unexpected token EOF, expected ">", example:1:10`)
assertParseError(t, "List<", `Unexpected token EOF, example:1:6`)
assertParseError(t, "List", `Unexpected token EOF, expected "<", example:1:5`)
assertParseType(t, "Set<>", types.MakeSetType(types.MakeUnionType()))
assertParseType(t, "Set<Bool>", types.MakeSetType(types.BoolType))
assertParseError(t, "Set<Bool, Number>", `Unexpected token ",", expected ">", example:1:10`)
assertParseError(t, "Set<Bool", `Unexpected token EOF, expected ">", example:1:9`)
assertParseError(t, "Set<", `Unexpected token EOF, example:1:5`)
assertParseError(t, "Set", `Unexpected token EOF, expected "<", example:1:4`)
assertParseError(t, "Ref<>", `Unexpected token ">", example:1:6`)
assertParseType(t, "Ref<Bool>", types.MakeRefType(types.BoolType))
assertParseError(t, "Ref<Number, Bool>", `Unexpected token ",", expected ">", example:1:12`)
assertParseError(t, "Ref<Number", `Unexpected token EOF, expected ">", example:1:11`)
assertParseError(t, "Ref<", `Unexpected token EOF, example:1:5`)
assertParseError(t, "Ref", `Unexpected token EOF, expected "<", example:1:4`)
assertParseType(t, "Cycle<42>", types.MakeCycleType(42))
assertParseError(t, "Cycle<-123>", `Unexpected token "-", expected Int, example:1:8`)
assertParseError(t, "Cycle<12.3>", `Unexpected token Float, expected Int, example:1:11`)
assertParseError(t, "Cycle<>", `Unexpected token ">", expected Int, example:1:8`)
assertParseError(t, "Cycle<", `Unexpected token EOF, expected Int, example:1:7`)
assertParseError(t, "Cycle", `Unexpected token EOF, expected "<", example:1:6`)
assertParseType(t, "Map<>", types.MakeMapType(types.MakeUnionType(), types.MakeUnionType()))
assertParseType(t, "Map<Bool, String>", types.MakeMapType(types.BoolType, types.StringType))
assertParseError(t, "Map<Bool,>", `Unexpected token ">", example:1:11`)
assertParseError(t, "Map<,Bool>", `Unexpected token ",", example:1:6`)
assertParseError(t, "Map<,>", `Unexpected token ",", example:1:6`)
assertParseError(t, "Map<Bool,Bool", `Unexpected token EOF, expected ">", example:1:14`)
assertParseError(t, "Map<Bool,", `Unexpected token EOF, example:1:10`)
assertParseError(t, "Map<Bool", `Unexpected token EOF, expected ",", example:1:9`)
assertParseError(t, "Map<", `Unexpected token EOF, example:1:5`)
assertParseError(t, "Map", `Unexpected token EOF, expected "<", example:1:4`)
}
// TestStructTypes covers struct type syntax: anonymous and named structs,
// field lists with and without a trailing comma, nested structs, and the
// exact syntax error produced by each malformed form.
func TestStructTypes(t *testing.T) {
// Empty structs, without and with a name.
assertParseType(t, "struct {}", types.MakeStructTypeFromFields("", types.FieldMap{}))
assertParseType(t, "struct S {}", types.MakeStructTypeFromFields("S", types.FieldMap{}))
// One field, without and with a trailing comma.
assertParseType(t, `struct S {
x: Number
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
"x": types.NumberType,
}))
assertParseType(t, `struct S {
x: Number,
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
"x": types.NumberType,
}))
// Two fields, without and with a trailing comma.
assertParseType(t, `struct S {
x: Number,
y: String
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
"x": types.NumberType,
"y": types.StringType,
}))
assertParseType(t, `struct S {
x: Number,
y: String,
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
"x": types.NumberType,
"y": types.StringType,
}))
// A struct nested as a field type.
assertParseType(t, `struct S {
x: Number,
y: struct {
z: String,
},
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
"x": types.NumberType,
"y": types.MakeStructTypeFromFields("", types.FieldMap{
"z": types.StringType,
}),
}))
// Fields must be separated by commas; a newline alone is not enough.
// NOTE(review): the expected column depends on the leading whitespace inside
// the raw string literal — keep it in sync if this literal is reformatted.
assertParseError(t, `struct S {
x: Number
y: String
}`, `Unexpected token Ident, expected "}", example:3:11`)
// Malformed or truncated field lists.
assertParseError(t, `struct S {,}`, `Unexpected token ",", expected Ident, example:1:12`)
assertParseError(t, `struct S {`, `Unexpected token EOF, expected Ident, example:1:11`)
assertParseError(t, `struct S { x }`, `Unexpected token "}", expected ":", example:1:15`)
assertParseError(t, `struct S { x`, `Unexpected token EOF, expected ":", example:1:13`)
assertParseError(t, `struct S { x: }`, `Unexpected token "}", example:1:16`)
assertParseError(t, `struct S { x: `, `Unexpected token EOF, example:1:15`)
assertParseError(t, `struct S { x: Bool`, `Unexpected token EOF, expected "}", example:1:19`)
assertParseError(t, `struct S { x: Bool,`, `Unexpected token EOF, expected Ident, example:1:20`)
assertParseError(t, `struct S { x: Bool,,`, `Unexpected token ",", expected Ident, example:1:21`)
// NOTE(review): this case repeats the `struct S {` case a few lines above.
assertParseError(t, `struct S {`, `Unexpected token EOF, expected Ident, example:1:11`)
// Truncated before the field list starts.
assertParseError(t, `struct S `, `Unexpected token EOF, expected "{", example:1:10`)
assertParseError(t, `struct {`, `Unexpected token EOF, expected Ident, example:1:9`)
assertParseError(t, `struct`, `Unexpected token EOF, expected "{", example:1:7`)
}
// TestUnionTypes covers `|`-separated union types at the top level, as type
// arguments and as struct field types, plus the syntax errors for a dangling
// or doubled `|` and for empty input.
func TestUnionTypes(t *testing.T) {
// Top-level unions of two and three members.
assertParseType(t, "Blob | Bool", types.MakeUnionType(types.BlobType, types.BoolType))
assertParseType(t, "Bool | Number | String", types.MakeUnionType(types.BoolType, types.NumberType, types.StringType))
// Unions as type arguments.
assertParseType(t, "List<Bool | Number>", types.MakeListType(types.MakeUnionType(types.BoolType, types.NumberType)))
assertParseType(t, "Map<Bool | Number, Bool | Number>",
types.MakeMapType(
types.MakeUnionType(types.BoolType, types.NumberType),
types.MakeUnionType(types.BoolType, types.NumberType),
),
)
// Unions as struct field types.
// NOTE(review): the source writes `Number | Bool` while the expected value
// lists Bool first — presumably union equality ignores member order; confirm
// against types.MakeUnionType.
assertParseType(t, `struct S {
x: Number | Bool
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
"x": types.MakeUnionType(types.BoolType, types.NumberType),
}))
assertParseType(t, `struct S {
x: Number | Bool,
y: String
}`, types.MakeStructTypeFromFields("S", types.FieldMap{
"x": types.MakeUnionType(types.BoolType, types.NumberType),
"y": types.StringType,
}))
// A `|` must be followed by a type.
assertParseError(t, "Bool |", "Unexpected token EOF, example:1:7")
assertParseError(t, "Bool | Number |", "Unexpected token EOF, example:1:16")
assertParseError(t, "Bool | | ", `Unexpected token "|", example:1:9`)
// Empty input is not a type.
assertParseError(t, "", `Unexpected token EOF, example:1:1`)
}