mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-05-03 17:29:22 -05:00
[full-ci] enhancement: add more kql spec tests and simplify ast normalization (#7254)
* enhancement: add more kql spec tests and simplify ast normalization * enhancement: kql parser error if query starts with AND * enhancement: add kql docs and support for date and time only dateTimeRestriction queries * enhancement: add the ability to decide how kql nodes get connected connecting nodes (with edges) seem straight forward when not using group, the default connection for nodes with the same node is always OR. THis only applies for first level nodes, for grouped nodes it is defined differently. The KQL docs are saying, nodes inside a grouped node, with the same key are connected by a AND edge. * enhancement: explicit error handling for falsy group nodes and queries with leading binary operator * enhancement: use optimized grammar for kql parser and toolify pigeon * enhancement: simplify error handling * fix: kql implicit 'AND' and 'OR' follows the ms html spec instead of the pdf spec
This commit is contained in:
+13
@@ -0,0 +1,13 @@
|
||||
language: go
|
||||
|
||||
go:
|
||||
- 1.13.x
|
||||
|
||||
before_install:
|
||||
- go get -t -v ./...
|
||||
|
||||
script:
|
||||
- go test -race -coverprofile=coverage.txt -covermode=atomic
|
||||
|
||||
after_success:
|
||||
- bash <(curl -s https://codecov.io/bash)
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015-2017 Aaron Raddon
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
+323
@@ -0,0 +1,323 @@
|
||||
Go Date Parser
|
||||
---------------------------
|
||||
|
||||
Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison.
|
||||
|
||||
|
||||
[](https://codecov.io/gh/araddon/dateparse)
|
||||
[](http://godoc.org/github.com/araddon/dateparse)
|
||||
[](https://travis-ci.org/araddon/dateparse)
|
||||
[](https://goreportcard.com/report/araddon/dateparse)
|
||||
|
||||
**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings.
|
||||
|
||||
**Timezones** The location your server is configured affects the results! See example or https://play.golang.org/p/IDHRalIyXh and last paragraph here https://golang.org/pkg/time/#Parse.
|
||||
|
||||
|
||||
```go
|
||||
|
||||
// Normal parse. Equivalent Timezone rules as time.Parse()
|
||||
t, err := dateparse.ParseAny("3/1/2014")
|
||||
|
||||
// Parse Strict, error on ambigous mm/dd vs dd/mm dates
|
||||
t, err := dateparse.ParseStrict("3/1/2014")
|
||||
> returns error
|
||||
|
||||
// Return a string that represents the layout to parse the given date-time.
|
||||
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
|
||||
> "Jan 2, 2006 3:04:05 PM"
|
||||
|
||||
```
|
||||
|
||||
cli tool for testing dateformats
|
||||
----------------------------------
|
||||
|
||||
[Date Parse CLI](https://github.com/araddon/dateparse/blob/master/dateparse)
|
||||
|
||||
|
||||
Extended example
|
||||
-------------------
|
||||
|
||||
https://github.com/araddon/dateparse/blob/master/example/main.go
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/scylladb/termtables"
|
||||
"github.com/araddon/dateparse"
|
||||
)
|
||||
|
||||
var examples = []string{
|
||||
"May 8, 2009 5:57:51 PM",
|
||||
"oct 7, 1970",
|
||||
"oct 7, '70",
|
||||
"oct. 7, 1970",
|
||||
"oct. 7, 70",
|
||||
"Mon Jan 2 15:04:05 2006",
|
||||
"Mon Jan 2 15:04:05 MST 2006",
|
||||
"Mon Jan 02 15:04:05 -0700 2006",
|
||||
"Monday, 02-Jan-06 15:04:05 MST",
|
||||
"Mon, 02 Jan 2006 15:04:05 MST",
|
||||
"Tue, 11 Jul 2017 16:28:13 +0200 (CEST)",
|
||||
"Mon, 02 Jan 2006 15:04:05 -0700",
|
||||
"Mon 30 Sep 2018 09:09:09 PM UTC",
|
||||
"Mon Aug 10 15:44:11 UTC+0100 2015",
|
||||
"Thu, 4 Jan 2018 17:53:36 +0000",
|
||||
"Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)",
|
||||
"Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)",
|
||||
"September 17, 2012 10:09am",
|
||||
"September 17, 2012 at 10:09am PST-08",
|
||||
"September 17, 2012, 10:10:09",
|
||||
"October 7, 1970",
|
||||
"October 7th, 1970",
|
||||
"12 Feb 2006, 19:17",
|
||||
"12 Feb 2006 19:17",
|
||||
"14 May 2019 19:11:40.164",
|
||||
"7 oct 70",
|
||||
"7 oct 1970",
|
||||
"03 February 2013",
|
||||
"1 July 2013",
|
||||
"2013-Feb-03",
|
||||
// dd/Mon/yyy alpha Months
|
||||
"06/Jan/2008:15:04:05 -0700",
|
||||
"06/Jan/2008 15:04:05 -0700",
|
||||
// mm/dd/yy
|
||||
"3/31/2014",
|
||||
"03/31/2014",
|
||||
"08/21/71",
|
||||
"8/1/71",
|
||||
"4/8/2014 22:05",
|
||||
"04/08/2014 22:05",
|
||||
"4/8/14 22:05",
|
||||
"04/2/2014 03:00:51",
|
||||
"8/8/1965 12:00:00 AM",
|
||||
"8/8/1965 01:00:01 PM",
|
||||
"8/8/1965 01:00 PM",
|
||||
"8/8/1965 1:00 PM",
|
||||
"8/8/1965 12:00 AM",
|
||||
"4/02/2014 03:00:51",
|
||||
"03/19/2012 10:11:59",
|
||||
"03/19/2012 10:11:59.3186369",
|
||||
// yyyy/mm/dd
|
||||
"2014/3/31",
|
||||
"2014/03/31",
|
||||
"2014/4/8 22:05",
|
||||
"2014/04/08 22:05",
|
||||
"2014/04/2 03:00:51",
|
||||
"2014/4/02 03:00:51",
|
||||
"2012/03/19 10:11:59",
|
||||
"2012/03/19 10:11:59.3186369",
|
||||
// yyyy:mm:dd
|
||||
"2014:3:31",
|
||||
"2014:03:31",
|
||||
"2014:4:8 22:05",
|
||||
"2014:04:08 22:05",
|
||||
"2014:04:2 03:00:51",
|
||||
"2014:4:02 03:00:51",
|
||||
"2012:03:19 10:11:59",
|
||||
"2012:03:19 10:11:59.3186369",
|
||||
// Chinese
|
||||
"2014年04月08日",
|
||||
// yyyy-mm-ddThh
|
||||
"2006-01-02T15:04:05+0000",
|
||||
"2009-08-12T22:15:09-07:00",
|
||||
"2009-08-12T22:15:09",
|
||||
"2009-08-12T22:15:09.988",
|
||||
"2009-08-12T22:15:09Z",
|
||||
"2017-07-19T03:21:51:897+0100",
|
||||
"2019-05-29T08:41-04", // no seconds, 2 digit TZ offset
|
||||
// yyyy-mm-dd hh:mm:ss
|
||||
"2014-04-26 17:24:37.3186369",
|
||||
"2012-08-03 18:31:59.257000000",
|
||||
"2014-04-26 17:24:37.123",
|
||||
"2013-04-01 22:43",
|
||||
"2013-04-01 22:43:22",
|
||||
"2014-12-16 06:20:00 UTC",
|
||||
"2014-12-16 06:20:00 GMT",
|
||||
"2014-04-26 05:24:37 PM",
|
||||
"2014-04-26 13:13:43 +0800",
|
||||
"2014-04-26 13:13:43 +0800 +08",
|
||||
"2014-04-26 13:13:44 +09:00",
|
||||
"2012-08-03 18:31:59.257000000 +0000 UTC",
|
||||
"2015-09-30 18:48:56.35272715 +0000 UTC",
|
||||
"2015-02-18 00:12:00 +0000 GMT",
|
||||
"2015-02-18 00:12:00 +0000 UTC",
|
||||
"2015-02-08 03:02:00 +0300 MSK m=+0.000000001",
|
||||
"2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001",
|
||||
"2017-07-19 03:21:51+00:00",
|
||||
"2014-04-26",
|
||||
"2014-04",
|
||||
"2014",
|
||||
"2014-05-11 08:20:13,787",
|
||||
// yyyy-mm-dd-07:00
|
||||
"2020-07-20+08:00",
|
||||
// mm.dd.yy
|
||||
"3.31.2014",
|
||||
"03.31.2014",
|
||||
"08.21.71",
|
||||
"2014.03",
|
||||
"2014.03.30",
|
||||
// yyyymmdd and similar
|
||||
"20140601",
|
||||
"20140722105203",
|
||||
// yymmdd hh:mm:yy mysql log
|
||||
// 080313 05:21:55 mysqld started
|
||||
"171113 14:14:20",
|
||||
// unix seconds, ms, micro, nano
|
||||
"1332151919",
|
||||
"1384216367189",
|
||||
"1384216367111222",
|
||||
"1384216367111222333",
|
||||
}
|
||||
|
||||
var (
|
||||
timezone = ""
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.StringVar(&timezone, "timezone", "UTC", "Timezone aka `America/Los_Angeles` formatted time-zone")
|
||||
flag.Parse()
|
||||
|
||||
if timezone != "" {
|
||||
// NOTE: This is very, very important to understand
|
||||
// time-parsing in go
|
||||
loc, err := time.LoadLocation(timezone)
|
||||
if err != nil {
|
||||
panic(err.Error())
|
||||
}
|
||||
time.Local = loc
|
||||
}
|
||||
|
||||
table := termtables.CreateTable()
|
||||
|
||||
table.AddHeaders("Input", "Parsed, and Output as %v")
|
||||
for _, dateExample := range examples {
|
||||
t, err := dateparse.ParseLocal(dateExample)
|
||||
if err != nil {
|
||||
panic(err.Error())
|
||||
}
|
||||
table.AddRow(dateExample, fmt.Sprintf("%v", t))
|
||||
}
|
||||
fmt.Println(table.Render())
|
||||
}
|
||||
|
||||
/*
|
||||
+-------------------------------------------------------+-----------------------------------------+
|
||||
| Input | Parsed, and Output as %v |
|
||||
+-------------------------------------------------------+-----------------------------------------+
|
||||
| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC |
|
||||
| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC |
|
||||
| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST |
|
||||
| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 |
|
||||
| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST |
|
||||
| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST |
|
||||
| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 |
|
||||
| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 |
|
||||
| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC |
|
||||
| Mon Aug 10 15:44:11 UTC+0100 2015 | 2015-08-10 15:44:11 +0000 UTC |
|
||||
| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC |
|
||||
| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 GMT |
|
||||
| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 |
|
||||
| September 17, 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC |
|
||||
| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST |
|
||||
| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC |
|
||||
| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC |
|
||||
| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC |
|
||||
| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC |
|
||||
| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| 7 oct 1970 | 1970-10-07 00:00:00 +0000 UTC |
|
||||
| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC |
|
||||
| 1 July 2013 | 2013-07-01 00:00:00 +0000 UTC |
|
||||
| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC |
|
||||
| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 |
|
||||
| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 |
|
||||
| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC |
|
||||
| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC |
|
||||
| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC |
|
||||
| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC |
|
||||
| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC |
|
||||
| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
|
||||
| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC |
|
||||
| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC |
|
||||
| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC |
|
||||
| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC |
|
||||
| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC |
|
||||
| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
|
||||
| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC |
|
||||
| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC |
|
||||
| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC |
|
||||
| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC |
|
||||
| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
|
||||
| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
|
||||
| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC |
|
||||
| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC |
|
||||
| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC |
|
||||
| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC |
|
||||
| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
|
||||
| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
|
||||
| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC |
|
||||
| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC |
|
||||
| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC |
|
||||
| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC |
|
||||
| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 |
|
||||
| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC |
|
||||
| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC |
|
||||
| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC |
|
||||
| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 |
|
||||
| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 |
|
||||
| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC |
|
||||
| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC |
|
||||
| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC |
|
||||
| 2013-04-01 22:43 | 2013-04-01 22:43:00 +0000 UTC |
|
||||
| 2013-04-01 22:43:22 | 2013-04-01 22:43:22 +0000 UTC |
|
||||
| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC |
|
||||
| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 UTC |
|
||||
| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC |
|
||||
| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 |
|
||||
| 2014-04-26 13:13:43 +0800 +08 | 2014-04-26 13:13:43 +0800 +0800 |
|
||||
| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 |
|
||||
| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC |
|
||||
| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC |
|
||||
| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 UTC |
|
||||
| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC |
|
||||
| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 +0300 |
|
||||
| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 +0300 |
|
||||
| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC |
|
||||
| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC |
|
||||
| 2014-04 | 2014-04-01 00:00:00 +0000 UTC |
|
||||
| 2014 | 2014-01-01 00:00:00 +0000 UTC |
|
||||
| 2014-05-11 08:20:13,787 | 2014-05-11 08:20:13.787 +0000 UTC |
|
||||
| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 |
|
||||
| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC |
|
||||
| 08.21.71 | 1971-08-21 00:00:00 +0000 UTC |
|
||||
| 2014.03 | 2014-03-01 00:00:00 +0000 UTC |
|
||||
| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC |
|
||||
| 20140601 | 2014-06-01 00:00:00 +0000 UTC |
|
||||
| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC |
|
||||
| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC |
|
||||
| 1332151919 | 2012-03-19 10:11:59 +0000 UTC |
|
||||
| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC |
|
||||
| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC |
|
||||
| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC |
|
||||
+-------------------------------------------------------+-----------------------------------------+
|
||||
*/
|
||||
|
||||
```
|
||||
+2189
File diff suppressed because it is too large
Load Diff
+6
@@ -0,0 +1,6 @@
|
||||
# See http://editorconfig.org
|
||||
|
||||
# In Go files we indent with tabs but still
|
||||
# set indent_size to control the GitHub web viewer.
|
||||
[*.go]
|
||||
indent_size=4
|
||||
+2
@@ -0,0 +1,2 @@
|
||||
*.go text eol=lf
|
||||
*.peg text eol=lf
|
||||
Generated
+22
@@ -0,0 +1,22 @@
|
||||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
pigeon
|
||||
bin/
|
||||
bootstrap/cmd/bootstrap-pigeon/bootstrap-pigeon
|
||||
bootstrap/cmd/bootstrap-build/bootstrap-build
|
||||
bootstrap/cmd/pegscan/pegscan
|
||||
bootstrap/cmd/pegparse/pegparse
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
# Temporary and swap files
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
language: go
|
||||
|
||||
script: make test
|
||||
|
||||
go:
|
||||
- 1.11.x
|
||||
- 1.12.x
|
||||
- tip
|
||||
+33
@@ -0,0 +1,33 @@
|
||||
# Contributing to pigeon
|
||||
|
||||
There are various ways to help support this open source project:
|
||||
|
||||
* if you use pigeon and find it useful, talk about it - that's probably the most basic way to help any open-source project: getting the word out that it exists and that it can be useful
|
||||
* if you use pigeon and find bugs, please [file an issue][0]
|
||||
* if something is poorly documented, or doesn't work as documented, this is also a bug, please [file an issue][0]
|
||||
* if you can fix the issue (whether it is documentation- or code-related), then [submit a pull-request][1] - but read on to see what should be done to get it merged
|
||||
* if you would like to see some new feature/behaviour being implemented, please first [open an issue][0] to discuss it because features are less likely to get merged compared to bug fixes
|
||||
|
||||
## Submitting a pull request
|
||||
|
||||
Assuming you already have a copy of the repository (either via `go get`, a github fork, a clone, etc.), you will also need `make` to regenerate all tools and files generated when a dependency changes. I use GNU make version 4.1, other versions of make may work too but haven't been tested.
|
||||
|
||||
Run `make` in the root directory of the repository. That will create the bootstrap builder, the bootstrap parser, and the final parser, along with some generated Go files. Once `make` is run successfully, run `go test ./...` in the root directory to make sure all tests pass.
|
||||
|
||||
Once this is done and tests pass, you can start implementing the bug fix (or the new feature provided **it has already been discussed and agreed in a github issue** first).
|
||||
|
||||
For a bug fix, the best way to proceed is to first write a test that proves the bug, then write the code that fixes the bug and makes the test pass. All other tests should still pass too (unless it relied on the buggy behaviour, in which case existing tests must be fixed).
|
||||
|
||||
For a new feature, it must be thoroughly tested. New code without new test(s) is unlikely to get merged.
|
||||
|
||||
Respect the coding style of the repository, which means essentially to respect the [coding guidelines of the Go community][2]. Use `gofmt` to format your code, and `goimports` to add and format the list of imported packages (or do it manually, but in a `goimports`-style).
|
||||
|
||||
Once all code is done and tests pass, regenerate the whole tree with `make`, run `make lint` to make sure the code is correct, and run tests again. You are now ready to submit the pull request.
|
||||
|
||||
## Licensing
|
||||
|
||||
All pull requests that get merged will be made available under the BSD 3-Clause license (see the LICENSE file for details), as the rest of the pigeon repository. Do not submit pull requests if you do not want your contributions to be made available under those terms.
|
||||
|
||||
[0]: https://github.com/mna/pigeon/issues/new
|
||||
[1]: https://github.com/mna/pigeon/pulls
|
||||
[2]: https://github.com/golang/go/wiki/CodeReviewComments
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
Copyright (c) 2015, Martin Angers & Contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
+198
@@ -0,0 +1,198 @@
|
||||
SHELL = /bin/bash
|
||||
|
||||
# force the use of go modules
|
||||
export GO111MODULE = on
|
||||
|
||||
# directories and source code lists
|
||||
ROOT = .
|
||||
ROOT_SRC = $(ROOT)/*.go
|
||||
BINDIR = ./bin
|
||||
EXAMPLES_DIR = $(ROOT)/examples
|
||||
TEST_DIR = $(ROOT)/test
|
||||
|
||||
# builder and ast packages
|
||||
BUILDER_DIR = $(ROOT)/builder
|
||||
BUILDER_SRC = $(BUILDER_DIR)/*.go
|
||||
AST_DIR = $(ROOT)/ast
|
||||
AST_SRC = $(AST_DIR)/*.go
|
||||
|
||||
# bootstrap tools variables
|
||||
BOOTSTRAP_DIR = $(ROOT)/bootstrap
|
||||
BOOTSTRAP_SRC = $(BOOTSTRAP_DIR)/*.go
|
||||
BOOTSTRAPBUILD_DIR = $(BOOTSTRAP_DIR)/cmd/bootstrap-build
|
||||
BOOTSTRAPBUILD_SRC = $(BOOTSTRAPBUILD_DIR)/*.go
|
||||
BOOTSTRAPPIGEON_DIR = $(BOOTSTRAP_DIR)/cmd/bootstrap-pigeon
|
||||
BOOTSTRAPPIGEON_SRC = $(BOOTSTRAPPIGEON_DIR)/*.go
|
||||
STATICCODEGENERATOR_DIR = $(BOOTSTRAP_DIR)/cmd/static_code_generator
|
||||
STATICCODEGENERATOR_SRC = $(STATICCODEGENERATOR_DIR)/*.go
|
||||
|
||||
# grammar variables
|
||||
GRAMMAR_DIR = $(ROOT)/grammar
|
||||
BOOTSTRAP_GRAMMAR = $(GRAMMAR_DIR)/bootstrap.peg
|
||||
PIGEON_GRAMMAR = $(GRAMMAR_DIR)/pigeon.peg
|
||||
|
||||
TEST_GENERATED_SRC = $(patsubst %.peg,%.go,$(shell echo ./{examples,test}/**/*.peg))
|
||||
|
||||
all: $(BUILDER_DIR)/generated_static_code.go $(BINDIR)/static_code_generator \
|
||||
$(BUILDER_DIR)/generated_static_code_range_table.go \
|
||||
$(BINDIR)/bootstrap-build $(BOOTSTRAPPIGEON_DIR)/bootstrap_pigeon.go \
|
||||
$(BINDIR)/bootstrap-pigeon $(ROOT)/pigeon.go $(BINDIR)/pigeon \
|
||||
$(TEST_GENERATED_SRC)
|
||||
|
||||
$(BINDIR)/static_code_generator: $(STATICCODEGENERATOR_SRC)
|
||||
go build -o $@ $(STATICCODEGENERATOR_DIR)
|
||||
|
||||
$(BINDIR)/bootstrap-build: $(BOOTSTRAPBUILD_SRC) $(BOOTSTRAP_SRC) $(BUILDER_SRC) \
|
||||
$(AST_SRC)
|
||||
go build -o $@ $(BOOTSTRAPBUILD_DIR)
|
||||
|
||||
$(BOOTSTRAPPIGEON_DIR)/bootstrap_pigeon.go: $(BINDIR)/bootstrap-build \
|
||||
$(BOOTSTRAP_GRAMMAR)
|
||||
$(BINDIR)/bootstrap-build $(BOOTSTRAP_GRAMMAR) > $@
|
||||
|
||||
$(BINDIR)/bootstrap-pigeon: $(BOOTSTRAPPIGEON_SRC) \
|
||||
$(BOOTSTRAPPIGEON_DIR)/bootstrap_pigeon.go
|
||||
go build -o $@ $(BOOTSTRAPPIGEON_DIR)
|
||||
|
||||
$(ROOT)/pigeon.go: $(BINDIR)/bootstrap-pigeon $(PIGEON_GRAMMAR)
|
||||
$(BINDIR)/bootstrap-pigeon $(PIGEON_GRAMMAR) > $@
|
||||
|
||||
$(BINDIR)/pigeon: $(ROOT_SRC) $(ROOT)/pigeon.go
|
||||
go build -o $@ $(ROOT)
|
||||
|
||||
$(BUILDER_DIR)/generated_static_code.go: $(BUILDER_DIR)/static_code.go $(BINDIR)/static_code_generator
|
||||
$(BINDIR)/static_code_generator $(BUILDER_DIR)/static_code.go $@ staticCode
|
||||
|
||||
$(BUILDER_DIR)/generated_static_code_range_table.go: $(BUILDER_DIR)/static_code_range_table.go $(BINDIR)/static_code_generator
|
||||
$(BINDIR)/static_code_generator $(BUILDER_DIR)/static_code_range_table.go $@ rangeTable0
|
||||
|
||||
$(BOOTSTRAP_GRAMMAR):
|
||||
$(PIGEON_GRAMMAR):
|
||||
|
||||
# surely there's a better way to define the examples and test targets
|
||||
$(EXAMPLES_DIR)/json/json.go: $(EXAMPLES_DIR)/json/json.peg $(EXAMPLES_DIR)/json/optimized/json.go $(EXAMPLES_DIR)/json/optimized-grammar/json.go $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(EXAMPLES_DIR)/json/optimized/json.go: $(EXAMPLES_DIR)/json/json.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-parser -optimize-basic-latin $< > $@
|
||||
|
||||
$(EXAMPLES_DIR)/json/optimized-grammar/json.go: $(EXAMPLES_DIR)/json/json.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-grammar $< > $@
|
||||
|
||||
$(EXAMPLES_DIR)/calculator/calculator.go: $(EXAMPLES_DIR)/calculator/calculator.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(EXAMPLES_DIR)/indentation/indentation.go: $(EXAMPLES_DIR)/indentation/indentation.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/andnot/andnot.go: $(TEST_DIR)/andnot/andnot.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/predicates/predicates.go: $(TEST_DIR)/predicates/predicates.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_1/issue_1.go: $(TEST_DIR)/issue_1/issue_1.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/linear/linear.go: $(TEST_DIR)/linear/linear.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_18/issue_18.go: $(TEST_DIR)/issue_18/issue_18.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/runeerror/runeerror.go: $(TEST_DIR)/runeerror/runeerror.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/errorpos/errorpos.go: $(TEST_DIR)/errorpos/errorpos.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/global_store/global_store.go: $(TEST_DIR)/global_store/global_store.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/goto/goto.go: $(TEST_DIR)/goto/goto.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/goto_state/goto_state.go: $(TEST_DIR)/goto_state/goto_state.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/max_expr_cnt/maxexpr.go: $(TEST_DIR)/max_expr_cnt/maxexpr.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/labeled_failures/labeled_failures.go: $(TEST_DIR)/labeled_failures/labeled_failures.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/thrownrecover/thrownrecover.go: $(TEST_DIR)/thrownrecover/thrownrecover.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/alternate_entrypoint/altentry.go: $(TEST_DIR)/alternate_entrypoint/altentry.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-grammar -alternate-entrypoints Entry2,Entry3,C $< > $@
|
||||
|
||||
$(TEST_DIR)/state/state.go: $(TEST_DIR)/state/state.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-grammar $< > $@
|
||||
|
||||
$(TEST_DIR)/stateclone/stateclone.go: $(TEST_DIR)/stateclone/stateclone.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/statereadonly/statereadonly.go: $(TEST_DIR)/statereadonly/statereadonly.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/staterestore/staterestore.go: $(TEST_DIR)/staterestore/staterestore.peg $(TEST_DIR)/staterestore/standard/staterestore.go $(TEST_DIR)/staterestore/optimized/staterestore.go $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/staterestore/standard/staterestore.go: $(TEST_DIR)/staterestore/staterestore.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/staterestore/optimized/staterestore.go: $(TEST_DIR)/staterestore/staterestore.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-grammar -optimize-parser -alternate-entrypoints TestAnd,TestNot $< > $@
|
||||
|
||||
$(TEST_DIR)/emptystate/emptystate.go: $(TEST_DIR)/emptystate/emptystate.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_65/issue_65.go: $(TEST_DIR)/issue_65/issue_65.peg $(TEST_DIR)/issue_65/optimized/issue_65.go $(TEST_DIR)/issue_65/optimized-grammar/issue_65.go $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_65/optimized/issue_65.go: $(TEST_DIR)/issue_65/issue_65.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-parser -optimize-basic-latin $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_65/optimized-grammar/issue_65.go: $(TEST_DIR)/issue_65/issue_65.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-grammar $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_70/issue_70.go: $(TEST_DIR)/issue_70/issue_70.peg $(TEST_DIR)/issue_70/optimized/issue_70.go $(TEST_DIR)/issue_70/optimized-grammar/issue_70.go $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_70/optimized/issue_70.go: $(TEST_DIR)/issue_70/issue_70.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-parser -optimize-basic-latin $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_70/optimized-grammar/issue_70.go: $(TEST_DIR)/issue_70/issue_70.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint -optimize-grammar $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_70b/issue_70b.go: $(TEST_DIR)/issue_70b/issue_70b.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint --optimize-grammar $< > $@
|
||||
|
||||
$(TEST_DIR)/issue_80/issue_80.go: $(TEST_DIR)/issue_80/issue_80.peg $(BINDIR)/pigeon
|
||||
$(BINDIR)/pigeon -nolint $< > $@
|
||||
|
||||
lint:
|
||||
golint ./...
|
||||
go vet ./...
|
||||
|
||||
gometalinter:
|
||||
gometalinter ./...
|
||||
|
||||
cmp:
|
||||
@boot=$$(mktemp) && $(BINDIR)/bootstrap-pigeon $(PIGEON_GRAMMAR) > $$boot && \
|
||||
official=$$(mktemp) && $(BINDIR)/pigeon $(PIGEON_GRAMMAR) > $$official && \
|
||||
cmp $$boot $$official && \
|
||||
unlink $$boot && \
|
||||
unlink $$official
|
||||
|
||||
test:
|
||||
go test -v ./...
|
||||
|
||||
clean:
|
||||
rm -f $(BUILDER_DIR)/generated_static_code.go $(BUILDER_DIR)/generated_static_code_range_table.go
|
||||
rm -f $(BOOTSTRAPPIGEON_DIR)/bootstrap_pigeon.go $(ROOT)/pigeon.go $(TEST_GENERATED_SRC) $(EXAMPLES_DIR)/json/optimized/json.go $(EXAMPLES_DIR)/json/optimized-grammar/json.go $(TEST_DIR)/staterestore/optimized/staterestore.go $(TEST_DIR)/staterestore/standard/staterestore.go $(TEST_DIR)/issue_65/optimized/issue_65.go $(TEST_DIR)/issue_65/optimized-grammar/issue_65.go
|
||||
rm -rf $(BINDIR)
|
||||
|
||||
.PHONY: all clean lint gometalinter cmp test
|
||||
|
||||
+148
@@ -0,0 +1,148 @@
|
||||
# pigeon - a PEG parser generator for Go
|
||||
|
||||
[](https://godoc.org/github.com/mna/pigeon)
|
||||
[](http://travis-ci.org/mna/pigeon)
|
||||
[](https://goreportcard.com/report/github.com/mna/pigeon)
|
||||
[](LICENSE)
|
||||
|
||||
The pigeon command generates parsers based on a [parsing expression grammar (PEG)][0]. Its grammar and syntax is inspired by the [PEG.js project][1], while the implementation is loosely based on the [parsing expression grammar for C# 3.0][2] article. It parses Unicode text encoded in UTF-8.
|
||||
|
||||
See the [godoc page][3] for detailed usage. Also have a look at the [Pigeon Wiki](https://github.com/mna/pigeon/wiki) for additional information about Pigeon and PEG in general.
|
||||
|
||||
## Releases
|
||||
|
||||
* v1.0.0 is the tagged release of the original implementation.
|
||||
* Work has started on v2.0.0 with some planned breaking changes.
|
||||
|
||||
GitHub user [@mna][6] created the package in April 2015, and [@breml][5] is the package's maintainer as of May 2017.
|
||||
|
||||
### Breaking Changes since v1.0.0
|
||||
|
||||
* Removed support for Go < v1.11 to support go modules for dependency tracking.
|
||||
|
||||
* Removed support for Go < v1.9 due to the requirement [golang.org/x/tools/imports](https://godoc.org/golang.org/x/tools/imports), which was updated to reflect changes in recent versions of Go. This is in compliance with the [Go Release Policy](https://golang.org/doc/devel/release.html#policy) respectively the [Go Release Maintenance](https://github.com/golang/go/wiki/Go-Release-Cycle#release-maintenance), which states support for each major release until there are two newer major releases.
|
||||
|
||||
## Installation
|
||||
|
||||
Provided you have Go correctly installed with the $GOPATH and $GOBIN environment variables set, run:
|
||||
|
||||
```
|
||||
$ go get -u github.com/mna/pigeon
|
||||
```
|
||||
|
||||
This will install or update the package, and the `pigeon` command will be installed in your $GOBIN directory. Neither this package nor the parsers generated by this command require any third-party dependency, unless such a dependency is used in the code blocks of the grammar.
|
||||
|
||||
## Basic usage
|
||||
|
||||
```
|
||||
$ pigeon [options] [PEG_GRAMMAR_FILE]
|
||||
```
|
||||
|
||||
By default, the input grammar is read from `stdin` and the generated code is printed to `stdout`. You may save it in a file using the `-o` flag.
|
||||
|
||||
## Example
|
||||
|
||||
Given the following grammar:
|
||||
|
||||
```
|
||||
{
|
||||
// part of the initializer code block omitted for brevity
|
||||
|
||||
var ops = map[string]func(int, int) int {
|
||||
"+": func(l, r int) int {
|
||||
return l + r
|
||||
},
|
||||
"-": func(l, r int) int {
|
||||
return l - r
|
||||
},
|
||||
"*": func(l, r int) int {
|
||||
return l * r
|
||||
},
|
||||
"/": func(l, r int) int {
|
||||
return l / r
|
||||
},
|
||||
}
|
||||
|
||||
func toIfaceSlice(v interface{}) []interface{} {
|
||||
if v == nil {
|
||||
return nil
|
||||
}
|
||||
return v.([]interface{})
|
||||
}
|
||||
|
||||
func eval(first, rest interface{}) int {
|
||||
l := first.(int)
|
||||
restSl := toIfaceSlice(rest)
|
||||
for _, v := range restSl {
|
||||
restExpr := toIfaceSlice(v)
|
||||
r := restExpr[3].(int)
|
||||
op := restExpr[1].(string)
|
||||
l = ops[op](l, r)
|
||||
}
|
||||
return l
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Input <- expr:Expr EOF {
|
||||
return expr, nil
|
||||
}
|
||||
|
||||
Expr <- _ first:Term rest:( _ AddOp _ Term )* _ {
|
||||
return eval(first, rest), nil
|
||||
}
|
||||
|
||||
Term <- first:Factor rest:( _ MulOp _ Factor )* {
|
||||
return eval(first, rest), nil
|
||||
}
|
||||
|
||||
Factor <- '(' expr:Expr ')' {
|
||||
return expr, nil
|
||||
} / integer:Integer {
|
||||
return integer, nil
|
||||
}
|
||||
|
||||
AddOp <- ( '+' / '-' ) {
|
||||
return string(c.text), nil
|
||||
}
|
||||
|
||||
MulOp <- ( '*' / '/' ) {
|
||||
return string(c.text), nil
|
||||
}
|
||||
|
||||
Integer <- '-'? [0-9]+ {
|
||||
return strconv.Atoi(string(c.text))
|
||||
}
|
||||
|
||||
_ "whitespace" <- [ \n\t\r]*
|
||||
|
||||
EOF <- !.
|
||||
```
|
||||
|
||||
The generated parser can parse simple arithmetic operations, e.g.:
|
||||
|
||||
```
|
||||
18 + 3 - 27 * (-18 / -3)
|
||||
|
||||
=> -141
|
||||
```
|
||||
|
||||
More examples can be found in the `examples/` subdirectory.
|
||||
|
||||
See the [godoc page][3] for detailed usage.
|
||||
|
||||
## Contributing
|
||||
|
||||
See the CONTRIBUTING.md file.
|
||||
|
||||
## License
|
||||
|
||||
The [BSD 3-Clause license][4]. See the LICENSE file.
|
||||
|
||||
[0]: http://en.wikipedia.org/wiki/Parsing_expression_grammar
|
||||
[1]: http://pegjs.org/
|
||||
[2]: http://www.codeproject.com/Articles/29713/Parsing-Expression-Grammar-Support-for-C-Part
|
||||
[3]: https://godoc.org/github.com/mna/pigeon
|
||||
[4]: http://opensource.org/licenses/BSD-3-Clause
|
||||
[5]: https://github.com/breml
|
||||
[6]: https://github.com/mna
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
- refactor implementation as a VM to avoid stack overflow in pathological cases (and maybe better performance): in branch wip-vm
|
||||
? options like current receiver name read directly from the grammar file
|
||||
? type annotations for generated code functions
|
||||
+662
@@ -0,0 +1,662 @@
|
||||
// Package ast defines the abstract syntax tree for the PEG grammar.
|
||||
//
|
||||
// The parser generator's PEG grammar generates a tree using this package
|
||||
// that is then converted by the builder to the simplified AST used in
|
||||
// the generated parser.
|
||||
package ast
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Pos represents a position in a source file.
type Pos struct {
	Filename string
	Line     int
	Col      int
	Off      int
}

// String returns the textual representation of a position.
func (p Pos) String() string {
	// The filename prefix is included only when one is known.
	if p.Filename == "" {
		return fmt.Sprintf("%d:%d (%d)", p.Line, p.Col, p.Off)
	}
	return fmt.Sprintf("%s:%d:%d (%d)", p.Filename, p.Line, p.Col, p.Off)
}
||||
|
||||
// Grammar is the top-level node of the AST for the PEG grammar.
|
||||
type Grammar struct {
|
||||
p Pos
|
||||
Init *CodeBlock
|
||||
Rules []*Rule
|
||||
}
|
||||
|
||||
// NewGrammar creates a new grammar at the specified position.
|
||||
func NewGrammar(p Pos) *Grammar {
|
||||
return &Grammar{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (g *Grammar) Pos() Pos { return g.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (g *Grammar) String() string {
|
||||
var buf bytes.Buffer
|
||||
|
||||
buf.WriteString(fmt.Sprintf("%s: %T{Init: %v, Rules: [\n",
|
||||
g.p, g, g.Init))
|
||||
for _, r := range g.Rules {
|
||||
buf.WriteString(fmt.Sprintf("%s,\n", r))
|
||||
}
|
||||
buf.WriteString("]}")
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// Rule represents a rule in the PEG grammar. It has a name, an optional
|
||||
// display name to be used in error messages, and an expression.
|
||||
type Rule struct {
|
||||
p Pos
|
||||
Name *Identifier
|
||||
DisplayName *StringLit
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// NewRule creates a rule with at the specified position and with the
|
||||
// specified name as identifier.
|
||||
func NewRule(p Pos, name *Identifier) *Rule {
|
||||
return &Rule{p: p, Name: name}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (r *Rule) Pos() Pos { return r.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (r *Rule) String() string {
|
||||
return fmt.Sprintf("%s: %T{Name: %v, DisplayName: %v, Expr: %v}",
|
||||
r.p, r, r.Name, r.DisplayName, r.Expr)
|
||||
}
|
||||
|
||||
// Expression is the interface implemented by all expression types.
|
||||
type Expression interface {
|
||||
Pos() Pos
|
||||
}
|
||||
|
||||
// ChoiceExpr is an ordered sequence of expressions. The parser tries to
|
||||
// match any of the alternatives in sequence and stops at the first one
|
||||
// that matches.
|
||||
type ChoiceExpr struct {
|
||||
p Pos
|
||||
Alternatives []Expression
|
||||
}
|
||||
|
||||
// NewChoiceExpr creates a choice expression at the specified position.
|
||||
func NewChoiceExpr(p Pos) *ChoiceExpr {
|
||||
return &ChoiceExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (c *ChoiceExpr) Pos() Pos { return c.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (c *ChoiceExpr) String() string {
|
||||
var buf bytes.Buffer
|
||||
|
||||
buf.WriteString(fmt.Sprintf("%s: %T{Alternatives: [\n", c.p, c))
|
||||
for _, e := range c.Alternatives {
|
||||
buf.WriteString(fmt.Sprintf("%s,\n", e))
|
||||
}
|
||||
buf.WriteString("]}")
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// FailureLabel is an identifier, which can by thrown and recovered in a grammar
|
||||
type FailureLabel string
|
||||
|
||||
// RecoveryExpr is an ordered sequence of expressions. The parser tries to
|
||||
// match any of the alternatives in sequence and stops at the first one
|
||||
// that matches.
|
||||
type RecoveryExpr struct {
|
||||
p Pos
|
||||
Expr Expression
|
||||
RecoverExpr Expression
|
||||
Labels []FailureLabel
|
||||
}
|
||||
|
||||
// NewRecoveryExpr creates a choice expression at the specified position.
|
||||
func NewRecoveryExpr(p Pos) *RecoveryExpr {
|
||||
return &RecoveryExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (r *RecoveryExpr) Pos() Pos { return r.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (r *RecoveryExpr) String() string {
|
||||
var buf bytes.Buffer
|
||||
|
||||
buf.WriteString(fmt.Sprintf("%s: %T{Expr: %v, RecoverExpr: %v", r.p, r, r.Expr, r.RecoverExpr))
|
||||
buf.WriteString(fmt.Sprintf(", Labels: [\n"))
|
||||
for _, e := range r.Labels {
|
||||
buf.WriteString(fmt.Sprintf("%s,\n", e))
|
||||
}
|
||||
buf.WriteString("]}")
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// ActionExpr is an expression that has an associated block of code to
|
||||
// execute when the expression matches.
|
||||
type ActionExpr struct {
|
||||
p Pos
|
||||
Expr Expression
|
||||
Code *CodeBlock
|
||||
FuncIx int
|
||||
}
|
||||
|
||||
// NewActionExpr creates a new action expression at the specified position.
|
||||
func NewActionExpr(p Pos) *ActionExpr {
|
||||
return &ActionExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (a *ActionExpr) Pos() Pos { return a.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (a *ActionExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Expr: %v, Code: %v}", a.p, a, a.Expr, a.Code)
|
||||
}
|
||||
|
||||
// ThrowExpr is an expression that throws an FailureLabel to be catched by a
|
||||
// RecoveryChoiceExpr.
|
||||
type ThrowExpr struct {
|
||||
p Pos
|
||||
Label string
|
||||
}
|
||||
|
||||
// NewThrowExpr creates a new throw expression at the specified position.
|
||||
func NewThrowExpr(p Pos) *ThrowExpr {
|
||||
return &ThrowExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (t *ThrowExpr) Pos() Pos { return t.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (t *ThrowExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Label: %v}", t.p, t, t.Label)
|
||||
}
|
||||
|
||||
// SeqExpr is an ordered sequence of expressions, all of which must match
|
||||
// if the SeqExpr is to be a match itself.
|
||||
type SeqExpr struct {
|
||||
p Pos
|
||||
Exprs []Expression
|
||||
}
|
||||
|
||||
// NewSeqExpr creates a new sequence expression at the specified position.
|
||||
func NewSeqExpr(p Pos) *SeqExpr {
|
||||
return &SeqExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (s *SeqExpr) Pos() Pos { return s.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (s *SeqExpr) String() string {
|
||||
var buf bytes.Buffer
|
||||
|
||||
buf.WriteString(fmt.Sprintf("%s: %T{Exprs: [\n", s.p, s))
|
||||
for _, e := range s.Exprs {
|
||||
buf.WriteString(fmt.Sprintf("%s,\n", e))
|
||||
}
|
||||
buf.WriteString("]}")
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// LabeledExpr is an expression that has an associated label. Code blocks
|
||||
// can access the value of the expression using that label, that becomes
|
||||
// a local variable in the code.
|
||||
type LabeledExpr struct {
|
||||
p Pos
|
||||
Label *Identifier
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// NewLabeledExpr creates a new labeled expression at the specified position.
|
||||
func NewLabeledExpr(p Pos) *LabeledExpr {
|
||||
return &LabeledExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (l *LabeledExpr) Pos() Pos { return l.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (l *LabeledExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Label: %v, Expr: %v}", l.p, l, l.Label, l.Expr)
|
||||
}
|
||||
|
||||
// AndExpr is a zero-length matcher that is considered a match if the
|
||||
// expression it contains is a match.
|
||||
type AndExpr struct {
|
||||
p Pos
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// NewAndExpr creates a new and (&) expression at the specified position.
|
||||
func NewAndExpr(p Pos) *AndExpr {
|
||||
return &AndExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (a *AndExpr) Pos() Pos { return a.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (a *AndExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Expr: %v}", a.p, a, a.Expr)
|
||||
}
|
||||
|
||||
// NotExpr is a zero-length matcher that is considered a match if the
|
||||
// expression it contains is not a match.
|
||||
type NotExpr struct {
|
||||
p Pos
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// NewNotExpr creates a new not (!) expression at the specified position.
|
||||
func NewNotExpr(p Pos) *NotExpr {
|
||||
return &NotExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (n *NotExpr) Pos() Pos { return n.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (n *NotExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Expr: %v}", n.p, n, n.Expr)
|
||||
}
|
||||
|
||||
// ZeroOrOneExpr is an expression that can be matched zero or one time.
|
||||
type ZeroOrOneExpr struct {
|
||||
p Pos
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// NewZeroOrOneExpr creates a new zero or one expression at the specified
|
||||
// position.
|
||||
func NewZeroOrOneExpr(p Pos) *ZeroOrOneExpr {
|
||||
return &ZeroOrOneExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (z *ZeroOrOneExpr) Pos() Pos { return z.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (z *ZeroOrOneExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr)
|
||||
}
|
||||
|
||||
// ZeroOrMoreExpr is an expression that can be matched zero or more times.
|
||||
type ZeroOrMoreExpr struct {
|
||||
p Pos
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// NewZeroOrMoreExpr creates a new zero or more expression at the specified
|
||||
// position.
|
||||
func NewZeroOrMoreExpr(p Pos) *ZeroOrMoreExpr {
|
||||
return &ZeroOrMoreExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (z *ZeroOrMoreExpr) Pos() Pos { return z.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (z *ZeroOrMoreExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr)
|
||||
}
|
||||
|
||||
// OneOrMoreExpr is an expression that can be matched one or more times.
|
||||
type OneOrMoreExpr struct {
|
||||
p Pos
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// NewOneOrMoreExpr creates a new one or more expression at the specified
|
||||
// position.
|
||||
func NewOneOrMoreExpr(p Pos) *OneOrMoreExpr {
|
||||
return &OneOrMoreExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (o *OneOrMoreExpr) Pos() Pos { return o.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (o *OneOrMoreExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Expr: %v}", o.p, o, o.Expr)
|
||||
}
|
||||
|
||||
// RuleRefExpr is an expression that references a rule by name.
|
||||
type RuleRefExpr struct {
|
||||
p Pos
|
||||
Name *Identifier
|
||||
}
|
||||
|
||||
// NewRuleRefExpr creates a new rule reference expression at the specified
|
||||
// position.
|
||||
func NewRuleRefExpr(p Pos) *RuleRefExpr {
|
||||
return &RuleRefExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (r *RuleRefExpr) Pos() Pos { return r.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (r *RuleRefExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Name: %v}", r.p, r, r.Name)
|
||||
}
|
||||
|
||||
// StateCodeExpr is an expression which can modify the internal state of the parser.
|
||||
type StateCodeExpr struct {
|
||||
p Pos
|
||||
Code *CodeBlock
|
||||
FuncIx int
|
||||
}
|
||||
|
||||
// NewStateCodeExpr creates a new state (#) code expression at the specified
|
||||
// position.
|
||||
func NewStateCodeExpr(p Pos) *StateCodeExpr {
|
||||
return &StateCodeExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (s *StateCodeExpr) Pos() Pos { return s.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (s *StateCodeExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Code: %v}", s.p, s, s.Code)
|
||||
}
|
||||
|
||||
// AndCodeExpr is a zero-length matcher that is considered a match if the
|
||||
// code block returns true.
|
||||
type AndCodeExpr struct {
|
||||
p Pos
|
||||
Code *CodeBlock
|
||||
FuncIx int
|
||||
}
|
||||
|
||||
// NewAndCodeExpr creates a new and (&) code expression at the specified
|
||||
// position.
|
||||
func NewAndCodeExpr(p Pos) *AndCodeExpr {
|
||||
return &AndCodeExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (a *AndCodeExpr) Pos() Pos { return a.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (a *AndCodeExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Code: %v}", a.p, a, a.Code)
|
||||
}
|
||||
|
||||
// NotCodeExpr is a zero-length matcher that is considered a match if the
|
||||
// code block returns false.
|
||||
type NotCodeExpr struct {
|
||||
p Pos
|
||||
Code *CodeBlock
|
||||
FuncIx int
|
||||
}
|
||||
|
||||
// NewNotCodeExpr creates a new not (!) code expression at the specified
|
||||
// position.
|
||||
func NewNotCodeExpr(p Pos) *NotCodeExpr {
|
||||
return &NotCodeExpr{p: p}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (n *NotCodeExpr) Pos() Pos { return n.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (n *NotCodeExpr) String() string {
|
||||
return fmt.Sprintf("%s: %T{Code: %v}", n.p, n, n.Code)
|
||||
}
|
||||
|
||||
// LitMatcher is a string literal matcher. The value to match may be a
|
||||
// double-quoted string, a single-quoted single character, or a back-tick
|
||||
// quoted raw string.
|
||||
type LitMatcher struct {
|
||||
posValue // can be str, rstr or char
|
||||
IgnoreCase bool
|
||||
}
|
||||
|
||||
// NewLitMatcher creates a new literal matcher at the specified position and
|
||||
// with the specified value.
|
||||
func NewLitMatcher(p Pos, v string) *LitMatcher {
|
||||
return &LitMatcher{posValue: posValue{p: p, Val: v}}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (l *LitMatcher) Pos() Pos { return l.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (l *LitMatcher) String() string {
|
||||
return fmt.Sprintf("%s: %T{Val: %q, IgnoreCase: %t}", l.p, l, l.Val, l.IgnoreCase)
|
||||
}
|
||||
|
||||
// CharClassMatcher is a character class matcher. The value to match must
|
||||
// be one of the specified characters, in a range of characters, or in the
|
||||
// Unicode classes of characters.
|
||||
type CharClassMatcher struct {
|
||||
posValue
|
||||
IgnoreCase bool
|
||||
Inverted bool
|
||||
Chars []rune
|
||||
Ranges []rune // pairs of low/high range
|
||||
UnicodeClasses []string
|
||||
}
|
||||
|
||||
// NewCharClassMatcher creates a new character class matcher at the specified
|
||||
// position and with the specified raw value. It parses the raw value into
|
||||
// the list of characters, ranges and Unicode classes.
|
||||
func NewCharClassMatcher(p Pos, raw string) *CharClassMatcher {
|
||||
c := &CharClassMatcher{posValue: posValue{p: p, Val: raw}}
|
||||
c.parse()
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *CharClassMatcher) parse() {
|
||||
raw := c.Val
|
||||
c.IgnoreCase = strings.HasSuffix(raw, "i")
|
||||
if c.IgnoreCase {
|
||||
raw = raw[:len(raw)-1]
|
||||
}
|
||||
|
||||
// "unquote" the character classes
|
||||
raw = raw[1 : len(raw)-1]
|
||||
if len(raw) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
c.Inverted = raw[0] == '^'
|
||||
if c.Inverted {
|
||||
raw = raw[1:]
|
||||
if len(raw) == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// content of char class is necessarily valid, so escapes are correct
|
||||
r := strings.NewReader(raw)
|
||||
var chars []rune
|
||||
var buf bytes.Buffer
|
||||
outer:
|
||||
for {
|
||||
rn, _, err := r.ReadRune()
|
||||
if err != nil {
|
||||
break outer
|
||||
}
|
||||
|
||||
consumeN := 0
|
||||
switch rn {
|
||||
case '\\':
|
||||
rn, _, _ := r.ReadRune()
|
||||
switch rn {
|
||||
case ']':
|
||||
chars = append(chars, rn)
|
||||
continue
|
||||
|
||||
case 'p':
|
||||
rn, _, _ := r.ReadRune()
|
||||
if rn == '{' {
|
||||
buf.Reset()
|
||||
for {
|
||||
rn, _, _ := r.ReadRune()
|
||||
if rn == '}' {
|
||||
break
|
||||
}
|
||||
buf.WriteRune(rn)
|
||||
}
|
||||
c.UnicodeClasses = append(c.UnicodeClasses, buf.String())
|
||||
} else {
|
||||
c.UnicodeClasses = append(c.UnicodeClasses, string(rn))
|
||||
}
|
||||
continue
|
||||
|
||||
case 'x':
|
||||
consumeN = 2
|
||||
case 'u':
|
||||
consumeN = 4
|
||||
case 'U':
|
||||
consumeN = 8
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
consumeN = 2
|
||||
}
|
||||
|
||||
buf.Reset()
|
||||
buf.WriteRune(rn)
|
||||
for i := 0; i < consumeN; i++ {
|
||||
rn, _, _ := r.ReadRune()
|
||||
buf.WriteRune(rn)
|
||||
}
|
||||
rn, _, _, _ = strconv.UnquoteChar("\\"+buf.String(), 0)
|
||||
chars = append(chars, rn)
|
||||
|
||||
default:
|
||||
chars = append(chars, rn)
|
||||
}
|
||||
}
|
||||
|
||||
// extract ranges and chars
|
||||
inRange, wasRange := false, false
|
||||
for i, r := range chars {
|
||||
if inRange {
|
||||
c.Ranges = append(c.Ranges, r)
|
||||
inRange = false
|
||||
wasRange = true
|
||||
continue
|
||||
}
|
||||
|
||||
if r == '-' && !wasRange && len(c.Chars) > 0 && i < len(chars)-1 {
|
||||
inRange = true
|
||||
wasRange = false
|
||||
// start of range is the last Char added
|
||||
c.Ranges = append(c.Ranges, c.Chars[len(c.Chars)-1])
|
||||
c.Chars = c.Chars[:len(c.Chars)-1]
|
||||
continue
|
||||
}
|
||||
wasRange = false
|
||||
c.Chars = append(c.Chars, r)
|
||||
}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (c *CharClassMatcher) Pos() Pos { return c.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (c *CharClassMatcher) String() string {
|
||||
return fmt.Sprintf("%s: %T{Val: %q, IgnoreCase: %t, Inverted: %t}",
|
||||
c.p, c, c.Val, c.IgnoreCase, c.Inverted)
|
||||
}
|
||||
|
||||
// AnyMatcher is a matcher that matches any character except end-of-file.
|
||||
type AnyMatcher struct {
|
||||
posValue
|
||||
}
|
||||
|
||||
// NewAnyMatcher creates a new any matcher at the specified position. The
|
||||
// value is provided for completeness' sake, but it is always the dot.
|
||||
func NewAnyMatcher(p Pos, v string) *AnyMatcher {
|
||||
return &AnyMatcher{posValue{p, v}}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (a *AnyMatcher) Pos() Pos { return a.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (a *AnyMatcher) String() string {
|
||||
return fmt.Sprintf("%s: %T{Val: %q}", a.p, a, a.Val)
|
||||
}
|
||||
|
||||
// CodeBlock represents a code block.
|
||||
type CodeBlock struct {
|
||||
posValue
|
||||
}
|
||||
|
||||
// NewCodeBlock creates a new code block at the specified position and with
|
||||
// the specified value. The value includes the outer braces.
|
||||
func NewCodeBlock(p Pos, code string) *CodeBlock {
|
||||
return &CodeBlock{posValue{p, code}}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (c *CodeBlock) Pos() Pos { return c.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (c *CodeBlock) String() string {
|
||||
return fmt.Sprintf("%s: %T{Val: %q}", c.p, c, c.Val)
|
||||
}
|
||||
|
||||
// Identifier represents an identifier.
|
||||
type Identifier struct {
|
||||
posValue
|
||||
}
|
||||
|
||||
// NewIdentifier creates a new identifier at the specified position and
|
||||
// with the specified name.
|
||||
func NewIdentifier(p Pos, name string) *Identifier {
|
||||
return &Identifier{posValue{p: p, Val: name}}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (i *Identifier) Pos() Pos { return i.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (i *Identifier) String() string {
|
||||
return fmt.Sprintf("%s: %T{Val: %q}", i.p, i, i.Val)
|
||||
}
|
||||
|
||||
// StringLit represents a string literal.
|
||||
type StringLit struct {
|
||||
posValue
|
||||
}
|
||||
|
||||
// NewStringLit creates a new string literal at the specified position and
|
||||
// with the specified value.
|
||||
func NewStringLit(p Pos, val string) *StringLit {
|
||||
return &StringLit{posValue{p: p, Val: val}}
|
||||
}
|
||||
|
||||
// Pos returns the starting position of the node.
|
||||
func (s *StringLit) Pos() Pos { return s.p }
|
||||
|
||||
// String returns the textual representation of a node.
|
||||
func (s *StringLit) String() string {
|
||||
return fmt.Sprintf("%s: %T{Val: %q}", s.p, s, s.Val)
|
||||
}
|
||||
|
||||
type posValue struct {
|
||||
p Pos
|
||||
Val string
|
||||
}
|
||||
+469
@@ -0,0 +1,469 @@
|
||||
package ast
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type grammarOptimizer struct {
|
||||
rule string
|
||||
protectedRules map[string]struct{}
|
||||
rules map[string]*Rule
|
||||
ruleUsesRules map[string]map[string]struct{}
|
||||
ruleUsedByRules map[string]map[string]struct{}
|
||||
visitor func(expr Expression) Visitor
|
||||
optimized bool
|
||||
}
|
||||
|
||||
func newGrammarOptimizer(protectedRules []string) *grammarOptimizer {
|
||||
pr := make(map[string]struct{}, len(protectedRules))
|
||||
for _, nm := range protectedRules {
|
||||
pr[nm] = struct{}{}
|
||||
}
|
||||
|
||||
r := grammarOptimizer{
|
||||
protectedRules: pr,
|
||||
rules: make(map[string]*Rule),
|
||||
ruleUsesRules: make(map[string]map[string]struct{}),
|
||||
ruleUsedByRules: make(map[string]map[string]struct{}),
|
||||
}
|
||||
r.visitor = r.init
|
||||
return &r
|
||||
}
|
||||
|
||||
// Visit is a generic Visitor to be used with Walk
|
||||
// The actual function, which should be used during Walk
|
||||
// is held in ruleRefOptimizer.visitor
|
||||
func (r *grammarOptimizer) Visit(expr Expression) Visitor {
|
||||
return r.visitor(expr)
|
||||
}
|
||||
|
||||
// init is a Visitor, which is used with the Walk function
|
||||
// The purpose of this function is to initialize the reference
|
||||
// maps rules, ruleUsesRules and ruleUsedByRules.
|
||||
func (r *grammarOptimizer) init(expr Expression) Visitor {
|
||||
switch expr := expr.(type) {
|
||||
case *Rule:
|
||||
// Keep track of current rule, which is processed
|
||||
r.rule = expr.Name.Val
|
||||
r.rules[expr.Name.Val] = expr
|
||||
case *RuleRefExpr:
|
||||
// Fill ruleUsesRules and ruleUsedByRules for every RuleRefExpr
|
||||
set(r.ruleUsesRules, r.rule, expr.Name.Val)
|
||||
set(r.ruleUsedByRules, expr.Name.Val, r.rule)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// set records dst in the inner set stored under src in a map of maps,
// creating the inner map on first use.
func set(m map[string]map[string]struct{}, src, dst string) {
	inner, ok := m[src]
	if !ok {
		inner = make(map[string]struct{})
		m[src] = inner
	}
	inner[dst] = struct{}{}
}
|
||||
|
||||
// optimize is a Visitor, which is used with the Walk function
|
||||
// The purpose of this function is to perform the actual optimizations.
|
||||
// See Optimize for a detailed list of the performed optimizations.
|
||||
func (r *grammarOptimizer) optimize(expr0 Expression) Visitor {
|
||||
switch expr := expr0.(type) {
|
||||
case *ActionExpr:
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
case *AndExpr:
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
case *ChoiceExpr:
|
||||
expr.Alternatives = r.optimizeRules(expr.Alternatives)
|
||||
|
||||
// Optimize choice nested in choice
|
||||
for i := 0; i < len(expr.Alternatives); i++ {
|
||||
if choice, ok := expr.Alternatives[i].(*ChoiceExpr); ok {
|
||||
r.optimized = true
|
||||
if i+1 < len(expr.Alternatives) {
|
||||
expr.Alternatives = append(expr.Alternatives[:i], append(choice.Alternatives, expr.Alternatives[i+1:]...)...)
|
||||
} else {
|
||||
expr.Alternatives = append(expr.Alternatives[:i], choice.Alternatives...)
|
||||
}
|
||||
}
|
||||
|
||||
// Combine sequence of single char LitMatcher to CharClassMatcher
|
||||
if i > 0 {
|
||||
l0, lok0 := expr.Alternatives[i-1].(*LitMatcher)
|
||||
l1, lok1 := expr.Alternatives[i].(*LitMatcher)
|
||||
c0, cok0 := expr.Alternatives[i-1].(*CharClassMatcher)
|
||||
c1, cok1 := expr.Alternatives[i].(*CharClassMatcher)
|
||||
|
||||
combined := false
|
||||
|
||||
switch {
|
||||
// Combine two LitMatcher to CharClassMatcher
|
||||
// "a" / "b" => [ab]
|
||||
case lok0 && lok1 && len([]rune(l0.Val)) == 1 && len([]rune(l1.Val)) == 1 && l0.IgnoreCase == l1.IgnoreCase:
|
||||
combined = true
|
||||
cm := CharClassMatcher{
|
||||
Chars: append([]rune(l0.Val), []rune(l1.Val)...),
|
||||
IgnoreCase: l0.IgnoreCase,
|
||||
posValue: l0.posValue,
|
||||
}
|
||||
expr.Alternatives[i-1] = &cm
|
||||
|
||||
// Combine LitMatcher with CharClassMatcher
|
||||
// "a" / [bc] => [abc]
|
||||
case lok0 && cok1 && len([]rune(l0.Val)) == 1 && l0.IgnoreCase == c1.IgnoreCase && !c1.Inverted:
|
||||
combined = true
|
||||
c1.Chars = append(c1.Chars, []rune(l0.Val)...)
|
||||
expr.Alternatives[i-1] = c1
|
||||
|
||||
// Combine CharClassMatcher with LitMatcher
|
||||
// [ab] / "c" => [abc]
|
||||
case cok0 && lok1 && len([]rune(l1.Val)) == 1 && c0.IgnoreCase == l1.IgnoreCase && !c0.Inverted:
|
||||
combined = true
|
||||
c0.Chars = append(c0.Chars, []rune(l1.Val)...)
|
||||
|
||||
// Combine CharClassMatcher with CharClassMatcher
|
||||
// [ab] / [cd] => [abcd]
|
||||
case cok0 && cok1 && c0.IgnoreCase == c1.IgnoreCase && c0.Inverted == c1.Inverted:
|
||||
combined = true
|
||||
c0.Chars = append(c0.Chars, c1.Chars...)
|
||||
c0.Ranges = append(c0.Ranges, c1.Ranges...)
|
||||
c0.UnicodeClasses = append(c0.UnicodeClasses, c1.UnicodeClasses...)
|
||||
}
|
||||
|
||||
// If one of the optimizations was applied, remove the second element from Alternatives
|
||||
if combined {
|
||||
r.optimized = true
|
||||
if i+1 < len(expr.Alternatives) {
|
||||
expr.Alternatives = append(expr.Alternatives[:i], expr.Alternatives[i+1:]...)
|
||||
} else {
|
||||
expr.Alternatives = expr.Alternatives[:i]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case *Grammar:
|
||||
// Reset optimized at the start of each Walk.
|
||||
r.optimized = false
|
||||
for i := 0; i < len(expr.Rules); i++ {
|
||||
rule := expr.Rules[i]
|
||||
// Remove Rule, if it is no longer used by any other Rule and it is not the first Rule.
|
||||
_, used := r.ruleUsedByRules[rule.Name.Val]
|
||||
_, protected := r.protectedRules[rule.Name.Val]
|
||||
if !used && !protected {
|
||||
expr.Rules = append(expr.Rules[:i], expr.Rules[i+1:]...)
|
||||
// Compensate for the removed item
|
||||
i--
|
||||
|
||||
for k, v := range r.ruleUsedByRules {
|
||||
for kk := range v {
|
||||
if kk == rule.Name.Val {
|
||||
delete(r.ruleUsedByRules[k], kk)
|
||||
if len(r.ruleUsedByRules[k]) == 0 {
|
||||
delete(r.ruleUsedByRules, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.optimized = true
|
||||
continue
|
||||
}
|
||||
}
|
||||
case *LabeledExpr:
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
case *NotExpr:
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
case *OneOrMoreExpr:
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
case *Rule:
|
||||
r.rule = expr.Name.Val
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
case *SeqExpr:
|
||||
expr.Exprs = r.optimizeRules(expr.Exprs)
|
||||
|
||||
for i := 0; i < len(expr.Exprs); i++ {
|
||||
// Optimize nested sequences
|
||||
if seq, ok := expr.Exprs[i].(*SeqExpr); ok {
|
||||
r.optimized = true
|
||||
if i+1 < len(expr.Exprs) {
|
||||
expr.Exprs = append(expr.Exprs[:i], append(seq.Exprs, expr.Exprs[i+1:]...)...)
|
||||
} else {
|
||||
expr.Exprs = append(expr.Exprs[:i], seq.Exprs...)
|
||||
}
|
||||
}
|
||||
|
||||
// Combine sequence of LitMatcher
|
||||
if i > 0 {
|
||||
l0, ok0 := expr.Exprs[i-1].(*LitMatcher)
|
||||
l1, ok1 := expr.Exprs[i].(*LitMatcher)
|
||||
if ok0 && ok1 && l0.IgnoreCase == l1.IgnoreCase {
|
||||
r.optimized = true
|
||||
l0.Val += l1.Val
|
||||
expr.Exprs[i-1] = l0
|
||||
if i+1 < len(expr.Exprs) {
|
||||
expr.Exprs = append(expr.Exprs[:i], expr.Exprs[i+1:]...)
|
||||
} else {
|
||||
expr.Exprs = expr.Exprs[:i]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case *ZeroOrMoreExpr:
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
case *ZeroOrOneExpr:
|
||||
expr.Expr = r.optimizeRule(expr.Expr)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func (r *grammarOptimizer) optimizeRules(exprs []Expression) []Expression {
|
||||
for i := 0; i < len(exprs); i++ {
|
||||
exprs[i] = r.optimizeRule(exprs[i])
|
||||
}
|
||||
return exprs
|
||||
}
|
||||
|
||||
// optimizeRule applies single-node optimizations to expr and returns the
// (possibly replaced) expression:
//   - a RuleRefExpr to a rule that itself uses no other rules is replaced
//     by a deep clone of that rule's expression (inlining),
//   - a ChoiceExpr with a single alternative is replaced by that alternative,
//   - a SeqExpr with a single element is replaced by that element.
// The ruleUsedByRules / ruleUsesRules maps are kept in sync so rules that
// become unreferenced can be removed by a later pass.
func (r *grammarOptimizer) optimizeRule(expr Expression) Expression {
	// Optimize RuleRefExpr
	if ruleRef, ok := expr.(*RuleRefExpr); ok {
		// Only inline when the referenced rule uses no other rules.
		if _, ok := r.ruleUsesRules[ruleRef.Name.Val]; !ok {
			r.optimized = true
			// The current rule no longer references ruleRef: drop the
			// reverse mapping and prune empty map entries.
			delete(r.ruleUsedByRules[ruleRef.Name.Val], r.rule)
			if len(r.ruleUsedByRules[ruleRef.Name.Val]) == 0 {
				delete(r.ruleUsedByRules, ruleRef.Name.Val)
			}
			delete(r.ruleUsesRules[r.rule], ruleRef.Name.Val)
			if len(r.ruleUsesRules[r.rule]) == 0 {
				delete(r.ruleUsesRules, r.rule)
			}
			// TODO: Check if reference exists, otherwise raise an error, which reference is missing!
			// Clone so the inlined copy is independent of the original rule.
			return cloneExpr(r.rules[ruleRef.Name.Val].Expr)
		}
	}

	// Remove Choices with only one Alternative left
	if choice, ok := expr.(*ChoiceExpr); ok {
		if len(choice.Alternatives) == 1 {
			r.optimized = true
			return choice.Alternatives[0]
		}
	}

	// Remove Sequence with only one Expression
	if seq, ok := expr.(*SeqExpr); ok {
		if len(seq.Exprs) == 1 {
			r.optimized = true
			return seq.Exprs[0]
		}
	}

	return expr
}
|
||||
|
||||
// cloneExpr takes an Expression and deep clones it (including all children)
// This is necessary because referenced Rules are denormalized and therefore
// have to become independent from their original Expression.
// Child expressions are cloned recursively; leaf matchers copy their rune
// and string slices so the clone does not share backing arrays with the
// original. Position values (p / posValue) are copied by value.
// Expression types without children or slices fall through to the final
// return and are returned unchanged.
func cloneExpr(expr Expression) Expression {
	switch expr := expr.(type) {
	case *ActionExpr:
		return &ActionExpr{
			Code:   expr.Code,
			Expr:   cloneExpr(expr.Expr),
			FuncIx: expr.FuncIx,
			p:      expr.p,
		}
	case *AndExpr:
		return &AndExpr{
			Expr: cloneExpr(expr.Expr),
			p:    expr.p,
		}
	case *AndCodeExpr:
		return &AndCodeExpr{
			Code:   expr.Code,
			FuncIx: expr.FuncIx,
			p:      expr.p,
		}
	case *CharClassMatcher:
		// Copy the slices so later cleanup passes on the clone cannot
		// mutate the original matcher's data.
		return &CharClassMatcher{
			Chars:          append([]rune{}, expr.Chars...),
			IgnoreCase:     expr.IgnoreCase,
			Inverted:       expr.Inverted,
			posValue:       expr.posValue,
			Ranges:         append([]rune{}, expr.Ranges...),
			UnicodeClasses: append([]string{}, expr.UnicodeClasses...),
		}
	case *ChoiceExpr:
		alts := make([]Expression, 0, len(expr.Alternatives))
		for i := 0; i < len(expr.Alternatives); i++ {
			alts = append(alts, cloneExpr(expr.Alternatives[i]))
		}
		return &ChoiceExpr{
			Alternatives: alts,
			p:            expr.p,
		}
	case *LabeledExpr:
		return &LabeledExpr{
			Expr:  cloneExpr(expr.Expr),
			Label: expr.Label,
			p:     expr.p,
		}
	case *NotExpr:
		return &NotExpr{
			Expr: cloneExpr(expr.Expr),
			p:    expr.p,
		}
	case *NotCodeExpr:
		return &NotCodeExpr{
			Code:   expr.Code,
			FuncIx: expr.FuncIx,
			p:      expr.p,
		}
	case *OneOrMoreExpr:
		return &OneOrMoreExpr{
			Expr: cloneExpr(expr.Expr),
			p:    expr.p,
		}
	case *SeqExpr:
		exprs := make([]Expression, 0, len(expr.Exprs))
		for i := 0; i < len(expr.Exprs); i++ {
			exprs = append(exprs, cloneExpr(expr.Exprs[i]))
		}
		return &SeqExpr{
			Exprs: exprs,
			p:     expr.p,
		}
	case *StateCodeExpr:
		return &StateCodeExpr{
			p:      expr.p,
			Code:   expr.Code,
			FuncIx: expr.FuncIx,
		}
	case *ZeroOrMoreExpr:
		return &ZeroOrMoreExpr{
			Expr: cloneExpr(expr.Expr),
			p:    expr.p,
		}
	case *ZeroOrOneExpr:
		return &ZeroOrOneExpr{
			Expr: cloneExpr(expr.Expr),
			p:    expr.p,
		}
	}
	// Types not listed above (e.g. simple matchers) are returned as-is.
	return expr
}
|
||||
|
||||
// cleanupCharClassMatcher is a Visitor, which is used with the Walk function
// The purpose of this function is to cleanup the redundancies created by the
// optimize Visitor. This includes to remove redundant entries in Chars, Ranges
// and UnicodeClasses of the given CharClassMatcher as well as regenerating the
// correct content for the Val field (string representation of the CharClassMatcher).
// It always returns r so Walk keeps descending into children.
func (r *grammarOptimizer) cleanupCharClassMatcher(expr0 Expression) Visitor {
	// We are only interested in nodes of type *CharClassMatcher
	if chr, ok := expr0.(*CharClassMatcher); ok {
		// Remove redundancies in Chars (deduplicate, keeping first occurrence)
		chars := make([]rune, 0, len(chr.Chars))
		charsMap := make(map[rune]struct{})
		for _, c := range chr.Chars {
			if _, ok := charsMap[c]; !ok {
				charsMap[c] = struct{}{}
				chars = append(chars, c)
			}
		}
		if len(chars) > 0 {
			chr.Chars = chars
		} else {
			chr.Chars = nil
		}

		// Remove redundancies in Ranges. Ranges is a flat slice of
		// [low, high] pairs, so each pair is keyed as "low-high".
		ranges := make([]rune, 0, len(chr.Ranges))
		rangesMap := make(map[string]struct{})
		for i := 0; i < len(chr.Ranges); i += 2 {
			rangeKey := string(chr.Ranges[i]) + "-" + string(chr.Ranges[i+1])
			if _, ok := rangesMap[rangeKey]; !ok {
				rangesMap[rangeKey] = struct{}{}
				ranges = append(ranges, chr.Ranges[i], chr.Ranges[i+1])
			}
		}
		if len(ranges) > 0 {
			chr.Ranges = ranges
		} else {
			chr.Ranges = nil
		}

		// Remove redundancies in UnicodeClasses
		unicodeClasses := make([]string, 0, len(chr.UnicodeClasses))
		unicodeClassesMap := make(map[string]struct{})
		for _, u := range chr.UnicodeClasses {
			if _, ok := unicodeClassesMap[u]; !ok {
				unicodeClassesMap[u] = struct{}{}
				unicodeClasses = append(unicodeClasses, u)
			}
		}
		if len(unicodeClasses) > 0 {
			chr.UnicodeClasses = unicodeClasses
		} else {
			chr.UnicodeClasses = nil
		}

		// Regenerate the content for Val, e.g. "[^a-z\pL]i"
		var val bytes.Buffer
		val.WriteString("[")
		if chr.Inverted {
			val.WriteString("^")
		}
		for _, c := range chr.Chars {
			val.WriteString(escapeRune(c))
		}
		for i := 0; i < len(chr.Ranges); i += 2 {
			val.WriteString(escapeRune(chr.Ranges[i]))
			val.WriteString("-")
			val.WriteString(escapeRune(chr.Ranges[i+1]))
		}
		for _, u := range chr.UnicodeClasses {
			val.WriteString("\\p" + u)
		}
		val.WriteString("]")
		if chr.IgnoreCase {
			val.WriteString("i")
		}
		chr.posValue.Val = val.String()
	}
	return r
}
|
||||
|
||||
// escapeRune returns the Go-escaped representation of r without the
// surrounding single quotes that strconv.QuoteRune produces
// (e.g. '\n' becomes the two characters `\n`).
func escapeRune(r rune) string {
	quoted := strconv.QuoteRune(r)
	return strings.Trim(quoted, `'`)
}
|
||||
|
||||
// Optimize walks a given grammar and optimizes the grammar in regards
// of parsing performance. This is done with several optimizations:
// * removal of unreferenced rules
// * replace rule references with a copy of the referenced Rule, if the
//   referenced rule it self has no references.
// * resolve nested choice expressions
// * resolve choice expressions with only one alternative
// * resolve nested sequences expression
// * resolve sequence expressions with only one element
// * combine character class matcher and literal matcher, where possible
func Optimize(g *Grammar, alternateEntrypoints ...string) {
	// The first rule plus any alternate entrypoints are protected from
	// removal even when nothing references them.
	entrypoints := alternateEntrypoints
	if len(g.Rules) > 0 {
		entrypoints = append(entrypoints, g.Rules[0].Name.Val)
	}

	// First walk: collect rule usage information.
	r := newGrammarOptimizer(entrypoints)
	Walk(r, g)

	// Repeat the optimize pass until a walk makes no further change
	// (r.optimized stays false).
	r.visitor = r.optimize
	r.optimized = true
	for r.optimized {
		Walk(r, g)
	}

	// Final pass: deduplicate CharClassMatcher contents and rebuild Val.
	r.visitor = r.cleanupCharClassMatcher
	Walk(r, g)
}
|
||||
+87
@@ -0,0 +1,87 @@
|
||||
package ast
|
||||
|
||||
import "fmt"
|
||||
|
||||
// A Visitor implements a Visit method, which is invoked for each Expression
// encountered by Walk.
// If the visitor w returned by Visit is not nil, Walk visits each of the
// children of the Expression with the visitor w.
type Visitor interface {
	Visit(expr Expression) (w Visitor)
}
|
||||
|
||||
// Walk traverses an AST in depth-first order: It starts by calling
// v.Visit(expr); Expression must not be nil. If the visitor w returned by
// v.Visit(expr) is not nil, Walk is invoked recursively with visitor
// w for each of the non-nil children of Expression.
// Encountering an expression type not handled below is a programmer
// error and panics.
func Walk(v Visitor, expr Expression) {
	// A nil result from Visit prunes this subtree.
	if v = v.Visit(expr); v == nil {
		return
	}

	switch expr := expr.(type) {
	case *ActionExpr:
		Walk(v, expr.Expr)
	case *AndCodeExpr:
		// Nothing to do
	case *AndExpr:
		Walk(v, expr.Expr)
	case *AnyMatcher:
		// Nothing to do
	case *CharClassMatcher:
		// Nothing to do
	case *ChoiceExpr:
		for _, e := range expr.Alternatives {
			Walk(v, e)
		}
	case *Grammar:
		for _, e := range expr.Rules {
			Walk(v, e)
		}
	case *LabeledExpr:
		Walk(v, expr.Expr)
	case *LitMatcher:
		// Nothing to do
	case *NotCodeExpr:
		// Nothing to do
	case *NotExpr:
		Walk(v, expr.Expr)
	case *OneOrMoreExpr:
		Walk(v, expr.Expr)
	case *Rule:
		Walk(v, expr.Expr)
	case *RuleRefExpr:
		// Nothing to do
	case *SeqExpr:
		for _, e := range expr.Exprs {
			Walk(v, e)
		}
	case *StateCodeExpr:
		// Nothing to do
	case *ZeroOrMoreExpr:
		Walk(v, expr.Expr)
	case *ZeroOrOneExpr:
		Walk(v, expr.Expr)
	default:
		panic(fmt.Sprintf("unknown expression type %T", expr))
	}
}
|
||||
|
||||
// inspector adapts a plain predicate function to the Visitor interface:
// the function's boolean result decides whether Walk descends further.
type inspector func(Expression) bool

// Visit implements Visitor. Returning f continues the traversal into the
// expression's children; returning nil prunes this subtree.
func (f inspector) Visit(expr Expression) Visitor {
	if f(expr) {
		return f
	}
	return nil
}

// Inspect traverses an AST in depth-first order: It starts by calling
// f(expr); expr must not be nil. If f returns true, Inspect invokes f
// recursively for each of the non-nil children of expr.
func Inspect(expr Expression, f func(Expression) bool) {
	Walk(inspector(f), expr)
}
|
||||
+817
@@ -0,0 +1,817 @@
|
||||
// Package builder generates the parser code for a given grammar. It makes
|
||||
// no attempt to verify the correctness of the grammar.
|
||||
package builder
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"unicode"
|
||||
|
||||
"regexp"
|
||||
|
||||
"github.com/mna/pigeon/ast"
|
||||
)
|
||||
|
||||
const codeGeneratedComment = "// Code generated by pigeon; DO NOT EDIT.\n\n"
|
||||
|
||||
// generated function templates
|
||||
var (
|
||||
onFuncTemplate = `func (%s *current) %s(%s) (interface{}, error) {
|
||||
%s
|
||||
}
|
||||
`
|
||||
onPredFuncTemplate = `func (%s *current) %s(%s) (bool, error) {
|
||||
%s
|
||||
}
|
||||
`
|
||||
onStateFuncTemplate = `func (%s *current) %s(%s) (error) {
|
||||
%s
|
||||
}
|
||||
`
|
||||
callFuncTemplate = `func (p *parser) call%s() (interface{}, error) {
|
||||
stack := p.vstack[len(p.vstack)-1]
|
||||
_ = stack
|
||||
return p.cur.%[1]s(%s)
|
||||
}
|
||||
`
|
||||
callPredFuncTemplate = `func (p *parser) call%s() (bool, error) {
|
||||
stack := p.vstack[len(p.vstack)-1]
|
||||
_ = stack
|
||||
return p.cur.%[1]s(%s)
|
||||
}
|
||||
`
|
||||
callStateFuncTemplate = `func (p *parser) call%s() error {
|
||||
stack := p.vstack[len(p.vstack)-1]
|
||||
_ = stack
|
||||
return p.cur.%[1]s(%s)
|
||||
}
|
||||
`
|
||||
)
|
||||
|
||||
// Option is a function that can set an option on the builder. It returns
|
||||
// the previous setting as an Option.
|
||||
type Option func(*builder) Option
|
||||
|
||||
// ReceiverName returns an option that specifies the receiver name to
// use for the current struct (which is the struct on which all code blocks
// except the initializer are generated).
// The returned Option, when applied, restores the previous receiver name.
func ReceiverName(nm string) Option {
	return func(b *builder) Option {
		prev := b.recvName
		b.recvName = nm
		return ReceiverName(prev)
	}
}
|
||||
|
||||
// Optimize returns an option that specifies the optimize option.
// If optimize is true, the Debug and Memoize code is completely
// removed from the resulting parser.
// The returned Option, when applied, restores the previous setting.
func Optimize(optimize bool) Option {
	return func(b *builder) Option {
		prev := b.optimize
		b.optimize = optimize
		return Optimize(prev)
	}
}
|
||||
|
||||
// Nolint returns an option that specifies the nolint option
|
||||
// If nolint is true, special '// nolint: ...' comments are added
|
||||
// to the generated parser to suppress warnings by gometalinter.
|
||||
func Nolint(nolint bool) Option {
|
||||
return func(b *builder) Option {
|
||||
prev := b.nolint
|
||||
b.nolint = nolint
|
||||
return Optimize(prev)
|
||||
}
|
||||
}
|
||||
|
||||
// BasicLatinLookupTable returns an option that specifies the basicLatinLookup option.
// If basicLatinLookup is true, a lookup slice for the first 128 chars of
// the Unicode table (Basic Latin) is generated for each CharClassMatcher
// to speed up character matching.
// The returned Option, when applied, restores the previous setting.
func BasicLatinLookupTable(basicLatinLookupTable bool) Option {
	return func(b *builder) Option {
		prev := b.basicLatinLookupTable
		b.basicLatinLookupTable = basicLatinLookupTable
		return BasicLatinLookupTable(prev)
	}
}
|
||||
|
||||
// BuildParser builds the PEG parser using the provided grammar. The code is
// written to the specified w. The receiver name defaults to "c" unless
// overridden via the ReceiverName option.
func BuildParser(w io.Writer, g *ast.Grammar, opts ...Option) error {
	b := &builder{w: w, recvName: "c"}
	b.setOptions(opts)
	return b.buildParser(g)
}
|
||||
|
||||
// builder holds the state needed to generate one parser. It is single-use:
// one builder writes one parser to w.
type builder struct {
	w   io.Writer // destination for the generated code
	err error     // first write/build error; returned by buildParser

	// options
	recvName              string // receiver name for generated code-block methods (default "c")
	optimize              bool   // strip Debug and Memoize support from the output
	basicLatinLookupTable bool   // emit a 128-entry lookup table per CharClassMatcher
	globalState           bool   // set while writing when a StateCodeExpr is encountered
	nolint                bool   // add '// nolint: ...' comments to the generated parser

	ruleName  string     // name of the rule currently being written
	exprIndex int        // per-rule expression counter, used to name generated functions
	argsStack [][]string // stack of labeled-argument sets for code blocks

	rangeTable bool // set when the output needs the unicode rangeTable helper
}
|
||||
|
||||
func (b *builder) setOptions(opts []Option) {
|
||||
for _, opt := range opts {
|
||||
opt(b)
|
||||
}
|
||||
}
|
||||
|
||||
// buildParser writes the complete parser: the grammar's init code, the
// grammar definition, the per-rule code-block functions, and finally the
// static runtime code. It returns the first error recorded in b.err, if any.
func (b *builder) buildParser(g *ast.Grammar) error {
	b.writeInit(g.Init)
	b.writeGrammar(g)

	for _, rule := range g.Rules {
		b.writeRuleCode(rule)
	}
	b.writeStaticCode()

	return b.err
}
|
||||
|
||||
// writeInit emits the grammar's initializer code block, prefixed with the
// "generated code" comment. A nil init is a no-op.
func (b *builder) writeInit(init *ast.CodeBlock) {
	if init == nil {
		return
	}

	// remove opening and closing braces
	val := codeGeneratedComment + init.Val[1:len(init.Val)-1]
	b.writelnf("%s", val)
}
|
||||
|
||||
// writeGrammar emits the package-level "var g = &grammar{...}" declaration
// containing the full rule set of the grammar.
func (b *builder) writeGrammar(g *ast.Grammar) {
	// transform the ast grammar to the self-contained, no dependency version
	// of the parser-generator grammar.
	b.writelnf("var g = &grammar {")
	b.writelnf("\trules: []*rule{")
	for _, r := range g.Rules {
		b.writeRule(r)
	}
	b.writelnf("\t},")
	b.writelnf("}")
}
|
||||
|
||||
// writeRule emits one rule literal inside the grammar declaration. It also
// resets the per-rule expression index and records the current rule name,
// which later passes use to name generated functions.
func (b *builder) writeRule(r *ast.Rule) {
	if r == nil || r.Name == nil {
		return
	}

	b.exprIndex = 0
	b.ruleName = r.Name.Val

	b.writelnf("{")
	b.writelnf("\tname: %q,", r.Name.Val)
	if r.DisplayName != nil && r.DisplayName.Val != "" {
		b.writelnf("\tdisplayName: %q,", r.DisplayName.Val)
	}
	pos := r.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writef("\texpr: ")
	b.writeExpr(r.Expr)
	b.writelnf("},")
}
|
||||
|
||||
// writeExpr increments the per-rule expression index and dispatches to the
// writer for the concrete expression type. An unknown type records an error
// in b.err instead of panicking.
func (b *builder) writeExpr(expr ast.Expression) {
	b.exprIndex++
	switch expr := expr.(type) {
	case *ast.ActionExpr:
		b.writeActionExpr(expr)
	case *ast.AndCodeExpr:
		b.writeAndCodeExpr(expr)
	case *ast.AndExpr:
		b.writeAndExpr(expr)
	case *ast.AnyMatcher:
		b.writeAnyMatcher(expr)
	case *ast.CharClassMatcher:
		b.writeCharClassMatcher(expr)
	case *ast.ChoiceExpr:
		b.writeChoiceExpr(expr)
	case *ast.LabeledExpr:
		b.writeLabeledExpr(expr)
	case *ast.LitMatcher:
		b.writeLitMatcher(expr)
	case *ast.NotCodeExpr:
		b.writeNotCodeExpr(expr)
	case *ast.NotExpr:
		b.writeNotExpr(expr)
	case *ast.OneOrMoreExpr:
		b.writeOneOrMoreExpr(expr)
	case *ast.RecoveryExpr:
		b.writeRecoveryExpr(expr)
	case *ast.RuleRefExpr:
		b.writeRuleRefExpr(expr)
	case *ast.SeqExpr:
		b.writeSeqExpr(expr)
	case *ast.StateCodeExpr:
		b.writeStateCodeExpr(expr)
	case *ast.ThrowExpr:
		b.writeThrowExpr(expr)
	case *ast.ZeroOrMoreExpr:
		b.writeZeroOrMoreExpr(expr)
	case *ast.ZeroOrOneExpr:
		b.writeZeroOrOneExpr(expr)
	default:
		b.err = fmt.Errorf("builder: unknown expression type %T", expr)
	}
}
|
||||
|
||||
// writeActionExpr emits an &actionExpr{...} literal. The action's code block
// is invoked through the generated call<FuncName> wrapper; FuncIx is assigned
// from the current expression index if not already set.
func (b *builder) writeActionExpr(act *ast.ActionExpr) {
	if act == nil {
		b.writelnf("nil,")
		return
	}
	if act.FuncIx == 0 {
		act.FuncIx = b.exprIndex
	}
	b.writelnf("&actionExpr{")
	pos := act.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writelnf("\trun: (*parser).call%s,", b.funcName(act.FuncIx))
	b.writef("\texpr: ")
	b.writeExpr(act.Expr)
	b.writelnf("},")
}

// writeAndCodeExpr emits an &andCodeExpr{...} literal for an "&{...}"
// predicate, wiring it to its generated call wrapper.
func (b *builder) writeAndCodeExpr(and *ast.AndCodeExpr) {
	if and == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&andCodeExpr{")
	pos := and.Pos()
	if and.FuncIx == 0 {
		and.FuncIx = b.exprIndex
	}
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writelnf("\trun: (*parser).call%s,", b.funcName(and.FuncIx))
	b.writelnf("},")
}

// writeAndExpr emits an &andExpr{...} literal ("&expr" positive lookahead).
func (b *builder) writeAndExpr(and *ast.AndExpr) {
	if and == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&andExpr{")
	pos := and.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writef("\texpr: ")
	b.writeExpr(and.Expr)
	b.writelnf("},")
}

// writeAnyMatcher emits an &anyMatcher{...} literal ("." matcher). Its
// position fields are written inline, without a position{...} wrapper.
func (b *builder) writeAnyMatcher(any *ast.AnyMatcher) {
	if any == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&anyMatcher{")
	pos := any.Pos()
	b.writelnf("\tline: %d, col: %d, offset: %d,", pos.Line, pos.Col, pos.Off)
	b.writelnf("},")
}
|
||||
|
||||
// writeCharClassMatcher emits a &charClassMatcher{...} literal. Chars and
// ranges are written lower-cased when the matcher is case-insensitive.
// Unicode classes set b.rangeTable so the rangeTable helper is emitted, and
// the optional basic-latin lookup table is precomputed at generation time.
func (b *builder) writeCharClassMatcher(ch *ast.CharClassMatcher) {
	if ch == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&charClassMatcher{")
	pos := ch.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writelnf("\tval: %q,", ch.Val)
	if len(ch.Chars) > 0 {
		b.writef("\tchars: []rune{")
		for _, rn := range ch.Chars {
			if ch.IgnoreCase {
				b.writef("%q,", unicode.ToLower(rn))
			} else {
				b.writef("%q,", rn)
			}
		}
		b.writelnf("},")
	}
	if len(ch.Ranges) > 0 {
		b.writef("\tranges: []rune{")
		for _, rn := range ch.Ranges {
			if ch.IgnoreCase {
				b.writef("%q,", unicode.ToLower(rn))
			} else {
				b.writef("%q,", rn)
			}
		}
		b.writelnf("},")
	}
	if len(ch.UnicodeClasses) > 0 {
		b.rangeTable = true
		b.writef("\tclasses: []*unicode.RangeTable{")
		for _, cl := range ch.UnicodeClasses {
			b.writef("rangeTable(%q),", cl)
		}
		b.writelnf("},")
	}
	if b.basicLatinLookupTable {
		b.writelnf("\tbasicLatinChars: %#v,", BasicLatinLookup(ch.Chars, ch.Ranges, ch.UnicodeClasses, ch.IgnoreCase))
	}
	b.writelnf("\tignoreCase: %t,", ch.IgnoreCase)
	b.writelnf("\tinverted: %t,", ch.Inverted)
	b.writelnf("},")
}
|
||||
|
||||
// BasicLatinLookup calculates the decision results for the first 128 characters
// (the Basic Latin block — the array has 128 entries, not 256 as previously
// documented) for a given set of chars, ranges and unicodeClasses, to speed up
// the CharClassMatcher.
func BasicLatinLookup(chars, ranges []rune, unicodeClasses []string, ignoreCase bool) (basicLatinChars [128]bool) {
	for _, rn := range chars {
		if rn < 128 {
			basicLatinChars[rn] = true
			// For case-insensitive matchers, also mark the other case.
			if ignoreCase {
				if unicode.IsLower(rn) {
					basicLatinChars[unicode.ToUpper(rn)] = true
				} else {
					basicLatinChars[unicode.ToLower(rn)] = true
				}
			}
		}
	}
	// Ranges is a flat slice of [low, high] pairs; only the portion of each
	// range below 128 contributes to the table.
	for i := 0; i < len(ranges); i += 2 {
		if ranges[i] < 128 {
			for j := ranges[i]; j < 128 && j <= ranges[i+1]; j++ {
				basicLatinChars[j] = true
				if ignoreCase {
					if unicode.IsLower(j) {
						basicLatinChars[unicode.ToUpper(j)] = true
					} else {
						basicLatinChars[unicode.ToLower(j)] = true
					}
				}
			}
		}
	}
	for _, cl := range unicodeClasses {
		rt := rangeTable(cl)
		for r := rune(0); r < 128; r++ {
			if unicode.Is(rt, r) {
				basicLatinChars[r] = true
			}
		}
	}
	return
}
|
||||
|
||||
// writeChoiceExpr emits a &choiceExpr{...} literal with its alternatives.
func (b *builder) writeChoiceExpr(ch *ast.ChoiceExpr) {
	if ch == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&choiceExpr{")
	pos := ch.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	if len(ch.Alternatives) > 0 {
		b.writelnf("\talternatives: []interface{}{")
		for _, alt := range ch.Alternatives {
			b.writeExpr(alt)
		}
		b.writelnf("\t},")
	}
	b.writelnf("},")
}

// writeLabeledExpr emits a &labeledExpr{...} literal; the label (when set)
// names the value for use in code blocks.
func (b *builder) writeLabeledExpr(lab *ast.LabeledExpr) {
	if lab == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&labeledExpr{")
	pos := lab.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	if lab.Label != nil && lab.Label.Val != "" {
		b.writelnf("\tlabel: %q,", lab.Label.Val)
	}
	b.writef("\texpr: ")
	b.writeExpr(lab.Expr)
	b.writelnf("},")
}
|
||||
|
||||
// writeLitMatcher emits a &litMatcher{...} literal. For case-insensitive
// matchers the stored value is lower-cased, and the "want" string (used for
// error reporting) is the quoted original value with an "i" suffix.
func (b *builder) writeLitMatcher(lit *ast.LitMatcher) {
	if lit == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&litMatcher{")
	pos := lit.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	if lit.IgnoreCase {
		b.writelnf("\tval: %q,", strings.ToLower(lit.Val))
	} else {
		b.writelnf("\tval: %q,", lit.Val)
	}
	b.writelnf("\tignoreCase: %t,", lit.IgnoreCase)
	ignoreCaseFlag := ""
	if lit.IgnoreCase {
		ignoreCaseFlag = "i"
	}
	b.writelnf("\twant: %q,", strconv.Quote(lit.Val)+ignoreCaseFlag)
	b.writelnf("},")
}
|
||||
|
||||
func (b *builder) writeNotCodeExpr(not *ast.NotCodeExpr) {
|
||||
if not == nil {
|
||||
b.writelnf("nil,")
|
||||
return
|
||||
}
|
||||
b.writelnf("¬CodeExpr{")
|
||||
pos := not.Pos()
|
||||
if not.FuncIx == 0 {
|
||||
not.FuncIx = b.exprIndex
|
||||
}
|
||||
b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
|
||||
b.writelnf("\trun: (*parser).call%s,", b.funcName(not.FuncIx))
|
||||
b.writelnf("},")
|
||||
}
|
||||
|
||||
func (b *builder) writeNotExpr(not *ast.NotExpr) {
|
||||
if not == nil {
|
||||
b.writelnf("nil,")
|
||||
return
|
||||
}
|
||||
b.writelnf("¬Expr{")
|
||||
pos := not.Pos()
|
||||
b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
|
||||
b.writef("\texpr: ")
|
||||
b.writeExpr(not.Expr)
|
||||
b.writelnf("},")
|
||||
}
|
||||
|
||||
func (b *builder) writeOneOrMoreExpr(one *ast.OneOrMoreExpr) {
|
||||
if one == nil {
|
||||
b.writelnf("nil,")
|
||||
return
|
||||
}
|
||||
b.writelnf("&oneOrMoreExpr{")
|
||||
pos := one.Pos()
|
||||
b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
|
||||
b.writef("\texpr: ")
|
||||
b.writeExpr(one.Expr)
|
||||
b.writelnf("},")
|
||||
}
|
||||
|
||||
// writeRecoveryExpr emits a &recoveryExpr{...} literal with its expression,
// its recovery expression and the failure labels it handles.
// (The parameter name shadows the builtin recover; recover() is not used here.)
func (b *builder) writeRecoveryExpr(recover *ast.RecoveryExpr) {
	if recover == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&recoveryExpr{")
	pos := recover.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)

	b.writef("\texpr: ")
	b.writeExpr(recover.Expr)
	b.writef("\trecoverExpr: ")
	b.writeExpr(recover.RecoverExpr)
	b.writelnf("\tfailureLabel: []string{")
	for _, label := range recover.Labels {
		b.writelnf("%q,", label)
	}
	b.writelnf("\t},")
	b.writelnf("},")
}

// writeRuleRefExpr emits a &ruleRefExpr{...} literal referencing a rule by name.
func (b *builder) writeRuleRefExpr(ref *ast.RuleRefExpr) {
	if ref == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&ruleRefExpr{")
	pos := ref.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	if ref.Name != nil && ref.Name.Val != "" {
		b.writelnf("\tname: %q,", ref.Name.Val)
	}
	b.writelnf("},")
}

// writeSeqExpr emits a &seqExpr{...} literal with its sub-expressions.
func (b *builder) writeSeqExpr(seq *ast.SeqExpr) {
	if seq == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&seqExpr{")
	pos := seq.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	if len(seq.Exprs) > 0 {
		b.writelnf("\texprs: []interface{}{")
		for _, e := range seq.Exprs {
			b.writeExpr(e)
		}
		b.writelnf("\t},")
	}
	b.writelnf("},")
}
|
||||
|
||||
// writeStateCodeExpr emits a &stateCodeExpr{...} literal for a "#{...}"
// state-change block. Seeing one marks b.globalState so the generated parser
// includes state support.
func (b *builder) writeStateCodeExpr(state *ast.StateCodeExpr) {
	if state == nil {
		b.writelnf("nil,")
		return
	}
	b.globalState = true
	b.writelnf("&stateCodeExpr{")
	pos := state.Pos()
	if state.FuncIx == 0 {
		state.FuncIx = b.exprIndex
	}
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writelnf("\trun: (*parser).call%s,", b.funcName(state.FuncIx))
	b.writelnf("},")
}

// writeThrowExpr emits a &throwExpr{...} literal ("%{label}").
func (b *builder) writeThrowExpr(throw *ast.ThrowExpr) {
	if throw == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&throwExpr{")
	pos := throw.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writelnf("\tlabel: %q,", throw.Label)
	b.writelnf("},")
}

// writeZeroOrMoreExpr emits a &zeroOrMoreExpr{...} literal ("expr*").
func (b *builder) writeZeroOrMoreExpr(zero *ast.ZeroOrMoreExpr) {
	if zero == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&zeroOrMoreExpr{")
	pos := zero.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writef("\texpr: ")
	b.writeExpr(zero.Expr)
	b.writelnf("},")
}

// writeZeroOrOneExpr emits a &zeroOrOneExpr{...} literal ("expr?").
func (b *builder) writeZeroOrOneExpr(zero *ast.ZeroOrOneExpr) {
	if zero == nil {
		b.writelnf("nil,")
		return
	}
	b.writelnf("&zeroOrOneExpr{")
	pos := zero.Pos()
	b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off)
	b.writef("\texpr: ")
	b.writeExpr(zero.Expr)
	b.writelnf("},")
}
|
||||
|
||||
// writeRuleCode emits the generated code-block functions for one rule.
func (b *builder) writeRuleCode(rule *ast.Rule) {
	if rule == nil || rule.Name == nil {
		return
	}

	// keep trace of the current rule, as the code blocks are created
	// in functions named "on<RuleName><#ExprIndex>".
	b.ruleName = rule.Name.Val
	b.pushArgsSet()
	b.writeExprCode(rule.Expr)
	b.popArgsSet()
}

// pushArgsSet starts a new (empty) set of labeled arguments.
func (b *builder) pushArgsSet() {
	b.argsStack = append(b.argsStack, nil)
}

// popArgsSet discards the most recent set of labeled arguments.
func (b *builder) popArgsSet() {
	b.argsStack = b.argsStack[:len(b.argsStack)-1]
}

// addArg records a label in the current (topmost) argument set; nil labels
// are ignored.
func (b *builder) addArg(arg *ast.Identifier) {
	if arg == nil {
		return
	}
	ix := len(b.argsStack) - 1
	b.argsStack[ix] = append(b.argsStack[ix], arg.Val)
}
|
||||
|
||||
// writeExprCode emits the code-block methods (and their call thunks) for the
// given expression and, recursively, for all of its sub-expressions, while
// maintaining the stack of labeled-argument sets so each generated method
// receives exactly the labels that are in scope for it. Expression types
// without code blocks (literals, character classes, ...) fall through the
// switch and emit nothing.
func (b *builder) writeExprCode(expr ast.Expression) {
	switch expr := expr.(type) {
	case *ast.ActionExpr:
		// Sub-expression methods first, then the action's own method.
		b.writeExprCode(expr.Expr)
		b.writeActionExprCode(expr)

	case *ast.AndCodeExpr:
		b.writeAndCodeExprCode(expr)

	case *ast.LabeledExpr:
		// The label becomes an argument of the enclosing scope; the
		// labeled sub-expression itself opens a fresh argument scope.
		b.addArg(expr.Label)
		b.pushArgsSet()
		b.writeExprCode(expr.Expr)
		b.popArgsSet()

	case *ast.NotCodeExpr:
		b.writeNotCodeExprCode(expr)

	case *ast.AndExpr:
		b.pushArgsSet()
		b.writeExprCode(expr.Expr)
		b.popArgsSet()

	case *ast.ChoiceExpr:
		// Each alternative gets its own argument scope.
		for _, alt := range expr.Alternatives {
			b.pushArgsSet()
			b.writeExprCode(alt)
			b.popArgsSet()
		}

	case *ast.NotExpr:
		b.pushArgsSet()
		b.writeExprCode(expr.Expr)
		b.popArgsSet()

	case *ast.OneOrMoreExpr:
		b.pushArgsSet()
		b.writeExprCode(expr.Expr)
		b.popArgsSet()

	case *ast.RecoveryExpr:
		// The guarded expression and its recovery expression share one
		// argument scope.
		b.pushArgsSet()
		b.writeExprCode(expr.Expr)
		b.writeExprCode(expr.RecoverExpr)
		b.popArgsSet()

	case *ast.SeqExpr:
		// Sequence elements share the enclosing argument scope, so labels
		// defined earlier in the sequence are visible to later elements.
		for _, sub := range expr.Exprs {
			b.writeExprCode(sub)
		}

	case *ast.StateCodeExpr:
		b.writeStateCodeExprCode(expr)

	case *ast.ZeroOrMoreExpr:
		b.pushArgsSet()
		b.writeExprCode(expr.Expr)
		b.popArgsSet()

	case *ast.ZeroOrOneExpr:
		b.pushArgsSet()
		b.writeExprCode(expr.Expr)
		b.popArgsSet()
	}
}
|
||||
|
||||
func (b *builder) writeActionExprCode(act *ast.ActionExpr) {
|
||||
if act == nil {
|
||||
return
|
||||
}
|
||||
if act.FuncIx > 0 {
|
||||
b.writeFunc(act.FuncIx, act.Code, callFuncTemplate, onFuncTemplate)
|
||||
act.FuncIx = 0 // already rendered, prevent duplicates
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) writeAndCodeExprCode(and *ast.AndCodeExpr) {
|
||||
if and == nil {
|
||||
return
|
||||
}
|
||||
if and.FuncIx > 0 {
|
||||
b.writeFunc(and.FuncIx, and.Code, callPredFuncTemplate, onPredFuncTemplate)
|
||||
and.FuncIx = 0 // already rendered, prevent duplicates
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) writeNotCodeExprCode(not *ast.NotCodeExpr) {
|
||||
if not == nil {
|
||||
return
|
||||
}
|
||||
if not.FuncIx > 0 {
|
||||
b.writeFunc(not.FuncIx, not.Code, callPredFuncTemplate, onPredFuncTemplate)
|
||||
not.FuncIx = 0 // already rendered, prevent duplicates
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) writeStateCodeExprCode(state *ast.StateCodeExpr) {
|
||||
if state == nil {
|
||||
return
|
||||
}
|
||||
if state.FuncIx > 0 {
|
||||
b.writeFunc(state.FuncIx, state.Code, callStateFuncTemplate, onStateFuncTemplate)
|
||||
state.FuncIx = 0 // already rendered, prevent duplicates
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) writeFunc(funcIx int, code *ast.CodeBlock, callTpl, funcTpl string) {
|
||||
if code == nil {
|
||||
return
|
||||
}
|
||||
val := strings.TrimSpace(code.Val)[1 : len(code.Val)-1]
|
||||
if len(val) > 0 && val[0] == '\n' {
|
||||
val = val[1:]
|
||||
}
|
||||
if len(val) > 0 && val[len(val)-1] == '\n' {
|
||||
val = val[:len(val)-1]
|
||||
}
|
||||
var args bytes.Buffer
|
||||
ix := len(b.argsStack) - 1
|
||||
if ix >= 0 {
|
||||
for i, arg := range b.argsStack[ix] {
|
||||
if i > 0 {
|
||||
args.WriteString(", ")
|
||||
}
|
||||
args.WriteString(arg)
|
||||
}
|
||||
}
|
||||
if args.Len() > 0 {
|
||||
args.WriteString(" interface{}")
|
||||
}
|
||||
|
||||
fnNm := b.funcName(funcIx)
|
||||
b.writelnf(funcTpl, b.recvName, fnNm, args.String(), val)
|
||||
|
||||
args.Reset()
|
||||
if ix >= 0 {
|
||||
for i, arg := range b.argsStack[ix] {
|
||||
if i > 0 {
|
||||
args.WriteString(", ")
|
||||
}
|
||||
args.WriteString(fmt.Sprintf(`stack[%q]`, arg))
|
||||
}
|
||||
}
|
||||
b.writelnf(callTpl, fnNm, args.String())
|
||||
}
|
||||
|
||||
// writeStaticCode renders the static (grammar-independent) part of the
// generated parser from the staticCode template, strips the "==template=="
// marker comments used by the template source, and writes the result,
// followed by the rangeTable helper when Unicode character classes are used.
func (b *builder) writeStaticCode() {
	buffer := bytes.NewBufferString("")
	// Template parameters that toggle optional sections of the static code.
	params := struct {
		Optimize              bool
		BasicLatinLookupTable bool
		GlobalState           bool
		Nolint                bool
	}{
		Optimize:              b.optimize,
		BasicLatinLookupTable: b.basicLatinLookupTable,
		GlobalState:           b.globalState,
		Nolint:                b.nolint,
	}
	t := template.Must(template.New("static_code").Parse(staticCode))

	err := t.Execute(buffer, params)
	if err != nil {
		// This is very unlikely to ever happen
		panic("executing template: " + err.Error())
	}

	// Clean the ==template== comments from the generated parser:
	// lines that consist only of marker comments are dropped entirely (re),
	// trailing markers are stripped from mixed lines (reLineEnd).
	lines := strings.Split(buffer.String(), "\n")
	buffer.Reset()
	re := regexp.MustCompile(`^\s*//\s*(==template==\s*)+$`)
	reLineEnd := regexp.MustCompile(`//\s*==template==\s*$`)
	for _, line := range lines {
		if !re.MatchString(line) {
			line = reLineEnd.ReplaceAllString(line, "")
			_, err := buffer.WriteString(line + "\n")
			if err != nil {
				// This is very unlikely to ever happen
				panic("unable to write to byte buffer: " + err.Error())
			}
		}
	}

	b.writeln(buffer.String())
	if b.rangeTable {
		b.writeln(rangeTable0)
	}
}
|
||||
|
||||
// funcName returns the name of the generated method for the code block with
// index ix of the current rule, e.g. "onRuleName12".
func (b *builder) funcName(ix int) string {
	return "on" + b.ruleName + strconv.Itoa(ix)
}
|
||||
|
||||
// writef formats to the output writer, latching the first write error in
// b.err; once an error has occurred all subsequent writes are no-ops.
func (b *builder) writef(f string, args ...interface{}) {
	if b.err == nil {
		_, b.err = fmt.Fprintf(b.w, f, args...)
	}
}
|
||||
|
||||
// writelnf is writef with a trailing newline appended to the format string.
func (b *builder) writelnf(f string, args ...interface{}) {
	b.writef(f+"\n", args...)
}
|
||||
|
||||
func (b *builder) writeln(f string) {
|
||||
if b.err == nil {
|
||||
_, b.err = fmt.Fprint(b.w, f+"\n")
|
||||
}
|
||||
}
|
||||
+1450
File diff suppressed because it is too large
Load Diff
+21
@@ -0,0 +1,21 @@
|
||||
// Code generated by static_code_generator with go generate; DO NOT EDIT.
|
||||
|
||||
package builder
|
||||
|
||||
// rangeTable0 is the source text of the rangeTable helper that is appended
// to a generated parser when the grammar uses Unicode character classes.
// It is produced from static_code.go by go generate; keep the two in sync
// by regenerating rather than editing this string.
var rangeTable0 = `
func rangeTable(class string) *unicode.RangeTable {
	if rt, ok := unicode.Categories[class]; ok {
		return rt
	}
	if rt, ok := unicode.Properties[class]; ok {
		return rt
	}
	if rt, ok := unicode.Scripts[class]; ok {
		return rt
	}

	// cannot happen
	panic(fmt.Sprintf("invalid Unicode class: %s", class))
}

`
||||
+1466
File diff suppressed because it is too large
Load Diff
+24
@@ -0,0 +1,24 @@
|
||||
//go:generate go run ../bootstrap/cmd/static_code_generator/main.go -- $GOFILE generated_$GOFILE rangeTable0
|
||||
|
||||
package builder
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// IMPORTANT: All code below this line is added to the parser as static code
|
||||
func rangeTable(class string) *unicode.RangeTable {
|
||||
if rt, ok := unicode.Categories[class]; ok {
|
||||
return rt
|
||||
}
|
||||
if rt, ok := unicode.Properties[class]; ok {
|
||||
return rt
|
||||
}
|
||||
if rt, ok := unicode.Scripts[class]; ok {
|
||||
return rt
|
||||
}
|
||||
|
||||
// cannot happen
|
||||
panic(fmt.Sprintf("invalid Unicode class: %s", class))
|
||||
}
|
||||
+594
@@ -0,0 +1,594 @@
|
||||
/*
|
||||
Command pigeon generates parsers in Go from a PEG grammar.
|
||||
|
||||
From Wikipedia [0]:
|
||||
|
||||
A parsing expression grammar is a type of analytic formal grammar, i.e.
|
||||
it describes a formal language in terms of a set of rules for recognizing
|
||||
strings in the language.
|
||||
|
||||
Its features and syntax are inspired by the PEG.js project [1], while
|
||||
the implementation is loosely based on [2]. Formal presentation of the
|
||||
PEG theory by Bryan Ford is also an important reference [3]. An introductory
|
||||
blog post can be found at [4].
|
||||
|
||||
[0]: http://en.wikipedia.org/wiki/Parsing_expression_grammar
|
||||
[1]: http://pegjs.org/
|
||||
[2]: http://www.codeproject.com/Articles/29713/Parsing-Expression-Grammar-Support-for-C-Part
|
||||
[3]: http://pdos.csail.mit.edu/~baford/packrat/popl04/peg-popl04.pdf
|
||||
[4]: http://0value.com/A-PEG-parser-generator-for-Go
|
||||
|
||||
Command-line usage
|
||||
|
||||
The pigeon tool must be called with PEG input as defined
|
||||
by the accepted PEG syntax below. The grammar may be provided by a
|
||||
file or read from stdin. The generated parser is written to stdout
|
||||
by default.
|
||||
|
||||
pigeon [options] [GRAMMAR_FILE]
|
||||
|
||||
The following options can be specified:
|
||||
|
||||
-cache : cache parser results to avoid exponential parsing time in
|
||||
pathological cases. Can make the parsing slower for typical
|
||||
cases and uses more memory (default: false).
|
||||
|
||||
-debug : boolean, print debugging info to stdout (default: false).
|
||||
|
||||
-nolint: add '// nolint: ...' comments for generated parser to suppress
|
||||
warnings by gometalinter (https://github.com/alecthomas/gometalinter).
|
||||
|
||||
-no-recover : boolean, if set, do not recover from a panic. Useful
|
||||
to access the panic stack when debugging, otherwise the panic
|
||||
is converted to an error (default: false).
|
||||
|
||||
-o=FILE : string, output file where the generated parser will be
|
||||
written (default: stdout).
|
||||
|
||||
-optimize-basic-latin : boolean, if set, a lookup table for the first 128
|
||||
characters of the Unicode table (Basic Latin) is generated for each character
|
||||
class matcher. This speeds up the parsing, if parsed data mainly consists
|
||||
of characters from this range (default: false).
|
||||
|
||||
-optimize-grammar : boolean, (EXPERIMENTAL FEATURE) if set, several performance
|
||||
optimizations on the grammar are performed, with focus to the reduction of the
|
||||
grammar depth.
|
||||
Optimization:
|
||||
* removal of unreferenced rules
|
||||
* replace rule references with a copy of the referenced Rule, if the
|
||||
referenced rule it self has no references.
|
||||
* resolve nested choice expressions
|
||||
* resolve choice expressions with only one alternative
|
||||
* resolve nested sequences expression
|
||||
* resolve sequence expressions with only one element
|
||||
* combine character class matcher and literal matcher, where possible
|
||||
The resulting grammar is usually more memory consuming, but faster for parsing.
|
||||
The optimization of the grammar is done in multiple rounds (optimize until no
|
||||
more optimizations have applied). This process takes some time, depending on the
|
||||
optimization potential of the grammar.
|
||||
|
||||
-optimize-parser : boolean, if set, the options Debug, Memoize and Statistics are
|
||||
removed from the resulting parser. The global "state" is optimized as well by
|
||||
either removing all related code if no state change expression is present in the
|
||||
grammar or by removing the restoration of the global "state" store after action
|
||||
and predicate code blocks. This saves a few cpu cycles, when using the generated
|
||||
parser (default: false).
|
||||
|
||||
-x : boolean, if set, do not build the parser, just parse the input grammar
|
||||
(default: false).
|
||||
|
||||
-receiver-name=NAME : string, name of the receiver variable for the generated
|
||||
code blocks. Non-initializer code blocks in the grammar end up as methods on the
|
||||
*current type, and this option sets the name of the receiver (default: c).
|
||||
|
||||
-alternate-entrypoints=RULE[,RULE...] : string, comma-separated list of rule names
|
||||
that may be used as alternate entrypoints for the parser, in addition to the
|
||||
default entrypoint (the first rule in the grammar) (default: none).
|
||||
Such entrypoints can be specified in the call to Parse by passing an
|
||||
Entrypoint option that specifies the alternate rule name to use. This is only
|
||||
necessary if the -optimize-parser flag is set, as some rules may be optimized
|
||||
out of the resulting parser.
|
||||
|
||||
If the code blocks in the grammar (see below, section "Code block") are golint-
|
||||
and go vet-compliant, then the resulting generated code will also be golint-
|
||||
and go vet-compliant.
|
||||
|
||||
The generated code doesn't use any third-party dependency unless code blocks
|
||||
in the grammar require such a dependency.
|
||||
|
||||
PEG syntax
|
||||
|
||||
The accepted syntax for the grammar is formally defined in the
|
||||
grammar/pigeon.peg file, using the PEG syntax. What follows is an informal
|
||||
description of this syntax.
|
||||
|
||||
Identifiers, whitespace, comments and literals follow the same
|
||||
notation as the Go language, as defined in the language specification
|
||||
(http://golang.org/ref/spec#Source_code_representation):
|
||||
|
||||
// single line comment*/
|
||||
// /* multi-line comment */
|
||||
/* 'x' (single quotes for single char literal)
|
||||
"double quotes for string literal"
|
||||
`backtick quotes for raw string literal`
|
||||
RuleName (a valid identifier)
|
||||
|
||||
The grammar must be Unicode text encoded in UTF-8. New lines are identified
|
||||
by the \n character (U+000A). Space (U+0020), horizontal tabs (U+0009) and
|
||||
carriage returns (U+000D) are considered whitespace and are ignored except
|
||||
to separate tokens.
|
||||
|
||||
Rules
|
||||
|
||||
A PEG grammar consists of a set of rules. A rule is an identifier followed
|
||||
by a rule definition operator and an expression. An optional display name -
|
||||
a string literal used in error messages instead of the rule identifier - can
|
||||
be specified after the rule identifier. E.g.:
|
||||
RuleA "friendly name" = 'a'+ // RuleA is one or more lowercase 'a's
|
||||
|
||||
The rule definition operator can be any one of those:
|
||||
=, <-, ← (U+2190), ⟵ (U+27F5)
|
||||
|
||||
Expressions
|
||||
|
||||
A rule is defined by an expression. The following sections describe the
|
||||
various expression types. Expressions can be grouped by using parentheses,
|
||||
and a rule can be referenced by its identifier in place of an expression.
|
||||
|
||||
Choice expression
|
||||
|
||||
The choice expression is a list of expressions that will be tested in the
|
||||
order they are defined. The first one that matches will be used. Expressions
|
||||
are separated by the forward slash character "/". E.g.:
|
||||
ChoiceExpr = A / B / C // A, B and C should be rules declared in the grammar
|
||||
|
||||
Because the first match is used, it is important to think about the order
|
||||
of expressions. For example, in this rule, "<=" would never be used because
|
||||
the "<" expression comes first:
|
||||
BadChoiceExpr = "<" / "<="
|
||||
|
||||
Sequence expression
|
||||
|
||||
The sequence expression is a list of expressions that must all match in
|
||||
that same order for the sequence expression to be considered a match.
|
||||
Expressions are separated by whitespace. E.g.:
|
||||
SeqExpr = "A" "b" "c" // matches "Abc", but not "Acb"
|
||||
|
||||
Labeled expression
|
||||
|
||||
A labeled expression consists of an identifier followed by a colon ":"
|
||||
and an expression. A labeled expression introduces a variable named with
|
||||
the label that can be referenced in the code blocks in the same scope.
|
||||
The variable will have the value of the expression that follows the colon.
|
||||
E.g.:
|
||||
LabeledExpr = value:[a-z]+ {
|
||||
fmt.Println(value)
|
||||
return value, nil
|
||||
}
|
||||
|
||||
The variable is typed as an empty interface, and the underlying type depends
|
||||
on the following:
|
||||
|
||||
For terminals (character and string literals, character classes and
|
||||
the any matcher), the value is []byte. E.g.:
|
||||
Rule = label:'a' { // label is []byte }
|
||||
|
||||
For predicates (& and !), the value is always nil. E.g.:
|
||||
Rule = label:&'a' { // label is nil }
|
||||
|
||||
For a sequence, the value is a slice of empty interfaces, one for each
|
||||
expression value in the sequence. The underlying types of each value
|
||||
in the slice follow the same rules described here, recursively. E.g.:
|
||||
Rule = label:('a' 'b') { // label is []interface{} }
|
||||
|
||||
For a repetition (+ and *), the value is a slice of empty interfaces, one for
|
||||
each repetition. The underlying types of each value in the slice follow
|
||||
the same rules described here, recursively. E.g.:
|
||||
Rule = label:[a-z]+ { // label is []interface{} }
|
||||
|
||||
For a choice expression, the value is that of the matching choice. E.g.:
|
||||
Rule = label:('a' / 'b') { // label is []byte }
|
||||
|
||||
For the optional expression (?), the value is nil or the value of the
|
||||
expression. E.g.:
|
||||
Rule = label:'a'? { // label is nil or []byte }
|
||||
|
||||
Of course, the type of the value can be anything once an action code block
|
||||
is used. E.g.:
|
||||
RuleA = label:'3' {
|
||||
return 3, nil
|
||||
}
|
||||
RuleB = label:RuleA { // label is int }
|
||||
|
||||
And and not expressions
|
||||
|
||||
An expression prefixed with the ampersand "&" is the "and" predicate
|
||||
expression: it is considered a match if the following expression is a match,
|
||||
but it does not consume any input.
|
||||
|
||||
An expression prefixed with the exclamation point "!" is the "not" predicate
|
||||
expression: it is considered a match if the following expression is not
|
||||
a match, but it does not consume any input. E.g.:
|
||||
AndExpr = "A" &"B" // matches "A" if followed by a "B" (does not consume "B")
|
||||
NotExpr = "A" !"B" // matches "A" if not followed by a "B" (does not consume "B")
|
||||
|
||||
The expression following the & and ! operators can be a code block. In that
|
||||
case, the code block must return a bool and an error. The operator's semantic
|
||||
is the same, & is a match if the code block returns true, ! is a match if the
|
||||
code block returns false. The code block has access to any labeled value
|
||||
defined in its scope. E.g.:
|
||||
CodeAndExpr = value:[a-z] &{
|
||||
// can access the value local variable...
|
||||
return true, nil
|
||||
}
|
||||
|
||||
Repeating expressions
|
||||
|
||||
An expression followed by "*", "?" or "+" is a match if the expression
|
||||
occurs zero or more times ("*"), zero or one time "?" or one or more times
|
||||
("+") respectively. The match is greedy, it will match as many times as
|
||||
possible. E.g.
|
||||
ZeroOrMoreAs = "A"*
|
||||
|
||||
Literal matcher
|
||||
|
||||
A literal matcher tries to match the input against a single character or a
|
||||
string literal. The literal may be a single-quoted single character, a
|
||||
double-quoted string or a backtick-quoted raw string. The same rules as in Go
|
||||
apply regarding the allowed characters and escapes.
|
||||
|
||||
The literal may be followed by a lowercase "i" (outside the ending quote)
|
||||
to indicate that the match is case-insensitive. E.g.:
|
||||
LiteralMatch = "Awesome\n"i // matches "awesome" followed by a newline
|
||||
|
||||
Character class matcher
|
||||
|
||||
A character class matcher tries to match the input against a class of characters
|
||||
inside square brackets "[...]". Inside the brackets, characters represent
|
||||
themselves and the same escapes as in string literals are available, except
|
||||
that the single- and double-quote escape is not valid, instead the closing
|
||||
square bracket "]" must be escaped to be used.
|
||||
|
||||
Character ranges can be specified using the "[a-z]" notation. Unicode
|
||||
classes can be specified using the "[\pL]" notation, where L is a
|
||||
single-letter Unicode class of characters, or using the "[\p{Class}]"
|
||||
notation where Class is a valid Unicode class (e.g. "Latin").
|
||||
|
||||
As for string literals, a lowercase "i" may follow the matcher (outside
|
||||
the ending square bracket) to indicate that the match is case-insensitive.
|
||||
A "^" as first character inside the square brackets indicates that the match
|
||||
is inverted (it is a match if the input does not match the character class
|
||||
matcher). E.g.:
|
||||
NotAZ = [^a-z]i
|
||||
|
||||
Any matcher
|
||||
|
||||
The any matcher is represented by the dot ".". It matches any character
|
||||
except the end of file, thus the "!." expression is used to indicate "match
|
||||
the end of file". E.g.:
|
||||
AnyChar = . // match a single character
|
||||
EOF = !.
|
||||
|
||||
Code block
|
||||
|
||||
Code blocks can be added to generate custom Go code. There are three kinds
|
||||
of code blocks: the initializer, the action and the predicate. All code blocks
|
||||
appear inside curly braces "{...}".
|
||||
|
||||
The initializer must appear first in the grammar, before any rule. It is
|
||||
copied as-is (minus the wrapping curly braces) at the top of the generated
|
||||
parser. It may contain function declarations, types, variables, etc. just
|
||||
like any Go file. Every symbol declared here will be available to all other
|
||||
code blocks. Although the initializer is optional in a valid grammar, it is
|
||||
usually required to generate a valid Go source code file (for the package
|
||||
clause). E.g.:
|
||||
{
|
||||
package main
|
||||
|
||||
func someHelper() {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
|
||||
Action code blocks are code blocks declared after an expression in a rule.
|
||||
Those code blocks are turned into a method on the "*current" type in the
|
||||
generated source code. The method receives any labeled expression's value
|
||||
as argument (as interface{}) and must return two values, the first being
|
||||
the value of the expression (an interface{}), and the second an error.
|
||||
If a non-nil error is returned, it is added to the list of errors that the
|
||||
parser will return. E.g.:
|
||||
RuleA = "A"+ {
|
||||
// return the matched string, "c" is the default name for
|
||||
// the *current receiver variable.
|
||||
return string(c.text), nil
|
||||
}
|
||||
|
||||
Predicate code blocks are code blocks declared immediately after the and "&"
|
||||
or the not "!" operators. Like action code blocks, predicate code blocks
|
||||
are turned into a method on the "*current" type in the generated source code.
|
||||
The method receives any labeled expression's value as argument (as interface{})
|
||||
and must return two values, the first being a bool and the second an error.
|
||||
If a non-nil error is returned, it is added to the list of errors that the
|
||||
parser will return. E.g.:
|
||||
RuleAB = [ab]i+ &{
|
||||
return true, nil
|
||||
}
|
||||
|
||||
State change code blocks are code blocks starting with "#". In contrast to
|
||||
action and predicate code blocks, state change code blocks are allowed to
|
||||
modify values in the global "state" store (see below).
|
||||
State change code blocks are turned into a method on the "*current" type
|
||||
in the generated source code.
|
||||
The method is passed any labeled expression's value as an argument (of type
|
||||
interface{}) and must return a value of type error.
|
||||
If a non-nil error is returned, it is added to the list of errors that the
|
||||
parser will return, note that the parser does NOT backtrack if a non-nil
|
||||
error is returned.
|
||||
E.g:
|
||||
Rule = [a] #{
|
||||
c.state["a"]++
|
||||
if c.state["a"] > 5 {
|
||||
return fmt.Errorf("we have seen more than 5 a's") // parser will not backtrack
|
||||
}
|
||||
return nil
|
||||
}
|
||||
The "*current" type is a struct that provides four useful fields that can be
|
||||
accessed in action, state change, and predicate code blocks: "pos", "text",
|
||||
"state" and "globalStore".
|
||||
|
||||
The "pos" field indicates the current position of the parser in the source
|
||||
input. It is itself a struct with three fields: "line", "col" and "offset".
|
||||
Line is a 1-based line number, col is a 1-based column number that counts
|
||||
runes from the start of the line, and offset is a 0-based byte offset.
|
||||
|
||||
The "text" field is the slice of bytes of the current match. It is empty
|
||||
in a predicate code block.
|
||||
|
||||
The "state" field is a global store, with backtrack support, of type
|
||||
"map[string]interface{}". The values in the store are tied to the parser's
|
||||
backtracking, in particular if a rule fails to match then all updates to the
|
||||
state that occurred in the process of matching the rule are rolled back. For a
|
||||
key-value store that is not tied to the parser's backtracking, see the
|
||||
"globalStore".
|
||||
The values in the "state" store are available for read access in action and
|
||||
predicate code blocks, any changes made to the "state" store will be reverted
|
||||
once the action or predicate code block is finished running. To update values
|
||||
in the "state" use state change code blocks ("#{}").
|
||||
|
||||
IMPORTANT:
|
||||
- In order to properly roll back the state if a rule fails to match the
|
||||
parser must clone the state before trying to match a rule.
|
||||
- The default clone mechanism makes a "shallow" copy of each value in the
|
||||
"state", this implies that pointers, maps, slices, channels, and structs
|
||||
containing any of the previous types are not properly copied.
|
||||
- To support these cases pigeon offers the "Cloner" interface which
|
||||
consists of a single method "Clone". If a value stored in the "state"
|
||||
store implements this interface, the "Clone" method is used to obtain a
|
||||
proper copy.
|
||||
- If a general solution is needed, external libraries which provide deep
|
||||
copy functionality may be used in the "Clone" method
|
||||
(e.g. https://github.com/mitchellh/copystructure).
|
||||
|
||||
The "globalStore" field is a global store of type "map[string]interface{}",
|
||||
which allows to store arbitrary values, which are available in action and
|
||||
predicate code blocks for read as well as write access.
|
||||
It is important to notice, that the global store is completely independent from
|
||||
the backtrack mechanism of PEG and is therefore not set back to its old state
|
||||
during backtrack.
|
||||
The initialization of the global store may be achieved by using the GlobalStore
|
||||
function (http://godoc.org/github.com/mna/pigeon/test/predicates#GlobalStore).
|
||||
Be aware, that all keys starting with "_pigeon" are reserved for internal use
|
||||
of pigeon and should not be used nor modified. Those keys are treated as
|
||||
internal implementation details and therefore there are no guarantees given in
|
||||
regards of API stability.
|
||||
|
||||
Failure labels, throw and recover
|
||||
|
||||
pigeon supports an extension of the classical PEG syntax called failure labels,
|
||||
proposed by Maidl et al. in their paper "Error Reporting in Parsing Expression Grammars" [7].
|
||||
The used syntax for the introduced expressions is borrowed from their lpeglabel [8]
|
||||
implementation.
|
||||
|
||||
This extension allows to signal different kinds of errors and to specify, which
|
||||
recovery pattern should handle a given label.
|
||||
|
||||
With labeled failures it is possible to distinguish between an ordinary failure
|
||||
and an error. Usually, an ordinary failure is produced when the matching of a
|
||||
character fails, and this failure is caught by ordered choice. An error
|
||||
(a non-ordinary failure), by its turn, is produced by the throw operator and
|
||||
may be caught by the recovery operator.
|
||||
|
||||
In pigeon, the recovery expression consists of the regular expression, the recovery
|
||||
expression and a set of labels to be matched. First, the regular expression is tried.
|
||||
If this fails with one of the provided labels, the recovery expression is tried. If
|
||||
this fails as well, the error is propagated. E.g.:
|
||||
FailureRecoveryExpr = RegularExpr //{FailureLabel1, FailureLabel2} RecoveryExpr
|
||||
|
||||
To signal a failure condition, the throw expression is used. E.g.:
|
||||
ThrowExpr = %{FailureLabel1}
|
||||
|
||||
For concrete examples, how to use throw and recover, have a look at the examples
|
||||
"labeled_failures" and "thrownrecover" in the "test" folder.
|
||||
|
||||
The implementation of the throw and recover operators work as follows:
|
||||
The failure recover expression adds the recover expression for every failure label
|
||||
to the recovery stack and runs the regular expression.
|
||||
The throw expression checks the recovery stack in reversed order for the provided
|
||||
failure label. If the label is found, the respective recovery expression is run. If
|
||||
this expression is successful, the parser continues the processing of the input. If
|
||||
the recovery expression is not successful, the parsing fails and the parser starts
|
||||
to backtrack.
|
||||
|
||||
If throw and recover expressions are used together with global state, it is the
|
||||
responsibility of the author of the grammar to reset the global state to a valid
|
||||
state during the recovery operation.
|
||||
|
||||
[7]: https://arxiv.org/pdf/1405.6646v3.pdf
|
||||
[8]: https://github.com/sqmedeiros/lpeglabel
|
||||
|
||||
Using the generated parser
|
||||
|
||||
The parser generated by pigeon exports a few symbols so that it can be used
|
||||
as a package with public functions to parse input text. The exported API is:
|
||||
- Parse(string, []byte, ...Option) (interface{}, error)
|
||||
- ParseFile(string, ...Option) (interface{}, error)
|
||||
- ParseReader(string, io.Reader, ...Option) (interface{}, error)
|
||||
- AllowInvalidUTF8(bool) Option
|
||||
- Debug(bool) Option
|
||||
- Entrypoint(string) Option
|
||||
- GlobalStore(string, interface{}) Option
|
||||
- MaxExpressions(uint64) Option
|
||||
- Memoize(bool) Option
|
||||
- Recover(bool) Option
|
||||
- Statistics(*Stats) Option
|
||||
|
||||
See the godoc page of the generated parser for the test/predicates grammar
|
||||
for an example documentation page of the exported API:
|
||||
http://godoc.org/github.com/mna/pigeon/test/predicates.
|
||||
|
||||
Like the grammar used to generate the parser, the input text must be
|
||||
UTF-8-encoded Unicode.
|
||||
|
||||
The start rule of the parser is the first rule in the PEG grammar used
|
||||
to generate the parser. A call to any of the Parse* functions returns
|
||||
the value generated by executing the grammar on the provided input text,
|
||||
and an optional error.
|
||||
|
||||
Typically, the grammar should generate some kind of abstract syntax tree (AST),
|
||||
but for simple grammars it may evaluate the result immediately, such as in
|
||||
the examples/calculator example. There are no constraints imposed on the
|
||||
author of the grammar, it can return whatever is needed.
|
||||
|
||||
Error reporting
|
||||
|
||||
When the parser returns a non-nil error, the error is always of type errList,
|
||||
which is defined as a slice of errors ([]error). Each error in the list is
|
||||
of type *parserError. This is a struct that has an "Inner" field that can be
|
||||
used to access the original error.
|
||||
|
||||
So if a code block returns some well-known error like:
|
||||
{
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
The original error can be accessed this way:
|
||||
_, err := ParseFile("some_file")
|
||||
if err != nil {
|
||||
list := err.(errList)
|
||||
for _, err := range list {
|
||||
pe := err.(*parserError)
|
||||
if pe.Inner == io.EOF {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
By default, the parser will continue after an error is returned and will
|
||||
cumulate all errors found during parsing. If the grammar reaches a point
|
||||
where it shouldn't continue, a panic statement can be used to terminate
|
||||
parsing. The panic will be caught at the top-level of the Parse* call
|
||||
and will be converted into a *parserError like any error, and an errList
|
||||
will still be returned to the caller.
|
||||
|
||||
The divide by zero error in the examples/calculator grammar leverages this
|
||||
feature (no special code is needed to handle division by zero, if it
|
||||
happens, the runtime panics and it is recovered and returned as a parsing
|
||||
error).
|
||||
|
||||
Providing good error reporting in a parser is not a trivial task. Part
|
||||
of it is provided by the pigeon tool, by offering features such as
|
||||
filename, position, expected literals and rule name in the error message,
|
||||
but an important part of good error reporting needs to be done by the grammar
|
||||
author.
|
||||
|
||||
For example, many programming languages use double-quotes for string literals.
|
||||
Usually, if the opening quote is found, the closing quote is expected, and if
|
||||
none is found, there won't be any other rule that will match, there's no need
|
||||
to backtrack and try other choices, an error should be added to the list
|
||||
and the match should be consumed.
|
||||
|
||||
In order to do this, the grammar can look something like this:
|
||||
|
||||
StringLiteral = '"' ValidStringChar* '"' {
|
||||
// this is the valid case, build string literal node
|
||||
// node = ...
|
||||
return node, nil
|
||||
} / '"' ValidStringChar* !'"' {
|
||||
// invalid case, build a replacement string literal node or build a BadNode
|
||||
// node = ...
|
||||
return node, errors.New("string literal not terminated")
|
||||
}
|
||||
|
||||
This is just one example, but it illustrates the idea that error reporting
|
||||
needs to be thought out when designing the grammar.
|
||||
|
||||
Because the above mentioned error types (errList and parserError) are not
|
||||
exported, additional steps have to be taken if the generated parser is used as
|
||||
library package in other packages (e.g. if the same parser is used in multiple
|
||||
command line tools).
|
||||
One possible implementation for exported errors (based on interfaces) and
|
||||
customized error reporting (caret style formatting of the position, where
|
||||
the parsing failed) is available in the json example and its command line tool:
|
||||
http://godoc.org/github.com/mna/pigeon/examples/json
|
||||
|
||||
API stability
|
||||
|
||||
Generated parsers have user-provided code mixed with pigeon code
|
||||
in the same package, so there is no package
|
||||
boundary in the resulting code to prevent access to unexported symbols.
|
||||
What is meant to be implementation
|
||||
details in pigeon is also available to user code - which doesn't mean
|
||||
it should be used.
|
||||
|
||||
For this reason, it is important to precisely define what is intended to be
|
||||
the supported API of pigeon, the parts that will be stable
|
||||
in future versions.
|
||||
|
||||
The "stability" of the version 1.0 API attempts to make a similar guarantee
|
||||
as the Go 1 compatibility [5]. The following lists what part of the
|
||||
current pigeon code falls under that guarantee (features may be added in
|
||||
the future):
|
||||
|
||||
- The pigeon command-line flags and arguments: those will not be removed
|
||||
and will maintain the same semantics.
|
||||
|
||||
- The explicitly exported API generated by pigeon. See [6] for the
|
||||
documentation of this API on a generated parser.
|
||||
|
||||
- The PEG syntax, as documented above.
|
||||
|
||||
- The code blocks (except the initializer) will always be generated as
|
||||
methods on the *current type, and this type is guaranteed to have
|
||||
the fields pos (type position) and text (type []byte). There are no
|
||||
guarantees on other fields and methods of this type.
|
||||
|
||||
- The position type will always have the fields line, col and offset,
|
||||
all defined as int. There are no guarantees on other fields and methods
|
||||
of this type.
|
||||
|
||||
- The type of the error value returned by the Parse* functions, when
|
||||
not nil, will always be errList defined as a []error. There are no
|
||||
guarantees on methods of this type, other than the fact it implements the
|
||||
error interface.
|
||||
|
||||
- Individual errors in the errList will always be of type *parserError,
|
||||
and this type is guaranteed to have an Inner field that contains the
|
||||
original error value. There are no guarantees on other fields and methods
|
||||
of this type.
|
||||
|
||||
The above guarantee is given to the version 1.0 (https://github.com/mna/pigeon/releases/tag/v1.0.0)
|
||||
of pigeon, which has entered maintenance mode (bug fixes only). The current
|
||||
master branch includes the development toward a future version 2.0, which
|
||||
intends to further improve pigeon.
|
||||
While the given API stability should be maintained as far as it makes sense,
|
||||
breaking changes may be necessary to be able to improve pigeon.
|
||||
The new version 2.0 API has not yet stabilized and therefore changes to the API
|
||||
may occur at any time.
|
||||
|
||||
References:
|
||||
|
||||
[5]: https://golang.org/doc/go1compat
|
||||
[6]: http://godoc.org/github.com/mna/pigeon/test/predicates
|
||||
|
||||
*/
|
||||
package main
|
||||
+294
@@ -0,0 +1,294 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/tools/imports"
|
||||
|
||||
"github.com/mna/pigeon/ast"
|
||||
"github.com/mna/pigeon/builder"
|
||||
)
|
||||
|
||||
// exit function mockable for tests
|
||||
var exit = os.Exit
|
||||
|
||||
// ruleNamesFlag is a custom flag that parses a comma-separated
|
||||
// list of rule names. It implements flag.Value.
|
||||
type ruleNamesFlag []string
|
||||
|
||||
func (r *ruleNamesFlag) String() string {
|
||||
return fmt.Sprint(*r)
|
||||
}
|
||||
|
||||
func (r *ruleNamesFlag) Set(value string) error {
|
||||
names := strings.Split(value, ",")
|
||||
*r = append(*r, names...)
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
|
||||
// define command-line flags
|
||||
var (
|
||||
cacheFlag = fs.Bool("cache", false, "cache parsing results")
|
||||
dbgFlag = fs.Bool("debug", false, "set debug mode")
|
||||
shortHelpFlag = fs.Bool("h", false, "show help page")
|
||||
longHelpFlag = fs.Bool("help", false, "show help page")
|
||||
nolint = fs.Bool("nolint", false, "add '// nolint: ...' comments to suppress warnings by gometalinter")
|
||||
noRecoverFlag = fs.Bool("no-recover", false, "do not recover from panic")
|
||||
outputFlag = fs.String("o", "", "output file, defaults to stdout")
|
||||
optimizeBasicLatinFlag = fs.Bool("optimize-basic-latin", false, "generate optimized parser for Unicode Basic Latin character sets")
|
||||
optimizeGrammar = fs.Bool("optimize-grammar", false, "optimize the given grammar (EXPERIMENTAL FEATURE)")
|
||||
optimizeParserFlag = fs.Bool("optimize-parser", false, "generate optimized parser without Debug and Memoize options")
|
||||
recvrNmFlag = fs.String("receiver-name", "c", "receiver name for the generated methods")
|
||||
noBuildFlag = fs.Bool("x", false, "do not build, only parse")
|
||||
|
||||
altEntrypointsFlag ruleNamesFlag
|
||||
)
|
||||
fs.Var(&altEntrypointsFlag, "alternate-entrypoints", "comma-separated list of rule names that may be used as entrypoints")
|
||||
|
||||
fs.Usage = usage
|
||||
err := fs.Parse(os.Args[1:])
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, "args parse error:\n", err)
|
||||
exit(6)
|
||||
}
|
||||
|
||||
if *shortHelpFlag || *longHelpFlag {
|
||||
fs.Usage()
|
||||
exit(0)
|
||||
}
|
||||
|
||||
if fs.NArg() > 1 {
|
||||
argError(1, "expected one argument, got %q", strings.Join(fs.Args(), " "))
|
||||
}
|
||||
|
||||
// get input source
|
||||
infile := ""
|
||||
if fs.NArg() == 1 {
|
||||
infile = fs.Arg(0)
|
||||
}
|
||||
nm, rc := input(infile)
|
||||
defer func() {
|
||||
err = rc.Close()
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, "close file error:\n", err)
|
||||
}
|
||||
if r := recover(); r != nil {
|
||||
panic(r)
|
||||
}
|
||||
if err != nil {
|
||||
exit(7)
|
||||
}
|
||||
}()
|
||||
|
||||
// parse input
|
||||
g, err := ParseReader(nm, rc, Debug(*dbgFlag), Memoize(*cacheFlag), Recover(!*noRecoverFlag))
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, "parse error(s):\n", err)
|
||||
exit(3)
|
||||
}
|
||||
|
||||
// validate alternate entrypoints
|
||||
grammar := g.(*ast.Grammar)
|
||||
rules := make(map[string]struct{}, len(grammar.Rules))
|
||||
for _, rule := range grammar.Rules {
|
||||
rules[rule.Name.Val] = struct{}{}
|
||||
}
|
||||
for _, entrypoint := range altEntrypointsFlag {
|
||||
if entrypoint == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := rules[entrypoint]; !ok {
|
||||
fmt.Fprintf(os.Stderr, "argument error:\nunknown rule name %s used as alternate entrypoint\n", entrypoint)
|
||||
exit(9)
|
||||
}
|
||||
}
|
||||
|
||||
if !*noBuildFlag {
|
||||
if *optimizeGrammar {
|
||||
ast.Optimize(grammar, altEntrypointsFlag...)
|
||||
}
|
||||
|
||||
// generate parser
|
||||
out := output(*outputFlag)
|
||||
defer func() {
|
||||
err := out.Close()
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, "close file error:\n", err)
|
||||
exit(8)
|
||||
}
|
||||
}()
|
||||
|
||||
outBuf := bytes.NewBuffer([]byte{})
|
||||
|
||||
curNmOpt := builder.ReceiverName(*recvrNmFlag)
|
||||
optimizeParser := builder.Optimize(*optimizeParserFlag)
|
||||
basicLatinOptimize := builder.BasicLatinLookupTable(*optimizeBasicLatinFlag)
|
||||
nolintOpt := builder.Nolint(*nolint)
|
||||
if err := builder.BuildParser(outBuf, grammar, curNmOpt, optimizeParser, basicLatinOptimize, nolintOpt); err != nil {
|
||||
fmt.Fprintln(os.Stderr, "build error: ", err)
|
||||
exit(5)
|
||||
}
|
||||
|
||||
// Defaults from golang.org/x/tools/cmd/goimports
|
||||
options := &imports.Options{
|
||||
TabWidth: 8,
|
||||
TabIndent: true,
|
||||
Comments: true,
|
||||
Fragment: true,
|
||||
}
|
||||
|
||||
formattedBuf, err := imports.Process("filename", outBuf.Bytes(), options)
|
||||
if err != nil {
|
||||
if _, err := out.Write(outBuf.Bytes()); err != nil {
|
||||
fmt.Fprintln(os.Stderr, "write error: ", err)
|
||||
exit(7)
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "format error: ", err)
|
||||
exit(6)
|
||||
}
|
||||
|
||||
if _, err := out.Write(formattedBuf); err != nil {
|
||||
fmt.Fprintln(os.Stderr, "write error: ", err)
|
||||
exit(7)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var usagePage = `usage: %s [options] [GRAMMAR_FILE]
|
||||
|
||||
Pigeon generates a parser based on a PEG grammar.
|
||||
|
||||
By default, pigeon reads the grammar from stdin and writes the
|
||||
generated parser to stdout. If GRAMMAR_FILE is specified, the
|
||||
grammar is read from this file instead. If the -o flag is set,
|
||||
the generated code is written to this file instead.
|
||||
|
||||
-cache
|
||||
cache parser results to avoid exponential parsing time in
|
||||
pathological cases. Can make the parsing slower for typical
|
||||
cases and uses more memory.
|
||||
-debug
|
||||
output debugging information while parsing the grammar.
|
||||
-h -help
|
||||
display this help message.
|
||||
-nolint
|
||||
add '// nolint: ...' comments for generated parser to suppress
|
||||
warnings by gometalinter (https://github.com/alecthomas/gometalinter).
|
||||
-no-recover
|
||||
do not recover from a panic. Useful to access the panic stack
|
||||
when debugging, otherwise the panic is converted to an error.
|
||||
-o OUTPUT_FILE
|
||||
write the generated parser to OUTPUT_FILE. Defaults to stdout.
|
||||
-optimize-basic-latin
|
||||
generate optimized parser for Unicode Basic Latin character set
|
||||
-optimize-grammar
|
||||
performes several performance optimizations on the grammar (EXPERIMENTAL FEATURE)
|
||||
-optimize-parser
|
||||
generate optimized parser without Debug and Memoize options and
|
||||
with some other optimizations applied.
|
||||
-receiver-name NAME
|
||||
use NAME as for the receiver name of the generated methods
|
||||
for the grammar's code blocks. Defaults to "c".
|
||||
-x
|
||||
do not generate the parser, only parse the grammar.
|
||||
-alternate-entrypoints RULE[,RULE...]
|
||||
comma-separated list of rule names that may be used as alternate
|
||||
entrypoints for the parser, in addition to the first rule in the
|
||||
grammar.
|
||||
|
||||
See https://godoc.org/github.com/mna/pigeon for more information.
|
||||
`
|
||||
|
||||
// usage prints the help page of the command-line tool.
|
||||
func usage() {
|
||||
fmt.Printf(usagePage, os.Args[0])
|
||||
}
|
||||
|
||||
// argError prints an error message to stderr, prints the command usage
|
||||
// and exits with the specified exit code.
|
||||
func argError(exitCode int, msg string, args ...interface{}) {
|
||||
fmt.Fprintf(os.Stderr, msg, args...)
|
||||
fmt.Fprintln(os.Stderr)
|
||||
usage()
|
||||
exit(exitCode)
|
||||
}
|
||||
|
||||
// input gets the name and reader to get input text from.
|
||||
func input(filename string) (nm string, rc io.ReadCloser) {
|
||||
nm = "stdin"
|
||||
inf := os.Stdin
|
||||
if filename != "" {
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
exit(2)
|
||||
}
|
||||
inf = f
|
||||
nm = filename
|
||||
}
|
||||
r := bufio.NewReader(inf)
|
||||
return nm, makeReadCloser(r, inf)
|
||||
}
|
||||
|
||||
// output gets the writer to write the generated parser to.
|
||||
func output(filename string) io.WriteCloser {
|
||||
out := os.Stdout
|
||||
if filename != "" {
|
||||
f, err := os.Create(filename)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
exit(4)
|
||||
}
|
||||
out = f
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// create a ReadCloser that reads from r and closes c.
|
||||
func makeReadCloser(r io.Reader, c io.Closer) io.ReadCloser {
|
||||
rc := struct {
|
||||
io.Reader
|
||||
io.Closer
|
||||
}{r, c}
|
||||
return io.ReadCloser(rc)
|
||||
}
|
||||
|
||||
// astPos is a helper method for the PEG grammar parser. It returns the
|
||||
// position of the current match as an ast.Pos.
|
||||
func (c *current) astPos() ast.Pos {
|
||||
return ast.Pos{Line: c.pos.line, Col: c.pos.col, Off: c.pos.offset}
|
||||
}
|
||||
|
||||
// toIfaceSlice is a helper function for the PEG grammar parser. It converts
|
||||
// v to a slice of empty interfaces.
|
||||
func toIfaceSlice(v interface{}) []interface{} {
|
||||
if v == nil {
|
||||
return nil
|
||||
}
|
||||
return v.([]interface{})
|
||||
}
|
||||
|
||||
// validateUnicodeEscape checks that the provided escape sequence is a
|
||||
// valid Unicode escape sequence.
|
||||
func validateUnicodeEscape(escape, errMsg string) (interface{}, error) {
|
||||
r, _, _, err := strconv.UnquoteChar("\\"+escape, '"')
|
||||
if err != nil {
|
||||
return nil, errors.New(errMsg)
|
||||
}
|
||||
if 0xD800 <= r && r <= 0xDFFF {
|
||||
return nil, errors.New(errMsg)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
+4526
File diff suppressed because it is too large
Load Diff
+71
@@ -0,0 +1,71 @@
|
||||
package main
|
||||
|
||||
var reservedWords = map[string]bool{
|
||||
// Go keywords http://golang.org/ref/spec#Keywords
|
||||
"break": true,
|
||||
"case": true,
|
||||
"chan": true,
|
||||
"const": true,
|
||||
"continue": true,
|
||||
"default": true,
|
||||
"defer": true,
|
||||
"else": true,
|
||||
"fallthrough": true,
|
||||
"for": true,
|
||||
"func": true,
|
||||
"goto": true,
|
||||
"go": true,
|
||||
"if": true,
|
||||
"import": true,
|
||||
"interface": true,
|
||||
"map": true,
|
||||
"package": true,
|
||||
"range": true,
|
||||
"return": true,
|
||||
"select": true,
|
||||
"struct": true,
|
||||
"switch": true,
|
||||
"type": true,
|
||||
"var": true,
|
||||
|
||||
// predeclared identifiers http://golang.org/ref/spec#Predeclared_identifiers
|
||||
"bool": true,
|
||||
"byte": true,
|
||||
"complex64": true,
|
||||
"complex128": true,
|
||||
"error": true,
|
||||
"float32": true,
|
||||
"float64": true,
|
||||
"int8": true,
|
||||
"int16": true,
|
||||
"int32": true,
|
||||
"int64": true,
|
||||
"int": true,
|
||||
"rune": true,
|
||||
"string": true,
|
||||
"uint8": true,
|
||||
"uint16": true,
|
||||
"uint32": true,
|
||||
"uint64": true,
|
||||
"uintptr": true,
|
||||
"uint": true,
|
||||
"true": true,
|
||||
"false": true,
|
||||
"iota": true,
|
||||
"nil": true,
|
||||
"append": true,
|
||||
"cap": true,
|
||||
"close": true,
|
||||
"complex": true,
|
||||
"copy": true,
|
||||
"delete": true,
|
||||
"imag": true,
|
||||
"len": true,
|
||||
"make": true,
|
||||
"new": true,
|
||||
"panic": true,
|
||||
"println": true,
|
||||
"print": true,
|
||||
"real": true,
|
||||
"recover": true,
|
||||
}
|
||||
+200
@@ -0,0 +1,200 @@
|
||||
// This file is generated by the misc/cmd/unicode-classes tool.
|
||||
// Do not edit.
|
||||
|
||||
package main
|
||||
|
||||
var unicodeClasses = map[string]bool{
|
||||
"ASCII_Hex_Digit": true,
|
||||
"Arabic": true,
|
||||
"Armenian": true,
|
||||
"Avestan": true,
|
||||
"Balinese": true,
|
||||
"Bamum": true,
|
||||
"Bassa_Vah": true,
|
||||
"Batak": true,
|
||||
"Bengali": true,
|
||||
"Bidi_Control": true,
|
||||
"Bopomofo": true,
|
||||
"Brahmi": true,
|
||||
"Braille": true,
|
||||
"Buginese": true,
|
||||
"Buhid": true,
|
||||
"C": true,
|
||||
"Canadian_Aboriginal": true,
|
||||
"Carian": true,
|
||||
"Caucasian_Albanian": true,
|
||||
"Cc": true,
|
||||
"Cf": true,
|
||||
"Chakma": true,
|
||||
"Cham": true,
|
||||
"Cherokee": true,
|
||||
"Co": true,
|
||||
"Common": true,
|
||||
"Coptic": true,
|
||||
"Cs": true,
|
||||
"Cuneiform": true,
|
||||
"Cypriot": true,
|
||||
"Cyrillic": true,
|
||||
"Dash": true,
|
||||
"Deprecated": true,
|
||||
"Deseret": true,
|
||||
"Devanagari": true,
|
||||
"Diacritic": true,
|
||||
"Duployan": true,
|
||||
"Egyptian_Hieroglyphs": true,
|
||||
"Elbasan": true,
|
||||
"Ethiopic": true,
|
||||
"Extender": true,
|
||||
"Georgian": true,
|
||||
"Glagolitic": true,
|
||||
"Gothic": true,
|
||||
"Grantha": true,
|
||||
"Greek": true,
|
||||
"Gujarati": true,
|
||||
"Gurmukhi": true,
|
||||
"Han": true,
|
||||
"Hangul": true,
|
||||
"Hanunoo": true,
|
||||
"Hebrew": true,
|
||||
"Hex_Digit": true,
|
||||
"Hiragana": true,
|
||||
"Hyphen": true,
|
||||
"IDS_Binary_Operator": true,
|
||||
"IDS_Trinary_Operator": true,
|
||||
"Ideographic": true,
|
||||
"Imperial_Aramaic": true,
|
||||
"Inherited": true,
|
||||
"Inscriptional_Pahlavi": true,
|
||||
"Inscriptional_Parthian": true,
|
||||
"Javanese": true,
|
||||
"Join_Control": true,
|
||||
"Kaithi": true,
|
||||
"Kannada": true,
|
||||
"Katakana": true,
|
||||
"Kayah_Li": true,
|
||||
"Kharoshthi": true,
|
||||
"Khmer": true,
|
||||
"Khojki": true,
|
||||
"Khudawadi": true,
|
||||
"L": true,
|
||||
"Lao": true,
|
||||
"Latin": true,
|
||||
"Lepcha": true,
|
||||
"Limbu": true,
|
||||
"Linear_A": true,
|
||||
"Linear_B": true,
|
||||
"Lisu": true,
|
||||
"Ll": true,
|
||||
"Lm": true,
|
||||
"Lo": true,
|
||||
"Logical_Order_Exception": true,
|
||||
"Lt": true,
|
||||
"Lu": true,
|
||||
"Lycian": true,
|
||||
"Lydian": true,
|
||||
"M": true,
|
||||
"Mahajani": true,
|
||||
"Malayalam": true,
|
||||
"Mandaic": true,
|
||||
"Manichaean": true,
|
||||
"Mc": true,
|
||||
"Me": true,
|
||||
"Meetei_Mayek": true,
|
||||
"Mende_Kikakui": true,
|
||||
"Meroitic_Cursive": true,
|
||||
"Meroitic_Hieroglyphs": true,
|
||||
"Miao": true,
|
||||
"Mn": true,
|
||||
"Modi": true,
|
||||
"Mongolian": true,
|
||||
"Mro": true,
|
||||
"Myanmar": true,
|
||||
"N": true,
|
||||
"Nabataean": true,
|
||||
"Nd": true,
|
||||
"New_Tai_Lue": true,
|
||||
"Nko": true,
|
||||
"Nl": true,
|
||||
"No": true,
|
||||
"Noncharacter_Code_Point": true,
|
||||
"Ogham": true,
|
||||
"Ol_Chiki": true,
|
||||
"Old_Italic": true,
|
||||
"Old_North_Arabian": true,
|
||||
"Old_Permic": true,
|
||||
"Old_Persian": true,
|
||||
"Old_South_Arabian": true,
|
||||
"Old_Turkic": true,
|
||||
"Oriya": true,
|
||||
"Osmanya": true,
|
||||
"Other_Alphabetic": true,
|
||||
"Other_Default_Ignorable_Code_Point": true,
|
||||
"Other_Grapheme_Extend": true,
|
||||
"Other_ID_Continue": true,
|
||||
"Other_ID_Start": true,
|
||||
"Other_Lowercase": true,
|
||||
"Other_Math": true,
|
||||
"Other_Uppercase": true,
|
||||
"P": true,
|
||||
"Pahawh_Hmong": true,
|
||||
"Palmyrene": true,
|
||||
"Pattern_Syntax": true,
|
||||
"Pattern_White_Space": true,
|
||||
"Pau_Cin_Hau": true,
|
||||
"Pc": true,
|
||||
"Pd": true,
|
||||
"Pe": true,
|
||||
"Pf": true,
|
||||
"Phags_Pa": true,
|
||||
"Phoenician": true,
|
||||
"Pi": true,
|
||||
"Po": true,
|
||||
"Ps": true,
|
||||
"Psalter_Pahlavi": true,
|
||||
"Quotation_Mark": true,
|
||||
"Radical": true,
|
||||
"Rejang": true,
|
||||
"Runic": true,
|
||||
"S": true,
|
||||
"STerm": true,
|
||||
"Samaritan": true,
|
||||
"Saurashtra": true,
|
||||
"Sc": true,
|
||||
"Sharada": true,
|
||||
"Shavian": true,
|
||||
"Siddham": true,
|
||||
"Sinhala": true,
|
||||
"Sk": true,
|
||||
"Sm": true,
|
||||
"So": true,
|
||||
"Soft_Dotted": true,
|
||||
"Sora_Sompeng": true,
|
||||
"Sundanese": true,
|
||||
"Syloti_Nagri": true,
|
||||
"Syriac": true,
|
||||
"Tagalog": true,
|
||||
"Tagbanwa": true,
|
||||
"Tai_Le": true,
|
||||
"Tai_Tham": true,
|
||||
"Tai_Viet": true,
|
||||
"Takri": true,
|
||||
"Tamil": true,
|
||||
"Telugu": true,
|
||||
"Terminal_Punctuation": true,
|
||||
"Thaana": true,
|
||||
"Thai": true,
|
||||
"Tibetan": true,
|
||||
"Tifinagh": true,
|
||||
"Tirhuta": true,
|
||||
"Ugaritic": true,
|
||||
"Unified_Ideograph": true,
|
||||
"Vai": true,
|
||||
"Variation_Selector": true,
|
||||
"Warang_Citi": true,
|
||||
"White_Space": true,
|
||||
"Yi": true,
|
||||
"Z": true,
|
||||
"Zl": true,
|
||||
"Zp": true,
|
||||
"Zs": true,
|
||||
}
|
||||
Reference in New Issue
Block a user