mirror of
https://github.com/golang/go
synced 2024-09-25 05:10:12 -06:00
ebnf: use scanner instead of go/scanner
R=rsc, r
CC=golang-dev
https://golang.org/cl/5192043
This commit is contained in:
parent
b2f1eba324
commit
0da66a2e90
@ -98,12 +98,12 @@ func main() {
|
||||
src = extractEBNF(src)
|
||||
}
|
||||
|
||||
grammar, err := ebnf.Parse(fset, filename, src)
|
||||
grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src))
|
||||
if err != nil {
|
||||
report(err)
|
||||
}
|
||||
|
||||
if err = ebnf.Verify(fset, grammar, *start); err != nil {
|
||||
if err = ebnf.Verify(grammar, *start); err != nil {
|
||||
report(err)
|
||||
}
|
||||
}
|
||||
|
@ -23,13 +23,39 @@
|
||||
package ebnf
|
||||
|
||||
import (
|
||||
"go/scanner"
|
||||
"go/token"
|
||||
"fmt"
|
||||
"os"
|
||||
"scanner"
|
||||
"unicode"
|
||||
"utf8"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Error handling
|
||||
|
||||
type errorList []os.Error
|
||||
|
||||
func (list errorList) Error() os.Error {
|
||||
if len(list) == 0 {
|
||||
return nil
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
func (list errorList) String() string {
|
||||
switch len(list) {
|
||||
case 0:
|
||||
return "no errors"
|
||||
case 1:
|
||||
return list[0].String()
|
||||
}
|
||||
return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
|
||||
}
|
||||
|
||||
func newError(pos scanner.Position, msg string) os.Error {
|
||||
return os.NewError(fmt.Sprintf("%s: %s", pos, msg))
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Internal representation
|
||||
|
||||
@ -37,7 +63,7 @@ type (
|
||||
// An Expression node represents a production expression.
|
||||
Expression interface {
|
||||
// Pos is the position of the first character of the syntactic construct
|
||||
Pos() token.Pos
|
||||
Pos() scanner.Position
|
||||
}
|
||||
|
||||
// An Alternative node represents a non-empty list of alternative expressions.
|
||||
@ -48,13 +74,13 @@ type (
|
||||
|
||||
// A Name node represents a production name.
|
||||
Name struct {
|
||||
StringPos token.Pos
|
||||
StringPos scanner.Position
|
||||
String string
|
||||
}
|
||||
|
||||
// A Token node represents a literal.
|
||||
Token struct {
|
||||
StringPos token.Pos
|
||||
StringPos scanner.Position
|
||||
String string
|
||||
}
|
||||
|
||||
@ -65,50 +91,50 @@ type (
|
||||
|
||||
// A Group node represents a grouped expression.
|
||||
Group struct {
|
||||
Lparen token.Pos
|
||||
Lparen scanner.Position
|
||||
Body Expression // (body)
|
||||
}
|
||||
|
||||
// An Option node represents an optional expression.
|
||||
Option struct {
|
||||
Lbrack token.Pos
|
||||
Lbrack scanner.Position
|
||||
Body Expression // [body]
|
||||
}
|
||||
|
||||
// A Repetition node represents a repeated expression.
|
||||
Repetition struct {
|
||||
Lbrace token.Pos
|
||||
Lbrace scanner.Position
|
||||
Body Expression // {body}
|
||||
}
|
||||
|
||||
// A Bad node stands for pieces of source code that lead to a parse error.
|
||||
Bad struct {
|
||||
TokPos token.Pos
|
||||
Error string // parser error message
|
||||
}
|
||||
|
||||
// A Production node represents an EBNF production.
|
||||
Production struct {
|
||||
Name *Name
|
||||
Expr Expression
|
||||
}
|
||||
|
||||
// A Bad node stands for pieces of source code that lead to a parse error.
|
||||
Bad struct {
|
||||
TokPos scanner.Position
|
||||
Error string // parser error message
|
||||
}
|
||||
|
||||
// A Grammar is a set of EBNF productions. The map
|
||||
// is indexed by production name.
|
||||
//
|
||||
Grammar map[string]*Production
|
||||
)
|
||||
|
||||
func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative
|
||||
func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences
|
||||
func (x *Name) Pos() token.Pos { return x.StringPos }
|
||||
func (x *Token) Pos() token.Pos { return x.StringPos }
|
||||
func (x *Range) Pos() token.Pos { return x.Begin.Pos() }
|
||||
func (x *Group) Pos() token.Pos { return x.Lparen }
|
||||
func (x *Option) Pos() token.Pos { return x.Lbrack }
|
||||
func (x *Repetition) Pos() token.Pos { return x.Lbrace }
|
||||
func (x *Bad) Pos() token.Pos { return x.TokPos }
|
||||
func (x *Production) Pos() token.Pos { return x.Name.Pos() }
|
||||
func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
|
||||
func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences
|
||||
func (x *Name) Pos() scanner.Position { return x.StringPos }
|
||||
func (x *Token) Pos() scanner.Position { return x.StringPos }
|
||||
func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }
|
||||
func (x *Group) Pos() scanner.Position { return x.Lparen }
|
||||
func (x *Option) Pos() scanner.Position { return x.Lbrack }
|
||||
func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
|
||||
func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
|
||||
func (x *Bad) Pos() scanner.Position { return x.TokPos }
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Grammar verification
|
||||
@ -119,15 +145,14 @@ func isLexical(name string) bool {
|
||||
}
|
||||
|
||||
type verifier struct {
|
||||
fset *token.FileSet
|
||||
scanner.ErrorVector
|
||||
errors errorList
|
||||
worklist []*Production
|
||||
reached Grammar // set of productions reached from (and including) the root production
|
||||
grammar Grammar
|
||||
}
|
||||
|
||||
func (v *verifier) error(pos token.Pos, msg string) {
|
||||
v.Error(v.fset.Position(pos), msg)
|
||||
func (v *verifier) error(pos scanner.Position, msg string) {
|
||||
v.errors = append(v.errors, newError(pos, msg))
|
||||
}
|
||||
|
||||
func (v *verifier) push(prod *Production) {
|
||||
@ -187,24 +212,23 @@ func (v *verifier) verifyExpr(expr Expression, lexical bool) {
|
||||
v.verifyExpr(x.Body, lexical)
|
||||
case *Repetition:
|
||||
v.verifyExpr(x.Body, lexical)
|
||||
case *Bad:
|
||||
v.error(x.Pos(), x.Error)
|
||||
default:
|
||||
panic("unreachable")
|
||||
panic(fmt.Sprintf("internal error: unexpected type %T", expr))
|
||||
}
|
||||
}
|
||||
|
||||
func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
|
||||
func (v *verifier) verify(grammar Grammar, start string) {
|
||||
// find root production
|
||||
root, found := grammar[start]
|
||||
if !found {
|
||||
// token.NoPos doesn't require a file set;
|
||||
// ok to set v.fset only afterwards
|
||||
v.error(token.NoPos, "no start production "+start)
|
||||
var noPos scanner.Position
|
||||
v.error(noPos, "no start production "+start)
|
||||
return
|
||||
}
|
||||
|
||||
// initialize verifier
|
||||
v.fset = fset
|
||||
v.ErrorVector.Reset()
|
||||
v.worklist = v.worklist[0:0]
|
||||
v.reached = make(Grammar)
|
||||
v.grammar = grammar
|
||||
@ -238,8 +262,8 @@ func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
|
||||
//
|
||||
// Error positions are scanner.Position values; no file set is needed.
|
||||
//
|
||||
func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error {
|
||||
func Verify(grammar Grammar, start string) os.Error {
|
||||
var v verifier
|
||||
v.verify(fset, grammar, start)
|
||||
return v.GetError(scanner.Sorted)
|
||||
v.verify(grammar, start)
|
||||
return v.errors.Error()
|
||||
}
|
||||
|
@ -5,13 +5,10 @@
|
||||
package ebnf
|
||||
|
||||
import (
|
||||
"go/token"
|
||||
"io/ioutil"
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var fset = token.NewFileSet()
|
||||
|
||||
var goodGrammars = []string{
|
||||
`Program = .`,
|
||||
|
||||
@ -46,18 +43,19 @@ var badGrammars = []string{
|
||||
`Program = {} .`,
|
||||
}
|
||||
|
||||
func checkGood(t *testing.T, filename string, src []byte) {
|
||||
grammar, err := Parse(fset, filename, src)
|
||||
func checkGood(t *testing.T, src string) {
|
||||
grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
|
||||
if err != nil {
|
||||
t.Errorf("Parse(%s) failed: %v", src, err)
|
||||
return
|
||||
}
|
||||
if err = Verify(fset, grammar, "Program"); err != nil {
|
||||
if err = Verify(grammar, "Program"); err != nil {
|
||||
t.Errorf("Verify(%s) failed: %v", src, err)
|
||||
}
|
||||
}
|
||||
|
||||
func checkBad(t *testing.T, filename string, src []byte) {
|
||||
_, err := Parse(fset, filename, src)
|
||||
func checkBad(t *testing.T, src string) {
|
||||
_, err := Parse("", bytes.NewBuffer([]byte(src)))
|
||||
if err == nil {
|
||||
t.Errorf("Parse(%s) should have failed", src)
|
||||
}
|
||||
@ -65,23 +63,9 @@ func checkBad(t *testing.T, filename string, src []byte) {
|
||||
|
||||
func TestGrammars(t *testing.T) {
|
||||
for _, src := range goodGrammars {
|
||||
checkGood(t, "", []byte(src))
|
||||
checkGood(t, src)
|
||||
}
|
||||
for _, src := range badGrammars {
|
||||
checkBad(t, "", []byte(src))
|
||||
}
|
||||
}
|
||||
|
||||
var files = []string{
|
||||
// TODO(gri) add some test files
|
||||
}
|
||||
|
||||
func TestFiles(t *testing.T) {
|
||||
for _, filename := range files {
|
||||
src, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkGood(t, filename, src)
|
||||
checkBad(t, src)
|
||||
}
|
||||
}
|
||||
|
@ -5,51 +5,47 @@
|
||||
package ebnf
|
||||
|
||||
import (
|
||||
"go/scanner"
|
||||
"go/token"
|
||||
"io"
|
||||
"os"
|
||||
"scanner"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type parser struct {
|
||||
fset *token.FileSet
|
||||
scanner.ErrorVector
|
||||
errors errorList
|
||||
scanner scanner.Scanner
|
||||
pos token.Pos // token position
|
||||
tok token.Token // one token look-ahead
|
||||
pos scanner.Position // token position
|
||||
tok int // one token look-ahead
|
||||
lit string // token literal
|
||||
}
|
||||
|
||||
func (p *parser) next() {
|
||||
p.pos, p.tok, p.lit = p.scanner.Scan()
|
||||
if p.tok.IsKeyword() {
|
||||
// TODO Should keyword mapping always happen outside scanner?
|
||||
// Or should there be a flag to scanner to enable keyword mapping?
|
||||
p.tok = token.IDENT
|
||||
}
|
||||
p.tok = p.scanner.Scan()
|
||||
p.pos = p.scanner.Position
|
||||
p.lit = p.scanner.TokenText()
|
||||
}
|
||||
|
||||
func (p *parser) error(pos token.Pos, msg string) {
|
||||
p.Error(p.fset.Position(pos), msg)
|
||||
func (p *parser) error(pos scanner.Position, msg string) {
|
||||
p.errors = append(p.errors, newError(pos, msg))
|
||||
}
|
||||
|
||||
func (p *parser) errorExpected(pos token.Pos, msg string) {
|
||||
msg = "expected " + msg
|
||||
if pos == p.pos {
|
||||
func (p *parser) errorExpected(pos scanner.Position, msg string) {
|
||||
msg = `expected "` + msg + `"`
|
||||
if pos.Offset == p.pos.Offset {
|
||||
// the error happened at the current position;
|
||||
// make the error message more specific
|
||||
msg += ", found '" + p.tok.String() + "'"
|
||||
if p.tok.IsLiteral() {
|
||||
msg += ", found " + scanner.TokenString(p.tok)
|
||||
if p.tok < 0 {
|
||||
msg += " " + p.lit
|
||||
}
|
||||
}
|
||||
p.error(pos, msg)
|
||||
}
|
||||
|
||||
func (p *parser) expect(tok token.Token) token.Pos {
|
||||
func (p *parser) expect(tok int) scanner.Position {
|
||||
pos := p.pos
|
||||
if p.tok != tok {
|
||||
p.errorExpected(pos, "'"+tok.String()+"'")
|
||||
p.errorExpected(pos, scanner.TokenString(tok))
|
||||
}
|
||||
p.next() // make progress in any case
|
||||
return pos
|
||||
@ -58,21 +54,21 @@ func (p *parser) expect(tok token.Token) token.Pos {
|
||||
func (p *parser) parseIdentifier() *Name {
|
||||
pos := p.pos
|
||||
name := p.lit
|
||||
p.expect(token.IDENT)
|
||||
p.expect(scanner.Ident)
|
||||
return &Name{pos, name}
|
||||
}
|
||||
|
||||
func (p *parser) parseToken() *Token {
|
||||
pos := p.pos
|
||||
value := ""
|
||||
if p.tok == token.STRING {
|
||||
if p.tok == scanner.String {
|
||||
value, _ = strconv.Unquote(p.lit)
|
||||
// Unquote may fail with an error, but only if the scanner found
|
||||
// an illegal string in the first place. In this case the error
|
||||
// has already been reported.
|
||||
p.next()
|
||||
} else {
|
||||
p.expect(token.STRING)
|
||||
p.expect(scanner.String)
|
||||
}
|
||||
return &Token{pos, value}
|
||||
}
|
||||
@ -82,32 +78,32 @@ func (p *parser) parseTerm() (x Expression) {
|
||||
pos := p.pos
|
||||
|
||||
switch p.tok {
|
||||
case token.IDENT:
|
||||
case scanner.Ident:
|
||||
x = p.parseIdentifier()
|
||||
|
||||
case token.STRING:
|
||||
case scanner.String:
|
||||
tok := p.parseToken()
|
||||
x = tok
|
||||
const ellipsis = "…" // U+2026, the horizontal ellipsis character
|
||||
if p.tok == token.ILLEGAL && p.lit == ellipsis {
|
||||
const ellipsis = '…' // U+2026, the horizontal ellipsis character
|
||||
if p.tok == ellipsis {
|
||||
p.next()
|
||||
x = &Range{tok, p.parseToken()}
|
||||
}
|
||||
|
||||
case token.LPAREN:
|
||||
case '(':
|
||||
p.next()
|
||||
x = &Group{pos, p.parseExpression()}
|
||||
p.expect(token.RPAREN)
|
||||
p.expect(')')
|
||||
|
||||
case token.LBRACK:
|
||||
case '[':
|
||||
p.next()
|
||||
x = &Option{pos, p.parseExpression()}
|
||||
p.expect(token.RBRACK)
|
||||
p.expect(']')
|
||||
|
||||
case token.LBRACE:
|
||||
case '{':
|
||||
p.next()
|
||||
x = &Repetition{pos, p.parseExpression()}
|
||||
p.expect(token.RBRACE)
|
||||
p.expect('}')
|
||||
}
|
||||
|
||||
return x
|
||||
@ -137,7 +133,7 @@ func (p *parser) parseExpression() Expression {
|
||||
|
||||
for {
|
||||
list = append(list, p.parseSequence())
|
||||
if p.tok != token.OR {
|
||||
if p.tok != '|' {
|
||||
break
|
||||
}
|
||||
p.next()
|
||||
@ -154,24 +150,22 @@ func (p *parser) parseExpression() Expression {
|
||||
|
||||
func (p *parser) parseProduction() *Production {
|
||||
name := p.parseIdentifier()
|
||||
p.expect(token.ASSIGN)
|
||||
p.expect('=')
|
||||
var expr Expression
|
||||
if p.tok != token.PERIOD {
|
||||
if p.tok != '.' {
|
||||
expr = p.parseExpression()
|
||||
}
|
||||
p.expect(token.PERIOD)
|
||||
p.expect('.')
|
||||
return &Production{name, expr}
|
||||
}
|
||||
|
||||
func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
|
||||
// initialize parser
|
||||
p.fset = fset
|
||||
p.ErrorVector.Reset()
|
||||
p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
|
||||
func (p *parser) parse(filename string, src io.Reader) Grammar {
|
||||
p.scanner.Init(src)
|
||||
p.scanner.Filename = filename
|
||||
p.next() // initializes pos, tok, lit
|
||||
|
||||
grammar := make(Grammar)
|
||||
for p.tok != token.EOF {
|
||||
for p.tok != scanner.EOF {
|
||||
prod := p.parseProduction()
|
||||
name := prod.Name.String
|
||||
if _, found := grammar[name]; !found {
|
||||
@ -187,11 +181,11 @@ func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar
|
||||
// Parse parses a set of EBNF productions from source src.
|
||||
// It returns a set of productions. Errors are reported
|
||||
// for incorrect syntax and if a production is declared
|
||||
// more than once. Position information is recorded relative
|
||||
// to the file set fset.
|
||||
// more than once; the filename is used only for error
|
||||
// positions.
|
||||
//
|
||||
func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
|
||||
func Parse(filename string, src io.Reader) (Grammar, os.Error) {
|
||||
var p parser
|
||||
grammar := p.parse(fset, filename, src)
|
||||
return grammar, p.GetError(scanner.Sorted)
|
||||
grammar := p.parse(filename, src)
|
||||
return grammar, p.errors.Error()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user