// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ebnf
|
|
|
|
|
|
|
|
import (
|
|
|
|
"container/vector";
|
|
|
|
"go/scanner";
|
|
|
|
"go/token";
|
|
|
|
"os";
|
|
|
|
"strconv";
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
type parser struct {
|
2009-07-14 11:45:43 -06:00
|
|
|
scanner.ErrorVector;
|
2009-10-08 16:14:54 -06:00
|
|
|
scanner scanner.Scanner;
|
|
|
|
pos token.Position; // token position
|
|
|
|
tok token.Token; // one token look-ahead
|
|
|
|
lit []byte; // token literal
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) next() {
|
|
|
|
p.pos, p.tok, p.lit = p.scanner.Scan();
|
|
|
|
if p.tok.IsKeyword() {
|
|
|
|
// TODO Should keyword mapping always happen outside scanner?
|
|
|
|
// Or should there be a flag to scanner to enable keyword mapping?
|
2009-11-09 13:07:39 -07:00
|
|
|
p.tok = token.IDENT
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) errorExpected(pos token.Position, msg string) {
|
|
|
|
msg = "expected " + msg;
|
|
|
|
if pos.Offset == p.pos.Offset {
|
|
|
|
// the error happened at the current position;
|
|
|
|
// make the error message more specific
|
|
|
|
msg += ", found '" + p.tok.String() + "'";
|
|
|
|
if p.tok.IsLiteral() {
|
2009-11-09 13:07:39 -07:00
|
|
|
msg += " "+string(p.lit)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
p.Error(pos, msg);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) expect(tok token.Token) token.Position {
|
|
|
|
pos := p.pos;
|
|
|
|
if p.tok != tok {
|
2009-11-09 13:07:39 -07:00
|
|
|
p.errorExpected(pos, "'" + tok.String() + "'")
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
2009-10-08 16:14:54 -06:00
|
|
|
p.next(); // make progress in any case
|
2009-07-13 11:10:56 -06:00
|
|
|
return pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) parseIdentifier() *Name {
|
|
|
|
pos := p.pos;
|
|
|
|
name := string(p.lit);
|
|
|
|
p.expect(token.IDENT);
|
|
|
|
return &Name{pos, name};
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) parseToken() *Token {
|
|
|
|
pos := p.pos;
|
|
|
|
value := "";
|
|
|
|
if p.tok == token.STRING {
|
2009-09-14 18:20:29 -06:00
|
|
|
value, _ = strconv.Unquote(string(p.lit));
|
2009-07-13 11:10:56 -06:00
|
|
|
// Unquote may fail with an error, but only if the scanner found
|
|
|
|
// an illegal string in the first place. In this case the error
|
|
|
|
// has already been reported.
|
|
|
|
p.next();
|
|
|
|
} else {
|
2009-11-09 13:07:39 -07:00
|
|
|
p.expect(token.STRING)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
return &Token{pos, value};
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) parseTerm() (x Expression) {
|
|
|
|
pos := p.pos;
|
|
|
|
|
|
|
|
switch p.tok {
|
|
|
|
case token.IDENT:
|
2009-11-09 13:07:39 -07:00
|
|
|
x = p.parseIdentifier()
|
2009-07-13 11:10:56 -06:00
|
|
|
|
|
|
|
case token.STRING:
|
|
|
|
tok := p.parseToken();
|
|
|
|
x = tok;
|
|
|
|
if p.tok == token.ELLIPSIS {
|
|
|
|
p.next();
|
|
|
|
x = &Range{tok, p.parseToken()};
|
|
|
|
}
|
|
|
|
|
|
|
|
case token.LPAREN:
|
|
|
|
p.next();
|
|
|
|
x = &Group{pos, p.parseExpression()};
|
|
|
|
p.expect(token.RPAREN);
|
|
|
|
|
|
|
|
case token.LBRACK:
|
|
|
|
p.next();
|
|
|
|
x = &Option{pos, p.parseExpression()};
|
|
|
|
p.expect(token.RBRACK);
|
|
|
|
|
|
|
|
case token.LBRACE:
|
|
|
|
p.next();
|
|
|
|
x = &Repetition{pos, p.parseExpression()};
|
|
|
|
p.expect(token.RBRACE);
|
|
|
|
}
|
|
|
|
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) parseSequence() Expression {
|
|
|
|
var list vector.Vector;
|
|
|
|
list.Init(0);
|
|
|
|
|
|
|
|
for x := p.parseTerm(); x != nil; x = p.parseTerm() {
|
2009-11-09 13:07:39 -07:00
|
|
|
list.Push(x)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// no need for a sequence if list.Len() < 2
|
|
|
|
switch list.Len() {
|
|
|
|
case 0:
|
2009-11-09 13:07:39 -07:00
|
|
|
return nil
|
2009-07-13 11:10:56 -06:00
|
|
|
case 1:
|
2009-11-09 13:07:39 -07:00
|
|
|
return list.At(0).(Expression)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// convert list into a sequence
|
|
|
|
seq := make(Sequence, list.Len());
|
|
|
|
for i := 0; i < list.Len(); i++ {
|
2009-11-09 13:07:39 -07:00
|
|
|
seq[i] = list.At(i).(Expression)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
return seq;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) parseExpression() Expression {
|
|
|
|
var list vector.Vector;
|
|
|
|
list.Init(0);
|
|
|
|
|
|
|
|
for {
|
|
|
|
x := p.parseSequence();
|
|
|
|
if x != nil {
|
2009-11-09 13:07:39 -07:00
|
|
|
list.Push(x)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
if p.tok != token.OR {
|
2009-11-09 13:07:39 -07:00
|
|
|
break
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
p.next();
|
|
|
|
}
|
|
|
|
|
|
|
|
// no need for an Alternative node if list.Len() < 2
|
|
|
|
switch list.Len() {
|
|
|
|
case 0:
|
2009-11-09 13:07:39 -07:00
|
|
|
return nil
|
2009-07-13 11:10:56 -06:00
|
|
|
case 1:
|
2009-11-09 13:07:39 -07:00
|
|
|
return list.At(0).(Expression)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// convert list into an Alternative node
|
|
|
|
alt := make(Alternative, list.Len());
|
|
|
|
for i := 0; i < list.Len(); i++ {
|
2009-11-09 13:07:39 -07:00
|
|
|
alt[i] = list.At(i).(Expression)
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
return alt;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (p *parser) parseProduction() *Production {
|
|
|
|
name := p.parseIdentifier();
|
|
|
|
p.expect(token.ASSIGN);
|
|
|
|
expr := p.parseExpression();
|
|
|
|
p.expect(token.PERIOD);
|
|
|
|
return &Production{name, expr};
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-07-14 11:45:43 -06:00
|
|
|
func (p *parser) parse(filename string, src []byte) Grammar {
|
2009-07-13 11:10:56 -06:00
|
|
|
// initialize parser
|
2009-07-14 11:45:43 -06:00
|
|
|
p.ErrorVector.Init();
|
|
|
|
p.scanner.Init(filename, src, p, 0);
|
2009-10-08 16:14:54 -06:00
|
|
|
p.next(); // initializes pos, tok, lit
|
2009-07-13 11:10:56 -06:00
|
|
|
|
|
|
|
grammar := make(Grammar);
|
|
|
|
for p.tok != token.EOF {
|
|
|
|
prod := p.parseProduction();
|
|
|
|
name := prod.Name.String;
|
2009-09-15 10:41:59 -06:00
|
|
|
if _, found := grammar[name]; !found {
|
2009-11-09 13:07:39 -07:00
|
|
|
grammar[name] = prod
|
2009-07-13 11:10:56 -06:00
|
|
|
} else {
|
2009-11-09 13:07:39 -07:00
|
|
|
p.Error(prod.Pos(), name + " declared already")
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return grammar;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Parse parses a set of EBNF productions from source src.
|
|
|
|
// It returns a set of productions. Errors are reported
|
|
|
|
// for incorrect syntax and if a production is declared
|
|
|
|
// more than once.
|
|
|
|
//
|
2009-07-14 11:45:43 -06:00
|
|
|
func Parse(filename string, src []byte) (Grammar, os.Error) {
|
2009-07-13 11:10:56 -06:00
|
|
|
var p parser;
|
2009-07-14 11:45:43 -06:00
|
|
|
grammar := p.parse(filename, src);
|
|
|
|
return grammar, p.GetError(scanner.Sorted);
|
2009-07-13 11:10:56 -06:00
|
|
|
}
|