// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package ebnf import ( "container/vector"; "fmt"; "go/scanner"; "go/token"; "os"; "strconv"; "strings"; "unicode"; "utf8"; ) type parser struct { scanner.ErrorVector; scanner scanner.Scanner; pos token.Position; // token position tok token.Token; // one token look-ahead lit []byte; // token literal } func (p *parser) next() { p.pos, p.tok, p.lit = p.scanner.Scan(); if p.tok.IsKeyword() { // TODO Should keyword mapping always happen outside scanner? // Or should there be a flag to scanner to enable keyword mapping? p.tok = token.IDENT; } } func (p *parser) errorExpected(pos token.Position, msg string) { msg = "expected " + msg; if pos.Offset == p.pos.Offset { // the error happened at the current position; // make the error message more specific msg += ", found '" + p.tok.String() + "'"; if p.tok.IsLiteral() { msg += " " + string(p.lit); } } p.Error(pos, msg); } func (p *parser) expect(tok token.Token) token.Position { pos := p.pos; if p.tok != tok { p.errorExpected(pos, "'" + tok.String() + "'"); } p.next(); // make progress in any case return pos; } func (p *parser) parseIdentifier() *Name { pos := p.pos; name := string(p.lit); p.expect(token.IDENT); return &Name{pos, name}; } func (p *parser) parseToken() *Token { pos := p.pos; value := ""; if p.tok == token.STRING { var err os.Error; value, err = strconv.Unquote(string(p.lit)); // Unquote may fail with an error, but only if the scanner found // an illegal string in the first place. In this case the error // has already been reported. p.next(); } else { p.expect(token.STRING); } return &Token{pos, value}; } func (p *parser) parseTerm() (x Expression) { pos := p.pos; switch p.tok { case token.IDENT: x = p.parseIdentifier(); case token.STRING: tok := p.parseToken(); x = tok; if p.tok == token.ELLIPSIS { p.next(); x = &Range{tok, p.parseToken()}; } case token.LPAREN: p.next(); x = &Group{pos, p.parseExpression()}; p.expect(token.RPAREN); case token.LBRACK: p.next(); x = &Option{pos, p.parseExpression()}; p.expect(token.RBRACK); case token.LBRACE: p.next(); x = &Repetition{pos, p.parseExpression()}; p.expect(token.RBRACE); } return x; } func (p *parser) parseSequence() Expression { var list vector.Vector; list.Init(0); for x := p.parseTerm(); x != nil; x = p.parseTerm() { list.Push(x); } // no need for a sequence if list.Len() < 2 switch list.Len() { case 0: return nil; case 1: return list.At(0).(Expression); } // convert list into a sequence seq := make(Sequence, list.Len()); for i := 0; i < list.Len(); i++ { seq[i] = list.At(i).(Expression); } return seq; } func (p *parser) parseExpression() Expression { var list vector.Vector; list.Init(0); for { x := p.parseSequence(); if x != nil { list.Push(x); } if p.tok != token.OR { break; } p.next(); } // no need for an Alternative node if list.Len() < 2 switch list.Len() { case 0: return nil; case 1: return list.At(0).(Expression); } // convert list into an Alternative node alt := make(Alternative, list.Len()); for i := 0; i < list.Len(); i++ { alt[i] = list.At(i).(Expression); } return alt; } func (p *parser) parseProduction() *Production { name := p.parseIdentifier(); p.expect(token.ASSIGN); expr := p.parseExpression(); p.expect(token.PERIOD); return &Production{name, expr}; } func (p *parser) parse(filename string, src []byte) Grammar { // initialize parser p.ErrorVector.Init(); p.scanner.Init(filename, src, p, 0); p.next(); // initializes pos, tok, lit grammar := make(Grammar); for p.tok != token.EOF { prod := p.parseProduction(); name := prod.Name.String; if prev, found := grammar[name]; !found { grammar[name] = prod; } else { p.Error(prod.Pos(), name + " declared already"); } } return grammar; } // Parse parses a set of EBNF productions from source src. // It returns a set of productions. Errors are reported // for incorrect syntax and if a production is declared // more than once. // func Parse(filename string, src []byte) (Grammar, os.Error) { var p parser; grammar := p.parse(filename, src); return grammar, p.GetError(scanner.Sorted); }