1
0
mirror of https://github.com/golang/go synced 2024-11-22 04:34:39 -07:00

go/scanner: removed scanner/internal-only uses of token.Position

First step towards a more lightweight implementation of token.Position:
- only use token.Position for reporting token and error positions
- use offsets only for scanner control
- no interface changes yet

R=rsc
CC=golang-dev
https://golang.org/cl/2825041
This commit is contained in:
Robert Griesemer 2010-11-02 10:38:07 -07:00
parent 0808b199e0
commit 396228a652
2 changed files with 141 additions and 104 deletions

View File

@ -29,10 +29,14 @@ type Scanner struct {
mode uint // scanning mode mode uint // scanning mode
// scanning state // scanning state
pos token.Position // previous reading position (position before ch) filename string // current filename; may change via //line filename:line comment
offset int // current reading offset (position after ch) line int // current line
ch int // one char look-ahead column int // current column
insertSemi bool // insert a semicolon before next newline
ch int // current character
offset int // character offset
rdOffset int // reading offset (position after current character)
insertSemi bool // insert a semicolon before next newline
// public state - ok to modify // public state - ok to modify
ErrorCount int // number of errors encountered ErrorCount int // number of errors encountered
@ -43,29 +47,31 @@ type Scanner struct {
// S.ch < 0 means end-of-file. // S.ch < 0 means end-of-file.
// //
func (S *Scanner) next() { func (S *Scanner) next() {
if S.offset < len(S.src) { S.column++
S.pos.Offset = S.offset if S.rdOffset < len(S.src) {
S.pos.Column++ S.offset = S.rdOffset
if S.ch == '\n' { if S.ch == '\n' {
// next character starts a new line S.line++
S.pos.Line++ S.column = 1
S.pos.Column = 1
} }
r, w := int(S.src[S.offset]), 1 r, w := int(S.src[S.rdOffset]), 1
switch { switch {
case r == 0: case r == 0:
S.error(S.pos, "illegal character NUL") S.error("illegal character NUL")
case r >= 0x80: case r >= 0x80:
// not ASCII // not ASCII
r, w = utf8.DecodeRune(S.src[S.offset:]) r, w = utf8.DecodeRune(S.src[S.rdOffset:])
if r == utf8.RuneError && w == 1 { if r == utf8.RuneError && w == 1 {
S.error(S.pos, "illegal UTF-8 encoding") S.error("illegal UTF-8 encoding")
} }
} }
S.offset += w S.rdOffset += w
S.ch = r S.ch = r
} else { } else {
S.pos.Offset = len(S.src) S.offset = len(S.src)
if S.ch == '\n' {
S.column = 1
}
S.ch = -1 // eof S.ch = -1 // eof
} }
} }
@ -94,9 +100,17 @@ func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint)
S.src = src S.src = src
S.err = err S.err = err
S.mode = mode S.mode = mode
S.pos = token.Position{filename, 0, 1, 0}
S.filename = filename
S.line = 1
S.column = 0
S.ch = ' '
S.offset = 0 S.offset = 0
S.rdOffset = 0
S.insertSemi = false
S.ErrorCount = 0 S.ErrorCount = 0
S.next() S.next()
} }
@ -131,7 +145,12 @@ func charString(ch int) string {
} }
func (S *Scanner) error(pos token.Position, msg string) { func (S *Scanner) error(msg string) {
S.errorAt(token.Position{S.filename, S.offset, S.line, S.column}, msg)
}
func (S *Scanner) errorAt(pos token.Position, msg string) {
if S.err != nil { if S.err != nil {
S.err.Error(pos, msg) S.err.Error(pos, msg)
} }
@ -139,18 +158,28 @@ func (S *Scanner) error(pos token.Position, msg string) {
} }
func (S *Scanner) expect(ch int) { var prefix = []byte("//line ")
if S.ch != ch {
S.error(S.pos, "expected "+charString(ch)+", found "+charString(S.ch)) func (S *Scanner) interpretLineComment(text []byte) {
if bytes.HasPrefix(text, prefix) {
// get filename and line number, if any
if i := bytes.Index(text, []byte{':'}); i > 0 {
if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
// valid //line filename:line comment;
// update scanner position
S.filename = string(text[len(prefix):i])
S.line = line - 1 // -1 since the '\n' has not been consumed yet
}
}
} }
S.next() // always make progress
} }
var prefix = []byte("line ") func (S *Scanner) scanComment() {
// initial '/' already consumed; S.ch == '/' || S.ch == '*'
func (S *Scanner) scanComment(pos token.Position) { offs := S.offset - 1 // position of initial '/'
// first '/' already consumed col := S.column - 1
pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
if S.ch == '/' { if S.ch == '/' {
//-style comment //-style comment
@ -159,21 +188,9 @@ func (S *Scanner) scanComment(pos token.Position) {
if S.ch == '\n' { if S.ch == '\n' {
// '\n' is not part of the comment for purposes of scanning // '\n' is not part of the comment for purposes of scanning
// (the comment ends on the same line where it started) // (the comment ends on the same line where it started)
if pos.Column == 1 { if col == 1 {
text := S.src[pos.Offset+2 : S.pos.Offset] // comment starts at the beginning of the current line
if bytes.HasPrefix(text, prefix) { S.interpretLineComment(S.src[offs:S.offset])
// comment starts at beginning of line with "//line ";
// get filename and line number, if any
i := bytes.Index(text, []byte{':'})
if i >= 0 {
if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
// valid //line filename:line comment;
// update scanner position
S.pos.Filename = string(text[len(prefix):i])
S.pos.Line = line - 1 // -1 since the '\n' has not been consumed yet
}
}
}
} }
return return
} }
@ -181,7 +198,7 @@ func (S *Scanner) scanComment(pos token.Position) {
} else { } else {
/*-style comment */ /*-style comment */
S.expect('*') S.next()
for S.ch >= 0 { for S.ch >= 0 {
ch := S.ch ch := S.ch
S.next() S.next()
@ -192,47 +209,56 @@ func (S *Scanner) scanComment(pos token.Position) {
} }
} }
S.error(pos, "comment not terminated") S.errorAt(pos, "comment not terminated")
} }
func (S *Scanner) findLineEnd(pos token.Position) bool { func (S *Scanner) findLineEnd() bool {
// initial '/' already consumed; pos is position of '/' // initial '/' already consumed
defer func(line, col, offs int) {
// reset scanner state to where it was upon calling findLineEnd
// (we don't scan //line comments and ignore errors thus
// S.filename and S.ErrorCount don't change)
S.line = line
S.column = col
S.ch = '/'
S.offset = offs
S.rdOffset = offs + 1
S.next() // consume initial '/' again
}(S.line, S.column-1, S.offset-1)
// read ahead until a newline, EOF, or non-comment token is found // read ahead until a newline, EOF, or non-comment token is found
lineend := false for S.ch == '/' || S.ch == '*' {
for pos1 := pos; S.ch == '/' || S.ch == '*'; {
if S.ch == '/' { if S.ch == '/' {
//-style comment always contains a newline //-style comment always contains a newline
lineend = true return true
break
} }
S.scanComment(pos1) /*-style comment: look for newline */
if pos1.Line < S.pos.Line { S.next()
/*-style comment contained a newline */ for S.ch >= 0 {
lineend = true ch := S.ch
break if ch == '\n' {
return true
}
S.next()
if ch == '*' && S.ch == '/' {
S.next()
break
}
} }
S.skipWhitespace() // S.insertSemi is set S.skipWhitespace() // S.insertSemi is set
if S.ch < 0 || S.ch == '\n' { if S.ch < 0 || S.ch == '\n' {
// line end return true
lineend = true
break
} }
if S.ch != '/' { if S.ch != '/' {
// non-comment token // non-comment token
break return false
} }
pos1 = S.pos
S.next() // consume '/' S.next() // consume '/'
} }
// reset position to where it was upon calling findLineEnd return false
S.pos = pos
S.offset = pos.Offset + 1
S.next() // consume initial '/' again
return lineend
} }
@ -247,11 +273,11 @@ func isDigit(ch int) bool {
func (S *Scanner) scanIdentifier() token.Token { func (S *Scanner) scanIdentifier() token.Token {
pos := S.pos.Offset offs := S.offset
for isLetter(S.ch) || isDigit(S.ch) { for isLetter(S.ch) || isDigit(S.ch) {
S.next() S.next()
} }
return token.Lookup(S.src[pos:S.pos.Offset]) return token.Lookup(S.src[offs:S.offset])
} }
@ -275,7 +301,7 @@ func (S *Scanner) scanMantissa(base int) {
} }
func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.Token { func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token {
// digitVal(S.ch) < 10 // digitVal(S.ch) < 10
tok := token.INT tok := token.INT
@ -287,6 +313,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To
if S.ch == '0' { if S.ch == '0' {
// int or float // int or float
pos := token.Position{S.filename, S.offset, S.line, S.column}
S.next() S.next()
if S.ch == 'x' || S.ch == 'X' { if S.ch == 'x' || S.ch == 'X' {
// hexadecimal int // hexadecimal int
@ -306,7 +333,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To
} }
// octal int // octal int
if seenDecimalDigit { if seenDecimalDigit {
S.error(pos, "illegal octal number") S.errorAt(pos, "illegal octal number")
} }
} }
goto exit goto exit
@ -343,7 +370,7 @@ exit:
func (S *Scanner) scanEscape(quote int) { func (S *Scanner) scanEscape(quote int) {
pos := S.pos pos := token.Position{S.filename, S.offset, S.line, S.column}
var i, base, max uint32 var i, base, max uint32
switch S.ch { switch S.ch {
@ -363,7 +390,7 @@ func (S *Scanner) scanEscape(quote int) {
i, base, max = 8, 16, unicode.MaxRune i, base, max = 8, 16, unicode.MaxRune
default: default:
S.next() // always make progress S.next() // always make progress
S.error(pos, "unknown escape sequence") S.errorAt(pos, "unknown escape sequence")
return return
} }
@ -371,7 +398,7 @@ func (S *Scanner) scanEscape(quote int) {
for ; i > 0 && S.ch != quote && S.ch >= 0; i-- { for ; i > 0 && S.ch != quote && S.ch >= 0; i-- {
d := uint32(digitVal(S.ch)) d := uint32(digitVal(S.ch))
if d >= base { if d >= base {
S.error(S.pos, "illegal character in escape sequence") S.error("illegal character in escape sequence")
break break
} }
x = x*base + d x = x*base + d
@ -382,13 +409,14 @@ func (S *Scanner) scanEscape(quote int) {
S.next() S.next()
} }
if x > max || 0xd800 <= x && x < 0xe000 { if x > max || 0xd800 <= x && x < 0xe000 {
S.error(pos, "escape sequence is invalid Unicode code point") S.errorAt(pos, "escape sequence is invalid Unicode code point")
} }
} }
func (S *Scanner) scanChar(pos token.Position) { func (S *Scanner) scanChar() {
// '\'' already consumed // '\'' opening already consumed
pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
n := 0 n := 0
for S.ch != '\'' { for S.ch != '\'' {
@ -396,7 +424,7 @@ func (S *Scanner) scanChar(pos token.Position) {
n++ n++
S.next() S.next()
if ch == '\n' || ch < 0 { if ch == '\n' || ch < 0 {
S.error(pos, "character literal not terminated") S.errorAt(pos, "character literal not terminated")
n = 1 n = 1
break break
} }
@ -408,19 +436,20 @@ func (S *Scanner) scanChar(pos token.Position) {
S.next() S.next()
if n != 1 { if n != 1 {
S.error(pos, "illegal character literal") S.errorAt(pos, "illegal character literal")
} }
} }
func (S *Scanner) scanString(pos token.Position) { func (S *Scanner) scanString() {
// '"' already consumed // '"' opening already consumed
pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
for S.ch != '"' { for S.ch != '"' {
ch := S.ch ch := S.ch
S.next() S.next()
if ch == '\n' || ch < 0 { if ch == '\n' || ch < 0 {
S.error(pos, "string not terminated") S.errorAt(pos, "string not terminated")
break break
} }
if ch == '\\' { if ch == '\\' {
@ -432,14 +461,15 @@ func (S *Scanner) scanString(pos token.Position) {
} }
func (S *Scanner) scanRawString(pos token.Position) { func (S *Scanner) scanRawString() {
// '`' already consumed // '`' opening already consumed
pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
for S.ch != '`' { for S.ch != '`' {
ch := S.ch ch := S.ch
S.next() S.next()
if ch < 0 { if ch < 0 {
S.error(pos, "string not terminated") S.errorAt(pos, "string not terminated")
break break
} }
} }
@ -524,7 +554,8 @@ scanAgain:
// current token start // current token start
insertSemi := false insertSemi := false
pos, tok = S.pos, token.ILLEGAL pos, tok = token.Position{S.filename, S.offset, S.line, S.column}, token.ILLEGAL
offs := S.offset
// determine token value // determine token value
switch ch := S.ch; { switch ch := S.ch; {
@ -536,7 +567,7 @@ scanAgain:
} }
case digitVal(ch) < 10: case digitVal(ch) < 10:
insertSemi = true insertSemi = true
tok = S.scanNumber(pos, false) tok = S.scanNumber(false)
default: default:
S.next() // always make progress S.next() // always make progress
switch ch { switch ch {
@ -555,21 +586,21 @@ scanAgain:
case '"': case '"':
insertSemi = true insertSemi = true
tok = token.STRING tok = token.STRING
S.scanString(pos) S.scanString()
case '\'': case '\'':
insertSemi = true insertSemi = true
tok = token.CHAR tok = token.CHAR
S.scanChar(pos) S.scanChar()
case '`': case '`':
insertSemi = true insertSemi = true
tok = token.STRING tok = token.STRING
S.scanRawString(pos) S.scanRawString()
case ':': case ':':
tok = S.switch2(token.COLON, token.DEFINE) tok = S.switch2(token.COLON, token.DEFINE)
case '.': case '.':
if digitVal(S.ch) < 10 { if digitVal(S.ch) < 10 {
insertSemi = true insertSemi = true
tok = S.scanNumber(pos, true) tok = S.scanNumber(true)
} else if S.ch == '.' { } else if S.ch == '.' {
S.next() S.next()
if S.ch == '.' { if S.ch == '.' {
@ -613,15 +644,19 @@ scanAgain:
case '/': case '/':
if S.ch == '/' || S.ch == '*' { if S.ch == '/' || S.ch == '*' {
// comment // comment
if S.insertSemi && S.findLineEnd(pos) { line := S.line
col := S.column - 1 // beginning of comment
if S.insertSemi && S.findLineEnd() {
// reset position to the beginning of the comment // reset position to the beginning of the comment
S.pos = pos S.line = line
S.offset = pos.Offset + 1 S.column = col
S.ch = '/' S.ch = '/'
S.offset = offs
S.rdOffset = offs + 1
S.insertSemi = false // newline consumed S.insertSemi = false // newline consumed
return pos, token.SEMICOLON, newline return pos, token.SEMICOLON, newline
} }
S.scanComment(pos) S.scanComment()
if S.mode&ScanComments == 0 { if S.mode&ScanComments == 0 {
// skip comment // skip comment
S.insertSemi = false // newline consumed S.insertSemi = false // newline consumed
@ -659,7 +694,7 @@ scanAgain:
tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR) tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
default: default:
if S.mode&AllowIllegalChars == 0 { if S.mode&AllowIllegalChars == 0 {
S.error(pos, "illegal character "+charString(ch)) S.errorAt(pos, "illegal character "+charString(ch))
} }
insertSemi = S.insertSemi // preserve insertSemi info insertSemi = S.insertSemi // preserve insertSemi info
} }
@ -668,7 +703,7 @@ scanAgain:
if S.mode&InsertSemis != 0 { if S.mode&InsertSemis != 0 {
S.insertSemi = insertSemi S.insertSemi = insertSemi
} }
return pos, tok, S.src[pos.Offset:S.pos.Offset] return pos, tok, S.src[offs:S.offset]
} }

View File

@ -198,16 +198,16 @@ func newlineCount(s string) int {
func checkPos(t *testing.T, lit string, pos, expected token.Position) { func checkPos(t *testing.T, lit string, pos, expected token.Position) {
if pos.Filename != expected.Filename { if pos.Filename != expected.Filename {
t.Errorf("bad filename for %s: got %s, expected %s", lit, pos.Filename, expected.Filename) t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
} }
if pos.Offset != expected.Offset { if pos.Offset != expected.Offset {
t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, expected.Offset) t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
} }
if pos.Line != expected.Line { if pos.Line != expected.Line {
t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, expected.Line) t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
} }
if pos.Column != expected.Column { if pos.Column != expected.Column {
t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, expected.Column) t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
} }
} }
@ -276,15 +276,15 @@ func checkSemi(t *testing.T, line string, mode uint) {
semiLit = ";" semiLit = ";"
} }
// next token must be a semicolon // next token must be a semicolon
offs := pos.Offset + 1 semiPos := pos
semiPos.Offset++
semiPos.Column++
pos, tok, lit = S.Scan() pos, tok, lit = S.Scan()
if tok == token.SEMICOLON { if tok == token.SEMICOLON {
if pos.Offset != offs {
t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs)
}
if string(lit) != semiLit { if string(lit) != semiLit {
t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit) t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit)
} }
checkPos(t, line, pos, semiPos)
} else { } else {
t.Errorf("bad token for %q: got %s, expected ;", line, tok.String()) t.Errorf("bad token for %q: got %s, expected ;", line, tok.String())
} }
@ -399,11 +399,13 @@ var lines = []string{
"foo$/*\n*/", "foo$/*\n*/",
"foo$/*comment*/ \n", "foo$/*comment*/ \n",
"foo$/*\n*/ ", "foo$/*\n*/ ",
"foo $// comment\n", "foo $// comment\n",
"foo $/*comment*/\n", "foo $/*comment*/\n",
"foo $/*\n*/", "foo $/*\n*/",
"foo $/* */ /* \n */ bar$/**/\n",
"foo $/*0*/ /*1*/ /*2*/\n", "foo $/*0*/ /*1*/ /*2*/\n",
"foo $/*comment*/ \n", "foo $/*comment*/ \n",
"foo $/*0*/ /*1*/ /*2*/ \n", "foo $/*0*/ /*1*/ /*2*/ \n",
"foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n", "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n",