1
0
mirror of https://github.com/golang/go synced 2024-11-25 00:37:57 -07:00

fix TODO: insert semicolons before any sequence of comments

that introduce the newline (important for correct placement
of comments with gofmt when parsing new syntax)

R=rsc
https://golang.org/cl/179055
This commit is contained in:
Robert Griesemer 2009-12-15 08:41:50 -08:00
parent dec5bb7882
commit 55ca7a2644
2 changed files with 73 additions and 46 deletions

View File

@ -33,7 +33,6 @@ type Scanner struct {
offset int; // current reading offset (position after ch) offset int; // current reading offset (position after ch)
ch int; // one char look-ahead ch int; // one char look-ahead
insertSemi bool; // insert a semicolon before next newline insertSemi bool; // insert a semicolon before next newline
pendingComment token.Position; // valid if pendingComment.Line > 0
// public state - ok to modify // public state - ok to modify
ErrorCount int; // number of errors encountered ErrorCount int; // number of errors encountered
@ -151,7 +150,7 @@ func (S *Scanner) scanComment(pos token.Position) {
for S.ch >= 0 { for S.ch >= 0 {
S.next(); S.next();
if S.ch == '\n' { if S.ch == '\n' {
// '\n' is not part of the comment // '\n' is not part of the comment for purposes of scanning
// (the comment ends on the same line where it started) // (the comment ends on the same line where it started)
if pos.Column == 1 { if pos.Column == 1 {
text := S.src[pos.Offset+2 : S.pos.Offset]; text := S.src[pos.Offset+2 : S.pos.Offset];
@ -190,6 +189,49 @@ func (S *Scanner) scanComment(pos token.Position) {
} }
// findNewline reports whether a newline occurs inside or directly after
// the run of comments that starts at pos, i.e. before the next
// non-comment token is reached. Scan calls it (only while S.insertSemi
// is set) to decide whether a semicolon must be emitted in front of the
// first comment of the run.
//
// On entry the initial '/' of the comment at pos has been consumed and
// S.ch is '/' or '*'. The function reads ahead and then rewinds:
//
//   - if a newline was found, the scanner is left positioned on the
//     initial '/' at pos, so the comment is still unread when the
//     caller returns;
//   - otherwise the scanner is put back into the state it had on entry
//     (initial '/' consumed), because the caller continues with
//     S.scanComment(pos), which requires exactly that state.
func (S *Scanner) findNewline(pos token.Position) bool {
	// first '/' already consumed; assume S.ch == '/' || S.ch == '*'

	// read ahead until a newline or non-comment token is found
	newline := false;
	for pos1 := pos; S.ch >= 0; {
		if S.ch == '/' {
			//-style comment always contains a newline
			newline = true;
			break;
		}
		S.scanComment(pos1);
		if pos1.Line < S.pos.Line {
			/*-style comment contained a newline */
			newline = true;
			break;
		}
		S.skipWhitespace();
		if S.ch == '\n' {
			// skipWhitespace stops at a newline while S.insertSemi is set
			newline = true;
			break;
		}
		if S.ch != '/' {
			// non-comment token
			break
		}
		// possibly another comment: remember where it starts and
		// consume its first '/'
		pos1 = S.pos;
		S.next();
		if S.ch != '/' && S.ch != '*' {
			// non-comment token
			break
		}
	}

	// rewind to the initial '/' of the first comment
	S.pos = pos;
	S.offset = pos.Offset + 1;
	S.ch = '/';

	if !newline {
		// No semicolon will be inserted, so the caller scans the comment
		// right away. Consume the initial '/' again so that S.ch is once
		// more the character following it ('/' or '*'); without this a
		// /*-style comment would be presented with S.ch == '/' and be
		// mistaken for a //-style comment.
		S.next();
	}

	return newline;
}
func isLetter(ch int) bool { func isLetter(ch int) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
} }
@ -378,6 +420,13 @@ func (S *Scanner) scanRawString(pos token.Position) {
} }
// skipWhitespace advances the scanner past blanks, tabs and carriage
// returns. Newlines are skipped as well, except while S.insertSemi is
// set: in that case scanning stops at the newline so that the caller
// can turn it into a semicolon.
func (S *Scanner) skipWhitespace() {
	for {
		switch S.ch {
		case ' ', '\t', '\r':
			// plain white space: always skipped
		case '\n':
			if S.insertSemi {
				// the newline is significant; leave it for the caller
				return
			}
		default:
			// anything else ends the white space run
			return
		}
		S.next()
	}
}
// Helper functions for scanning multi-byte tokens such as >> += >>= . // Helper functions for scanning multi-byte tokens such as >> += >>= .
// Different routines recognize different length tok_i based on matches // Different routines recognize different length tok_i based on matches
// of ch_i. If a token ends in '=', the result is tok1 or tok3 // of ch_i. If a token ends in '=', the result is tok1 or tok3
@ -437,19 +486,8 @@ var semicolon = []byte{';'}
// of the error handler, if there was one installed. // of the error handler, if there was one installed.
// //
func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
if S.pendingComment.Line > 0 {
// "consume" pending comment
S.pos = S.pendingComment;
S.offset = S.pos.Offset + 1;
S.ch = '/';
S.pendingComment.Line = 0;
}
scanAgain: scanAgain:
// skip white space S.skipWhitespace();
for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
S.next()
}
// current token start // current token start
insertSemi := false; insertSemi := false;
@ -462,8 +500,6 @@ scanAgain:
switch tok { switch tok {
case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN: case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
insertSemi = true insertSemi = true
default:
insertSemi = false
} }
case digitVal(ch) < 10: case digitVal(ch) < 10:
insertSemi = true; insertSemi = true;
@ -474,7 +510,10 @@ scanAgain:
case -1: case -1:
tok = token.EOF tok = token.EOF
case '\n': case '\n':
S.insertSemi = false; // we only reach here if S.insertSemi was
// set in the first place and exited early
// from S.skipWhitespace()
S.insertSemi = false; // newline consumed
return pos, token.SEMICOLON, semicolon; return pos, token.SEMICOLON, semicolon;
case '"': case '"':
insertSemi = true; insertSemi = true;
@ -537,31 +576,17 @@ scanAgain:
case '/': case '/':
if S.ch == '/' || S.ch == '*' { if S.ch == '/' || S.ch == '*' {
// comment // comment
newline := false; if S.insertSemi && S.findNewline(pos) {
if S.insertSemi { S.insertSemi = false; // newline consumed
if S.ch == '/' {
// a line comment acts like a newline
newline = true
} else {
// a general comment may act like a newline
S.scanComment(pos);
newline = pos.Line < S.pos.Line;
}
} else {
S.scanComment(pos)
}
if newline {
// insert a semicolon and retain pending comment
S.insertSemi = false;
S.pendingComment = pos;
return pos, token.SEMICOLON, semicolon; return pos, token.SEMICOLON, semicolon;
} else if S.mode&ScanComments == 0 {
// skip comment
goto scanAgain
} else {
insertSemi = S.insertSemi; // preserve insertSemi info
tok = token.COMMENT;
} }
S.scanComment(pos);
if S.mode&ScanComments == 0 {
// skip comment
S.insertSemi = false; // newline consumed
goto scanAgain;
}
tok = token.COMMENT;
} else { } else {
tok = S.switch2(token.QUO, token.QUO_ASSIGN) tok = S.switch2(token.QUO, token.QUO_ASSIGN)
} }

View File

@ -284,6 +284,7 @@ func checkSemi(t *testing.T, line string, mode uint) {
var lines = []string{ var lines = []string{
// the $ character indicates where a semicolon is expected // the $ character indicates where a semicolon is expected
"", "",
"$;",
"foo$\n", "foo$\n",
"123$\n", "123$\n",
"1.2$\n", "1.2$\n",
@ -380,16 +381,17 @@ var lines = []string{
"foo$//comment\n", "foo$//comment\n",
"foo$/*comment*/\n", "foo$/*comment*/\n",
"foo$/*\n*/", "foo$/*\n*/",
"foo$/*comment*/ \n",
"foo$/*\n*/ ",
"foo $// comment\n", "foo $// comment\n",
"foo $/*comment*/\n", "foo $/*comment*/\n",
"foo $/*\n*/", "foo $/*\n*/",
// TODO(gri): These need to insert the semicolon *before* the "foo $/*comment*/\n",
// first comment which requires arbitrary far look- "foo $/*0*/ /*1*/ /*2*/\n",
// ahead. Only relevant for gofmt placement of "foo $/*comment*/ \n",
// comments. "foo $/*0*/ /*1*/ /*2*/ \n",
"foo /*comment*/ $\n", "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa",
"foo /*0*/ /*1*/ $/*2*/\n",
} }