fix TODO: insert semicolons before any sequence of comments

that introduce the newline (important for correct placement of comments with gofmt when parsing new syntax) R=rsc https://golang.org/cl/179055
2024-11-24 20:20:03 -07:00 · 2009-12-15 08:41:50 -08:00 · 2009-12-15 08:41:50 -08:00 · 55ca7a2644
commit 55ca7a2644
parent dec5bb7882
2 changed files with 73 additions and 46 deletions
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@ -33,7 +33,6 @@ type Scanner struct {
 	offset		int;		// current reading offset (position after ch)
 	ch		int;		// one char look-ahead
 	insertSemi	bool;		// insert a semicolon before next newline
-	pendingComment	token.Position;	// valid if pendingComment.Line > 0

 	// public state - ok to modify
 	ErrorCount	int;	// number of errors encountered
@ -151,7 +150,7 @@ func (S *Scanner) scanComment(pos token.Position) {
 		for S.ch >= 0 {
 			S.next();
 			if S.ch == '\n' {
-				// '\n' is not part of the comment
+				// '\n' is not part of the comment for purposes of scanning
 				// (the comment ends on the same line where it started)
 				if pos.Column == 1 {
 					text := S.src[pos.Offset+2 : S.pos.Offset];
@ -190,6 +189,49 @@ func (S *Scanner) scanComment(pos token.Position) {
 }


+func (S *Scanner) findNewline(pos token.Position) bool {
+	// first '/' already consumed; assume S.ch == '/' || S.ch == '*'
+
+	// read ahead until a newline or non-comment token is found
+	newline := false;
+	for pos1 := pos; S.ch >= 0; {
+		if S.ch == '/' {
+			//-style comment always contains a newline
+			newline = true;
+			break;
+		}
+		S.scanComment(pos1);
+		if pos1.Line < S.pos.Line {
+			/*-style comment contained a newline */
+			newline = true;
+			break;
+		}
+		S.skipWhitespace();
+		if S.ch == '\n' {
+			newline = true;
+			break;
+		}
+		if S.ch != '/' {
+			// non-comment token
+			break
+		}
+		pos1 = S.pos;
+		S.next();
+		if S.ch != '/' && S.ch != '*' {
+			// non-comment token
+			break
+		}
+	}
+
+	// reset position
+	S.pos = pos;
+	S.offset = pos.Offset + 1;
+	S.ch = '/';
+
+	return newline;
+}
+
+
 func isLetter(ch int) bool {
 	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
 }
@ -378,6 +420,13 @@ func (S *Scanner) scanRawString(pos token.Position) {
 }


+func (S *Scanner) skipWhitespace() {
+	for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
+		S.next()
+	}
+}
+
+
 // Helper functions for scanning multi-byte tokens such as >> += >>= .
 // Different routines recognize different length tok_i based on matches
 // of ch_i. If a token ends in '=', the result is tok1 or tok3
@ -437,19 +486,8 @@ var semicolon = []byte{';'}
 // of the error handler, if there was one installed.
 //
 func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
-	if S.pendingComment.Line > 0 {
-		// "consume" pending comment
-		S.pos = S.pendingComment;
-		S.offset = S.pos.Offset + 1;
-		S.ch = '/';
-		S.pendingComment.Line = 0;
-	}
-
 scanAgain:
-	// skip white space
-	for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
-		S.next()
-	}
+	S.skipWhitespace();

 	// current token start
 	insertSemi := false;
@ -462,8 +500,6 @@ scanAgain:
 		switch tok {
 		case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
 			insertSemi = true
-		default:
-			insertSemi = false
 		}
 	case digitVal(ch) < 10:
 		insertSemi = true;
@ -474,7 +510,10 @@ scanAgain:
 		case -1:
 			tok = token.EOF
 		case '\n':
-			S.insertSemi = false;
+			// we only reach here of S.insertSemi was
+			// set in the first place and exited early
+			// from S.skipWhitespace()
+			S.insertSemi = false;	// newline consumed
 			return pos, token.SEMICOLON, semicolon;
 		case '"':
 			insertSemi = true;
@ -537,31 +576,17 @@ scanAgain:
 		case '/':
 			if S.ch == '/' || S.ch == '*' {
 				// comment
-				newline := false;
-				if S.insertSemi {
-					if S.ch == '/' {
-						// a line comment acts like a newline
-						newline = true
-					} else {
-						// a general comment may act like a newline
-						S.scanComment(pos);
-						newline = pos.Line < S.pos.Line;
-					}
-				} else {
-					S.scanComment(pos)
-				}
-				if newline {
-					// insert a semicolon and retain pending comment
-					S.insertSemi = false;
-					S.pendingComment = pos;
+				if S.insertSemi && S.findNewline(pos) {
+					S.insertSemi = false;	// newline consumed
 					return pos, token.SEMICOLON, semicolon;
-				} else if S.mode&ScanComments == 0 {
-					// skip comment
-					goto scanAgain
-				} else {
-					insertSemi = S.insertSemi;	// preserve insertSemi info
-					tok = token.COMMENT;
 				}
+				S.scanComment(pos);
+				if S.mode&ScanComments == 0 {
+					// skip comment
+					S.insertSemi = false;	// newline consumed
+					goto scanAgain;
+				}
+				tok = token.COMMENT;
 			} else {
 				tok = S.switch2(token.QUO, token.QUO_ASSIGN)
 			}
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@ -284,6 +284,7 @@ func checkSemi(t *testing.T, line string, mode uint) {
 var lines = []string{
 	// the $ character indicates where a semicolon is expected
 	"",
+	"$;",
 	"foo$\n",
 	"123$\n",
 	"1.2$\n",
@ -380,16 +381,17 @@ var lines = []string{
 	"foo$//comment\n",
 	"foo$/*comment*/\n",
 	"foo$/*\n*/",
+	"foo$/*comment*/    \n",
+	"foo$/*\n*/    ",
 	"foo    $// comment\n",
 	"foo    $/*comment*/\n",
 	"foo    $/*\n*/",

-	// TODO(gri): These need to insert the semicolon *before* the
-	//            first comment which requires arbitrary far look-
-	//            ahead. Only relevant for gofmt placement of
-	//            comments.
-	"foo    /*comment*/    $\n",
-	"foo    /*0*/ /*1*/ $/*2*/\n",
+	"foo    $/*comment*/\n",
+	"foo    $/*0*/ /*1*/ /*2*/\n",
+	"foo    $/*comment*/    \n",
+	"foo    $/*0*/ /*1*/ /*2*/    \n",
+	"foo	$/**/ /*-------------*/       /*----\n*/bar       $/*  \n*/baa",
 }