From 34fb5855eb73fe04ee0bcfc0d9ca8be5440a560b Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 20 Feb 2019 10:44:52 -0800 Subject: [PATCH] text/scanner: don't liberally consume (invalid) floats or underbars This is a follow-up on https://golang.org/cl/161199 which introduced the new Go 2 number literals to text/scanner. That change introduced a bug by allowing decimal and hexadecimal floats to be consumed even if the scanner was not configured to accept floats. This CL changes the code to not consume a radix dot '.' or exponent unless the scanner is configured to accept floats. This CL also introduces a new mode "AllowNumberbars" which controls whether underbars '_' are permitted as digit separators in numbers or not. There is a possibility that we may need to refine text/scanner further (e.g., the Float mode now includes hexadecimal floats which it didn't recognize before). We're very early in the cycle, so let's see how it goes. RELNOTE=yes Updates #12711. Updates #19308. Updates #28493. Updates #29008. Fixes #30320. Change-Id: I6481d314f0384e09ef6803ffad38dc529b1e89a3 Reviewed-on: https://go-review.googlesource.com/c/163079 Reviewed-by: Ian Lance Taylor --- api/except.txt | 1 + api/next.txt | 3 ++ src/text/scanner/scanner.go | 48 ++++++++++++++++++-------------- src/text/scanner/scanner_test.go | 37 ++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/api/except.txt b/api/except.txt index 637be18135..a608d5783e 100644 --- a/api/except.txt +++ b/api/except.txt @@ -457,3 +457,4 @@ pkg syscall (freebsd-arm-cgo), type Stat_t struct, Nlink uint16 pkg syscall (freebsd-arm-cgo), type Stat_t struct, Rdev uint32 pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8 pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8 +pkg text/scanner, const GoTokens = 1012 \ No newline at end of file diff --git a/api/next.txt b/api/next.txt index e69de29bb2..aaea62d70b 100644 --- a/api/next.txt +++ b/api/next.txt @@ -0,0 +1,3 @@ +pkg text/scanner, const AllowNumberbars = 1024 +pkg text/scanner, const AllowNumberbars ideal-int +pkg text/scanner, const GoTokens = 2036 diff --git a/src/text/scanner/scanner.go b/src/text/scanner/scanner.go index 38c27f6a08..8db0ed28d3 100644 --- a/src/text/scanner/scanner.go +++ b/src/text/scanner/scanner.go @@ -59,15 +59,16 @@ func (pos Position) String() string { // "foo" is scanned as the token sequence '"' Ident '"'. // const ( - ScanIdents = 1 << -Ident - ScanInts = 1 << -Int - ScanFloats = 1 << -Float // includes Ints - ScanChars = 1 << -Char - ScanStrings = 1 << -String - ScanRawStrings = 1 << -RawString - ScanComments = 1 << -Comment - SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space - GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments + ScanIdents = 1 << -Ident + ScanInts = 1 << -Int + ScanFloats = 1 << -Float // includes Ints and hexadecimal floats + ScanChars = 1 << -Char + ScanStrings = 1 << -String + ScanRawStrings = 1 << -RawString + ScanComments = 1 << -Comment + SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space + AllowNumberbars = 1 << -allowNumberbars // if set, number literals may contain underbars as digit separators + GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments | AllowNumberbars ) // The result of Scan is one of these tokens or a Unicode character. @@ -80,7 +81,10 @@ const ( String RawString Comment + + // internal use only skipComment + allowNumberbars ) var tokenString = map[rune]string{ @@ -359,7 +363,8 @@ func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case c func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' } -// digits accepts the sequence { digit | '_' } starting with ch0. +// digits accepts the sequence { digit } (if AllowNumberbars is not set) +// or { digit | '_' } (if AllowNumberbars is set), starting with ch0. // If base <= 10, digits accepts any decimal digit but records // the first invalid digit >= base in *invalid if *invalid == 0. // digits returns the first rune that is not part of the sequence @@ -369,7 +374,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int ch = ch0 if base <= 10 { max := rune('0' + base) - for isDecimal(ch) || ch == '_' { + for isDecimal(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 { ds := 1 if ch == '_' { ds = 2 @@ -380,7 +385,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int ch = s.next() } } else { - for isHex(ch) || ch == '_' { + for isHex(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 { ds := 1 if ch == '_' { ds = 2 @@ -392,7 +397,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int return } -func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { +func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) { base := 10 // number base prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b' digsep := 0 // bit 0: digit present, bit 1: '_' present @@ -401,7 +406,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { // integer part var tok rune var ds int - if integerPart { + if !seenDot { tok = Int if ch == '0' { ch = s.next() @@ -422,17 +427,18 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { } ch, ds = s.digits(ch, base, &invalid) digsep |= ds + if ch == '.' && s.Mode&ScanFloats != 0 { + ch = s.next() + seenDot = true + } } // fractional part - if !integerPart || ch == '.' { + if seenDot { tok = Float if prefix == 'o' || prefix == 'b' { s.error("invalid radix point in " + litname(prefix)) } - if ch == '.' { - ch = s.next() - } ch, ds = s.digits(ch, base, &invalid) digsep |= ds } @@ -442,7 +448,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { } // exponent - if e := lower(ch); e == 'e' || e == 'p' { + if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 { switch { case e == 'e' && prefix != 0 && prefix != '0': s.errorf("%q exponent requires decimal mantissa", ch) @@ -682,7 +688,7 @@ redo: } case isDecimal(ch): if s.Mode&(ScanInts|ScanFloats) != 0 { - tok, ch = s.scanNumber(ch, true) + tok, ch = s.scanNumber(ch, false) } else { ch = s.next() } @@ -705,7 +711,7 @@ redo: case '.': ch = s.next() if isDecimal(ch) && s.Mode&ScanFloats != 0 { - tok, ch = s.scanNumber(ch, false) + tok, ch = s.scanNumber(ch, true) } case '/': ch = s.next() diff --git a/src/text/scanner/scanner_test.go b/src/text/scanner/scanner_test.go index 58db8e1971..6ae8fd9a08 100644 --- a/src/text/scanner/scanner_test.go +++ b/src/text/scanner/scanner_test.go @@ -877,3 +877,40 @@ func TestNumbers(t *testing.T) { } } } + +func TestIssue30320(t *testing.T) { + for _, test := range []struct { + in, want string + mode uint + }{ + {"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts}, + {"foo0/12/0/5.67", "0 12 0 5 67", ScanInts}, + {"xxx1e0yyy", "1 0", ScanInts}, + {"1_2", "1 2", ScanInts}, // don't consume _ as part of a number if not explicitly enabled + {"1_2", "1_2", ScanInts | AllowNumberbars}, + {"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts}, + {"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats}, + } { + got := extractInts(test.in, test.mode) + if got != test.want { + t.Errorf("%q: got %q; want %q", test.in, got, test.want) + } + } +} + +func extractInts(t string, mode uint) (res string) { + var s Scanner + s.Init(strings.NewReader(t)) + s.Mode = mode + for { + switch tok := s.Scan(); tok { + case Int, Float: + if len(res) > 0 { + res += " " + } + res += s.TokenText() + case EOF: + return + } + } +}