From cfa93ba51fb62252633c62ddc9351d56c18ce018 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Thu, 7 Mar 2019 17:44:50 -0800 Subject: [PATCH] math/big: add support for underscores '_' in numbers The primary change is in nat.scan which now accepts underscores for base 0. While at it, streamlined error handling in that function as well. Also, improved the corresponding test significantly by checking the expected result values also in case of scan errors. The second major change is in scanExponent which now accepts underscores when the new sepOk argument is set. While at it, essentially rewrote that function to match error and underscore handling of nat.scan more closely. Added a new test for scanExponent which until now was only tested indirectly. Finally, updated the documentation for several functions and added many new test cases to clients of nat.scan. A major portion of this CL is due to much better test coverage. Updates #28493. Change-Id: I7f17b361b633fbe6c798619d891bd5e0a045b5c5 Reviewed-on: https://go-review.googlesource.com/c/go/+/166157 Reviewed-by: Emmanuel Odeke --- src/math/big/floatconv.go | 37 +++--- src/math/big/floatconv_test.go | 25 ++++ src/math/big/int.go | 16 ++- src/math/big/intconv_test.go | 17 +++ src/math/big/natconv.go | 201 ++++++++++++++++++--------------- src/math/big/natconv_test.go | 178 +++++++++++++++++------------ src/math/big/ratconv.go | 115 +++++++++++-------- src/math/big/ratconv_test.go | 121 +++++++++++++++++--- 8 files changed, 464 insertions(+), 246 deletions(-) diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go index b685b2a288..88216f5600 100644 --- a/src/math/big/floatconv.go +++ b/src/math/big/floatconv.go @@ -55,7 +55,7 @@ func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) { // exponent var exp int64 var ebase int - exp, ebase, err = scanExponent(r, true) + exp, ebase, err = scanExponent(r, true, base == 0) if err != nil { return } @@ -216,20 +216,29 @@ func (z *Float) pow5(n uint64) *Float { // point number with a mantissa in the given conversion base (the exponent // is always a decimal number), or a string representing an infinite value. // +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number, or the returned +// digit count. Incorrect placement of underscores is reported as an +// error if there are no other errors. If base != 0, underscores are +// not recognized and thus terminate scanning like any other character +// that is not a valid radix point or digit. +// // It sets z to the (possibly rounded) value of the corresponding floating- // point value, and returns z, the actual base b, and an error err, if any. // The entire string (not just a prefix) must be consumed for success. // If z's precision is 0, it is changed to 64 before rounding takes effect. // The number must be of the form: // -// number = [ sign ] [ prefix ] mantissa [ exponent ] | infinity . -// sign = "+" | "-" . -// prefix = "0" ( "b" | "B" | "o" | "O" | "x" | "X" ) . -// mantissa = digits | digits "." [ digits ] | "." digits . -// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . -// digits = digit { digit } . -// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . -// infinity = [ sign ] ( "inf" | "Inf" ) . +// number = [ sign ] ( float | "inf" | "Inf" ) . +// sign = "+" | "-" . +// float = ( mantissa | prefix pmantissa ) [ exponent ] . +// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . +// mantissa = digits "." [ digits ] | digits | "." digits . +// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits . +// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . +// digits = digit { [ "_" ] digit } . +// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . // // The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base // argument will lead to a run-time panic. @@ -240,11 +249,11 @@ func (z *Float) pow5(n uint64) *Float { // no prefix is accepted. The octal prefix "0" is not supported (a leading // "0" is simply considered a "0"). // -// A "p" or "P" exponent indicates a binary (rather then decimal) exponent; -// for instance "0x1.fffffffffffffp1023" (using base 0) represents the -// maximum float64 value. For hexadecimal mantissae, the exponent must -// be binary, if present (an "e" or "E" exponent indicator cannot be -// distinguished from a mantissa digit). +// A "p" or "P" exponent indicates a base 2 (rather then base 10) exponent; +// for instance, "0x1.fffffffffffffp1023" (using base 0) represents the +// maximum float64 value. For hexadecimal mantissae, the exponent character +// must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator +// cannot be distinguished from a mantissa digit). // // The returned *Float f is nil and the value of z is valid but not // defined if an error is reported. diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go index f32dd8928b..c6c6ba63e5 100644 --- a/src/math/big/floatconv_test.go +++ b/src/math/big/floatconv_test.go @@ -72,6 +72,21 @@ func TestFloatSetFloat64String(t *testing.T) { {"infinity", nan}, {"foobar", nan}, + // invalid underscores + {"_", nan}, + {"0_", nan}, + {"1__0", nan}, + {"123_.", nan}, + {"123._", nan}, + {"123._4", nan}, + {"1_2.3_4_", nan}, + {"_.123", nan}, + {"_123.456", nan}, + {"10._0", nan}, + {"10.0e_0", nan}, + {"10.0e0_", nan}, + {"0P-0__0", nan}, + // misc decimal values {"3.14159265", 3.14159265}, {"-687436.79457e-245", -687436.79457e-245}, @@ -142,6 +157,16 @@ func TestFloatSetFloat64String(t *testing.T) { {"-0X0.00008P+16", -0.5}, {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64}, {"0x1.fffffffffffffp1023", math.MaxFloat64}, + + // underscores + {"0_0", 0}, + {"1_000.", 1000}, + {"1_2_3.4_5_6", 123.456}, + {"1.0e0_0", 1}, + {"1p+1_0", 1024}, + {"0b_1000", 0x8}, + {"0b_1011_1101", 0xbd}, + {"0x_f0_0d_1eP+0_8", 0xf00d1e00}, } { var x Float x.SetPrec(53) diff --git a/src/math/big/int.go b/src/math/big/int.go index eb0285c48f..afad1bc961 100644 --- a/src/math/big/int.go +++ b/src/math/big/int.go @@ -401,16 +401,24 @@ func (x *Int) IsUint64() bool { // (not just a prefix) must be valid for success. If SetString fails, // the value of z is undefined but the returned value is nil. // -// The base argument must be 0 or a value between 2 and MaxBase. If the base -// is 0, the string prefix determines the actual conversion base. A prefix of -// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a -// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. +// The base argument must be 0 or a value between 2 and MaxBase. +// For base 0, the number prefix determines the actual base: A prefix of +// ``0b'' or ``0B'' selects base 2, ``0'', ``0o'' or ``0O'' selects base 8, +// and ``0x'' or ``0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. // // For bases <= 36, lower and upper case letters are considered the same: // The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35. // For bases > 36, the upper case letters 'A' to 'Z' represent the digit // values 36 to 61. // +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as an error if there +// are no other errors. If base != 0, underscores are not recognized +// and act like any other character that is not a valid digit. +// func (z *Int) SetString(s string, base int) (*Int, bool) { return z.setFromScanner(strings.NewReader(s), base) } diff --git a/src/math/big/intconv_test.go b/src/math/big/intconv_test.go index d625b6aa3d..5ba29263a6 100644 --- a/src/math/big/intconv_test.go +++ b/src/math/big/intconv_test.go @@ -34,6 +34,16 @@ var stringTests = []struct { {in: "0xg", base: 0}, {in: "g", base: 16}, + // invalid inputs with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {in: "_"}, + {in: "0_"}, + {in: "_0"}, + {in: "-1__0"}, + {in: "0x10_"}, + {in: "1_000", base: 10}, // separators are not permitted for bases != 0 + {in: "d_e_a_d", base: 16}, + // valid inputs {"0", "0", 0, 0, true}, {"0", "0", 10, 0, true}, @@ -67,6 +77,13 @@ var stringTests = []struct { {"A", "A", 37, 36, true}, {"ABCXYZ", "abcxyz", 36, 623741435, true}, {"ABCXYZ", "ABCXYZ", 62, 33536793425, true}, + + // valid input with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {"1_000", "1000", 0, 1000, true}, + {"0b_1010", "10", 0, 10, true}, + {"+0o_660", "432", 0, 0660, true}, + {"-0xF00D_1E", "-15731998", 0, -0xf00d1e, true}, } func TestIntText(t *testing.T) { diff --git a/src/math/big/natconv.go b/src/math/big/natconv.go index c3c4115097..42d1cccf6f 100644 --- a/src/math/big/natconv.go +++ b/src/math/big/natconv.go @@ -55,16 +55,31 @@ func pow(x Word, n int) (p Word) { return } +// scan errors +var ( + errNoDigits = errors.New("number has no digits") + errInvalSep = errors.New("'_' must separate successive digits") +) + // scan scans the number corresponding to the longest possible prefix // from r representing an unsigned number in a given conversion base. -// It returns the corresponding natural number res, the actual base b, +// scan returns the corresponding natural number res, the actual base b, // a digit count, and a read or syntax error err, if any. // -// number = [ prefix ] mantissa . -// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . -// mantissa = digits | digits "." [ digits ] | "." digits . -// digits = digit { digit } . -// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number, or the returned +// digit count. Incorrect placement of underscores is reported as an +// error if there are no other errors. If base != 0, underscores are +// not recognized and thus terminate scanning like any other character +// that is not a valid radix point or digit. +// +// number = mantissa | prefix pmantissa . +// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . +// mantissa = digits "." [ digits ] | digits | "." digits . +// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits . +// digits = digit { [ "_" ] digit } . +// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . // // Unless fracOk is set, the base argument must be 0 or a value between // 2 and MaxBase. If fracOk is set, the base argument must be one of @@ -92,53 +107,51 @@ func pow(x Word, n int) (p Word) { // In this case, the actual value of the scanned number is res * b**count. // func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) { - // reject illegal bases + // reject invalid bases baseOk := base == 0 || !fracOk && 2 <= base && base <= MaxBase || fracOk && (base == 2 || base == 8 || base == 10 || base == 16) if !baseOk { - panic(fmt.Sprintf("illegal number base %d", base)) + panic(fmt.Sprintf("invalid number base %d", base)) } + // prev encodes the previously seen char: it is one + // of '_', '0' (a digit), or '.' (anything else). A + // valid separator '_' may only occur after a digit + // and if base == 0. + prev := '.' + invalSep := false + // one char look-ahead ch, err := r.ReadByte() - if err != nil { - return // io.EOF is also an error in this case - } // determine actual base b, prefix := base, 0 if base == 0 { // actual base is 10 unless there's a base prefix b = 10 - if ch == '0' { + if err == nil && ch == '0' { + prev = '0' count = 1 ch, err = r.ReadByte() - if err != nil { - if err == io.EOF { - err = nil // not an error; input is "0" - res = z[:0] + if err == nil { + // possibly one of 0b, 0B, 0o, 0O, 0x, 0X + switch ch { + case 'b', 'B': + b, prefix = 2, 'b' + case 'o', 'O': + b, prefix = 8, 'o' + case 'x', 'X': + b, prefix = 16, 'x' + default: + if !fracOk { + b, prefix = 8, '0' + } } - return - } - // possibly one of 0b, 0B, 0o, 0O, 0x, 0X - switch ch { - case 'b', 'B': - b, prefix = 2, 'b' - case 'o', 'O': - b, prefix = 8, 'o' - case 'x', 'X': - b, prefix = 16, 'x' - default: - if !fracOk { - b, prefix = 8, '0' - } - } - if prefix != 0 { - count = 0 // prefix is not counted - if prefix != '0' { - if ch, err = r.ReadByte(); err != nil { - return // io.EOF is also an error in this case + if prefix != 0 { + count = 0 // prefix is not counted + if prefix != '0' { + ch, err = r.ReadByte() } } } @@ -155,76 +168,76 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in di := Word(0) // 0 <= di < b1**i < bn i := 0 // 0 <= i < n dp := -1 // position of decimal point - for { - if fracOk && ch == '.' { + for err == nil { + if ch == '.' && fracOk { fracOk = false + if prev == '_' { + invalSep = true + } + prev = '.' dp = count - // advance - if ch, err = r.ReadByte(); err != nil { - if err == io.EOF { - err = nil - break + } else if ch == '_' && base == 0 { + if prev != '0' { + invalSep = true + } + prev = '_' + } else { + // convert rune into digit value d1 + var d1 Word + switch { + case '0' <= ch && ch <= '9': + d1 = Word(ch - '0') + case 'a' <= ch && ch <= 'z': + d1 = Word(ch - 'a' + 10) + case 'A' <= ch && ch <= 'Z': + if b <= maxBaseSmall { + d1 = Word(ch - 'A' + 10) + } else { + d1 = Word(ch - 'A' + maxBaseSmall) } - return + default: + d1 = MaxBase + 1 } - } - - // convert rune into digit value d1 - var d1 Word - switch { - case '0' <= ch && ch <= '9': - d1 = Word(ch - '0') - case 'a' <= ch && ch <= 'z': - d1 = Word(ch - 'a' + 10) - case 'A' <= ch && ch <= 'Z': - if b <= maxBaseSmall { - d1 = Word(ch - 'A' + 10) - } else { - d1 = Word(ch - 'A' + maxBaseSmall) - } - default: - d1 = MaxBase + 1 - } - if d1 >= b1 { - r.UnreadByte() // ch does not belong to number anymore - break - } - count++ - - // collect d1 in di - di = di*b1 + d1 - i++ - - // if di is "full", add it to the result - if i == n { - z = z.mulAddWW(z, bn, di) - di = 0 - i = 0 - } - - // advance - if ch, err = r.ReadByte(); err != nil { - if err == io.EOF { - err = nil + if d1 >= b1 { + r.UnreadByte() // ch does not belong to number anymore break } - return + prev = '0' + count++ + + // collect d1 in di + di = di*b1 + d1 + i++ + + // if di is "full", add it to the result + if i == n { + z = z.mulAddWW(z, bn, di) + di = 0 + i = 0 + } } + + ch, err = r.ReadByte() + } + + if err == io.EOF { + err = nil + } + + // other errors take precedence over invalid separators + if err == nil && (invalSep || prev == '_') { + err = errInvalSep } if count == 0 { // no digits found if prefix == '0' { - // there was only the octal prefix 0 (possibly followed by digits > 7); - // count as one digit and return base 10, not 8 - count = 1 - b = 10 - } else { - err = errors.New("syntax error scanning number") + // there was only the octal prefix 0 (possibly followed by separators and digits > 7); + // interpret as decimal 0 + return z[:0], 10, 1, err } - return + err = errNoDigits // fall through; result will be 0 } - // count > 0 // add remaining digits to result if i > 0 { @@ -232,9 +245,9 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in } res = z.norm() - // adjust for fraction, if any + // adjust count for fraction, if any if dp >= 0 { - // 0 <= dp <= count > 0 + // 0 <= dp <= count count = dp - count } diff --git a/src/math/big/natconv_test.go b/src/math/big/natconv_test.go index 645e2b8434..9c2acca07e 100644 --- a/src/math/big/natconv_test.go +++ b/src/math/big/natconv_test.go @@ -109,92 +109,126 @@ var natScanTests = []struct { x nat // expected nat b int // expected base count int // expected digit count - ok bool // expected success + err error // expected error next rune // next character (or 0, if at EOF) }{ - // invalid: no mantissa - {}, - {s: "?"}, - {base: 10}, - {base: 36}, - {base: 62}, - {s: "?", base: 10}, - {s: "0b"}, - {s: "0o"}, - {s: "0x"}, - {s: "0b2"}, - {s: "0B2"}, - {s: "0o8"}, - {s: "0O8"}, - {s: "0xg"}, - {s: "0Xg"}, - {s: "345", base: 2}, + // invalid: no digits + {"", 0, false, nil, 10, 0, errNoDigits, 0}, + {"_", 0, false, nil, 10, 0, errNoDigits, 0}, + {"?", 0, false, nil, 10, 0, errNoDigits, '?'}, + {"?", 10, false, nil, 10, 0, errNoDigits, '?'}, + {"", 10, false, nil, 10, 0, errNoDigits, 0}, + {"", 36, false, nil, 36, 0, errNoDigits, 0}, + {"", 62, false, nil, 62, 0, errNoDigits, 0}, + {"0b", 0, false, nil, 2, 0, errNoDigits, 0}, + {"0o", 0, false, nil, 8, 0, errNoDigits, 0}, + {"0x", 0, false, nil, 16, 0, errNoDigits, 0}, + {"0x_", 0, false, nil, 16, 0, errNoDigits, 0}, + {"0b2", 0, false, nil, 2, 0, errNoDigits, '2'}, + {"0B2", 0, false, nil, 2, 0, errNoDigits, '2'}, + {"0o8", 0, false, nil, 8, 0, errNoDigits, '8'}, + {"0O8", 0, false, nil, 8, 0, errNoDigits, '8'}, + {"0xg", 0, false, nil, 16, 0, errNoDigits, 'g'}, + {"0Xg", 0, false, nil, 16, 0, errNoDigits, 'g'}, + {"345", 2, false, nil, 2, 0, errNoDigits, '3'}, // invalid: incorrect use of decimal point - {s: ".0"}, - {s: ".0", base: 10}, - {s: ".", base: 0}, - {s: "0x.0"}, + {"._", 0, true, nil, 10, 0, errNoDigits, 0}, + {".0", 0, false, nil, 10, 0, errNoDigits, '.'}, + {".0", 10, false, nil, 10, 0, errNoDigits, '.'}, + {".", 0, true, nil, 10, 0, errNoDigits, 0}, + {"0x.", 0, true, nil, 16, 0, errNoDigits, 0}, + {"0x.g", 0, true, nil, 16, 0, errNoDigits, 'g'}, + {"0x.0", 0, false, nil, 16, 0, errNoDigits, '.'}, + + // invalid: incorrect use of separators + {"_0", 0, false, nil, 10, 1, errInvalSep, 0}, + {"0_", 0, false, nil, 10, 1, errInvalSep, 0}, + {"0__0", 0, false, nil, 8, 1, errInvalSep, 0}, + {"0x___0", 0, false, nil, 16, 1, errInvalSep, 0}, + {"0_x", 0, false, nil, 10, 1, errInvalSep, 'x'}, + {"0_8", 0, false, nil, 10, 1, errInvalSep, '8'}, + {"123_.", 0, true, nat{123}, 10, 0, errInvalSep, 0}, + {"._123", 0, true, nat{123}, 10, -3, errInvalSep, 0}, + {"0b__1000", 0, false, nat{0x8}, 2, 4, errInvalSep, 0}, + {"0o60___0", 0, false, nat{0600}, 8, 3, errInvalSep, 0}, + {"0466_", 0, false, nat{0466}, 8, 3, errInvalSep, 0}, + {"01234567_8", 0, false, nat{01234567}, 8, 7, errInvalSep, '8'}, + {"1_.", 0, true, nat{1}, 10, 0, errInvalSep, 0}, + {"0._1", 0, true, nat{1}, 10, -1, errInvalSep, 0}, + {"2.7_", 0, true, nat{27}, 10, -1, errInvalSep, 0}, + {"0x1.0_", 0, true, nat{0x10}, 16, -1, errInvalSep, 0}, + + // valid: separators are not accepted for base != 0 + {"0_", 10, false, nil, 10, 1, nil, '_'}, + {"1__0", 10, false, nat{1}, 10, 1, nil, '_'}, + {"0__8", 10, false, nil, 10, 1, nil, '_'}, + {"xy_z_", 36, false, nat{33*36 + 34}, 36, 2, nil, '_'}, // valid, no decimal point - {"0", 0, false, nil, 10, 1, true, 0}, - {"0", 10, false, nil, 10, 1, true, 0}, - {"0", 36, false, nil, 36, 1, true, 0}, - {"0", 62, false, nil, 62, 1, true, 0}, - {"1", 0, false, nat{1}, 10, 1, true, 0}, - {"1", 10, false, nat{1}, 10, 1, true, 0}, - {"0 ", 0, false, nil, 10, 1, true, ' '}, - {"00 ", 0, false, nil, 8, 1, true, ' '}, // octal 0 - {"0b1", 0, false, nat{1}, 2, 1, true, 0}, - {"0B11000101", 0, false, nat{0xc5}, 2, 8, true, 0}, - {"0B110001012", 0, false, nat{0xc5}, 2, 8, true, '2'}, - {"07", 0, false, nat{7}, 8, 1, true, 0}, - {"08", 0, false, nil, 10, 1, true, '8'}, - {"08", 10, false, nat{8}, 10, 2, true, 0}, - {"018", 0, false, nat{1}, 8, 1, true, '8'}, - {"0o7", 0, false, nat{7}, 8, 1, true, 0}, - {"0o18", 0, false, nat{1}, 8, 1, true, '8'}, - {"0O17", 0, false, nat{017}, 8, 2, true, 0}, - {"03271", 0, false, nat{03271}, 8, 4, true, 0}, - {"10ab", 0, false, nat{10}, 10, 2, true, 'a'}, - {"1234567890", 0, false, nat{1234567890}, 10, 10, true, 0}, - {"A", 36, false, nat{10}, 36, 1, true, 0}, - {"A", 37, false, nat{36}, 37, 1, true, 0}, - {"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, 0}, - {"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, '?'}, - {"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, true, '?'}, - {"0x", 16, false, nil, 16, 1, true, 'x'}, - {"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, true, 0}, - {"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, true, 0}, + {"0", 0, false, nil, 10, 1, nil, 0}, + {"0", 36, false, nil, 36, 1, nil, 0}, + {"0", 62, false, nil, 62, 1, nil, 0}, + {"1", 0, false, nat{1}, 10, 1, nil, 0}, + {"1", 10, false, nat{1}, 10, 1, nil, 0}, + {"0 ", 0, false, nil, 10, 1, nil, ' '}, + {"00 ", 0, false, nil, 8, 1, nil, ' '}, // octal 0 + {"0b1", 0, false, nat{1}, 2, 1, nil, 0}, + {"0B11000101", 0, false, nat{0xc5}, 2, 8, nil, 0}, + {"0B110001012", 0, false, nat{0xc5}, 2, 8, nil, '2'}, + {"07", 0, false, nat{7}, 8, 1, nil, 0}, + {"08", 0, false, nil, 10, 1, nil, '8'}, + {"08", 10, false, nat{8}, 10, 2, nil, 0}, + {"018", 0, false, nat{1}, 8, 1, nil, '8'}, + {"0o7", 0, false, nat{7}, 8, 1, nil, 0}, + {"0o18", 0, false, nat{1}, 8, 1, nil, '8'}, + {"0O17", 0, false, nat{017}, 8, 2, nil, 0}, + {"03271", 0, false, nat{03271}, 8, 4, nil, 0}, + {"10ab", 0, false, nat{10}, 10, 2, nil, 'a'}, + {"1234567890", 0, false, nat{1234567890}, 10, 10, nil, 0}, + {"A", 36, false, nat{10}, 36, 1, nil, 0}, + {"A", 37, false, nat{36}, 37, 1, nil, 0}, + {"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, 0}, + {"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, '?'}, + {"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, nil, '?'}, + {"0x", 16, false, nil, 16, 1, nil, 'x'}, + {"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0}, + {"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0}, // valid, with decimal point - {"0.", 0, false, nil, 10, 1, true, '.'}, - {"0.", 10, true, nil, 10, 0, true, 0}, - {"0.1.2", 10, true, nat{1}, 10, -1, true, '.'}, - {".000", 10, true, nil, 10, -3, true, 0}, - {"12.3", 10, true, nat{123}, 10, -1, true, 0}, - {"012.345", 10, true, nat{12345}, 10, -3, true, 0}, - {"0.1", 0, true, nat{1}, 10, -1, true, 0}, - {"0.1", 2, true, nat{1}, 2, -1, true, 0}, - {"0.12", 2, true, nat{1}, 2, -1, true, '2'}, - {"0b0.1", 0, true, nat{1}, 2, -1, true, 0}, - {"0B0.12", 0, true, nat{1}, 2, -1, true, '2'}, - {"0o0.7", 0, true, nat{7}, 8, -1, true, 0}, - {"0O0.78", 0, true, nat{7}, 8, -1, true, '8'}, + {"0.", 0, false, nil, 10, 1, nil, '.'}, + {"0.", 10, true, nil, 10, 0, nil, 0}, + {"0.1.2", 10, true, nat{1}, 10, -1, nil, '.'}, + {".000", 10, true, nil, 10, -3, nil, 0}, + {"12.3", 10, true, nat{123}, 10, -1, nil, 0}, + {"012.345", 10, true, nat{12345}, 10, -3, nil, 0}, + {"0.1", 0, true, nat{1}, 10, -1, nil, 0}, + {"0.1", 2, true, nat{1}, 2, -1, nil, 0}, + {"0.12", 2, true, nat{1}, 2, -1, nil, '2'}, + {"0b0.1", 0, true, nat{1}, 2, -1, nil, 0}, + {"0B0.12", 0, true, nat{1}, 2, -1, nil, '2'}, + {"0o0.7", 0, true, nat{7}, 8, -1, nil, 0}, + {"0O0.78", 0, true, nat{7}, 8, -1, nil, '8'}, + {"0xdead.beef", 0, true, nat{0xdeadbeef}, 16, -4, nil, 0}, + + // valid, with separators + {"1_000", 0, false, nat{1000}, 10, 4, nil, 0}, + {"0_466", 0, false, nat{0466}, 8, 3, nil, 0}, + {"0o_600", 0, false, nat{0600}, 8, 3, nil, 0}, + {"0x_f0_0d", 0, false, nat{0xf00d}, 16, 4, nil, 0}, + {"0b1000_0001", 0, false, nat{0x81}, 2, 8, nil, 0}, + {"1_000.000_1", 0, true, nat{10000001}, 10, -4, nil, 0}, + {"0x_f00d.1e", 0, true, nat{0xf00d1e}, 16, -2, nil, 0}, + {"0x_f00d.1E2", 0, true, nat{0xf00d1e2}, 16, -3, nil, 0}, + {"0x_f00d.1eg", 0, true, nat{0xf00d1e}, 16, -2, nil, 'g'}, } func TestScanBase(t *testing.T) { for _, a := range natScanTests { r := strings.NewReader(a.s) x, b, count, err := nat(nil).scan(r, a.base, a.frac) - if err == nil && !a.ok { - t.Errorf("scan%+v\n\texpected error", a) - } - if err != nil { - if a.ok { - t.Errorf("scan%+v\n\tgot error = %s", a, err) - } - continue + if err != a.err { + t.Errorf("scan%+v\n\tgot error = %v; want %v", a, err, a.err) } if x.cmp(a.x) != 0 { t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) diff --git a/src/math/big/ratconv.go b/src/math/big/ratconv.go index bd2509f168..07288ca94f 100644 --- a/src/math/big/ratconv.go +++ b/src/math/big/ratconv.go @@ -87,7 +87,7 @@ func (z *Rat) SetString(s string) (*Rat, bool) { // exponent var exp int64 - exp, _, err = scanExponent(r, false) + exp, _, err = scanExponent(r, false, false) if err != nil { return nil, false } @@ -129,75 +129,96 @@ func (z *Rat) SetString(s string) (*Rat, bool) { return z, true } -// scanExponent scans the longest possible prefix of r representing a decimal -// ('e', 'E') or binary ('p', 'P') exponent, if any. It returns the exponent, -// the exponent base (10 or 2), or a read or syntax error, if any. +// scanExponent scans the longest possible prefix of r representing a base 10 +// (``e'', ``E'') or a base 2 (``p'', ``P'') exponent, if any. It returns the +// exponent, the exponent base (10 or 2), or a read or syntax error, if any. +// +// If sepOk is set, an underscore character ``_'' may appear between successive +// exponent digits; such underscores do not change the value of the exponent. +// Incorrect placement of underscores is reported as an error if there are no +// other errors. If sepOk is not set, underscores are not recognized and thus +// terminate scanning like any other character that is not a valid digit. // // exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . // sign = "+" | "-" . -// digits = digit { digit } . +// digits = digit { [ '_' ] digit } . // digit = "0" ... "9" . // -// A binary exponent is only permitted if binExpOk is set. -func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err error) { - base = 10 - - var ch byte - if ch, err = r.ReadByte(); err != nil { +// A base 2 exponent is only permitted if base2ok is set. +func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) { + // one char look-ahead + ch, err := r.ReadByte() + if err != nil { if err == io.EOF { - err = nil // no exponent; same as e0 + err = nil } - return + return 0, 10, err } + // exponent char switch ch { case 'e', 'E': - // ok + base = 10 case 'p', 'P': - if binExpOk { + if base2ok { base = 2 break // ok } fallthrough // binary exponent not permitted default: - r.UnreadByte() - return // no exponent; same as e0 - } - - var neg bool - if neg, err = scanSign(r); err != nil { - return + r.UnreadByte() // ch does not belong to exponent anymore + return 0, 10, nil } + // sign var digits []byte - if neg { - digits = append(digits, '-') + ch, err = r.ReadByte() + if err == nil && (ch == '+' || ch == '-') { + if ch == '-' { + digits = append(digits, '-') + } + ch, err = r.ReadByte() } - // no need to use nat.scan for exponent digits - // since we only care about int64 values - the - // from-scratch scan is easy enough and faster - for i := 0; ; i++ { - if ch, err = r.ReadByte(); err != nil { - if err != io.EOF || i == 0 { - return - } - err = nil - break // i > 0 - } - if ch < '0' || '9' < ch { - if i == 0 { - r.UnreadByte() - err = fmt.Errorf("invalid exponent (missing digits)") - return - } - break // i > 0 - } - digits = append(digits, ch) - } - // i > 0 => we have at least one digit + // prev encodes the previously seen char: it is one + // of '_', '0' (a digit), or '.' (anything else). A + // valid separator '_' may only occur after a digit. + prev := '.' + invalSep := false + + // exponent value + hasDigits := false + for err == nil { + if '0' <= ch && ch <= '9' { + digits = append(digits, ch) + prev = '0' + hasDigits = true + } else if ch == '_' && sepOk { + if prev != '0' { + invalSep = true + } + prev = '_' + } else { + r.UnreadByte() // ch does not belong to number anymore + break + } + ch, err = r.ReadByte() + } + + if err == io.EOF { + err = nil + } + if err == nil && !hasDigits { + err = errNoDigits + } + if err == nil { + exp, err = strconv.ParseInt(string(digits), 10, 64) + } + // other errors take precedence over invalid separators + if err == nil && (invalSep || prev == '_') { + err = errInvalSep + } - exp, err = strconv.ParseInt(string(digits), 10, 64) return } diff --git a/src/math/big/ratconv_test.go b/src/math/big/ratconv_test.go index bdc6a3e1b0..dea4d1933a 100644 --- a/src/math/big/ratconv_test.go +++ b/src/math/big/ratconv_test.go @@ -7,25 +7,91 @@ package big import ( "bytes" "fmt" + "io" "math" "strconv" "strings" "testing" ) +var exponentTests = []struct { + s string // string to be scanned + base2ok bool // true if 'p'/'P' exponents are accepted + sepOk bool // true if '_' separators are accepted + x int64 // expected exponent + b int // expected exponent base + err error // expected error + next rune // next character (or 0, if at EOF) +}{ + // valid, without separators + {"", false, false, 0, 10, nil, 0}, + {"1", false, false, 0, 10, nil, '1'}, + {"e0", false, false, 0, 10, nil, 0}, + {"E1", false, false, 1, 10, nil, 0}, + {"e+10", false, false, 10, 10, nil, 0}, + {"e-10", false, false, -10, 10, nil, 0}, + {"e123456789a", false, false, 123456789, 10, nil, 'a'}, + {"p", false, false, 0, 10, nil, 'p'}, + {"P+100", false, false, 0, 10, nil, 'P'}, + {"p0", true, false, 0, 2, nil, 0}, + {"P-123", true, false, -123, 2, nil, 0}, + {"p+0a", true, false, 0, 2, nil, 'a'}, + {"p+123__", true, false, 123, 2, nil, '_'}, // '_' is not part of the number anymore + + // valid, with separators + {"e+1_0", false, true, 10, 10, nil, 0}, + {"e-1_0", false, true, -10, 10, nil, 0}, + {"e123_456_789a", false, true, 123456789, 10, nil, 'a'}, + {"P+1_00", false, true, 0, 10, nil, 'P'}, + {"p-1_2_3", true, true, -123, 2, nil, 0}, + + // invalid: no digits + {"e", false, false, 0, 10, errNoDigits, 0}, + {"ef", false, false, 0, 10, errNoDigits, 'f'}, + {"e+", false, false, 0, 10, errNoDigits, 0}, + {"E-x", false, false, 0, 10, errNoDigits, 'x'}, + {"p", true, false, 0, 2, errNoDigits, 0}, + {"P-", true, false, 0, 2, errNoDigits, 0}, + {"p+e", true, false, 0, 2, errNoDigits, 'e'}, + {"e+_x", false, true, 0, 10, errNoDigits, 'x'}, + + // invalid: incorrect use of separator + {"e0_", false, true, 0, 10, errInvalSep, 0}, + {"e_0", false, true, 0, 10, errInvalSep, 0}, + {"e-1_2__3", false, true, -123, 10, errInvalSep, 0}, +} + +func TestScanExponent(t *testing.T) { + for _, a := range exponentTests { + r := strings.NewReader(a.s) + x, b, err := scanExponent(r, a.base2ok, a.sepOk) + if err != a.err { + t.Errorf("scanExponent%+v\n\tgot error = %v; want %v", a, err, a.err) + } + if x != a.x { + t.Errorf("scanExponent%+v\n\tgot z = %v; want %v", a, x, a.x) + } + if b != a.b { + t.Errorf("scanExponent%+v\n\tgot b = %d; want %d", a, b, a.b) + } + next, _, err := r.ReadRune() + if err == io.EOF { + next = 0 + err = nil + } + if err == nil && next != a.next { + t.Errorf("scanExponent%+v\n\tgot next = %q; want %q", a, next, a.next) + } + } +} + type StringTest struct { in, out string ok bool } var setStringTests = []StringTest{ - {"0", "0", true}, - {"-0", "0", true}, - {"1", "1", true}, - {"-1", "-1", true}, - {"1.", "1", true}, - {"1e0", "1", true}, - {"1.e1", "10", true}, + // invalid {in: "1e"}, {in: "1.e"}, {in: "1e+14e-5"}, @@ -33,6 +99,20 @@ var setStringTests = []StringTest{ {in: "r"}, {in: "a/b"}, {in: "a.b"}, + {in: "1/0"}, + {in: "4/3/2"}, // issue 17001 + {in: "4/3/"}, + {in: "4/3."}, + {in: "4/"}, + + // valid + {"0", "0", true}, + {"-0", "0", true}, + {"1", "1", true}, + {"-1", "-1", true}, + {"1.", "1", true}, + {"1e0", "1", true}, + {"1.e1", "10", true}, {"-0.1", "-1/10", true}, {"-.1", "-1/10", true}, {"2/4", "1/2", true}, @@ -49,24 +129,35 @@ var setStringTests = []StringTest{ {"106/141787961317645621392", "53/70893980658822810696", true}, {"204211327800791583.81095", "4084226556015831676219/20000", true}, {"0e9999999999", "0", true}, // issue #16176 - {in: "1/0"}, - {in: "4/3/2"}, // issue 17001 - {in: "4/3/"}, - {in: "4/3."}, - {in: "4/"}, } // These are not supported by fmt.Fscanf. var setStringTests2 = []StringTest{ + // invalid + {in: "4/3x"}, + + // invalid with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {in: "10_/1"}, + {in: "_10/1"}, + {in: "1/1__0"}, + {in: "1_000.0"}, // floats are base 10 which doesn't permit separators; see also issue #29799 + + // valid {"0b1000/3", "8/3", true}, {"0B1000/0x8", "1", true}, - {"-010/1", "-8", true}, // TODO(gri) should we even permit octal here? + {"-010/1", "-8", true}, {"-010.", "-10", true}, {"-0o10/1", "-8", true}, {"0x10/1", "16", true}, {"0x10/0x20", "1/2", true}, - {in: "4/3x"}, - // TODO(gri) add more tests + + // valid with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {"0b_1000/3", "8/3", true}, + {"0B_10_00/0x8", "1", true}, + {"0xdead/0B1101_1110_1010_1101", "1", true}, + {"0B1101_1110_1010_1101/0XD_E_A_D", "1", true}, } func TestRatSetString(t *testing.T) {