diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go index b685b2a288..88216f5600 100644 --- a/src/math/big/floatconv.go +++ b/src/math/big/floatconv.go @@ -55,7 +55,7 @@ func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) { // exponent var exp int64 var ebase int - exp, ebase, err = scanExponent(r, true) + exp, ebase, err = scanExponent(r, true, base == 0) if err != nil { return } @@ -216,20 +216,29 @@ func (z *Float) pow5(n uint64) *Float { // point number with a mantissa in the given conversion base (the exponent // is always a decimal number), or a string representing an infinite value. // +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number, or the returned +// digit count. Incorrect placement of underscores is reported as an +// error if there are no other errors. If base != 0, underscores are +// not recognized and thus terminate scanning like any other character +// that is not a valid radix point or digit. +// // It sets z to the (possibly rounded) value of the corresponding floating- // point value, and returns z, the actual base b, and an error err, if any. // The entire string (not just a prefix) must be consumed for success. // If z's precision is 0, it is changed to 64 before rounding takes effect. // The number must be of the form: // -// number = [ sign ] [ prefix ] mantissa [ exponent ] | infinity . -// sign = "+" | "-" . -// prefix = "0" ( "b" | "B" | "o" | "O" | "x" | "X" ) . -// mantissa = digits | digits "." [ digits ] | "." digits . -// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . -// digits = digit { digit } . -// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . -// infinity = [ sign ] ( "inf" | "Inf" ) . +// number = [ sign ] ( float | "inf" | "Inf" ) . +// sign = "+" | "-" . +// float = ( mantissa | prefix pmantissa ) [ exponent ] . +// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . +// mantissa = digits "." [ digits ] | digits | "." digits . +// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits . +// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . +// digits = digit { [ "_" ] digit } . +// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . // // The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base // argument will lead to a run-time panic. @@ -240,11 +249,11 @@ func (z *Float) pow5(n uint64) *Float { // no prefix is accepted. The octal prefix "0" is not supported (a leading // "0" is simply considered a "0"). // -// A "p" or "P" exponent indicates a binary (rather then decimal) exponent; -// for instance "0x1.fffffffffffffp1023" (using base 0) represents the -// maximum float64 value. For hexadecimal mantissae, the exponent must -// be binary, if present (an "e" or "E" exponent indicator cannot be -// distinguished from a mantissa digit). +// A "p" or "P" exponent indicates a base 2 (rather then base 10) exponent; +// for instance, "0x1.fffffffffffffp1023" (using base 0) represents the +// maximum float64 value. For hexadecimal mantissae, the exponent character +// must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator +// cannot be distinguished from a mantissa digit). // // The returned *Float f is nil and the value of z is valid but not // defined if an error is reported. diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go index f32dd8928b..c6c6ba63e5 100644 --- a/src/math/big/floatconv_test.go +++ b/src/math/big/floatconv_test.go @@ -72,6 +72,21 @@ func TestFloatSetFloat64String(t *testing.T) { {"infinity", nan}, {"foobar", nan}, + // invalid underscores + {"_", nan}, + {"0_", nan}, + {"1__0", nan}, + {"123_.", nan}, + {"123._", nan}, + {"123._4", nan}, + {"1_2.3_4_", nan}, + {"_.123", nan}, + {"_123.456", nan}, + {"10._0", nan}, + {"10.0e_0", nan}, + {"10.0e0_", nan}, + {"0P-0__0", nan}, + // misc decimal values {"3.14159265", 3.14159265}, {"-687436.79457e-245", -687436.79457e-245}, @@ -142,6 +157,16 @@ func TestFloatSetFloat64String(t *testing.T) { {"-0X0.00008P+16", -0.5}, {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64}, {"0x1.fffffffffffffp1023", math.MaxFloat64}, + + // underscores + {"0_0", 0}, + {"1_000.", 1000}, + {"1_2_3.4_5_6", 123.456}, + {"1.0e0_0", 1}, + {"1p+1_0", 1024}, + {"0b_1000", 0x8}, + {"0b_1011_1101", 0xbd}, + {"0x_f0_0d_1eP+0_8", 0xf00d1e00}, } { var x Float x.SetPrec(53) diff --git a/src/math/big/int.go b/src/math/big/int.go index eb0285c48f..afad1bc961 100644 --- a/src/math/big/int.go +++ b/src/math/big/int.go @@ -401,16 +401,24 @@ func (x *Int) IsUint64() bool { // (not just a prefix) must be valid for success. If SetString fails, // the value of z is undefined but the returned value is nil. // -// The base argument must be 0 or a value between 2 and MaxBase. If the base -// is 0, the string prefix determines the actual conversion base. A prefix of -// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a -// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. +// The base argument must be 0 or a value between 2 and MaxBase. +// For base 0, the number prefix determines the actual base: A prefix of +// ``0b'' or ``0B'' selects base 2, ``0'', ``0o'' or ``0O'' selects base 8, +// and ``0x'' or ``0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. // // For bases <= 36, lower and upper case letters are considered the same: // The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35. // For bases > 36, the upper case letters 'A' to 'Z' represent the digit // values 36 to 61. // +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as an error if there +// are no other errors. If base != 0, underscores are not recognized +// and act like any other character that is not a valid digit. +// func (z *Int) SetString(s string, base int) (*Int, bool) { return z.setFromScanner(strings.NewReader(s), base) } diff --git a/src/math/big/intconv_test.go b/src/math/big/intconv_test.go index d625b6aa3d..5ba29263a6 100644 --- a/src/math/big/intconv_test.go +++ b/src/math/big/intconv_test.go @@ -34,6 +34,16 @@ var stringTests = []struct { {in: "0xg", base: 0}, {in: "g", base: 16}, + // invalid inputs with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {in: "_"}, + {in: "0_"}, + {in: "_0"}, + {in: "-1__0"}, + {in: "0x10_"}, + {in: "1_000", base: 10}, // separators are not permitted for bases != 0 + {in: "d_e_a_d", base: 16}, + // valid inputs {"0", "0", 0, 0, true}, {"0", "0", 10, 0, true}, @@ -67,6 +77,13 @@ var stringTests = []struct { {"A", "A", 37, 36, true}, {"ABCXYZ", "abcxyz", 36, 623741435, true}, {"ABCXYZ", "ABCXYZ", 62, 33536793425, true}, + + // valid input with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {"1_000", "1000", 0, 1000, true}, + {"0b_1010", "10", 0, 10, true}, + {"+0o_660", "432", 0, 0660, true}, + {"-0xF00D_1E", "-15731998", 0, -0xf00d1e, true}, } func TestIntText(t *testing.T) { diff --git a/src/math/big/natconv.go b/src/math/big/natconv.go index c3c4115097..42d1cccf6f 100644 --- a/src/math/big/natconv.go +++ b/src/math/big/natconv.go @@ -55,16 +55,31 @@ func pow(x Word, n int) (p Word) { return } +// scan errors +var ( + errNoDigits = errors.New("number has no digits") + errInvalSep = errors.New("'_' must separate successive digits") +) + // scan scans the number corresponding to the longest possible prefix // from r representing an unsigned number in a given conversion base. -// It returns the corresponding natural number res, the actual base b, +// scan returns the corresponding natural number res, the actual base b, // a digit count, and a read or syntax error err, if any. // -// number = [ prefix ] mantissa . -// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . -// mantissa = digits | digits "." [ digits ] | "." digits . -// digits = digit { digit } . -// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number, or the returned +// digit count. Incorrect placement of underscores is reported as an +// error if there are no other errors. If base != 0, underscores are +// not recognized and thus terminate scanning like any other character +// that is not a valid radix point or digit. +// +// number = mantissa | prefix pmantissa . +// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . +// mantissa = digits "." [ digits ] | digits | "." digits . +// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits . +// digits = digit { [ "_" ] digit } . +// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . // // Unless fracOk is set, the base argument must be 0 or a value between // 2 and MaxBase. If fracOk is set, the base argument must be one of @@ -92,53 +107,51 @@ func pow(x Word, n int) (p Word) { // In this case, the actual value of the scanned number is res * b**count. // func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) { - // reject illegal bases + // reject invalid bases baseOk := base == 0 || !fracOk && 2 <= base && base <= MaxBase || fracOk && (base == 2 || base == 8 || base == 10 || base == 16) if !baseOk { - panic(fmt.Sprintf("illegal number base %d", base)) + panic(fmt.Sprintf("invalid number base %d", base)) } + // prev encodes the previously seen char: it is one + // of '_', '0' (a digit), or '.' (anything else). A + // valid separator '_' may only occur after a digit + // and if base == 0. + prev := '.' + invalSep := false + // one char look-ahead ch, err := r.ReadByte() - if err != nil { - return // io.EOF is also an error in this case - } // determine actual base b, prefix := base, 0 if base == 0 { // actual base is 10 unless there's a base prefix b = 10 - if ch == '0' { + if err == nil && ch == '0' { + prev = '0' count = 1 ch, err = r.ReadByte() - if err != nil { - if err == io.EOF { - err = nil // not an error; input is "0" - res = z[:0] + if err == nil { + // possibly one of 0b, 0B, 0o, 0O, 0x, 0X + switch ch { + case 'b', 'B': + b, prefix = 2, 'b' + case 'o', 'O': + b, prefix = 8, 'o' + case 'x', 'X': + b, prefix = 16, 'x' + default: + if !fracOk { + b, prefix = 8, '0' + } } - return - } - // possibly one of 0b, 0B, 0o, 0O, 0x, 0X - switch ch { - case 'b', 'B': - b, prefix = 2, 'b' - case 'o', 'O': - b, prefix = 8, 'o' - case 'x', 'X': - b, prefix = 16, 'x' - default: - if !fracOk { - b, prefix = 8, '0' - } - } - if prefix != 0 { - count = 0 // prefix is not counted - if prefix != '0' { - if ch, err = r.ReadByte(); err != nil { - return // io.EOF is also an error in this case + if prefix != 0 { + count = 0 // prefix is not counted + if prefix != '0' { + ch, err = r.ReadByte() } } } @@ -155,76 +168,76 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in di := Word(0) // 0 <= di < b1**i < bn i := 0 // 0 <= i < n dp := -1 // position of decimal point - for { - if fracOk && ch == '.' { + for err == nil { + if ch == '.' && fracOk { fracOk = false + if prev == '_' { + invalSep = true + } + prev = '.' dp = count - // advance - if ch, err = r.ReadByte(); err != nil { - if err == io.EOF { - err = nil - break + } else if ch == '_' && base == 0 { + if prev != '0' { + invalSep = true + } + prev = '_' + } else { + // convert rune into digit value d1 + var d1 Word + switch { + case '0' <= ch && ch <= '9': + d1 = Word(ch - '0') + case 'a' <= ch && ch <= 'z': + d1 = Word(ch - 'a' + 10) + case 'A' <= ch && ch <= 'Z': + if b <= maxBaseSmall { + d1 = Word(ch - 'A' + 10) + } else { + d1 = Word(ch - 'A' + maxBaseSmall) } - return + default: + d1 = MaxBase + 1 } - } - - // convert rune into digit value d1 - var d1 Word - switch { - case '0' <= ch && ch <= '9': - d1 = Word(ch - '0') - case 'a' <= ch && ch <= 'z': - d1 = Word(ch - 'a' + 10) - case 'A' <= ch && ch <= 'Z': - if b <= maxBaseSmall { - d1 = Word(ch - 'A' + 10) - } else { - d1 = Word(ch - 'A' + maxBaseSmall) - } - default: - d1 = MaxBase + 1 - } - if d1 >= b1 { - r.UnreadByte() // ch does not belong to number anymore - break - } - count++ - - // collect d1 in di - di = di*b1 + d1 - i++ - - // if di is "full", add it to the result - if i == n { - z = z.mulAddWW(z, bn, di) - di = 0 - i = 0 - } - - // advance - if ch, err = r.ReadByte(); err != nil { - if err == io.EOF { - err = nil + if d1 >= b1 { + r.UnreadByte() // ch does not belong to number anymore break } - return + prev = '0' + count++ + + // collect d1 in di + di = di*b1 + d1 + i++ + + // if di is "full", add it to the result + if i == n { + z = z.mulAddWW(z, bn, di) + di = 0 + i = 0 + } } + + ch, err = r.ReadByte() + } + + if err == io.EOF { + err = nil + } + + // other errors take precedence over invalid separators + if err == nil && (invalSep || prev == '_') { + err = errInvalSep } if count == 0 { // no digits found if prefix == '0' { - // there was only the octal prefix 0 (possibly followed by digits > 7); - // count as one digit and return base 10, not 8 - count = 1 - b = 10 - } else { - err = errors.New("syntax error scanning number") + // there was only the octal prefix 0 (possibly followed by separators and digits > 7); + // interpret as decimal 0 + return z[:0], 10, 1, err } - return + err = errNoDigits // fall through; result will be 0 } - // count > 0 // add remaining digits to result if i > 0 { @@ -232,9 +245,9 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in } res = z.norm() - // adjust for fraction, if any + // adjust count for fraction, if any if dp >= 0 { - // 0 <= dp <= count > 0 + // 0 <= dp <= count count = dp - count } diff --git a/src/math/big/natconv_test.go b/src/math/big/natconv_test.go index 645e2b8434..9c2acca07e 100644 --- a/src/math/big/natconv_test.go +++ b/src/math/big/natconv_test.go @@ -109,92 +109,126 @@ var natScanTests = []struct { x nat // expected nat b int // expected base count int // expected digit count - ok bool // expected success + err error // expected error next rune // next character (or 0, if at EOF) }{ - // invalid: no mantissa - {}, - {s: "?"}, - {base: 10}, - {base: 36}, - {base: 62}, - {s: "?", base: 10}, - {s: "0b"}, - {s: "0o"}, - {s: "0x"}, - {s: "0b2"}, - {s: "0B2"}, - {s: "0o8"}, - {s: "0O8"}, - {s: "0xg"}, - {s: "0Xg"}, - {s: "345", base: 2}, + // invalid: no digits + {"", 0, false, nil, 10, 0, errNoDigits, 0}, + {"_", 0, false, nil, 10, 0, errNoDigits, 0}, + {"?", 0, false, nil, 10, 0, errNoDigits, '?'}, + {"?", 10, false, nil, 10, 0, errNoDigits, '?'}, + {"", 10, false, nil, 10, 0, errNoDigits, 0}, + {"", 36, false, nil, 36, 0, errNoDigits, 0}, + {"", 62, false, nil, 62, 0, errNoDigits, 0}, + {"0b", 0, false, nil, 2, 0, errNoDigits, 0}, + {"0o", 0, false, nil, 8, 0, errNoDigits, 0}, + {"0x", 0, false, nil, 16, 0, errNoDigits, 0}, + {"0x_", 0, false, nil, 16, 0, errNoDigits, 0}, + {"0b2", 0, false, nil, 2, 0, errNoDigits, '2'}, + {"0B2", 0, false, nil, 2, 0, errNoDigits, '2'}, + {"0o8", 0, false, nil, 8, 0, errNoDigits, '8'}, + {"0O8", 0, false, nil, 8, 0, errNoDigits, '8'}, + {"0xg", 0, false, nil, 16, 0, errNoDigits, 'g'}, + {"0Xg", 0, false, nil, 16, 0, errNoDigits, 'g'}, + {"345", 2, false, nil, 2, 0, errNoDigits, '3'}, // invalid: incorrect use of decimal point - {s: ".0"}, - {s: ".0", base: 10}, - {s: ".", base: 0}, - {s: "0x.0"}, + {"._", 0, true, nil, 10, 0, errNoDigits, 0}, + {".0", 0, false, nil, 10, 0, errNoDigits, '.'}, + {".0", 10, false, nil, 10, 0, errNoDigits, '.'}, + {".", 0, true, nil, 10, 0, errNoDigits, 0}, + {"0x.", 0, true, nil, 16, 0, errNoDigits, 0}, + {"0x.g", 0, true, nil, 16, 0, errNoDigits, 'g'}, + {"0x.0", 0, false, nil, 16, 0, errNoDigits, '.'}, + + // invalid: incorrect use of separators + {"_0", 0, false, nil, 10, 1, errInvalSep, 0}, + {"0_", 0, false, nil, 10, 1, errInvalSep, 0}, + {"0__0", 0, false, nil, 8, 1, errInvalSep, 0}, + {"0x___0", 0, false, nil, 16, 1, errInvalSep, 0}, + {"0_x", 0, false, nil, 10, 1, errInvalSep, 'x'}, + {"0_8", 0, false, nil, 10, 1, errInvalSep, '8'}, + {"123_.", 0, true, nat{123}, 10, 0, errInvalSep, 0}, + {"._123", 0, true, nat{123}, 10, -3, errInvalSep, 0}, + {"0b__1000", 0, false, nat{0x8}, 2, 4, errInvalSep, 0}, + {"0o60___0", 0, false, nat{0600}, 8, 3, errInvalSep, 0}, + {"0466_", 0, false, nat{0466}, 8, 3, errInvalSep, 0}, + {"01234567_8", 0, false, nat{01234567}, 8, 7, errInvalSep, '8'}, + {"1_.", 0, true, nat{1}, 10, 0, errInvalSep, 0}, + {"0._1", 0, true, nat{1}, 10, -1, errInvalSep, 0}, + {"2.7_", 0, true, nat{27}, 10, -1, errInvalSep, 0}, + {"0x1.0_", 0, true, nat{0x10}, 16, -1, errInvalSep, 0}, + + // valid: separators are not accepted for base != 0 + {"0_", 10, false, nil, 10, 1, nil, '_'}, + {"1__0", 10, false, nat{1}, 10, 1, nil, '_'}, + {"0__8", 10, false, nil, 10, 1, nil, '_'}, + {"xy_z_", 36, false, nat{33*36 + 34}, 36, 2, nil, '_'}, // valid, no decimal point - {"0", 0, false, nil, 10, 1, true, 0}, - {"0", 10, false, nil, 10, 1, true, 0}, - {"0", 36, false, nil, 36, 1, true, 0}, - {"0", 62, false, nil, 62, 1, true, 0}, - {"1", 0, false, nat{1}, 10, 1, true, 0}, - {"1", 10, false, nat{1}, 10, 1, true, 0}, - {"0 ", 0, false, nil, 10, 1, true, ' '}, - {"00 ", 0, false, nil, 8, 1, true, ' '}, // octal 0 - {"0b1", 0, false, nat{1}, 2, 1, true, 0}, - {"0B11000101", 0, false, nat{0xc5}, 2, 8, true, 0}, - {"0B110001012", 0, false, nat{0xc5}, 2, 8, true, '2'}, - {"07", 0, false, nat{7}, 8, 1, true, 0}, - {"08", 0, false, nil, 10, 1, true, '8'}, - {"08", 10, false, nat{8}, 10, 2, true, 0}, - {"018", 0, false, nat{1}, 8, 1, true, '8'}, - {"0o7", 0, false, nat{7}, 8, 1, true, 0}, - {"0o18", 0, false, nat{1}, 8, 1, true, '8'}, - {"0O17", 0, false, nat{017}, 8, 2, true, 0}, - {"03271", 0, false, nat{03271}, 8, 4, true, 0}, - {"10ab", 0, false, nat{10}, 10, 2, true, 'a'}, - {"1234567890", 0, false, nat{1234567890}, 10, 10, true, 0}, - {"A", 36, false, nat{10}, 36, 1, true, 0}, - {"A", 37, false, nat{36}, 37, 1, true, 0}, - {"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, 0}, - {"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, '?'}, - {"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, true, '?'}, - {"0x", 16, false, nil, 16, 1, true, 'x'}, - {"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, true, 0}, - {"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, true, 0}, + {"0", 0, false, nil, 10, 1, nil, 0}, + {"0", 36, false, nil, 36, 1, nil, 0}, + {"0", 62, false, nil, 62, 1, nil, 0}, + {"1", 0, false, nat{1}, 10, 1, nil, 0}, + {"1", 10, false, nat{1}, 10, 1, nil, 0}, + {"0 ", 0, false, nil, 10, 1, nil, ' '}, + {"00 ", 0, false, nil, 8, 1, nil, ' '}, // octal 0 + {"0b1", 0, false, nat{1}, 2, 1, nil, 0}, + {"0B11000101", 0, false, nat{0xc5}, 2, 8, nil, 0}, + {"0B110001012", 0, false, nat{0xc5}, 2, 8, nil, '2'}, + {"07", 0, false, nat{7}, 8, 1, nil, 0}, + {"08", 0, false, nil, 10, 1, nil, '8'}, + {"08", 10, false, nat{8}, 10, 2, nil, 0}, + {"018", 0, false, nat{1}, 8, 1, nil, '8'}, + {"0o7", 0, false, nat{7}, 8, 1, nil, 0}, + {"0o18", 0, false, nat{1}, 8, 1, nil, '8'}, + {"0O17", 0, false, nat{017}, 8, 2, nil, 0}, + {"03271", 0, false, nat{03271}, 8, 4, nil, 0}, + {"10ab", 0, false, nat{10}, 10, 2, nil, 'a'}, + {"1234567890", 0, false, nat{1234567890}, 10, 10, nil, 0}, + {"A", 36, false, nat{10}, 36, 1, nil, 0}, + {"A", 37, false, nat{36}, 37, 1, nil, 0}, + {"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, 0}, + {"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, '?'}, + {"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, nil, '?'}, + {"0x", 16, false, nil, 16, 1, nil, 'x'}, + {"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0}, + {"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0}, // valid, with decimal point - {"0.", 0, false, nil, 10, 1, true, '.'}, - {"0.", 10, true, nil, 10, 0, true, 0}, - {"0.1.2", 10, true, nat{1}, 10, -1, true, '.'}, - {".000", 10, true, nil, 10, -3, true, 0}, - {"12.3", 10, true, nat{123}, 10, -1, true, 0}, - {"012.345", 10, true, nat{12345}, 10, -3, true, 0}, - {"0.1", 0, true, nat{1}, 10, -1, true, 0}, - {"0.1", 2, true, nat{1}, 2, -1, true, 0}, - {"0.12", 2, true, nat{1}, 2, -1, true, '2'}, - {"0b0.1", 0, true, nat{1}, 2, -1, true, 0}, - {"0B0.12", 0, true, nat{1}, 2, -1, true, '2'}, - {"0o0.7", 0, true, nat{7}, 8, -1, true, 0}, - {"0O0.78", 0, true, nat{7}, 8, -1, true, '8'}, + {"0.", 0, false, nil, 10, 1, nil, '.'}, + {"0.", 10, true, nil, 10, 0, nil, 0}, + {"0.1.2", 10, true, nat{1}, 10, -1, nil, '.'}, + {".000", 10, true, nil, 10, -3, nil, 0}, + {"12.3", 10, true, nat{123}, 10, -1, nil, 0}, + {"012.345", 10, true, nat{12345}, 10, -3, nil, 0}, + {"0.1", 0, true, nat{1}, 10, -1, nil, 0}, + {"0.1", 2, true, nat{1}, 2, -1, nil, 0}, + {"0.12", 2, true, nat{1}, 2, -1, nil, '2'}, + {"0b0.1", 0, true, nat{1}, 2, -1, nil, 0}, + {"0B0.12", 0, true, nat{1}, 2, -1, nil, '2'}, + {"0o0.7", 0, true, nat{7}, 8, -1, nil, 0}, + {"0O0.78", 0, true, nat{7}, 8, -1, nil, '8'}, + {"0xdead.beef", 0, true, nat{0xdeadbeef}, 16, -4, nil, 0}, + + // valid, with separators + {"1_000", 0, false, nat{1000}, 10, 4, nil, 0}, + {"0_466", 0, false, nat{0466}, 8, 3, nil, 0}, + {"0o_600", 0, false, nat{0600}, 8, 3, nil, 0}, + {"0x_f0_0d", 0, false, nat{0xf00d}, 16, 4, nil, 0}, + {"0b1000_0001", 0, false, nat{0x81}, 2, 8, nil, 0}, + {"1_000.000_1", 0, true, nat{10000001}, 10, -4, nil, 0}, + {"0x_f00d.1e", 0, true, nat{0xf00d1e}, 16, -2, nil, 0}, + {"0x_f00d.1E2", 0, true, nat{0xf00d1e2}, 16, -3, nil, 0}, + {"0x_f00d.1eg", 0, true, nat{0xf00d1e}, 16, -2, nil, 'g'}, } func TestScanBase(t *testing.T) { for _, a := range natScanTests { r := strings.NewReader(a.s) x, b, count, err := nat(nil).scan(r, a.base, a.frac) - if err == nil && !a.ok { - t.Errorf("scan%+v\n\texpected error", a) - } - if err != nil { - if a.ok { - t.Errorf("scan%+v\n\tgot error = %s", a, err) - } - continue + if err != a.err { + t.Errorf("scan%+v\n\tgot error = %v; want %v", a, err, a.err) } if x.cmp(a.x) != 0 { t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) diff --git a/src/math/big/ratconv.go b/src/math/big/ratconv.go index bd2509f168..07288ca94f 100644 --- a/src/math/big/ratconv.go +++ b/src/math/big/ratconv.go @@ -87,7 +87,7 @@ func (z *Rat) SetString(s string) (*Rat, bool) { // exponent var exp int64 - exp, _, err = scanExponent(r, false) + exp, _, err = scanExponent(r, false, false) if err != nil { return nil, false } @@ -129,75 +129,96 @@ func (z *Rat) SetString(s string) (*Rat, bool) { return z, true } -// scanExponent scans the longest possible prefix of r representing a decimal -// ('e', 'E') or binary ('p', 'P') exponent, if any. It returns the exponent, -// the exponent base (10 or 2), or a read or syntax error, if any. +// scanExponent scans the longest possible prefix of r representing a base 10 +// (``e'', ``E'') or a base 2 (``p'', ``P'') exponent, if any. It returns the +// exponent, the exponent base (10 or 2), or a read or syntax error, if any. +// +// If sepOk is set, an underscore character ``_'' may appear between successive +// exponent digits; such underscores do not change the value of the exponent. +// Incorrect placement of underscores is reported as an error if there are no +// other errors. If sepOk is not set, underscores are not recognized and thus +// terminate scanning like any other character that is not a valid digit. // // exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . // sign = "+" | "-" . -// digits = digit { digit } . +// digits = digit { [ '_' ] digit } . // digit = "0" ... "9" . // -// A binary exponent is only permitted if binExpOk is set. -func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err error) { - base = 10 - - var ch byte - if ch, err = r.ReadByte(); err != nil { +// A base 2 exponent is only permitted if base2ok is set. +func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) { + // one char look-ahead + ch, err := r.ReadByte() + if err != nil { if err == io.EOF { - err = nil // no exponent; same as e0 + err = nil } - return + return 0, 10, err } + // exponent char switch ch { case 'e', 'E': - // ok + base = 10 case 'p', 'P': - if binExpOk { + if base2ok { base = 2 break // ok } fallthrough // binary exponent not permitted default: - r.UnreadByte() - return // no exponent; same as e0 - } - - var neg bool - if neg, err = scanSign(r); err != nil { - return + r.UnreadByte() // ch does not belong to exponent anymore + return 0, 10, nil } + // sign var digits []byte - if neg { - digits = append(digits, '-') + ch, err = r.ReadByte() + if err == nil && (ch == '+' || ch == '-') { + if ch == '-' { + digits = append(digits, '-') + } + ch, err = r.ReadByte() } - // no need to use nat.scan for exponent digits - // since we only care about int64 values - the - // from-scratch scan is easy enough and faster - for i := 0; ; i++ { - if ch, err = r.ReadByte(); err != nil { - if err != io.EOF || i == 0 { - return - } - err = nil - break // i > 0 - } - if ch < '0' || '9' < ch { - if i == 0 { - r.UnreadByte() - err = fmt.Errorf("invalid exponent (missing digits)") - return - } - break // i > 0 - } - digits = append(digits, ch) - } - // i > 0 => we have at least one digit + // prev encodes the previously seen char: it is one + // of '_', '0' (a digit), or '.' (anything else). A + // valid separator '_' may only occur after a digit. + prev := '.' + invalSep := false + + // exponent value + hasDigits := false + for err == nil { + if '0' <= ch && ch <= '9' { + digits = append(digits, ch) + prev = '0' + hasDigits = true + } else if ch == '_' && sepOk { + if prev != '0' { + invalSep = true + } + prev = '_' + } else { + r.UnreadByte() // ch does not belong to number anymore + break + } + ch, err = r.ReadByte() + } + + if err == io.EOF { + err = nil + } + if err == nil && !hasDigits { + err = errNoDigits + } + if err == nil { + exp, err = strconv.ParseInt(string(digits), 10, 64) + } + // other errors take precedence over invalid separators + if err == nil && (invalSep || prev == '_') { + err = errInvalSep + } - exp, err = strconv.ParseInt(string(digits), 10, 64) return } diff --git a/src/math/big/ratconv_test.go b/src/math/big/ratconv_test.go index bdc6a3e1b0..dea4d1933a 100644 --- a/src/math/big/ratconv_test.go +++ b/src/math/big/ratconv_test.go @@ -7,25 +7,91 @@ package big import ( "bytes" "fmt" + "io" "math" "strconv" "strings" "testing" ) +var exponentTests = []struct { + s string // string to be scanned + base2ok bool // true if 'p'/'P' exponents are accepted + sepOk bool // true if '_' separators are accepted + x int64 // expected exponent + b int // expected exponent base + err error // expected error + next rune // next character (or 0, if at EOF) +}{ + // valid, without separators + {"", false, false, 0, 10, nil, 0}, + {"1", false, false, 0, 10, nil, '1'}, + {"e0", false, false, 0, 10, nil, 0}, + {"E1", false, false, 1, 10, nil, 0}, + {"e+10", false, false, 10, 10, nil, 0}, + {"e-10", false, false, -10, 10, nil, 0}, + {"e123456789a", false, false, 123456789, 10, nil, 'a'}, + {"p", false, false, 0, 10, nil, 'p'}, + {"P+100", false, false, 0, 10, nil, 'P'}, + {"p0", true, false, 0, 2, nil, 0}, + {"P-123", true, false, -123, 2, nil, 0}, + {"p+0a", true, false, 0, 2, nil, 'a'}, + {"p+123__", true, false, 123, 2, nil, '_'}, // '_' is not part of the number anymore + + // valid, with separators + {"e+1_0", false, true, 10, 10, nil, 0}, + {"e-1_0", false, true, -10, 10, nil, 0}, + {"e123_456_789a", false, true, 123456789, 10, nil, 'a'}, + {"P+1_00", false, true, 0, 10, nil, 'P'}, + {"p-1_2_3", true, true, -123, 2, nil, 0}, + + // invalid: no digits + {"e", false, false, 0, 10, errNoDigits, 0}, + {"ef", false, false, 0, 10, errNoDigits, 'f'}, + {"e+", false, false, 0, 10, errNoDigits, 0}, + {"E-x", false, false, 0, 10, errNoDigits, 'x'}, + {"p", true, false, 0, 2, errNoDigits, 0}, + {"P-", true, false, 0, 2, errNoDigits, 0}, + {"p+e", true, false, 0, 2, errNoDigits, 'e'}, + {"e+_x", false, true, 0, 10, errNoDigits, 'x'}, + + // invalid: incorrect use of separator + {"e0_", false, true, 0, 10, errInvalSep, 0}, + {"e_0", false, true, 0, 10, errInvalSep, 0}, + {"e-1_2__3", false, true, -123, 10, errInvalSep, 0}, +} + +func TestScanExponent(t *testing.T) { + for _, a := range exponentTests { + r := strings.NewReader(a.s) + x, b, err := scanExponent(r, a.base2ok, a.sepOk) + if err != a.err { + t.Errorf("scanExponent%+v\n\tgot error = %v; want %v", a, err, a.err) + } + if x != a.x { + t.Errorf("scanExponent%+v\n\tgot z = %v; want %v", a, x, a.x) + } + if b != a.b { + t.Errorf("scanExponent%+v\n\tgot b = %d; want %d", a, b, a.b) + } + next, _, err := r.ReadRune() + if err == io.EOF { + next = 0 + err = nil + } + if err == nil && next != a.next { + t.Errorf("scanExponent%+v\n\tgot next = %q; want %q", a, next, a.next) + } + } +} + type StringTest struct { in, out string ok bool } var setStringTests = []StringTest{ - {"0", "0", true}, - {"-0", "0", true}, - {"1", "1", true}, - {"-1", "-1", true}, - {"1.", "1", true}, - {"1e0", "1", true}, - {"1.e1", "10", true}, + // invalid {in: "1e"}, {in: "1.e"}, {in: "1e+14e-5"}, @@ -33,6 +99,20 @@ var setStringTests = []StringTest{ {in: "r"}, {in: "a/b"}, {in: "a.b"}, + {in: "1/0"}, + {in: "4/3/2"}, // issue 17001 + {in: "4/3/"}, + {in: "4/3."}, + {in: "4/"}, + + // valid + {"0", "0", true}, + {"-0", "0", true}, + {"1", "1", true}, + {"-1", "-1", true}, + {"1.", "1", true}, + {"1e0", "1", true}, + {"1.e1", "10", true}, {"-0.1", "-1/10", true}, {"-.1", "-1/10", true}, {"2/4", "1/2", true}, @@ -49,24 +129,35 @@ var setStringTests = []StringTest{ {"106/141787961317645621392", "53/70893980658822810696", true}, {"204211327800791583.81095", "4084226556015831676219/20000", true}, {"0e9999999999", "0", true}, // issue #16176 - {in: "1/0"}, - {in: "4/3/2"}, // issue 17001 - {in: "4/3/"}, - {in: "4/3."}, - {in: "4/"}, } // These are not supported by fmt.Fscanf. var setStringTests2 = []StringTest{ + // invalid + {in: "4/3x"}, + + // invalid with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {in: "10_/1"}, + {in: "_10/1"}, + {in: "1/1__0"}, + {in: "1_000.0"}, // floats are base 10 which doesn't permit separators; see also issue #29799 + + // valid {"0b1000/3", "8/3", true}, {"0B1000/0x8", "1", true}, - {"-010/1", "-8", true}, // TODO(gri) should we even permit octal here? + {"-010/1", "-8", true}, {"-010.", "-10", true}, {"-0o10/1", "-8", true}, {"0x10/1", "16", true}, {"0x10/0x20", "1/2", true}, - {in: "4/3x"}, - // TODO(gri) add more tests + + // valid with separators + // (smoke tests only - a comprehensive set of tests is in natconv_test.go) + {"0b_1000/3", "8/3", true}, + {"0B_10_00/0x8", "1", true}, + {"0xdead/0B1101_1110_1010_1101", "1", true}, + {"0B1101_1110_1010_1101/0XD_E_A_D", "1", true}, } func TestRatSetString(t *testing.T) {