From ce2701b2b0bafba079445100a7b220404a4211ad Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 1 Jun 2011 14:17:00 -0700 Subject: [PATCH] big: ~8x faster number scanning - better number scanning algorithm - fixed a couple of bugs related to base interpretation - added scan benchmark - added more test cases and made tests more precise - introduced Int.scan method matching nat.scan - refactored Int.Scan; now uses int.scan - refactored Int.SetString; now uses int.scan There is more potential, this was a fairly simple change. gotest -test.bench="ScanPi" before/after (best of 3 runs): big.BenchmarkScanPi 1000 2024900 ns/op big.BenchmarkScanPi 10000 257540 ns/op R=chickencha CC=golang-dev, rsc https://golang.org/cl/4527089 --- src/pkg/big/int.go | 101 ++++++++++++++-------------- src/pkg/big/nat.go | 105 +++++++++++++++++------------ src/pkg/big/nat_test.go | 144 +++++++++++++++++++++++++++++++++++----- 3 files changed, 246 insertions(+), 104 deletions(-) diff --git a/src/pkg/big/int.go b/src/pkg/big/int.go index b96c387bf46..e66c34a8362 100755 --- a/src/pkg/big/int.go +++ b/src/pkg/big/int.go @@ -8,6 +8,7 @@ package big import ( "fmt" + "io" "os" "rand" "strings" @@ -375,11 +376,48 @@ func (x *Int) Format(s fmt.State, ch int) { } -// Scan is a support routine for fmt.Scanner. It accepts the formats -// 'b' (binary), 'o' (octal), 'd' (decimal), 'x' (lowercase hexadecimal), -// and 'X' (uppercase hexadecimal). -func (x *Int) Scan(s fmt.ScanState, ch int) os.Error { - var base int +// scan sets z to the integer value corresponding to the longest possible prefix +// read from r representing a signed integer number in a given conversion base. +// It returns z, the actual conversion base used, and an error, if any. In the +// error case, the value of z is undefined. The syntax follows the syntax of +// integer literals in Go. +// +// The base argument must be 0 or a value from 2 through MaxBase. If the base +// is 0, the string prefix determines the actual conversion base. A prefix of +// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a +// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. +// +func (z *Int) scan(r io.RuneScanner, base int) (*Int, int, os.Error) { + // determine sign + ch, _, err := r.ReadRune() + if err != nil { + return z, 0, err + } + neg := false + switch ch { + case '-': + neg = true + case '+': // nothing to do + default: + r.UnreadRune() + } + + // determine mantissa + z.abs, base, err = z.abs.scan(r, base) + if err != nil { + return z, base, err + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + + return z, base, nil +} + + +// Scan is a support routine for fmt.Scanner; it sets z to the value of +// the scanned number. It accepts the formats 'b' (binary), 'o' (octal), +// 'd' (decimal), 'x' (lowercase hexadecimal), and 'X' (uppercase hexadecimal). +func (z *Int) Scan(s fmt.ScanState, ch int) os.Error { + base := 0 switch ch { case 'b': base = 2 @@ -394,32 +432,13 @@ func (x *Int) Scan(s fmt.ScanState, ch int) os.Error { default: return os.ErrorString("Int.Scan: invalid verb") } - - ch, _, err := s.ReadRune() - if err != nil { - return err - } - neg := false - switch ch { - case '-': - neg = true - case '+': // nothing to do - default: - s.UnreadRune() - } - - x.abs, _, err = x.abs.scan(s, base) - if err != nil { - return err - } - x.neg = len(x.abs) > 0 && neg // 0 has no sign - - return nil + _, _, err := z.scan(s, base) + return err } -// Int64 returns the int64 representation of z. -// If z cannot be represented in an int64, the result is undefined. +// Int64 returns the int64 representation of x. +// If x cannot be represented in an int64, the result is undefined. func (x *Int) Int64() int64 { if len(x.abs) == 0 { return 0 @@ -439,33 +458,19 @@ func (x *Int) Int64() int64 { // and returns z and a boolean indicating success. If SetString fails, // the value of z is undefined. // -// If the base argument is 0, the string prefix determines the actual -// conversion base. A prefix of ``0x'' or ``0X'' selects base 16; the -// ``0'' prefix selects base 8, and a ``0b'' or ``0B'' prefix selects -// base 2. Otherwise the selected base is 10. +// The base argument must be 0 or a value from 2 through MaxBase. If the base +// is 0, the string prefix determines the actual conversion base. A prefix of +// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a +// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. // func (z *Int) SetString(s string, base int) (*Int, bool) { - neg := false - if len(s) > 0 { - switch s[0] { - case '-': - neg = true - fallthrough - case '+': - s = s[1:] - } - } - r := strings.NewReader(s) - abs, _, err := z.abs.scan(r, base) + _, _, err := z.scan(r, base) if err != nil { return z, false } _, _, err = r.ReadRune() - - z.abs = abs - z.neg = len(abs) > 0 && neg // 0 has no sign - return z, err == os.EOF // err == os.EOF => scan consumed all of s + return z, err == os.EOF // err == os.EOF => scan consumed all of s } diff --git a/src/pkg/big/nat.go b/src/pkg/big/nat.go index eb38750c168..313e22cbb31 100755 --- a/src/pkg/big/nat.go +++ b/src/pkg/big/nat.go @@ -608,8 +608,11 @@ func (x nat) bitLen() int { } -func hexValue(ch int) int { - var d int +// MaxBase is the largest number base accepted for string conversions. +const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1 + +func hexValue(ch int) Word { + d := MaxBase + 1 // illegal base switch { case '0' <= ch && ch <= '9': d = ch - '0' @@ -617,86 +620,106 @@ func hexValue(ch int) int { d = ch - 'a' + 10 case 'A' <= ch && ch <= 'Z': d = ch - 'A' + 10 - default: - return -1 } - return d + return Word(d) } -// scan returns the natural number corresponding to the longest -// possible prefix read from r representing a natural number in a -// given conversion base, the actual conversion base used, and an -// error, if any. The syntax of natural numbers follows the syntax of +// scan sets z to the natural number corresponding to the longest possible prefix +// read from r representing an unsigned integer in a given conversion base. +// It returns z, the actual conversion base used, and an error, if any. In the +// error case, the value of z is undefined. The syntax follows the syntax of // unsigned integer literals in Go. // -// If the base argument is 0, the string prefix determines the actual -// conversion base. A prefix of ``0x'' or ``0X'' selects base 16; the -// ``0'' prefix selects base 8, and a ``0b'' or ``0B'' prefix selects -// base 2. Otherwise the selected base is 10. +// The base argument must be 0 or a value from 2 through MaxBase. If the base +// is 0, the string prefix determines the actual conversion base. A prefix of +// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a +// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. // func (z nat) scan(r io.RuneScanner, base int) (nat, int, os.Error) { - n := 0 + // reject illegal bases + if base < 0 || base == 1 || MaxBase < base { + return z, 0, os.ErrorString("illegal number base") + } + + // one char look-ahead ch, _, err := r.ReadRune() if err != nil { return z, 0, err } + // determine base if necessary + b := Word(base) if base == 0 { - base = 10 + b = 10 if ch == '0' { - n++ switch ch, _, err = r.ReadRune(); err { case nil: - base = 8 + b = 8 switch ch { case 'x', 'X': - base = 16 + b = 16 case 'b', 'B': - base = 2 + b = 2 } - if base == 2 || base == 16 { - n-- + if b == 2 || b == 16 { if ch, _, err = r.ReadRune(); err != nil { - return z, 0, os.ErrorString("syntax error scanning binary or hexadecimal number") + return z, 0, err } } case os.EOF: return z, 10, nil default: - return z, 0, err + return z, 10, err } } } - // reject illegal bases - if base < 2 || 'z'-'a'+10 < base { - return z, 0, os.ErrorString("illegal number base") - } - // convert string + // - group as many digits d as possible together into a "super-digit" dd with "super-base" bb + // - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd z = z.make(0) + bb := Word(1) + dd := Word(0) for { d := hexValue(ch) - if 0 <= d && d < base { - z = z.mulAddWW(z, Word(base), Word(d)) - } else { - r.UnreadRune() - if n > 0 { - break - } - return z, 0, os.ErrorString("syntax error scanning number") + if d >= b { + r.UnreadRune() // ch does not belong to number anymore + break } - n++ + + if tmp := bb * b; tmp < bb { + // overflow + z = z.mulAddWW(z, bb, dd) + bb = b + dd = d + } else { + bb = tmp + dd = dd*b + d + } + if ch, _, err = r.ReadRune(); err != nil { - if err == os.EOF { - break + if err != os.EOF { + return z, int(b), err } - return z, 0, err + break } } - return z.norm(), base, nil + switch { + case bb > 1: + // there was at least one mantissa digit + z = z.mulAddWW(z, bb, dd) + case base == 0 && b == 8: + // there was only the octal prefix 0 (possibly followed by digits > 7); + // return base 10, not 8 + return z, 10, nil + case base != 0 || b != 8: + // there was neither a mantissa digit nor the octal prefix 0 + return z, int(b), os.ErrorString("syntax error scanning number") + } + + return z.norm(), int(b), nil } diff --git a/src/pkg/big/nat_test.go b/src/pkg/big/nat_test.go index 25947adda16..c358cc8cda6 100755 --- a/src/pkg/big/nat_test.go +++ b/src/pkg/big/nat_test.go @@ -5,6 +5,7 @@ package big import ( + "os" "strings" "testing" ) @@ -211,28 +212,58 @@ func TestString(t *testing.T) { var natScanTests = []struct { s string // string to be scanned + base int // input base x nat // expected nat - base int // expected base + b int // expected base ok bool // expected success + next int // next character (or 0, if at EOF) }{ - {s: ""}, - {"0", nil, 10, true}, - {"0 ", nil, 8, true}, + // error: illegal base + {base: -1}, + {base: 1}, + {base: 37}, + + // error: no mantissa + {}, + {s: "?"}, + {base: 10}, + {base: 36}, + {s: "?", base: 10}, {s: "0x"}, - {"08", nil, 8, true}, - {"0b1", nat{1}, 2, true}, - {"0b11000101", nat{0xc5}, 2, true}, - {"03271", nat{03271}, 8, true}, - {"10ab", nat{10}, 10, true}, - {"1234567890", nat{1234567890}, 10, true}, - {"0xdeadbeef", nat{0xdeadbeef}, 16, true}, - {"0XDEADBEEF", nat{0xdeadbeef}, 16, true}, + {s: "345", base: 2}, + + // no errors + {"0", 0, nil, 10, true, 0}, + {"0", 10, nil, 10, true, 0}, + {"0", 36, nil, 36, true, 0}, + {"1", 0, nat{1}, 10, true, 0}, + {"1", 10, nat{1}, 10, true, 0}, + {"0 ", 0, nil, 10, true, ' '}, + {"08", 0, nil, 10, true, '8'}, + {"018", 0, nat{1}, 8, true, '8'}, + {"0b1", 0, nat{1}, 2, true, 0}, + {"0b11000101", 0, nat{0xc5}, 2, true, 0}, + {"03271", 0, nat{03271}, 8, true, 0}, + {"10ab", 0, nat{10}, 10, true, 'a'}, + {"1234567890", 0, nat{1234567890}, 10, true, 0}, + {"xyz", 36, nat{(33*36+34)*36 + 35}, 36, true, 0}, + {"xyz?", 36, nat{(33*36+34)*36 + 35}, 36, true, '?'}, + {"0x", 16, nil, 16, true, 'x'}, + {"0xdeadbeef", 0, nat{0xdeadbeef}, 16, true, 0}, + {"0XDEADBEEF", 0, nat{0xdeadbeef}, 16, true, 0}, + {"0xfedcba9876543213fedcba9876543212fedcba9876543211fedcba9876543210", + 0, + nat{0xfedcba9876543210, 0xfedcba9876543211, 0xfedcba9876543212, 0xfedcba9876543213}, + 16, + true, + 0}, } -func TestScanBase0(t *testing.T) { +func TestScanBase(t *testing.T) { for _, a := range natScanTests { - x, b, err := nat(nil).scan(strings.NewReader(a.s), 0) + r := strings.NewReader(a.s) + x, b, err := nat(nil).scan(r, a.base) if err == nil && !a.ok { t.Errorf("scan%+v\n\texpected error", a) } @@ -245,9 +276,92 @@ func TestScanBase0(t *testing.T) { if x.cmp(a.x) != 0 { t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) } - if b != a.base { + if b != a.b { t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.base) } + next, _, err := r.ReadRune() + if err == os.EOF { + next = 0 + err = nil + } + if err == nil && next != a.next { + t.Errorf("scan%+v\n\tgot next = %q; want %q", a, next, a.next) + } + } +} + + +var pi = "3" + + "14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651" + + "32823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461" + + "28475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920" + + "96282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179" + + "31051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798" + + "60943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901" + + "22495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837" + + "29780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083" + + "81420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909" + + "21642019893809525720106548586327886593615338182796823030195203530185296899577362259941389124972177528347913151" + + "55748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035" + + "63707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104" + + "75216205696602405803815019351125338243003558764024749647326391419927260426992279678235478163600934172164121992" + + "45863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818" + + "34797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548" + + "16136115735255213347574184946843852332390739414333454776241686251898356948556209921922218427255025425688767179" + + "04946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886" + + "26945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645" + + "99581339047802759009946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745" + + "53050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382" + + "68683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244" + + "13654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767" + + "88952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288" + + "79710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821" + + "68299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610" + + "21359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435" + + "06430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675" + + "14269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672" + + "21825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539" + + "05796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007" + + "23055876317635942187312514712053292819182618612586732157919841484882916447060957527069572209175671167229109816" + + "90915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398" + + "31501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064" + + "20467525907091548141654985946163718027098199430992448895757128289059232332609729971208443357326548938239119325" + + "97463667305836041428138830320382490375898524374417029132765618093773444030707469211201913020330380197621101100" + + "44929321516084244485963766983895228684783123552658213144957685726243344189303968642624341077322697802807318915" + + "44110104468232527162010526522721116603966655730925471105578537634668206531098965269186205647693125705863566201" + + "85581007293606598764861179104533488503461136576867532494416680396265797877185560845529654126654085306143444318" + + "58676975145661406800700237877659134401712749470420562230538994561314071127000407854733269939081454664645880797" + + "27082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923" + + "09907965473761255176567513575178296664547791745011299614890304639947132962107340437518957359614589019389713111" + + "79042978285647503203198691514028708085990480109412147221317947647772622414254854540332157185306142288137585043" + + "06332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120" + + "91807638327166416274888800786925602902284721040317211860820419000422966171196377921337575114959501566049631862" + + "94726547364252308177036751590673502350728354056704038674351362222477158915049530984448933309634087807693259939" + + "78054193414473774418426312986080998886874132604721569516239658645730216315981931951673538129741677294786724229" + + "24654366800980676928238280689964004824354037014163149658979409243237896907069779422362508221688957383798623001" + + "59377647165122893578601588161755782973523344604281512627203734314653197777416031990665541876397929334419521541" + + "34189948544473456738316249934191318148092777710386387734317720754565453220777092120190516609628049092636019759" + + "88281613323166636528619326686336062735676303544776280350450777235547105859548702790814356240145171806246436267" + + "94561275318134078330336254232783944975382437205835311477119926063813346776879695970309833913077109870408591337" + + +// Test case for BenchmarkScanPi. +func TestScanPi(t *testing.T) { + var x nat + z, _, err := x.scan(strings.NewReader(pi), 10) + if err != nil { + t.Errorf("scanning pi: %s", err) + } + if s := z.decimalString(); s != pi { + t.Errorf("scanning pi: got %s", s) + } +} + + +func BenchmarkScanPi(b *testing.B) { + for i := 0; i < b.N; i++ { + var x nat + x.scan(strings.NewReader(pi), 10) } }