mirror of
https://github.com/golang/go
synced 2024-11-26 05:07:59 -07:00
go/scanner: optimize scanIdentifier
While profiling parsing, I noticed that scanIdentifier was extremely hot, and could be optimized: it is responsible for a significant fraction of scanning and had a lot of unnecessary branching, bounds checks, and function calls. This CL implements some of those optimizations, while trying to strike a balance between optimization and readability. It achieves this by optimizing for the common case of ASCII identifiers, falling back on the slower scan when encountering the first non-ASCII character. Benchmark results: name old time/op new time/op delta Scan-12 16.9µs ± 4% 15.8µs ± 5% -6.92% (p=0.000 n=20+18) ScanFiles/go/types/expr.go-12 793µs ± 4% 672µs ± 6% -15.23% (p=0.000 n=20+20) ScanFiles/go/parser/parser.go-12 1.08ms ± 6% 0.90ms ± 4% -16.68% (p=0.000 n=20+20) ScanFiles/net/http/server.go-12 1.44ms ± 4% 1.23ms ± 5% -14.58% (p=0.000 n=18+20) ScanFiles/go/scanner/errors.go-12 40.7µs ± 2% 32.6µs ± 3% -20.01% (p=0.000 n=19+20) Change-Id: If78380004248e3ea75cfc78eb7f38f528124dced Reviewed-on: https://go-review.googlesource.com/c/go/+/308611 Trust: Robert Findley <rfindley@google.com> Trust: Robert Griesemer <gri@golang.org> Run-TryBot: Robert Findley <rfindley@google.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
parent
074a49bfe8
commit
cde6a675bc
@ -48,11 +48,16 @@ type Scanner struct {
|
||||
ErrorCount int // number of errors encountered
|
||||
}
|
||||
|
||||
const bom = 0xFEFF // byte order mark, only permitted as very first character
|
||||
const (
|
||||
bom = 0xFEFF // byte order mark, only permitted as very first character
|
||||
eof = -1 // end of file
|
||||
)
|
||||
|
||||
// Read the next Unicode char into s.ch.
|
||||
// s.ch < 0 means end-of-file.
|
||||
//
|
||||
// For optimization, there is some overlap between this method and
|
||||
// s.scanIdentifier.
|
||||
func (s *Scanner) next() {
|
||||
if s.rdOffset < len(s.src) {
|
||||
s.offset = s.rdOffset
|
||||
@ -81,7 +86,7 @@ func (s *Scanner) next() {
|
||||
s.lineOffset = s.offset
|
||||
s.file.AddLine(s.offset)
|
||||
}
|
||||
s.ch = -1 // eof
|
||||
s.ch = eof
|
||||
}
|
||||
}
|
||||
|
||||
@ -347,11 +352,53 @@ func isDigit(ch rune) bool {
|
||||
return isDecimal(ch) || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
|
||||
}
|
||||
|
||||
// scanIdentifier reads the string of valid identifier characters at s.offset.
|
||||
// It must only be called when s.ch is known to be a valid letter.
|
||||
//
|
||||
// Be careful when making changes to this function: it is optimized and affects
|
||||
// scanning performance significantly.
|
||||
func (s *Scanner) scanIdentifier() string {
|
||||
offs := s.offset
|
||||
for isLetter(s.ch) || isDigit(s.ch) {
|
||||
|
||||
// Optimize for the common case of an ASCII identifier.
|
||||
//
|
||||
// Ranging over s.src[s.rdOffset:] lets us avoid some bounds checks, and
|
||||
// avoids conversions to runes.
|
||||
//
|
||||
// In case we encounter a non-ASCII character, fall back on the slower path
|
||||
// of calling into s.next().
|
||||
for rdOffset, b := range s.src[s.rdOffset:] {
|
||||
if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || b == '_' || '0' <= b && b <= '9' {
|
||||
// Avoid assigning a rune for the common case of an ascii character.
|
||||
continue
|
||||
}
|
||||
s.rdOffset += rdOffset
|
||||
if b < utf8.RuneSelf {
|
||||
// Optimization: we've encountered an ASCII character that's not a letter
|
||||
// or number. Avoid the call into s.next() and corresponding set up.
|
||||
//
|
||||
// Note that s.next() does some line accounting if s.ch is '\n', so this
|
||||
// shortcut is only possible because we know that the preceding character
|
||||
// is not '\n'.
|
||||
s.ch = rune(b)
|
||||
s.offset = s.rdOffset
|
||||
s.rdOffset++
|
||||
goto exit
|
||||
}
|
||||
// We know that the preceding character is valid for an identifier because
|
||||
// scanIdentifier is only called when s.ch is a letter, so calling s.next()
|
||||
// at s.rdOffset resets the scanner state.
|
||||
s.next()
|
||||
for isLetter(s.ch) || isDigit(s.ch) {
|
||||
s.next()
|
||||
}
|
||||
goto exit
|
||||
}
|
||||
s.offset = len(s.src)
|
||||
s.rdOffset = len(s.src)
|
||||
s.ch = eof
|
||||
|
||||
exit:
|
||||
return string(s.src[offs:s.offset])
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user