From 82bf12902f2abecdbe2cb5fa38299ac70cf9c67e Mon Sep 17 00:00:00 2001 From: Ludi Rehak Date: Wed, 4 May 2022 15:22:21 -0700 Subject: [PATCH] regexp/syntax: test for lowercase letters first in IsWordChar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lowercase letters occur more frequently than uppercase letters in English text. In IsWordChar, evaluate the most common case (lowercase letters) first to minimize the expected value of its execution time. Code clarity does not suffer by rearranging the order of the checks. Add a benchmark on a sentence demonstrating the performance improvement. name old time/op new time/op delta IsWordChar-10 122ns ± 0% 114ns ± 1% -6.68% (p=0.000 n=8+10) Change-Id: Ieee8126a4bd8ee8703905b4f75724623029f6fa2 Reviewed-on: https://go-review.googlesource.com/c/go/+/404100 Reviewed-by: Emmanuel Odeke Reviewed-by: Ian Lance Taylor Run-TryBot: Ian Lance Taylor Reviewed-by: Cherry Mui Auto-Submit: Ian Lance Taylor TryBot-Result: Gopher Robot Run-TryBot: thepudds --- src/regexp/syntax/prog.go | 4 +++- src/regexp/syntax/prog_test.go | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/regexp/syntax/prog.go b/src/regexp/syntax/prog.go index 896cdc42c2a..66995e2052d 100644 --- a/src/regexp/syntax/prog.go +++ b/src/regexp/syntax/prog.go @@ -106,7 +106,9 @@ func EmptyOpContext(r1, r2 rune) EmptyOp { // during the evaluation of the \b and \B zero-width assertions. // These assertions are ASCII-only: the word characters are [A-Za-z0-9_]. func IsWordChar(r rune) bool { - return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_' + // Test for lowercase letters first, as these occur more + // frequently than uppercase letters in common cases. + return 'a' <= r && r <= 'z' || 'A' <= r && r <= 'Z' || '0' <= r && r <= '9' || r == '_' } // An Inst is a single instruction in a regular expression program. diff --git a/src/regexp/syntax/prog_test.go b/src/regexp/syntax/prog_test.go index 5603aea2289..54dd1dd38d6 100644 --- a/src/regexp/syntax/prog_test.go +++ b/src/regexp/syntax/prog_test.go @@ -127,3 +127,18 @@ func BenchmarkEmptyOpContext(b *testing.B) { EmptyOpContext(r1, -1) } } + +var sink any + +func BenchmarkIsWordChar(b *testing.B) { + const chars = "Don't communicate by sharing memory, share memory by communicating." + for i := 0; i < b.N; i++ { + for _, r := range chars { + sink = IsWordChar(r) + } + } + if sink == nil { + b.Fatal("Benchmark did not run") + } + sink = nil +}