From 3d33437c450aa74014ea1d41cd986b6ee6266984 Mon Sep 17 00:00:00 2001
From: Cuong Manh Le
Date: Fri, 13 Sep 2024 10:15:51 +0700
Subject: [PATCH] unicode/utf8: speedup RuneCount
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CL 612617 did speedup RuneCountInString, thus we can now use it to
speedup RuneCount, too.

name                         old time/op  new time/op  delta
RuneCountTenASCIIChars-8     8.69ns ± 1%  3.59ns ± 2%  -58.66%  (p=0.000 n=9+9)
RuneCountTenJapaneseChars-8  49.8ns ± 2%  40.9ns ± 0%  -17.94%  (p=0.000 n=10+8)

Change-Id: I311750c00efc79af35fb0ca3b482a5d94e0a7977
Reviewed-on: https://go-review.googlesource.com/c/go/+/612955
Reviewed-by: Dmitri Shuralyov
Auto-Submit: Cuong Manh Le
Reviewed-by: Tim King
LUCI-TryBot-Result: Go LUCI
---
 src/unicode/utf8/utf8.go | 32 ++++----------------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
index 9743b742580..180c008ed5b 100644
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -414,35 +414,11 @@ func appendRuneNonASCII(p []byte, r rune) []byte {
 func RuneCount(p []byte) int {
 	np := len(p)
 	var n int
-	for i := 0; i < np; {
-		n++
-		c := p[i]
-		if c < RuneSelf {
-			// ASCII fast path
-			i++
-			continue
+	for ; n < np; n++ {
+		if c := p[n]; c >= RuneSelf {
+			// non-ASCII slow path
+			return n + RuneCountInString(string(p[n:]))
 		}
-		x := first[c]
-		if x == xx {
-			i++ // invalid.
-			continue
-		}
-		size := int(x & 7)
-		if i+size > np {
-			i++ // Short or invalid.
-			continue
-		}
-		accept := acceptRanges[x>>4]
-		if c := p[i+1]; c < accept.lo || accept.hi < c {
-			size = 1
-		} else if size == 2 {
-		} else if c := p[i+2]; c < locb || hicb < c {
-			size = 1
-		} else if size == 3 {
-		} else if c := p[i+3]; c < locb || hicb < c {
-			size = 1
-		}
-		i += size
 	}
 	return n
 }