From 7786f9790561ceda98e9883256ed50d26fa136d6 Mon Sep 17 00:00:00 2001 From: Alberto Donizetti Date: Tue, 23 Feb 2016 18:45:38 +0100 Subject: [PATCH] unicode/utf16: speed up and clean up Decode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta DecodeValidASCII-4 94.7ns ± 1% 87.4ns ± 1% -7.71% (p=0.000 n=10+9) DecodeValidJapaneseChars-4 91.0ns ± 2% 84.8ns ± 0% -6.77% (p=0.000 n=9+10) DecodeRune-4 16.5ns ± 0% 16.6ns ± 2% ~ (p=0.108 n=9+10) For #6957 Change-Id: I618c15c2a42ef7ec6a5cd163b7c3f1a65ca4ad01 Reviewed-on: https://go-review.googlesource.com/19826 Reviewed-by: Rob Pike --- src/unicode/utf16/utf16.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/unicode/utf16/utf16.go b/src/unicode/utf16/utf16.go index b497500778e..276fce9e563 100644 --- a/src/unicode/utf16/utf16.go +++ b/src/unicode/utf16/utf16.go @@ -36,7 +36,7 @@ func IsSurrogate(r rune) bool { // the Unicode replacement code point U+FFFD. func DecodeRune(r1, r2 rune) rune { if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 { - return (r1-surr1)<<10 | (r2 - surr2) + 0x10000 + return (r1-surr1)<<10 | (r2 - surr2) + surrSelf } return replacementChar } @@ -88,21 +88,19 @@ func Decode(s []uint16) []rune { n := 0 for i := 0; i < len(s); i++ { switch r := s[i]; { + case r < surr1, surr3 <= r: + // normal rune + a[n] = rune(r) case surr1 <= r && r < surr2 && i+1 < len(s) && surr2 <= s[i+1] && s[i+1] < surr3: // valid surrogate sequence a[n] = DecodeRune(rune(r), rune(s[i+1])) i++ - n++ - case surr1 <= r && r < surr3: + default: // invalid surrogate sequence a[n] = replacementChar - n++ - default: - // normal rune - a[n] = rune(r) - n++ } + n++ } - return a[0:n] + return a[:n] }