From a9581e2e7abfa0dd7033ee75ef6a64ce0a89bee2 Mon Sep 17 00:00:00 2001 From: Alberto Donizetti Date: Tue, 23 Feb 2016 22:54:38 +0100 Subject: [PATCH] unicode/utf16: speed up and clean up Encode and EncodeRune MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta EncodeValidASCII-4 74.1ns ± 1% 70.1ns ± 1% -5.46% (p=0.000 n=10+10) EncodeValidJapaneseChars-4 61.3ns ± 0% 58.9ns ± 0% -3.82% (p=0.000 n=10+10) EncodeRune-4 13.1ns ± 1% 9.8ns ± 0% -25.24% (p=0.000 n=10+9) Fixes #6957 Change-Id: I9dde6d77420c34c6e2ef3e6213bb6be9b58a3074 Reviewed-on: https://go-review.googlesource.com/19891 Reviewed-by: Rob Pike Run-TryBot: Rob Pike TryBot-Result: Gobot Gobot --- src/unicode/utf16/utf16.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/unicode/utf16/utf16.go b/src/unicode/utf16/utf16.go index 276fce9e563..0a5a02ebe26 100644 --- a/src/unicode/utf16/utf16.go +++ b/src/unicode/utf16/utf16.go @@ -45,7 +45,7 @@ func DecodeRune(r1, r2 rune) rune { // If the rune is not a valid Unicode code point or does not need encoding, // EncodeRune returns U+FFFD, U+FFFD. func EncodeRune(r rune) (r1, r2 rune) { - if r < surrSelf || r > maxRune || IsSurrogate(r) { + if r < surrSelf || r > maxRune { return replacementChar, replacementChar } r -= surrSelf @@ -65,20 +65,22 @@ func Encode(s []rune) []uint16 { n = 0 for _, v := range s { switch { - case v < 0, surr1 <= v && v < surr3, v > maxRune: - v = replacementChar - fallthrough - case v < surrSelf: + case 0 <= v && v < surr1, surr3 <= v && v < surrSelf: + // normal rune a[n] = uint16(v) n++ - default: + case surrSelf <= v && v <= maxRune: + // needs surrogate sequence r1, r2 := EncodeRune(v) a[n] = uint16(r1) a[n+1] = uint16(r2) n += 2 + default: + a[n] = uint16(replacementChar) + n++ } } - return a[0:n] + return a[:n] } // Decode returns the Unicode code point sequence represented