diff --git a/api/next/51896.txt b/api/next/51896.txt new file mode 100644 index 00000000000..d4ef14cfa2b --- /dev/null +++ b/api/next/51896.txt @@ -0,0 +1 @@ +pkg unicode/utf16, func AppendRune([]uint16, int32) []uint16 #51896 \ No newline at end of file diff --git a/src/unicode/utf16/utf16.go b/src/unicode/utf16/utf16.go index 1a881aa7695..38d8be60602 100644 --- a/src/unicode/utf16/utf16.go +++ b/src/unicode/utf16/utf16.go @@ -83,6 +83,23 @@ func Encode(s []rune) []uint16 { return a[:n] } +// AppendRune appends the UTF-16 encoding of the Unicode code point r +// to the end of a and returns the extended buffer. If the rune is not +// a valid Unicode code point, it appends the encoding of U+FFFD. +func AppendRune(a []uint16, r rune) []uint16 { + // This function is inlineable for fast handling of ASCII. + switch { + case 0 <= r && r < surr1, surr3 <= r && r < surrSelf: + // normal rune + return append(a, uint16(r)) + case surrSelf <= r && r <= maxRune: + // needs surrogate sequence + r1, r2 := EncodeRune(r) + return append(a, uint16(r1), uint16(r2)) + } + return append(a, replacementChar) +} + // Decode returns the Unicode code point sequence represented // by the UTF-16 encoding s. 
func Decode(s []uint16) []rune { diff --git a/src/unicode/utf16/utf16_test.go b/src/unicode/utf16/utf16_test.go index 4ecaabef96c..be339b1fdf1 100644 --- a/src/unicode/utf16/utf16_test.go +++ b/src/unicode/utf16/utf16_test.go @@ -43,6 +43,18 @@ func TestEncode(t *testing.T) { } } +func TestAppendRune(t *testing.T) { /* checks AppendRune, applied one rune at a time, against the encodeTests table */ + for _, tt := range encodeTests { + var out []uint16 + for _, u := range tt.in { + out = AppendRune(out, u) /* accumulate the encoding rune by rune */ + } + if !reflect.DeepEqual(out, tt.out) { + t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out) + } + } +} + func TestEncodeRune(t *testing.T) { for i, tt := range encodeTests { j := 0 @@ -193,6 +205,28 @@ func BenchmarkEncodeValidJapaneseChars(b *testing.B) { } } +func BenchmarkAppendRuneValidASCII(b *testing.B) { /* measures AppendRune on runes that each encode to a single code unit */ + data := []rune{'h', 'e', 'l', 'l', 'o'} + a := make([]uint16, 0, len(data)*2) /* room for two code units per rune, the most AppendRune appends for one rune */ + for i := 0; i < b.N; i++ { + for _, u := range data { + a = AppendRune(a, u) + } + a = a[:0] /* truncate to length zero, keeping the capacity for the next iteration */ + } +} + +func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) { /* same loop as the ASCII benchmark, with three Japanese characters as input */ + data := []rune{'日', '本', '語'} + a := make([]uint16, 0, len(data)*2) /* room for two code units per rune, the most AppendRune appends for one rune */ + for i := 0; i < b.N; i++ { + for _, u := range data { + a = AppendRune(a, u) + } + a = a[:0] /* truncate to length zero, keeping the capacity for the next iteration */ + } +} + func BenchmarkEncodeRune(b *testing.B) { for i := 0; i < b.N; i++ { for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {