mirror of
https://github.com/golang/go
synced 2024-11-11 19:21:37 -07:00
unicode/utf16: add func RuneLen
This CL adds func RuneLen and, while here, also uses RuneLen to simplify the code in Encode. Fixes #44940 Change-Id: Ifd3b537f69880dfd32a69a6733d8d3c2b5d4ecba Reviewed-on: https://go-review.googlesource.com/c/go/+/569755 Reviewed-by: Ian Lance Taylor <iant@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> Commit-Queue: Ian Lance Taylor <iant@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Ian Lance Taylor <iant@google.com>
This commit is contained in:
parent
e0ba596c15
commit
ef4f2a0597
1
api/next/44940.txt
Normal file
1
api/next/44940.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
pkg unicode/utf16, func RuneLen(int32) int #44940
|
3
doc/next/6-stdlib/99-minor/unicode/utf16/44940.md
Normal file
3
doc/next/6-stdlib/99-minor/unicode/utf16/44940.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
The [`unicode/utf16.RuneLen`](/pkg/unicode/utf16#RuneLen) function returns
|
||||||
|
the number of 16-bit words in the UTF-16 encoding of the rune. It returns -1
|
||||||
|
if the rune is not a valid value to encode in UTF-16.
|
@ -6,6 +6,9 @@ package utf16
|
|||||||
|
|
||||||
// Extra names for constants so we can validate them during testing.
|
// Extra names for constants so we can validate them during testing.
|
||||||
const (
|
const (
|
||||||
|
Surr1 = surr1
|
||||||
|
Surr3 = surr3
|
||||||
|
SurrSelf = surrSelf
|
||||||
MaxRune = maxRune
|
MaxRune = maxRune
|
||||||
ReplacementChar = replacementChar
|
ReplacementChar = replacementChar
|
||||||
)
|
)
|
||||||
|
@ -52,6 +52,19 @@ func EncodeRune(r rune) (r1, r2 rune) {
|
|||||||
return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff
|
return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RuneLen returns the number of 16-bit words in the UTF-16 encoding of the rune.
|
||||||
|
// It returns -1 if the rune is not a valid value to encode in UTF-16.
|
||||||
|
func RuneLen(r rune) int {
|
||||||
|
switch {
|
||||||
|
case 0 <= r && r < surr1, surr3 <= r && r < surrSelf:
|
||||||
|
return 1
|
||||||
|
case surrSelf <= r && r <= maxRune:
|
||||||
|
return 2
|
||||||
|
default:
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
|
// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
|
||||||
func Encode(s []rune) []uint16 {
|
func Encode(s []rune) []uint16 {
|
||||||
n := len(s)
|
n := len(s)
|
||||||
@ -64,13 +77,11 @@ func Encode(s []rune) []uint16 {
|
|||||||
a := make([]uint16, n)
|
a := make([]uint16, n)
|
||||||
n = 0
|
n = 0
|
||||||
for _, v := range s {
|
for _, v := range s {
|
||||||
switch {
|
switch RuneLen(v) {
|
||||||
case 0 <= v && v < surr1, surr3 <= v && v < surrSelf:
|
case 1: // normal rune
|
||||||
// normal rune
|
|
||||||
a[n] = uint16(v)
|
a[n] = uint16(v)
|
||||||
n++
|
n++
|
||||||
case surrSelf <= v && v <= maxRune:
|
case 2: // needs surrogate sequence
|
||||||
// needs surrogate sequence
|
|
||||||
r1, r2 := EncodeRune(v)
|
r1, r2 := EncodeRune(v)
|
||||||
a[n] = uint16(r1)
|
a[n] = uint16(r1)
|
||||||
a[n+1] = uint16(r2)
|
a[n+1] = uint16(r2)
|
||||||
|
@ -22,6 +22,26 @@ func TestConstants(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRuneLen(t *testing.T) {
|
||||||
|
for _, tt := range []struct {
|
||||||
|
r rune
|
||||||
|
length int
|
||||||
|
}{
|
||||||
|
{0, 1},
|
||||||
|
{Surr1 - 1, 1},
|
||||||
|
{Surr3, 1},
|
||||||
|
{SurrSelf - 1, 1},
|
||||||
|
{SurrSelf, 2},
|
||||||
|
{MaxRune, 2},
|
||||||
|
{MaxRune + 1, -1},
|
||||||
|
{-1, -1},
|
||||||
|
} {
|
||||||
|
if length := RuneLen(tt.r); length != tt.length {
|
||||||
|
t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, length, tt.length)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type encodeTest struct {
|
type encodeTest struct {
|
||||||
in []rune
|
in []rune
|
||||||
out []uint16
|
out []uint16
|
||||||
|
Loading…
Reference in New Issue
Block a user