mirror of
https://github.com/golang/go
synced 2024-09-23 19:10:13 -06:00
utf16: add DecodeRune, EncodeRune
R=r CC=golang-dev https://golang.org/cl/970041
This commit is contained in:
parent
aa2187efa3
commit
57d9de3ac8
@ -18,6 +18,33 @@ const (
|
||||
surrSelf = 0x10000
|
||||
)
|
||||
|
||||
// IsSurrogate returns true if the specified Unicode code point
|
||||
// can appear in a surrogate pair.
|
||||
func IsSurrogate(rune int) bool {
|
||||
return surr1 <= rune && rune < surr3
|
||||
}
|
||||
|
||||
// DecodeRune returns the UTF-16 decoding of a surrogate pair.
|
||||
// If the pair is not a valid UTF-16 surrogate pair, DecodeRune returns
|
||||
// the Unicode replacement code point U+FFFD.
|
||||
func DecodeRune(r1, r2 int) int {
|
||||
if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 {
|
||||
return (int(r1)-surr1)<<10 | (int(r2) - surr2) + 0x10000
|
||||
}
|
||||
return unicode.ReplacementChar
|
||||
}
|
||||
|
||||
// EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune.
|
||||
// If the rune is not a valid Unicode code point or does not need encoding,
|
||||
// EncodeRune returns U+FFFD, U+FFFD.
|
||||
func EncodeRune(rune int) (r1, r2 int) {
|
||||
if rune < surrSelf || rune > unicode.MaxRune || IsSurrogate(rune) {
|
||||
return unicode.ReplacementChar, unicode.ReplacementChar
|
||||
}
|
||||
rune -= surrSelf
|
||||
return surr1 + (rune>>10)&0x3ff, surr2 + rune&0x3ff
|
||||
}
|
||||
|
||||
// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
|
||||
func Encode(s []int) []uint16 {
|
||||
n := len(s)
|
||||
@ -38,9 +65,9 @@ func Encode(s []int) []uint16 {
|
||||
a[n] = uint16(v)
|
||||
n++
|
||||
default:
|
||||
v -= surrSelf
|
||||
a[n] = uint16(surr1 + (v>>10)&0x3ff)
|
||||
a[n+1] = uint16(surr2 + v&0x3ff)
|
||||
r1, r2 := EncodeRune(v)
|
||||
a[n] = uint16(r1)
|
||||
a[n+1] = uint16(r2)
|
||||
n += 2
|
||||
}
|
||||
}
|
||||
@ -57,7 +84,7 @@ func Decode(s []uint16) []int {
|
||||
case surr1 <= r && r < surr2 && i+1 < len(s) &&
|
||||
surr2 <= s[i+1] && s[i+1] < surr3:
|
||||
// valid surrogate sequence
|
||||
a[n] = (int(r)-surr1)<<10 | (int(s[i+1]) - surr2) + 0x10000
|
||||
a[n] = DecodeRune(int(r), int(s[i+1]))
|
||||
i++
|
||||
n++
|
||||
case surr1 <= r && r < surr3:
|
||||
|
@ -8,6 +8,7 @@ import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
type encodeTest struct {
|
||||
@ -32,6 +33,41 @@ func TestEncode(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeRune(t *testing.T) {
|
||||
for i, tt := range encodeTests {
|
||||
j := 0
|
||||
for _, r := range tt.in {
|
||||
r1, r2 := EncodeRune(r)
|
||||
if r < 0x10000 || r > unicode.MaxRune {
|
||||
if j >= len(tt.out) {
|
||||
t.Errorf("#%d: ran out of tt.out", i)
|
||||
break
|
||||
}
|
||||
if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
|
||||
t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
|
||||
}
|
||||
j++
|
||||
} else {
|
||||
if j+1 >= len(tt.out) {
|
||||
t.Errorf("#%d: ran out of tt.out", i)
|
||||
break
|
||||
}
|
||||
if r1 != int(tt.out[j]) || r2 != int(tt.out[j+1]) {
|
||||
t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
|
||||
}
|
||||
j += 2
|
||||
dec := DecodeRune(r1, r2)
|
||||
if dec != r {
|
||||
t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
if j != len(tt.out) {
|
||||
t.Errorf("#%d: EncodeRune didn't generate enough output", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type decodeTest struct {
|
||||
in []uint16
|
||||
out []int
|
||||
|
Loading…
Reference in New Issue
Block a user