1
0
mirror of https://github.com/golang/go synced 2024-11-25 00:17:58 -07:00

unicode/utf16: delete dependence on package unicode

In the test, verify the copied constants are correct.
Also put the test into package utf16 rather than utf16_test;
the old location was probably due to creating the test from
utf8, but the separation is not needed here.

R=golang-dev, bradfitz, rsc, rsc, r
CC=golang-dev
https://golang.org/cl/5752047
This commit is contained in:
Rob Pike 2012-03-06 14:58:08 +11:00
parent 866317af5e
commit 98c1baff6f
4 changed files with 45 additions and 7 deletions

View File

@ -0,0 +1,11 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package utf16
// Exported aliases for the package's unexported constants, so that tests
// can compare them against their counterparts in package unicode.
const MaxRune = maxRune
const ReplacementChar = replacementChar

View File

@ -5,7 +5,14 @@
// Package utf16 implements encoding and decoding of UTF-16 sequences. // Package utf16 implements encoding and decoding of UTF-16 sequences.
package utf16 package utf16
import "unicode" // The conditions replacementChar==unicode.ReplacementChar and
// maxRune==unicode.MaxRune are verified in the tests.
// Defining them locally avoids this package depending on package unicode.
// replacementChar is the Unicode replacement character.
const replacementChar = '\uFFFD'

// maxRune is the maximum valid Unicode code point.
const maxRune = '\U0010FFFF'
const ( const (
// 0xd800-0xdc00 encodes the high 10 bits of a pair. // 0xd800-0xdc00 encodes the high 10 bits of a pair.
@ -31,15 +38,15 @@ func DecodeRune(r1, r2 rune) rune {
if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 { if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 {
return (rune(r1)-surr1)<<10 | (rune(r2) - surr2) + 0x10000 return (rune(r1)-surr1)<<10 | (rune(r2) - surr2) + 0x10000
} }
return unicode.ReplacementChar return replacementChar
} }
// EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune. // EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune.
// If the rune is not a valid Unicode code point or does not need encoding, // If the rune is not a valid Unicode code point or does not need encoding,
// EncodeRune returns U+FFFD, U+FFFD. // EncodeRune returns U+FFFD, U+FFFD.
func EncodeRune(r rune) (r1, r2 rune) { func EncodeRune(r rune) (r1, r2 rune) {
if r < surrSelf || r > unicode.MaxRune || IsSurrogate(r) { if r < surrSelf || r > maxRune || IsSurrogate(r) {
return unicode.ReplacementChar, unicode.ReplacementChar return replacementChar, replacementChar
} }
r -= surrSelf r -= surrSelf
return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff
@ -58,8 +65,8 @@ func Encode(s []rune) []uint16 {
n = 0 n = 0
for _, v := range s { for _, v := range s {
switch { switch {
case v < 0, surr1 <= v && v < surr3, v > unicode.MaxRune: case v < 0, surr1 <= v && v < surr3, v > maxRune:
v = unicode.ReplacementChar v = replacementChar
fallthrough fallthrough
case v < surrSelf: case v < surrSelf:
a[n] = uint16(v) a[n] = uint16(v)
@ -89,7 +96,7 @@ func Decode(s []uint16) []rune {
n++ n++
case surr1 <= r && r < surr3: case surr1 <= r && r < surr3:
// invalid surrogate sequence // invalid surrogate sequence
a[n] = unicode.ReplacementChar a[n] = replacementChar
n++ n++
default: default:
// normal rune // normal rune

View File

@ -11,6 +11,16 @@ import (
. "unicode/utf16" . "unicode/utf16"
) )
// TestConstants checks that the constants utf16 redefines locally still
// have the same values as the originals in package unicode.
func TestConstants(t *testing.T) {
	if got, want := MaxRune, unicode.MaxRune; got != want {
		t.Errorf("utf16.maxRune is wrong: %x should be %x", got, want)
	}
	if got, want := ReplacementChar, unicode.ReplacementChar; got != want {
		t.Errorf("utf16.replacementChar is wrong: %x should be %x", got, want)
	}
}
type encodeTest struct { type encodeTest struct {
in []rune in []rune
out []uint16 out []uint16

View File

@ -21,6 +21,16 @@ func init() {
} }
} }
// TestConstants checks that the constants utf8 redefines locally still
// have the same values as the originals in package unicode.
func TestConstants(t *testing.T) {
	if got, want := MaxRune, unicode.MaxRune; got != want {
		t.Errorf("utf8.MaxRune is wrong: %x should be %x", got, want)
	}
	if got, want := RuneError, unicode.ReplacementChar; got != want {
		t.Errorf("utf8.RuneError is wrong: %x should be %x", got, want)
	}
}
type Utf8Map struct { type Utf8Map struct {
r rune r rune
str string str string