mirror of
https://github.com/golang/go
synced 2024-11-20 07:34:40 -07:00
exp/norm: use rune
Nothing terribly interesting here. (!) Since the public APIs are all in terms of UTF-8, the changes are all internal only. R=mpvl, gri, r CC=golang-dev https://golang.org/cl/5309042
This commit is contained in:
parent
b50a847c3c
commit
c945f77f41
@ -126,26 +126,26 @@ func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
|
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
|
||||||
func (rb *reorderBuffer) appendRune(rune uint32) {
|
func (rb *reorderBuffer) appendRune(r uint32) {
|
||||||
bn := rb.nbyte
|
bn := rb.nbyte
|
||||||
sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
|
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||||
rb.nbyte += utf8.UTFMax
|
rb.nbyte += utf8.UTFMax
|
||||||
rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0}
|
rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0}
|
||||||
rb.nrune++
|
rb.nrune++
|
||||||
}
|
}
|
||||||
|
|
||||||
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
|
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
|
||||||
func (rb *reorderBuffer) assignRune(pos int, rune uint32) {
|
func (rb *reorderBuffer) assignRune(pos int, r uint32) {
|
||||||
bn := rb.rune[pos].pos
|
bn := rb.rune[pos].pos
|
||||||
sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
|
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||||
rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0}
|
rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0}
|
||||||
}
|
}
|
||||||
|
|
||||||
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
|
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
|
||||||
func (rb *reorderBuffer) runeAt(n int) uint32 {
|
func (rb *reorderBuffer) runeAt(n int) uint32 {
|
||||||
inf := rb.rune[n]
|
inf := rb.rune[n]
|
||||||
rune, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
|
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
|
||||||
return uint32(rune)
|
return uint32(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
// bytesAt returns the UTF-8 encoding of the rune at position n.
|
// bytesAt returns the UTF-8 encoding of the rune at position n.
|
||||||
@ -237,17 +237,17 @@ func isHangulWithoutJamoT(b []byte) bool {
|
|||||||
// decomposeHangul algorithmically decomposes a Hangul rune into
|
// decomposeHangul algorithmically decomposes a Hangul rune into
|
||||||
// its Jamo components.
|
// its Jamo components.
|
||||||
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
|
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
|
||||||
func (rb *reorderBuffer) decomposeHangul(rune uint32) bool {
|
func (rb *reorderBuffer) decomposeHangul(r uint32) bool {
|
||||||
b := rb.rune[:]
|
b := rb.rune[:]
|
||||||
n := rb.nrune
|
n := rb.nrune
|
||||||
if n+3 > len(b) {
|
if n+3 > len(b) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
rune -= hangulBase
|
r -= hangulBase
|
||||||
x := rune % jamoTCount
|
x := r % jamoTCount
|
||||||
rune /= jamoTCount
|
r /= jamoTCount
|
||||||
rb.appendRune(jamoLBase + rune/jamoVCount)
|
rb.appendRune(jamoLBase + r/jamoVCount)
|
||||||
rb.appendRune(jamoVBase + rune%jamoVCount)
|
rb.appendRune(jamoVBase + r%jamoVCount)
|
||||||
if x != 0 {
|
if x != 0 {
|
||||||
rb.appendRune(jamoTBase + x)
|
rb.appendRune(jamoTBase + x)
|
||||||
}
|
}
|
||||||
|
@ -8,14 +8,14 @@ import "testing"
|
|||||||
|
|
||||||
// TestCase is used for most tests.
|
// TestCase is used for most tests.
|
||||||
type TestCase struct {
|
type TestCase struct {
|
||||||
in []int
|
in []rune
|
||||||
out []int
|
out []rune
|
||||||
}
|
}
|
||||||
|
|
||||||
type insertFunc func(rb *reorderBuffer, rune int) bool
|
type insertFunc func(rb *reorderBuffer, r rune) bool
|
||||||
|
|
||||||
func insert(rb *reorderBuffer, rune int) bool {
|
func insert(rb *reorderBuffer, r rune) bool {
|
||||||
src := inputString(string(rune))
|
src := inputString(string(r))
|
||||||
return rb.insert(src, 0, rb.f.info(src, 0))
|
return rb.insert(src, 0, rb.f.info(src, 0))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -39,7 +39,7 @@ func runTests(t *testing.T, name string, fm Form, f insertFunc, tests []TestCase
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for j, want := range test.out {
|
for j, want := range test.out {
|
||||||
found := int(rb.runeAt(j))
|
found := rune(rb.runeAt(j))
|
||||||
if found != want {
|
if found != want {
|
||||||
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want)
|
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want)
|
||||||
}
|
}
|
||||||
@ -57,7 +57,7 @@ func TestFlush(t *testing.T) {
|
|||||||
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", len(out))
|
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", len(out))
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range []int("world!") {
|
for _, r := range []rune("world!") {
|
||||||
insert(&rb, r)
|
insert(&rb, r)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -76,14 +76,14 @@ func TestFlush(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var insertTests = []TestCase{
|
var insertTests = []TestCase{
|
||||||
{[]int{'a'}, []int{'a'}},
|
{[]rune{'a'}, []rune{'a'}},
|
||||||
{[]int{0x300}, []int{0x300}},
|
{[]rune{0x300}, []rune{0x300}},
|
||||||
{[]int{0x300, 0x316}, []int{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
|
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
|
||||||
{[]int{0x316, 0x300}, []int{0x316, 0x300}},
|
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
|
||||||
{[]int{0x41, 0x316, 0x300}, []int{0x41, 0x316, 0x300}},
|
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
|
||||||
{[]int{0x41, 0x300, 0x316}, []int{0x41, 0x316, 0x300}},
|
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
|
||||||
{[]int{0x300, 0x316, 0x41}, []int{0x316, 0x300, 0x41}},
|
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
|
||||||
{[]int{0x41, 0x300, 0x40, 0x316}, []int{0x41, 0x300, 0x40, 0x316}},
|
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestInsert(t *testing.T) {
|
func TestInsert(t *testing.T) {
|
||||||
@ -91,18 +91,18 @@ func TestInsert(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var decompositionNFDTest = []TestCase{
|
var decompositionNFDTest = []TestCase{
|
||||||
{[]int{0xC0}, []int{0x41, 0x300}},
|
{[]rune{0xC0}, []rune{0x41, 0x300}},
|
||||||
{[]int{0xAC00}, []int{0x1100, 0x1161}},
|
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
|
||||||
{[]int{0x01C4}, []int{0x01C4}},
|
{[]rune{0x01C4}, []rune{0x01C4}},
|
||||||
{[]int{0x320E}, []int{0x320E}},
|
{[]rune{0x320E}, []rune{0x320E}},
|
||||||
{[]int("음ẻ과"), []int{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
|
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
|
||||||
}
|
}
|
||||||
|
|
||||||
var decompositionNFKDTest = []TestCase{
|
var decompositionNFKDTest = []TestCase{
|
||||||
{[]int{0xC0}, []int{0x41, 0x300}},
|
{[]rune{0xC0}, []rune{0x41, 0x300}},
|
||||||
{[]int{0xAC00}, []int{0x1100, 0x1161}},
|
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
|
||||||
{[]int{0x01C4}, []int{0x44, 0x5A, 0x030C}},
|
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
|
||||||
{[]int{0x320E}, []int{0x28, 0x1100, 0x1161, 0x29}},
|
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDecomposition(t *testing.T) {
|
func TestDecomposition(t *testing.T) {
|
||||||
@ -111,15 +111,15 @@ func TestDecomposition(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var compositionTest = []TestCase{
|
var compositionTest = []TestCase{
|
||||||
{[]int{0x41, 0x300}, []int{0xC0}},
|
{[]rune{0x41, 0x300}, []rune{0xC0}},
|
||||||
{[]int{0x41, 0x316}, []int{0x41, 0x316}},
|
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
|
||||||
{[]int{0x41, 0x300, 0x35D}, []int{0xC0, 0x35D}},
|
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
|
||||||
{[]int{0x41, 0x316, 0x300}, []int{0xC0, 0x316}},
|
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
|
||||||
// blocking starter
|
// blocking starter
|
||||||
{[]int{0x41, 0x316, 0x40, 0x300}, []int{0x41, 0x316, 0x40, 0x300}},
|
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
|
||||||
{[]int{0x1100, 0x1161}, []int{0xAC00}},
|
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
|
||||||
// parenthesized Hangul, alternate between ASCII and Hangul.
|
// parenthesized Hangul, alternate between ASCII and Hangul.
|
||||||
{[]int{0x28, 0x1100, 0x1161, 0x29}, []int{0x28, 0xAC00, 0x29}},
|
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestComposition(t *testing.T) {
|
func TestComposition(t *testing.T) {
|
||||||
|
@ -119,7 +119,7 @@ const (
|
|||||||
// This contains only the properties we're interested in.
|
// This contains only the properties we're interested in.
|
||||||
type Char struct {
|
type Char struct {
|
||||||
name string
|
name string
|
||||||
codePoint int // if zero, this index is not a valid code point.
|
codePoint rune // if zero, this index is not a valid code point.
|
||||||
ccc uint8 // canonical combining class
|
ccc uint8 // canonical combining class
|
||||||
excludeInComp bool // from CompositionExclusions.txt
|
excludeInComp bool // from CompositionExclusions.txt
|
||||||
compatDecomp bool // it has a compatibility expansion
|
compatDecomp bool // it has a compatibility expansion
|
||||||
@ -160,7 +160,7 @@ const (
|
|||||||
SMissing
|
SMissing
|
||||||
)
|
)
|
||||||
|
|
||||||
var lastChar int = 0
|
var lastChar = rune('\u0000')
|
||||||
|
|
||||||
func (c Char) isValid() bool {
|
func (c Char) isValid() bool {
|
||||||
return c.codePoint != 0 && c.state != SMissing
|
return c.codePoint != 0 && c.state != SMissing
|
||||||
@ -193,7 +193,7 @@ func (f FormInfo) String() string {
|
|||||||
return buf.String()
|
return buf.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
type Decomposition []int
|
type Decomposition []rune
|
||||||
|
|
||||||
func (d Decomposition) String() string {
|
func (d Decomposition) String() string {
|
||||||
return fmt.Sprintf("%.4X", d)
|
return fmt.Sprintf("%.4X", d)
|
||||||
@ -220,7 +220,7 @@ func openReader(file string) (input io.ReadCloser) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) {
|
func parseDecomposition(s string, skipfirst bool) (a []rune, e os.Error) {
|
||||||
decomp := strings.Split(s, " ")
|
decomp := strings.Split(s, " ")
|
||||||
if len(decomp) > 0 && skipfirst {
|
if len(decomp) > 0 && skipfirst {
|
||||||
decomp = decomp[1:]
|
decomp = decomp[1:]
|
||||||
@ -230,7 +230,7 @@ func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return a, err
|
return a, err
|
||||||
}
|
}
|
||||||
a = append(a, int(point))
|
a = append(a, rune(point))
|
||||||
}
|
}
|
||||||
return a, nil
|
return a, nil
|
||||||
}
|
}
|
||||||
@ -260,7 +260,7 @@ func parseCharacter(line string) {
|
|||||||
state = SLast
|
state = SLast
|
||||||
}
|
}
|
||||||
firstChar := lastChar + 1
|
firstChar := lastChar + 1
|
||||||
lastChar = int(point)
|
lastChar = rune(point)
|
||||||
if state != SLast {
|
if state != SLast {
|
||||||
firstChar = lastChar
|
firstChar = lastChar
|
||||||
}
|
}
|
||||||
@ -370,8 +370,8 @@ func loadCompositionExclusions() {
|
|||||||
// hasCompatDecomp returns true if any of the recursive
|
// hasCompatDecomp returns true if any of the recursive
|
||||||
// decompositions contains a compatibility expansion.
|
// decompositions contains a compatibility expansion.
|
||||||
// In this case, the character may not occur in NFK*.
|
// In this case, the character may not occur in NFK*.
|
||||||
func hasCompatDecomp(rune int) bool {
|
func hasCompatDecomp(r rune) bool {
|
||||||
c := &chars[rune]
|
c := &chars[r]
|
||||||
if c.compatDecomp {
|
if c.compatDecomp {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
@ -396,19 +396,19 @@ const (
|
|||||||
JamoTEnd = 0x11C3
|
JamoTEnd = 0x11C3
|
||||||
)
|
)
|
||||||
|
|
||||||
func isHangul(rune int) bool {
|
func isHangul(r rune) bool {
|
||||||
return HangulBase <= rune && rune < HangulEnd
|
return HangulBase <= r && r < HangulEnd
|
||||||
}
|
}
|
||||||
|
|
||||||
func ccc(rune int) uint8 {
|
func ccc(r rune) uint8 {
|
||||||
return chars[rune].ccc
|
return chars[r].ccc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert a rune in a buffer, ordered by Canonical Combining Class.
|
// Insert a rune in a buffer, ordered by Canonical Combining Class.
|
||||||
func insertOrdered(b Decomposition, rune int) Decomposition {
|
func insertOrdered(b Decomposition, r rune) Decomposition {
|
||||||
n := len(b)
|
n := len(b)
|
||||||
b = append(b, 0)
|
b = append(b, 0)
|
||||||
cc := ccc(rune)
|
cc := ccc(r)
|
||||||
if cc > 0 {
|
if cc > 0 {
|
||||||
// Use bubble sort.
|
// Use bubble sort.
|
||||||
for ; n > 0; n-- {
|
for ; n > 0; n-- {
|
||||||
@ -418,18 +418,18 @@ func insertOrdered(b Decomposition, rune int) Decomposition {
|
|||||||
b[n] = b[n-1]
|
b[n] = b[n-1]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b[n] = rune
|
b[n] = r
|
||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recursively decompose.
|
// Recursively decompose.
|
||||||
func decomposeRecursive(form int, rune int, d Decomposition) Decomposition {
|
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
|
||||||
if isHangul(rune) {
|
if isHangul(r) {
|
||||||
return d
|
return d
|
||||||
}
|
}
|
||||||
dcomp := chars[rune].forms[form].decomp
|
dcomp := chars[r].forms[form].decomp
|
||||||
if len(dcomp) == 0 {
|
if len(dcomp) == 0 {
|
||||||
return insertOrdered(d, rune)
|
return insertOrdered(d, r)
|
||||||
}
|
}
|
||||||
for _, c := range dcomp {
|
for _, c := range dcomp {
|
||||||
d = decomposeRecursive(form, c, d)
|
d = decomposeRecursive(form, c, d)
|
||||||
@ -475,8 +475,8 @@ func completeCharFields(form int) {
|
|||||||
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
|
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, rune := range f.decomp {
|
for _, r := range f.decomp {
|
||||||
chars[rune].forms[form].inDecomp = true
|
chars[r].forms[form].inDecomp = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -505,7 +505,7 @@ func completeCharFields(form int) {
|
|||||||
switch {
|
switch {
|
||||||
case len(f.decomp) > 0:
|
case len(f.decomp) > 0:
|
||||||
f.quickCheck[MDecomposed] = QCNo
|
f.quickCheck[MDecomposed] = QCNo
|
||||||
case isHangul(i):
|
case isHangul(rune(i)):
|
||||||
f.quickCheck[MDecomposed] = QCNo
|
f.quickCheck[MDecomposed] = QCNo
|
||||||
default:
|
default:
|
||||||
f.quickCheck[MDecomposed] = QCYes
|
f.quickCheck[MDecomposed] = QCYes
|
||||||
@ -588,7 +588,7 @@ func printCharInfoTables() int {
|
|||||||
for i, char := range chars {
|
for i, char := range chars {
|
||||||
v := makeCharInfo(char)
|
v := makeCharInfo(char)
|
||||||
if v != 0 {
|
if v != 0 {
|
||||||
t.insert(i, v)
|
t.insert(rune(i), v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return t.printTables("charInfo")
|
return t.printTables("charInfo")
|
||||||
@ -606,7 +606,7 @@ func printDecompositionTables() int {
|
|||||||
for _, c := range chars {
|
for _, c := range chars {
|
||||||
for f := 0; f < 2; f++ {
|
for f := 0; f < 2; f++ {
|
||||||
d := c.forms[f].expandedDecomp
|
d := c.forms[f].expandedDecomp
|
||||||
s := string([]int(d))
|
s := string([]rune(d))
|
||||||
if _, ok := positionMap[s]; !ok {
|
if _, ok := positionMap[s]; !ok {
|
||||||
p := decompositions.Len()
|
p := decompositions.Len()
|
||||||
decompositions.WriteByte(uint8(len(s)))
|
decompositions.WriteByte(uint8(len(s)))
|
||||||
@ -624,7 +624,7 @@ func printDecompositionTables() int {
|
|||||||
for i, c := range chars {
|
for i, c := range chars {
|
||||||
d := c.forms[FCanonical].expandedDecomp
|
d := c.forms[FCanonical].expandedDecomp
|
||||||
if len(d) != 0 {
|
if len(d) != 0 {
|
||||||
nfcT.insert(i, positionMap[string([]int(d))])
|
nfcT.insert(rune(i), positionMap[string([]rune(d))])
|
||||||
if ccc(c.codePoint) != ccc(d[0]) {
|
if ccc(c.codePoint) != ccc(d[0]) {
|
||||||
// We assume the lead ccc of a decomposition is !=0 in this case.
|
// We assume the lead ccc of a decomposition is !=0 in this case.
|
||||||
if ccc(d[0]) == 0 {
|
if ccc(d[0]) == 0 {
|
||||||
@ -634,7 +634,7 @@ func printDecompositionTables() int {
|
|||||||
}
|
}
|
||||||
d = c.forms[FCompatibility].expandedDecomp
|
d = c.forms[FCompatibility].expandedDecomp
|
||||||
if len(d) != 0 {
|
if len(d) != 0 {
|
||||||
nfkcT.insert(i, positionMap[string([]int(d))])
|
nfkcT.insert(rune(i), positionMap[string([]rune(d))])
|
||||||
if ccc(c.codePoint) != ccc(d[0]) {
|
if ccc(c.codePoint) != ccc(d[0]) {
|
||||||
// We assume the lead ccc of a decomposition is !=0 in this case.
|
// We assume the lead ccc of a decomposition is !=0 in this case.
|
||||||
if ccc(d[0]) == 0 {
|
if ccc(d[0]) == 0 {
|
||||||
@ -752,7 +752,7 @@ func verifyComputed() {
|
|||||||
for i, c := range chars {
|
for i, c := range chars {
|
||||||
for _, f := range c.forms {
|
for _, f := range c.forms {
|
||||||
isNo := (f.quickCheck[MDecomposed] == QCNo)
|
isNo := (f.quickCheck[MDecomposed] == QCNo)
|
||||||
if (len(f.decomp) > 0) != isNo && !isHangul(i) {
|
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
|
||||||
log.Fatalf("%U: NF*D must be no if rune decomposes", i)
|
log.Fatalf("%U: NF*D must be no if rune decomposes", i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ func main() {
|
|||||||
|
|
||||||
// We take the smallest, largest and an arbitrary value for each
|
// We take the smallest, largest and an arbitrary value for each
|
||||||
// of the UTF-8 sequence lengths.
|
// of the UTF-8 sequence lengths.
|
||||||
var testRunes = []int{
|
var testRunes = []rune{
|
||||||
0x01, 0x0C, 0x7F, // 1-byte sequences
|
0x01, 0x0C, 0x7F, // 1-byte sequences
|
||||||
0x80, 0x100, 0x7FF, // 2-byte sequences
|
0x80, 0x100, 0x7FF, // 2-byte sequences
|
||||||
0x800, 0x999, 0xFFFF, // 3-byte sequences
|
0x800, 0x999, 0xFFFF, // 3-byte sequences
|
||||||
|
@ -28,13 +28,13 @@ func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []Pos
|
|||||||
if pos != test.pos {
|
if pos != test.pos {
|
||||||
t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
|
t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
|
||||||
}
|
}
|
||||||
runes := []int(test.buffer)
|
runes := []rune(test.buffer)
|
||||||
if rb.nrune != len(runes) {
|
if rb.nrune != len(runes) {
|
||||||
t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes))
|
t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for j, want := range runes {
|
for j, want := range runes {
|
||||||
found := int(rb.runeAt(j))
|
found := rune(rb.runeAt(j))
|
||||||
if found != want {
|
if found != want {
|
||||||
t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
|
t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
|
||||||
}
|
}
|
||||||
@ -385,8 +385,8 @@ func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []Ap
|
|||||||
}
|
}
|
||||||
if outs != test.out {
|
if outs != test.out {
|
||||||
// Find first rune that differs and show context.
|
// Find first rune that differs and show context.
|
||||||
ir := []int(outs)
|
ir := []rune(outs)
|
||||||
ig := []int(test.out)
|
ig := []rune(test.out)
|
||||||
for j := 0; j < len(ir) && j < len(ig); j++ {
|
for j := 0; j < len(ir) && j < len(ig); j++ {
|
||||||
if ir[j] == ig[j] {
|
if ir[j] == ig[j] {
|
||||||
continue
|
continue
|
||||||
|
@ -103,7 +103,7 @@ type Test struct {
|
|||||||
name string
|
name string
|
||||||
partnr int
|
partnr int
|
||||||
number int
|
number int
|
||||||
rune int // used for character by character test
|
r rune // used for character by character test
|
||||||
cols [cMaxColumns]string // Each has 5 entries, see below.
|
cols [cMaxColumns]string // Each has 5 entries, see below.
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -174,12 +174,12 @@ func loadTestData() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatal(err)
|
logger.Fatal(err)
|
||||||
}
|
}
|
||||||
if test.rune == 0 {
|
if test.r == 0 {
|
||||||
// save for CharacterByCharacterTests
|
// save for CharacterByCharacterTests
|
||||||
test.rune = int(r)
|
test.r = int(r)
|
||||||
}
|
}
|
||||||
var buf [utf8.UTFMax]byte
|
var buf [utf8.UTFMax]byte
|
||||||
sz := utf8.EncodeRune(buf[:], int(r))
|
sz := utf8.EncodeRune(buf[:], rune(r))
|
||||||
test.cols[j-1] += string(buf[:sz])
|
test.cols[j-1] += string(buf[:sz])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -198,7 +198,7 @@ func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
|
|||||||
if errorCount > 20 {
|
if errorCount > 20 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
st, sr, sg := []int(test), []int(result), []int(gold)
|
st, sr, sg := []rune(test), []rune(result), []rune(gold)
|
||||||
logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
|
logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
|
||||||
t.Name(), name, fstr[f], st, sr, sg, t.name)
|
t.Name(), name, fstr[f], st, sr, sg, t.name)
|
||||||
}
|
}
|
||||||
@ -210,7 +210,7 @@ func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bo
|
|||||||
if errorCount > 20 {
|
if errorCount > 20 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []int(test), result, want)
|
logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -243,13 +243,13 @@ func CharacterByCharacterTests() {
|
|||||||
tests := part[1].tests
|
tests := part[1].tests
|
||||||
last := 0
|
last := 0
|
||||||
for i := 0; i <= len(tests); i++ { // last one is special case
|
for i := 0; i <= len(tests); i++ { // last one is special case
|
||||||
var rune int
|
var r int
|
||||||
if i == len(tests) {
|
if i == len(tests) {
|
||||||
rune = 0x2FA1E // Don't have to go to 0x10FFFF
|
r = 0x2FA1E // Don't have to go to 0x10FFFF
|
||||||
} else {
|
} else {
|
||||||
rune = tests[i].rune
|
r = tests[i].r
|
||||||
}
|
}
|
||||||
for last++; last < rune; last++ {
|
for last++; last < r; last++ {
|
||||||
// Check all characters that were not explicitly listed in the test.
|
// Check all characters that were not explicitly listed in the test.
|
||||||
t := &Test{partnr: 1, number: -1}
|
t := &Test{partnr: 1, number: -1}
|
||||||
char := string(last)
|
char := string(last)
|
||||||
|
@ -73,15 +73,15 @@ var tests = []trietest{
|
|||||||
{1, []byte{t6, tx, tx, tx, tx, tx}},
|
{1, []byte{t6, tx, tx, tx, tx, tx}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func mkUtf8(rune int) ([]byte, int) {
|
func mkUTF8(r rune) ([]byte, int) {
|
||||||
var b [utf8.UTFMax]byte
|
var b [utf8.UTFMax]byte
|
||||||
sz := utf8.EncodeRune(b[:], rune)
|
sz := utf8.EncodeRune(b[:], r)
|
||||||
return b[:sz], sz
|
return b[:sz], sz
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLookup(t *testing.T) {
|
func TestLookup(t *testing.T) {
|
||||||
for i, tt := range testRunes {
|
for i, tt := range testRunes {
|
||||||
b, szg := mkUtf8(tt)
|
b, szg := mkUTF8(tt)
|
||||||
v, szt := testdata.lookup(b)
|
v, szt := testdata.lookup(b)
|
||||||
if int(v) != i {
|
if int(v) != i {
|
||||||
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
|
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
|
||||||
@ -103,7 +103,7 @@ func TestLookup(t *testing.T) {
|
|||||||
|
|
||||||
func TestLookupUnsafe(t *testing.T) {
|
func TestLookupUnsafe(t *testing.T) {
|
||||||
for i, tt := range testRunes {
|
for i, tt := range testRunes {
|
||||||
b, _ := mkUtf8(tt)
|
b, _ := mkUTF8(tt)
|
||||||
v := testdata.lookupUnsafe(b)
|
v := testdata.lookupUnsafe(b)
|
||||||
if int(v) != i {
|
if int(v) != i {
|
||||||
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
|
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
|
||||||
@ -113,7 +113,7 @@ func TestLookupUnsafe(t *testing.T) {
|
|||||||
|
|
||||||
func TestLookupString(t *testing.T) {
|
func TestLookupString(t *testing.T) {
|
||||||
for i, tt := range testRunes {
|
for i, tt := range testRunes {
|
||||||
b, szg := mkUtf8(tt)
|
b, szg := mkUTF8(tt)
|
||||||
v, szt := testdata.lookupString(string(b))
|
v, szt := testdata.lookupString(string(b))
|
||||||
if int(v) != i {
|
if int(v) != i {
|
||||||
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
|
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
|
||||||
@ -135,7 +135,7 @@ func TestLookupString(t *testing.T) {
|
|||||||
|
|
||||||
func TestLookupStringUnsafe(t *testing.T) {
|
func TestLookupStringUnsafe(t *testing.T) {
|
||||||
for i, tt := range testRunes {
|
for i, tt := range testRunes {
|
||||||
b, _ := mkUtf8(tt)
|
b, _ := mkUTF8(tt)
|
||||||
v := testdata.lookupStringUnsafe(string(b))
|
v := testdata.lookupStringUnsafe(string(b))
|
||||||
if int(v) != i {
|
if int(v) != i {
|
||||||
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
|
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
package norm
|
package norm
|
||||||
|
|
||||||
var testRunes = []int{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
|
var testRunes = []rune{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
|
||||||
|
|
||||||
// testdataValues: 192 entries, 384 bytes
|
// testdataValues: 192 entries, 384 bytes
|
||||||
// Block 2 is the null block.
|
// Block 2 is the null block.
|
||||||
|
@ -94,9 +94,9 @@ func (n trieNode) countSparseEntries() int {
|
|||||||
return count
|
return count
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *trieNode) insert(rune int, value uint16) {
|
func (n *trieNode) insert(r rune, value uint16) {
|
||||||
var p [utf8.UTFMax]byte
|
var p [utf8.UTFMax]byte
|
||||||
sz := utf8.EncodeRune(p[:], rune)
|
sz := utf8.EncodeRune(p[:], r)
|
||||||
|
|
||||||
for i := 0; i < sz; i++ {
|
for i := 0; i < sz; i++ {
|
||||||
if n.leaf {
|
if n.leaf {
|
||||||
|
Loading…
Reference in New Issue
Block a user