mirror of
https://github.com/golang/go
synced 2024-11-20 05:44:44 -07:00
exp/locale/collate: added representation for collation elements
(see http://www.unicode.org/reports/tr10/). R=r, r CC=golang-dev https://golang.org/cl/5981048
This commit is contained in:
parent
e456d015fb
commit
bb3f3c9775
179
src/pkg/exp/locale/collate/build/colelem.go
Normal file
179
src/pkg/exp/locale/collate/build/colelem.go
Normal file
@ -0,0 +1,179 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultSecondary = 0x20
|
||||
defaultTertiary = 0x2
|
||||
maxTertiary = 0x1F
|
||||
)
|
||||
|
||||
// A collation element is represented as an uint32.
|
||||
// In the typical case, a rune maps to a single collation element. If a rune
|
||||
// can be the start of a contraction or expands into multiple collation elements,
|
||||
// then the collation element that is associated with a rune will have a special
|
||||
// form to represent such m to n mappings. Such special collation elements
|
||||
// have a value >= 0x80000000.
|
||||
|
||||
// For normal collation elements, we assume that a collation element either has
|
||||
// a primary or non-default secondary value, not both.
|
||||
// Collation elements with a primary value are of the form
|
||||
// 010ppppp pppppppp pppppppp tttttttt, where
|
||||
// - p* is primary collation value
|
||||
// - t* is the tertiary collation value
|
||||
// Collation elements with a secondary value are of the form
|
||||
// 00000000 ssssssss ssssssss tttttttt, where
|
||||
// - s* is the secondary collation value
|
||||
// - t* is the tertiary collation value
|
||||
const (
|
||||
maxPrimaryBits = 21
|
||||
maxSecondaryBits = 16
|
||||
maxTertiaryBits = 8
|
||||
|
||||
isPrimary = 0x40000000
|
||||
)
|
||||
|
||||
func makeCE(weights []int) (uint32, error) {
|
||||
if w := weights[0]; w >= 1<<maxPrimaryBits || w < 0 {
|
||||
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
|
||||
}
|
||||
if w := weights[1]; w >= 1<<maxSecondaryBits || w < 0 {
|
||||
return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
|
||||
}
|
||||
if w := weights[2]; w >= 1<<maxTertiaryBits || w < 0 {
|
||||
return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %d >= %d", w, 1<<maxTertiaryBits)
|
||||
}
|
||||
ce := uint32(0)
|
||||
if weights[0] != 0 {
|
||||
// primary weight form
|
||||
if weights[1] != defaultSecondary {
|
||||
return 0, fmt.Errorf("makeCE: non-default secondary weight for non-zero primary: %X", weights)
|
||||
}
|
||||
ce = uint32(weights[0]<<maxTertiaryBits + weights[2])
|
||||
ce |= isPrimary
|
||||
} else {
|
||||
// secondary weight form
|
||||
ce = uint32(weights[1]<<maxTertiaryBits + weights[2])
|
||||
}
|
||||
return ce, nil
|
||||
}
|
||||
|
||||
// For contractions, collation elements are of the form
|
||||
// 10bbbbbb bbbbbbbb iiiiiiii iiinnnnn, where
|
||||
// - n* is the size of the first node in the contraction trie.
|
||||
// - i* is the index of the first node in the contraction trie.
|
||||
// - b* is the offset into the contraction collation element table.
|
||||
// See contract.go for details on the contraction trie.
|
||||
const (
|
||||
contractID = 0x80000000
|
||||
maxNBits = 5
|
||||
maxTrieIndexBits = 11
|
||||
maxContractOffsetBits = 14
|
||||
)
|
||||
|
||||
func makeContractIndex(h ctHandle, offset int) (uint32, error) {
|
||||
if h.n >= 1<<maxNBits {
|
||||
return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
|
||||
}
|
||||
if h.index >= 1<<maxTrieIndexBits {
|
||||
return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
|
||||
}
|
||||
if offset >= 1<<maxContractOffsetBits {
|
||||
return 0, fmt.Errorf("offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
|
||||
}
|
||||
ce := uint32(contractID)
|
||||
ce += uint32(offset << (maxTrieIndexBits + maxNBits))
|
||||
ce += uint32(h.index << maxNBits)
|
||||
ce += uint32(h.n)
|
||||
return ce, nil
|
||||
}
|
||||
|
||||
// For expansions, collation elements are of the form
|
||||
// 110bbbbb bbbbbbbb bbbbbbbb bbbbbbbb,
|
||||
// where b* is the index into the expansion sequence table.
|
||||
const (
|
||||
expandID = 0xC0000000
|
||||
maxExpandIndexBits = 29
|
||||
)
|
||||
|
||||
func makeExpandIndex(index int) (uint32, error) {
|
||||
if index >= 1<<maxExpandIndexBits {
|
||||
return 0, fmt.Errorf("index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
|
||||
}
|
||||
return expandID + uint32(index), nil
|
||||
}
|
||||
|
||||
// Each list of collation elements corresponding to an expansion starts with
|
||||
// a header indicating the length of the sequence.
|
||||
func makeExpansionHeader(n int) (uint32, error) {
|
||||
return uint32(n), nil
|
||||
}
|
||||
|
||||
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
|
||||
// sequence of collation elements, we decompose the rune and lookup the collation
|
||||
// elements for each rune in the decomposition and modify the tertiary weights.
|
||||
// The collation element, in this case, is of the form
|
||||
// 11100000 00000000 wwwwwwww vvvvvvvv, where
|
||||
// - v* is the replacement tertiary weight for the first rune,
|
||||
// - w* is the replacement tertiary weight for the second rune,
|
||||
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
|
||||
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
|
||||
const (
|
||||
decompID = 0xE0000000
|
||||
)
|
||||
|
||||
func makeDecompose(t1, t2 int) (uint32, error) {
|
||||
if t1 >= 256 || t1 < 0 {
|
||||
return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
|
||||
}
|
||||
if t2 >= 256 || t2 < 0 {
|
||||
return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
|
||||
}
|
||||
return uint32(t2<<8+t1) + decompID, nil
|
||||
}
|
||||
|
||||
const (
|
||||
// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
|
||||
minUnified rune = 0x4E00
|
||||
maxUnified = 0x9FFF
|
||||
minCompatibility = 0xF900
|
||||
maxCompatibility = 0xFAFF
|
||||
minRare = 0x3400
|
||||
maxRare = 0x4DBF
|
||||
)
|
||||
const (
|
||||
commonUnifiedOffset = 0xFB40
|
||||
rareUnifiedOffset = 0x1FB40
|
||||
otherOffset = 0x4FB40
|
||||
illegalOffset = otherOffset + unicode.MaxRune
|
||||
maxPrimary = illegalOffset + 2 // there are 2 illegal values.
|
||||
)
|
||||
|
||||
// implicitPrimary returns the primary weight for the a rune
|
||||
// for which there is no entry for the rune in the collation table.
|
||||
// We take a different approach from the one specified in
|
||||
// http://unicode.org/reports/tr10/#Implicit_Weights,
|
||||
// but preserve the resulting relative ordering of the runes.
|
||||
func implicitPrimary(r rune) int {
|
||||
|
||||
if r >= minUnified && r <= maxUnified {
|
||||
// The most common case for CJK.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
if r >= minCompatibility && r <= maxCompatibility {
|
||||
// This will never hit as long as we don't remove the characters
|
||||
// that would match from the table.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
if unicode.Is(unicode.Unified_Ideograph, r) {
|
||||
return int(r) + rareUnifiedOffset
|
||||
}
|
||||
return int(r) + otherOffset
|
||||
}
|
80
src/pkg/exp/locale/collate/build/colelem_test.go
Normal file
80
src/pkg/exp/locale/collate/build/colelem_test.go
Normal file
@ -0,0 +1,80 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import "testing"
|
||||
|
||||
type ceTest struct {
|
||||
f func(in []int) (uint32, error)
|
||||
arg []int
|
||||
val uint32
|
||||
}
|
||||
|
||||
func normalCE(in []int) (ce uint32, err error) {
|
||||
return makeCE(in)
|
||||
}
|
||||
|
||||
func expandCE(in []int) (ce uint32, err error) {
|
||||
return makeExpandIndex(in[0])
|
||||
}
|
||||
|
||||
func contractCE(in []int) (ce uint32, err error) {
|
||||
return makeContractIndex(ctHandle{in[0], in[1]}, in[2])
|
||||
}
|
||||
|
||||
func decompCE(in []int) (ce uint32, err error) {
|
||||
return makeDecompose(in[0], in[1])
|
||||
}
|
||||
|
||||
var ceTests = []ceTest{
|
||||
{normalCE, []int{0, 0, 0}, 000},
|
||||
{normalCE, []int{0, 30, 3}, 0x1E03},
|
||||
{normalCE, []int{100, defaultSecondary, 3}, 0x40006403},
|
||||
{normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-default secondary
|
||||
{normalCE, []int{100, 1, 3}, 0xFFFF},
|
||||
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF},
|
||||
{normalCE, []int{0, 1 << maxSecondaryBits, 0}, 0xFFFF},
|
||||
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits}, 0xFFFF},
|
||||
|
||||
{contractCE, []int{0, 0, 0}, 0x80000000},
|
||||
{contractCE, []int{1, 1, 1}, 0x80010021},
|
||||
{contractCE, []int{1, (1 << maxNBits) - 1, 1}, 0x8001003F},
|
||||
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}, 0x8001FFE1},
|
||||
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}, 0xBFFF0021},
|
||||
{contractCE, []int{1, (1 << maxNBits), 1}, 0xFFFF},
|
||||
{contractCE, []int{(1 << maxTrieIndexBits), 1, 1}, 0xFFFF},
|
||||
{contractCE, []int{1, (1 << maxContractOffsetBits), 1}, 0xFFFF},
|
||||
|
||||
{expandCE, []int{0}, 0xC0000000},
|
||||
{expandCE, []int{5}, 0xC0000005},
|
||||
{expandCE, []int{(1 << maxExpandIndexBits) - 1}, 0xDFFFFFFF},
|
||||
{expandCE, []int{1 << maxExpandIndexBits}, 0xFFFF},
|
||||
|
||||
{decompCE, []int{0, 0}, 0xE0000000},
|
||||
{decompCE, []int{1, 1}, 0xE0000101},
|
||||
{decompCE, []int{0x1F, 0x1F}, 0xE0001F1F},
|
||||
{decompCE, []int{256, 0x1F}, 0xFFFF},
|
||||
{decompCE, []int{0x1F, 256}, 0xFFFF},
|
||||
}
|
||||
|
||||
func TestColElem(t *testing.T) {
|
||||
for i, tt := range ceTests {
|
||||
in := make([]int, len(tt.arg))
|
||||
copy(in, tt.arg)
|
||||
ce, err := tt.f(in)
|
||||
if tt.val == 0xFFFF {
|
||||
if err == nil {
|
||||
t.Errorf("%d: expected error for args %x", i, tt.arg)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("%d: unexpected error: %v", i, err.Error())
|
||||
}
|
||||
if ce != tt.val {
|
||||
t.Errorf("%d: colElem=%X; want %X", i, ce, tt.val)
|
||||
}
|
||||
}
|
||||
}
|
170
src/pkg/exp/locale/collate/colelem.go
Normal file
170
src/pkg/exp/locale/collate/colelem.go
Normal file
@ -0,0 +1,170 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// weights holds the decoded weights per collation level.
|
||||
type weights struct {
|
||||
primary uint32
|
||||
secondary uint16
|
||||
tertiary uint8
|
||||
// TODO: compute quaternary on the fly or compress this value into 8 bits
|
||||
// such that weights fit within 64bit.
|
||||
quaternary uint32
|
||||
}
|
||||
|
||||
const (
|
||||
defaultSecondary = 0x20
|
||||
defaultTertiary = 0x2
|
||||
maxTertiary = 0x1F
|
||||
)
|
||||
|
||||
// colElem is a representation of a collation element.
|
||||
// In the typical case, a rune maps to a single collation element. If a rune
|
||||
// can be the start of a contraction or expands into multiple collation elements,
|
||||
// then the colElem that is associated with a rune will have a special form to represent
|
||||
// such m to n mappings. Such special colElems have a value >= 0x80000000.
|
||||
type colElem uint32
|
||||
|
||||
const (
|
||||
maxCE colElem = 0x7FFFFFFF
|
||||
minContract = 0x80000000
|
||||
maxContract = 0xBFFFFFFF
|
||||
minExpand = 0xC0000000
|
||||
maxExpand = 0xDFFFFFFF
|
||||
minDecomp = 0xE0000000
|
||||
)
|
||||
|
||||
type ceType int
|
||||
|
||||
const (
|
||||
ceNormal ceType = iota // ceNormal includes implicits (ce == 0)
|
||||
ceContractionIndex // rune can be a start of a contraction
|
||||
ceExpansionIndex // rune expands into a sequence of collation elements
|
||||
ceDecompose // rune expands using NFKC decomposition
|
||||
)
|
||||
|
||||
func (ce colElem) ctype() ceType {
|
||||
if ce <= maxCE {
|
||||
return ceNormal
|
||||
}
|
||||
if ce <= maxContract {
|
||||
return ceContractionIndex
|
||||
} else {
|
||||
if ce <= maxExpand {
|
||||
return ceExpansionIndex
|
||||
}
|
||||
return ceDecompose
|
||||
}
|
||||
panic("should not reach here")
|
||||
return ceType(-1)
|
||||
}
|
||||
|
||||
// For normal collation elements, we assume that a collation element either has
|
||||
// a primary or non-default secondary value, not both.
|
||||
// Collation elements with a primary value are of the form
|
||||
// 010ppppp pppppppp pppppppp tttttttt, where
|
||||
// - p* is primary collation value
|
||||
// - t* is the tertiary collation value
|
||||
// Collation elements with a secondary value are of the form
|
||||
// 00000000 ssssssss ssssssss tttttttt, where
|
||||
// - s* is the secondary collation value
|
||||
// - t* is the tertiary collation value
|
||||
func splitCE(ce colElem) weights {
|
||||
w := weights{}
|
||||
w.tertiary = uint8(ce)
|
||||
if ce&0x40000000 != 0 {
|
||||
// primary weight form
|
||||
w.primary = uint32((ce >> 8) & 0x1FFFFF)
|
||||
w.secondary = defaultSecondary
|
||||
} else {
|
||||
// secondary weight form
|
||||
w.secondary = uint16(ce >> 8)
|
||||
}
|
||||
return w
|
||||
}
|
||||
|
||||
// For contractions, colElems are of the form 10bbbbbb bbbbbbbb hhhhhhhh hhhhhhhh, where
|
||||
// - h* is the compTrieHandle.
|
||||
// - b* is the offset into the contraction collation element table.
|
||||
// See contract.go for details on the contraction trie.
|
||||
const (
|
||||
maxNBits = 5
|
||||
maxTrieIndexBits = 11
|
||||
maxContractOffsetBits = 14
|
||||
)
|
||||
|
||||
func splitContractIndex(ce colElem) (index, n, offset int) {
|
||||
h := uint16(ce)
|
||||
return int(h >> maxNBits), int(h & (1<<maxNBits - 1)), int(ce>>16) & (1<<maxContractOffsetBits - 1)
|
||||
}
|
||||
|
||||
// For expansions, colElems are of the form 110bbbbb bbbbbbbb bbbbbbbb bbbbbbbb,
|
||||
// where b* is the index into the expansion sequence table.
|
||||
const (
|
||||
maxExpandIndexBits = 29
|
||||
)
|
||||
|
||||
func splitExpandIndex(ce colElem) (index int) {
|
||||
index = int(ce) & (1<<maxExpandIndexBits - 1)
|
||||
return
|
||||
}
|
||||
|
||||
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
|
||||
// sequence of collation elements, we decompose the rune and lookup the collation
|
||||
// elements for each rune in the decomposition and modify the tertiary weights.
|
||||
// The colElem, in this case, is of the form 11100000 00000000 wwwwwwww vvvvvvvv, where
|
||||
// - v* is the replacement tertiary weight for the first rune,
|
||||
// - w* is the replacement tertiary weight for the second rune,
|
||||
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
|
||||
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
|
||||
const (
|
||||
decompID = 0xE0000000
|
||||
)
|
||||
|
||||
func splitDecompose(ce colElem) (t1, t2 uint8) {
|
||||
return uint8(ce), uint8(ce >> 8)
|
||||
}
|
||||
|
||||
const (
|
||||
// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
|
||||
minUnified rune = 0x4E00
|
||||
maxUnified = 0x9FFF
|
||||
minCompatibility = 0xF900
|
||||
maxCompatibility = 0xFAFF
|
||||
minRare = 0x3400
|
||||
maxRare = 0x4DBF
|
||||
)
|
||||
const (
|
||||
commonUnifiedOffset = 0xFB40
|
||||
rareUnifiedOffset = 0x1FB40
|
||||
otherOffset = 0x4FB40
|
||||
maxPrimary = otherOffset + unicode.MaxRune
|
||||
)
|
||||
|
||||
// implicitPrimary returns the primary weight for the a rune
|
||||
// for which there is no entry for the rune in the collation table.
|
||||
// We take a different approach from the one specified in
|
||||
// http://unicode.org/reports/tr10/#Implicit_Weights,
|
||||
// but preserve the resulting relative ordering of the runes.
|
||||
func implicitPrimary(r rune) int {
|
||||
|
||||
if r >= minUnified && r <= maxUnified {
|
||||
// The most common case for CJK.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
if r >= minCompatibility && r <= maxCompatibility {
|
||||
// This will never hit as long as we don't remove the characters
|
||||
// that would match from the table.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
if unicode.Is(unicode.Unified_Ideograph, r) {
|
||||
return int(r) + rareUnifiedOffset
|
||||
}
|
||||
return int(r) + otherOffset
|
||||
}
|
157
src/pkg/exp/locale/collate/colelem_test.go
Normal file
157
src/pkg/exp/locale/collate/colelem_test.go
Normal file
@ -0,0 +1,157 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
type ceTest struct {
|
||||
f func(inout []int) (colElem, ceType)
|
||||
arg []int
|
||||
}
|
||||
|
||||
// The make* funcs are simplified versions of the functions in build/colelem.go
|
||||
func makeCE(weights []int) colElem {
|
||||
const (
|
||||
maxPrimaryBits = 21
|
||||
maxSecondaryBits = 16
|
||||
maxTertiaryBits = 8
|
||||
isPrimary = 0x40000000
|
||||
)
|
||||
var ce colElem
|
||||
if weights[0] != 0 {
|
||||
ce = colElem(weights[0]<<maxTertiaryBits + weights[2])
|
||||
ce |= isPrimary
|
||||
} else {
|
||||
ce = colElem(weights[1]<<maxTertiaryBits + weights[2])
|
||||
}
|
||||
return ce
|
||||
}
|
||||
|
||||
func makeContractIndex(index, n, offset int) colElem {
|
||||
const (
|
||||
contractID = 0x80000000
|
||||
maxNBits = 5
|
||||
maxTrieIndexBits = 11
|
||||
)
|
||||
ce := colElem(contractID)
|
||||
ce += colElem(offset << (maxTrieIndexBits + maxNBits))
|
||||
ce += colElem(index << maxNBits)
|
||||
ce += colElem(n)
|
||||
return ce
|
||||
}
|
||||
|
||||
func makeExpandIndex(index int) colElem {
|
||||
const expandID = 0xC0000000
|
||||
return expandID + colElem(index)
|
||||
}
|
||||
|
||||
func makeDecompose(t1, t2 int) colElem {
|
||||
const decompID = 0xE0000000
|
||||
return colElem(t2<<8+t1) + decompID
|
||||
}
|
||||
|
||||
func normalCE(inout []int) (ce colElem, t ceType) {
|
||||
w := splitCE(makeCE(inout))
|
||||
inout[0] = int(w.primary)
|
||||
inout[1] = int(w.secondary)
|
||||
inout[2] = int(w.tertiary)
|
||||
return ce, ceNormal
|
||||
}
|
||||
|
||||
func expandCE(inout []int) (ce colElem, t ceType) {
|
||||
ce = makeExpandIndex(inout[0])
|
||||
inout[0] = splitExpandIndex(ce)
|
||||
return ce, ceExpansionIndex
|
||||
}
|
||||
|
||||
func contractCE(inout []int) (ce colElem, t ceType) {
|
||||
ce = makeContractIndex(inout[0], inout[1], inout[2])
|
||||
i, n, o := splitContractIndex(ce)
|
||||
inout[0], inout[1], inout[2] = i, n, o
|
||||
return ce, ceContractionIndex
|
||||
}
|
||||
|
||||
func decompCE(inout []int) (ce colElem, t ceType) {
|
||||
ce = makeDecompose(inout[0], inout[1])
|
||||
t1, t2 := splitDecompose(ce)
|
||||
inout[0], inout[1] = int(t1), int(t2)
|
||||
return ce, ceDecompose
|
||||
}
|
||||
|
||||
const (
|
||||
maxPrimaryBits = 21
|
||||
maxSecondaryBits = 16
|
||||
maxTertiaryBits = 8
|
||||
)
|
||||
|
||||
var ceTests = []ceTest{
|
||||
{normalCE, []int{0, 0, 0}},
|
||||
{normalCE, []int{0, 30, 3}},
|
||||
{normalCE, []int{100, defaultSecondary, 3}},
|
||||
|
||||
{contractCE, []int{0, 0, 0}},
|
||||
{contractCE, []int{1, 1, 1}},
|
||||
{contractCE, []int{1, (1 << maxNBits) - 1, 1}},
|
||||
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}},
|
||||
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}},
|
||||
|
||||
{expandCE, []int{0}},
|
||||
{expandCE, []int{5}},
|
||||
{expandCE, []int{(1 << maxExpandIndexBits) - 1}},
|
||||
|
||||
{decompCE, []int{0, 0}},
|
||||
{decompCE, []int{1, 1}},
|
||||
{decompCE, []int{0x1F, 0x1F}},
|
||||
}
|
||||
|
||||
func TestColElem(t *testing.T) {
|
||||
for i, tt := range ceTests {
|
||||
inout := make([]int, len(tt.arg))
|
||||
copy(inout, tt.arg)
|
||||
ce, typ := tt.f(inout)
|
||||
if ce.ctype() != typ {
|
||||
t.Errorf("%d: type is %d; want %d", i, ce.ctype(), typ)
|
||||
}
|
||||
for j, a := range tt.arg {
|
||||
if inout[j] != a {
|
||||
t.Errorf("%d: argument %d is %d; want %d", i, j, inout[j], a)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type implicitTest struct {
|
||||
r rune
|
||||
p int
|
||||
}
|
||||
|
||||
var implicitTests = []implicitTest{
|
||||
{0x33FF, 0x52F3F},
|
||||
{0x3400, 0x22F40},
|
||||
{0x4DC0, 0x54900},
|
||||
{0x4DFF, 0x5493F},
|
||||
{0x4E00, 0x14940},
|
||||
{0x9FCB, 0x19B0B},
|
||||
{0xA000, 0x59B40},
|
||||
{0xF8FF, 0x5F43F},
|
||||
{0xF900, 0x1F440},
|
||||
{0xFA23, 0x1F563},
|
||||
{0xFAFF, 0x1F63F},
|
||||
{0xFB00, 0x5F640},
|
||||
{0x20000, 0x3FB40},
|
||||
{0x2B81C, 0x4B35C},
|
||||
{unicode.MaxRune, 0x15FB3F}, // maximum primary value
|
||||
}
|
||||
|
||||
func TestImplicit(t *testing.T) {
|
||||
for _, tt := range implicitTests {
|
||||
if p := implicitPrimary(tt.r); p != tt.p {
|
||||
t.Errorf("%U: was %X; want %X", tt.r, p, tt.p)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user