mirror of
https://github.com/golang/go
synced 2024-11-21 22:24:40 -07:00
exp/locale/collate: first changes that introduce implementation of tailorings:
- Elements in the array are now sorted as a linked list. This makes it easier to apply tailorings.
- Added code to sort entries by collation elements.
- Added logical reset points. These are used for tailoring relative to certain properties, rather than characters.

NOTE: all code for type entry should now be in order.go. To keep the diffs for this CL reasonable, though, the existing code is left in builder.go. I'll move this in a separate CL.

R=r
CC=golang-dev
https://golang.org/cl/6493063
This commit is contained in:
parent
dd79b330c9
commit
18aa55c169
@ -38,8 +38,15 @@ type entry struct {
|
||||
elems [][]int // the collation elements for runes
|
||||
str string // same as string(runes)
|
||||
|
||||
decompose bool // can use NFKD decomposition to generate elems
|
||||
expansionIndex int // used to store index into expansion table
|
||||
// prev, next, and level are used to keep track of tailorings.
|
||||
prev, next *entry
|
||||
level collate.Level // next differs at this level
|
||||
|
||||
decompose bool // can use NFKD decomposition to generate elems
|
||||
exclude bool // do not include in table
|
||||
logical logicalAnchor
|
||||
|
||||
expansionIndex int // used to store index into expansion table
|
||||
contractionHandle ctHandle
|
||||
contractionIndex int // index into contraction elements
|
||||
}
|
||||
@ -69,13 +76,12 @@ func (e *entry) contractionStarter() bool {
|
||||
// collation elements for each entry. A common use will be to base the weights
|
||||
// on those specified in the allkeys* file as provided by the UCA or CLDR.
|
||||
type Builder struct {
|
||||
index *trieBuilder
|
||||
locale []*Tailoring
|
||||
entryMap map[string]*entry
|
||||
entry []*entry
|
||||
t *table
|
||||
err error
|
||||
built bool
|
||||
index *trieBuilder
|
||||
root ordering
|
||||
locale []*Tailoring
|
||||
t *table
|
||||
err error
|
||||
built bool
|
||||
|
||||
minNonVar int // lowest primary recorded for a variable
|
||||
varTop int // highest primary recorded for a non-variable
|
||||
@ -94,8 +100,8 @@ type Tailoring struct {
|
||||
// NewBuilder returns a new Builder.
|
||||
func NewBuilder() *Builder {
|
||||
b := &Builder{
|
||||
index: newTrieBuilder(),
|
||||
entryMap: make(map[string]*entry),
|
||||
index: newTrieBuilder(),
|
||||
root: makeRootOrdering(),
|
||||
}
|
||||
return b
|
||||
}
|
||||
@ -119,30 +125,27 @@ func (b *Builder) Tailoring(locale string) *Tailoring {
|
||||
// as a guide. The actual weights generated by Builder may differ.
|
||||
// The argument variables is a list of indices into colelems that should contain
|
||||
// a value for each colelem that is a variable. (See the reference above.)
|
||||
func (b *Builder) Add(str []rune, colelems [][]int, variables []int) error {
|
||||
e := &entry{
|
||||
runes: make([]rune, len(str)),
|
||||
elems: make([][]int, len(colelems)),
|
||||
str: string(str),
|
||||
}
|
||||
copy(e.runes, str)
|
||||
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
str := string(runes)
|
||||
|
||||
elems := make([][]int, len(colelems))
|
||||
for i, ce := range colelems {
|
||||
e.elems[i] = append(e.elems[i], ce...)
|
||||
elems[i] = append(elems[i], ce...)
|
||||
if len(ce) == 0 {
|
||||
e.elems[i] = append(e.elems[i], []int{0, 0, 0, 0}...)
|
||||
elems[i] = append(elems[i], []int{0, 0, 0, 0}...)
|
||||
break
|
||||
}
|
||||
if len(ce) == 1 {
|
||||
e.elems[i] = append(e.elems[i], defaultSecondary)
|
||||
elems[i] = append(elems[i], defaultSecondary)
|
||||
}
|
||||
if len(ce) <= 2 {
|
||||
e.elems[i] = append(e.elems[i], defaultTertiary)
|
||||
elems[i] = append(elems[i], defaultTertiary)
|
||||
}
|
||||
if len(ce) <= 3 {
|
||||
e.elems[i] = append(e.elems[i], ce[0])
|
||||
elems[i] = append(elems[i], ce[0])
|
||||
}
|
||||
}
|
||||
for i, ce := range e.elems {
|
||||
for i, ce := range elems {
|
||||
isvar := false
|
||||
for _, j := range variables {
|
||||
if i == j {
|
||||
@ -165,13 +168,11 @@ func (b *Builder) Add(str []rune, colelems [][]int, variables []int) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
elems, err := convertLargeWeights(e.elems)
|
||||
elems, err := convertLargeWeights(elems)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.elems = elems
|
||||
b.entryMap[string(str)] = e
|
||||
b.entry = append(b.entry, e)
|
||||
b.root.newEntry(str, elems)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -290,6 +291,7 @@ func (b *Builder) build() (*table, error) {
|
||||
variableTop: uint32(b.varTop),
|
||||
}
|
||||
|
||||
b.root.sort()
|
||||
b.simplify()
|
||||
b.processExpansions() // requires simplify
|
||||
b.processContractions() // requires simplify
|
||||
@ -374,58 +376,37 @@ func equalCEArrays(a, b [][]int) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// genColElems generates a collation element array from the runes in str. This
|
||||
// assumes that all collation elements have already been added to the Builder.
|
||||
func (b *Builder) genColElems(str string) [][]int {
|
||||
elems := [][]int{}
|
||||
for _, r := range []rune(str) {
|
||||
if ee, ok := b.entryMap[string(r)]; !ok {
|
||||
elem := []int{implicitPrimary(r), defaultSecondary, defaultTertiary, int(r)}
|
||||
elems = append(elems, elem)
|
||||
} else {
|
||||
elems = append(elems, ee.elems...)
|
||||
}
|
||||
}
|
||||
return elems
|
||||
}
|
||||
|
||||
func (b *Builder) simplify() {
|
||||
// Runes that are a starter of a contraction should not be removed.
|
||||
// (To date, there is only Kannada character 0CCA.)
|
||||
keep := make(map[rune]bool)
|
||||
for _, e := range b.entry {
|
||||
o := b.root
|
||||
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if len(e.runes) > 1 {
|
||||
keep[e.runes[0]] = true
|
||||
}
|
||||
}
|
||||
// Remove entries for which the runes normalize (using NFD) to identical values.
|
||||
for _, e := range b.entry {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
s := e.str
|
||||
nfd := norm.NFD.String(s)
|
||||
if len(e.runes) > 1 || keep[e.runes[0]] || nfd == s {
|
||||
continue
|
||||
}
|
||||
if equalCEArrays(b.genColElems(nfd), e.elems) {
|
||||
delete(b.entryMap, s)
|
||||
if equalCEArrays(o.genColElems(nfd), e.elems) {
|
||||
e.remove()
|
||||
}
|
||||
}
|
||||
// Remove entries in b.entry that were removed from b.entryMap
|
||||
k := 0
|
||||
for _, e := range b.entry {
|
||||
if _, ok := b.entryMap[e.str]; ok {
|
||||
b.entry[k] = e
|
||||
k++
|
||||
}
|
||||
}
|
||||
b.entry = b.entry[:k]
|
||||
|
||||
// Tag entries for which the runes NFKD decompose to identical values.
|
||||
for _, e := range b.entry {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
s := e.str
|
||||
nfkd := norm.NFKD.String(s)
|
||||
if len(e.runes) > 1 || keep[e.runes[0]] || nfkd == s {
|
||||
continue
|
||||
}
|
||||
if reproducibleFromNFKD(e, e.elems, b.genColElems(nfkd)) {
|
||||
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
|
||||
e.decompose = true
|
||||
}
|
||||
}
|
||||
@ -510,7 +491,8 @@ func (b *Builder) appendExpansion(e *entry) int {
|
||||
// the extraction tables.
|
||||
func (b *Builder) processExpansions() {
|
||||
eidx := make(map[string]int)
|
||||
for _, e := range b.entry {
|
||||
o := b.root
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.expansion() {
|
||||
continue
|
||||
}
|
||||
@ -527,8 +509,9 @@ func (b *Builder) processExpansions() {
|
||||
func (b *Builder) processContractions() {
|
||||
// Collate contractions per starter rune.
|
||||
starters := []rune{}
|
||||
o := b.root
|
||||
cm := make(map[rune][]*entry)
|
||||
for _, e := range b.entry {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if e.contraction() {
|
||||
if len(e.str) > b.t.maxContractLen {
|
||||
b.t.maxContractLen = len(e.str)
|
||||
@ -541,7 +524,7 @@ func (b *Builder) processContractions() {
|
||||
}
|
||||
}
|
||||
// Add entries of single runes that are at a start of a contraction.
|
||||
for _, e := range b.entry {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.contraction() {
|
||||
r := e.runes[0]
|
||||
if _, ok := cm[r]; ok {
|
||||
@ -610,7 +593,8 @@ func (b *Builder) processContractions() {
|
||||
|
||||
func (b *Builder) buildTrie() {
|
||||
t := newNode()
|
||||
for _, e := range b.entry {
|
||||
o := b.root
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.skip() {
|
||||
ce := b.colElem(e)
|
||||
t.insert(e.runes[0], ce)
|
||||
|
@ -53,6 +53,7 @@ func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
|
||||
}
|
||||
}
|
||||
b.t = &table{}
|
||||
b.root.sort()
|
||||
return b
|
||||
}
|
||||
|
||||
@ -119,7 +120,7 @@ func TestGenColElems(t *testing.T) {
|
||||
b := newBuilder(t, simplifyTest[:5])
|
||||
|
||||
for i, tt := range genColTests {
|
||||
res := b.genColElems(tt.str)
|
||||
res := b.root.genColElems(tt.str)
|
||||
if !equalCEArrays(tt.ces, res) {
|
||||
t.Errorf("%d: result %X; want %X", i, res, tt.ces)
|
||||
}
|
||||
@ -142,22 +143,21 @@ var simplifyMarked = strArray{"\u01C5"}
|
||||
|
||||
func TestSimplify(t *testing.T) {
|
||||
b := newBuilder(t, simplifyTest)
|
||||
o := b.root
|
||||
b.simplify()
|
||||
|
||||
k := 0
|
||||
for i, tt := range simplifyTest {
|
||||
if simplifyRemoved.contains(tt.str) {
|
||||
continue
|
||||
}
|
||||
e := b.entry[k]
|
||||
k++
|
||||
e := o.find(tt.str)
|
||||
if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
|
||||
t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
|
||||
break
|
||||
}
|
||||
}
|
||||
k = 0
|
||||
for i, e := range b.entry {
|
||||
var i, k int
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
gold := simplifyMarked.contains(e.str)
|
||||
if gold {
|
||||
k++
|
||||
@ -165,6 +165,7 @@ func TestSimplify(t *testing.T) {
|
||||
if gold != e.decompose {
|
||||
t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
|
||||
}
|
||||
i++
|
||||
}
|
||||
if k != len(simplifyMarked) {
|
||||
t.Errorf(" an entry that should be marked as decompose was deleted")
|
||||
@ -184,10 +185,11 @@ func TestExpand(t *testing.T) {
|
||||
totalElements = 2 + 2 + 3 + totalExpansions
|
||||
)
|
||||
b := newBuilder(t, expandTest)
|
||||
o := &b.root
|
||||
b.processExpansions()
|
||||
|
||||
for i, tt := range expandTest {
|
||||
e := b.entry[i]
|
||||
e := o.front()
|
||||
for _, tt := range expandTest {
|
||||
exp := b.t.expandElem[e.expansionIndex:]
|
||||
if int(exp[0]) != len(tt.ces) {
|
||||
t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
|
||||
@ -198,6 +200,7 @@ func TestExpand(t *testing.T) {
|
||||
t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
|
||||
}
|
||||
}
|
||||
e, _ = e.nextIndexed()
|
||||
}
|
||||
// Verify uniquing.
|
||||
if len(b.t.expandElem) != totalElements {
|
||||
@ -230,11 +233,12 @@ func TestContract(t *testing.T) {
|
||||
totalElements = 5 + 5 + 4
|
||||
)
|
||||
b := newBuilder(t, contractTest)
|
||||
o := &b.root
|
||||
b.processContractions()
|
||||
|
||||
indexMap := make(map[int]bool)
|
||||
handleMap := make(map[rune]*entry)
|
||||
for _, e := range b.entry {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if e.contractionHandle.n > 0 {
|
||||
handleMap[e.runes[0]] = e
|
||||
indexMap[e.contractionHandle.index] = true
|
||||
|
@ -5,6 +5,7 @@
|
||||
package build
|
||||
|
||||
import (
|
||||
"exp/locale/collate"
|
||||
"fmt"
|
||||
"unicode"
|
||||
)
|
||||
@ -197,3 +198,52 @@ func implicitPrimary(r rune) int {
|
||||
}
|
||||
return int(r) + otherOffset
|
||||
}
|
||||
|
||||
// nextWeight computes the first possible collation weights following elems
|
||||
// for the given level.
|
||||
func nextWeight(level collate.Level, elems [][]int) [][]int {
|
||||
nce := make([][]int, len(elems))
|
||||
copy(nce, elems)
|
||||
|
||||
if level != collate.Identity {
|
||||
nce[0] = make([]int, len(elems[0]))
|
||||
copy(nce[0], elems[0])
|
||||
nce[0][level]++
|
||||
if level < collate.Secondary {
|
||||
nce[0][collate.Secondary] = defaultSecondary
|
||||
}
|
||||
if level < collate.Tertiary {
|
||||
nce[0][collate.Tertiary] = defaultTertiary
|
||||
}
|
||||
}
|
||||
return nce
|
||||
}
|
||||
|
||||
func nextVal(elems [][]int, i int, level collate.Level) (index, value int) {
|
||||
for ; i < len(elems) && elems[i][level] == 0; i++ {
|
||||
}
|
||||
if i < len(elems) {
|
||||
return i, elems[i][level]
|
||||
}
|
||||
return i, 0
|
||||
}
|
||||
|
||||
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
|
||||
// It also returns the collation level at which the difference is found.
|
||||
func compareWeights(a, b [][]int) (result int, level collate.Level) {
|
||||
for level := collate.Primary; level < collate.Identity; level++ {
|
||||
var va, vb int
|
||||
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
|
||||
ia, va = nextVal(a, ia, level)
|
||||
ib, vb = nextVal(b, ib, level)
|
||||
if va != vb {
|
||||
if va < vb {
|
||||
return -1, level
|
||||
} else {
|
||||
return 1, level
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, collate.Identity
|
||||
}
|
||||
|
@ -4,7 +4,10 @@
|
||||
|
||||
package build
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"exp/locale/collate"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type ceTest struct {
|
||||
f func(in []int) (uint32, error)
|
||||
@ -81,3 +84,114 @@ func TestColElem(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type weightsTest struct {
|
||||
a, b [][]int
|
||||
level collate.Level
|
||||
result int
|
||||
}
|
||||
|
||||
var nextWeightTests = []weightsTest{
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{101, defaultSecondary, defaultTertiary, 0}},
|
||||
level: collate.Primary,
|
||||
},
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{100, 21, defaultTertiary, 0}},
|
||||
level: collate.Secondary,
|
||||
},
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{100, 20, 6, 0}},
|
||||
level: collate.Tertiary,
|
||||
},
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{100, 20, 5, 0}},
|
||||
level: collate.Identity,
|
||||
},
|
||||
}
|
||||
|
||||
// extra is an additional collation element that the weight tests append to
// both inputs to check that trailing elements do not change the outcome.
var extra = []int{200, 32, 8, 0}
|
||||
|
||||
func TestNextWeight(t *testing.T) {
|
||||
for i, tt := range nextWeightTests {
|
||||
test := func(tt weightsTest, a, gold [][]int) {
|
||||
res := nextWeight(tt.level, a)
|
||||
if !equalCEArrays(gold, res) {
|
||||
t.Errorf("%d: expected weights %d; found %d", i, tt.b, res)
|
||||
}
|
||||
}
|
||||
test(tt, tt.a, tt.b)
|
||||
test(tt, append(tt.a, extra), append(tt.b, extra))
|
||||
}
|
||||
}
|
||||
|
||||
var compareTests = []weightsTest{
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
collate.Identity,
|
||||
0,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}, extra},
|
||||
[][]int{{100, 20, 5, 1}},
|
||||
collate.Primary,
|
||||
1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{101, 20, 5, 0}},
|
||||
collate.Primary,
|
||||
-1,
|
||||
},
|
||||
{
|
||||
[][]int{{101, 20, 5, 0}},
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
collate.Primary,
|
||||
1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 0, 0, 0}, {0, 20, 5, 0}},
|
||||
[][]int{{0, 20, 5, 0}, {100, 0, 0, 0}},
|
||||
collate.Identity,
|
||||
0,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{100, 21, 5, 0}},
|
||||
collate.Secondary,
|
||||
-1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{100, 20, 2, 0}},
|
||||
collate.Tertiary,
|
||||
1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 1}},
|
||||
[][]int{{100, 20, 5, 2}},
|
||||
collate.Quaternary,
|
||||
-1,
|
||||
},
|
||||
}
|
||||
|
||||
func TestCompareWeights(t *testing.T) {
|
||||
for i, tt := range compareTests {
|
||||
test := func(tt weightsTest, a, b [][]int) {
|
||||
res, level := compareWeights(a, b)
|
||||
if res != tt.result {
|
||||
t.Errorf("%d: expected comparisson result %d; found %d", i, tt.result, res)
|
||||
}
|
||||
if level != tt.level {
|
||||
t.Errorf("%d: expected level %d; found %d", i, tt.level, level)
|
||||
}
|
||||
}
|
||||
test(tt, tt.a, tt.b)
|
||||
test(tt, append(tt.a, extra), append(tt.b, extra))
|
||||
}
|
||||
}
|
||||
|
230
src/pkg/exp/locale/collate/build/order.go
Normal file
230
src/pkg/exp/locale/collate/build/order.go
Normal file
@ -0,0 +1,230 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"exp/locale/collate"
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// logicalAnchor marks an entry as a logical reset position and records on
// which side of equally weighted entries it sorts (see entryLess).
type logicalAnchor int

// The constants use iota so that all three carry the logicalAnchor type.
// Writing "noAnchor = 0" after a typed constant would declare an untyped
// integer constant instead.
const (
	firstAnchor logicalAnchor = iota - 1 // -1: sorts before entries with equal weights
	noAnchor                             // 0: a regular entry, not an anchor
	lastAnchor                           // 1: sorts after entries with equal weights
)
|
||||
|
||||
// TODO: move type entry from builder.go to this file.
|
||||
|
||||
// nextIndexed gets the next entry that needs to be stored in the table.
|
||||
// It returns the entry and the collation level at which the next entry differs
|
||||
// from the current entry.
|
||||
// Entries that can be explicitly derived and logical reset positions are
|
||||
// examples of entries that will not be indexed.
|
||||
func (e *entry) nextIndexed() (*entry, collate.Level) {
|
||||
level := e.level
|
||||
for e = e.next; e != nil && e.exclude; e = e.next {
|
||||
if e.level < level {
|
||||
level = e.level
|
||||
}
|
||||
}
|
||||
return e, level
|
||||
}
|
||||
|
||||
// remove unlinks entry e from the sorted chain and clears the collation
|
||||
// elements. e may not be at the front or end of the list. This should always
|
||||
// be the case, as the front and end of the list are always logical anchors,
|
||||
// which may not be removed.
|
||||
func (e *entry) remove() {
|
||||
if e.logical != noAnchor {
|
||||
log.Fatalf("may not remove anchor %q", e.str)
|
||||
}
|
||||
if e.prev != nil {
|
||||
e.prev.next = e.next
|
||||
}
|
||||
if e.next != nil {
|
||||
e.next.prev = e.prev
|
||||
}
|
||||
e.elems = nil
|
||||
}
|
||||
|
||||
// insertAfter inserts t after e.
|
||||
func (e *entry) insertAfter(n *entry) {
|
||||
if e == n {
|
||||
panic("e == anchor")
|
||||
}
|
||||
if e == nil {
|
||||
panic("unexpected nil anchor")
|
||||
}
|
||||
n.remove()
|
||||
n.decompose = false // redo decomposition test
|
||||
|
||||
n.next = e.next
|
||||
n.prev = e
|
||||
e.next.prev = n
|
||||
e.next = n
|
||||
}
|
||||
|
||||
// entryLess returns true if a sorts before b and false otherwise.
|
||||
func entryLess(a, b *entry) bool {
|
||||
if res, _ := compareWeights(a.elems, b.elems); res != 0 {
|
||||
return res == -1
|
||||
}
|
||||
if a.logical != noAnchor {
|
||||
return a.logical == firstAnchor
|
||||
}
|
||||
if b.logical != noAnchor {
|
||||
return b.logical == lastAnchor
|
||||
}
|
||||
return a.str < b.str
|
||||
}
|
||||
|
||||
type sortedEntries []*entry
|
||||
|
||||
func (s sortedEntries) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortedEntries) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
func (s sortedEntries) Less(i, j int) bool {
|
||||
return entryLess(s[i], s[j])
|
||||
}
|
||||
|
||||
type ordering struct {
|
||||
entryMap map[string]*entry
|
||||
ordered []*entry
|
||||
}
|
||||
|
||||
// insert inserts e into both entryMap and ordered.
|
||||
// Note that insert simply appends e to ordered. To reattain a sorted
|
||||
// order, o.sort() should be called.
|
||||
func (o *ordering) insert(e *entry) {
|
||||
o.entryMap[e.str] = e
|
||||
o.ordered = append(o.ordered, e)
|
||||
}
|
||||
|
||||
// newEntry creates a new entry for the given info and inserts it into
|
||||
// the index.
|
||||
func (o *ordering) newEntry(s string, ces [][]int) *entry {
|
||||
e := &entry{
|
||||
runes: []rune(s),
|
||||
elems: ces,
|
||||
str: s,
|
||||
}
|
||||
o.insert(e)
|
||||
return e
|
||||
}
|
||||
|
||||
// find looks up and returns the entry for the given string.
|
||||
// It returns nil if str is not in the index and if an implicit value
|
||||
// cannot be derived, that is, if str represents more than one rune.
|
||||
func (o *ordering) find(str string) *entry {
|
||||
e := o.entryMap[str]
|
||||
if e == nil {
|
||||
r := []rune(str)
|
||||
if len(r) == 1 {
|
||||
e = o.newEntry(string(r[0]), [][]int{
|
||||
[]int{
|
||||
implicitPrimary(r[0]),
|
||||
defaultSecondary,
|
||||
defaultTertiary,
|
||||
int(r[0]),
|
||||
},
|
||||
})
|
||||
e.exclude = true // do not index implicits
|
||||
}
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// makeRootOrdering returns a newly initialized ordering value and populates
|
||||
// it with a set of logical reset points that can be used as anchors.
|
||||
// The anchors first_tertiary_ignorable and __END__ will always sort at
|
||||
// the beginning and end, respectively. This means that prev and next are non-nil
|
||||
// for any indexed entry.
|
||||
func makeRootOrdering() ordering {
|
||||
const max = unicode.MaxRune
|
||||
o := ordering{
|
||||
entryMap: make(map[string]*entry),
|
||||
}
|
||||
insert := func(typ logicalAnchor, s string, ce []int) {
|
||||
// Use key format as used in UCA rules.
|
||||
e := o.newEntry(fmt.Sprintf("[%s]", s), [][]int{ce})
|
||||
// Also add index entry for XML format.
|
||||
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(s, " ", "_", -1))] = e
|
||||
e.runes = nil
|
||||
e.exclude = true
|
||||
e.logical = typ
|
||||
}
|
||||
insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
|
||||
insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
|
||||
insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
|
||||
insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
|
||||
insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
|
||||
return o
|
||||
}
|
||||
|
||||
// clone copies all ordering of es into a new ordering value.
|
||||
func (o *ordering) clone() *ordering {
|
||||
o.sort()
|
||||
oo := ordering{
|
||||
entryMap: make(map[string]*entry),
|
||||
}
|
||||
for _, e := range o.ordered {
|
||||
ne := &entry{
|
||||
runes: e.runes,
|
||||
elems: e.elems,
|
||||
str: e.str,
|
||||
decompose: e.decompose,
|
||||
exclude: e.exclude,
|
||||
}
|
||||
oo.insert(ne)
|
||||
}
|
||||
oo.sort() // link all ordering.
|
||||
return &oo
|
||||
}
|
||||
|
||||
// front returns the first entry to be indexed.
|
||||
// It assumes that sort() has been called.
|
||||
func (o *ordering) front() *entry {
|
||||
e := o.ordered[0]
|
||||
if e.prev != nil {
|
||||
log.Panicf("unexpected first entry: %v", e)
|
||||
}
|
||||
// The first entry is always a logical position, which should not be indexed.
|
||||
e, _ = e.nextIndexed()
|
||||
return e
|
||||
}
|
||||
|
||||
// sort sorts all ordering based on their collation elements and initializes
|
||||
// the prev, next, and level fields accordingly.
|
||||
func (o *ordering) sort() {
|
||||
sort.Sort(sortedEntries(o.ordered))
|
||||
l := o.ordered
|
||||
for i := 1; i < len(l); i++ {
|
||||
k := i - 1
|
||||
l[k].next = l[i]
|
||||
_, l[k].level = compareWeights(l[k].elems, l[i].elems)
|
||||
l[i].prev = l[k]
|
||||
}
|
||||
}
|
||||
|
||||
// genColElems generates a collation element array from the runes in str. This
|
||||
// assumes that all collation elements have already been added to the Builder.
|
||||
func (o *ordering) genColElems(str string) [][]int {
|
||||
elems := [][]int{}
|
||||
for _, r := range []rune(str) {
|
||||
elems = append(elems, o.find(string(r)).elems...)
|
||||
}
|
||||
return elems
|
||||
}
|
197
src/pkg/exp/locale/collate/build/order_test.go
Normal file
197
src/pkg/exp/locale/collate/build/order_test.go
Normal file
@ -0,0 +1,197 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"exp/locale/collate"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// entryTest pairs a function that maps weights to a packed value with an
// argument and the expected value.
// NOTE(review): not referenced by any test visible in this file — confirm
// whether it is still needed.
type entryTest struct {
	f   func(in []int) (uint32, error) // function under test
	arg []int                          // input passed to f
	val uint32                         // expected result
}
|
||||
|
||||
// makeList returns a list of entries of length n+2, with n normal
|
||||
// entries plus a leading and trailing anchor.
|
||||
func makeList(n int) []*entry {
|
||||
es := make([]*entry, n+2)
|
||||
weights := [][]int{{100, 20, 5, 0}}
|
||||
for i := range es {
|
||||
runes := []rune{rune(i)}
|
||||
es[i] = &entry{
|
||||
runes: runes,
|
||||
elems: weights,
|
||||
}
|
||||
weights = nextWeight(collate.Primary, weights)
|
||||
}
|
||||
for i := 1; i < len(es); i++ {
|
||||
es[i-1].next = es[i]
|
||||
es[i].prev = es[i-1]
|
||||
_, es[i-1].level = compareWeights(es[i-1].elems, es[i].elems)
|
||||
}
|
||||
es[0].exclude = true
|
||||
es[0].logical = firstAnchor
|
||||
es[len(es)-1].exclude = true
|
||||
es[len(es)-1].logical = lastAnchor
|
||||
return es
|
||||
}
|
||||
|
||||
func TestNextIndexed(t *testing.T) {
|
||||
const n = 5
|
||||
es := makeList(n)
|
||||
for i := int64(0); i < 1<<n; i++ {
|
||||
mask := strconv.FormatInt(i+(1<<n), 2)
|
||||
for i, c := range mask {
|
||||
es[i].exclude = c == '1'
|
||||
}
|
||||
e := es[0]
|
||||
for i, c := range mask {
|
||||
if c == '0' {
|
||||
e, _ = e.nextIndexed()
|
||||
if e != es[i] {
|
||||
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
|
||||
}
|
||||
}
|
||||
}
|
||||
if e, _ = e.nextIndexed(); e != nil {
|
||||
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemove(t *testing.T) {
|
||||
const n = 5
|
||||
for i := int64(0); i < 1<<n; i++ {
|
||||
es := makeList(n)
|
||||
mask := strconv.FormatInt(i+(1<<n), 2)
|
||||
for i, c := range mask {
|
||||
if c == '0' {
|
||||
es[i].remove()
|
||||
}
|
||||
}
|
||||
e := es[0]
|
||||
for i, c := range mask {
|
||||
if c == '1' {
|
||||
if e != es[i] {
|
||||
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
|
||||
}
|
||||
e, _ = e.nextIndexed()
|
||||
}
|
||||
}
|
||||
if e != nil {
|
||||
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nextPerm rearranges a in place into the next permutation in lexicographic
// order. The starting permutation is assumed to be a list of integers sorted
// in increasing order. It returns false, leaving a untouched, if there are
// no more permutations left.
func nextPerm(a []int) bool {
	// Find the rightmost position that is smaller than its successor.
	pivot := len(a) - 2
	for pivot >= 0 && a[pivot] >= a[pivot+1] {
		pivot--
	}
	if pivot < 0 {
		return false
	}
	// Exchange it with the rightmost larger value.
	for j := len(a) - 1; j > pivot; j-- {
		if a[j] > a[pivot] {
			a[pivot], a[j] = a[j], a[pivot]
			break
		}
	}
	// Reverse the tail so that it is in increasing order again.
	for lo, hi := pivot+1, len(a)-1; lo < hi; lo, hi = lo+1, hi-1 {
		a[lo], a[hi] = a[hi], a[lo]
	}
	return true
}
|
||||
|
||||
func TestInsertAfter(t *testing.T) {
|
||||
const n = 5
|
||||
orig := makeList(n)
|
||||
perm := make([]int, n)
|
||||
for i := range perm {
|
||||
perm[i] = i + 1
|
||||
}
|
||||
for ok := true; ok; ok = nextPerm(perm) {
|
||||
es := makeList(n)
|
||||
last := es[0]
|
||||
for _, i := range perm {
|
||||
last.insertAfter(es[i])
|
||||
last = es[i]
|
||||
}
|
||||
e := es[0]
|
||||
for _, i := range perm {
|
||||
e, _ = e.nextIndexed()
|
||||
if e.runes[0] != orig[i].runes[0] {
|
||||
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type entryLessTest struct {
|
||||
a, b *entry
|
||||
res bool
|
||||
}
|
||||
|
||||
// w1 and w2 are weights used by entryLessTests; w1 has the lower primary
// weight.
var (
	w1 = [][]int{{100, 20, 5, 5}}
	w2 = [][]int{{101, 20, 5, 5}}
)
|
||||
|
||||
var entryLessTests = []entryLessTest{
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "a", elems: w1},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "a", elems: w2},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "b", elems: w1},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w2},
|
||||
&entry{str: "a", elems: w1},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "c", elems: w1},
|
||||
&entry{str: "b", elems: w1},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "a", elems: w1, logical: firstAnchor},
|
||||
&entry{str: "a", elems: w1},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "b", elems: w1, logical: firstAnchor},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "b", elems: w1},
|
||||
&entry{str: "a", elems: w1, logical: lastAnchor},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w1, logical: lastAnchor},
|
||||
&entry{str: "c", elems: w1},
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
func TestEntryLess(t *testing.T) {
|
||||
for i, tt := range entryLessTests {
|
||||
if res := entryLess(tt.a, tt.b); res != tt.res {
|
||||
t.Errorf("%d: was %v; want %v", i, res, tt.res)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user