mirror of
https://github.com/golang/go
synced 2024-11-20 04:04:41 -07:00
exp/locale/collate: implementation of tailorings and table generation.
Tailorings are represented by removing entries from a linked list and reinserting them. After all tailorings are done, missing weights are computed and verified. This implementation assumes that entries that are used in expansions are not reinserted at a later point, which considerably simplifies the implementation.

R=r
CC=golang-dev
https://golang.org/cl/6739052
This commit is contained in:
parent
4c1a6f84f8
commit
b8b329451c
@ -58,7 +58,9 @@ type Tailoring struct {
|
||||
id string
|
||||
builder *Builder
|
||||
index *ordering
|
||||
// TODO: implement.
|
||||
|
||||
anchor *entry
|
||||
before bool
|
||||
}
|
||||
|
||||
// NewBuilder returns a new Builder.
|
||||
@ -80,6 +82,7 @@ func (b *Builder) Tailoring(locale string) *Tailoring {
|
||||
builder: b,
|
||||
index: b.root.clone(),
|
||||
}
|
||||
t.index.id = t.id
|
||||
b.locale = append(b.locale, t)
|
||||
return t
|
||||
}
|
||||
@ -95,7 +98,6 @@ func (b *Builder) Tailoring(locale string) *Tailoring {
|
||||
// a value for each colelem that is a variable. (See the reference above.)
|
||||
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
str := string(runes)
|
||||
|
||||
elems := make([][]int, len(colelems))
|
||||
for i, ce := range colelems {
|
||||
elems[i] = append(elems[i], ce...)
|
||||
@ -144,6 +146,21 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Tailoring) setAnchor(anchor string) error {
|
||||
anchor = norm.NFD.String(anchor)
|
||||
a := t.index.find(anchor)
|
||||
if a == nil {
|
||||
a = t.index.newEntry(anchor, nil)
|
||||
a.implicit = true
|
||||
for _, r := range []rune(anchor) {
|
||||
e := t.index.find(string(r))
|
||||
e.lock = true
|
||||
}
|
||||
}
|
||||
t.anchor = a
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetAnchor sets the point after which elements passed in subsequent calls to
|
||||
// Insert will be inserted. It is equivalent to the reset directive in an LDML
|
||||
// specification. See Insert for an example.
|
||||
@ -151,14 +168,20 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
// <first_tertiary_ignorable/>, <last_tertiary_ignorable/>, <first_primary_ignorable/>,
|
||||
// and <last_non_ignorable/>.
|
||||
func (t *Tailoring) SetAnchor(anchor string) error {
|
||||
// TODO: implement.
|
||||
if err := t.setAnchor(anchor); err != nil {
|
||||
return err
|
||||
}
|
||||
t.before = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
|
||||
// Insert will insert entries before the anchor.
|
||||
func (t *Tailoring) SetAnchorBefore(anchor string) error {
|
||||
// TODO: implement.
|
||||
if err := t.setAnchor(anchor); err != nil {
|
||||
return err
|
||||
}
|
||||
t.before = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -195,7 +218,112 @@ func (t *Tailoring) SetAnchorBefore(anchor string) error {
|
||||
// t.SetAnchor("<last_primary_ignorable/>")
|
||||
// t.Insert(collate.Primary, "0", "")
|
||||
func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
|
||||
// TODO: implement.
|
||||
if t.anchor == nil {
|
||||
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
|
||||
}
|
||||
str = norm.NFD.String(str)
|
||||
e := t.index.find(str)
|
||||
if e == nil {
|
||||
e = t.index.newEntry(str, nil)
|
||||
} else if e.logical != noAnchor {
|
||||
return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
|
||||
}
|
||||
if e.lock {
|
||||
return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
|
||||
}
|
||||
a := t.anchor
|
||||
// Find the first element after the anchor which differs at a level smaller or
|
||||
// equal to the given level. Then insert at this position.
|
||||
// See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
|
||||
e.before = t.before
|
||||
if t.before {
|
||||
t.before = false
|
||||
if a.prev == nil {
|
||||
a.insertBefore(e)
|
||||
} else {
|
||||
for a = a.prev; a.level > level; a = a.prev {
|
||||
}
|
||||
a.insertAfter(e)
|
||||
}
|
||||
e.level = level
|
||||
} else {
|
||||
for ; a.level > level; a = a.next {
|
||||
}
|
||||
e.level = a.level
|
||||
if a != e {
|
||||
a.insertAfter(e)
|
||||
a.level = level
|
||||
} else {
|
||||
// We don't set a to prev itself. This has the effect of the entry
|
||||
// getting new collation elements that are an increment of itself.
|
||||
// This is intentional.
|
||||
a.prev.level = level
|
||||
}
|
||||
}
|
||||
e.extend = norm.NFD.String(extend)
|
||||
e.exclude = false
|
||||
e.elems = nil
|
||||
t.anchor = e
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *ordering) getWeight(e *entry) [][]int {
|
||||
if len(e.elems) == 0 && e.logical == noAnchor {
|
||||
if e.implicit {
|
||||
for _, r := range e.runes {
|
||||
e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
|
||||
}
|
||||
} else if e.before {
|
||||
count := [collate.Identity + 1]int{}
|
||||
a := e
|
||||
for ; a.elems == nil && !a.implicit; a = a.next {
|
||||
count[a.level]++
|
||||
}
|
||||
e.elems = append([][]int(nil), make([]int, len(a.elems[0])))
|
||||
copy(e.elems[0], a.elems[0])
|
||||
for i := collate.Primary; i < collate.Quaternary; i++ {
|
||||
if count[i] != 0 {
|
||||
e.elems[0][i] -= count[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if e.prev != nil {
|
||||
o.verifyWeights(e.prev, e, e.prev.level)
|
||||
}
|
||||
} else {
|
||||
prev := e.prev
|
||||
e.elems = nextWeight(prev.level, o.getWeight(prev))
|
||||
o.verifyWeights(e, e.next, e.level)
|
||||
}
|
||||
}
|
||||
return e.elems
|
||||
}
|
||||
|
||||
func (o *ordering) addExtension(e *entry) {
|
||||
if ex := o.find(e.extend); ex != nil {
|
||||
e.elems = append(e.elems, ex.elems...)
|
||||
} else {
|
||||
for _, r := range []rune(e.extend) {
|
||||
e.elems = append(e.elems, o.find(string(r)).elems...)
|
||||
}
|
||||
}
|
||||
e.extend = ""
|
||||
}
|
||||
|
||||
func (o *ordering) verifyWeights(a, b *entry, level collate.Level) error {
|
||||
if level == collate.Identity || b == nil || b.elems == nil || a.elems == nil {
|
||||
return nil
|
||||
}
|
||||
for i := collate.Primary; i < level; i++ {
|
||||
if a.elems[0][i] < b.elems[0][i] {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if a.elems[0][level] >= b.elems[0][level] {
|
||||
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
|
||||
log.Println(err)
|
||||
// TODO: return the error instead, or better, fix the conflicting entry by making room.
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -205,7 +333,19 @@ func (b *Builder) error(e error) {
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) errorID(locale string, e error) {
|
||||
if e != nil {
|
||||
b.err = fmt.Errorf("%s:%v", locale, e)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) buildOrdering(o *ordering) {
|
||||
for _, e := range o.ordered {
|
||||
o.getWeight(e)
|
||||
}
|
||||
for _, e := range o.ordered {
|
||||
o.addExtension(e)
|
||||
}
|
||||
o.sort()
|
||||
simplify(o)
|
||||
b.processExpansions(o) // requires simplify
|
||||
@ -215,7 +355,7 @@ func (b *Builder) buildOrdering(o *ordering) {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.skip() {
|
||||
ce, err := e.encode()
|
||||
b.error(err)
|
||||
b.errorID(o.id, err)
|
||||
t.insert(e.runes[0], ce)
|
||||
}
|
||||
}
|
||||
@ -252,7 +392,11 @@ func (b *Builder) Build() (*collate.Collator, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return collate.Init(t), nil
|
||||
c := collate.Init(t)
|
||||
if c == nil {
|
||||
panic("generated table of incompatible type")
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Build builds a Collator for Tailoring t.
|
||||
@ -308,6 +452,10 @@ func reproducibleFromNFKD(e *entry, exp, nfkd [][]int) bool {
|
||||
if i >= 2 && ce[2] != maxTertiary {
|
||||
return false
|
||||
}
|
||||
if _, err := makeCE(ce); err != nil {
|
||||
// Simply return false. The error will be caught elsewhere.
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
@ -332,12 +480,11 @@ func simplify(o *ordering) {
|
||||
e.remove()
|
||||
}
|
||||
}
|
||||
|
||||
// Tag entries for which the runes NFKD decompose to identical values.
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
s := e.str
|
||||
nfkd := norm.NFKD.String(s)
|
||||
if len(e.runes) > 1 || keep[e.runes[0]] || nfkd == s {
|
||||
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == s {
|
||||
continue
|
||||
}
|
||||
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
|
||||
@ -459,7 +606,7 @@ func (b *Builder) processContractions(o *ordering) {
|
||||
elems := []uint32{}
|
||||
for _, e := range es {
|
||||
ce, err := e.encodeBase()
|
||||
b.error(err)
|
||||
b.errorID(o.id, err)
|
||||
elems = append(elems, ce)
|
||||
}
|
||||
key = fmt.Sprintf("%v", elems)
|
||||
|
@ -26,16 +26,21 @@ const (
|
||||
// Collation Element Table.
|
||||
// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
|
||||
type entry struct {
|
||||
runes []rune
|
||||
elems [][]int // the collation elements for runes
|
||||
str string // same as string(runes)
|
||||
str string // same as string(runes)
|
||||
runes []rune
|
||||
elems [][]int // the collation elements
|
||||
extend string // weights of extend to be appended to elems
|
||||
before bool // weights relative to next instead of previous.
|
||||
lock bool // entry is used in extension and can no longer be moved.
|
||||
|
||||
// prev, next, and level are used to keep track of tailorings.
|
||||
prev, next *entry
|
||||
level collate.Level // next differs at this level
|
||||
skipRemove bool // do not unlink when removed
|
||||
|
||||
decompose bool // can use NFKD decomposition to generate elems
|
||||
exclude bool // do not include in table
|
||||
implicit bool // derived, is not included in the list
|
||||
logical logicalAnchor
|
||||
|
||||
expansionIndex int // used to store index into expansion table
|
||||
@ -44,8 +49,8 @@ type entry struct {
|
||||
}
|
||||
|
||||
func (e *entry) String() string {
|
||||
return fmt.Sprintf("%X -> %X (ch:%x; ci:%d, ei:%d)",
|
||||
e.runes, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
|
||||
return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
|
||||
e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
|
||||
}
|
||||
|
||||
func (e *entry) skip() bool {
|
||||
@ -71,7 +76,7 @@ func (e *entry) contractionStarter() bool {
|
||||
// examples of entries that will not be indexed.
|
||||
func (e *entry) nextIndexed() (*entry, collate.Level) {
|
||||
level := e.level
|
||||
for e = e.next; e != nil && e.exclude; e = e.next {
|
||||
for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
|
||||
if e.level < level {
|
||||
level = e.level
|
||||
}
|
||||
@ -87,16 +92,20 @@ func (e *entry) remove() {
|
||||
if e.logical != noAnchor {
|
||||
log.Fatalf("may not remove anchor %q", e.str)
|
||||
}
|
||||
if e.prev != nil {
|
||||
e.prev.next = e.next
|
||||
}
|
||||
if e.next != nil {
|
||||
e.next.prev = e.prev
|
||||
}
|
||||
// TODO: need to set e.prev.level to e.level if e.level is smaller?
|
||||
e.elems = nil
|
||||
if !e.skipRemove {
|
||||
if e.prev != nil {
|
||||
e.prev.next = e.next
|
||||
}
|
||||
if e.next != nil {
|
||||
e.next.prev = e.prev
|
||||
}
|
||||
}
|
||||
e.skipRemove = false
|
||||
}
|
||||
|
||||
// insertAfter inserts t after e.
|
||||
// insertAfter inserts n after e.
|
||||
func (e *entry) insertAfter(n *entry) {
|
||||
if e == n {
|
||||
panic("e == anchor")
|
||||
@ -109,10 +118,31 @@ func (e *entry) insertAfter(n *entry) {
|
||||
|
||||
n.next = e.next
|
||||
n.prev = e
|
||||
e.next.prev = n
|
||||
if e.next != nil {
|
||||
e.next.prev = n
|
||||
}
|
||||
e.next = n
|
||||
}
|
||||
|
||||
// insertBefore inserts n before e.
|
||||
func (e *entry) insertBefore(n *entry) {
|
||||
if e == n {
|
||||
panic("e == anchor")
|
||||
}
|
||||
if e == nil {
|
||||
panic("unexpected nil anchor")
|
||||
}
|
||||
n.remove()
|
||||
n.decompose = false // redo decomposition test
|
||||
|
||||
n.prev = e.prev
|
||||
n.next = e
|
||||
if e.prev != nil {
|
||||
e.prev.next = n
|
||||
}
|
||||
e.prev = n
|
||||
}
|
||||
|
||||
func (e *entry) encodeBase() (ce uint32, err error) {
|
||||
switch {
|
||||
case e.expansion():
|
||||
@ -178,6 +208,7 @@ func (s sortedEntries) Less(i, j int) bool {
|
||||
}
|
||||
|
||||
type ordering struct {
|
||||
id string
|
||||
entryMap map[string]*entry
|
||||
ordered []*entry
|
||||
handle *trieHandle
|
||||
@ -187,7 +218,14 @@ type ordering struct {
|
||||
// Note that insert simply appends e to ordered. To reattain a sorted
|
||||
// order, o.sort() should be called.
|
||||
func (o *ordering) insert(e *entry) {
|
||||
o.entryMap[e.str] = e
|
||||
if e.logical == noAnchor {
|
||||
o.entryMap[e.str] = e
|
||||
} else {
|
||||
// Use key format as used in UCA rules.
|
||||
o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
|
||||
// Also add index entry for XML format.
|
||||
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
|
||||
}
|
||||
o.ordered = append(o.ordered, e)
|
||||
}
|
||||
|
||||
@ -236,13 +274,13 @@ func makeRootOrdering() ordering {
|
||||
entryMap: make(map[string]*entry),
|
||||
}
|
||||
insert := func(typ logicalAnchor, s string, ce []int) {
|
||||
// Use key format as used in UCA rules.
|
||||
e := o.newEntry(fmt.Sprintf("[%s]", s), [][]int{ce})
|
||||
// Also add index entry for XML format.
|
||||
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(s, " ", "_", -1))] = e
|
||||
e.runes = nil
|
||||
e.exclude = true
|
||||
e.logical = typ
|
||||
e := &entry{
|
||||
elems: [][]int{ce},
|
||||
str: s,
|
||||
exclude: true,
|
||||
logical: typ,
|
||||
}
|
||||
o.insert(e)
|
||||
}
|
||||
insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
|
||||
insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
|
||||
@ -252,6 +290,29 @@ func makeRootOrdering() ordering {
|
||||
return o
|
||||
}
|
||||
|
||||
// patchForInsert eliminates entries from the list with more than one collation element.
|
||||
// The next and prev fields of the eliminated entries still point to appropriate
|
||||
// values in the newly created list.
|
||||
// It requires that sort has been called.
|
||||
func (o *ordering) patchForInsert() {
|
||||
for i := 0; i < len(o.ordered)-1; {
|
||||
e := o.ordered[i]
|
||||
lev := e.level
|
||||
n := e.next
|
||||
for ; n != nil && len(n.elems) > 1; n = n.next {
|
||||
if n.level < lev {
|
||||
lev = n.level
|
||||
}
|
||||
n.skipRemove = true
|
||||
}
|
||||
for ; o.ordered[i] != n; i++ {
|
||||
o.ordered[i].level = lev
|
||||
o.ordered[i].next = n
|
||||
o.ordered[i+1].prev = e
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// clone copies all ordering of es into a new ordering value.
|
||||
func (o *ordering) clone() *ordering {
|
||||
o.sort()
|
||||
@ -270,6 +331,7 @@ func (o *ordering) clone() *ordering {
|
||||
oo.insert(ne)
|
||||
}
|
||||
oo.sort() // link all ordering.
|
||||
oo.patchForInsert()
|
||||
return &oo
|
||||
}
|
||||
|
||||
|
@ -128,6 +128,9 @@ func TestInsertAfter(t *testing.T) {
|
||||
last.insertAfter(es[i])
|
||||
last = es[i]
|
||||
}
|
||||
for _, e := range es {
|
||||
e.elems = es[0].elems
|
||||
}
|
||||
e := es[0]
|
||||
for _, i := range perm {
|
||||
e, _ = e.nextIndexed()
|
||||
@ -139,6 +142,34 @@ func TestInsertAfter(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestInsertBefore(t *testing.T) {
|
||||
const n = 5
|
||||
orig := makeList(n)
|
||||
perm := make([]int, n)
|
||||
for i := range perm {
|
||||
perm[i] = i + 1
|
||||
}
|
||||
for ok := true; ok; ok = nextPerm(perm) {
|
||||
es := makeList(n)
|
||||
last := es[len(es)-1]
|
||||
for _, i := range perm {
|
||||
last.insertBefore(es[i])
|
||||
last = es[i]
|
||||
}
|
||||
for _, e := range es {
|
||||
e.elems = es[0].elems
|
||||
}
|
||||
e := es[0]
|
||||
for i := n - 1; i >= 0; i-- {
|
||||
e, _ = e.nextIndexed()
|
||||
if e.runes[0] != rune(perm[i]) {
|
||||
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type entryLessTest struct {
|
||||
a, b *entry
|
||||
res bool
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user