mirror of
https://github.com/golang/go
synced 2024-11-12 04:40:22 -07:00
exp/locale/collate: implementation of main collation functionality for
key and simple comparisson. Search is not yet implemented in this CL. Changed some of the types of table_test.go to allow reuse in the new test. Also reduced number of primary values for illegal runes to 1 (both map to the same). R=r CC=golang-dev https://golang.org/cl/6202062
This commit is contained in:
parent
a2004546a9
commit
ec099b2bc7
@ -153,7 +153,7 @@ const (
|
||||
rareUnifiedOffset = 0x1FB40
|
||||
otherOffset = 0x4FB40
|
||||
illegalOffset = otherOffset + unicode.MaxRune
|
||||
maxPrimary = illegalOffset + 2 // there are 2 illegal values.
|
||||
maxPrimary = illegalOffset + 1
|
||||
)
|
||||
|
||||
// implicitPrimary returns the primary weight for the a rune
|
||||
|
@ -22,6 +22,7 @@ const (
|
||||
defaultSecondary = 0x20
|
||||
defaultTertiary = 0x2
|
||||
maxTertiary = 0x1F
|
||||
maxQuaternary = 0x1FFFFF // 21 bits.
|
||||
)
|
||||
|
||||
// colElem is a representation of a collation element.
|
||||
@ -145,7 +146,8 @@ const (
|
||||
commonUnifiedOffset = 0xFB40
|
||||
rareUnifiedOffset = 0x1FB40
|
||||
otherOffset = 0x4FB40
|
||||
maxPrimary = otherOffset + unicode.MaxRune
|
||||
illegalOffset = otherOffset + unicode.MaxRune
|
||||
maxPrimary = illegalOffset + 1
|
||||
)
|
||||
|
||||
// implicitPrimary returns the primary weight for the a rune
|
||||
|
@ -8,6 +8,7 @@
|
||||
package collate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"exp/norm"
|
||||
)
|
||||
|
||||
@ -67,6 +68,7 @@ type Collator struct {
|
||||
// This option exists predominantly to support reverse sorting of accents in French.
|
||||
Backwards bool
|
||||
|
||||
// TODO: implement:
|
||||
// With HiraganaQuaternary enabled, Hiragana codepoints will get lower values
|
||||
// than all the other non-variable code points. Strength must be greater or
|
||||
// equal to Quaternary for this to take effect.
|
||||
@ -122,25 +124,46 @@ func (b *Buffer) ResetKeys() {
|
||||
|
||||
// Compare returns an integer comparing the two byte slices.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
|
||||
// Compare calls ResetKeys, thereby invalidating keys
|
||||
// previously generated using Key or KeyFromString using buf.
|
||||
func (c *Collator) Compare(buf *Buffer, a, b []byte) int {
|
||||
// TODO: implement
|
||||
return 0
|
||||
// TODO: for now we simply compute keys and compare. Once we
|
||||
// have good benchmarks, move to an implementation that works
|
||||
// incrementally for the majority of cases.
|
||||
// - Benchmark with long strings that only vary in modifiers.
|
||||
buf.ResetKeys()
|
||||
ka := c.Key(buf, a)
|
||||
kb := c.Key(buf, b)
|
||||
defer buf.ResetKeys()
|
||||
return bytes.Compare(ka, kb)
|
||||
}
|
||||
|
||||
// CompareString returns an integer comparing the two strings.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
|
||||
// CompareString calls ResetKeys, thereby invalidating keys
|
||||
// previously generated using Key or KeyFromString using buf.
|
||||
func (c *Collator) CompareString(buf *Buffer, a, b string) int {
|
||||
// TODO: implement
|
||||
buf.ResetKeys()
|
||||
ka := c.KeyFromString(buf, a)
|
||||
kb := c.KeyFromString(buf, b)
|
||||
defer buf.ResetKeys()
|
||||
return bytes.Compare(ka, kb)
|
||||
}
|
||||
|
||||
func (c *Collator) Prefix(buf *Buffer, s, prefix []byte) int {
|
||||
// iterate over s, track bytes consumed.
|
||||
return 0
|
||||
}
|
||||
|
||||
// Key returns the collation key for str.
|
||||
// Passing the buffer buf may avoid memory allocations.
|
||||
// The returned slice will point to an allocation in Buffer and will retain
|
||||
// The returned slice will point to an allocation in Buffer and will remain
|
||||
// valid until the next call to buf.ResetKeys().
|
||||
func (c *Collator) Key(buf *Buffer, str []byte) []byte {
|
||||
// TODO: implement
|
||||
return nil
|
||||
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
|
||||
buf.init()
|
||||
c.getColElems(buf, str)
|
||||
return c.key(buf, buf.ce)
|
||||
}
|
||||
|
||||
// KeyFromString returns the collation key for str.
|
||||
@ -148,6 +171,175 @@ func (c *Collator) Key(buf *Buffer, str []byte) []byte {
|
||||
// The returned slice will point to an allocation in Buffer and will retain
|
||||
// valid until the next call to buf.ResetKeys().
|
||||
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
|
||||
// TODO: implement
|
||||
return nil
|
||||
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
|
||||
buf.init()
|
||||
c.getColElemsString(buf, str)
|
||||
return c.key(buf, buf.ce)
|
||||
}
|
||||
|
||||
func (c *Collator) key(buf *Buffer, w []weights) []byte {
|
||||
processWeights(c.Alternate, c.variableTop, w)
|
||||
kn := len(buf.key)
|
||||
c.keyFromElems(buf, w)
|
||||
return buf.key[kn:]
|
||||
}
|
||||
|
||||
func (c *Collator) getColElems(buf *Buffer, str []byte) {
|
||||
i := c.iter()
|
||||
i.src.SetInput(c.f, str)
|
||||
for !i.done() {
|
||||
buf.ce = i.next(buf.ce)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Collator) getColElemsString(buf *Buffer, str string) {
|
||||
i := c.iter()
|
||||
i.src.SetInputString(c.f, str)
|
||||
for !i.done() {
|
||||
buf.ce = i.next(buf.ce)
|
||||
}
|
||||
}
|
||||
|
||||
type iter struct {
|
||||
src norm.Iter
|
||||
ba [1024]byte
|
||||
buf []byte
|
||||
t *table
|
||||
p int
|
||||
minBufSize int
|
||||
_done, eof bool
|
||||
}
|
||||
|
||||
func (c *Collator) iter() iter {
|
||||
i := iter{t: c.t, minBufSize: c.t.maxContractLen}
|
||||
i.buf = i.ba[:0]
|
||||
return i
|
||||
}
|
||||
|
||||
func (i *iter) done() bool {
|
||||
return i._done
|
||||
}
|
||||
|
||||
func (i *iter) next(ce []weights) []weights {
|
||||
if !i.eof && len(i.buf)-i.p < i.minBufSize {
|
||||
// replenish buffer
|
||||
n := copy(i.buf, i.buf[i.p:])
|
||||
n += i.src.Next(i.buf[n:cap(i.buf)])
|
||||
i.buf = i.buf[:n]
|
||||
i.p = 0
|
||||
i.eof = i.src.Done()
|
||||
}
|
||||
if i.p == len(i.buf) {
|
||||
i._done = true
|
||||
return ce
|
||||
}
|
||||
ce, sz := i.t.appendNext(ce, i.buf[i.p:])
|
||||
i.p += sz
|
||||
return ce
|
||||
}
|
||||
|
||||
func appendPrimary(key []byte, p uint32) []byte {
|
||||
// Convert to variable length encoding; supports up to 23 bits.
|
||||
if p <= 0x7FFF {
|
||||
key = append(key, uint8(p>>8), uint8(p))
|
||||
} else {
|
||||
key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
|
||||
}
|
||||
return key
|
||||
}
|
||||
|
||||
// keyFromElems converts the weights ws to a compact sequence of bytes.
|
||||
// The result will be appended to the byte buffer in buf.
|
||||
func (c *Collator) keyFromElems(buf *Buffer, ws []weights) {
|
||||
for _, v := range ws {
|
||||
if w := v.primary; w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
}
|
||||
}
|
||||
if Secondary <= c.Strength {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
|
||||
if !c.Backwards {
|
||||
for _, v := range ws {
|
||||
if w := v.secondary; w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := len(ws) - 1; i >= 0; i-- {
|
||||
if w := ws[i].secondary; w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if c.CaseLevel {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
}
|
||||
if Tertiary <= c.Strength || c.CaseLevel {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.tertiary; w > 0 {
|
||||
buf.key = append(buf.key, w)
|
||||
}
|
||||
}
|
||||
// Derive the quaternary weights from the options and other levels.
|
||||
// Note that we represent maxQuaternary as 0xFF. The first byte of the
|
||||
// representation of a a primary weight is always smaller than 0xFF,
|
||||
// so using this single byte value will compare correctly.
|
||||
if Quaternary <= c.Strength {
|
||||
if c.Alternate == AltShiftTrimmed {
|
||||
lastNonFFFF := len(buf.key)
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.quaternary; w == maxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
lastNonFFFF = len(buf.key)
|
||||
}
|
||||
}
|
||||
buf.key = buf.key[:lastNonFFFF]
|
||||
} else {
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.quaternary; w == maxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func processWeights(vw AlternateHandling, top uint32, wa []weights) {
|
||||
ignore := false
|
||||
switch vw {
|
||||
case AltShifted, AltShiftTrimmed:
|
||||
for i := range wa {
|
||||
if p := wa[i].primary; p <= top && p != 0 {
|
||||
wa[i] = weights{quaternary: p}
|
||||
ignore = true
|
||||
} else if p == 0 {
|
||||
if ignore {
|
||||
wa[i] = weights{}
|
||||
} else if wa[i].tertiary != 0 {
|
||||
wa[i].quaternary = maxQuaternary
|
||||
}
|
||||
} else {
|
||||
wa[i].quaternary = maxQuaternary
|
||||
ignore = false
|
||||
}
|
||||
}
|
||||
case AltBlanked:
|
||||
for i := range wa {
|
||||
if p := wa[i].primary; p <= top && (ignore || p != 0) {
|
||||
wa[i] = weights{}
|
||||
ignore = true
|
||||
} else {
|
||||
ignore = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
422
src/pkg/exp/locale/collate/collate_test.go
Normal file
422
src/pkg/exp/locale/collate/collate_test.go
Normal file
@ -0,0 +1,422 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"exp/locale/collate"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type weightsTest struct {
|
||||
opt opts
|
||||
in, out ColElems
|
||||
}
|
||||
|
||||
type opts struct {
|
||||
lev int
|
||||
alt collate.AlternateHandling
|
||||
top int
|
||||
|
||||
backwards bool
|
||||
caseLevel bool
|
||||
}
|
||||
|
||||
func (o opts) level() collate.Level {
|
||||
if o.lev == 0 {
|
||||
return collate.Quaternary
|
||||
}
|
||||
return collate.Level(o.lev - 1)
|
||||
}
|
||||
|
||||
func (o opts) collator() *collate.Collator {
|
||||
c := &collate.Collator{
|
||||
Strength: o.level(),
|
||||
Alternate: o.alt,
|
||||
Backwards: o.backwards,
|
||||
CaseLevel: o.caseLevel,
|
||||
}
|
||||
collate.SetTop(c, o.top)
|
||||
return c
|
||||
}
|
||||
|
||||
const (
|
||||
maxQ = 0x1FFFFF
|
||||
)
|
||||
|
||||
func wpq(p, q int) collate.Weights {
|
||||
return collate.W(p, defaults.Secondary, defaults.Tertiary, q)
|
||||
}
|
||||
|
||||
func wsq(s, q int) collate.Weights {
|
||||
return collate.W(0, s, defaults.Tertiary, q)
|
||||
}
|
||||
|
||||
func wq(q int) collate.Weights {
|
||||
return collate.W(0, 0, 0, q)
|
||||
}
|
||||
|
||||
var zero = w(0, 0, 0, 0)
|
||||
|
||||
var processTests = []weightsTest{
|
||||
// Shifted
|
||||
{ // simple sequence of non-variables
|
||||
opt: opts{alt: collate.AltShifted, top: 100},
|
||||
in: ColElems{w(200), w(300), w(400)},
|
||||
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // first is a variable
|
||||
opt: opts{alt: collate.AltShifted, top: 250},
|
||||
in: ColElems{w(200), w(300), w(400)},
|
||||
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: collate.AltShifted, top: 999},
|
||||
in: ColElems{w(1000), w(200), w(300), w(400)},
|
||||
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
|
||||
},
|
||||
{ // first is a modifier
|
||||
opt: opts{alt: collate.AltShifted, top: 999},
|
||||
in: ColElems{w(0, 10), w(1000)},
|
||||
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: collate.AltShifted, top: 250},
|
||||
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: collate.AltShifted, top: 250},
|
||||
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), w(0, 0, 15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: collate.AltShifted, top: 250},
|
||||
in: ColElems{w(200), zero, w(300), zero, w(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
|
||||
},
|
||||
|
||||
// ShiftTrimmed (same as Shifted)
|
||||
{ // simple sequence of non-variables
|
||||
opt: opts{alt: collate.AltShiftTrimmed, top: 100},
|
||||
in: ColElems{w(200), w(300), w(400)},
|
||||
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // first is a variable
|
||||
opt: opts{alt: collate.AltShiftTrimmed, top: 250},
|
||||
in: ColElems{w(200), w(300), w(400)},
|
||||
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: collate.AltShiftTrimmed, top: 999},
|
||||
in: ColElems{w(1000), w(200), w(300), w(400)},
|
||||
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
|
||||
},
|
||||
{ // first is a modifier
|
||||
opt: opts{alt: collate.AltShiftTrimmed, top: 999},
|
||||
in: ColElems{w(0, 10), w(1000)},
|
||||
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: collate.AltShiftTrimmed, top: 250},
|
||||
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: collate.AltShiftTrimmed, top: 250},
|
||||
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), w(0, 0, 15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: collate.AltShiftTrimmed, top: 250},
|
||||
in: ColElems{w(200), zero, w(300), zero, w(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
|
||||
},
|
||||
|
||||
// Blanked
|
||||
{ // simple sequence of non-variables
|
||||
opt: opts{alt: collate.AltBlanked, top: 100},
|
||||
in: ColElems{w(200), w(300), w(400)},
|
||||
out: ColElems{w(200), w(300), w(400)},
|
||||
},
|
||||
{ // first is a variable
|
||||
opt: opts{alt: collate.AltBlanked, top: 250},
|
||||
in: ColElems{w(200), w(300), w(400)},
|
||||
out: ColElems{zero, w(300), w(400)},
|
||||
},
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: collate.AltBlanked, top: 999},
|
||||
in: ColElems{w(1000), w(200), w(300), w(400)},
|
||||
out: ColElems{w(1000), zero, zero, zero},
|
||||
},
|
||||
{ // first is a modifier
|
||||
opt: opts{alt: collate.AltBlanked, top: 999},
|
||||
in: ColElems{w(0, 10), w(1000)},
|
||||
out: ColElems{w(0, 10), w(1000)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: collate.AltBlanked, top: 250},
|
||||
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)},
|
||||
out: ColElems{zero, zero, w(300), w(0, 15), w(400)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: collate.AltBlanked, top: 250},
|
||||
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)},
|
||||
out: ColElems{zero, zero, w(300), w(0, 0, 15), w(400)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: collate.AltBlanked, top: 250},
|
||||
in: ColElems{w(200), zero, w(300), zero, w(400)},
|
||||
out: ColElems{zero, zero, w(300), zero, w(400)},
|
||||
},
|
||||
|
||||
// Non-ignorable: input is always equal to output.
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: collate.AltNonIgnorable, top: 999},
|
||||
in: ColElems{w(1000), w(200), w(300), w(400)},
|
||||
out: ColElems{w(1000), w(200), w(300), w(400)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: collate.AltNonIgnorable, top: 250},
|
||||
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)},
|
||||
out: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: collate.AltNonIgnorable, top: 250},
|
||||
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)},
|
||||
out: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: collate.AltNonIgnorable, top: 250},
|
||||
in: ColElems{w(200), zero, w(300), zero, w(400)},
|
||||
out: ColElems{w(200), zero, w(300), zero, w(400)},
|
||||
},
|
||||
}
|
||||
|
||||
func TestProcessWeights(t *testing.T) {
|
||||
for i, tt := range processTests {
|
||||
res := collate.ProcessWeights(tt.opt.alt, tt.opt.top, tt.in)
|
||||
if len(res) != len(tt.out) {
|
||||
t.Errorf("%d: len(ws) was %d; want %d (%v should be %v)", i, len(res), len(tt.out), res, tt.out)
|
||||
continue
|
||||
}
|
||||
for j, w := range res {
|
||||
if w != tt.out[j] {
|
||||
t.Errorf("%d: Weights %d was %v; want %v", i, j, w, tt.out[j])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type keyFromElemTest struct {
|
||||
opt opts
|
||||
in ColElems
|
||||
out []byte
|
||||
}
|
||||
|
||||
var defS = byte(defaults.Secondary)
|
||||
var defT = byte(defaults.Tertiary)
|
||||
|
||||
const sep = 0 // separator byte
|
||||
|
||||
var keyFromElemTests = []keyFromElemTest{
|
||||
{ // simple primary and secondary weights.
|
||||
opts{},
|
||||
ColElems{w(0x200), w(0x7FFF), w(0, 0x30), w(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, but with zero element that need to be removed
|
||||
opts{},
|
||||
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, with large primary values
|
||||
opts{},
|
||||
ColElems{w(0x200), w(0x8000), w(0, 0x30), w(0x12345)},
|
||||
[]byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, but with the secondary level backwards
|
||||
opts{backwards: true},
|
||||
ColElems{w(0x200), w(0x7FFF), w(0, 0x30), w(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, ignoring quaternary level
|
||||
opts{lev: 3},
|
||||
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
},
|
||||
},
|
||||
{ // same as first, ignoring tertiary level
|
||||
opts{lev: 2},
|
||||
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
},
|
||||
},
|
||||
{ // same as first, ignoring secondary level
|
||||
opts{lev: 1},
|
||||
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
|
||||
},
|
||||
{ // simple primary and secondary weights.
|
||||
opts{alt: collate.AltShiftTrimmed, top: 0x250},
|
||||
ColElems{w(0x300), w(0x200), w(0x7FFF), w(0, 0x30), w(0x800)},
|
||||
[]byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0x2, 0, // quaternary
|
||||
},
|
||||
},
|
||||
{ // as first, primary with case level enabled
|
||||
opts{lev: 1, caseLevel: true},
|
||||
ColElems{w(0x200), w(0x7FFF), w(0, 0x30), w(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestKeyFromElems(t *testing.T) {
|
||||
buf := collate.Buffer{}
|
||||
for i, tt := range keyFromElemTests {
|
||||
buf.ResetKeys()
|
||||
ws := collate.ProcessWeights(tt.opt.alt, tt.opt.top, tt.in)
|
||||
res := collate.KeyFromElems(tt.opt.collator(), &buf, ws)
|
||||
if len(res) != len(tt.out) {
|
||||
t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out)
|
||||
}
|
||||
n := len(res)
|
||||
if len(tt.out) < n {
|
||||
n = len(tt.out)
|
||||
}
|
||||
for j, c := range res[:n] {
|
||||
if c != tt.out[j] {
|
||||
t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetColElems(t *testing.T) {
|
||||
for i, tt := range appendNextTests {
|
||||
c, err := makeTable(tt.in)
|
||||
if err != nil {
|
||||
// error is reported in TestAppendNext
|
||||
continue
|
||||
}
|
||||
buf := collate.Buffer{}
|
||||
// Create one large test per table
|
||||
str := make([]byte, 0, 4000)
|
||||
out := ColElems{}
|
||||
for len(str) < 3000 {
|
||||
for _, chk := range tt.chk {
|
||||
str = append(str, chk.in[:chk.n]...)
|
||||
out = append(out, chk.out...)
|
||||
}
|
||||
}
|
||||
for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
|
||||
ws := collate.GetColElems(c, &buf, []byte(chk.in)[:chk.n])
|
||||
if len(ws) != len(chk.out) {
|
||||
t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ws), len(chk.out))
|
||||
continue
|
||||
}
|
||||
cnt := 0
|
||||
for k, w := range ws {
|
||||
if w != chk.out[k] {
|
||||
t.Errorf("%d:%d: Weights %d was %v; want %v", i, j, k, w, chk.out[k])
|
||||
cnt++
|
||||
}
|
||||
if cnt > 10 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type keyTest struct {
|
||||
in string
|
||||
out []byte
|
||||
}
|
||||
|
||||
var keyTests = []keyTest{
|
||||
{"abc",
|
||||
[]byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255},
|
||||
},
|
||||
{"a\u0301",
|
||||
[]byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255},
|
||||
},
|
||||
{"aaaaa",
|
||||
[]byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0,
|
||||
0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0,
|
||||
2, 2, 2, 2, 2, 0,
|
||||
255, 255, 255, 255, 255,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestKey(t *testing.T) {
|
||||
c, _ := makeTable(appendNextTests[4].in)
|
||||
buf := collate.Buffer{}
|
||||
keys1 := [][]byte{}
|
||||
keys2 := [][]byte{}
|
||||
for _, tt := range keyTests {
|
||||
keys1 = append(keys1, c.Key(&buf, []byte(tt.in)))
|
||||
keys2 = append(keys2, c.KeyFromString(&buf, tt.in))
|
||||
}
|
||||
// Separate generation from testing to ensure buffers are not overwritten.
|
||||
for i, tt := range keyTests {
|
||||
if bytes.Compare(keys1[i], tt.out) != 0 {
|
||||
t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, keys1[i], tt.out)
|
||||
}
|
||||
if bytes.Compare(keys2[i], tt.out) != 0 {
|
||||
t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, keys2[i], tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type compareTest struct {
|
||||
a, b string
|
||||
res int // comparison result
|
||||
}
|
||||
|
||||
var compareTests = []compareTest{
|
||||
{"a\u0301", "a", 1},
|
||||
{"a", "a\u0301", -1},
|
||||
{"a\u0301", "a\u0301", 0},
|
||||
{"a", "a", 0},
|
||||
}
|
||||
|
||||
func TestCompare(t *testing.T) {
|
||||
c, _ := makeTable(appendNextTests[4].in)
|
||||
buf := collate.Buffer{}
|
||||
for i, tt := range compareTests {
|
||||
if res := c.Compare(&buf, []byte(tt.a), []byte(tt.b)); res != tt.res {
|
||||
t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
|
||||
}
|
||||
if res := c.CompareString(&buf, tt.a, tt.b); res != tt.res {
|
||||
t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
|
||||
}
|
||||
}
|
||||
}
|
@ -6,24 +6,30 @@ package collate
|
||||
|
||||
// Export for testing.
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"exp/norm"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Weights struct {
|
||||
Primary, Secondary, Tertiary int
|
||||
Primary, Secondary, Tertiary, Quaternary int
|
||||
}
|
||||
|
||||
func W(ce ...int) Weights {
|
||||
w := Weights{ce[0], defaultSecondary, defaultTertiary}
|
||||
w := Weights{ce[0], defaultSecondary, defaultTertiary, 0}
|
||||
if len(ce) > 1 {
|
||||
w.Secondary = ce[1]
|
||||
}
|
||||
if len(ce) > 2 {
|
||||
w.Tertiary = ce[2]
|
||||
}
|
||||
if len(ce) > 3 {
|
||||
w.Quaternary = ce[3]
|
||||
}
|
||||
return w
|
||||
}
|
||||
func (w Weights) String() string {
|
||||
return fmt.Sprintf("[%d.%d.%d]", w.Primary, w.Secondary, w.Tertiary)
|
||||
return fmt.Sprintf("[%d.%d.%d.%d]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
|
||||
}
|
||||
|
||||
type Table struct {
|
||||
@ -35,15 +41,52 @@ func GetTable(c *Collator) *Table {
|
||||
return &Table{c.t, nil}
|
||||
}
|
||||
|
||||
func convertWeights(ws []weights) []Weights {
|
||||
func convertToWeights(ws []weights) []Weights {
|
||||
out := make([]Weights, len(ws))
|
||||
for i, w := range ws {
|
||||
out[i] = Weights{int(w.primary), int(w.secondary), int(w.tertiary)}
|
||||
out[i] = Weights{int(w.primary), int(w.secondary), int(w.tertiary), int(w.quaternary)}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func convertFromWeights(ws []Weights) []weights {
|
||||
out := make([]weights, len(ws))
|
||||
for i, w := range ws {
|
||||
out[i] = weights{uint32(w.Primary), uint16(w.Secondary), uint8(w.Tertiary), uint32(w.Quaternary)}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (t *Table) AppendNext(s []byte) ([]Weights, int) {
|
||||
w, n := t.t.appendNext(nil, s)
|
||||
return convertWeights(w), n
|
||||
return convertToWeights(w), n
|
||||
}
|
||||
|
||||
func SetTop(c *Collator, top int) {
|
||||
c.variableTop = uint32(top)
|
||||
}
|
||||
|
||||
func InitCollator(c *Collator) {
|
||||
c.Strength = Quaternary
|
||||
c.f = norm.NFD
|
||||
c.t.maxContractLen = 30
|
||||
}
|
||||
|
||||
func GetColElems(c *Collator, buf *Buffer, str []byte) []Weights {
|
||||
buf.ResetKeys()
|
||||
InitCollator(c)
|
||||
c.getColElems(buf, str)
|
||||
return convertToWeights(buf.ce)
|
||||
}
|
||||
|
||||
func ProcessWeights(h AlternateHandling, top int, w []Weights) []Weights {
|
||||
in := convertFromWeights(w)
|
||||
processWeights(h, uint32(top), in)
|
||||
return convertToWeights(in)
|
||||
}
|
||||
|
||||
func KeyFromElems(c *Collator, buf *Buffer, w []Weights) []byte {
|
||||
k := len(buf.key)
|
||||
c.keyFromElems(buf, convertFromWeights(w))
|
||||
return buf.key[k:]
|
||||
}
|
||||
|
@ -11,9 +11,7 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
type Weights struct {
|
||||
collate.Weights
|
||||
}
|
||||
type ColElems []collate.Weights
|
||||
|
||||
type input struct {
|
||||
str string
|
||||
@ -23,7 +21,7 @@ type input struct {
|
||||
type check struct {
|
||||
in string
|
||||
n int
|
||||
out []Weights
|
||||
out ColElems
|
||||
}
|
||||
|
||||
type tableTest struct {
|
||||
@ -31,8 +29,8 @@ type tableTest struct {
|
||||
chk []check
|
||||
}
|
||||
|
||||
func w(ce ...int) Weights {
|
||||
return Weights{collate.W(ce...)}
|
||||
func w(ce ...int) collate.Weights {
|
||||
return collate.W(ce...)
|
||||
}
|
||||
|
||||
var defaults = w(0)
|
||||
@ -46,7 +44,11 @@ func makeTable(in []input) (*collate.Collator, error) {
|
||||
for _, r := range in {
|
||||
b.Add([]rune(r.str), r.ces)
|
||||
}
|
||||
return b.Build("")
|
||||
c, err := b.Build("")
|
||||
if err == nil {
|
||||
collate.InitCollator(c)
|
||||
}
|
||||
return c, err
|
||||
}
|
||||
|
||||
// modSeq holds a seqeunce of modifiers in increasing order of CCC long enough
|
||||
@ -60,8 +62,8 @@ var modSeq = []rune{
|
||||
}
|
||||
|
||||
var mods []input
|
||||
var modW = func() []Weights {
|
||||
ws := []Weights{}
|
||||
var modW = func() ColElems {
|
||||
ws := ColElems{}
|
||||
for _, r := range modSeq {
|
||||
rune := norm.NFC.PropertiesString(string(r))
|
||||
ws = append(ws, w(0, int(rune.CCC())))
|
||||
@ -79,14 +81,14 @@ var appendNextTests = []tableTest{
|
||||
{"ß", [][]int{{120}}},
|
||||
},
|
||||
[]check{
|
||||
{"a", 1, []Weights{w(100)}},
|
||||
{"b", 1, []Weights{w(105)}},
|
||||
{"c", 1, []Weights{w(110)}},
|
||||
{"d", 1, []Weights{w(0x4FBA4)}},
|
||||
{"ab", 1, []Weights{w(100)}},
|
||||
{"bc", 1, []Weights{w(105)}},
|
||||
{"dd", 1, []Weights{w(0x4FBA4)}},
|
||||
{"ß", 2, []Weights{w(120)}},
|
||||
{"a", 1, ColElems{w(100)}},
|
||||
{"b", 1, ColElems{w(105)}},
|
||||
{"c", 1, ColElems{w(110)}},
|
||||
{"d", 1, ColElems{w(0x4FBA4)}},
|
||||
{"ab", 1, ColElems{w(100)}},
|
||||
{"bc", 1, ColElems{w(105)}},
|
||||
{"dd", 1, ColElems{w(0x4FBA4)}},
|
||||
{"ß", 2, ColElems{w(120)}},
|
||||
},
|
||||
},
|
||||
{ // test expansion
|
||||
@ -97,10 +99,10 @@ var appendNextTests = []tableTest{
|
||||
{"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
|
||||
},
|
||||
[]check{
|
||||
{"u", 1, []Weights{w(100)}},
|
||||
{"U", 1, []Weights{w(100), w(0, 25)}},
|
||||
{"w", 1, []Weights{w(100), w(100)}},
|
||||
{"W", 1, []Weights{w(100), w(0, 25), w(100), w(0, 25)}},
|
||||
{"u", 1, ColElems{w(100)}},
|
||||
{"U", 1, ColElems{w(100), w(0, 25)}},
|
||||
{"w", 1, ColElems{w(100), w(100)}},
|
||||
{"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
|
||||
},
|
||||
},
|
||||
{ // test decompose
|
||||
@ -111,7 +113,7 @@ var appendNextTests = []tableTest{
|
||||
{"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
|
||||
},
|
||||
[]check{
|
||||
{"\u01C5", 2, []Weights{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
|
||||
{"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
|
||||
},
|
||||
},
|
||||
{ // test basic contraction
|
||||
@ -125,16 +127,16 @@ var appendNextTests = []tableTest{
|
||||
{"d", [][]int{{400}}},
|
||||
},
|
||||
[]check{
|
||||
{"a", 1, []Weights{w(100)}},
|
||||
{"aa", 1, []Weights{w(100)}},
|
||||
{"aac", 1, []Weights{w(100)}},
|
||||
{"ab", 2, []Weights{w(101)}},
|
||||
{"abb", 2, []Weights{w(101)}},
|
||||
{"aab", 3, []Weights{w(101), w(101)}},
|
||||
{"aaba", 3, []Weights{w(101), w(101)}},
|
||||
{"abc", 3, []Weights{w(102)}},
|
||||
{"abcd", 3, []Weights{w(102)}},
|
||||
{"d", 1, []Weights{w(400)}},
|
||||
{"a", 1, ColElems{w(100)}},
|
||||
{"aa", 1, ColElems{w(100)}},
|
||||
{"aac", 1, ColElems{w(100)}},
|
||||
{"d", 1, ColElems{w(400)}},
|
||||
{"ab", 2, ColElems{w(101)}},
|
||||
{"abb", 2, ColElems{w(101)}},
|
||||
{"aab", 3, ColElems{w(101), w(101)}},
|
||||
{"aaba", 3, ColElems{w(101), w(101)}},
|
||||
{"abc", 3, ColElems{w(102)}},
|
||||
{"abcd", 3, ColElems{w(102)}},
|
||||
},
|
||||
},
|
||||
{ // test discontinuous contraction
|
||||
@ -177,75 +179,75 @@ var appendNextTests = []tableTest{
|
||||
{"\u302F\u18A9", [][]int{{0, 130}}},
|
||||
}...),
|
||||
[]check{
|
||||
{"ab", 1, []Weights{w(100)}}, // closing segment
|
||||
{"a\u0316\u0300b", 5, []Weights{w(101), w(0, 220)}}, // closing segment
|
||||
{"a\u0316\u0300", 5, []Weights{w(101), w(0, 220)}}, // no closing segment
|
||||
{"a\u0316\u0300\u035Cb", 5, []Weights{w(101), w(0, 220)}}, // completes before segment end
|
||||
{"a\u0316\u0300\u035C", 5, []Weights{w(101), w(0, 220)}}, // completes before segment end
|
||||
{"ab", 1, ColElems{w(100)}}, // closing segment
|
||||
{"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}}, // closing segment
|
||||
{"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}}, // no closing segment
|
||||
{"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
|
||||
{"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
|
||||
|
||||
{"a\u0316\u0301b", 5, []Weights{w(102), w(0, 220)}}, // closing segment
|
||||
{"a\u0316\u0301", 5, []Weights{w(102), w(0, 220)}}, // no closing segment
|
||||
{"a\u0316\u0301\u035Cb", 5, []Weights{w(102), w(0, 220)}}, // completes before segment end
|
||||
{"a\u0316\u0301\u035C", 5, []Weights{w(102), w(0, 220)}}, // completes before segment end
|
||||
{"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}}, // closing segment
|
||||
{"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}}, // no closing segment
|
||||
{"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
|
||||
{"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
|
||||
|
||||
// match blocked by modifier with same ccc
|
||||
{"a\u0301\u0315\u031A\u035Fb", 3, []Weights{w(102)}},
|
||||
{"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},
|
||||
|
||||
// multiple gaps
|
||||
{"a\u0301\u035Db", 6, []Weights{w(120)}},
|
||||
{"a\u0301\u035F", 5, []Weights{w(121)}},
|
||||
{"a\u0301\u035Fb", 6, []Weights{w(122)}},
|
||||
{"a\u0316\u0301\u035F", 7, []Weights{w(121), w(0, 220)}},
|
||||
{"a\u0301\u0315\u035Fb", 7, []Weights{w(121), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035Db", 5, []Weights{w(102), w(0, 220)}},
|
||||
{"a\u0316\u0301\u0315\u035F", 9, []Weights{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035Fb", 9, []Weights{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035F\u035D", 9, []Weights{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035F\u035Db", 9, []Weights{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0301\u035Db", 6, ColElems{w(120)}},
|
||||
{"a\u0301\u035F", 5, ColElems{w(121)}},
|
||||
{"a\u0301\u035Fb", 6, ColElems{w(122)}},
|
||||
{"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
|
||||
{"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
|
||||
{"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
|
||||
// handling of segment overflow
|
||||
{ // just fits within segment
|
||||
"a" + string(modSeq[:30]) + "\u0301",
|
||||
3 + len(string(modSeq[:30])),
|
||||
append([]Weights{w(102)}, modW[:30]...),
|
||||
append(ColElems{w(102)}, modW[:30]...),
|
||||
},
|
||||
{"a" + string(modSeq[:31]) + "\u0301", 1, []Weights{w(100)}}, // overflow
|
||||
{"a" + string(modSeq) + "\u0301", 1, []Weights{w(100)}},
|
||||
{"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
|
||||
{"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
|
||||
{ // just fits within segment with two interstitial runes
|
||||
"a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
|
||||
7 + len(string(modSeq[:28])),
|
||||
append(append([]Weights{w(121)}, modW[:28]...), w(0, 232)),
|
||||
append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
|
||||
},
|
||||
{ // second half does not fit within segment
|
||||
"a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
|
||||
3 + len(string(modSeq[:29])),
|
||||
append([]Weights{w(102)}, modW[:29]...),
|
||||
append(ColElems{w(102)}, modW[:29]...),
|
||||
},
|
||||
|
||||
// discontinuity can only occur in last normalization segment
|
||||
{"a\u035Eb\u035E", 6, []Weights{w(115)}},
|
||||
{"a\u0316\u035Eb\u035E", 5, []Weights{w(110), w(0, 220)}},
|
||||
{"a\u035Db\u035D", 6, []Weights{w(117)}},
|
||||
{"a\u0316\u035Db\u035D", 1, []Weights{w(100)}},
|
||||
{"a\u035Eb\u0316\u035E", 8, []Weights{w(115), w(0, 220)}},
|
||||
{"a\u035Db\u0316\u035D", 8, []Weights{w(117), w(0, 220)}},
|
||||
{"ac\u035Eaca\u035E", 9, []Weights{w(116)}},
|
||||
{"a\u0316c\u035Eaca\u035E", 1, []Weights{w(100)}},
|
||||
{"ac\u035Eac\u0316a\u035E", 1, []Weights{w(100)}},
|
||||
{"a\u035Eb\u035E", 6, ColElems{w(115)}},
|
||||
{"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
|
||||
{"a\u035Db\u035D", 6, ColElems{w(117)}},
|
||||
{"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
|
||||
{"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
|
||||
{"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
|
||||
{"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
|
||||
{"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
|
||||
{"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},
|
||||
|
||||
// expanding contraction
|
||||
{"\u03B1\u0345", 4, []Weights{w(901), w(902)}},
|
||||
{"\u03B1\u0345", 4, ColElems{w(901), w(902)}},
|
||||
|
||||
// Theoretical possibilities
|
||||
// contraction within a gap
|
||||
{"a\u302F\u18A9\u0301", 9, []Weights{w(102), w(0, 130)}},
|
||||
{"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
|
||||
// expansion within a gap
|
||||
{"a\u0317\u0301", 5, []Weights{w(102), w(0, 220), w(0, 220)}},
|
||||
{"a\u302E\u18A9\u0301", 9, []Weights{w(102), w(0, 131), w(0, 132)}},
|
||||
{"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
|
||||
{"a\u302E\u18A9\u0301", 9, ColElems{w(102), w(0, 131), w(0, 132)}},
|
||||
{
|
||||
"a\u0317\u302E\u18A9\u0301",
|
||||
11,
|
||||
[]Weights{w(102), w(0, 220), w(0, 220), w(0, 131), w(0, 132)},
|
||||
ColElems{w(102), w(0, 220), w(0, 220), w(0, 131), w(0, 132)},
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -269,7 +271,7 @@ func TestAppendNext(t *testing.T) {
|
||||
continue
|
||||
}
|
||||
for k, w := range ws {
|
||||
if w != chk.out[k].Weights {
|
||||
if w != chk.out[k] {
|
||||
t.Errorf("%d:%d: Weights %d was %v; want %v", i, j, k, w, chk.out[k])
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user