mirror of
https://github.com/golang/go
synced 2024-11-26 06:38:00 -07:00
exp/locale/collate: changed API of Builder to be more convenient
for dealing with CLDR files: - Add now takes a list of indexes of colelems that are variables. Checking and handling is now done by the Builder. VariableTop is now also properly generated using the Build method. - Introduced separate Builder, called Tailoring, for creating tailorings of the root table. This clearly separates the functionality for building a table based on weights (the allkeys* files) versus tables based on LDML XML files. - Tailorings are now added by two calls instead of one: SetAnchor and Insert. This more closely reflects the structure of the LDML side and simplifies the implementation of both the client and library side. It also preserves some information that is otherwise hard to recover for the Builder. - Allow the LDML XML element extend to be passed to Insert. This simplifies both client and library implementation. R=r CC=golang-dev https://golang.org/cl/6454061
This commit is contained in:
parent
b9b29ce2ba
commit
89d40b911c
@ -12,6 +12,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO: optimizations:
|
// TODO: optimizations:
|
||||||
@ -22,7 +23,11 @@ import (
|
|||||||
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
|
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
|
||||||
// and many consecutive values with the same stride. This can be further
|
// and many consecutive values with the same stride. This can be further
|
||||||
// compacted.
|
// compacted.
|
||||||
// - compress secondary weights into 8 bits.
|
// - Compress secondary weights into 8 bits.
|
||||||
|
// - Some LDML specs specify a context element. Currently we simply concatenate
|
||||||
|
// those. Context can be implemented using the contraction trie. If Builder
|
||||||
|
// could analyze and detect when using a context makes sense, there is no
|
||||||
|
// need to expose this construct in the API.
|
||||||
|
|
||||||
// entry is used to keep track of a single entry in the collation element table
|
// entry is used to keep track of a single entry in the collation element table
|
||||||
// during building. Examples of entries can be found in the Default Unicode
|
// during building. Examples of entries can be found in the Default Unicode
|
||||||
@ -60,18 +65,30 @@ func (e *entry) contractionStarter() bool {
|
|||||||
return e.contractionHandle.n != 0
|
return e.contractionHandle.n != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// A Builder builds collation tables. It can generate both the root table and
|
// A Builder builds a root collation table. The user must specify the
|
||||||
// locale-specific tables defined as tailorings to the root table.
|
// collation elements for each entry. A common use will be to base the weights
|
||||||
// The typical use case is to specify the data for the root table and all locale-specific
|
// on those specified in the allkeys* file as provided by the UCA or CLDR.
|
||||||
// tables using Add and AddTailoring before making any call to Build. This allows
|
|
||||||
// Builder to ensure that a root table can support tailorings for each locale.
|
|
||||||
type Builder struct {
|
type Builder struct {
|
||||||
index *trieBuilder
|
index *trieBuilder
|
||||||
|
locale []*Tailoring
|
||||||
entryMap map[string]*entry
|
entryMap map[string]*entry
|
||||||
entry []*entry
|
entry []*entry
|
||||||
t *table
|
t *table
|
||||||
err error
|
err error
|
||||||
built bool
|
built bool
|
||||||
|
|
||||||
|
minNonVar int // lowest primary recorded for a non-variable
|
||||||
|
varTop int // highest primary recorded for a variable
|
||||||
|
}
|
||||||
|
|
||||||
|
// A Tailoring builds a collation table based on another collation table.
|
||||||
|
// The table is defined by specifying tailorings to the underlying table.
|
||||||
|
// See http://unicode.org/reports/tr35/ for an overview of tailoring
|
||||||
|
// collation tables. The CLDR contains pre-defined tailorings for a variety
|
||||||
|
// of languages (See http://www.unicode.org/Public/cldr/2.0.1/core.zip.)
|
||||||
|
type Tailoring struct {
|
||||||
|
id string
|
||||||
|
// TODO: implement.
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewBuilder returns a new Builder.
|
// NewBuilder returns a new Builder.
|
||||||
@ -83,14 +100,26 @@ func NewBuilder() *Builder {
|
|||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add adds an entry for the root collation element table, mapping
|
// Tailoring returns a Tailoring for the given locale. One should
|
||||||
|
// have completed all calls to Add before calling Tailoring.
|
||||||
|
func (b *Builder) Tailoring(locale string) *Tailoring {
|
||||||
|
t := &Tailoring{
|
||||||
|
id: locale,
|
||||||
|
}
|
||||||
|
b.locale = append(b.locale, t)
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add adds an entry to the collation element table, mapping
|
||||||
// a slice of runes to a sequence of collation elements.
|
// a slice of runes to a sequence of collation elements.
|
||||||
// A collation element is specified as a list of weights: []int{primary, secondary, ...}.
|
// A collation element is specified as a list of weights: []int{primary, secondary, ...}.
|
||||||
// The entries are typically obtained from a collation element table
|
// The entries are typically obtained from a collation element table
|
||||||
// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
|
// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
|
||||||
// Note that the collation elements specified by colelems are only used
|
// Note that the collation elements specified by colelems are only used
|
||||||
// as a guide. The actual weights generated by Builder may differ.
|
// as a guide. The actual weights generated by Builder may differ.
|
||||||
func (b *Builder) Add(str []rune, colelems [][]int) error {
|
// The argument variables is a list of indices into colelems that should contain
|
||||||
|
// a value for each colelem that is a variable. (See the reference above.)
|
||||||
|
func (b *Builder) Add(str []rune, colelems [][]int, variables []int) error {
|
||||||
e := &entry{
|
e := &entry{
|
||||||
runes: make([]rune, len(str)),
|
runes: make([]rune, len(str)),
|
||||||
elems: make([][]int, len(colelems)),
|
elems: make([][]int, len(colelems)),
|
||||||
@ -113,6 +142,29 @@ func (b *Builder) Add(str []rune, colelems [][]int) error {
|
|||||||
e.elems[i] = append(e.elems[i], ce[0])
|
e.elems[i] = append(e.elems[i], ce[0])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for i, ce := range e.elems {
|
||||||
|
isvar := false
|
||||||
|
for _, j := range variables {
|
||||||
|
if i == j {
|
||||||
|
isvar = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if isvar {
|
||||||
|
if ce[0] >= b.minNonVar && b.minNonVar > 0 {
|
||||||
|
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", ce[0], b.minNonVar)
|
||||||
|
}
|
||||||
|
if ce[0] > b.varTop {
|
||||||
|
b.varTop = ce[0]
|
||||||
|
}
|
||||||
|
} else if ce[0] > 0 {
|
||||||
|
if ce[0] <= b.varTop {
|
||||||
|
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
|
||||||
|
}
|
||||||
|
if b.minNonVar == 0 || ce[0] < b.minNonVar {
|
||||||
|
b.minNonVar = ce[0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
elems, err := convertLargeWeights(e.elems)
|
elems, err := convertLargeWeights(e.elems)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -123,13 +175,57 @@ func (b *Builder) Add(str []rune, colelems [][]int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddTailoring defines a tailoring x <_level y for the given locale.
|
// SetAnchor sets the point after which elements passed in subsequent calls to
|
||||||
// For example, AddTailoring("se", "z", "ä", Primary) sorts "ä" after "z"
|
// Insert will be inserted. It is equivalent to the reset directive in an LDML
|
||||||
// at the primary level for Swedish. AddTailoring("de", "ue", "ü", Secondary)
|
// specification. See Insert for an example.
|
||||||
// sorts "ü" after "ue" at the secondary level for German.
|
// SetAnchor supports the following logical reset positions:
|
||||||
|
// <first_tertiary_ignorable/>, <last_tertiary_ignorable/>, <first_primary_ignorable/>,
|
||||||
|
// and <last_non_ignorable/>.
|
||||||
|
func (t *Tailoring) SetAnchor(anchor string) error {
|
||||||
|
// TODO: implement.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
|
||||||
|
// Insert will insert entries before the anchor.
|
||||||
|
func (t *Tailoring) SetAnchorBefore(anchor string) error {
|
||||||
|
// TODO: implement.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert sets the ordering of str relative to the entry set by the previous
|
||||||
|
// call to SetAnchor or Insert. The argument extend corresponds
|
||||||
|
// to the extend elements as defined in LDML. A non-empty value for extend
|
||||||
|
// will cause the collation elements corresponding to extend to be appended
|
||||||
|
// to the collation elements generated for the entry added by Insert.
|
||||||
|
// This has the same net effect as sorting str after the string anchor+extend.
|
||||||
// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
|
// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
|
||||||
// on parametric tailoring.
|
// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements
|
||||||
func (b *Builder) AddTailoring(locale, x, y string, l collate.Level) error {
|
// for full details on LDML.
|
||||||
|
//
|
||||||
|
// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
|
||||||
|
// at the primary sorting level:
|
||||||
|
// t := b.Tailoring("se")
|
||||||
|
// t.SetAnchor("z")
|
||||||
|
// t.Insert(collate.Primary, "ä", "")
|
||||||
|
// Order "ü" after "ue" at the secondary sorting level:
|
||||||
|
// t.SetAnchor("ue")
|
||||||
|
// t.Insert(collate.Secondary, "ü","")
|
||||||
|
// or
|
||||||
|
// t.SetAnchor("u")
|
||||||
|
// t.Insert(collate.Secondary, "ü", "e")
|
||||||
|
// Order "q" after "ab" at the secondary level and "Q" after "q"
|
||||||
|
// at the tertiary level:
|
||||||
|
// t.SetAnchor("ab")
|
||||||
|
// t.Insert(collate.Secondary, "q", "")
|
||||||
|
// t.Insert(collate.Tertiary, "Q", "")
|
||||||
|
// Order "b" before "a":
|
||||||
|
// t.SetAnchorBefore("a")
|
||||||
|
// t.Insert(collate.Primary, "b", "")
|
||||||
|
// Order "0" after the last primary ignorable:
|
||||||
|
// t.SetAnchor("<last_primary_ignorable/>")
|
||||||
|
// t.Insert(collate.Primary, "0", "")
|
||||||
|
func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
|
||||||
// TODO: implement.
|
// TODO: implement.
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -189,7 +285,10 @@ func (b *Builder) error(e error) {
|
|||||||
func (b *Builder) build() (*table, error) {
|
func (b *Builder) build() (*table, error) {
|
||||||
if !b.built {
|
if !b.built {
|
||||||
b.built = true
|
b.built = true
|
||||||
b.t = &table{}
|
b.t = &table{
|
||||||
|
maxContractLen: utf8.UTFMax,
|
||||||
|
variableTop: uint32(b.varTop),
|
||||||
|
}
|
||||||
|
|
||||||
b.simplify()
|
b.simplify()
|
||||||
b.processExpansions() // requires simplify
|
b.processExpansions() // requires simplify
|
||||||
@ -202,18 +301,23 @@ func (b *Builder) build() (*table, error) {
|
|||||||
return b.t, nil
|
return b.t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build builds a Collator for the given locale. To build the root table, set locale to "".
|
// Build builds the root Collator.
|
||||||
func (b *Builder) Build(locale string) (*collate.Collator, error) {
|
func (b *Builder) Build() (*collate.Collator, error) {
|
||||||
t, err := b.build()
|
t, err := b.build()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// TODO: support multiple locales
|
|
||||||
return collate.Init(t), nil
|
return collate.Init(t), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print prints all tables to a Go file that can be included in
|
// Build builds a Collator for Tailoring t.
|
||||||
// the Collate package.
|
func (t *Tailoring) Build() (*collate.Collator, error) {
|
||||||
|
// TODO: implement.
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print prints the tables for b and all its Tailorings as a Go file
|
||||||
|
// that can be included in the Collate package.
|
||||||
func (b *Builder) Print(w io.Writer) (int, error) {
|
func (b *Builder) Print(w io.Writer) (int, error) {
|
||||||
t, err := b.build()
|
t, err := b.build()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -48,7 +48,7 @@ type ducetElem struct {
|
|||||||
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
|
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
|
||||||
b := NewBuilder()
|
b := NewBuilder()
|
||||||
for _, e := range ducet {
|
for _, e := range ducet {
|
||||||
if err := b.Add([]rune(e.str), e.ces); err != nil {
|
if err := b.Add([]rune(e.str), e.ces, nil); err != nil {
|
||||||
t.Errorf(err.Error())
|
t.Errorf(err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,7 @@ type table struct {
|
|||||||
contractTries contractTrieSet
|
contractTries contractTrieSet
|
||||||
contractElem []uint32
|
contractElem []uint32
|
||||||
maxContractLen int
|
maxContractLen int
|
||||||
|
variableTop uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *table) TrieIndex() []uint16 {
|
func (t *table) TrieIndex() []uint16 {
|
||||||
@ -53,6 +54,10 @@ func (t *table) MaxContractLen() int {
|
|||||||
return t.maxContractLen
|
return t.maxContractLen
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *table) VariableTop() uint32 {
|
||||||
|
return t.variableTop
|
||||||
|
}
|
||||||
|
|
||||||
// print writes the table as Go compilable code to w. It prefixes the
|
// print writes the table as Go compilable code to w. It prefixes the
|
||||||
// variable names with name. It returns the number of bytes written
|
// variable names with name. It returns the number of bytes written
|
||||||
// and the size of the resulting table.
|
// and the size of the resulting table.
|
||||||
@ -78,6 +83,7 @@ func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
|
|||||||
p(",\n")
|
p(",\n")
|
||||||
p("%sContractElem[:],\n", name)
|
p("%sContractElem[:],\n", name)
|
||||||
p("%d,\n", t.maxContractLen)
|
p("%d,\n", t.maxContractLen)
|
||||||
|
p("0x%X,\n", t.variableTop)
|
||||||
p("}\n\n")
|
p("}\n\n")
|
||||||
|
|
||||||
// Write arrays needed for the structure.
|
// Write arrays needed for the structure.
|
||||||
|
@ -55,9 +55,6 @@ const (
|
|||||||
// Collator provides functionality for comparing strings for a given
|
// Collator provides functionality for comparing strings for a given
|
||||||
// collation order.
|
// collation order.
|
||||||
type Collator struct {
|
type Collator struct {
|
||||||
// See SetVariableTop.
|
|
||||||
variableTop uint32
|
|
||||||
|
|
||||||
// Strength sets the maximum level to use in comparison.
|
// Strength sets the maximum level to use in comparison.
|
||||||
Strength Level
|
Strength Level
|
||||||
|
|
||||||
@ -178,7 +175,7 @@ func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *Collator) key(buf *Buffer, w []weights) []byte {
|
func (c *Collator) key(buf *Buffer, w []weights) []byte {
|
||||||
processWeights(c.Alternate, c.variableTop, w)
|
processWeights(c.Alternate, c.t.variableTop, w)
|
||||||
kn := len(buf.key)
|
kn := len(buf.key)
|
||||||
c.keyFromElems(buf, w)
|
c.keyFromElems(buf, w)
|
||||||
return buf.key[kn:]
|
return buf.key[kn:]
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
package collate
|
package collate
|
||||||
|
|
||||||
|
import "exp/norm"
|
||||||
|
|
||||||
// Init is used by type Builder in exp/locale/collate/build/
|
// Init is used by type Builder in exp/locale/collate/build/
|
||||||
// to create Collator instances. It is for internal use only.
|
// to create Collator instances. It is for internal use only.
|
||||||
func Init(data interface{}) *Collator {
|
func Init(data interface{}) *Collator {
|
||||||
@ -21,7 +23,12 @@ func Init(data interface{}) *Collator {
|
|||||||
t.contractTries = init.ContractTries()
|
t.contractTries = init.ContractTries()
|
||||||
t.contractElem = init.ContractElems()
|
t.contractElem = init.ContractElems()
|
||||||
t.maxContractLen = init.MaxContractLen()
|
t.maxContractLen = init.MaxContractLen()
|
||||||
return &Collator{t: t}
|
t.variableTop = init.VariableTop()
|
||||||
|
return &Collator{
|
||||||
|
Strength: Quaternary,
|
||||||
|
f: norm.NFD,
|
||||||
|
t: t,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type tableInitializer interface {
|
type tableInitializer interface {
|
||||||
@ -32,4 +39,5 @@ type tableInitializer interface {
|
|||||||
ContractTries() []struct{ l, h, n, i uint8 }
|
ContractTries() []struct{ l, h, n, i uint8 }
|
||||||
ContractElems() []uint32
|
ContractElems() []uint32
|
||||||
MaxContractLen() int
|
MaxContractLen() int
|
||||||
|
VariableTop() uint32
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,6 @@ package collate
|
|||||||
// Export for testing.
|
// Export for testing.
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"exp/norm"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -63,18 +62,14 @@ func (t *Table) AppendNext(s []byte) ([]Weights, int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func SetTop(c *Collator, top int) {
|
func SetTop(c *Collator, top int) {
|
||||||
c.variableTop = uint32(top)
|
if c.t == nil {
|
||||||
|
c.t = &table{}
|
||||||
}
|
}
|
||||||
|
c.t.variableTop = uint32(top)
|
||||||
func InitCollator(c *Collator) {
|
|
||||||
c.Strength = Quaternary
|
|
||||||
c.f = norm.NFD
|
|
||||||
c.t.maxContractLen = 30
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetColElems(c *Collator, buf *Buffer, str []byte) []Weights {
|
func GetColElems(c *Collator, buf *Buffer, str []byte) []Weights {
|
||||||
buf.ResetKeys()
|
buf.ResetKeys()
|
||||||
InitCollator(c)
|
|
||||||
c.getColElems(buf, str)
|
c.getColElems(buf, str)
|
||||||
return convertToWeights(buf.ce)
|
return convertToWeights(buf.ce)
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@ var localFiles = flag.Bool("local",
|
|||||||
false,
|
false,
|
||||||
"data files have been copied to the current directory; for debugging only")
|
"data files have been copied to the current directory; for debugging only")
|
||||||
|
|
||||||
func failonerror(e error) {
|
func failOnError(e error) {
|
||||||
if e != nil {
|
if e != nil {
|
||||||
log.Fatal(e)
|
log.Fatal(e)
|
||||||
}
|
}
|
||||||
@ -62,10 +62,9 @@ func openReader(url string) (io.ReadCloser, error) {
|
|||||||
// parseUCA parses a Default Unicode Collation Element Table of the format
|
// parseUCA parses a Default Unicode Collation Element Table of the format
|
||||||
// specified in http://www.unicode.org/reports/tr10/#File_Format.
|
// specified in http://www.unicode.org/reports/tr10/#File_Format.
|
||||||
// It returns the variable top.
|
// Collation elements marked as variable are passed to builder.Add by index.
|
||||||
func parseUCA(builder *build.Builder) int {
|
func parseUCA(builder *build.Builder) {
|
||||||
maxVar, minNonVar := 0, 1<<30
|
|
||||||
r, err := openReader(*ducet)
|
r, err := openReader(*ducet)
|
||||||
failonerror(err)
|
failOnError(err)
|
||||||
defer r.Close()
|
defer r.Close()
|
||||||
input := bufio.NewReader(r)
|
input := bufio.NewReader(r)
|
||||||
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
|
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
|
||||||
@ -109,32 +108,25 @@ func parseUCA(builder *build.Builder) int {
|
|||||||
lhs = append(lhs, rune(convHex(i, v)))
|
lhs = append(lhs, rune(convHex(i, v)))
|
||||||
}
|
}
|
||||||
var n int
|
var n int
|
||||||
|
var vars []int
|
||||||
rhs := [][]int{}
|
rhs := [][]int{}
|
||||||
for _, m := range colelem.FindAllStringSubmatch(part[1], -1) {
|
for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
|
||||||
n += len(m[0])
|
n += len(m[0])
|
||||||
elem := []int{}
|
elem := []int{}
|
||||||
for _, h := range strings.Split(m[2], ".") {
|
for _, h := range strings.Split(m[2], ".") {
|
||||||
elem = append(elem, convHex(i, h))
|
elem = append(elem, convHex(i, h))
|
||||||
}
|
}
|
||||||
if p := elem[0]; m[1] == "*" {
|
if m[1] == "*" {
|
||||||
if p > maxVar {
|
vars = append(vars, i)
|
||||||
maxVar = p
|
|
||||||
}
|
|
||||||
} else if p > 0 && p < minNonVar {
|
|
||||||
minNonVar = p
|
|
||||||
}
|
}
|
||||||
rhs = append(rhs, elem)
|
rhs = append(rhs, elem)
|
||||||
}
|
}
|
||||||
if len(part[1]) < n+3 || part[1][n+1] != '#' {
|
if len(part[1]) < n+3 || part[1][n+1] != '#' {
|
||||||
log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
|
log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
|
||||||
}
|
}
|
||||||
builder.Add(lhs, rhs)
|
failOnError(builder.Add(lhs, rhs, vars))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if maxVar >= minNonVar {
|
|
||||||
log.Fatalf("found maxVar > minNonVar (%d > %d)", maxVar, minNonVar)
|
|
||||||
}
|
|
||||||
return maxVar
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func convHex(line int, s string) int {
|
func convHex(line int, s string) int {
|
||||||
@ -146,11 +138,10 @@ func convHex(line int, s string) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: move this functionality to exp/locale/collate/build.
|
// TODO: move this functionality to exp/locale/collate/build.
|
||||||
func printCollators(c *collate.Collator, vartop int) {
|
func printCollators(c *collate.Collator) {
|
||||||
const name = "Root"
|
const name = "Root"
|
||||||
fmt.Printf("var _%s = Collator{\n", name)
|
fmt.Printf("var _%s = Collator{\n", name)
|
||||||
fmt.Printf("\tStrength: %v,\n", c.Strength)
|
fmt.Printf("\tStrength: %v,\n", c.Strength)
|
||||||
fmt.Printf("\tvariableTop: 0x%X,\n", vartop)
|
|
||||||
fmt.Printf("\tf: norm.NFD,\n")
|
fmt.Printf("\tf: norm.NFD,\n")
|
||||||
fmt.Printf("\tt: &%sTable,\n", strings.ToLower(name))
|
fmt.Printf("\tt: &%sTable,\n", strings.ToLower(name))
|
||||||
fmt.Printf("}\n\n")
|
fmt.Printf("}\n\n")
|
||||||
@ -162,9 +153,9 @@ func printCollators(c *collate.Collator, vartop int) {
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
b := build.NewBuilder()
|
b := build.NewBuilder()
|
||||||
vartop := parseUCA(b)
|
parseUCA(b)
|
||||||
_, err := b.Build("")
|
c, err := b.Build()
|
||||||
failonerror(err)
|
failOnError(err)
|
||||||
|
|
||||||
fmt.Println("// Generated by running")
|
fmt.Println("// Generated by running")
|
||||||
fmt.Printf("// maketables --ducet=%s\n", *ducet)
|
fmt.Printf("// maketables --ducet=%s\n", *ducet)
|
||||||
@ -176,10 +167,8 @@ func main() {
|
|||||||
fmt.Println(`import "exp/norm"`)
|
fmt.Println(`import "exp/norm"`)
|
||||||
fmt.Println("")
|
fmt.Println("")
|
||||||
|
|
||||||
c := &collate.Collator{}
|
printCollators(c)
|
||||||
c.Strength = collate.Quaternary
|
|
||||||
printCollators(c, vartop)
|
|
||||||
|
|
||||||
_, err = b.Print(os.Stdout)
|
_, err = b.Print(os.Stdout)
|
||||||
failonerror(err)
|
failOnError(err)
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@ type table struct {
|
|||||||
contractTries contractTrieSet
|
contractTries contractTrieSet
|
||||||
contractElem []uint32
|
contractElem []uint32
|
||||||
maxContractLen int
|
maxContractLen int
|
||||||
|
variableTop uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
// appendNext appends the weights corresponding to the next rune or
|
// appendNext appends the weights corresponding to the next rune or
|
||||||
|
@ -42,14 +42,9 @@ func pt(p, t int) []int {
|
|||||||
func makeTable(in []input) (*collate.Collator, error) {
|
func makeTable(in []input) (*collate.Collator, error) {
|
||||||
b := build.NewBuilder()
|
b := build.NewBuilder()
|
||||||
for _, r := range in {
|
for _, r := range in {
|
||||||
b.Add([]rune(r.str), r.ces)
|
b.Add([]rune(r.str), r.ces, nil)
|
||||||
}
|
}
|
||||||
c, err := b.Build("")
|
return b.Build()
|
||||||
if c == nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
collate.InitCollator(c)
|
|
||||||
return c, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// modSeq holds a sequence of modifiers in increasing order of CCC long enough
|
// modSeq holds a sequence of modifiers in increasing order of CCC long enough
|
||||||
|
@ -9,7 +9,6 @@ import "exp/norm"
|
|||||||
|
|
||||||
var _Root = Collator{
|
var _Root = Collator{
|
||||||
Strength: 3,
|
Strength: 3,
|
||||||
variableTop: 0x1560,
|
|
||||||
f: norm.NFD,
|
f: norm.NFD,
|
||||||
t: &rootTable,
|
t: &rootTable,
|
||||||
}
|
}
|
||||||
@ -24,6 +23,7 @@ var rootTable = table{
|
|||||||
contractTrieSet(rootCTEntries[:]),
|
contractTrieSet(rootCTEntries[:]),
|
||||||
rootContractElem[:],
|
rootContractElem[:],
|
||||||
9,
|
9,
|
||||||
|
0x1560,
|
||||||
}
|
}
|
||||||
|
|
||||||
// rootExpandElem: 4630 entries, 18520 bytes
|
// rootExpandElem: 4630 entries, 18520 bytes
|
||||||
|
Loading…
Reference in New Issue
Block a user