mirror of
https://github.com/golang/go
synced 2024-11-21 19:54:41 -07:00
exp/locale/collate: changed API to allow access to different locales through New(),
instead of variables. Several reasons: - Encourage users of the API to minimize the number of creations and reuse Collate objects. - Don't rule out the possibility of using initialization code for collators. For some locales it will be possible to have very compact representations that can be quickly expanded into a proper table on demand. Other changes: - Change name of root* vars to main*, as the tables are shared between locales. - Added Locales() method to get a list of supported locales. R=r CC=golang-dev https://golang.org/cl/6498107
This commit is contained in:
parent
db3c800d19
commit
a4d08ed5df
@ -263,14 +263,31 @@ func (t *Tailoring) Build() (*collate.Collator, error) {
|
||||
|
||||
// Print prints the tables for b and all its Tailorings as a Go file
|
||||
// that can be included in the Collate package.
|
||||
func (b *Builder) Print(w io.Writer) (int, error) {
|
||||
func (b *Builder) Print(w io.Writer) (n int, err error) {
|
||||
p := func(nn int, e error) {
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
t, err := b.build()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
// TODO: support multiple locales
|
||||
n, _, err := t.fprint(w, "root")
|
||||
return n, err
|
||||
p(fmt.Fprintf(w, "var availableLocales = []string{"))
|
||||
for _, loc := range b.locale {
|
||||
p(fmt.Fprintf(w, "%q, ", loc.id))
|
||||
}
|
||||
p(fmt.Fprintln(w, "}\n"))
|
||||
p(fmt.Fprintln(w, "var locales = map[string]tableIndex{"))
|
||||
for _, loc := range b.locale {
|
||||
p(fmt.Fprintf(w, "\t%q: ", loc.id))
|
||||
p(t.fprintIndex(w, loc.index.handle))
|
||||
p(fmt.Fprintln(w, ","))
|
||||
}
|
||||
p(fmt.Fprint(w, "}\n\n"))
|
||||
n, _, err = t.fprint(w, "main")
|
||||
return
|
||||
}
|
||||
|
||||
// reproducibleFromNFKD checks whether the given expansion could be generated
|
||||
|
@ -96,6 +96,21 @@ func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func (t *table) fprintIndex(w io.Writer, h *trieHandle) (n int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
p("tableIndex{\n")
|
||||
p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
|
||||
p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
|
||||
p("\t}")
|
||||
return
|
||||
}
|
||||
|
||||
func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
|
@ -85,6 +85,27 @@ type Collator struct {
|
||||
t *table
|
||||
}
|
||||
|
||||
// Locales returns the list of locales for which collating differs from its parent locale.
|
||||
func Locales() []string {
|
||||
return availableLocales
|
||||
}
|
||||
|
||||
// New returns a new Collator initialized for the given locale.
|
||||
func New(loc string) *Collator {
|
||||
// TODO: handle locale selection according to spec.
|
||||
t := &mainTable
|
||||
if loc != "" {
|
||||
if idx, ok := locales[loc]; ok {
|
||||
t = mainTable.indexedTable(idx)
|
||||
}
|
||||
}
|
||||
return &Collator{
|
||||
Strength: Quaternary,
|
||||
f: norm.NFD,
|
||||
t: t,
|
||||
}
|
||||
}
|
||||
|
||||
// SetVariableTop sets all runes with primary strength less than the primary
|
||||
// strength of r to be variable and thus affected by alternate handling.
|
||||
func (c *Collator) SetVariableTop(r rune) {
|
||||
|
@ -658,7 +658,7 @@ func insertCollation(builder *build.Builder, locale string, c *Collation) {
|
||||
}
|
||||
|
||||
func testCollator(c *collate.Collator) {
|
||||
c0 := collate.Root
|
||||
c0 := collate.New("")
|
||||
|
||||
// iterator over all characters for all locales and check
|
||||
// whether Key is equal.
|
||||
@ -682,19 +682,6 @@ func testCollator(c *collate.Collator) {
|
||||
fmt.Println("PASS")
|
||||
}
|
||||
|
||||
// TODO: move this functionality to exp/locale/collate/build.
|
||||
func printCollators(c *collate.Collator) {
|
||||
const name = "Root"
|
||||
fmt.Printf("var _%s = Collator{\n", name)
|
||||
fmt.Printf("\tStrength: %v,\n", c.Strength)
|
||||
fmt.Printf("\tf: norm.NFD,\n")
|
||||
fmt.Printf("\tt: &%sTable,\n", strings.ToLower(name))
|
||||
fmt.Printf("}\n\n")
|
||||
fmt.Printf("var (\n")
|
||||
fmt.Printf("\t%s = _%s\n", name, name)
|
||||
fmt.Printf(")\n\n")
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
b := build.NewBuilder()
|
||||
@ -725,9 +712,6 @@ func main() {
|
||||
fmt.Printf("package %s\n", *pkg)
|
||||
if tables.contains("collate") {
|
||||
fmt.Println("")
|
||||
fmt.Println(`import "exp/norm"`)
|
||||
fmt.Println("")
|
||||
printCollators(c)
|
||||
_, err = b.Print(os.Stdout)
|
||||
failOnError(err)
|
||||
}
|
||||
|
@ -9,6 +9,13 @@ import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// tableIndex holds information for constructing a table
|
||||
// for a certain locale based on the main table.
|
||||
type tableIndex struct {
|
||||
lookupOffset uint32
|
||||
valuesOffset uint32
|
||||
}
|
||||
|
||||
// table holds all collation data for a given collation ordering.
|
||||
type table struct {
|
||||
index trie // main trie
|
||||
@ -23,6 +30,13 @@ type table struct {
|
||||
variableTop uint32
|
||||
}
|
||||
|
||||
func (t *table) indexedTable(idx tableIndex) *table {
|
||||
nt := *t
|
||||
nt.index.index0 = t.index.index[idx.lookupOffset*blockSize:]
|
||||
nt.index.values0 = t.index.values[idx.valuesOffset*blockSize:]
|
||||
return &nt
|
||||
}
|
||||
|
||||
// appendNext appends the weights corresponding to the next rune or
|
||||
// contraction in s. If a contraction is matched to a discontinuous
|
||||
// sequence of runes, the weights for the interstitial runes are
|
||||
|
@ -5,29 +5,330 @@
|
||||
|
||||
package collate
|
||||
|
||||
import "exp/norm"
|
||||
var availableLocales = []string{"af", "ar", "as", "az", "be", "bg", "bn", "ca", "cs", "cy", "da", "de", "dz", "el", "en_US_POSIX", "eo", "es", "et", "fa", "fi", "fil", "fo", "fr_CA", "gu", "ha", "haw", "he", "hi", "hr", "hu", "hy", "ig", "is", "ja", "kk", "kl", "km", "kn", "ko", "kok", "ln", "lt", "lv", "mk", "ml", "mr", "mt", "my", "nb", "nn", "nso", "om", "or", "pa", "pl", "ps", "ro", "root", "ru", "se", "si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "th", "tn", "tr", "uk", "ur", "vi", "wae", "yo", "zh"}
|
||||
|
||||
var _Root = Collator{
|
||||
Strength: 3,
|
||||
f: norm.NFD,
|
||||
t: &rootTable,
|
||||
var locales = map[string]tableIndex{
|
||||
"af": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ar": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"as": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"az": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"be": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"bg": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"bn": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ca": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"cs": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"cy": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"da": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"de": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"dz": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"el": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"en_US_POSIX": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"eo": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"es": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"et": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"fa": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"fi": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"fil": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"fo": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"fr_CA": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"gu": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ha": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"haw": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"he": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"hi": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"hr": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"hu": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"hy": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ig": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"is": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ja": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"kk": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"kl": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"km": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"kn": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ko": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"kok": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ln": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"lt": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"lv": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"mk": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ml": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"mr": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"mt": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"my": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"nb": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"nn": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"nso": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"om": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"or": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"pa": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"pl": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ps": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ro": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"root": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ru": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"se": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"si": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"sk": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"sl": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"sq": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"sr": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"sv": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ta": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"te": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"th": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"tn": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"tr": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"uk": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"ur": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"vi": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"wae": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"yo": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
"zh": tableIndex{
|
||||
lookupOffset: 0x13,
|
||||
valuesOffset: 0x0,
|
||||
},
|
||||
}
|
||||
|
||||
var (
|
||||
Root = _Root
|
||||
)
|
||||
|
||||
var rootTable = table{
|
||||
trie{rootLookup[1216:], rootValues[0:], rootLookup[:], rootValues[:]},
|
||||
rootExpandElem[:],
|
||||
contractTrieSet(rootCTEntries[:]),
|
||||
rootContractElem[:],
|
||||
var mainTable = table{
|
||||
trie{mainLookup[1216:], mainValues[0:], mainLookup[:], mainValues[:]},
|
||||
mainExpandElem[:],
|
||||
contractTrieSet(mainCTEntries[:]),
|
||||
mainContractElem[:],
|
||||
9,
|
||||
0x2ED,
|
||||
}
|
||||
|
||||
// rootExpandElem: 4642 entries, 18568 bytes
|
||||
var rootExpandElem = [4642]uint32{
|
||||
// mainExpandElem: 4642 entries, 18568 bytes
|
||||
var mainExpandElem = [4642]uint32{
|
||||
// Block 0, offset 0x0
|
||||
0x00000002, 0x8000A31A, 0x8000AD1A, 0x00000002, 0x8000A51A, 0x8000AD1A,
|
||||
0x00000002, 0x8000A718, 0x8000AD18, 0x00000002, 0x8000A71A, 0x8000AD1A,
|
||||
@ -901,8 +1202,8 @@ var rootExpandElem = [4642]uint32{
|
||||
0x80015F1F, 0x00000002, 0x033F3804, 0x8001601F,
|
||||
}
|
||||
|
||||
// rootContractElem: 799 entries, 3196 bytes
|
||||
var rootContractElem = [799]uint32{
|
||||
// mainContractElem: 799 entries, 3196 bytes
|
||||
var mainContractElem = [799]uint32{
|
||||
// Block 0, offset 0x0
|
||||
0x4016C420, 0xE0000789, 0xE0000789, 0x002D8808, 0xE000078F, 0xE000078F,
|
||||
0x40194320, 0x40194720, 0x40194B20, 0x00328608, 0x00328E08, 0x00329608,
|
||||
@ -1056,9 +1357,9 @@ var rootContractElem = [799]uint32{
|
||||
0x40283220,
|
||||
}
|
||||
|
||||
// rootValues: 25408 entries, 101632 bytes
|
||||
// mainValues: 25408 entries, 101632 bytes
|
||||
// Block 2 is the null block.
|
||||
var rootValues = [25408]uint32{
|
||||
var mainValues = [25408]uint32{
|
||||
// Block 0x0, offset 0x0
|
||||
0x0000: 0x80000000, 0x0001: 0x80000000, 0x0002: 0x80000000, 0x0003: 0x80000000,
|
||||
0x0004: 0x80000000, 0x0005: 0x80000000, 0x0006: 0x80000000, 0x0007: 0x80000000,
|
||||
@ -7047,9 +7348,9 @@ var rootValues = [25408]uint32{
|
||||
0x632c: 0x80000000, 0x632d: 0x80000000, 0x632e: 0x80000000, 0x632f: 0x80000000,
|
||||
}
|
||||
|
||||
// rootLookup: 1472 entries, 2944 bytes
|
||||
// mainLookup: 1472 entries, 2944 bytes
|
||||
// Block 0 is the null block.
|
||||
var rootLookup = [1472]uint16{
|
||||
var mainLookup = [1472]uint16{
|
||||
// Block 0x0, offset 0x0
|
||||
// Block 0x1, offset 0x40
|
||||
// Block 0x2, offset 0x80
|
||||
@ -7144,8 +7445,8 @@ var rootLookup = [1472]uint16{
|
||||
0x5b0: 0x11, 0x5b3: 0x13,
|
||||
}
|
||||
|
||||
// rootCTEntries: 126 entries, 504 bytes
|
||||
var rootCTEntries = [126]struct{ l, h, n, i uint8 }{
|
||||
// mainCTEntries: 126 entries, 504 bytes
|
||||
var mainCTEntries = [126]struct{ l, h, n, i uint8 }{
|
||||
{0xCE, 0x1, 1, 255},
|
||||
{0xC2, 0x0, 1, 255},
|
||||
{0xB7, 0xB7, 0, 1},
|
||||
@ -7274,4 +7575,4 @@ var rootCTEntries = [126]struct{ l, h, n, i uint8 }{
|
||||
{0xB5, 0xB5, 0, 1},
|
||||
}
|
||||
|
||||
// Total size of rootTable is 126988 bytes
|
||||
// Total size of mainTable is 126988 bytes
|
||||
|
Loading…
Reference in New Issue
Block a user