1
0
mirror of https://github.com/golang/go synced 2024-10-03 19:31:21 -06:00

exp/locale/collate: changed API to allow access to different locales through New(),

instead of variables. Several reasons:
- Encourage users of the API to minimize the number of creations and reuse Collate objects.
- Don't rule out the possibility of using initialization code for collators. For some locales
  it will be possible to have very compact representations that can be quickly expanded
  into a proper table on demand.
Other changes:
- Change name of root* vars to main*, as the tables are shared between locales.
- Added Locales() method to get a list of supported locales.

R=r
CC=golang-dev
https://golang.org/cl/6498107
This commit is contained in:
Marcel van Lohuizen 2012-09-14 19:10:02 +09:00
parent db3c800d19
commit a4d08ed5df
6 changed files with 398 additions and 46 deletions

View File

@ -263,14 +263,31 @@ func (t *Tailoring) Build() (*collate.Collator, error) {
// Print prints the tables for b and all its Tailorings as a Go file
// that can be included in the Collate package.
func (b *Builder) Print(w io.Writer) (int, error) {
func (b *Builder) Print(w io.Writer) (n int, err error) {
p := func(nn int, e error) {
n += nn
if err == nil {
err = e
}
}
t, err := b.build()
if err != nil {
return 0, err
}
// TODO: support multiple locales
n, _, err := t.fprint(w, "root")
return n, err
p(fmt.Fprintf(w, "var availableLocales = []string{"))
for _, loc := range b.locale {
p(fmt.Fprintf(w, "%q, ", loc.id))
}
p(fmt.Fprintln(w, "}\n"))
p(fmt.Fprintln(w, "var locales = map[string]tableIndex{"))
for _, loc := range b.locale {
p(fmt.Fprintf(w, "\t%q: ", loc.id))
p(t.fprintIndex(w, loc.index.handle))
p(fmt.Fprintln(w, ","))
}
p(fmt.Fprint(w, "}\n\n"))
n, _, err = t.fprint(w, "main")
return
}
// reproducibleFromNFKD checks whether the given expansion could be generated

View File

@ -96,6 +96,21 @@ func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
return
}
func (t *table) fprintIndex(w io.Writer, h *trieHandle) (n int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
p("tableIndex{\n")
p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
p("\t}")
return
}
func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)

View File

@ -85,6 +85,27 @@ type Collator struct {
t *table
}
// Locales returns the list of locales for which collating differs from its parent locale.
func Locales() []string {
return availableLocales
}
// New returns a new Collator initialized for the given locale.
func New(loc string) *Collator {
// TODO: handle locale selection according to spec.
t := &mainTable
if loc != "" {
if idx, ok := locales[loc]; ok {
t = mainTable.indexedTable(idx)
}
}
return &Collator{
Strength: Quaternary,
f: norm.NFD,
t: t,
}
}
// SetVariableTop sets all runes with primary strength less than the primary
// strength of r to be variable and thus affected by alternate handling.
func (c *Collator) SetVariableTop(r rune) {

View File

@ -658,7 +658,7 @@ func insertCollation(builder *build.Builder, locale string, c *Collation) {
}
func testCollator(c *collate.Collator) {
c0 := collate.Root
c0 := collate.New("")
// iterator over all characters for all locales and check
// whether Key is equal.
@ -682,19 +682,6 @@ func testCollator(c *collate.Collator) {
fmt.Println("PASS")
}
// TODO: move this functionality to exp/locale/collate/build.
func printCollators(c *collate.Collator) {
const name = "Root"
fmt.Printf("var _%s = Collator{\n", name)
fmt.Printf("\tStrength: %v,\n", c.Strength)
fmt.Printf("\tf: norm.NFD,\n")
fmt.Printf("\tt: &%sTable,\n", strings.ToLower(name))
fmt.Printf("}\n\n")
fmt.Printf("var (\n")
fmt.Printf("\t%s = _%s\n", name, name)
fmt.Printf(")\n\n")
}
func main() {
flag.Parse()
b := build.NewBuilder()
@ -725,9 +712,6 @@ func main() {
fmt.Printf("package %s\n", *pkg)
if tables.contains("collate") {
fmt.Println("")
fmt.Println(`import "exp/norm"`)
fmt.Println("")
printCollators(c)
_, err = b.Print(os.Stdout)
failOnError(err)
}

View File

@ -9,6 +9,13 @@ import (
"unicode/utf8"
)
// tableIndex holds information for constructing a table
// for a certain locale based on the main table.
type tableIndex struct {
lookupOffset uint32
valuesOffset uint32
}
// table holds all collation data for a given collation ordering.
type table struct {
index trie // main trie
@ -23,6 +30,13 @@ type table struct {
variableTop uint32
}
func (t *table) indexedTable(idx tableIndex) *table {
nt := *t
nt.index.index0 = t.index.index[idx.lookupOffset*blockSize:]
nt.index.values0 = t.index.values[idx.valuesOffset*blockSize:]
return &nt
}
// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are

View File

@ -5,29 +5,330 @@
package collate
import "exp/norm"
var availableLocales = []string{"af", "ar", "as", "az", "be", "bg", "bn", "ca", "cs", "cy", "da", "de", "dz", "el", "en_US_POSIX", "eo", "es", "et", "fa", "fi", "fil", "fo", "fr_CA", "gu", "ha", "haw", "he", "hi", "hr", "hu", "hy", "ig", "is", "ja", "kk", "kl", "km", "kn", "ko", "kok", "ln", "lt", "lv", "mk", "ml", "mr", "mt", "my", "nb", "nn", "nso", "om", "or", "pa", "pl", "ps", "ro", "root", "ru", "se", "si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "th", "tn", "tr", "uk", "ur", "vi", "wae", "yo", "zh"}
var _Root = Collator{
Strength: 3,
f: norm.NFD,
t: &rootTable,
var locales = map[string]tableIndex{
"af": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ar": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"as": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"az": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"be": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"bg": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"bn": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ca": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"cs": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"cy": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"da": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"de": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"dz": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"el": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"en_US_POSIX": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"eo": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"es": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"et": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"fa": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"fi": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"fil": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"fo": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"fr_CA": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"gu": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ha": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"haw": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"he": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"hi": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"hr": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"hu": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"hy": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ig": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"is": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ja": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"kk": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"kl": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"km": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"kn": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ko": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"kok": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ln": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"lt": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"lv": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"mk": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ml": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"mr": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"mt": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"my": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"nb": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"nn": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"nso": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"om": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"or": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"pa": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"pl": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ps": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ro": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"root": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ru": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"se": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"si": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"sk": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"sl": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"sq": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"sr": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"sv": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ta": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"te": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"th": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"tn": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"tr": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"uk": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"ur": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"vi": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"wae": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"yo": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
"zh": tableIndex{
lookupOffset: 0x13,
valuesOffset: 0x0,
},
}
var (
Root = _Root
)
var rootTable = table{
trie{rootLookup[1216:], rootValues[0:], rootLookup[:], rootValues[:]},
rootExpandElem[:],
contractTrieSet(rootCTEntries[:]),
rootContractElem[:],
var mainTable = table{
trie{mainLookup[1216:], mainValues[0:], mainLookup[:], mainValues[:]},
mainExpandElem[:],
contractTrieSet(mainCTEntries[:]),
mainContractElem[:],
9,
0x2ED,
}
// rootExpandElem: 4642 entries, 18568 bytes
var rootExpandElem = [4642]uint32{
// mainExpandElem: 4642 entries, 18568 bytes
var mainExpandElem = [4642]uint32{
// Block 0, offset 0x0
0x00000002, 0x8000A31A, 0x8000AD1A, 0x00000002, 0x8000A51A, 0x8000AD1A,
0x00000002, 0x8000A718, 0x8000AD18, 0x00000002, 0x8000A71A, 0x8000AD1A,
@ -901,8 +1202,8 @@ var rootExpandElem = [4642]uint32{
0x80015F1F, 0x00000002, 0x033F3804, 0x8001601F,
}
// rootContractElem: 799 entries, 3196 bytes
var rootContractElem = [799]uint32{
// mainContractElem: 799 entries, 3196 bytes
var mainContractElem = [799]uint32{
// Block 0, offset 0x0
0x4016C420, 0xE0000789, 0xE0000789, 0x002D8808, 0xE000078F, 0xE000078F,
0x40194320, 0x40194720, 0x40194B20, 0x00328608, 0x00328E08, 0x00329608,
@ -1056,9 +1357,9 @@ var rootContractElem = [799]uint32{
0x40283220,
}
// rootValues: 25408 entries, 101632 bytes
// mainValues: 25408 entries, 101632 bytes
// Block 2 is the null block.
var rootValues = [25408]uint32{
var mainValues = [25408]uint32{
// Block 0x0, offset 0x0
0x0000: 0x80000000, 0x0001: 0x80000000, 0x0002: 0x80000000, 0x0003: 0x80000000,
0x0004: 0x80000000, 0x0005: 0x80000000, 0x0006: 0x80000000, 0x0007: 0x80000000,
@ -7047,9 +7348,9 @@ var rootValues = [25408]uint32{
0x632c: 0x80000000, 0x632d: 0x80000000, 0x632e: 0x80000000, 0x632f: 0x80000000,
}
// rootLookup: 1472 entries, 2944 bytes
// mainLookup: 1472 entries, 2944 bytes
// Block 0 is the null block.
var rootLookup = [1472]uint16{
var mainLookup = [1472]uint16{
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
@ -7144,8 +7445,8 @@ var rootLookup = [1472]uint16{
0x5b0: 0x11, 0x5b3: 0x13,
}
// rootCTEntries: 126 entries, 504 bytes
var rootCTEntries = [126]struct{ l, h, n, i uint8 }{
// mainCTEntries: 126 entries, 504 bytes
var mainCTEntries = [126]struct{ l, h, n, i uint8 }{
{0xCE, 0x1, 1, 255},
{0xC2, 0x0, 1, 255},
{0xB7, 0xB7, 0, 1},
@ -7274,4 +7575,4 @@ var rootCTEntries = [126]struct{ l, h, n, i uint8 }{
{0xB5, 0xB5, 0, 1},
}
// Total size of rootTable is 126988 bytes
// Total size of mainTable is 126988 bytes