diff --git a/src/pkg/exp/locale/collate/build/builder.go b/src/pkg/exp/locale/collate/build/builder.go index 401ef24e387..97d5e81ddd0 100644 --- a/src/pkg/exp/locale/collate/build/builder.go +++ b/src/pkg/exp/locale/collate/build/builder.go @@ -263,14 +263,31 @@ func (t *Tailoring) Build() (*collate.Collator, error) { // Print prints the tables for b and all its Tailorings as a Go file // that can be included in the Collate package. -func (b *Builder) Print(w io.Writer) (int, error) { +func (b *Builder) Print(w io.Writer) (n int, err error) { + p := func(nn int, e error) { + n += nn + if err == nil { + err = e + } + } t, err := b.build() if err != nil { return 0, err } - // TODO: support multiple locales - n, _, err := t.fprint(w, "root") - return n, err + p(fmt.Fprintf(w, "var availableLocales = []string{")) + for _, loc := range b.locale { + p(fmt.Fprintf(w, "%q, ", loc.id)) + } + p(fmt.Fprintln(w, "}\n")) + p(fmt.Fprintln(w, "var locales = map[string]tableIndex{")) + for _, loc := range b.locale { + p(fmt.Fprintf(w, "\t%q: ", loc.id)) + p(t.fprintIndex(w, loc.index.handle)) + p(fmt.Fprintln(w, ",")) + } + p(fmt.Fprint(w, "}\n\n")) + n, _, err = t.fprint(w, "main") + return } // reproducibleFromNFKD checks whether the given expansion could be generated diff --git a/src/pkg/exp/locale/collate/build/table.go b/src/pkg/exp/locale/collate/build/table.go index a7973f5510e..0a290cd00b5 100644 --- a/src/pkg/exp/locale/collate/build/table.go +++ b/src/pkg/exp/locale/collate/build/table.go @@ -96,6 +96,21 @@ func (t *table) fprint(w io.Writer, name string) (n, size int, err error) { return } +func (t *table) fprintIndex(w io.Writer, h *trieHandle) (n int, err error) { + p := func(f string, a ...interface{}) { + nn, e := fmt.Fprintf(w, f, a...) + n += nn + if err == nil { + err = e + } + } + p("tableIndex{\n") + p("\t\tlookupOffset: 0x%x,\n", h.lookupStart) + p("\t\tvaluesOffset: 0x%x,\n", h.valueStart) + p("\t}") + return +} + func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) { p := func(f string, a ...interface{}) { nn, e := fmt.Fprintf(w, f, a...) diff --git a/src/pkg/exp/locale/collate/collate.go b/src/pkg/exp/locale/collate/collate.go index 081c3b4ce73..5853b710c33 100644 --- a/src/pkg/exp/locale/collate/collate.go +++ b/src/pkg/exp/locale/collate/collate.go @@ -85,6 +85,27 @@ type Collator struct { t *table } +// Locales returns the list of locales for which collating differs from its parent locale. +func Locales() []string { + return availableLocales +} + +// New returns a new Collator initialized for the given locale. +func New(loc string) *Collator { + // TODO: handle locale selection according to spec. + t := &mainTable + if loc != "" { + if idx, ok := locales[loc]; ok { + t = mainTable.indexedTable(idx) + } + } + return &Collator{ + Strength: Quaternary, + f: norm.NFD, + t: t, + } +} + // SetVariableTop sets all runes with primary strength less than the primary // strength of r to be variable and thus affected by alternate handling. func (c *Collator) SetVariableTop(r rune) { diff --git a/src/pkg/exp/locale/collate/maketables.go b/src/pkg/exp/locale/collate/maketables.go index 481c64c457f..7e05ec87a57 100644 --- a/src/pkg/exp/locale/collate/maketables.go +++ b/src/pkg/exp/locale/collate/maketables.go @@ -658,7 +658,7 @@ func insertCollation(builder *build.Builder, locale string, c *Collation) { } func testCollator(c *collate.Collator) { - c0 := collate.Root + c0 := collate.New("") // iterator over all characters for all locales and check // whether Key is equal. @@ -682,19 +682,6 @@ func testCollator(c *collate.Collator) { fmt.Println("PASS") } -// TODO: move this functionality to exp/locale/collate/build. -func printCollators(c *collate.Collator) { - const name = "Root" - fmt.Printf("var _%s = Collator{\n", name) - fmt.Printf("\tStrength: %v,\n", c.Strength) - fmt.Printf("\tf: norm.NFD,\n") - fmt.Printf("\tt: &%sTable,\n", strings.ToLower(name)) - fmt.Printf("}\n\n") - fmt.Printf("var (\n") - fmt.Printf("\t%s = _%s\n", name, name) - fmt.Printf(")\n\n") -} - func main() { flag.Parse() b := build.NewBuilder() @@ -725,9 +712,6 @@ func main() { fmt.Printf("package %s\n", *pkg) if tables.contains("collate") { fmt.Println("") - fmt.Println(`import "exp/norm"`) - fmt.Println("") - printCollators(c) _, err = b.Print(os.Stdout) failOnError(err) } diff --git a/src/pkg/exp/locale/collate/table.go b/src/pkg/exp/locale/collate/table.go index b2a5b623164..c25799b98b2 100644 --- a/src/pkg/exp/locale/collate/table.go +++ b/src/pkg/exp/locale/collate/table.go @@ -9,6 +9,13 @@ import ( "unicode/utf8" ) +// tableIndex holds information for constructing a table +// for a certain locale based on the main table. +type tableIndex struct { + lookupOffset uint32 + valuesOffset uint32 +} + // table holds all collation data for a given collation ordering. type table struct { index trie // main trie @@ -23,6 +30,13 @@ type table struct { variableTop uint32 } +func (t *table) indexedTable(idx tableIndex) *table { + nt := *t + nt.index.index0 = t.index.index[idx.lookupOffset*blockSize:] + nt.index.values0 = t.index.values[idx.valuesOffset*blockSize:] + return &nt +} + // appendNext appends the weights corresponding to the next rune or // contraction in s. If a contraction is matched to a discontinuous // sequence of runes, the weights for the interstitial runes are diff --git a/src/pkg/exp/locale/collate/tables.go b/src/pkg/exp/locale/collate/tables.go index 30fe862c3d4..f8d469a56bc 100644 --- a/src/pkg/exp/locale/collate/tables.go +++ b/src/pkg/exp/locale/collate/tables.go @@ -5,29 +5,330 @@ package collate -import "exp/norm" +var availableLocales = []string{"af", "ar", "as", "az", "be", "bg", "bn", "ca", "cs", "cy", "da", "de", "dz", "el", "en_US_POSIX", "eo", "es", "et", "fa", "fi", "fil", "fo", "fr_CA", "gu", "ha", "haw", "he", "hi", "hr", "hu", "hy", "ig", "is", "ja", "kk", "kl", "km", "kn", "ko", "kok", "ln", "lt", "lv", "mk", "ml", "mr", "mt", "my", "nb", "nn", "nso", "om", "or", "pa", "pl", "ps", "ro", "root", "ru", "se", "si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "th", "tn", "tr", "uk", "ur", "vi", "wae", "yo", "zh"} -var _Root = Collator{ - Strength: 3, - f: norm.NFD, - t: &rootTable, +var locales = map[string]tableIndex{ + "af": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ar": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "as": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "az": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "be": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "bg": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "bn": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ca": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "cs": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "cy": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "da": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "de": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "dz": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "el": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "en_US_POSIX": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "eo": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "es": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "et": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "fa": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "fi": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "fil": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "fo": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "fr_CA": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "gu": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ha": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "haw": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "he": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "hi": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "hr": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "hu": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "hy": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ig": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "is": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ja": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "kk": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "kl": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "km": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "kn": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ko": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "kok": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ln": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "lt": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "lv": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "mk": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ml": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "mr": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "mt": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "my": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "nb": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "nn": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "nso": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "om": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "or": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "pa": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "pl": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ps": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ro": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "root": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ru": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "se": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "si": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "sk": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "sl": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "sq": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "sr": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "sv": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ta": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "te": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "th": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "tn": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "tr": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "uk": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "ur": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "vi": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "wae": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "yo": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, + "zh": tableIndex{ + lookupOffset: 0x13, + valuesOffset: 0x0, + }, } -var ( - Root = _Root -) - -var rootTable = table{ - trie{rootLookup[1216:], rootValues[0:], rootLookup[:], rootValues[:]}, - rootExpandElem[:], - contractTrieSet(rootCTEntries[:]), - rootContractElem[:], +var mainTable = table{ + trie{mainLookup[1216:], mainValues[0:], mainLookup[:], mainValues[:]}, + mainExpandElem[:], + contractTrieSet(mainCTEntries[:]), + mainContractElem[:], 9, 0x2ED, } -// rootExpandElem: 4642 entries, 18568 bytes -var rootExpandElem = [4642]uint32{ +// mainExpandElem: 4642 entries, 18568 bytes +var mainExpandElem = [4642]uint32{ // Block 0, offset 0x0 0x00000002, 0x8000A31A, 0x8000AD1A, 0x00000002, 0x8000A51A, 0x8000AD1A, 0x00000002, 0x8000A718, 0x8000AD18, 0x00000002, 0x8000A71A, 0x8000AD1A, @@ -901,8 +1202,8 @@ var rootExpandElem = [4642]uint32{ 0x80015F1F, 0x00000002, 0x033F3804, 0x8001601F, } -// rootContractElem: 799 entries, 3196 bytes -var rootContractElem = [799]uint32{ +// mainContractElem: 799 entries, 3196 bytes +var mainContractElem = [799]uint32{ // Block 0, offset 0x0 0x4016C420, 0xE0000789, 0xE0000789, 0x002D8808, 0xE000078F, 0xE000078F, 0x40194320, 0x40194720, 0x40194B20, 0x00328608, 0x00328E08, 0x00329608, @@ -1056,9 +1357,9 @@ var rootContractElem = [799]uint32{ 0x40283220, } -// rootValues: 25408 entries, 101632 bytes +// mainValues: 25408 entries, 101632 bytes // Block 2 is the null block. -var rootValues = [25408]uint32{ +var mainValues = [25408]uint32{ // Block 0x0, offset 0x0 0x0000: 0x80000000, 0x0001: 0x80000000, 0x0002: 0x80000000, 0x0003: 0x80000000, 0x0004: 0x80000000, 0x0005: 0x80000000, 0x0006: 0x80000000, 0x0007: 0x80000000, @@ -7047,9 +7348,9 @@ var rootValues = [25408]uint32{ 0x632c: 0x80000000, 0x632d: 0x80000000, 0x632e: 0x80000000, 0x632f: 0x80000000, } -// rootLookup: 1472 entries, 2944 bytes +// mainLookup: 1472 entries, 2944 bytes // Block 0 is the null block. -var rootLookup = [1472]uint16{ +var mainLookup = [1472]uint16{ // Block 0x0, offset 0x0 // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 @@ -7144,8 +7445,8 @@ var rootLookup = [1472]uint16{ 0x5b0: 0x11, 0x5b3: 0x13, } -// rootCTEntries: 126 entries, 504 bytes -var rootCTEntries = [126]struct{ l, h, n, i uint8 }{ +// mainCTEntries: 126 entries, 504 bytes +var mainCTEntries = [126]struct{ l, h, n, i uint8 }{ {0xCE, 0x1, 1, 255}, {0xC2, 0x0, 1, 255}, {0xB7, 0xB7, 0, 1}, @@ -7274,4 +7575,4 @@ var rootCTEntries = [126]struct{ l, h, n, i uint8 }{ {0xB5, 0xB5, 0, 1}, } -// Total size of rootTable is 126988 bytes +// Total size of mainTable is 126988 bytes