1
0
mirror of https://github.com/golang/go synced 2024-11-12 04:30:22 -07:00

exp/locale/collate: Add maketables tool and generated tables.

Also set maxContractLen automatically.
Note that the table size is much bigger than it needs to be.
Optimization is best done, though, when the language specific
tables are added.

R=r
CC=golang-dev
https://golang.org/cl/6167044
This commit is contained in:
Marcel van Lohuizen 2012-05-09 12:03:55 +02:00
parent 25a8a8dab8
commit 0355a71751
6 changed files with 7477 additions and 4 deletions

View File

@ -0,0 +1,16 @@
# Copyright 2012 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
CLEANFILES+=maketables
maketables: maketables.go
go build $^
tables: maketables
./maketables > tables.go
gofmt -w tables.go
# Build (but do not run) maketables during testing,
# just to make sure it still compiles.
testshort: maketables

View File

@ -412,6 +412,9 @@ func (b *Builder) processContractions() {
cm := make(map[rune][]*entry)
for _, e := range b.entry {
if e.contraction() {
if len(e.str) > b.t.maxContractLen {
b.t.maxContractLen = len(e.str)
}
r := e.runes[0]
if _, ok := cm[r]; !ok {
starters = append(starters, r)

View File

@ -72,6 +72,7 @@ func (t *table) print(w io.Writer, name string) (n, size int, err error) {
update(t.contractTries.printStruct(w, name))
p(",\n")
p("%sContractElem[:],\n", name)
p("%d,\n", t.maxContractLen)
p("}\n\n")
// Write arrays needed for the structure.

View File

@ -92,10 +92,6 @@ func (c *Collator) SetVariableTop(r rune) {
// TODO: implement
}
var (
Root = Collator{}
)
// Buffer holds reusable buffers that can be used during collation.
// Reusing a Buffer for the various calls that accept it may avoid
// unnecessary memory allocations.

View File

@ -0,0 +1,185 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Collation table generator.
// Data read from the web.
package main
import (
"bufio"
"exp/locale/collate"
"exp/locale/collate/build"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"path"
"regexp"
"strconv"
"strings"
"unicode"
)
var ducet = flag.String("ducet",
"http://unicode.org/Public/UCA/"+unicode.Version+"/allkeys.txt",
"URL of the Default Unicode Collation Element Table (DUCET).")
var localFiles = flag.Bool("local",
false,
"data files have been copied to the current directory; for debugging only")
func failonerror(e error) {
if e != nil {
log.Fatal(e)
}
}
// openReader opens the url or file given by url and returns it as an io.ReadCloser
// or nil on error.
func openReader(url string) (io.ReadCloser, error) {
if *localFiles {
pwd, _ := os.Getwd()
url = "file://" + path.Join(pwd, path.Base(url))
}
t := &http.Transport{}
t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
c := &http.Client{Transport: t}
resp, err := c.Get(url)
if err != nil {
return nil, err
}
if resp.StatusCode != 200 {
return nil, fmt.Errorf(`bad GET status for "%s": %s`, url, resp.Status)
}
return resp.Body, nil
}
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) int {
maxVar, minNonVar := 0, 1<<30
r, err := openReader(*ducet)
failonerror(err)
defer r.Close()
input := bufio.NewReader(r)
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
for i := 1; err == nil; i++ {
l, prefix, e := input.ReadLine()
err = e
line := string(l)
if prefix {
log.Fatalf("%d: buffer overflow", i)
}
if err != nil && err != io.EOF {
log.Fatalf("%d: %v", i, err)
}
if len(line) == 0 || line[0] == '#' {
continue
}
if line[0] == '@' {
// parse properties
switch {
case strings.HasPrefix(line[1:], "version "):
a := strings.Split(line[1:], " ")
if a[1] != unicode.Version {
log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version)
}
case strings.HasPrefix(line[1:], "backwards "):
log.Fatalf("%d: unsupported option backwards", i)
default:
log.Printf("%d: unknown option %s", i, line[1:])
}
} else {
// parse entries
part := strings.Split(line, " ; ")
if len(part) != 2 {
log.Fatalf("%d: production rule without ';': %v", i, line)
}
lhs := []rune{}
for _, v := range strings.Split(part[0], " ") {
if v == "" {
continue
}
lhs = append(lhs, rune(convHex(i, v)))
}
var n int
rhs := [][]int{}
for _, m := range colelem.FindAllStringSubmatch(part[1], -1) {
n += len(m[0])
elem := []int{}
for _, h := range strings.Split(m[2], ".") {
elem = append(elem, convHex(i, h))
}
if p := elem[0]; m[1] == "*" {
if p > maxVar {
maxVar = p
}
} else if p > 0 && p < minNonVar {
minNonVar = p
}
rhs = append(rhs, elem)
}
if len(part[1]) < n+3 || part[1][n+1] != '#' {
log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
}
builder.Add(lhs, rhs)
}
}
if maxVar >= minNonVar {
log.Fatalf("found maxVar > minNonVar (%d > %d)", maxVar, minNonVar)
}
return maxVar
}
func convHex(line int, s string) int {
r, e := strconv.ParseInt(s, 16, 32)
if e != nil {
log.Fatalf("%d: %v", line, e)
}
return int(r)
}
// TODO: move this functionality to exp/locale/collate/build.
func printCollators(c *collate.Collator, vartop int) {
const name = "Root"
fmt.Printf("var _%s = Collator{\n", name)
fmt.Printf("\tStrength: %v,\n", c.Strength)
fmt.Printf("\tvariableTop: 0x%X,\n", vartop)
fmt.Printf("\tf: norm.NFD,\n")
fmt.Printf("\tt: &%sTable,\n", strings.ToLower(name))
fmt.Printf("}\n\n")
fmt.Printf("var (\n")
fmt.Printf("\t%s = _%s\n", name, name)
fmt.Printf(")\n\n")
}
func main() {
flag.Parse()
b := build.NewBuilder()
vartop := parseUCA(b)
_, err := b.Build("")
failonerror(err)
fmt.Println("// Generated by running")
fmt.Printf("// maketables --ducet=%s\n", *ducet)
fmt.Println("// DO NOT EDIT")
fmt.Println("// TODO: implement more compact representation for sparse blocks.")
fmt.Println("")
fmt.Println("package collate")
fmt.Println("")
fmt.Println(`import "exp/norm"`)
fmt.Println("")
c := &collate.Collator{}
c.Strength = collate.Quaternary
printCollators(c, vartop)
_, err = b.Print(os.Stdout)
failonerror(err)
}

File diff suppressed because it is too large Load Diff