1
0
mirror of https://github.com/golang/go synced 2024-11-19 02:14:43 -07:00

unicode/maketables: add -output flag, buffer output, use gofmt

Simplify the invocation (and speed it up substantially) in preparation
for move to go generate.

LGTM=bradfitz, mpvl
R=mpvl, bradfitz, josharian
CC=golang-codereviews
https://golang.org/cl/135790043
This commit is contained in:
Rob Pike 2014-08-25 14:56:35 -07:00
parent 2c121b68b1
commit dff17f4617
2 changed files with 117 additions and 62 deletions

View File

@ -6,8 +6,7 @@ maketables: maketables.go
go build maketables.go go build maketables.go
tables: maketables tables: maketables
./maketables --tables=all > tables.go ./maketables --tables=all -output tables.go
gofmt -w tables.go
# Downloads from www.unicode.org, so not part # Downloads from www.unicode.org, so not part
# of standard test scripts. # of standard test scripts.

View File

@ -13,9 +13,11 @@ import (
"bufio" "bufio"
"flag" "flag"
"fmt" "fmt"
"io"
"log" "log"
"net/http" "net/http"
"os" "os"
"os/exec"
"path/filepath" "path/filepath"
"regexp" "regexp"
"sort" "sort"
@ -26,6 +28,7 @@ import (
func main() { func main() {
flag.Parse() flag.Parse()
setupOutput()
loadChars() // always needed loadChars() // always needed
loadCasefold() loadCasefold()
printCategories() printCategories()
@ -35,6 +38,7 @@ func main() {
printLatinProperties() printLatinProperties()
printCasefold() printCasefold()
printSizes() printSizes()
flushOutput()
} }
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt") var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
@ -60,10 +64,62 @@ var test = flag.Bool("test",
var localFiles = flag.Bool("local", var localFiles = flag.Bool("local",
false, false,
"data files have been copied to current directory; for debugging only") "data files have been copied to current directory; for debugging only")
// outputFile is the -output flag. When it is empty the generated tables are
// written to standard output; when it names a file, startGofmt creates that
// file and pipes the generated tables through gofmt into it.
var outputFile = flag.String("output",
"",
"output file for generated tables; default stdout")
var scriptRe = regexp.MustCompile(`^([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)$`) var scriptRe = regexp.MustCompile(`^([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)$`)
var logger = log.New(os.Stderr, "", log.Lshortfile) var logger = log.New(os.Stderr, "", log.Lshortfile)
var output *bufio.Writer // points to os.Stdout or to "gofmt > outputFile"

// finishGofmt is set by startGofmt when the output is piped through gofmt.
// It closes gofmt's standard input, waits for gofmt to exit and closes the
// output file. It stays nil when the tables go straight to os.Stdout.
var finishGofmt func() error

// setupOutput initializes output as a buffered writer on top of the
// destination chosen by startGofmt. Call it after flag.Parse, because
// startGofmt reads the -output flag.
func setupOutput() {
	output = bufio.NewWriter(startGofmt())
}

// startGofmt connects output to a gofmt process if -output is set.
//
// With no -output flag it returns os.Stdout. Otherwise it creates the named
// file, starts "gofmt" with that file as its standard output, and returns the
// write end of gofmt's standard input. Any failure to create the file or to
// start gofmt is fatal. The matching shutdown is recorded in finishGofmt and
// performed by flushOutput.
func startGofmt() io.Writer {
	if *outputFile == "" {
		return os.Stdout
	}
	file, err := os.Create(*outputFile)
	if err != nil {
		logger.Fatal(err)
	}
	// Pipe output to gofmt.
	gofmt := exec.Command("gofmt")
	stdin, err := gofmt.StdinPipe()
	if err != nil {
		logger.Fatal(err)
	}
	gofmt.Stdout = file
	gofmt.Stderr = os.Stderr
	if err := gofmt.Start(); err != nil {
		logger.Fatal(err)
	}
	finishGofmt = func() error {
		// gofmt reads its input until EOF, so the pipe has to be closed
		// before Wait can return.
		closeErr := stdin.Close()
		waitErr := gofmt.Wait()
		fileErr := file.Close()
		switch {
		case closeErr != nil:
			return closeErr
		case waitErr != nil:
			return waitErr
		}
		return fileErr
	}
	return stdin
}

// flushOutput writes any buffered data to the destination and, when the
// output is piped through gofmt, waits for gofmt to finish writing the file.
// Without the wait the program could exit while gofmt is still running, and
// a gofmt failure would go unnoticed. Any error is fatal.
func flushOutput() {
	if err := output.Flush(); err != nil {
		logger.Fatal(err)
	}
	if finishGofmt == nil {
		return
	}
	finish := finishGofmt
	finishGofmt = nil // make a repeated flushOutput call harmless
	if err := finish(); err != nil {
		logger.Fatal(err)
	}
}
// printf formats its arguments as fmt.Printf would and appends the result to
// the buffered table output instead of standard output.
//
// The write result is dropped on purpose: output is a bufio.Writer, which
// keeps the first write error and returns it again from Flush, where
// flushOutput checks it.
func printf(format string, args ...interface{}) {
	_, _ = fmt.Fprintf(output, format, args...)
}
// print formats its arguments as fmt.Print would and appends the result to
// the buffered table output. It shadows the predeclared print builtin for
// this package.
//
// The write result is dropped on purpose: output is a bufio.Writer, which
// keeps the first write error and returns it again from Flush, where
// flushOutput checks it.
func print(args ...interface{}) {
	_, _ = fmt.Fprint(output, args...)
}
// println formats its arguments as fmt.Println would and appends the result,
// followed by a newline, to the buffered table output. It shadows the
// predeclared println builtin for this package.
//
// The write result is dropped on purpose: output is a bufio.Writer, which
// keeps the first write error and returns it again from Flush, where
// flushOutput checks it.
func println(args ...interface{}) {
	_, _ = fmt.Fprintln(output, args...)
}
type reader struct { type reader struct {
*bufio.Reader *bufio.Reader
fd *os.File fd *os.File
@ -245,11 +301,11 @@ func parseCategory(line string) (state State) {
} }
func (char *Char) dump(s string) { func (char *Char) dump(s string) {
fmt.Print(s, " ") print(s, " ")
for i := 0; i < len(char.field); i++ { for i := 0; i < len(char.field); i++ {
fmt.Printf("%s:%q ", fieldName[i], char.field[i]) printf("%s:%q ", fieldName[i], char.field[i])
} }
fmt.Print("\n") print("\n")
} }
func (char *Char) letter(u, l, t string) { func (char *Char) letter(u, l, t string) {
@ -411,18 +467,18 @@ func printCategories() {
fullCategoryTest(list) fullCategoryTest(list)
return return
} }
fmt.Printf(progHeader, *tablelist, *dataURL, *casefoldingURL) printf(progHeader, *tablelist, *dataURL, *casefoldingURL)
fmt.Println("// Version is the Unicode edition from which the tables are derived.") println("// Version is the Unicode edition from which the tables are derived.")
fmt.Printf("const Version = %q\n\n", version()) printf("const Version = %q\n\n", version())
if *tablelist == "all" { if *tablelist == "all" {
fmt.Println("// Categories is the set of Unicode category tables.") println("// Categories is the set of Unicode category tables.")
fmt.Println("var Categories = map[string] *RangeTable {") println("var Categories = map[string] *RangeTable {")
for _, k := range allCategories() { for _, k := range allCategories() {
fmt.Printf("\t%q: %s,\n", k, k) printf("\t%q: %s,\n", k, k)
} }
fmt.Print("}\n\n") print("}\n\n")
} }
decl := make(sort.StringSlice, len(list)) decl := make(sort.StringSlice, len(list))
@ -486,12 +542,12 @@ func printCategories() {
func(code rune) bool { return chars[code].category == name }) func(code rune) bool { return chars[code].category == name })
} }
decl.Sort() decl.Sort()
fmt.Println("// These variables have type *RangeTable.") println("// These variables have type *RangeTable.")
fmt.Println("var (") println("var (")
for _, d := range decl { for _, d := range decl {
fmt.Print(d) print(d)
} }
fmt.Print(")\n\n") print(")\n\n")
} }
type Op func(code rune) bool type Op func(code rune) bool
@ -499,10 +555,10 @@ type Op func(code rune) bool
const format = "\t\t{0x%04x, 0x%04x, %d},\n" const format = "\t\t{0x%04x, 0x%04x, %d},\n"
func dumpRange(header string, inCategory Op) { func dumpRange(header string, inCategory Op) {
fmt.Print(header) print(header)
next := rune(0) next := rune(0)
latinOffset := 0 latinOffset := 0
fmt.Print("\tR16: []Range16{\n") print("\tR16: []Range16{\n")
// one Range for each iteration // one Range for each iteration
count := &range16Count count := &range16Count
size := 16 size := 16
@ -528,7 +584,7 @@ func dumpRange(header string, inCategory Op) {
} }
if next >= rune(len(chars)) { if next >= rune(len(chars)) {
// no more characters // no more characters
fmt.Printf(format, lo, hi, stride) printf(format, lo, hi, stride)
break break
} }
// set stride // set stride
@ -552,11 +608,11 @@ func dumpRange(header string, inCategory Op) {
// next range: start looking where this range ends // next range: start looking where this range ends
next = hi + 1 next = hi + 1
} }
fmt.Print("\t},\n") print("\t},\n")
if latinOffset > 0 { if latinOffset > 0 {
fmt.Printf("\tLatinOffset: %d,\n", latinOffset) printf("\tLatinOffset: %d,\n", latinOffset)
} }
fmt.Print("}\n\n") print("}\n\n")
} }
func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) { func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) {
@ -568,17 +624,17 @@ func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) {
// No range contains U+FFFF as an instance, so split // No range contains U+FFFF as an instance, so split
// the range into two entries. That way we can maintain // the range into two entries. That way we can maintain
// the invariant that R32 contains only >= 1<<16. // the invariant that R32 contains only >= 1<<16.
fmt.Printf(format, lo, lo, 1) printf(format, lo, lo, 1)
lo = hi lo = hi
stride = 1 stride = 1
*count++ *count++
} }
fmt.Print("\t},\n") print("\t},\n")
fmt.Print("\tR32: []Range32{\n") print("\tR32: []Range32{\n")
size = 32 size = 32
count = &range32Count count = &range32Count
} }
fmt.Printf(format, lo, hi, stride) printf(format, lo, hi, stride)
*count++ *count++
return size, count return size, count
} }
@ -727,7 +783,7 @@ func printScriptOrProperty(doProps bool) {
return return
} }
fmt.Printf( printf(
"// Generated by running\n"+ "// Generated by running\n"+
"// maketables --%s=%s --url=%s\n"+ "// maketables --%s=%s --url=%s\n"+
"// DO NOT EDIT\n\n", "// DO NOT EDIT\n\n",
@ -736,16 +792,16 @@ func printScriptOrProperty(doProps bool) {
*url) *url)
if flaglist == "all" { if flaglist == "all" {
if doProps { if doProps {
fmt.Println("// Properties is the set of Unicode property tables.") println("// Properties is the set of Unicode property tables.")
fmt.Println("var Properties = map[string] *RangeTable{") println("var Properties = map[string] *RangeTable{")
} else { } else {
fmt.Println("// Scripts is the set of Unicode script tables.") println("// Scripts is the set of Unicode script tables.")
fmt.Println("var Scripts = map[string] *RangeTable{") println("var Scripts = map[string] *RangeTable{")
} }
for _, k := range all(table) { for _, k := range all(table) {
fmt.Printf("\t%q: %s,\n", k, k) printf("\t%q: %s,\n", k, k)
} }
fmt.Print("}\n\n") print("}\n\n")
} }
decl := make(sort.StringSlice, len(list)) decl := make(sort.StringSlice, len(list))
@ -761,27 +817,27 @@ func printScriptOrProperty(doProps bool) {
name, name, name, name) name, name, name, name)
} }
ndecl++ ndecl++
fmt.Printf("var _%s = &RangeTable {\n", name) printf("var _%s = &RangeTable {\n", name)
ranges := foldAdjacent(table[name]) ranges := foldAdjacent(table[name])
fmt.Print("\tR16: []Range16{\n") print("\tR16: []Range16{\n")
size := 16 size := 16
count := &range16Count count := &range16Count
for _, s := range ranges { for _, s := range ranges {
size, count = printRange(s.Lo, s.Hi, s.Stride, size, count) size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
} }
fmt.Print("\t},\n") print("\t},\n")
if off := findLatinOffset(ranges); off > 0 { if off := findLatinOffset(ranges); off > 0 {
fmt.Printf("\tLatinOffset: %d,\n", off) printf("\tLatinOffset: %d,\n", off)
} }
fmt.Print("}\n\n") print("}\n\n")
} }
decl.Sort() decl.Sort()
fmt.Println("// These variables have type *RangeTable.") println("// These variables have type *RangeTable.")
fmt.Println("var (") println("var (")
for _, d := range decl { for _, d := range decl {
fmt.Print(d) print(d)
} }
fmt.Print(")\n\n") print(")\n\n")
} }
func findLatinOffset(ranges []unicode.Range32) int { func findLatinOffset(ranges []unicode.Range32) int {
@ -940,7 +996,7 @@ func printCases() {
fullCaseTest() fullCaseTest()
return return
} }
fmt.Printf( printf(
"// Generated by running\n"+ "// Generated by running\n"+
"// maketables --data=%s --casefolding=%s\n"+ "// maketables --data=%s --casefolding=%s\n"+
"// DO NOT EDIT\n\n"+ "// DO NOT EDIT\n\n"+
@ -966,7 +1022,7 @@ func printCases() {
} }
prevState = state prevState = state
} }
fmt.Print("}\n") print("}\n")
} }
func printCaseRange(lo, hi *caseState) { func printCaseRange(lo, hi *caseState) {
@ -979,14 +1035,14 @@ func printCaseRange(lo, hi *caseState) {
} }
switch { switch {
case hi.point > lo.point && lo.isUpperLower(): case hi.point > lo.point && lo.isUpperLower():
fmt.Printf("\t{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n", printf("\t{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n",
lo.point, hi.point) lo.point, hi.point)
case hi.point > lo.point && lo.isLowerUpper(): case hi.point > lo.point && lo.isLowerUpper():
logger.Fatalf("LowerUpper sequence: should not happen: %U. If it's real, need to fix To()", lo.point) logger.Fatalf("LowerUpper sequence: should not happen: %U. If it's real, need to fix To()", lo.point)
fmt.Printf("\t{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n", printf("\t{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n",
lo.point, hi.point) lo.point, hi.point)
default: default:
fmt.Printf("\t{0x%04X, 0x%04X, d{%d, %d, %d}},\n", printf("\t{0x%04X, 0x%04X, d{%d, %d, %d}},\n",
lo.point, hi.point, lo.point, hi.point,
lo.deltaToUpper, lo.deltaToLower, lo.deltaToTitle) lo.deltaToUpper, lo.deltaToLower, lo.deltaToTitle)
} }
@ -1025,7 +1081,7 @@ func printLatinProperties() {
if *test { if *test {
return return
} }
fmt.Println("var properties = [MaxLatin1+1]uint8{") println("var properties = [MaxLatin1+1]uint8{")
for code := 0; code <= unicode.MaxLatin1; code++ { for code := 0; code <= unicode.MaxLatin1; code++ {
var property string var property string
switch chars[code].category { switch chars[code].category {
@ -1054,9 +1110,9 @@ func printLatinProperties() {
if code == ' ' { if code == ' ' {
property = "pZ | pp" property = "pZ | pp"
} }
fmt.Printf("\t0x%02X: %s, // %q\n", code, property, code) printf("\t0x%02X: %s, // %q\n", code, property, code)
} }
fmt.Printf("}\n\n") printf("}\n\n")
} }
type runeSlice []rune type runeSlice []rune
@ -1235,15 +1291,15 @@ func printCaseOrbit() {
return return
} }
fmt.Printf("var caseOrbit = []foldPair{\n") printf("var caseOrbit = []foldPair{\n")
for i := range chars { for i := range chars {
c := &chars[i] c := &chars[i]
if c.caseOrbit != 0 { if c.caseOrbit != 0 {
fmt.Printf("\t{0x%04X, 0x%04X},\n", i, c.caseOrbit) printf("\t{0x%04X, 0x%04X},\n", i, c.caseOrbit)
foldPairCount++ foldPairCount++
} }
} }
fmt.Printf("}\n\n") printf("}\n\n")
} }
func printCatFold(name string, m map[string]map[rune]bool) { func printCatFold(name string, m map[string]map[rune]bool) {
@ -1288,12 +1344,12 @@ func printCatFold(name string, m map[string]map[rune]bool) {
return return
} }
fmt.Print(comment[name]) print(comment[name])
fmt.Printf("var %s = map[string]*RangeTable{\n", name) printf("var %s = map[string]*RangeTable{\n", name)
for _, name := range allCatFold(m) { for _, name := range allCatFold(m) {
fmt.Printf("\t%q: fold%s,\n", name, name) printf("\t%q: fold%s,\n", name, name)
} }
fmt.Printf("}\n\n") printf("}\n\n")
for _, name := range allCatFold(m) { for _, name := range allCatFold(m) {
class := m[name] class := m[name]
dumpRange( dumpRange(
@ -1310,11 +1366,11 @@ func printSizes() {
if *test { if *test {
return return
} }
fmt.Println() println()
fmt.Printf("// Range entries: %d 16-bit, %d 32-bit, %d total.\n", range16Count, range32Count, range16Count+range32Count) printf("// Range entries: %d 16-bit, %d 32-bit, %d total.\n", range16Count, range32Count, range16Count+range32Count)
range16Bytes := range16Count * 3 * 2 range16Bytes := range16Count * 3 * 2
range32Bytes := range32Count * 3 * 4 range32Bytes := range32Count * 3 * 4
fmt.Printf("// Range bytes: %d 16-bit, %d 32-bit, %d total.\n", range16Bytes, range32Bytes, range16Bytes+range32Bytes) printf("// Range bytes: %d 16-bit, %d 32-bit, %d total.\n", range16Bytes, range32Bytes, range16Bytes+range32Bytes)
fmt.Println() println()
fmt.Printf("// Fold orbit bytes: %d pairs, %d bytes\n", foldPairCount, foldPairCount*2*2) printf("// Fold orbit bytes: %d pairs, %d bytes\n", foldPairCount, foldPairCount*2*2)
} }