mirror of
https://github.com/golang/go
synced 2024-11-19 02:14:43 -07:00
unicode/maketables: add -output flag, buffer output, use gofmt
Simplify the invocation (and speed it up substantially) in
preparation for move to go generate.

LGTM=bradfitz, mpvl
R=mpvl, bradfitz, josharian
CC=golang-codereviews
https://golang.org/cl/135790043
This commit is contained in:
parent
2c121b68b1
commit
dff17f4617
@ -6,8 +6,7 @@ maketables: maketables.go
|
|||||||
go build maketables.go
|
go build maketables.go
|
||||||
|
|
||||||
tables: maketables
|
tables: maketables
|
||||||
./maketables --tables=all > tables.go
|
./maketables --tables=all -output tables.go
|
||||||
gofmt -w tables.go
|
|
||||||
|
|
||||||
# Downloads from www.unicode.org, so not part
|
# Downloads from www.unicode.org, so not part
|
||||||
# of standard test scripts.
|
# of standard test scripts.
|
||||||
|
@ -13,9 +13,11 @@ import (
|
|||||||
"bufio"
|
"bufio"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
@ -26,6 +28,7 @@ import (
|
|||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
setupOutput()
|
||||||
loadChars() // always needed
|
loadChars() // always needed
|
||||||
loadCasefold()
|
loadCasefold()
|
||||||
printCategories()
|
printCategories()
|
||||||
@ -35,6 +38,7 @@ func main() {
|
|||||||
printLatinProperties()
|
printLatinProperties()
|
||||||
printCasefold()
|
printCasefold()
|
||||||
printSizes()
|
printSizes()
|
||||||
|
flushOutput()
|
||||||
}
|
}
|
||||||
|
|
||||||
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
|
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
|
||||||
@ -60,10 +64,62 @@ var test = flag.Bool("test",
|
|||||||
var localFiles = flag.Bool("local",
|
var localFiles = flag.Bool("local",
|
||||||
false,
|
false,
|
||||||
"data files have been copied to current directory; for debugging only")
|
"data files have been copied to current directory; for debugging only")
|
||||||
|
var outputFile = flag.String("output",
	"",
	"output file for generated tables; default stdout")

var scriptRe = regexp.MustCompile(`^([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)$`)
var logger = log.New(os.Stderr, "", log.Lshortfile)

// output is the buffered destination for all generated text. It wraps
// os.Stdout, or the stdin of a gofmt process whose stdout is the -output file.
var output *bufio.Writer

// State of the gofmt child process started by startGofmt. All three are nil
// when output goes straight to os.Stdout.
var (
	gofmtCmd   *exec.Cmd      // running gofmt process
	gofmtStdin io.WriteCloser // write end of gofmt's standard input
	gofmtFile  *os.File       // file receiving gofmt's standard output
)

// setupOutput initializes output. It must run after flag.Parse, because
// startGofmt reads the -output flag.
func setupOutput() {
	output = bufio.NewWriter(startGofmt())
}

// startGofmt connects output to a gofmt process if -output is set.
// With no -output flag it returns os.Stdout and starts nothing. Otherwise it
// creates the output file, starts gofmt with that file as its standard output,
// records the process state for flushOutput, and returns gofmt's standard
// input. Any failure is fatal.
func startGofmt() io.Writer {
	if *outputFile == "" {
		return os.Stdout
	}
	stdout, err := os.Create(*outputFile)
	if err != nil {
		logger.Fatal(err)
	}
	// Pipe output to gofmt.
	gofmt := exec.Command("gofmt")
	fd, err := gofmt.StdinPipe()
	if err != nil {
		logger.Fatal(err)
	}
	gofmt.Stdout = stdout
	gofmt.Stderr = os.Stderr
	if err := gofmt.Start(); err != nil {
		logger.Fatal(err)
	}
	gofmtCmd, gofmtStdin, gofmtFile = gofmt, fd, stdout
	return fd
}

// flushOutput writes any buffered text to the destination and, when a gofmt
// process is in use, waits for it to finish so that the -output file is
// complete before the program exits. Any failure is fatal.
func flushOutput() {
	if err := output.Flush(); err != nil {
		logger.Fatal(err)
	}
	if gofmtCmd == nil {
		// Writing directly to os.Stdout; nothing else to finish.
		return
	}
	// gofmt does not produce its result until it sees EOF on standard input,
	// so the pipe has to be closed before waiting for the process.
	if err := gofmtStdin.Close(); err != nil {
		logger.Fatal(err)
	}
	if err := gofmtCmd.Wait(); err != nil {
		logger.Fatal(err)
	}
	if err := gofmtFile.Close(); err != nil {
		logger.Fatal(err)
	}
	gofmtCmd, gofmtStdin, gofmtFile = nil, nil, nil
}

// printf formats like fmt.Printf but writes to output.
func printf(format string, args ...interface{}) {
	fmt.Fprintf(output, format, args...)
}

// print formats like fmt.Print but writes to output.
func print(args ...interface{}) {
	fmt.Fprint(output, args...)
}

// println formats like fmt.Println but writes to output.
func println(args ...interface{}) {
	fmt.Fprintln(output, args...)
}
|
||||||
|
|
||||||
type reader struct {
|
type reader struct {
|
||||||
*bufio.Reader
|
*bufio.Reader
|
||||||
fd *os.File
|
fd *os.File
|
||||||
@ -245,11 +301,11 @@ func parseCategory(line string) (state State) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (char *Char) dump(s string) {
|
func (char *Char) dump(s string) {
|
||||||
fmt.Print(s, " ")
|
print(s, " ")
|
||||||
for i := 0; i < len(char.field); i++ {
|
for i := 0; i < len(char.field); i++ {
|
||||||
fmt.Printf("%s:%q ", fieldName[i], char.field[i])
|
printf("%s:%q ", fieldName[i], char.field[i])
|
||||||
}
|
}
|
||||||
fmt.Print("\n")
|
print("\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (char *Char) letter(u, l, t string) {
|
func (char *Char) letter(u, l, t string) {
|
||||||
@ -411,18 +467,18 @@ func printCategories() {
|
|||||||
fullCategoryTest(list)
|
fullCategoryTest(list)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
fmt.Printf(progHeader, *tablelist, *dataURL, *casefoldingURL)
|
printf(progHeader, *tablelist, *dataURL, *casefoldingURL)
|
||||||
|
|
||||||
fmt.Println("// Version is the Unicode edition from which the tables are derived.")
|
println("// Version is the Unicode edition from which the tables are derived.")
|
||||||
fmt.Printf("const Version = %q\n\n", version())
|
printf("const Version = %q\n\n", version())
|
||||||
|
|
||||||
if *tablelist == "all" {
|
if *tablelist == "all" {
|
||||||
fmt.Println("// Categories is the set of Unicode category tables.")
|
println("// Categories is the set of Unicode category tables.")
|
||||||
fmt.Println("var Categories = map[string] *RangeTable {")
|
println("var Categories = map[string] *RangeTable {")
|
||||||
for _, k := range allCategories() {
|
for _, k := range allCategories() {
|
||||||
fmt.Printf("\t%q: %s,\n", k, k)
|
printf("\t%q: %s,\n", k, k)
|
||||||
}
|
}
|
||||||
fmt.Print("}\n\n")
|
print("}\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
decl := make(sort.StringSlice, len(list))
|
decl := make(sort.StringSlice, len(list))
|
||||||
@ -486,12 +542,12 @@ func printCategories() {
|
|||||||
func(code rune) bool { return chars[code].category == name })
|
func(code rune) bool { return chars[code].category == name })
|
||||||
}
|
}
|
||||||
decl.Sort()
|
decl.Sort()
|
||||||
fmt.Println("// These variables have type *RangeTable.")
|
println("// These variables have type *RangeTable.")
|
||||||
fmt.Println("var (")
|
println("var (")
|
||||||
for _, d := range decl {
|
for _, d := range decl {
|
||||||
fmt.Print(d)
|
print(d)
|
||||||
}
|
}
|
||||||
fmt.Print(")\n\n")
|
print(")\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
type Op func(code rune) bool
|
type Op func(code rune) bool
|
||||||
@ -499,10 +555,10 @@ type Op func(code rune) bool
|
|||||||
const format = "\t\t{0x%04x, 0x%04x, %d},\n"
|
const format = "\t\t{0x%04x, 0x%04x, %d},\n"
|
||||||
|
|
||||||
func dumpRange(header string, inCategory Op) {
|
func dumpRange(header string, inCategory Op) {
|
||||||
fmt.Print(header)
|
print(header)
|
||||||
next := rune(0)
|
next := rune(0)
|
||||||
latinOffset := 0
|
latinOffset := 0
|
||||||
fmt.Print("\tR16: []Range16{\n")
|
print("\tR16: []Range16{\n")
|
||||||
// one Range for each iteration
|
// one Range for each iteration
|
||||||
count := &range16Count
|
count := &range16Count
|
||||||
size := 16
|
size := 16
|
||||||
@ -528,7 +584,7 @@ func dumpRange(header string, inCategory Op) {
|
|||||||
}
|
}
|
||||||
if next >= rune(len(chars)) {
|
if next >= rune(len(chars)) {
|
||||||
// no more characters
|
// no more characters
|
||||||
fmt.Printf(format, lo, hi, stride)
|
printf(format, lo, hi, stride)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
// set stride
|
// set stride
|
||||||
@ -552,11 +608,11 @@ func dumpRange(header string, inCategory Op) {
|
|||||||
// next range: start looking where this range ends
|
// next range: start looking where this range ends
|
||||||
next = hi + 1
|
next = hi + 1
|
||||||
}
|
}
|
||||||
fmt.Print("\t},\n")
|
print("\t},\n")
|
||||||
if latinOffset > 0 {
|
if latinOffset > 0 {
|
||||||
fmt.Printf("\tLatinOffset: %d,\n", latinOffset)
|
printf("\tLatinOffset: %d,\n", latinOffset)
|
||||||
}
|
}
|
||||||
fmt.Print("}\n\n")
|
print("}\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) {
|
func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) {
|
||||||
@ -568,17 +624,17 @@ func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) {
|
|||||||
// No range contains U+FFFF as an instance, so split
|
// No range contains U+FFFF as an instance, so split
|
||||||
// the range into two entries. That way we can maintain
|
// the range into two entries. That way we can maintain
|
||||||
// the invariant that R32 contains only >= 1<<16.
|
// the invariant that R32 contains only >= 1<<16.
|
||||||
fmt.Printf(format, lo, lo, 1)
|
printf(format, lo, lo, 1)
|
||||||
lo = hi
|
lo = hi
|
||||||
stride = 1
|
stride = 1
|
||||||
*count++
|
*count++
|
||||||
}
|
}
|
||||||
fmt.Print("\t},\n")
|
print("\t},\n")
|
||||||
fmt.Print("\tR32: []Range32{\n")
|
print("\tR32: []Range32{\n")
|
||||||
size = 32
|
size = 32
|
||||||
count = &range32Count
|
count = &range32Count
|
||||||
}
|
}
|
||||||
fmt.Printf(format, lo, hi, stride)
|
printf(format, lo, hi, stride)
|
||||||
*count++
|
*count++
|
||||||
return size, count
|
return size, count
|
||||||
}
|
}
|
||||||
@ -727,7 +783,7 @@ func printScriptOrProperty(doProps bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf(
|
printf(
|
||||||
"// Generated by running\n"+
|
"// Generated by running\n"+
|
||||||
"// maketables --%s=%s --url=%s\n"+
|
"// maketables --%s=%s --url=%s\n"+
|
||||||
"// DO NOT EDIT\n\n",
|
"// DO NOT EDIT\n\n",
|
||||||
@ -736,16 +792,16 @@ func printScriptOrProperty(doProps bool) {
|
|||||||
*url)
|
*url)
|
||||||
if flaglist == "all" {
|
if flaglist == "all" {
|
||||||
if doProps {
|
if doProps {
|
||||||
fmt.Println("// Properties is the set of Unicode property tables.")
|
println("// Properties is the set of Unicode property tables.")
|
||||||
fmt.Println("var Properties = map[string] *RangeTable{")
|
println("var Properties = map[string] *RangeTable{")
|
||||||
} else {
|
} else {
|
||||||
fmt.Println("// Scripts is the set of Unicode script tables.")
|
println("// Scripts is the set of Unicode script tables.")
|
||||||
fmt.Println("var Scripts = map[string] *RangeTable{")
|
println("var Scripts = map[string] *RangeTable{")
|
||||||
}
|
}
|
||||||
for _, k := range all(table) {
|
for _, k := range all(table) {
|
||||||
fmt.Printf("\t%q: %s,\n", k, k)
|
printf("\t%q: %s,\n", k, k)
|
||||||
}
|
}
|
||||||
fmt.Print("}\n\n")
|
print("}\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
decl := make(sort.StringSlice, len(list))
|
decl := make(sort.StringSlice, len(list))
|
||||||
@ -761,27 +817,27 @@ func printScriptOrProperty(doProps bool) {
|
|||||||
name, name, name, name)
|
name, name, name, name)
|
||||||
}
|
}
|
||||||
ndecl++
|
ndecl++
|
||||||
fmt.Printf("var _%s = &RangeTable {\n", name)
|
printf("var _%s = &RangeTable {\n", name)
|
||||||
ranges := foldAdjacent(table[name])
|
ranges := foldAdjacent(table[name])
|
||||||
fmt.Print("\tR16: []Range16{\n")
|
print("\tR16: []Range16{\n")
|
||||||
size := 16
|
size := 16
|
||||||
count := &range16Count
|
count := &range16Count
|
||||||
for _, s := range ranges {
|
for _, s := range ranges {
|
||||||
size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
|
size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
|
||||||
}
|
}
|
||||||
fmt.Print("\t},\n")
|
print("\t},\n")
|
||||||
if off := findLatinOffset(ranges); off > 0 {
|
if off := findLatinOffset(ranges); off > 0 {
|
||||||
fmt.Printf("\tLatinOffset: %d,\n", off)
|
printf("\tLatinOffset: %d,\n", off)
|
||||||
}
|
}
|
||||||
fmt.Print("}\n\n")
|
print("}\n\n")
|
||||||
}
|
}
|
||||||
decl.Sort()
|
decl.Sort()
|
||||||
fmt.Println("// These variables have type *RangeTable.")
|
println("// These variables have type *RangeTable.")
|
||||||
fmt.Println("var (")
|
println("var (")
|
||||||
for _, d := range decl {
|
for _, d := range decl {
|
||||||
fmt.Print(d)
|
print(d)
|
||||||
}
|
}
|
||||||
fmt.Print(")\n\n")
|
print(")\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
func findLatinOffset(ranges []unicode.Range32) int {
|
func findLatinOffset(ranges []unicode.Range32) int {
|
||||||
@ -940,7 +996,7 @@ func printCases() {
|
|||||||
fullCaseTest()
|
fullCaseTest()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
fmt.Printf(
|
printf(
|
||||||
"// Generated by running\n"+
|
"// Generated by running\n"+
|
||||||
"// maketables --data=%s --casefolding=%s\n"+
|
"// maketables --data=%s --casefolding=%s\n"+
|
||||||
"// DO NOT EDIT\n\n"+
|
"// DO NOT EDIT\n\n"+
|
||||||
@ -966,7 +1022,7 @@ func printCases() {
|
|||||||
}
|
}
|
||||||
prevState = state
|
prevState = state
|
||||||
}
|
}
|
||||||
fmt.Print("}\n")
|
print("}\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
func printCaseRange(lo, hi *caseState) {
|
func printCaseRange(lo, hi *caseState) {
|
||||||
@ -979,14 +1035,14 @@ func printCaseRange(lo, hi *caseState) {
|
|||||||
}
|
}
|
||||||
switch {
|
switch {
|
||||||
case hi.point > lo.point && lo.isUpperLower():
|
case hi.point > lo.point && lo.isUpperLower():
|
||||||
fmt.Printf("\t{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n",
|
printf("\t{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n",
|
||||||
lo.point, hi.point)
|
lo.point, hi.point)
|
||||||
case hi.point > lo.point && lo.isLowerUpper():
|
case hi.point > lo.point && lo.isLowerUpper():
|
||||||
logger.Fatalf("LowerUpper sequence: should not happen: %U. If it's real, need to fix To()", lo.point)
|
logger.Fatalf("LowerUpper sequence: should not happen: %U. If it's real, need to fix To()", lo.point)
|
||||||
fmt.Printf("\t{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n",
|
printf("\t{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n",
|
||||||
lo.point, hi.point)
|
lo.point, hi.point)
|
||||||
default:
|
default:
|
||||||
fmt.Printf("\t{0x%04X, 0x%04X, d{%d, %d, %d}},\n",
|
printf("\t{0x%04X, 0x%04X, d{%d, %d, %d}},\n",
|
||||||
lo.point, hi.point,
|
lo.point, hi.point,
|
||||||
lo.deltaToUpper, lo.deltaToLower, lo.deltaToTitle)
|
lo.deltaToUpper, lo.deltaToLower, lo.deltaToTitle)
|
||||||
}
|
}
|
||||||
@ -1025,7 +1081,7 @@ func printLatinProperties() {
|
|||||||
if *test {
|
if *test {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
fmt.Println("var properties = [MaxLatin1+1]uint8{")
|
println("var properties = [MaxLatin1+1]uint8{")
|
||||||
for code := 0; code <= unicode.MaxLatin1; code++ {
|
for code := 0; code <= unicode.MaxLatin1; code++ {
|
||||||
var property string
|
var property string
|
||||||
switch chars[code].category {
|
switch chars[code].category {
|
||||||
@ -1054,9 +1110,9 @@ func printLatinProperties() {
|
|||||||
if code == ' ' {
|
if code == ' ' {
|
||||||
property = "pZ | pp"
|
property = "pZ | pp"
|
||||||
}
|
}
|
||||||
fmt.Printf("\t0x%02X: %s, // %q\n", code, property, code)
|
printf("\t0x%02X: %s, // %q\n", code, property, code)
|
||||||
}
|
}
|
||||||
fmt.Printf("}\n\n")
|
printf("}\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
type runeSlice []rune
|
type runeSlice []rune
|
||||||
@ -1235,15 +1291,15 @@ func printCaseOrbit() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("var caseOrbit = []foldPair{\n")
|
printf("var caseOrbit = []foldPair{\n")
|
||||||
for i := range chars {
|
for i := range chars {
|
||||||
c := &chars[i]
|
c := &chars[i]
|
||||||
if c.caseOrbit != 0 {
|
if c.caseOrbit != 0 {
|
||||||
fmt.Printf("\t{0x%04X, 0x%04X},\n", i, c.caseOrbit)
|
printf("\t{0x%04X, 0x%04X},\n", i, c.caseOrbit)
|
||||||
foldPairCount++
|
foldPairCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fmt.Printf("}\n\n")
|
printf("}\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
func printCatFold(name string, m map[string]map[rune]bool) {
|
func printCatFold(name string, m map[string]map[rune]bool) {
|
||||||
@ -1288,12 +1344,12 @@ func printCatFold(name string, m map[string]map[rune]bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Print(comment[name])
|
print(comment[name])
|
||||||
fmt.Printf("var %s = map[string]*RangeTable{\n", name)
|
printf("var %s = map[string]*RangeTable{\n", name)
|
||||||
for _, name := range allCatFold(m) {
|
for _, name := range allCatFold(m) {
|
||||||
fmt.Printf("\t%q: fold%s,\n", name, name)
|
printf("\t%q: fold%s,\n", name, name)
|
||||||
}
|
}
|
||||||
fmt.Printf("}\n\n")
|
printf("}\n\n")
|
||||||
for _, name := range allCatFold(m) {
|
for _, name := range allCatFold(m) {
|
||||||
class := m[name]
|
class := m[name]
|
||||||
dumpRange(
|
dumpRange(
|
||||||
@ -1310,11 +1366,11 @@ func printSizes() {
|
|||||||
if *test {
|
if *test {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
fmt.Println()
|
println()
|
||||||
fmt.Printf("// Range entries: %d 16-bit, %d 32-bit, %d total.\n", range16Count, range32Count, range16Count+range32Count)
|
printf("// Range entries: %d 16-bit, %d 32-bit, %d total.\n", range16Count, range32Count, range16Count+range32Count)
|
||||||
range16Bytes := range16Count * 3 * 2
|
range16Bytes := range16Count * 3 * 2
|
||||||
range32Bytes := range32Count * 3 * 4
|
range32Bytes := range32Count * 3 * 4
|
||||||
fmt.Printf("// Range bytes: %d 16-bit, %d 32-bit, %d total.\n", range16Bytes, range32Bytes, range16Bytes+range32Bytes)
|
printf("// Range bytes: %d 16-bit, %d 32-bit, %d total.\n", range16Bytes, range32Bytes, range16Bytes+range32Bytes)
|
||||||
fmt.Println()
|
println()
|
||||||
fmt.Printf("// Fold orbit bytes: %d pairs, %d bytes\n", foldPairCount, foldPairCount*2*2)
|
printf("// Fold orbit bytes: %d pairs, %d bytes\n", foldPairCount, foldPairCount*2*2)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user