mirror of
https://github.com/golang/go
synced 2024-11-25 09:07:58 -07:00
exp/norm: merged charinfo and decomposition tables. As a result only
one trie lookup per rune is needed. See forminfo.go for a description of the new format. Also included leading and trailing canonical combining class in decomposition information. This will often avoid additional trie lookups. R=r, r CC=golang-dev https://golang.org/cl/5616071
This commit is contained in:
parent
7bd6ebb104
commit
a52fb458df
@ -102,7 +102,7 @@ func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if info.hasDecomposition() {
|
if info.hasDecomposition() {
|
||||||
dcomp := rb.f.decompose(src, i)
|
dcomp := info.decomposition()
|
||||||
rb.tmpBytes = inputBytes(dcomp)
|
rb.tmpBytes = inputBytes(dcomp)
|
||||||
for i := 0; i < len(dcomp); {
|
for i := 0; i < len(dcomp); {
|
||||||
info = rb.f.info(&rb.tmpBytes, i)
|
info = rb.f.info(&rb.tmpBytes, i)
|
||||||
|
@ -6,25 +6,50 @@ package norm
|
|||||||
|
|
||||||
// This file contains Form-specific logic and wrappers for data in tables.go.
|
// This file contains Form-specific logic and wrappers for data in tables.go.
|
||||||
|
|
||||||
|
// Rune info is stored in a separate trie per composing form. A composing form
|
||||||
|
// and its corresponding decomposing form share the same trie. Each trie maps
|
||||||
|
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
||||||
|
// bits
|
||||||
|
// 0..7: ccc
|
||||||
|
// 8..11: qcInfo (see below). isYesD is always true (no decomposition).
|
||||||
|
// 15: 1
|
||||||
|
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
||||||
|
// into a byte array of UTF-8 decomposition sequences and additional info and
|
||||||
|
// has the form:
|
||||||
|
// <header> <decomp_byte>* [<tccc> [<lccc>]]
|
||||||
|
// The header contains the number of bytes in the decomposition (excluding this
|
||||||
|
// length byte). The two most significant bits of this length byte correspond
|
||||||
|
// to bit 2 and 3 of qcInfo (see below). The byte sequence itself starts at v+1.
|
||||||
|
// The byte sequence is followed by a trailing and leading CCC if the values
|
||||||
|
// for these are not zero. The value of v determines which ccc are appended
|
||||||
|
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
|
||||||
|
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC
|
||||||
|
// there is an additional leading ccc.
|
||||||
|
|
||||||
|
const (
|
||||||
|
qcInfoMask = 0xF // to clear all but the relevant bits in a qcInfo
|
||||||
|
headerLenMask = 0x3F // extract the lenght value from the header byte
|
||||||
|
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
|
||||||
|
)
|
||||||
|
|
||||||
|
// runeInfo is a representation for the data stored in charinfoTrie.
|
||||||
type runeInfo struct {
|
type runeInfo struct {
|
||||||
pos uint8 // start position in reorderBuffer; used in composition.go
|
pos uint8 // start position in reorderBuffer; used in composition.go
|
||||||
size uint8 // length of UTF-8 encoding of this rune
|
size uint8 // length of UTF-8 encoding of this rune
|
||||||
ccc uint8 // canonical combining class
|
ccc uint8 // leading canonical combining class (ccc if not decomposition)
|
||||||
|
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
|
||||||
flags qcInfo // quick check flags
|
flags qcInfo // quick check flags
|
||||||
|
index uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
// functions dispatchable per form
|
// functions dispatchable per form
|
||||||
type lookupFunc func(b input, i int) runeInfo
|
type lookupFunc func(b input, i int) runeInfo
|
||||||
type decompFunc func(b input, i int) []byte
|
|
||||||
|
|
||||||
// formInfo holds Form-specific functions and tables.
|
// formInfo holds Form-specific functions and tables.
|
||||||
type formInfo struct {
|
type formInfo struct {
|
||||||
form Form
|
form Form
|
||||||
|
|
||||||
composing, compatibility bool // form type
|
composing, compatibility bool // form type
|
||||||
|
info lookupFunc
|
||||||
decompose decompFunc
|
|
||||||
info lookupFunc
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var formTable []*formInfo
|
var formTable []*formInfo
|
||||||
@ -38,10 +63,8 @@ func init() {
|
|||||||
f.form = Form(i)
|
f.form = Form(i)
|
||||||
if Form(i) == NFKD || Form(i) == NFKC {
|
if Form(i) == NFKD || Form(i) == NFKC {
|
||||||
f.compatibility = true
|
f.compatibility = true
|
||||||
f.decompose = decomposeNFKC
|
|
||||||
f.info = lookupInfoNFKC
|
f.info = lookupInfoNFKC
|
||||||
} else {
|
} else {
|
||||||
f.decompose = decomposeNFC
|
|
||||||
f.info = lookupInfoNFC
|
f.info = lookupInfoNFC
|
||||||
}
|
}
|
||||||
if Form(i) == NFC || Form(i) == NFKC {
|
if Form(i) == NFC || Form(i) == NFKC {
|
||||||
@ -76,8 +99,6 @@ func (i runeInfo) boundaryAfter() bool {
|
|||||||
//
|
//
|
||||||
// When all 4 bits are zero, the character is inert, meaning it is never
|
// When all 4 bits are zero, the character is inert, meaning it is never
|
||||||
// influenced by normalization.
|
// influenced by normalization.
|
||||||
//
|
|
||||||
// We pack the bits for both NFC/D and NFKC/D in one byte.
|
|
||||||
type qcInfo uint8
|
type qcInfo uint8
|
||||||
|
|
||||||
func (i runeInfo) isYesC() bool { return i.flags&0x4 == 0 }
|
func (i runeInfo) isYesC() bool { return i.flags&0x4 == 0 }
|
||||||
@ -91,22 +112,12 @@ func (r runeInfo) isInert() bool {
|
|||||||
return r.flags&0xf == 0 && r.ccc == 0
|
return r.flags&0xf == 0 && r.ccc == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wrappers for tables.go
|
func (r runeInfo) decomposition() []byte {
|
||||||
|
if r.index == 0 {
|
||||||
// The 16-bit value of the decomposition tries is an index into a byte
|
return nil
|
||||||
// array of UTF-8 decomposition sequences. The first byte is the number
|
}
|
||||||
// of bytes in the decomposition (excluding this length byte). The actual
|
p := r.index
|
||||||
// sequence starts at the offset+1.
|
n := decomps[p] & 0x3F
|
||||||
func decomposeNFC(s input, i int) []byte {
|
|
||||||
p := s.decomposeNFC(i)
|
|
||||||
n := decomps[p]
|
|
||||||
p++
|
|
||||||
return decomps[p : p+uint16(n)]
|
|
||||||
}
|
|
||||||
|
|
||||||
func decomposeNFKC(s input, i int) []byte {
|
|
||||||
p := s.decomposeNFKC(i)
|
|
||||||
n := decomps[p]
|
|
||||||
p++
|
p++
|
||||||
return decomps[p : p+uint16(n)]
|
return decomps[p : p+uint16(n)]
|
||||||
}
|
}
|
||||||
@ -124,16 +135,40 @@ func combine(a, b rune) rune {
|
|||||||
return recompMap[key]
|
return recompMap[key]
|
||||||
}
|
}
|
||||||
|
|
||||||
// The 16-bit character info has the following bit layout:
|
|
||||||
// 0..7 CCC value.
|
|
||||||
// 8..11 qcInfo for NFC/NFD
|
|
||||||
// 12..15 qcInfo for NFKC/NFKD
|
|
||||||
func lookupInfoNFC(b input, i int) runeInfo {
|
func lookupInfoNFC(b input, i int) runeInfo {
|
||||||
v, sz := b.charinfo(i)
|
v, sz := b.charinfoNFC(i)
|
||||||
return runeInfo{size: uint8(sz), ccc: uint8(v), flags: qcInfo(v >> 8)}
|
return compInfo(v, sz)
|
||||||
}
|
}
|
||||||
|
|
||||||
func lookupInfoNFKC(b input, i int) runeInfo {
|
func lookupInfoNFKC(b input, i int) runeInfo {
|
||||||
v, sz := b.charinfo(i)
|
v, sz := b.charinfoNFKC(i)
|
||||||
return runeInfo{size: uint8(sz), ccc: uint8(v), flags: qcInfo(v >> 12)}
|
return compInfo(v, sz)
|
||||||
|
}
|
||||||
|
|
||||||
|
// compInfo converts the information contained in v and sz
|
||||||
|
// to a runeInfo. See the comment at the top of the file
|
||||||
|
// for more information on the format.
|
||||||
|
func compInfo(v uint16, sz int) runeInfo {
|
||||||
|
if v == 0 {
|
||||||
|
return runeInfo{size: uint8(sz)}
|
||||||
|
} else if v >= 0x8000 {
|
||||||
|
return runeInfo{
|
||||||
|
size: uint8(sz),
|
||||||
|
ccc: uint8(v),
|
||||||
|
tccc: uint8(v),
|
||||||
|
flags: qcInfo(v>>8) & qcInfoMask,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// has decomposition
|
||||||
|
h := decomps[v]
|
||||||
|
f := (qcInfo(h&headerFlagsMask) >> 4) | 0x1
|
||||||
|
ri := runeInfo{size: uint8(sz), flags: f, index: v}
|
||||||
|
if v >= firstCCC {
|
||||||
|
v += uint16(h&headerLenMask) + 1
|
||||||
|
ri.tccc = decomps[v]
|
||||||
|
if v >= firstLeadingCCC {
|
||||||
|
ri.ccc = decomps[v+1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ri
|
||||||
}
|
}
|
||||||
|
@ -11,9 +11,8 @@ type input interface {
|
|||||||
skipNonStarter(p int) int
|
skipNonStarter(p int) int
|
||||||
appendSlice(buf []byte, s, e int) []byte
|
appendSlice(buf []byte, s, e int) []byte
|
||||||
copySlice(buf []byte, s, e int)
|
copySlice(buf []byte, s, e int)
|
||||||
charinfo(p int) (uint16, int)
|
charinfoNFC(p int) (uint16, int)
|
||||||
decomposeNFC(p int) uint16
|
charinfoNFKC(p int) (uint16, int)
|
||||||
decomposeNFKC(p int) uint16
|
|
||||||
hangul(p int) rune
|
hangul(p int) rune
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,16 +41,12 @@ func (s inputString) copySlice(buf []byte, b, e int) {
|
|||||||
copy(buf, s[b:e])
|
copy(buf, s[b:e])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s inputString) charinfo(p int) (uint16, int) {
|
func (s inputString) charinfoNFC(p int) (uint16, int) {
|
||||||
return charInfoTrie.lookupString(string(s[p:]))
|
return nfcTrie.lookupString(string(s[p:]))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s inputString) decomposeNFC(p int) uint16 {
|
func (s inputString) charinfoNFKC(p int) (uint16, int) {
|
||||||
return nfcDecompTrie.lookupStringUnsafe(string(s[p:]))
|
return nfkcTrie.lookupString(string(s[p:]))
|
||||||
}
|
|
||||||
|
|
||||||
func (s inputString) decomposeNFKC(p int) uint16 {
|
|
||||||
return nfkcDecompTrie.lookupStringUnsafe(string(s[p:]))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s inputString) hangul(p int) rune {
|
func (s inputString) hangul(p int) rune {
|
||||||
@ -84,16 +79,12 @@ func (s inputBytes) copySlice(buf []byte, b, e int) {
|
|||||||
copy(buf, s[b:e])
|
copy(buf, s[b:e])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s inputBytes) charinfo(p int) (uint16, int) {
|
func (s inputBytes) charinfoNFC(p int) (uint16, int) {
|
||||||
return charInfoTrie.lookup(s[p:])
|
return nfcTrie.lookup(s[p:])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s inputBytes) decomposeNFC(p int) uint16 {
|
func (s inputBytes) charinfoNFKC(p int) (uint16, int) {
|
||||||
return nfcDecompTrie.lookupUnsafe(s[p:])
|
return nfkcTrie.lookup(s[p:])
|
||||||
}
|
|
||||||
|
|
||||||
func (s inputBytes) decomposeNFKC(p int) uint16 {
|
|
||||||
return nfkcDecompTrie.lookupUnsafe(s[p:])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s inputBytes) hangul(p int) rune {
|
func (s inputBytes) hangul(p int) rune {
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
// Normalization table generator.
|
// Normalization table generator.
|
||||||
// Data read from the web.
|
// Data read from the web.
|
||||||
|
// See forminfo.go for a description of the trie values associated with each rune.
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
@ -17,6 +18,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@ -187,18 +189,14 @@ func (f FormInfo) String() string {
|
|||||||
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
|
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
|
||||||
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
|
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
|
||||||
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
|
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
|
||||||
fmt.Fprintf(buf, " decomposition: %v\n", f.decomp)
|
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
|
||||||
fmt.Fprintf(buf, " expandedDecomp: %v\n", f.expandedDecomp)
|
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
|
||||||
|
|
||||||
return buf.String()
|
return buf.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
type Decomposition []rune
|
type Decomposition []rune
|
||||||
|
|
||||||
func (d Decomposition) String() string {
|
|
||||||
return fmt.Sprintf("%.4X", d)
|
|
||||||
}
|
|
||||||
|
|
||||||
func openReader(file string) (input io.ReadCloser) {
|
func openReader(file string) (input io.ReadCloser) {
|
||||||
if *localFiles {
|
if *localFiles {
|
||||||
f, err := os.Open(file)
|
f, err := os.Open(file)
|
||||||
@ -571,80 +569,121 @@ func makeEntry(f *FormInfo) uint16 {
|
|||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bits
|
// decompSet keeps track of unique decompositions, grouped by whether
|
||||||
// 0..8: CCC
|
// the decomposition is followed by a trailing and/or leading CCC.
|
||||||
// 9..12: NF(C|D) qc bits.
|
type decompSet [4]map[string]bool
|
||||||
// 13..16: NFK(C|D) qc bits.
|
|
||||||
func makeCharInfo(c Char) uint16 {
|
func makeDecompSet() decompSet {
|
||||||
e := makeEntry(&c.forms[FCompatibility])
|
m := decompSet{}
|
||||||
e = e<<4 | makeEntry(&c.forms[FCanonical])
|
for i, _ := range m {
|
||||||
e = e<<8 | uint16(c.ccc)
|
m[i] = make(map[string]bool)
|
||||||
return e
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
func (m *decompSet) insert(key int, s string) {
|
||||||
|
m[key][s] = true
|
||||||
}
|
}
|
||||||
|
|
||||||
func printCharInfoTables() int {
|
func printCharInfoTables() int {
|
||||||
// Quick Check + CCC trie.
|
mkstr := func(r rune, f *FormInfo) (int, string) {
|
||||||
t := newNode()
|
d := f.expandedDecomp
|
||||||
for i, char := range chars {
|
s := string([]rune(d))
|
||||||
v := makeCharInfo(char)
|
if max := 1 << 6; len(s) >= max {
|
||||||
if v != 0 {
|
const msg = "%U: too many bytes in decomposition: %d >= %d"
|
||||||
t.insert(rune(i), v)
|
logger.Fatalf(msg, r, len(s), max)
|
||||||
}
|
}
|
||||||
|
head := uint8(len(s))
|
||||||
|
if f.quickCheck[MComposed] != QCYes {
|
||||||
|
head |= 0x40
|
||||||
|
}
|
||||||
|
if f.combinesForward {
|
||||||
|
head |= 0x80
|
||||||
|
}
|
||||||
|
s = string([]byte{head}) + s
|
||||||
|
|
||||||
|
lccc := ccc(d[0])
|
||||||
|
tccc := ccc(d[len(d)-1])
|
||||||
|
if tccc < lccc && lccc != 0 {
|
||||||
|
const msg = "%U: lccc (%d) must be <= tcc (%d)"
|
||||||
|
logger.Fatalf(msg, r, lccc, tccc)
|
||||||
|
}
|
||||||
|
index := 0
|
||||||
|
if tccc > 0 || lccc > 0 {
|
||||||
|
s += string([]byte{tccc})
|
||||||
|
index = 1
|
||||||
|
if lccc > 0 {
|
||||||
|
s += string([]byte{lccc})
|
||||||
|
index |= 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return index, s
|
||||||
}
|
}
|
||||||
return t.printTables("charInfo")
|
|
||||||
}
|
|
||||||
|
|
||||||
func printDecompositionTables() int {
|
decompSet := makeDecompSet()
|
||||||
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
|
|
||||||
size := 0
|
|
||||||
|
|
||||||
// Map decompositions
|
|
||||||
positionMap := make(map[string]uint16)
|
|
||||||
|
|
||||||
// Store the uniqued decompositions in a byte buffer,
|
// Store the uniqued decompositions in a byte buffer,
|
||||||
// preceded by their byte length.
|
// preceded by their byte length.
|
||||||
for _, c := range chars {
|
for _, c := range chars {
|
||||||
for f := 0; f < 2; f++ {
|
for _, f := range c.forms {
|
||||||
d := c.forms[f].expandedDecomp
|
if len(f.expandedDecomp) == 0 {
|
||||||
s := string([]rune(d))
|
continue
|
||||||
if _, ok := positionMap[s]; !ok {
|
|
||||||
p := decompositions.Len()
|
|
||||||
decompositions.WriteByte(uint8(len(s)))
|
|
||||||
decompositions.WriteString(s)
|
|
||||||
positionMap[s] = uint16(p)
|
|
||||||
}
|
}
|
||||||
|
if f.combinesBackward {
|
||||||
|
logger.Fatalf("%U: combinesBackward and decompose", c.codePoint)
|
||||||
|
}
|
||||||
|
index, s := mkstr(c.codePoint, &f)
|
||||||
|
decompSet.insert(index, s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
|
||||||
|
size := 0
|
||||||
|
positionMap := make(map[string]uint16)
|
||||||
|
decompositions.WriteString("\000")
|
||||||
|
cname := []string{"firstCCC", "firstLeadingCCC", "", "lastDecomp"}
|
||||||
|
fmt.Println("const (")
|
||||||
|
for i, m := range decompSet {
|
||||||
|
sa := []string{}
|
||||||
|
for s, _ := range m {
|
||||||
|
sa = append(sa, s)
|
||||||
|
}
|
||||||
|
sort.Strings(sa)
|
||||||
|
for _, s := range sa {
|
||||||
|
p := decompositions.Len()
|
||||||
|
decompositions.WriteString(s)
|
||||||
|
positionMap[s] = uint16(p)
|
||||||
|
}
|
||||||
|
if cname[i] != "" {
|
||||||
|
fmt.Printf("%s = 0x%X\n", cname[i], decompositions.Len())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Println("maxDecomp = 0x8000")
|
||||||
|
fmt.Println(")")
|
||||||
b := decompositions.Bytes()
|
b := decompositions.Bytes()
|
||||||
printBytes(b, "decomps")
|
printBytes(b, "decomps")
|
||||||
size += len(b)
|
size += len(b)
|
||||||
|
|
||||||
nfcT := newNode()
|
varnames := []string{"nfc", "nfkc"}
|
||||||
nfkcT := newNode()
|
for i := 0; i < FNumberOfFormTypes; i++ {
|
||||||
for i, c := range chars {
|
trie := newNode()
|
||||||
d := c.forms[FCanonical].expandedDecomp
|
for r, c := range chars {
|
||||||
if len(d) != 0 {
|
f := c.forms[i]
|
||||||
nfcT.insert(rune(i), positionMap[string([]rune(d))])
|
d := f.expandedDecomp
|
||||||
if ccc(c.codePoint) != ccc(d[0]) {
|
if len(d) != 0 {
|
||||||
// We assume the lead ccc of a decomposition is !=0 in this case.
|
_, key := mkstr(c.codePoint, &f)
|
||||||
if ccc(d[0]) == 0 {
|
trie.insert(rune(r), positionMap[key])
|
||||||
logger.Fatal("Expected differing CCC to be non-zero.")
|
if c.ccc != ccc(d[0]) {
|
||||||
}
|
// We assume the lead ccc of a decomposition is !=0 in this case.
|
||||||
}
|
if ccc(d[0]) == 0 {
|
||||||
}
|
logger.Fatal("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
|
||||||
d = c.forms[FCompatibility].expandedDecomp
|
}
|
||||||
if len(d) != 0 {
|
|
||||||
nfkcT.insert(rune(i), positionMap[string([]rune(d))])
|
|
||||||
if ccc(c.codePoint) != ccc(d[0]) {
|
|
||||||
// We assume the lead ccc of a decomposition is !=0 in this case.
|
|
||||||
if ccc(d[0]) == 0 {
|
|
||||||
logger.Fatal("Expected differing CCC to be non-zero.")
|
|
||||||
}
|
}
|
||||||
|
} else if v := makeEntry(&f)<<8 | uint16(c.ccc); v != 0 {
|
||||||
|
trie.insert(c.codePoint, 0x8000|v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
size += trie.printTables(varnames[i])
|
||||||
}
|
}
|
||||||
size += nfcT.printTables("nfcDecomp")
|
|
||||||
size += nfkcT.printTables("nfkcDecomp")
|
|
||||||
return size
|
return size
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -687,15 +726,15 @@ func makeTables() {
|
|||||||
}
|
}
|
||||||
list := strings.Split(*tablelist, ",")
|
list := strings.Split(*tablelist, ",")
|
||||||
if *tablelist == "all" {
|
if *tablelist == "all" {
|
||||||
list = []string{"decomp", "recomp", "info"}
|
list = []string{"recomp", "info"}
|
||||||
}
|
}
|
||||||
fmt.Printf(fileHeader, *tablelist, *url)
|
fmt.Printf(fileHeader, *tablelist, *url)
|
||||||
|
|
||||||
fmt.Println("// Version is the Unicode edition from which the tables are derived.")
|
fmt.Println("// Version is the Unicode edition from which the tables are derived.")
|
||||||
fmt.Printf("const Version = %q\n\n", version())
|
fmt.Printf("const Version = %q\n\n", version())
|
||||||
|
|
||||||
if contains(list, "decomp") {
|
if contains(list, "info") {
|
||||||
size += printDecompositionTables()
|
size += printCharInfoTables()
|
||||||
}
|
}
|
||||||
|
|
||||||
if contains(list, "recomp") {
|
if contains(list, "recomp") {
|
||||||
@ -730,9 +769,6 @@ func makeTables() {
|
|||||||
fmt.Printf("}\n\n")
|
fmt.Printf("}\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
if contains(list, "info") {
|
|
||||||
size += printCharInfoTables()
|
|
||||||
}
|
|
||||||
fmt.Printf("// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
|
fmt.Printf("// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -761,6 +797,11 @@ func verifyComputed() {
|
|||||||
log.Fatalf("%U: NF*C must be maybe if combinesBackward", i)
|
log.Fatalf("%U: NF*C must be maybe if combinesBackward", i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
nfc := c.forms[FCanonical]
|
||||||
|
nfkc := c.forms[FCompatibility]
|
||||||
|
if nfc.combinesBackward != nfkc.combinesBackward {
|
||||||
|
logger.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -448,7 +448,7 @@ func decomposeToLastBoundary(rb *reorderBuffer, buf []byte) []byte {
|
|||||||
}
|
}
|
||||||
// Check that decomposition doesn't result in overflow.
|
// Check that decomposition doesn't result in overflow.
|
||||||
if info.hasDecomposition() {
|
if info.hasDecomposition() {
|
||||||
dcomp := rb.f.decompose(inputBytes(buf), p-int(info.size))
|
dcomp := info.decomposition()
|
||||||
for i := 0; i < len(dcomp); {
|
for i := 0; i < len(dcomp); {
|
||||||
inf := rb.f.info(inputBytes(dcomp), i)
|
inf := rb.f.info(inputBytes(dcomp), i)
|
||||||
i += int(inf.size)
|
i += int(inf.size)
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user