mirror of
https://github.com/golang/go
synced 2024-11-26 07:38:00 -07:00
exp/norm: exposed runeInfo type in API.
For completeness, we also expose the Canonical Combining Class of a rune. This does not increase the data size. R=r CC=golang-dev https://golang.org/cl/5931043
This commit is contained in:
parent
d8e9b04ca6
commit
98aa4968b7
@ -22,7 +22,7 @@ const (
|
|||||||
// the UTF-8 characters in order. Only the rune array is maintained in sorted
|
// the UTF-8 characters in order. Only the rune array is maintained in sorted
|
||||||
// order. flush writes the resulting segment to a byte array.
|
// order. flush writes the resulting segment to a byte array.
|
||||||
type reorderBuffer struct {
|
type reorderBuffer struct {
|
||||||
rune [maxBufferSize]runeInfo // Per character info.
|
rune [maxBufferSize]Properties // Per character info.
|
||||||
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
|
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by Properties.pos.
|
||||||
nrune int // Number of runeInfos.
|
nrune int // Number of Properties entries in use.
|
||||||
nbyte uint8 // Number of bytes.
|
nbyte uint8 // Number of bytes.
|
||||||
@ -81,7 +81,7 @@ func (rb *reorderBuffer) flushCopy(buf []byte) int {
|
|||||||
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
|
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
|
||||||
// It returns false if the buffer is not large enough to hold the rune.
|
// It returns false if the buffer is not large enough to hold the rune.
|
||||||
// It is used internally by insert and insertString only.
|
// It is used internally by insert and insertString only.
|
||||||
func (rb *reorderBuffer) insertOrdered(info runeInfo) bool {
|
func (rb *reorderBuffer) insertOrdered(info Properties) bool {
|
||||||
n := rb.nrune
|
n := rb.nrune
|
||||||
if n >= maxCombiningChars+1 {
|
if n >= maxCombiningChars+1 {
|
||||||
return false
|
return false
|
||||||
@ -107,12 +107,12 @@ func (rb *reorderBuffer) insertOrdered(info runeInfo) bool {
|
|||||||
|
|
||||||
// insert inserts the given rune in the buffer ordered by CCC.
|
// insert inserts the given rune in the buffer ordered by CCC.
|
||||||
// It returns true if the buffer was large enough to hold the decomposed rune.
|
// It returns true if the buffer was large enough to hold the decomposed rune.
|
||||||
func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool {
|
func (rb *reorderBuffer) insert(src input, i int, info Properties) bool {
|
||||||
if rune := src.hangul(i); rune != 0 {
|
if rune := src.hangul(i); rune != 0 {
|
||||||
return rb.decomposeHangul(rune)
|
return rb.decomposeHangul(rune)
|
||||||
}
|
}
|
||||||
if info.hasDecomposition() {
|
if info.hasDecomposition() {
|
||||||
return rb.insertDecomposed(info.decomposition())
|
return rb.insertDecomposed(info.Decomposition())
|
||||||
}
|
}
|
||||||
return rb.insertSingle(src, i, info)
|
return rb.insertSingle(src, i, info)
|
||||||
}
|
}
|
||||||
@ -136,7 +136,7 @@ func (rb *reorderBuffer) insertDecomposed(dcomp []byte) bool {
|
|||||||
|
|
||||||
// insertSingle inserts an entry in the reorderBuffer for the rune at
|
// insertSingle inserts an entry in the reorderBuffer for the rune at
|
||||||
// position i. info is the runeInfo for the rune at position i.
|
// position i. info is the Properties for the rune at position i.
|
||||||
func (rb *reorderBuffer) insertSingle(src input, i int, info runeInfo) bool {
|
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) bool {
|
||||||
// insertOrder changes nbyte
|
// insertOrder changes nbyte
|
||||||
pos := rb.nbyte
|
pos := rb.nbyte
|
||||||
if !rb.insertOrdered(info) {
|
if !rb.insertOrdered(info) {
|
||||||
@ -151,7 +151,7 @@ func (rb *reorderBuffer) appendRune(r rune) {
|
|||||||
bn := rb.nbyte
|
bn := rb.nbyte
|
||||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||||
rb.nbyte += utf8.UTFMax
|
rb.nbyte += utf8.UTFMax
|
||||||
rb.rune[rb.nrune] = runeInfo{pos: bn, size: uint8(sz)}
|
rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)}
|
||||||
rb.nrune++
|
rb.nrune++
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,7 +159,7 @@ func (rb *reorderBuffer) appendRune(r rune) {
|
|||||||
func (rb *reorderBuffer) assignRune(pos int, r rune) {
|
func (rb *reorderBuffer) assignRune(pos int, r rune) {
|
||||||
bn := rb.rune[pos].pos
|
bn := rb.rune[pos].pos
|
||||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||||
rb.rune[pos] = runeInfo{pos: bn, size: uint8(sz)}
|
rb.rune[pos] = Properties{pos: bn, size: uint8(sz)}
|
||||||
}
|
}
|
||||||
|
|
||||||
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
|
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
|
||||||
|
@ -32,8 +32,8 @@ const (
|
|||||||
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
|
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
|
||||||
)
|
)
|
||||||
|
|
||||||
// runeInfo is a representation for the data stored in charinfoTrie.
|
// Properties provides access to normalization properties of a rune.
|
||||||
type runeInfo struct {
|
type Properties struct {
|
||||||
pos uint8 // start position in reorderBuffer; used in composition.go
|
pos uint8 // start position in reorderBuffer; used in composition.go
|
||||||
size uint8 // length of UTF-8 encoding of this rune
|
size uint8 // length of UTF-8 encoding of this rune
|
||||||
ccc uint8 // leading canonical combining class (ccc if not decomposition)
|
ccc uint8 // leading canonical combining class (ccc if not decomposition)
|
||||||
@ -43,7 +43,7 @@ type runeInfo struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// functions dispatchable per form
|
// functions dispatchable per form
|
||||||
type lookupFunc func(b input, i int) runeInfo
|
type lookupFunc func(b input, i int) Properties
|
||||||
|
|
||||||
// formInfo holds Form-specific functions and tables.
|
// formInfo holds Form-specific functions and tables.
|
||||||
type formInfo struct {
|
type formInfo struct {
|
||||||
@ -75,11 +75,14 @@ func init() {
|
|||||||
|
|
||||||
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
||||||
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
||||||
// after 'a'. However, a might combine with modifiers, so from the application's
|
// after 'a'. However, 'a' might combine with modifiers, so from the application's
|
||||||
// perspective it is not a good boundary. We will therefore always use the
|
// perspective it is not a good boundary. We will therefore always use the
|
||||||
// boundaries for the combining variants.
|
// boundaries for the combining variants.
|
||||||
func (i runeInfo) boundaryBefore() bool {
|
|
||||||
if i.ccc == 0 && !i.combinesBackward() {
|
// BoundaryBefore returns true if this rune starts a new segment and
|
||||||
|
// cannot combine with any rune on the left.
|
||||||
|
func (p Properties) BoundaryBefore() bool {
|
||||||
|
if p.ccc == 0 && !p.combinesBackward() {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// We assume that the CCC of the first character in a decomposition
|
// We assume that the CCC of the first character in a decomposition
|
||||||
@ -88,8 +91,10 @@ func (i runeInfo) boundaryBefore() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i runeInfo) boundaryAfter() bool {
|
// BoundaryAfter returns true if this rune cannot combine with runes to the right
|
||||||
return i.isInert()
|
// and always denotes the end of a segment.
|
||||||
|
func (p Properties) BoundaryAfter() bool {
|
||||||
|
return p.isInert()
|
||||||
}
|
}
|
||||||
|
|
||||||
// We pack quick check data in 4 bits:
|
// We pack quick check data in 4 bits:
|
||||||
@ -101,25 +106,52 @@ func (i runeInfo) boundaryAfter() bool {
|
|||||||
// influenced by normalization.
|
// influenced by normalization.
|
||||||
type qcInfo uint8
|
type qcInfo uint8
|
||||||
|
|
||||||
func (i runeInfo) isYesC() bool { return i.flags&0x4 == 0 }
|
func (p Properties) isYesC() bool { return p.flags&0x4 == 0 }
|
||||||
func (i runeInfo) isYesD() bool { return i.flags&0x1 == 0 }
|
func (p Properties) isYesD() bool { return p.flags&0x1 == 0 }
|
||||||
|
|
||||||
func (i runeInfo) combinesForward() bool { return i.flags&0x8 != 0 }
|
func (p Properties) combinesForward() bool { return p.flags&0x8 != 0 }
|
||||||
func (i runeInfo) combinesBackward() bool { return i.flags&0x2 != 0 } // == isMaybe
|
func (p Properties) combinesBackward() bool { return p.flags&0x2 != 0 } // == isMaybe
|
||||||
func (i runeInfo) hasDecomposition() bool { return i.flags&0x1 != 0 } // == isNoD
|
func (p Properties) hasDecomposition() bool { return p.flags&0x1 != 0 } // == isNoD
|
||||||
|
|
||||||
func (r runeInfo) isInert() bool {
|
func (p Properties) isInert() bool {
|
||||||
return r.flags&0xf == 0 && r.ccc == 0
|
return p.flags&0xf == 0 && p.ccc == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r runeInfo) decomposition() []byte {
|
// Decomposition returns the decomposition for the underlying rune
|
||||||
if r.index == 0 {
|
// or nil if there is none.
|
||||||
|
func (p Properties) Decomposition() []byte {
|
||||||
|
if p.index == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
p := r.index
|
i := p.index
|
||||||
n := decomps[p] & 0x3F
|
n := decomps[i] & headerLenMask
|
||||||
p++
|
i++
|
||||||
return decomps[p : p+uint16(n)]
|
return decomps[i : i+uint16(n)]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size returns the length of UTF-8 encoding of the rune.
|
||||||
|
func (p Properties) Size() int {
|
||||||
|
return int(p.size)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CCC returns the canonical combining class of the underlying rune.
|
||||||
|
func (p Properties) CCC() uint8 {
|
||||||
|
if p.index > firstCCCZeroExcept {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return p.ccc
|
||||||
|
}
|
||||||
|
|
||||||
|
// LeadCCC returns the CCC of the first rune in the decomposition.
|
||||||
|
// If there is no decomposition, LeadCCC equals CCC.
|
||||||
|
func (p Properties) LeadCCC() uint8 {
|
||||||
|
return p.ccc
|
||||||
|
}
|
||||||
|
|
||||||
|
// TrailCCC returns the CCC of the last rune in the decomposition.
|
||||||
|
// If there is no decomposition, TrailCCC equals CCC.
|
||||||
|
func (p Properties) TrailCCC() uint8 {
|
||||||
|
return p.tccc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recomposition
|
// Recomposition
|
||||||
@ -135,24 +167,40 @@ func combine(a, b rune) rune {
|
|||||||
return recompMap[key]
|
return recompMap[key]
|
||||||
}
|
}
|
||||||
|
|
||||||
func lookupInfoNFC(b input, i int) runeInfo {
|
func lookupInfoNFC(b input, i int) Properties {
|
||||||
v, sz := b.charinfoNFC(i)
|
v, sz := b.charinfoNFC(i)
|
||||||
return compInfo(v, sz)
|
return compInfo(v, sz)
|
||||||
}
|
}
|
||||||
|
|
||||||
func lookupInfoNFKC(b input, i int) runeInfo {
|
func lookupInfoNFKC(b input, i int) Properties {
|
||||||
v, sz := b.charinfoNFKC(i)
|
v, sz := b.charinfoNFKC(i)
|
||||||
return compInfo(v, sz)
|
return compInfo(v, sz)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Properties returns properties for the first rune in s.
|
||||||
|
func (f Form) Properties(s []byte) Properties {
|
||||||
|
if f == NFC || f == NFD {
|
||||||
|
return compInfo(nfcTrie.lookup(s))
|
||||||
|
}
|
||||||
|
return compInfo(nfkcTrie.lookup(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
// PropertiesString returns properties for the first rune in s.
|
||||||
|
func (f Form) PropertiesString(s string) Properties {
|
||||||
|
if f == NFC || f == NFD {
|
||||||
|
return compInfo(nfcTrie.lookupString(s))
|
||||||
|
}
|
||||||
|
return compInfo(nfkcTrie.lookupString(s))
|
||||||
|
}
|
||||||
|
|
||||||
// compInfo converts the information contained in v and sz
|
// compInfo converts the information contained in v and sz
|
||||||
// to a runeInfo. See the comment at the top of the file
|
// to a Properties. See the comment at the top of the file
|
||||||
// for more information on the format.
|
// for more information on the format.
|
||||||
func compInfo(v uint16, sz int) runeInfo {
|
func compInfo(v uint16, sz int) Properties {
|
||||||
if v == 0 {
|
if v == 0 {
|
||||||
return runeInfo{size: uint8(sz)}
|
return Properties{size: uint8(sz)}
|
||||||
} else if v >= 0x8000 {
|
} else if v >= 0x8000 {
|
||||||
return runeInfo{
|
return Properties{
|
||||||
size: uint8(sz),
|
size: uint8(sz),
|
||||||
ccc: uint8(v),
|
ccc: uint8(v),
|
||||||
tccc: uint8(v),
|
tccc: uint8(v),
|
||||||
@ -162,7 +210,7 @@ func compInfo(v uint16, sz int) runeInfo {
|
|||||||
// has decomposition
|
// has decomposition
|
||||||
h := decomps[v]
|
h := decomps[v]
|
||||||
f := (qcInfo(h&headerFlagsMask) >> 4) | 0x1
|
f := (qcInfo(h&headerFlagsMask) >> 4) | 0x1
|
||||||
ri := runeInfo{size: uint8(sz), flags: f, index: v}
|
ri := Properties{size: uint8(sz), flags: f, index: v}
|
||||||
if v >= firstCCC {
|
if v >= firstCCC {
|
||||||
v += uint16(h&headerLenMask) + 1
|
v += uint16(h&headerLenMask) + 1
|
||||||
ri.tccc = decomps[v]
|
ri.tccc = decomps[v]
|
||||||
|
@ -10,7 +10,7 @@ const MaxSegmentSize = maxByteBufferSize
|
|||||||
// to a given Form.
|
// to a given Form.
|
||||||
type Iter struct {
|
type Iter struct {
|
||||||
rb reorderBuffer
|
rb reorderBuffer
|
||||||
info runeInfo // first character saved from previous iteration
|
info Properties // first character saved from previous iteration
|
||||||
next iterFunc // implementation of next depends on form
|
next iterFunc // implementation of next depends on form
|
||||||
|
|
||||||
p int // current position in input source
|
p int // current position in input source
|
||||||
@ -124,7 +124,7 @@ doFast:
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if d := i.info.decomposition(); d != nil {
|
} else if d := i.info.Decomposition(); d != nil {
|
||||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||||
p := outp + len(d)
|
p := outp + len(d)
|
||||||
if p > i.maxseg && i.setStart(outp, i.p) {
|
if p > i.maxseg && i.setStart(outp, i.p) {
|
||||||
@ -245,7 +245,7 @@ doFast:
|
|||||||
if i.setStart(outp-1, i.p-1) {
|
if i.setStart(outp-1, i.p-1) {
|
||||||
i.p--
|
i.p--
|
||||||
outp--
|
outp--
|
||||||
i.info = runeInfo{size: 1}
|
i.info = Properties{size: 1}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -274,7 +274,7 @@ doNorm:
|
|||||||
return outp
|
return outp
|
||||||
}
|
}
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
if i.info.boundaryBefore() {
|
if i.info.BoundaryBefore() {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -605,6 +605,10 @@ func printCharInfoTables() int {
|
|||||||
|
|
||||||
lccc := ccc(d[0])
|
lccc := ccc(d[0])
|
||||||
tccc := ccc(d[len(d)-1])
|
tccc := ccc(d[len(d)-1])
|
||||||
|
cc := ccc(r)
|
||||||
|
if cc != 0 && lccc == 0 && tccc == 0 {
|
||||||
|
logger.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", cc)
|
||||||
|
}
|
||||||
if tccc < lccc && lccc != 0 {
|
if tccc < lccc && lccc != 0 {
|
||||||
const msg = "%U: lccc (%d) must be <= tcc (%d)"
|
const msg = "%U: lccc (%d) must be <= tcc (%d)"
|
||||||
logger.Fatalf(msg, r, lccc, tccc)
|
logger.Fatalf(msg, r, lccc, tccc)
|
||||||
@ -615,7 +619,13 @@ func printCharInfoTables() int {
|
|||||||
index = 1
|
index = 1
|
||||||
if lccc > 0 {
|
if lccc > 0 {
|
||||||
s += string([]byte{lccc})
|
s += string([]byte{lccc})
|
||||||
index |= 2
|
index = 2
|
||||||
|
}
|
||||||
|
if cc != lccc {
|
||||||
|
if cc != 0 {
|
||||||
|
logger.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", cc)
|
||||||
|
}
|
||||||
|
index = 3
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return index, s
|
return index, s
|
||||||
@ -642,7 +652,7 @@ func printCharInfoTables() int {
|
|||||||
size := 0
|
size := 0
|
||||||
positionMap := make(map[string]uint16)
|
positionMap := make(map[string]uint16)
|
||||||
decompositions.WriteString("\000")
|
decompositions.WriteString("\000")
|
||||||
cname := []string{"firstCCC", "firstLeadingCCC", "", "lastDecomp"}
|
cname := []string{"firstCCC", "firstLeadingCCC", "firstCCCZeroExcept", "lastDecomp"}
|
||||||
fmt.Println("const (")
|
fmt.Println("const (")
|
||||||
for i, m := range decompSet {
|
for i, m := range decompSet {
|
||||||
sa := []string{}
|
sa := []string{}
|
||||||
|
@ -185,14 +185,14 @@ func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
|
|||||||
}
|
}
|
||||||
fd := &rb.f
|
fd := &rb.f
|
||||||
if doMerge {
|
if doMerge {
|
||||||
var info runeInfo
|
var info Properties
|
||||||
if p < n {
|
if p < n {
|
||||||
info = fd.info(src, p)
|
info = fd.info(src, p)
|
||||||
if p == 0 && !info.boundaryBefore() {
|
if p == 0 && !info.BoundaryBefore() {
|
||||||
out = decomposeToLastBoundary(rb, out)
|
out = decomposeToLastBoundary(rb, out)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if info.size == 0 || info.boundaryBefore() {
|
if info.size == 0 || info.BoundaryBefore() {
|
||||||
if fd.composing {
|
if fd.composing {
|
||||||
rb.compose()
|
rb.compose()
|
||||||
}
|
}
|
||||||
@ -316,13 +316,13 @@ func firstBoundary(rb *reorderBuffer) int {
|
|||||||
}
|
}
|
||||||
fd := &rb.f
|
fd := &rb.f
|
||||||
info := fd.info(src, i)
|
info := fd.info(src, i)
|
||||||
for n := 0; info.size != 0 && !info.boundaryBefore(); {
|
for n := 0; info.size != 0 && !info.BoundaryBefore(); {
|
||||||
i += int(info.size)
|
i += int(info.size)
|
||||||
if n++; n >= maxCombiningChars {
|
if n++; n >= maxCombiningChars {
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
if i >= nsrc {
|
if i >= nsrc {
|
||||||
if !info.boundaryAfter() {
|
if !info.BoundaryAfter() {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
return nsrc
|
return nsrc
|
||||||
@ -368,11 +368,11 @@ func lastBoundary(fd *formInfo, b []byte) int {
|
|||||||
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
|
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
if info.boundaryAfter() {
|
if info.BoundaryAfter() {
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
i = p
|
i = p
|
||||||
for n := 0; i >= 0 && !info.boundaryBefore(); {
|
for n := 0; i >= 0 && !info.BoundaryBefore(); {
|
||||||
info, p = lastRuneStart(fd, b[:i])
|
info, p = lastRuneStart(fd, b[:i])
|
||||||
if n++; n >= maxCombiningChars {
|
if n++; n >= maxCombiningChars {
|
||||||
return len(b)
|
return len(b)
|
||||||
@ -404,7 +404,7 @@ func decomposeSegment(rb *reorderBuffer, sp int) int {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
info = rb.f.info(rb.src, sp)
|
info = rb.f.info(rb.src, sp)
|
||||||
bound := info.boundaryBefore()
|
bound := info.BoundaryBefore()
|
||||||
if bound || info.size == 0 {
|
if bound || info.size == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -414,12 +414,12 @@ func decomposeSegment(rb *reorderBuffer, sp int) int {
|
|||||||
|
|
||||||
// lastRuneStart returns the runeInfo and position of the last
|
// lastRuneStart returns the Properties and position of the last
|
||||||
// rune in buf or the zero runeInfo and -1 if no rune was found.
|
// rune in buf or the zero Properties and -1 if no rune was found.
|
||||||
func lastRuneStart(fd *formInfo, buf []byte) (runeInfo, int) {
|
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
|
||||||
p := len(buf) - 1
|
p := len(buf) - 1
|
||||||
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
|
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
|
||||||
}
|
}
|
||||||
if p < 0 {
|
if p < 0 {
|
||||||
return runeInfo{}, -1
|
return Properties{}, -1
|
||||||
}
|
}
|
||||||
return fd.info(inputBytes(buf), p), p
|
return fd.info(inputBytes(buf), p), p
|
||||||
}
|
}
|
||||||
@ -433,15 +433,15 @@ func decomposeToLastBoundary(rb *reorderBuffer, buf []byte) []byte {
|
|||||||
// illegal trailing continuation bytes
|
// illegal trailing continuation bytes
|
||||||
return buf
|
return buf
|
||||||
}
|
}
|
||||||
if info.boundaryAfter() {
|
if info.BoundaryAfter() {
|
||||||
return buf
|
return buf
|
||||||
}
|
}
|
||||||
var add [maxBackRunes]runeInfo // stores runeInfo in reverse order
|
var add [maxBackRunes]Properties // stores Properties in reverse order
|
||||||
add[0] = info
|
add[0] = info
|
||||||
padd := 1
|
padd := 1
|
||||||
n := 1
|
n := 1
|
||||||
p := len(buf) - int(info.size)
|
p := len(buf) - int(info.size)
|
||||||
for ; p >= 0 && !info.boundaryBefore(); p -= int(info.size) {
|
for ; p >= 0 && !info.BoundaryBefore(); p -= int(info.size) {
|
||||||
info, i = lastRuneStart(fd, buf[:p])
|
info, i = lastRuneStart(fd, buf[:p])
|
||||||
if int(info.size) != p-i {
|
if int(info.size) != p-i {
|
||||||
break
|
break
|
||||||
@ -452,7 +452,7 @@ func decomposeToLastBoundary(rb *reorderBuffer, buf []byte) []byte {
|
|||||||
i += int(info.size)
|
i += int(info.size)
|
||||||
n++
|
n++
|
||||||
} else {
|
} else {
|
||||||
dcomp := info.decomposition()
|
dcomp := info.Decomposition()
|
||||||
for i := 0; i < len(dcomp); {
|
for i := 0; i < len(dcomp); {
|
||||||
inf := rb.f.info(inputBytes(dcomp), i)
|
inf := rb.f.info(inputBytes(dcomp), i)
|
||||||
i += int(inf.size)
|
i += int(inf.size)
|
||||||
|
@ -10,6 +10,7 @@ const Version = "6.0.0"
|
|||||||
const (
|
const (
|
||||||
firstCCC = 0x2E45
|
firstCCC = 0x2E45
|
||||||
firstLeadingCCC = 0x4965
|
firstLeadingCCC = 0x4965
|
||||||
|
firstCCCZeroExcept = 0x497B
|
||||||
lastDecomp = 0x49A2
|
lastDecomp = 0x49A2
|
||||||
maxDecomp = 0x8000
|
maxDecomp = 0x8000
|
||||||
)
|
)
|
||||||
@ -2660,10 +2661,10 @@ var decomps = [...]byte{
|
|||||||
0xCC, 0x94, 0xCC, 0x81, 0xE6, 0x86, 0xCF, 0x89,
|
0xCC, 0x94, 0xCC, 0x81, 0xE6, 0x86, 0xCF, 0x89,
|
||||||
0xCC, 0x94, 0xCD, 0x82, 0xE6, 0x42, 0xCC, 0x80,
|
0xCC, 0x94, 0xCD, 0x82, 0xE6, 0x42, 0xCC, 0x80,
|
||||||
0xE6, 0xE6, 0x42, 0xCC, 0x81, 0xE6, 0xE6, 0x42,
|
0xE6, 0xE6, 0x42, 0xCC, 0x81, 0xE6, 0xE6, 0x42,
|
||||||
0xCC, 0x93, 0xE6, 0xE6, 0x43, 0xE3, 0x82, 0x99,
|
0xCC, 0x93, 0xE6, 0xE6, 0x44, 0xCC, 0x88, 0xCC,
|
||||||
0x08, 0x08, 0x43, 0xE3, 0x82, 0x9A, 0x08, 0x08,
|
0x81, 0xE6, 0xE6, 0x43, 0xE3, 0x82, 0x99, 0x08,
|
||||||
// Bytes 4980 - 49bf
|
// Bytes 4980 - 49bf
|
||||||
0x44, 0xCC, 0x88, 0xCC, 0x81, 0xE6, 0xE6, 0x46,
|
0x08, 0x43, 0xE3, 0x82, 0x9A, 0x08, 0x08, 0x46,
|
||||||
0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB2, 0x82, 0x81,
|
0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB2, 0x82, 0x81,
|
||||||
0x46, 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB4, 0x84,
|
0x46, 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB4, 0x84,
|
||||||
0x81, 0x46, 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80,
|
0x81, 0x46, 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80,
|
||||||
@ -2756,7 +2757,7 @@ var nfcValues = [2944]uint16{
|
|||||||
0x0236: 0x8001, 0x0237: 0x8001, 0x0238: 0x8601, 0x0239: 0x80dc, 0x023a: 0x80dc, 0x023b: 0x80dc,
|
0x0236: 0x8001, 0x0237: 0x8001, 0x0238: 0x8601, 0x0239: 0x80dc, 0x023a: 0x80dc, 0x023b: 0x80dc,
|
||||||
0x023c: 0x80dc, 0x023d: 0x80e6, 0x023e: 0x80e6, 0x023f: 0x80e6,
|
0x023c: 0x80dc, 0x023d: 0x80e6, 0x023e: 0x80e6, 0x023f: 0x80e6,
|
||||||
// Block 0x9, offset 0x240
|
// Block 0x9, offset 0x240
|
||||||
0x0240: 0x4965, 0x0241: 0x496a, 0x0242: 0x86e6, 0x0243: 0x496f, 0x0244: 0x4980, 0x0245: 0x86f0,
|
0x0240: 0x4965, 0x0241: 0x496a, 0x0242: 0x86e6, 0x0243: 0x496f, 0x0244: 0x4974, 0x0245: 0x86f0,
|
||||||
0x0246: 0x80e6, 0x0247: 0x80dc, 0x0248: 0x80dc, 0x0249: 0x80dc, 0x024a: 0x80e6, 0x024b: 0x80e6,
|
0x0246: 0x80e6, 0x0247: 0x80dc, 0x0248: 0x80dc, 0x0249: 0x80dc, 0x024a: 0x80e6, 0x024b: 0x80e6,
|
||||||
0x024c: 0x80e6, 0x024d: 0x80dc, 0x024e: 0x80dc, 0x0250: 0x80e6, 0x0251: 0x80e6,
|
0x024c: 0x80e6, 0x024d: 0x80dc, 0x024e: 0x80dc, 0x0250: 0x80e6, 0x0251: 0x80e6,
|
||||||
0x0252: 0x80e6, 0x0253: 0x80dc, 0x0254: 0x80dc, 0x0255: 0x80dc, 0x0256: 0x80dc, 0x0257: 0x80e6,
|
0x0252: 0x80e6, 0x0253: 0x80dc, 0x0254: 0x80dc, 0x0255: 0x80dc, 0x0256: 0x80dc, 0x0257: 0x80e6,
|
||||||
@ -3903,7 +3904,7 @@ var nfkcValues = [5568]uint16{
|
|||||||
0x0236: 0x8001, 0x0237: 0x8001, 0x0238: 0x8601, 0x0239: 0x80dc, 0x023a: 0x80dc, 0x023b: 0x80dc,
|
0x0236: 0x8001, 0x0237: 0x8001, 0x0238: 0x8601, 0x0239: 0x80dc, 0x023a: 0x80dc, 0x023b: 0x80dc,
|
||||||
0x023c: 0x80dc, 0x023d: 0x80e6, 0x023e: 0x80e6, 0x023f: 0x80e6,
|
0x023c: 0x80dc, 0x023d: 0x80e6, 0x023e: 0x80e6, 0x023f: 0x80e6,
|
||||||
// Block 0x9, offset 0x240
|
// Block 0x9, offset 0x240
|
||||||
0x0240: 0x4965, 0x0241: 0x496a, 0x0242: 0x86e6, 0x0243: 0x496f, 0x0244: 0x4980, 0x0245: 0x86f0,
|
0x0240: 0x4965, 0x0241: 0x496a, 0x0242: 0x86e6, 0x0243: 0x496f, 0x0244: 0x4974, 0x0245: 0x86f0,
|
||||||
0x0246: 0x80e6, 0x0247: 0x80dc, 0x0248: 0x80dc, 0x0249: 0x80dc, 0x024a: 0x80e6, 0x024b: 0x80e6,
|
0x0246: 0x80e6, 0x0247: 0x80dc, 0x0248: 0x80dc, 0x0249: 0x80dc, 0x024a: 0x80e6, 0x024b: 0x80e6,
|
||||||
0x024c: 0x80e6, 0x024d: 0x80dc, 0x024e: 0x80dc, 0x0250: 0x80e6, 0x0251: 0x80e6,
|
0x024c: 0x80e6, 0x024d: 0x80dc, 0x024e: 0x80dc, 0x0250: 0x80e6, 0x0251: 0x80e6,
|
||||||
0x0252: 0x80e6, 0x0253: 0x80dc, 0x0254: 0x80dc, 0x0255: 0x80dc, 0x0256: 0x80dc, 0x0257: 0x80e6,
|
0x0252: 0x80e6, 0x0253: 0x80dc, 0x0254: 0x80dc, 0x0255: 0x80dc, 0x0256: 0x80dc, 0x0257: 0x80e6,
|
||||||
@ -4609,7 +4610,7 @@ var nfkcValues = [5568]uint16{
|
|||||||
0x124c: 0x0a89, 0x124d: 0x0a8d, 0x124e: 0x0a91, 0x124f: 0x0a95, 0x1250: 0x0a99, 0x1251: 0x0a9d,
|
0x124c: 0x0a89, 0x124d: 0x0a8d, 0x124e: 0x0a91, 0x124f: 0x0a95, 0x1250: 0x0a99, 0x1251: 0x0a9d,
|
||||||
0x1252: 0x0aa1, 0x1253: 0x0aa5, 0x1254: 0x0aad, 0x1255: 0x0ab5, 0x1256: 0x0abd, 0x1257: 0x0ac1,
|
0x1252: 0x0aa1, 0x1253: 0x0aa5, 0x1254: 0x0aad, 0x1255: 0x0ab5, 0x1256: 0x0abd, 0x1257: 0x0ac1,
|
||||||
0x1258: 0x0ac5, 0x1259: 0x0ac9, 0x125a: 0x0acd, 0x125b: 0x0ad1, 0x125c: 0x0ad5, 0x125d: 0x0ae5,
|
0x1258: 0x0ac5, 0x1259: 0x0ac9, 0x125a: 0x0acd, 0x125b: 0x0ad1, 0x125c: 0x0ad5, 0x125d: 0x0ae5,
|
||||||
0x125e: 0x4974, 0x125f: 0x497a, 0x1260: 0x0889, 0x1261: 0x07d9, 0x1262: 0x07dd, 0x1263: 0x0901,
|
0x125e: 0x497b, 0x125f: 0x4981, 0x1260: 0x0889, 0x1261: 0x07d9, 0x1262: 0x07dd, 0x1263: 0x0901,
|
||||||
0x1264: 0x07e1, 0x1265: 0x0905, 0x1266: 0x0909, 0x1267: 0x07e5, 0x1268: 0x07e9, 0x1269: 0x07ed,
|
0x1264: 0x07e1, 0x1265: 0x0905, 0x1266: 0x0909, 0x1267: 0x07e5, 0x1268: 0x07e9, 0x1269: 0x07ed,
|
||||||
0x126a: 0x090d, 0x126b: 0x0911, 0x126c: 0x0915, 0x126d: 0x0919, 0x126e: 0x091d, 0x126f: 0x0921,
|
0x126a: 0x090d, 0x126b: 0x0911, 0x126c: 0x0915, 0x126d: 0x0919, 0x126e: 0x091d, 0x126f: 0x0921,
|
||||||
0x1270: 0x082d, 0x1271: 0x07f1, 0x1272: 0x07f5, 0x1273: 0x07f9, 0x1274: 0x0841, 0x1275: 0x07fd,
|
0x1270: 0x082d, 0x1271: 0x07f1, 0x1272: 0x07f5, 0x1273: 0x07f9, 0x1274: 0x0841, 0x1275: 0x07fd,
|
||||||
|
Loading…
Reference in New Issue
Block a user