1
0
mirror of https://github.com/golang/go synced 2024-11-23 04:00:03 -07:00

unicode: update to Unicode 10.0.0

Also includes all derived values as well as
vendored packages.

Generated by running
    UNICODE_VERSION=10.0.0 go generate
in golang.org/x/text

and modified by hand to add the tests and
entries in next.txt for new scripts and properties.

Closes Issue #21471

Change-Id: I1d10ee3887bd1fd3d5a756ee0d04bd6ec2814ba1
Reviewed-on: https://go-review.googlesource.com/63953
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Andrew Bonventre <andybons@golang.org>
This commit is contained in:
Marcel van Lohuizen 2017-09-15 16:18:01 +02:00
parent 8c532f5fc4
commit 2fd9549797
16 changed files with 4812 additions and 4445 deletions

View File

@ -344,3 +344,4 @@ pkg syscall (openbsd-386), const SYS_KILL = 37
pkg syscall (openbsd-386-cgo), const SYS_KILL = 37
pkg syscall (openbsd-amd64), const SYS_KILL = 37
pkg syscall (openbsd-amd64-cgo), const SYS_KILL = 37
pkg unicode, const Version = "9.0.0"

View File

@ -1 +1,7 @@
pkg math/big, const MaxBase = 62
pkg unicode, const Version = "10.0.0"
pkg unicode, var Masaram_Gondi *RangeTable
pkg unicode, var Nushu *RangeTable
pkg unicode, var Soyombo *RangeTable
pkg unicode, var Zanabazar_Square *RangeTable
pkg unicode, var Regional_Indicator *RangeTable

View File

@ -7,7 +7,7 @@
package strconv
// (462+139+82)*2 + (378)*4 = 2878 bytes
// (456+140+86)*2 + (396)*4 = 2948 bytes
var isPrint16 = []uint16{
0x0020, 0x007e,
@ -25,7 +25,7 @@ var isPrint16 = []uint16{
0x07c0, 0x07fa,
0x0800, 0x082d,
0x0830, 0x085b,
0x085e, 0x085e,
0x085e, 0x086a,
0x08a0, 0x08bd,
0x08d4, 0x098c,
0x098f, 0x0990,
@ -36,7 +36,7 @@ var isPrint16 = []uint16{
0x09cb, 0x09ce,
0x09d7, 0x09d7,
0x09dc, 0x09e3,
0x09e6, 0x09fb,
0x09e6, 0x09fd,
0x0a01, 0x0a0a,
0x0a0f, 0x0a10,
0x0a13, 0x0a39,
@ -51,8 +51,7 @@ var isPrint16 = []uint16{
0x0ad0, 0x0ad0,
0x0ae0, 0x0ae3,
0x0ae6, 0x0af1,
0x0af9, 0x0af9,
0x0b01, 0x0b0c,
0x0af9, 0x0b0c,
0x0b0f, 0x0b10,
0x0b13, 0x0b39,
0x0b3c, 0x0b44,
@ -82,8 +81,7 @@ var isPrint16 = []uint16{
0x0cd5, 0x0cd6,
0x0cde, 0x0ce3,
0x0ce6, 0x0cf2,
0x0d01, 0x0d3a,
0x0d3d, 0x0d4f,
0x0d00, 0x0d4f,
0x0d54, 0x0d63,
0x0d66, 0x0d7f,
0x0d82, 0x0d96,
@ -154,8 +152,7 @@ var isPrint16 = []uint16{
0x1c4d, 0x1c88,
0x1cc0, 0x1cc7,
0x1cd0, 0x1cf9,
0x1d00, 0x1df5,
0x1dfb, 0x1f15,
0x1d00, 0x1f15,
0x1f18, 0x1f1d,
0x1f20, 0x1f45,
0x1f48, 0x1f4d,
@ -167,7 +164,7 @@ var isPrint16 = []uint16{
0x2030, 0x205e,
0x2070, 0x2071,
0x2074, 0x209c,
0x20a0, 0x20be,
0x20a0, 0x20bf,
0x20d0, 0x20f0,
0x2100, 0x218b,
0x2190, 0x2426,
@ -175,7 +172,7 @@ var isPrint16 = []uint16{
0x2460, 0x2b73,
0x2b76, 0x2b95,
0x2b98, 0x2bb9,
0x2bbd, 0x2bd1,
0x2bbd, 0x2bd2,
0x2bec, 0x2bef,
0x2c00, 0x2cf3,
0x2cf9, 0x2d27,
@ -183,17 +180,17 @@ var isPrint16 = []uint16{
0x2d30, 0x2d67,
0x2d6f, 0x2d70,
0x2d7f, 0x2d96,
0x2da0, 0x2e44,
0x2da0, 0x2e49,
0x2e80, 0x2ef3,
0x2f00, 0x2fd5,
0x2ff0, 0x2ffb,
0x3001, 0x3096,
0x3099, 0x30ff,
0x3105, 0x312d,
0x3105, 0x312e,
0x3131, 0x31ba,
0x31c0, 0x31e3,
0x31f0, 0x4db5,
0x4dc0, 0x9fd5,
0x4dc0, 0x9fea,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -254,6 +251,7 @@ var isNotPrint16 = []uint16{
0x0590,
0x06dd,
0x083f,
0x085f,
0x08b5,
0x08e2,
0x0984,
@ -275,6 +273,7 @@ var isNotPrint16 = []uint16{
0x0ab4,
0x0ac6,
0x0aca,
0x0b00,
0x0b04,
0x0b29,
0x0b31,
@ -341,7 +340,7 @@ var isNotPrint16 = []uint16{
0x1771,
0x191f,
0x1a5f,
0x1cf7,
0x1dfa,
0x1f58,
0x1f5a,
0x1f5c,
@ -351,7 +350,6 @@ var isNotPrint16 = []uint16{
0x1fdc,
0x1ff5,
0x208f,
0x23ff,
0x2bc9,
0x2c2f,
0x2c5f,
@ -398,7 +396,7 @@ var isPrint32 = []uint32{
0x0102a0, 0x0102d0,
0x0102e0, 0x0102fb,
0x010300, 0x010323,
0x010330, 0x01034a,
0x01032d, 0x01034a,
0x010350, 0x01037a,
0x010380, 0x0103c3,
0x0103c8, 0x0103d5,
@ -481,11 +479,17 @@ var isPrint32 = []uint32{
0x011730, 0x01173f,
0x0118a0, 0x0118f2,
0x0118ff, 0x0118ff,
0x011a00, 0x011a47,
0x011a50, 0x011a83,
0x011a86, 0x011aa2,
0x011ac0, 0x011af8,
0x011c00, 0x011c45,
0x011c50, 0x011c6c,
0x011c70, 0x011c8f,
0x011c92, 0x011cb6,
0x011d00, 0x011d36,
0x011d3a, 0x011d47,
0x011d50, 0x011d59,
0x012000, 0x012399,
0x012400, 0x012474,
0x012480, 0x012543,
@ -502,10 +506,11 @@ var isPrint32 = []uint32{
0x016f00, 0x016f44,
0x016f50, 0x016f7e,
0x016f8f, 0x016f9f,
0x016fe0, 0x016fe0,
0x016fe0, 0x016fe1,
0x017000, 0x0187ec,
0x018800, 0x018af2,
0x01b000, 0x01b001,
0x01b000, 0x01b11e,
0x01b170, 0x01b2fb,
0x01bc00, 0x01bc6a,
0x01bc70, 0x01bc7c,
0x01bc80, 0x01bc88,
@ -553,9 +558,10 @@ var isPrint32 = []uint32{
0x01f210, 0x01f23b,
0x01f240, 0x01f248,
0x01f250, 0x01f251,
0x01f300, 0x01f6d2,
0x01f260, 0x01f265,
0x01f300, 0x01f6d4,
0x01f6e0, 0x01f6ec,
0x01f6f0, 0x01f6f6,
0x01f6f0, 0x01f6f8,
0x01f700, 0x01f773,
0x01f780, 0x01f7d4,
0x01f800, 0x01f80b,
@ -563,16 +569,17 @@ var isPrint32 = []uint32{
0x01f850, 0x01f859,
0x01f860, 0x01f887,
0x01f890, 0x01f8ad,
0x01f910, 0x01f927,
0x01f930, 0x01f930,
0x01f933, 0x01f94b,
0x01f950, 0x01f95e,
0x01f980, 0x01f991,
0x01f900, 0x01f90b,
0x01f910, 0x01f94c,
0x01f950, 0x01f96b,
0x01f980, 0x01f997,
0x01f9c0, 0x01f9c0,
0x01f9d0, 0x01f9e6,
0x020000, 0x02a6d6,
0x02a700, 0x02b734,
0x02b740, 0x02b81d,
0x02b820, 0x02cea1,
0x02ceb0, 0x02ebe0,
0x02f800, 0x02fa1d,
0x0e0100, 0x0e01ef,
}
@ -605,9 +612,14 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x1334,
0x145a,
0x145c,
0x1a9d,
0x1c09,
0x1c37,
0x1ca8,
0x1d07,
0x1d0a,
0x1d3b,
0x1d3e,
0x246f,
0x6a5f,
0x6b5a,
@ -658,7 +670,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xf0c0,
0xf0d0,
0xf12f,
0xf91f,
0xf93f,
}

View File

@ -14,8 +14,13 @@ type T struct {
script string
}
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0, 8.0.0,
// 9.0.0 and 10.0.0,
// mostly to discover when new scripts and categories arise.
// If this test fails, add the missing scripts to the test and add entries
// of the form
// pkg unicode, var <new script> *RangeTable
// to api/next.txt.
var inTest = []T{
{0x11711, "Ahom"},
{0x1e900, "Adlam"},
@ -92,6 +97,7 @@ var inTest = []T{
{0x0843, "Mandaic"},
{0x10ac8, "Manichaean"},
{0x11cB6, "Marchen"},
{0x11d59, "Masaram_Gondi"},
{0xabd0, "Meetei_Mayek"},
{0x1e800, "Mende_Kikakui"},
{0x1099f, "Meroitic_Hieroglyphs"},
@ -106,6 +112,7 @@ var inTest = []T{
{0x11400, "Newa"},
{0x19c3, "New_Tai_Lue"},
{0x07f8, "Nko"},
{0x1b170, "Nushu"},
{0x169b, "Ogham"},
{0x1c6a, "Ol_Chiki"},
{0x10C80, "Old_Hungarian"},
@ -134,6 +141,7 @@ var inTest = []T{
{0x1D920, "SignWriting"},
{0x0dbd, "Sinhala"},
{0x110d0, "Sora_Sompeng"},
{0x11a99, "Soyombo"},
{0x1ba3, "Sundanese"},
{0xa803, "Syloti_Nagri"},
{0x070f, "Syriac"},
@ -155,6 +163,7 @@ var inTest = []T{
{0xa60e, "Vai"},
{0x118ff, "Warang_Citi"},
{0xa216, "Yi"},
{0x11a0a, "Zanabazar_Square"},
}
var outTest = []T{ // not really worth being thorough
@ -229,6 +238,7 @@ var inPropTest = []T{
{0x06DD, "Prepended_Concatenation_Mark"},
{0x300D, "Quotation_Mark"},
{0x2EF3, "Radical"},
{0x1f1ff, "Regional_Indicator"},
{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
{0x061F, "Sentence_Terminal"},
{0x2071, "Soft_Dotted"},

View File

@ -4,12 +4,12 @@
// Code generated by maketables; DO NOT EDIT.
// To regenerate, run:
// maketables --tables=all --data=http://www.unicode.org/Public/9.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt
// maketables --tables=all --data=http://www.unicode.org/Public/10.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/10.0.0/ucd/CaseFolding.txt
package unicode
// Version is the Unicode edition from which the tables are derived.
const Version = "9.0.0"
const Version = "10.0.0"
// Categories is the set of Unicode category tables.
var Categories = map[string]*RangeTable{
@ -172,6 +172,7 @@ var _L = &RangeTable{
{0x081a, 0x0824, 10},
{0x0828, 0x0840, 24},
{0x0841, 0x0858, 1},
{0x0860, 0x086a, 1},
{0x08a0, 0x08b4, 1},
{0x08b6, 0x08bd, 1},
{0x0904, 0x0939, 1},
@ -188,7 +189,8 @@ var _L = &RangeTable{
{0x09dc, 0x09dd, 1},
{0x09df, 0x09e1, 1},
{0x09f0, 0x09f1, 1},
{0x0a05, 0x0a0a, 1},
{0x09fc, 0x0a05, 9},
{0x0a06, 0x0a0a, 1},
{0x0a0f, 0x0a10, 1},
{0x0a13, 0x0a28, 1},
{0x0a2a, 0x0a30, 1},
@ -403,12 +405,12 @@ var _L = &RangeTable{
{0x309d, 0x309f, 1},
{0x30a1, 0x30fa, 1},
{0x30fc, 0x30ff, 1},
{0x3105, 0x312d, 1},
{0x3105, 0x312e, 1},
{0x3131, 0x318e, 1},
{0x31a0, 0x31ba, 1},
{0x31f0, 0x31ff, 1},
{0x3400, 0x4db5, 1},
{0x4e00, 0x9fd5, 1},
{0x4e00, 0x9fea, 1},
{0xa000, 0xa48c, 1},
{0xa4d0, 0xa4fd, 1},
{0xa500, 0xa60c, 1},
@ -498,7 +500,7 @@ var _L = &RangeTable{
{0x10280, 0x1029c, 1},
{0x102a0, 0x102d0, 1},
{0x10300, 0x1031f, 1},
{0x10330, 0x10340, 1},
{0x1032d, 0x10340, 1},
{0x10342, 0x10349, 1},
{0x10350, 0x10375, 1},
{0x10380, 0x1039d, 1},
@ -578,13 +580,21 @@ var _L = &RangeTable{
{0x11681, 0x116aa, 1},
{0x11700, 0x11719, 1},
{0x118a0, 0x118df, 1},
{0x118ff, 0x11ac0, 449},
{0x11ac1, 0x11af8, 1},
{0x118ff, 0x11a00, 257},
{0x11a0b, 0x11a32, 1},
{0x11a3a, 0x11a50, 22},
{0x11a5c, 0x11a83, 1},
{0x11a86, 0x11a89, 1},
{0x11ac0, 0x11af8, 1},
{0x11c00, 0x11c08, 1},
{0x11c0a, 0x11c2e, 1},
{0x11c40, 0x11c72, 50},
{0x11c73, 0x11c8f, 1},
{0x12000, 0x12399, 1},
{0x11d00, 0x11d06, 1},
{0x11d08, 0x11d09, 1},
{0x11d0b, 0x11d30, 1},
{0x11d46, 0x12000, 698},
{0x12001, 0x12399, 1},
{0x12480, 0x12543, 1},
{0x13000, 0x1342e, 1},
{0x14400, 0x14646, 1},
@ -598,10 +608,11 @@ var _L = &RangeTable{
{0x16f00, 0x16f44, 1},
{0x16f50, 0x16f93, 67},
{0x16f94, 0x16f9f, 1},
{0x16fe0, 0x17000, 32},
{0x17001, 0x187ec, 1},
{0x16fe0, 0x16fe1, 1},
{0x17000, 0x187ec, 1},
{0x18800, 0x18af2, 1},
{0x1b000, 0x1b001, 1},
{0x1b000, 0x1b11e, 1},
{0x1b170, 0x1b2fb, 1},
{0x1bc00, 0x1bc6a, 1},
{0x1bc70, 0x1bc7c, 1},
{0x1bc80, 0x1bc88, 1},
@ -666,6 +677,7 @@ var _L = &RangeTable{
{0x2a700, 0x2b734, 1},
{0x2b740, 0x2b81d, 1},
{0x2b820, 0x2cea1, 1},
{0x2ceb0, 0x2ebe0, 1},
{0x2f800, 0x2fa1d, 1},
},
LatinOffset: 6,
@ -876,7 +888,7 @@ var _Lm = &RangeTable{
{0x16b40, 0x16b40, 1},
{0x16b41, 0x16b43, 1},
{0x16f93, 0x16f9f, 1},
{0x16fe0, 0x16fe0, 1},
{0x16fe0, 0x16fe1, 1},
},
}
@ -902,6 +914,7 @@ var _Lo = &RangeTable{
{0x07cb, 0x07ea, 1},
{0x0800, 0x0815, 1},
{0x0840, 0x0858, 1},
{0x0860, 0x086a, 1},
{0x08a0, 0x08b4, 1},
{0x08b6, 0x08bd, 1},
{0x0904, 0x0939, 1},
@ -918,7 +931,8 @@ var _Lo = &RangeTable{
{0x09dc, 0x09dd, 1},
{0x09df, 0x09e1, 1},
{0x09f0, 0x09f1, 1},
{0x0a05, 0x0a0a, 1},
{0x09fc, 0x0a05, 9},
{0x0a06, 0x0a0a, 1},
{0x0a0f, 0x0a10, 1},
{0x0a13, 0x0a28, 1},
{0x0a2a, 0x0a30, 1},
@ -1086,12 +1100,12 @@ var _Lo = &RangeTable{
{0x309f, 0x30a1, 2},
{0x30a2, 0x30fa, 1},
{0x30ff, 0x3105, 6},
{0x3106, 0x312d, 1},
{0x3106, 0x312e, 1},
{0x3131, 0x318e, 1},
{0x31a0, 0x31ba, 1},
{0x31f0, 0x31ff, 1},
{0x3400, 0x4db5, 1},
{0x4e00, 0x9fd5, 1},
{0x4e00, 0x9fea, 1},
{0xa000, 0xa014, 1},
{0xa016, 0xa48c, 1},
{0xa4d0, 0xa4f7, 1},
@ -1174,7 +1188,7 @@ var _Lo = &RangeTable{
{0x10280, 0x1029c, 1},
{0x102a0, 0x102d0, 1},
{0x10300, 0x1031f, 1},
{0x10330, 0x10340, 1},
{0x1032d, 0x10340, 1},
{0x10342, 0x10349, 1},
{0x10350, 0x10375, 1},
{0x10380, 0x1039d, 1},
@ -1249,13 +1263,21 @@ var _Lo = &RangeTable{
{0x11644, 0x11680, 60},
{0x11681, 0x116aa, 1},
{0x11700, 0x11719, 1},
{0x118ff, 0x11ac0, 449},
{0x11ac1, 0x11af8, 1},
{0x118ff, 0x11a00, 257},
{0x11a0b, 0x11a32, 1},
{0x11a3a, 0x11a50, 22},
{0x11a5c, 0x11a83, 1},
{0x11a86, 0x11a89, 1},
{0x11ac0, 0x11af8, 1},
{0x11c00, 0x11c08, 1},
{0x11c0a, 0x11c2e, 1},
{0x11c40, 0x11c72, 50},
{0x11c73, 0x11c8f, 1},
{0x12000, 0x12399, 1},
{0x11d00, 0x11d06, 1},
{0x11d08, 0x11d09, 1},
{0x11d0b, 0x11d30, 1},
{0x11d46, 0x12000, 698},
{0x12001, 0x12399, 1},
{0x12480, 0x12543, 1},
{0x13000, 0x1342e, 1},
{0x14400, 0x14646, 1},
@ -1269,7 +1291,8 @@ var _Lo = &RangeTable{
{0x16f50, 0x17000, 176},
{0x17001, 0x187ec, 1},
{0x18800, 0x18af2, 1},
{0x1b000, 0x1b001, 1},
{0x1b000, 0x1b11e, 1},
{0x1b170, 0x1b2fb, 1},
{0x1bc00, 0x1bc6a, 1},
{0x1bc70, 0x1bc7c, 1},
{0x1bc80, 0x1bc88, 1},
@ -1303,6 +1326,7 @@ var _Lo = &RangeTable{
{0x2a700, 0x2b734, 1},
{0x2b740, 0x2b81d, 1},
{0x2b820, 0x2cea1, 1},
{0x2ceb0, 0x2ebe0, 1},
{0x2f800, 0x2fa1d, 1},
},
LatinOffset: 1,
@ -1516,6 +1540,7 @@ var _M = &RangeTable{
{0x0ac7, 0x0ac9, 1},
{0x0acb, 0x0acd, 1},
{0x0ae2, 0x0ae3, 1},
{0x0afa, 0x0aff, 1},
{0x0b01, 0x0b03, 1},
{0x0b3c, 0x0b3e, 2},
{0x0b3f, 0x0b44, 1},
@ -1541,7 +1566,8 @@ var _M = &RangeTable{
{0x0cca, 0x0ccd, 1},
{0x0cd5, 0x0cd6, 1},
{0x0ce2, 0x0ce3, 1},
{0x0d01, 0x0d03, 1},
{0x0d00, 0x0d03, 1},
{0x0d3b, 0x0d3c, 1},
{0x0d3e, 0x0d44, 1},
{0x0d46, 0x0d48, 1},
{0x0d4a, 0x0d4d, 1},
@ -1604,8 +1630,8 @@ var _M = &RangeTable{
{0x1cd4, 0x1ce8, 1},
{0x1ced, 0x1cf2, 5},
{0x1cf3, 0x1cf4, 1},
{0x1cf8, 0x1cf9, 1},
{0x1dc0, 0x1df5, 1},
{0x1cf7, 0x1cf9, 1},
{0x1dc0, 0x1df9, 1},
{0x1dfb, 0x1dff, 1},
{0x20d0, 0x20f0, 1},
{0x2cef, 0x2cf1, 1},
@ -1684,11 +1710,22 @@ var _M = &RangeTable{
{0x11630, 0x11640, 1},
{0x116ab, 0x116b7, 1},
{0x1171d, 0x1172b, 1},
{0x11a01, 0x11a0a, 1},
{0x11a33, 0x11a39, 1},
{0x11a3b, 0x11a3e, 1},
{0x11a47, 0x11a51, 10},
{0x11a52, 0x11a5b, 1},
{0x11a8a, 0x11a99, 1},
{0x11c2f, 0x11c36, 1},
{0x11c38, 0x11c3f, 1},
{0x11c92, 0x11ca7, 1},
{0x11ca9, 0x11cb6, 1},
{0x16af0, 0x16af4, 1},
{0x11d31, 0x11d36, 1},
{0x11d3a, 0x11d3c, 2},
{0x11d3d, 0x11d3f, 2},
{0x11d40, 0x11d45, 1},
{0x11d47, 0x16af0, 19881},
{0x16af1, 0x16af4, 1},
{0x16b30, 0x16b36, 1},
{0x16f51, 0x16f7e, 1},
{0x16f8f, 0x16f92, 1},
@ -1794,11 +1831,12 @@ var _Mc = &RangeTable{
{0x1c25, 0x1c2b, 1},
{0x1c34, 0x1c35, 1},
{0x1ce1, 0x1cf2, 17},
{0x1cf3, 0x302e, 4923},
{0x302f, 0xa823, 30708},
{0xa824, 0xa827, 3},
{0xa880, 0xa881, 1},
{0xa8b4, 0xa8c3, 1},
{0x1cf3, 0x1cf7, 4},
{0x302e, 0x302f, 1},
{0xa823, 0xa824, 1},
{0xa827, 0xa880, 89},
{0xa881, 0xa8b4, 51},
{0xa8b5, 0xa8c3, 1},
{0xa952, 0xa953, 1},
{0xa983, 0xa9b4, 49},
{0xa9b5, 0xa9ba, 5},
@ -1849,6 +1887,9 @@ var _Mc = &RangeTable{
{0x116ae, 0x116af, 1},
{0x116b6, 0x11720, 106},
{0x11721, 0x11726, 5},
{0x11a07, 0x11a08, 1},
{0x11a39, 0x11a57, 30},
{0x11a58, 0x11a97, 63},
{0x11c2f, 0x11c3e, 15},
{0x11ca9, 0x11cb1, 8},
{0x11cb4, 0x16f51, 21149},
@ -1914,9 +1955,11 @@ var _Mn = &RangeTable{
{0x0ac2, 0x0ac5, 1},
{0x0ac7, 0x0ac8, 1},
{0x0acd, 0x0ae2, 21},
{0x0ae3, 0x0b01, 30},
{0x0b3c, 0x0b3f, 3},
{0x0b41, 0x0b44, 1},
{0x0ae3, 0x0afa, 23},
{0x0afb, 0x0aff, 1},
{0x0b01, 0x0b3c, 59},
{0x0b3f, 0x0b41, 2},
{0x0b42, 0x0b44, 1},
{0x0b4d, 0x0b56, 9},
{0x0b62, 0x0b63, 1},
{0x0b82, 0x0bc0, 62},
@ -1930,8 +1973,9 @@ var _Mn = &RangeTable{
{0x0cbf, 0x0cc6, 7},
{0x0ccc, 0x0ccd, 1},
{0x0ce2, 0x0ce3, 1},
{0x0d01, 0x0d41, 64},
{0x0d42, 0x0d44, 1},
{0x0d00, 0x0d01, 1},
{0x0d3b, 0x0d3c, 1},
{0x0d41, 0x0d44, 1},
{0x0d4d, 0x0d62, 21},
{0x0d63, 0x0dca, 103},
{0x0dd2, 0x0dd4, 1},
@ -2004,7 +2048,7 @@ var _Mn = &RangeTable{
{0x1ce2, 0x1ce8, 1},
{0x1ced, 0x1cf4, 7},
{0x1cf8, 0x1cf9, 1},
{0x1dc0, 0x1df5, 1},
{0x1dc0, 0x1df9, 1},
{0x1dfb, 0x1dff, 1},
{0x20d0, 0x20dc, 1},
{0x20e1, 0x20e5, 4},
@ -2093,6 +2137,15 @@ var _Mn = &RangeTable{
{0x1171e, 0x1171f, 1},
{0x11722, 0x11725, 1},
{0x11727, 0x1172b, 1},
{0x11a01, 0x11a06, 1},
{0x11a09, 0x11a0a, 1},
{0x11a33, 0x11a38, 1},
{0x11a3b, 0x11a3e, 1},
{0x11a47, 0x11a51, 10},
{0x11a52, 0x11a56, 1},
{0x11a59, 0x11a5b, 1},
{0x11a8a, 0x11a96, 1},
{0x11a98, 0x11a99, 1},
{0x11c30, 0x11c36, 1},
{0x11c38, 0x11c3d, 1},
{0x11c3f, 0x11c92, 83},
@ -2100,7 +2153,12 @@ var _Mn = &RangeTable{
{0x11caa, 0x11cb0, 1},
{0x11cb2, 0x11cb3, 1},
{0x11cb5, 0x11cb6, 1},
{0x16af0, 0x16af4, 1},
{0x11d31, 0x11d36, 1},
{0x11d3a, 0x11d3c, 2},
{0x11d3d, 0x11d3f, 2},
{0x11d40, 0x11d45, 1},
{0x11d47, 0x16af0, 19881},
{0x16af1, 0x16af4, 1},
{0x16b30, 0x16b36, 1},
{0x16f8f, 0x16f92, 1},
{0x1bc9d, 0x1bc9e, 1},
@ -2233,6 +2291,7 @@ var _N = &RangeTable{
{0x11730, 0x1173b, 1},
{0x118e0, 0x118f2, 1},
{0x11c50, 0x11c6c, 1},
{0x11d50, 0x11d59, 1},
{0x12400, 0x1246e, 1},
{0x16a60, 0x16a69, 1},
{0x16b50, 0x16b59, 1},
@ -2300,6 +2359,7 @@ var _Nd = &RangeTable{
{0x11730, 0x11739, 1},
{0x118e0, 0x118e9, 1},
{0x11c50, 0x11c59, 1},
{0x11d50, 0x11d59, 1},
{0x16a60, 0x16a69, 1},
{0x16b50, 0x16b59, 1},
{0x1d7ce, 0x1d7ff, 1},
@ -2423,10 +2483,10 @@ var _P = &RangeTable{
{0x0830, 0x083e, 1},
{0x085e, 0x0964, 262},
{0x0965, 0x0970, 11},
{0x0af0, 0x0df4, 772},
{0x0e4f, 0x0e5a, 11},
{0x0e5b, 0x0f04, 169},
{0x0f05, 0x0f12, 1},
{0x09fd, 0x0af0, 243},
{0x0df4, 0x0e4f, 91},
{0x0e5a, 0x0e5b, 1},
{0x0f04, 0x0f12, 1},
{0x0f14, 0x0f3a, 38},
{0x0f3b, 0x0f3d, 1},
{0x0f85, 0x0fd0, 75},
@ -2471,7 +2531,7 @@ var _P = &RangeTable{
{0x2cfe, 0x2cff, 1},
{0x2d70, 0x2e00, 144},
{0x2e01, 0x2e2e, 1},
{0x2e30, 0x2e44, 1},
{0x2e30, 0x2e49, 1},
{0x3001, 0x3003, 1},
{0x3008, 0x3011, 1},
{0x3014, 0x301f, 1},
@ -2535,6 +2595,9 @@ var _P = &RangeTable{
{0x11641, 0x11643, 1},
{0x11660, 0x1166c, 1},
{0x1173c, 0x1173e, 1},
{0x11a3f, 0x11a46, 1},
{0x11a9a, 0x11a9c, 1},
{0x11a9e, 0x11aa2, 1},
{0x11c41, 0x11c45, 1},
{0x11c70, 0x11c71, 1},
{0x12470, 0x12474, 1},
@ -2650,10 +2713,10 @@ var _Po = &RangeTable{
{0x0830, 0x083e, 1},
{0x085e, 0x0964, 262},
{0x0965, 0x0970, 11},
{0x0af0, 0x0df4, 772},
{0x0e4f, 0x0e5a, 11},
{0x0e5b, 0x0f04, 169},
{0x0f05, 0x0f12, 1},
{0x09fd, 0x0af0, 243},
{0x0df4, 0x0e4f, 91},
{0x0e5a, 0x0e5b, 1},
{0x0f04, 0x0f12, 1},
{0x0f14, 0x0f85, 113},
{0x0fd0, 0x0fd4, 1},
{0x0fd9, 0x0fda, 1},
@ -2699,8 +2762,8 @@ var _Po = &RangeTable{
{0x2e30, 0x2e39, 1},
{0x2e3c, 0x2e3f, 1},
{0x2e41, 0x2e43, 2},
{0x2e44, 0x3001, 445},
{0x3002, 0x3003, 1},
{0x2e44, 0x2e49, 1},
{0x3001, 0x3003, 1},
{0x303d, 0x30fb, 190},
{0xa4fe, 0xa4ff, 1},
{0xa60d, 0xa60f, 1},
@ -2763,6 +2826,9 @@ var _Po = &RangeTable{
{0x11641, 0x11643, 1},
{0x11660, 0x1166c, 1},
{0x1173c, 0x1173e, 1},
{0x11a3f, 0x11a46, 1},
{0x11a9a, 0x11a9c, 1},
{0x11a9e, 0x11aa2, 1},
{0x11c41, 0x11c45, 1},
{0x11c70, 0x11c71, 1},
{0x12470, 0x12474, 1},
@ -2863,7 +2929,7 @@ var _S = &RangeTable{
{0x2044, 0x2052, 14},
{0x207a, 0x207c, 1},
{0x208a, 0x208c, 1},
{0x20a0, 0x20be, 1},
{0x20a0, 0x20bf, 1},
{0x2100, 0x2101, 1},
{0x2103, 0x2106, 1},
{0x2108, 0x2109, 1},
@ -2879,8 +2945,7 @@ var _S = &RangeTable{
{0x218b, 0x2190, 5},
{0x2191, 0x2307, 1},
{0x230c, 0x2328, 1},
{0x232b, 0x23fe, 1},
{0x2400, 0x2426, 1},
{0x232b, 0x2426, 1},
{0x2440, 0x244a, 1},
{0x249c, 0x24e9, 1},
{0x2500, 0x2767, 1},
@ -2893,7 +2958,7 @@ var _S = &RangeTable{
{0x2b76, 0x2b95, 1},
{0x2b98, 0x2bb9, 1},
{0x2bbd, 0x2bc8, 1},
{0x2bca, 0x2bd1, 1},
{0x2bca, 0x2bd2, 1},
{0x2bec, 0x2bef, 1},
{0x2ce5, 0x2cea, 1},
{0x2e80, 0x2e99, 1},
@ -2982,9 +3047,10 @@ var _S = &RangeTable{
{0x1f210, 0x1f23b, 1},
{0x1f240, 0x1f248, 1},
{0x1f250, 0x1f251, 1},
{0x1f300, 0x1f6d2, 1},
{0x1f260, 0x1f265, 1},
{0x1f300, 0x1f6d4, 1},
{0x1f6e0, 0x1f6ec, 1},
{0x1f6f0, 0x1f6f6, 1},
{0x1f6f0, 0x1f6f8, 1},
{0x1f700, 0x1f773, 1},
{0x1f780, 0x1f7d4, 1},
{0x1f800, 0x1f80b, 1},
@ -2992,14 +3058,13 @@ var _S = &RangeTable{
{0x1f850, 0x1f859, 1},
{0x1f860, 0x1f887, 1},
{0x1f890, 0x1f8ad, 1},
{0x1f910, 0x1f91e, 1},
{0x1f920, 0x1f927, 1},
{0x1f930, 0x1f933, 3},
{0x1f934, 0x1f93e, 1},
{0x1f940, 0x1f94b, 1},
{0x1f950, 0x1f95e, 1},
{0x1f980, 0x1f991, 1},
{0x1f9c0, 0x1f9c0, 1},
{0x1f900, 0x1f90b, 1},
{0x1f910, 0x1f93e, 1},
{0x1f940, 0x1f94c, 1},
{0x1f950, 0x1f96b, 1},
{0x1f980, 0x1f997, 1},
{0x1f9c0, 0x1f9d0, 16},
{0x1f9d1, 0x1f9e6, 1},
},
LatinOffset: 10,
}
@ -3013,7 +3078,7 @@ var _Sc = &RangeTable{
{0x09fb, 0x0af1, 246},
{0x0bf9, 0x0e3f, 582},
{0x17db, 0x20a0, 2245},
{0x20a1, 0x20be, 1},
{0x20a1, 0x20bf, 1},
{0xa838, 0xfdfc, 21956},
{0xfe69, 0xff04, 155},
{0xffe0, 0xffe1, 1},
@ -3167,8 +3232,7 @@ var _So = &RangeTable{
{0x232b, 0x237b, 1},
{0x237d, 0x239a, 1},
{0x23b4, 0x23db, 1},
{0x23e2, 0x23fe, 1},
{0x2400, 0x2426, 1},
{0x23e2, 0x2426, 1},
{0x2440, 0x244a, 1},
{0x249c, 0x24e9, 1},
{0x2500, 0x25b6, 1},
@ -3184,7 +3248,7 @@ var _So = &RangeTable{
{0x2b76, 0x2b95, 1},
{0x2b98, 0x2bb9, 1},
{0x2bbd, 0x2bc8, 1},
{0x2bca, 0x2bd1, 1},
{0x2bca, 0x2bd2, 1},
{0x2bec, 0x2bef, 1},
{0x2ce5, 0x2cea, 1},
{0x2e80, 0x2e99, 1},
@ -3256,10 +3320,11 @@ var _So = &RangeTable{
{0x1f210, 0x1f23b, 1},
{0x1f240, 0x1f248, 1},
{0x1f250, 0x1f251, 1},
{0x1f260, 0x1f265, 1},
{0x1f300, 0x1f3fa, 1},
{0x1f400, 0x1f6d2, 1},
{0x1f400, 0x1f6d4, 1},
{0x1f6e0, 0x1f6ec, 1},
{0x1f6f0, 0x1f6f6, 1},
{0x1f6f0, 0x1f6f8, 1},
{0x1f700, 0x1f773, 1},
{0x1f780, 0x1f7d4, 1},
{0x1f800, 0x1f80b, 1},
@ -3267,14 +3332,13 @@ var _So = &RangeTable{
{0x1f850, 0x1f859, 1},
{0x1f860, 0x1f887, 1},
{0x1f890, 0x1f8ad, 1},
{0x1f910, 0x1f91e, 1},
{0x1f920, 0x1f927, 1},
{0x1f930, 0x1f933, 3},
{0x1f934, 0x1f93e, 1},
{0x1f940, 0x1f94b, 1},
{0x1f950, 0x1f95e, 1},
{0x1f980, 0x1f991, 1},
{0x1f9c0, 0x1f9c0, 1},
{0x1f900, 0x1f90b, 1},
{0x1f910, 0x1f93e, 1},
{0x1f940, 0x1f94c, 1},
{0x1f950, 0x1f96b, 1},
{0x1f980, 0x1f997, 1},
{0x1f9c0, 0x1f9d0, 16},
{0x1f9d1, 0x1f9e6, 1},
},
LatinOffset: 2,
}
@ -3366,7 +3430,7 @@ var (
)
// Generated by running
// maketables --scripts=all --url=http://www.unicode.org/Public/9.0.0/ucd/
// maketables --scripts=all --url=http://www.unicode.org/Public/10.0.0/ucd/
// DO NOT EDIT
// Scripts is the set of Unicode script tables.
@ -3445,6 +3509,7 @@ var Scripts = map[string]*RangeTable{
"Mandaic": Mandaic,
"Manichaean": Manichaean,
"Marchen": Marchen,
"Masaram_Gondi": Masaram_Gondi,
"Meetei_Mayek": Meetei_Mayek,
"Mende_Kikakui": Mende_Kikakui,
"Meroitic_Cursive": Meroitic_Cursive,
@ -3459,6 +3524,7 @@ var Scripts = map[string]*RangeTable{
"New_Tai_Lue": New_Tai_Lue,
"Newa": Newa,
"Nko": Nko,
"Nushu": Nushu,
"Ogham": Ogham,
"Ol_Chiki": Ol_Chiki,
"Old_Hungarian": Old_Hungarian,
@ -3487,6 +3553,7 @@ var Scripts = map[string]*RangeTable{
"SignWriting": SignWriting,
"Sinhala": Sinhala,
"Sora_Sompeng": Sora_Sompeng,
"Soyombo": Soyombo,
"Sundanese": Sundanese,
"Syloti_Nagri": Syloti_Nagri,
"Syriac": Syriac,
@ -3508,6 +3575,7 @@ var Scripts = map[string]*RangeTable{
"Vai": Vai,
"Warang_Citi": Warang_Citi,
"Yi": Yi,
"Zanabazar_Square": Zanabazar_Square,
}
var _Adlam = &RangeTable{
@ -3540,6 +3608,7 @@ var _Arabic = &RangeTable{
{0x0600, 0x0604, 1},
{0x0606, 0x060b, 1},
{0x060d, 0x061a, 1},
{0x061c, 0x061c, 1},
{0x061e, 0x061e, 1},
{0x0620, 0x063f, 1},
{0x0641, 0x064a, 1},
@ -3663,7 +3732,7 @@ var _Bengali = &RangeTable{
{0x09d7, 0x09d7, 1},
{0x09dc, 0x09dd, 1},
{0x09df, 0x09e3, 1},
{0x09e6, 0x09fb, 1},
{0x09e6, 0x09fd, 1},
},
}
@ -3680,7 +3749,7 @@ var _Bhaiksuki = &RangeTable{
var _Bopomofo = &RangeTable{
R16: []Range16{
{0x02ea, 0x02eb, 1},
{0x3105, 0x312d, 1},
{0x3105, 0x312e, 1},
{0x31a0, 0x31ba, 1},
},
}
@ -3779,7 +3848,7 @@ var _Common = &RangeTable{
{0x0589, 0x0589, 1},
{0x0605, 0x0605, 1},
{0x060c, 0x060c, 1},
{0x061b, 0x061c, 1},
{0x061b, 0x061b, 1},
{0x061f, 0x061f, 1},
{0x0640, 0x0640, 1},
{0x06dd, 0x06dd, 1},
@ -3796,30 +3865,29 @@ var _Common = &RangeTable{
{0x1ce1, 0x1ce1, 1},
{0x1ce9, 0x1cec, 1},
{0x1cee, 0x1cf3, 1},
{0x1cf5, 0x1cf6, 1},
{0x1cf5, 0x1cf7, 1},
{0x2000, 0x200b, 1},
{0x200e, 0x2064, 1},
{0x2066, 0x2070, 1},
{0x2074, 0x207e, 1},
{0x2080, 0x208e, 1},
{0x20a0, 0x20be, 1},
{0x20a0, 0x20bf, 1},
{0x2100, 0x2125, 1},
{0x2127, 0x2129, 1},
{0x212c, 0x2131, 1},
{0x2133, 0x214d, 1},
{0x214f, 0x215f, 1},
{0x2189, 0x218b, 1},
{0x2190, 0x23fe, 1},
{0x2400, 0x2426, 1},
{0x2190, 0x2426, 1},
{0x2440, 0x244a, 1},
{0x2460, 0x27ff, 1},
{0x2900, 0x2b73, 1},
{0x2b76, 0x2b95, 1},
{0x2b98, 0x2bb9, 1},
{0x2bbd, 0x2bc8, 1},
{0x2bca, 0x2bd1, 1},
{0x2bca, 0x2bd2, 1},
{0x2bec, 0x2bef, 1},
{0x2e00, 0x2e44, 1},
{0x2e00, 0x2e49, 1},
{0x2ff0, 0x2ffb, 1},
{0x3000, 0x3004, 1},
{0x3006, 0x3006, 1},
@ -3909,9 +3977,10 @@ var _Common = &RangeTable{
{0x1f210, 0x1f23b, 1},
{0x1f240, 0x1f248, 1},
{0x1f250, 0x1f251, 1},
{0x1f300, 0x1f6d2, 1},
{0x1f260, 0x1f265, 1},
{0x1f300, 0x1f6d4, 1},
{0x1f6e0, 0x1f6ec, 1},
{0x1f6f0, 0x1f6f6, 1},
{0x1f6f0, 0x1f6f8, 1},
{0x1f700, 0x1f773, 1},
{0x1f780, 0x1f7d4, 1},
{0x1f800, 0x1f80b, 1},
@ -3919,14 +3988,13 @@ var _Common = &RangeTable{
{0x1f850, 0x1f859, 1},
{0x1f860, 0x1f887, 1},
{0x1f890, 0x1f8ad, 1},
{0x1f910, 0x1f91e, 1},
{0x1f920, 0x1f927, 1},
{0x1f930, 0x1f930, 1},
{0x1f933, 0x1f93e, 1},
{0x1f940, 0x1f94b, 1},
{0x1f950, 0x1f95e, 1},
{0x1f980, 0x1f991, 1},
{0x1f900, 0x1f90b, 1},
{0x1f910, 0x1f93e, 1},
{0x1f940, 0x1f94c, 1},
{0x1f950, 0x1f96b, 1},
{0x1f980, 0x1f997, 1},
{0x1f9c0, 0x1f9c0, 1},
{0x1f9d0, 0x1f9e6, 1},
{0xe0001, 0xe0001, 1},
{0xe0020, 0xe007f, 1},
},
@ -4167,7 +4235,7 @@ var _Gujarati = &RangeTable{
{0x0ad0, 0x0ad0, 1},
{0x0ae0, 0x0ae3, 1},
{0x0ae6, 0x0af1, 1},
{0x0af9, 0x0af9, 1},
{0x0af9, 0x0aff, 1},
},
}
@ -4202,7 +4270,7 @@ var _Han = &RangeTable{
{0x3021, 0x3029, 1},
{0x3038, 0x303b, 1},
{0x3400, 0x4db5, 1},
{0x4e00, 0x9fd5, 1},
{0x4e00, 0x9fea, 1},
{0xf900, 0xfa6d, 1},
{0xfa70, 0xfad9, 1},
},
@ -4211,6 +4279,7 @@ var _Han = &RangeTable{
{0x2a700, 0x2b734, 1},
{0x2b740, 0x2b81d, 1},
{0x2b820, 0x2cea1, 1},
{0x2ceb0, 0x2ebe0, 1},
{0x2f800, 0x2fa1d, 1},
},
}
@ -4269,7 +4338,7 @@ var _Hiragana = &RangeTable{
{0x309d, 0x309f, 1},
},
R32: []Range32{
{0x1b001, 0x1b001, 1},
{0x1b001, 0x1b11e, 1},
{0x1f200, 0x1f200, 1},
},
}
@ -4296,7 +4365,7 @@ var _Inherited = &RangeTable{
{0x1ced, 0x1ced, 1},
{0x1cf4, 0x1cf4, 1},
{0x1cf8, 0x1cf9, 1},
{0x1dc0, 0x1df5, 1},
{0x1dc0, 0x1df9, 1},
{0x1dfb, 0x1dff, 1},
{0x200c, 0x200d, 1},
{0x20d0, 0x20f0, 1},
@ -4557,11 +4626,10 @@ var _Mahajani = &RangeTable{
var _Malayalam = &RangeTable{
R16: []Range16{
{0x0d01, 0x0d03, 1},
{0x0d00, 0x0d03, 1},
{0x0d05, 0x0d0c, 1},
{0x0d0e, 0x0d10, 1},
{0x0d12, 0x0d3a, 1},
{0x0d3d, 0x0d44, 1},
{0x0d12, 0x0d44, 1},
{0x0d46, 0x0d48, 1},
{0x0d4a, 0x0d4f, 1},
{0x0d54, 0x0d63, 1},
@ -4593,6 +4661,19 @@ var _Marchen = &RangeTable{
},
}
// _Masaram_Gondi is the table assigned to the exported Masaram_Gondi script
// variable. All of its ranges are listed in R32 (R16 is empty); they span
// 0x11d00-0x11d59 with gaps between the listed ranges.
var _Masaram_Gondi = &RangeTable{
R16: []Range16{},
R32: []Range32{
{0x11d00, 0x11d06, 1},
{0x11d08, 0x11d09, 1},
{0x11d0b, 0x11d36, 1},
{0x11d3a, 0x11d3a, 1},
{0x11d3c, 0x11d3d, 1},
{0x11d3f, 0x11d47, 1},
{0x11d50, 0x11d59, 1},
},
}
var _Meetei_Mayek = &RangeTable{
R16: []Range16{
{0xaae0, 0xaaf6, 1},
@ -4716,6 +4797,14 @@ var _Nko = &RangeTable{
},
}
// _Nushu is the table assigned to the exported Nushu script variable.
// It lists the single code point 0x16fe1 plus the range 0x1b170-0x1b2fb,
// both in R32 (R16 is empty).
var _Nushu = &RangeTable{
R16: []Range16{},
R32: []Range32{
{0x16fe1, 0x16fe1, 1},
{0x1b170, 0x1b2fb, 1},
},
}
var _Ogham = &RangeTable{
R16: []Range16{
{0x1680, 0x169c, 1},
@ -4741,6 +4830,7 @@ var _Old_Italic = &RangeTable{
R16: []Range16{},
R32: []Range32{
{0x10300, 0x10323, 1},
{0x1032d, 0x1032f, 1},
},
}
@ -4951,6 +5041,15 @@ var _Sora_Sompeng = &RangeTable{
},
}
// _Soyombo is the table assigned to the exported Soyombo script variable.
// Its three R32 ranges cover 0x11a50-0x11aa2, leaving out 0x11a84-0x11a85
// and 0x11a9d; R16 is empty.
var _Soyombo = &RangeTable{
R16: []Range16{},
R32: []Range32{
{0x11a50, 0x11a83, 1},
{0x11a86, 0x11a9c, 1},
{0x11a9e, 0x11aa2, 1},
},
}
var _Sundanese = &RangeTable{
R16: []Range16{
{0x1b80, 0x1bbf, 1},
@ -4969,6 +5068,7 @@ var _Syriac = &RangeTable{
{0x0700, 0x070d, 1},
{0x070f, 0x074a, 1},
{0x074d, 0x074f, 1},
{0x0860, 0x086a, 1},
},
}
@ -5137,6 +5237,13 @@ var _Yi = &RangeTable{
},
}
// _Zanabazar_Square is the table assigned to the exported Zanabazar_Square
// script variable. It consists of one R32 range, 0x11a00-0x11a47; R16 is
// empty.
var _Zanabazar_Square = &RangeTable{
R16: []Range16{},
R32: []Range32{
{0x11a00, 0x11a47, 1},
},
}
// These variables have type *RangeTable.
var (
Adlam = _Adlam // Adlam is the set of Unicode characters in script Adlam.
@ -5213,6 +5320,7 @@ var (
Mandaic = _Mandaic // Mandaic is the set of Unicode characters in script Mandaic.
Manichaean = _Manichaean // Manichaean is the set of Unicode characters in script Manichaean.
Marchen = _Marchen // Marchen is the set of Unicode characters in script Marchen.
Masaram_Gondi = _Masaram_Gondi // Masaram_Gondi is the set of Unicode characters in script Masaram_Gondi.
Meetei_Mayek = _Meetei_Mayek // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek.
Mende_Kikakui = _Mende_Kikakui // Mende_Kikakui is the set of Unicode characters in script Mende_Kikakui.
Meroitic_Cursive = _Meroitic_Cursive // Meroitic_Cursive is the set of Unicode characters in script Meroitic_Cursive.
@ -5227,6 +5335,7 @@ var (
New_Tai_Lue = _New_Tai_Lue // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue.
Newa = _Newa // Newa is the set of Unicode characters in script Newa.
Nko = _Nko // Nko is the set of Unicode characters in script Nko.
Nushu = _Nushu // Nushu is the set of Unicode characters in script Nushu.
Ogham = _Ogham // Ogham is the set of Unicode characters in script Ogham.
Ol_Chiki = _Ol_Chiki // Ol_Chiki is the set of Unicode characters in script Ol_Chiki.
Old_Hungarian = _Old_Hungarian // Old_Hungarian is the set of Unicode characters in script Old_Hungarian.
@ -5255,6 +5364,7 @@ var (
SignWriting = _SignWriting // SignWriting is the set of Unicode characters in script SignWriting.
Sinhala = _Sinhala // Sinhala is the set of Unicode characters in script Sinhala.
Sora_Sompeng = _Sora_Sompeng // Sora_Sompeng is the set of Unicode characters in script Sora_Sompeng.
Soyombo = _Soyombo // Soyombo is the set of Unicode characters in script Soyombo.
Sundanese = _Sundanese // Sundanese is the set of Unicode characters in script Sundanese.
Syloti_Nagri = _Syloti_Nagri // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri.
Syriac = _Syriac // Syriac is the set of Unicode characters in script Syriac.
@ -5276,10 +5386,11 @@ var (
Vai = _Vai // Vai is the set of Unicode characters in script Vai.
Warang_Citi = _Warang_Citi // Warang_Citi is the set of Unicode characters in script Warang_Citi.
Yi = _Yi // Yi is the set of Unicode characters in script Yi.
Zanabazar_Square = _Zanabazar_Square // Zanabazar_Square is the set of Unicode characters in script Zanabazar_Square.
)
// Generated by running
// maketables --props=all --url=http://www.unicode.org/Public/9.0.0/ucd/
// maketables --props=all --url=http://www.unicode.org/Public/10.0.0/ucd/
// DO NOT EDIT
// Properties is the set of Unicode property tables.
@ -5311,6 +5422,7 @@ var Properties = map[string]*RangeTable{
"Prepended_Concatenation_Mark": Prepended_Concatenation_Mark,
"Quotation_Mark": Quotation_Mark,
"Radical": Radical,
"Regional_Indicator": Regional_Indicator,
"Sentence_Terminal": Sentence_Terminal,
"STerm": Sentence_Terminal,
"Soft_Dotted": Soft_Dotted,
@ -5421,12 +5533,14 @@ var _Diacritic = &RangeTable{
{0x0a4d, 0x0a4d, 1},
{0x0abc, 0x0abc, 1},
{0x0acd, 0x0acd, 1},
{0x0afd, 0x0aff, 1},
{0x0b3c, 0x0b3c, 1},
{0x0b4d, 0x0b4d, 1},
{0x0bcd, 0x0bcd, 1},
{0x0c4d, 0x0c4d, 1},
{0x0cbc, 0x0cbc, 1},
{0x0ccd, 0x0ccd, 1},
{0x0d3b, 0x0d3c, 1},
{0x0d4d, 0x0d4d, 1},
{0x0dca, 0x0dca, 1},
{0x0e47, 0x0e4c, 1},
@ -5460,10 +5574,10 @@ var _Diacritic = &RangeTable{
{0x1cd0, 0x1ce8, 1},
{0x1ced, 0x1ced, 1},
{0x1cf4, 0x1cf4, 1},
{0x1cf8, 0x1cf9, 1},
{0x1cf7, 0x1cf9, 1},
{0x1d2c, 0x1d6a, 1},
{0x1dc4, 0x1dcf, 1},
{0x1df5, 0x1df5, 1},
{0x1df5, 0x1df9, 1},
{0x1dfd, 0x1dff, 1},
{0x1fbd, 0x1fbd, 1},
{0x1fbf, 0x1fc1, 1},
@ -5525,7 +5639,12 @@ var _Diacritic = &RangeTable{
{0x1163f, 0x1163f, 1},
{0x116b6, 0x116b7, 1},
{0x1172b, 0x1172b, 1},
{0x11a34, 0x11a34, 1},
{0x11a47, 0x11a47, 1},
{0x11a99, 0x11a99, 1},
{0x11c3f, 0x11c3f, 1},
{0x11d42, 0x11d42, 1},
{0x11d44, 0x11d45, 1},
{0x16af0, 0x16af4, 1},
{0x16f8f, 0x16f9f, 1},
{0x1d167, 0x1d169, 1},
@ -5569,8 +5688,9 @@ var _Extender = &RangeTable{
R32: []Range32{
{0x1135d, 0x1135d, 1},
{0x115c6, 0x115c8, 1},
{0x11a98, 0x11a98, 1},
{0x16b42, 0x16b43, 1},
{0x16fe0, 0x16fe0, 1},
{0x16fe0, 0x16fe1, 1},
{0x1e944, 0x1e946, 1},
},
LatinOffset: 1,
@ -5623,17 +5743,19 @@ var _Ideographic = &RangeTable{
{0x3021, 0x3029, 1},
{0x3038, 0x303a, 1},
{0x3400, 0x4db5, 1},
{0x4e00, 0x9fd5, 1},
{0x4e00, 0x9fea, 1},
{0xf900, 0xfa6d, 1},
{0xfa70, 0xfad9, 1},
},
R32: []Range32{
{0x17000, 0x187ec, 1},
{0x18800, 0x18af2, 1},
{0x1b170, 0x1b2fb, 1},
{0x20000, 0x2a6d6, 1},
{0x2a700, 0x2b734, 1},
{0x2b740, 0x2b81d, 1},
{0x2b820, 0x2cea1, 1},
{0x2ceb0, 0x2ebe0, 1},
{0x2f800, 0x2fa1d, 1},
},
}
@ -5730,6 +5852,7 @@ var _Other_Alphabetic = &RangeTable{
{0x0ac7, 0x0ac9, 1},
{0x0acb, 0x0acc, 1},
{0x0ae2, 0x0ae3, 1},
{0x0afa, 0x0afc, 1},
{0x0b01, 0x0b03, 1},
{0x0b3e, 0x0b44, 1},
{0x0b47, 0x0b48, 1},
@ -5753,7 +5876,7 @@ var _Other_Alphabetic = &RangeTable{
{0x0cca, 0x0ccc, 1},
{0x0cd5, 0x0cd6, 1},
{0x0ce2, 0x0ce3, 1},
{0x0d01, 0x0d03, 1},
{0x0d00, 0x0d03, 1},
{0x0d3e, 0x0d44, 1},
{0x0d46, 0x0d48, 1},
{0x0d4a, 0x0d4c, 1},
@ -5863,10 +5986,21 @@ var _Other_Alphabetic = &RangeTable{
{0x11640, 0x11640, 1},
{0x116ab, 0x116b5, 1},
{0x1171d, 0x1172a, 1},
{0x11a01, 0x11a0a, 1},
{0x11a35, 0x11a39, 1},
{0x11a3b, 0x11a3e, 1},
{0x11a51, 0x11a5b, 1},
{0x11a8a, 0x11a97, 1},
{0x11c2f, 0x11c36, 1},
{0x11c38, 0x11c3e, 1},
{0x11c92, 0x11ca7, 1},
{0x11ca9, 0x11cb6, 1},
{0x11d31, 0x11d36, 1},
{0x11d3a, 0x11d3a, 1},
{0x11d3c, 0x11d3d, 1},
{0x11d3f, 0x11d41, 1},
{0x11d43, 0x11d43, 1},
{0x11d47, 0x11d47, 1},
{0x16b30, 0x16b36, 1},
{0x16f51, 0x16f7e, 1},
{0x1bc9e, 0x1bc9e, 1},
@ -6213,6 +6347,13 @@ var _Radical = &RangeTable{
},
}
// _Regional_Indicator is the range table behind the exported
// Regional_Indicator property. It consists of one 32-bit range,
// U+1F1E6 through U+1F1FF (26 consecutive code points, stride 1).
// The 16-bit list is empty because that range lies entirely above 0xFFFF.
var _Regional_Indicator = &RangeTable{
R16: []Range16{},
R32: []Range32{
{0x1f1e6, 0x1f1ff, 1},
},
}
var _Sentence_Terminal = &RangeTable{
R16: []Range16{
{0x0021, 0x0021, 1},
@ -6276,6 +6417,8 @@ var _Sentence_Terminal = &RangeTable{
{0x115c9, 0x115d7, 1},
{0x11641, 0x11642, 1},
{0x1173c, 0x1173e, 1},
{0x11a42, 0x11a43, 1},
{0x11a9b, 0x11a9c, 1},
{0x11c41, 0x11c42, 1},
{0x16a6e, 0x16a6f, 1},
{0x16af5, 0x16af5, 1},
@ -6415,6 +6558,9 @@ var _Terminal_Punctuation = &RangeTable{
{0x115c9, 0x115d7, 1},
{0x11641, 0x11642, 1},
{0x1173c, 0x1173e, 1},
{0x11a42, 0x11a43, 1},
{0x11a9b, 0x11a9c, 1},
{0x11aa1, 0x11aa2, 1},
{0x11c41, 0x11c43, 1},
{0x11c71, 0x11c71, 1},
{0x12470, 0x12474, 1},
@ -6431,7 +6577,7 @@ var _Terminal_Punctuation = &RangeTable{
var _Unified_Ideograph = &RangeTable{
R16: []Range16{
{0x3400, 0x4db5, 1},
{0x4e00, 0x9fd5, 1},
{0x4e00, 0x9fea, 1},
{0xfa0e, 0xfa0f, 1},
{0xfa11, 0xfa11, 1},
{0xfa13, 0xfa14, 1},
@ -6445,6 +6591,7 @@ var _Unified_Ideograph = &RangeTable{
{0x2a700, 0x2b734, 1},
{0x2b740, 0x2b81d, 1},
{0x2b820, 0x2cea1, 1},
{0x2ceb0, 0x2ebe0, 1},
},
}
@ -6503,6 +6650,7 @@ var (
Prepended_Concatenation_Mark = _Prepended_Concatenation_Mark // Prepended_Concatenation_Mark is the set of Unicode characters with property Prepended_Concatenation_Mark.
Quotation_Mark = _Quotation_Mark // Quotation_Mark is the set of Unicode characters with property Quotation_Mark.
Radical = _Radical // Radical is the set of Unicode characters with property Radical.
Regional_Indicator = _Regional_Indicator // Regional_Indicator is the set of Unicode characters with property Regional_Indicator.
STerm = _Sentence_Terminal // STerm is an alias for Sentence_Terminal.
Sentence_Terminal = _Sentence_Terminal // Sentence_Terminal is the set of Unicode characters with property Sentence_Terminal.
Soft_Dotted = _Soft_Dotted // Soft_Dotted is the set of Unicode characters with property Soft_Dotted.
@ -6513,7 +6661,7 @@ var (
)
// Generated by running
// maketables --data=http://www.unicode.org/Public/9.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt
// maketables --data=http://www.unicode.org/Public/10.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/10.0.0/ucd/CaseFolding.txt
// DO NOT EDIT
// CaseRanges is the table describing case mappings for all letters with
@ -7612,7 +7760,7 @@ var foldInherited = &RangeTable{
},
}
// Range entries: 3576 16-bit, 1454 32-bit, 5030 total.
// Range bytes: 21456 16-bit, 17448 32-bit, 38904 total.
// Range entries: 3587 16-bit, 1554 32-bit, 5141 total.
// Range bytes: 21522 16-bit, 18648 32-bit, 40170 total.
// Fold orbit bytes: 88 pairs, 352 bytes

View File

@ -21,6 +21,7 @@ import (
"unicode/utf8"
"golang_org/x/text/secure/bidirule"
"golang_org/x/text/unicode/bidi"
"golang_org/x/text/unicode/norm"
)
@ -67,6 +68,15 @@ func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify }
}
// RemoveLeadingDots returns an Option that sets whether leading label
// separators are stripped from the input. Leading runes that map to a dot,
// such as U+3002 IDEOGRAPHIC FULL STOP, are stripped as well.
//
// UTS #46 suggests this behavior, and some browsers have adopted it.
func RemoveLeadingDots(remove bool) Option {
	return func(cfg *options) {
		cfg.removeLeadingDots = remove
	}
}
// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
@ -83,7 +93,7 @@ func ValidateLabels(enable bool) Option {
}
}
// StrictDomainName limits the set of permissable ASCII characters to those
// StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
//
@ -137,10 +147,11 @@ func MapForLookup() Option {
}
type options struct {
transitional bool
useSTD3Rules bool
validateLabels bool
verifyDNSLength bool
transitional bool
useSTD3Rules bool
validateLabels bool
verifyDNSLength bool
removeLeadingDots bool
trie *idnaTrie
@ -149,14 +160,14 @@ type options struct {
// mapping implements a validation and mapping step as defined in RFC 5895
// or UTS 46, tailored to, for example, domain registration or lookup.
mapping func(p *Profile, s string) (string, error)
mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
// bidirule, if specified, checks whether s conforms to the Bidi Rule
// defined in RFC 5893.
bidirule func(s string) bool
}
// A Profile defines the configuration of a IDNA mapper.
// A Profile defines the configuration of an IDNA mapper.
type Profile struct {
options
}
@ -289,12 +300,16 @@ func (e runeError) Error() string {
// see http://www.unicode.org/reports/tr46.
func (p *Profile) process(s string, toASCII bool) (string, error) {
var err error
var isBidi bool
if p.mapping != nil {
s, err = p.mapping(p, s)
s, isBidi, err = p.mapping(p, s)
}
// Remove leading empty labels.
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
if p.removeLeadingDots {
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
}
}
// TODO: allow for a quick check of the tables data.
// It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggests we should always check this.
if err == nil && p.verifyDNSLength && s == "" {
@ -320,6 +335,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
// Spec says keep the old label.
continue
}
isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
labels.set(u)
if err == nil && p.validateLabels {
err = p.fromPuny(p, u)
@ -334,6 +350,14 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
err = p.validateLabel(label)
}
}
if isBidi && p.bidirule != nil && err == nil {
for labels.reset(); !labels.done(); labels.next() {
if !p.bidirule(labels.label()) {
err = &labelError{s, "B"}
break
}
}
}
if toASCII {
for labels.reset(); !labels.done(); labels.next() {
label := labels.label()
@ -365,41 +389,65 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
return s, err
}
func normalize(p *Profile, s string) (string, error) {
return norm.NFC.String(s), nil
func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
// TODO: consider first doing a quick check to see if any of these checks
// need to be done. This will make it slower in the general case, but
// faster in the common case.
mapped = norm.NFC.String(s)
isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
return mapped, isBidi, nil
}
func validateRegistration(p *Profile, s string) (string, error) {
func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
// TODO: filter need for normalization in loop below.
if !norm.NFC.IsNormalString(s) {
return s, &labelError{s, "V1"}
return s, false, &labelError{s, "V1"}
}
var err error
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
i += sz
bidi = bidi || info(v).isBidi(s[i:])
// Copy bytes not copied so far.
switch p.simplify(info(v).category()) {
// TODO: handle the NV8 defined in the Unicode idna data set to allow
// for strict conformance to IDNA2008.
case valid, deviation:
case disallowed, mapped, unknown, ignored:
if err == nil {
r, _ := utf8.DecodeRuneInString(s[i:])
err = runeError(r)
}
r, _ := utf8.DecodeRuneInString(s[i:])
return s, bidi, runeError(r)
}
i += sz
}
return s, err
return s, bidi, nil
}
func validateAndMap(p *Profile, s string) (string, error) {
// isBidi reports whether the rune described by c has a right-to-left bidi
// class. s is expected to start with the rune that c was looked up for.
//
// For unmapped runes the answer is read directly from the attribute bits of
// c. For mapped runes the table carries no bidi attribute, so the class is
// looked up from the first rune of s instead; R, AL and AN count as bidi.
func (c info) isBidi(s string) bool {
	if c.isMapped() {
		// TODO: also store bidi info for mapped data. This is possible, but a
		// bit cumbersome and not for the common case.
		props, _ := bidi.LookupString(s)
		switch props.Class() {
		case bidi.R, bidi.AL, bidi.AN:
			return true
		default:
			return false
		}
	}
	return c&attributesMask == rtl
}
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
var (
err error
b []byte
k int
b []byte
k int
)
// combinedInfoBits contains the or-ed bits of all runes. We use this
// to derive the mayNeedNorm bit later. This may trigger normalization
// overeagerly, but it will not do so in the common case. The end result
// is another 10% saving on BenchmarkProfile for the common case.
var combinedInfoBits info
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
combinedInfoBits |= info(v)
bidi = bidi || info(v).isBidi(s[i:])
start := i
i += sz
// Copy bytes not copied so far.
@ -408,7 +456,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
continue
case disallowed:
if err == nil {
r, _ := utf8.DecodeRuneInString(s[i:])
r, _ := utf8.DecodeRuneInString(s[start:])
err = runeError(r)
}
continue
@ -426,7 +474,9 @@ func validateAndMap(p *Profile, s string) (string, error) {
}
if k == 0 {
// No changes so far.
s = norm.NFC.String(s)
if combinedInfoBits&mayNeedNorm != 0 {
s = norm.NFC.String(s)
}
} else {
b = append(b, s[k:]...)
if norm.NFC.QuickSpan(b) != len(b) {
@ -435,7 +485,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
// TODO: the punycode converters require strings as input.
s = string(b)
}
return s, err
return s, bidi, err
}
// A labelIter allows iterating over domain name labels.
@ -530,6 +580,8 @@ func validateFromPunycode(p *Profile, s string) error {
if !norm.NFC.IsNormalString(s) {
return &labelError{s, "V1"}
}
// TODO: detect whether string may have to be normalized in the following
// loop.
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
if c := p.simplify(info(v).category()); c != valid && c != deviation {
@ -604,16 +656,13 @@ var joinStates = [][numJoinTypes]joinState{
// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
func (p *Profile) validateLabel(s string) error {
func (p *Profile) validateLabel(s string) (err error) {
if s == "" {
if p.verifyDNSLength {
return &labelError{s, "A4"}
}
return nil
}
if p.bidirule != nil && !p.bidirule(s) {
return &labelError{s, "B"}
}
if !p.validateLabels {
return nil
}

File diff suppressed because it is too large Load Diff

View File

@ -28,9 +28,9 @@ package idna
// 15..3 index into xor or mapping table
// }
// } else {
// 15..13 unused
// 12 modifier (including virama)
// 11 virama modifier
// 15..14 unused
// 13 mayNeedNorm
// 12..11 attributes
// 10..8 joining type
// 7..3 category type
// }
@ -51,15 +51,20 @@ const (
joinShift = 8
joinMask = 0x07
viramaModifier = 0x0800
// Attributes
attributesMask = 0x1800
viramaModifier = 0x1800
modifier = 0x1000
rtl = 0x0800
mayNeedNorm = 0x2000
)
// A category corresponds to a category defined in the IDNA mapping table.
type category uint16
const (
unknown category = 0 // not defined currently in unicode.
unknown category = 0 // not currently defined in unicode.
mapped category = 1
disallowedSTD3Mapped category = 2
deviation category = 3
@ -112,5 +117,5 @@ func (c info) isModifier() bool {
}
func (c info) isViramaModifier() bool {
return c&(viramaModifier|catSmallMask) == viramaModifier
return c&(attributesMask|catSmallMask) == viramaModifier
}

View File

@ -157,6 +157,7 @@ func DirectionString(s string) bidi.Direction {
e, sz := bidi.LookupString(s[i:])
if sz == 0 {
i++
continue
}
c := e.Class()
if c == bidi.R || c == bidi.AL || c == bidi.AN {
@ -205,9 +206,6 @@ func (t *Transformer) isRTL() bool {
}
func (t *Transformer) isFinal() bool {
if !t.isRTL() {
return true
}
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
}

File diff suppressed because it is too large Load Diff

View File

@ -35,17 +35,9 @@ const (
// streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8
// mkStreamSafe is a shorthand for declaring a streamSafe var and calling
// first on it.
func mkStreamSafe(p Properties) streamSafe {
return streamSafe(p.nTrailingNonStarters())
}
// first inserts the first rune of a segment.
// first inserts the first rune of a segment. It is a faster version of next if
// it is known p represents the first rune in a segment.
func (ss *streamSafe) first(p Properties) {
if *ss != 0 {
panic("!= 0")
}
*ss = streamSafe(p.nTrailingNonStarters())
}
@ -68,7 +60,7 @@ func (ss *streamSafe) next(p Properties) ssState {
// be a non-starter. Note that it always holds that if nLead > 0 then
// nLead == nTrail.
if n == 0 {
*ss = 0
*ss = streamSafe(p.nTrailingNonStarters())
return ssStarter
}
return ssSuccess
@ -144,7 +136,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
func (rb *reorderBuffer) reset() {
rb.nrune = 0
rb.nbyte = 0
rb.ss = 0
}
func (rb *reorderBuffer) doFlush() bool {
@ -259,6 +250,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
// It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
rb.tmpBytes.setBytes(dcomp)
// As the streamSafe accounting already handles the counting for modifiers,
// we don't have to call next. However, we do need to keep the accounting
// intact when flushing the buffer.
for i := 0; i < len(dcomp); {
info := rb.f.info(rb.tmpBytes, i)
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {

View File

@ -92,16 +92,20 @@ func (in *input) charinfoNFKC(p int) (uint16, int) {
}
func (in *input) hangul(p int) (r rune) {
var size int
if in.bytes == nil {
if !isHangulString(in.str[p:]) {
return 0
}
r, _ = utf8.DecodeRuneInString(in.str[p:])
r, size = utf8.DecodeRuneInString(in.str[p:])
} else {
if !isHangul(in.bytes[p:]) {
return 0
}
r, _ = utf8.DecodeRune(in.bytes[p:])
r, size = utf8.DecodeRune(in.bytes[p:])
}
if size != hangulUTF8Size {
return 0
}
return r
}

View File

@ -43,6 +43,7 @@ func (i *Iter) Init(f Form, src []byte) {
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIBytes
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// InitString initializes i to iterate over src after normalizing it to Form f.
@ -58,11 +59,12 @@ func (i *Iter) InitString(f Form, src string) {
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIString
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// Seek sets the segment to be returned by the next call to Next to start
// at position p. It is the responsibility of the caller to set p to the
// start of a UTF8 rune.
// start of a segment.
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
var abs int64
switch whence {
@ -86,6 +88,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) {
i.multiSeg = nil
i.next = i.rb.f.nextMain
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
return abs, nil
}
@ -163,6 +166,7 @@ func nextHangul(i *Iter) []byte {
if next >= i.rb.nsrc {
i.setDone()
} else if i.rb.src.hangul(next) == 0 {
i.rb.ss.next(i.info)
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
@ -206,12 +210,10 @@ func nextMultiNorm(i *Iter) []byte {
if info.BoundaryBefore() {
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
i.rb.ss.first(info)
i.rb.insertUnsafe(input{bytes: d}, j, info)
i.multiSeg = d[j+int(info.size):]
return seg
}
i.rb.ss.next(info)
i.rb.insertUnsafe(input{bytes: d}, j, info)
j += int(info.size)
}
@ -224,9 +226,9 @@ func nextMultiNorm(i *Iter) []byte {
func nextDecomposed(i *Iter) (next []byte) {
outp := 0
inCopyStart, outCopyStart := i.p, 0
ss := mkStreamSafe(i.info)
for {
if sz := int(i.info.size); sz <= 1 {
i.rb.ss = 0
p := i.p
i.p++ // ASCII or illegal byte. Either way, advance by 1.
if i.p >= i.rb.nsrc {
@ -245,6 +247,8 @@ func nextDecomposed(i *Iter) (next []byte) {
p := outp + len(d)
if outp > 0 {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
// TODO: this condition should not be possible, but we leave it
// in for defensive purposes.
if p > len(i.buf) {
return i.buf[:outp]
}
@ -268,7 +272,7 @@ func nextDecomposed(i *Iter) (next []byte) {
} else {
i.info = i.rb.f.info(i.rb.src, i.p)
}
switch ss.next(i.info) {
switch i.rb.ss.next(i.info) {
case ssOverflow:
i.next = nextCGJDecompose
fallthrough
@ -311,7 +315,7 @@ func nextDecomposed(i *Iter) (next []byte) {
}
prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p)
if v := ss.next(i.info); v == ssStarter {
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJDecompose
@ -337,10 +341,6 @@ doNorm:
func doNormDecomposed(i *Iter) []byte {
for {
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
@ -350,6 +350,10 @@ func doNormDecomposed(i *Iter) []byte {
if i.info.ccc == 0 {
break
}
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
}
// new segment or too many combining characters: exit normalization
return i.buf[:i.rb.flushCopy(i.buf[:])]
@ -359,6 +363,7 @@ func nextCGJDecompose(i *Iter) []byte {
i.rb.ss = 0
i.rb.insertCGJ()
i.next = nextDecomposed
i.rb.ss.first(i.info)
buf := doNormDecomposed(i)
return buf
}
@ -367,7 +372,6 @@ func nextCGJDecompose(i *Iter) []byte {
func nextComposed(i *Iter) []byte {
outp, startp := 0, i.p
var prevCC uint8
ss := mkStreamSafe(i.info)
for {
if !i.info.isYesC() {
goto doNorm
@ -387,11 +391,12 @@ func nextComposed(i *Iter) []byte {
i.setDone()
break
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.rb.ss = 0
i.next = i.asciiF
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if v := ss.next(i.info); v == ssStarter {
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJCompose
@ -403,8 +408,10 @@ func nextComposed(i *Iter) []byte {
}
return i.returnSlice(startp, i.p)
doNorm:
// reset to start position
i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
if i.info.multiSegment() {
d := i.info.Decomposition()
info := i.rb.f.info(input{bytes: d}, 0)

View File

@ -324,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
// have an overflow for runes that are starters (e.g. with U+FF9E).
switch ss.next(info) {
case ssStarter:
ss.first(info)
lastSegStart = i
case ssOverflow:
return lastSegStart, false
@ -441,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
}
return -1
}
// TODO: Using streamSafe to determine the boundary isn't the same as
// using BoundaryBefore. Determine which should be used.
if s := ss.next(info); s != ssSuccess {
return i
}
@ -505,15 +506,14 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
if info.size == 0 {
return 0
}
if rb.nrune > 0 {
if s := rb.ss.next(info); s == ssStarter {
goto end
} else if s == ssOverflow {
rb.insertCGJ()
if s := rb.ss.next(info); s == ssStarter {
// TODO: this could be removed if we don't support merging.
if rb.nrune > 0 {
goto end
}
} else {
rb.ss.first(info)
} else if s == ssOverflow {
rb.insertCGJ()
goto end
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,7 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
}
func flushTransform(rb *reorderBuffer) bool {
// Write out (must fully fit in dst, or else it is a ErrShortDst).
// Write out (must fully fit in dst, or else it is an ErrShortDst).
if len(rb.out) < rb.nrune*utf8.UTFMax {
return false
}