mirror of
https://github.com/golang/go
synced 2024-11-19 17:14:44 -07:00
unicode: move unicode and related packages to Unicode 6.2.0.
R=r, mpvl CC=golang-dev https://golang.org/cl/6818067
This commit is contained in:
parent
b8b329451c
commit
e14cf90a8b
@ -129,7 +129,7 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
if ce[0] > b.varTop {
|
||||
b.varTop = ce[0]
|
||||
}
|
||||
} else if ce[0] > 0 {
|
||||
} else if ce[0] > 1 { // 1 is a special primary value reserved for FFFE
|
||||
if ce[0] <= b.varTop {
|
||||
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ var (
|
||||
`URL of the Default Unicode Collation Element Table (DUCET). This can be a zip
|
||||
file containing the file allkeys_CLDR.txt or an allkeys.txt file.`)
|
||||
cldr = flag.String("cldr",
|
||||
"http://www.unicode.org/Public/cldr/2.0.1/core.zip",
|
||||
"http://www.unicode.org/Public/cldr/22/core.zip",
|
||||
"URL of CLDR archive.")
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -6,15 +6,17 @@ package unicode
|
||||
|
||||
// Bit masks for each code point under U+0100, for fast lookup.
|
||||
const (
|
||||
pC = 1 << iota // a control character.
|
||||
pP // a punctuation character.
|
||||
pN // a numeral.
|
||||
pS // a symbolic character.
|
||||
pZ // a spacing character.
|
||||
pLu // an upper-case letter.
|
||||
pLl // a lower-case letter.
|
||||
pp // a printable character according to Go's definition.
|
||||
pg = pp | pZ // a graphical character according to the Unicode definition.
|
||||
pC = 1 << iota // a control character.
|
||||
pP // a punctuation character.
|
||||
pN // a numeral.
|
||||
pS // a symbolic character.
|
||||
pZ // a spacing character.
|
||||
pLu // an upper-case letter.
|
||||
pLl // a lower-case letter.
|
||||
pp // a printable character according to Go's definition.
|
||||
pg = pp | pZ // a graphical character according to the Unicode definition.
|
||||
pLo = pLl | pLu // a letter that is neither upper nor lower case.
|
||||
pLmask = pLo
|
||||
)
|
||||
|
||||
// GraphicRanges defines the set of graphic characters according to Unicode.
|
||||
@ -76,7 +78,7 @@ func IsControl(r rune) bool {
|
||||
// IsLetter reports whether the rune is a letter (category L).
|
||||
func IsLetter(r rune) bool {
|
||||
if uint32(r) <= MaxLatin1 {
|
||||
return properties[uint8(r)]&(pLu|pLl) != 0
|
||||
return properties[uint8(r)]&(pLmask) != 0
|
||||
}
|
||||
return isExcludingLatin(Letter, r)
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
|
||||
func IsUpper(r rune) bool {
|
||||
// See comment in IsGraphic.
|
||||
if uint32(r) <= MaxLatin1 {
|
||||
return properties[uint8(r)]&pLu != 0
|
||||
return properties[uint8(r)]&pLmask == pLu
|
||||
}
|
||||
return isExcludingLatin(Upper, r)
|
||||
}
|
||||
@ -189,7 +189,7 @@ func IsUpper(r rune) bool {
|
||||
func IsLower(r rune) bool {
|
||||
// See comment in IsGraphic.
|
||||
if uint32(r) <= MaxLatin1 {
|
||||
return properties[uint8(r)]&pLl != 0
|
||||
return properties[uint8(r)]&pLmask == pLl
|
||||
}
|
||||
return isExcludingLatin(Lower, r)
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ func main() {
|
||||
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
|
||||
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
|
||||
var url = flag.String("url",
|
||||
"http://www.unicode.org/Public/6.0.0/ucd/",
|
||||
"http://www.unicode.org/Public/6.2.0/ucd/",
|
||||
"URL of Unicode database directory")
|
||||
var tablelist = flag.String("tables",
|
||||
"all",
|
||||
@ -367,7 +367,7 @@ func loadCasefold() {
|
||||
}
|
||||
logger.Fatal(err)
|
||||
}
|
||||
if line[0] == '#' {
|
||||
if line[0] == '#' || len(strings.TrimSpace(line)) == 0 {
|
||||
continue
|
||||
}
|
||||
field := strings.Split(line, "; ")
|
||||
@ -1040,6 +1040,8 @@ func printLatinProperties() {
|
||||
property = "0"
|
||||
case "Ll":
|
||||
property = "pLl | pp"
|
||||
case "Lo":
|
||||
property = "pLo | pp"
|
||||
case "Lu":
|
||||
property = "pLu | pp"
|
||||
case "Nd", "No":
|
||||
|
@ -14,7 +14,7 @@ type T struct {
|
||||
script string
|
||||
}
|
||||
|
||||
// Hand-chosen tests from Unicode 5.1.0 & 6.0..0, mostly to discover when new
|
||||
// Hand-chosen tests from Unicode 5.1.0, 6.0.0 and 6.2.0, mostly to discover when new
|
||||
// scripts and categories arise.
|
||||
var inTest = []T{
|
||||
{0x06e2, "Arabic"},
|
||||
@ -31,6 +31,7 @@ var inTest = []T{
|
||||
{0x11011, "Brahmi"},
|
||||
{0x156d, "Canadian_Aboriginal"},
|
||||
{0x102a9, "Carian"},
|
||||
{0x11111, "Chakma"},
|
||||
{0xaa4d, "Cham"},
|
||||
{0x13c2, "Cherokee"},
|
||||
{0x0020, "Common"},
|
||||
@ -76,6 +77,9 @@ var inTest = []T{
|
||||
{0x0d42, "Malayalam"},
|
||||
{0x0843, "Mandaic"},
|
||||
{0xabd0, "Meetei_Mayek"},
|
||||
{0x1099f, "Meroitic_Hieroglyphs"},
|
||||
{0x109a0, "Meroitic_Cursive"},
|
||||
{0x16f00, "Miao"},
|
||||
{0x1822, "Mongolian"},
|
||||
{0x104c, "Myanmar"},
|
||||
{0x19c3, "New_Tai_Lue"},
|
||||
@ -94,8 +98,10 @@ var inTest = []T{
|
||||
{0x16c0, "Runic"},
|
||||
{0x081d, "Samaritan"},
|
||||
{0xa892, "Saurashtra"},
|
||||
{0x111a0, "Sharada"},
|
||||
{0x10463, "Shavian"},
|
||||
{0x0dbd, "Sinhala"},
|
||||
{0x110d0, "Sora_Sompeng"},
|
||||
{0x1ba3, "Sundanese"},
|
||||
{0xa803, "Syloti_Nagri"},
|
||||
{0x070f, "Syriac"},
|
||||
@ -104,6 +110,7 @@ var inTest = []T{
|
||||
{0x1972, "Tai_Le"},
|
||||
{0x1a62, "Tai_Tham"},
|
||||
{0xaadc, "Tai_Viet"},
|
||||
{0x116c9, "Takri"},
|
||||
{0x0bbf, "Tamil"},
|
||||
{0x0c55, "Telugu"},
|
||||
{0x07a7, "Thaana"},
|
||||
@ -121,7 +128,7 @@ var outTest = []T{ // not really worth being thorough
|
||||
|
||||
var inCategoryTest = []T{
|
||||
{0x0081, "Cc"},
|
||||
{0x17b4, "Cf"},
|
||||
{0x200B, "Cf"},
|
||||
{0xf0000, "Co"},
|
||||
{0xdb80, "Cs"},
|
||||
{0x0236, "Ll"},
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user