1
0
mirror of https://github.com/golang/go synced 2024-11-19 17:14:44 -07:00

unicode: move unicode and related packages to Unicode 6.2.0.

R=r, mpvl
CC=golang-dev
https://golang.org/cl/6818067
This commit is contained in:
Marcel van Lohuizen 2012-10-31 17:32:16 +01:00
parent b8b329451c
commit e14cf90a8b
9 changed files with 46874 additions and 45957 deletions

View File

@ -129,7 +129,7 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
if ce[0] > b.varTop {
b.varTop = ce[0]
}
} else if ce[0] > 0 {
} else if ce[0] > 1 { // 1 is a special primary value reserved for FFFE
if ce[0] <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
}

View File

@ -38,7 +38,7 @@ var (
`URL of the Default Unicode Collation Element Table (DUCET). This can be a zip
file containing the file allkeys_CLDR.txt or an allkeys.txt file.`)
cldr = flag.String("cldr",
"http://www.unicode.org/Public/cldr/2.0.1/core.zip",
"http://www.unicode.org/Public/cldr/22/core.zip",
"URL of CLDR archive.")
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -6,15 +6,17 @@ package unicode
// Bit masks for each code point under U+0100, for fast lookup.
const (
pC = 1 << iota // a control character.
pP // a punctuation character.
pN // a numeral.
pS // a symbolic character.
pZ // a spacing character.
pLu // an upper-case letter.
pLl // a lower-case letter.
pp // a printable character according to Go's definition.
pg = pp | pZ // a graphical character according to the Unicode definition.
pC = 1 << iota // a control character.
pP // a punctuation character.
pN // a numeral.
pS // a symbolic character.
pZ // a spacing character.
pLu // an upper-case letter.
pLl // a lower-case letter.
pp // a printable character according to Go's definition.
pg = pp | pZ // a graphical character according to the Unicode definition.
pLo = pLl | pLu // a letter that is neither upper nor lower case.
pLmask = pLo
)
// GraphicRanges defines the set of graphic characters according to Unicode.
@ -76,7 +78,7 @@ func IsControl(r rune) bool {
// IsLetter reports whether the rune is a letter (category L).
func IsLetter(r rune) bool {
if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&(pLu|pLl) != 0
return properties[uint8(r)]&(pLmask) != 0
}
return isExcludingLatin(Letter, r)
}

View File

@ -180,7 +180,7 @@ func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
func IsUpper(r rune) bool {
// See comment in IsGraphic.
if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&pLu != 0
return properties[uint8(r)]&pLmask == pLu
}
return isExcludingLatin(Upper, r)
}
@ -189,7 +189,7 @@ func IsUpper(r rune) bool {
func IsLower(r rune) bool {
// See comment in IsGraphic.
if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&pLl != 0
return properties[uint8(r)]&pLmask == pLl
}
return isExcludingLatin(Lower, r)
}

View File

@ -41,7 +41,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url",
"http://www.unicode.org/Public/6.0.0/ucd/",
"http://www.unicode.org/Public/6.2.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
@ -367,7 +367,7 @@ func loadCasefold() {
}
logger.Fatal(err)
}
if line[0] == '#' {
if line[0] == '#' || len(strings.TrimSpace(line)) == 0 {
continue
}
field := strings.Split(line, "; ")
@ -1040,6 +1040,8 @@ func printLatinProperties() {
property = "0"
case "Ll":
property = "pLl | pp"
case "Lo":
property = "pLo | pp"
case "Lu":
property = "pLu | pp"
case "Nd", "No":

View File

@ -14,7 +14,7 @@ type T struct {
script string
}
// Hand-chosen tests from Unicode 5.1.0 & 6.0..0, mostly to discover when new
// Hand-chosen tests from Unicode 5.1.0, 6.0.0 and 6.2.0, mostly to discover when new
// scripts and categories arise.
var inTest = []T{
{0x06e2, "Arabic"},
@ -31,6 +31,7 @@ var inTest = []T{
{0x11011, "Brahmi"},
{0x156d, "Canadian_Aboriginal"},
{0x102a9, "Carian"},
{0x11111, "Chakma"},
{0xaa4d, "Cham"},
{0x13c2, "Cherokee"},
{0x0020, "Common"},
@ -76,6 +77,9 @@ var inTest = []T{
{0x0d42, "Malayalam"},
{0x0843, "Mandaic"},
{0xabd0, "Meetei_Mayek"},
{0x1099f, "Meroitic_Hieroglyphs"},
{0x109a0, "Meroitic_Cursive"},
{0x16f00, "Miao"},
{0x1822, "Mongolian"},
{0x104c, "Myanmar"},
{0x19c3, "New_Tai_Lue"},
@ -94,8 +98,10 @@ var inTest = []T{
{0x16c0, "Runic"},
{0x081d, "Samaritan"},
{0xa892, "Saurashtra"},
{0x111a0, "Sharada"},
{0x10463, "Shavian"},
{0x0dbd, "Sinhala"},
{0x110d0, "Sora_Sompeng"},
{0x1ba3, "Sundanese"},
{0xa803, "Syloti_Nagri"},
{0x070f, "Syriac"},
@ -104,6 +110,7 @@ var inTest = []T{
{0x1972, "Tai_Le"},
{0x1a62, "Tai_Tham"},
{0xaadc, "Tai_Viet"},
{0x116c9, "Takri"},
{0x0bbf, "Tamil"},
{0x0c55, "Telugu"},
{0x07a7, "Thaana"},
@ -121,7 +128,7 @@ var outTest = []T{ // not really worth being thorough
var inCategoryTest = []T{
{0x0081, "Cc"},
{0x17b4, "Cf"},
{0x200B, "Cf"},
{0xf0000, "Co"},
{0xdb80, "Cs"},
{0x0236, "Ll"},

File diff suppressed because it is too large Load Diff