1
0
mirror of https://github.com/golang/go synced 2024-10-03 16:41:28 -06:00

generate the unicode tables directly from web database

after this CL, two more to come:
	1) add an exhaustive test, probably as a variant of maketables
	2) add ToUpper, ToLower, ToTitle and associated tests

R=rsc
DELTA=1578  (1007 added, 559 deleted, 12 changed)
OCL=33902
CL=33907
This commit is contained in:
Rob Pike 2009-08-26 16:01:31 -07:00
parent 2aea4a063b
commit 396b47bbbd
8 changed files with 1019 additions and 570 deletions

View File

@ -68,7 +68,7 @@ func (c *common) setNext(i instr) { c._next = i }
func (c *common) index() int { return c._index }
func (c *common) setIndex(i int) { c._index = i }
// The representation of a compiled regular expression.
// Regexp is the representation of a compiled regular expression.
// The public interface is entirely through methods.
type Regexp struct {
expr string; // the original expression

View File

@ -7,6 +7,15 @@ include $(GOROOT)/src/Make.$(GOARCH)
TARG=unicode
GOFILES=\
decimaldigit.go\
digittables.go\
letter.go\
lettertables.go\
include $(GOROOT)/src/Make.pkg
tables:
$(GC) maketables.go
$(LD) -o maketables maketables.$O
maketables --digits > digittables.go
maketables > lettertables.go
rm -f maketables

View File

@ -4,46 +4,6 @@
package unicode
// TODO: Generated by hand starting with
// http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
// These ranges are the characters with the third field "Nd".
// Should generate automatically etc.
// Decimal digit is the set of Unicode characters with the "decimal digit" property.
var DecimalDigit = []Range{
Range{0x0030, 0x0039, 1},
Range{0x0660, 0x0669, 1},
Range{0x06F0, 0x06F9, 1},
Range{0x07C0, 0x07C9, 1},
Range{0x0966, 0x096F, 1},
Range{0x09E6, 0x09EF, 1},
Range{0x0A66, 0x0A6F, 1},
Range{0x0AE6, 0x0AEF, 1},
Range{0x0B66, 0x0B6F, 1},
Range{0x0BE6, 0x0BEF, 1},
Range{0x0C66, 0x0C6F, 1},
Range{0x0CE6, 0x0CEF, 1},
Range{0x0D66, 0x0D6F, 1},
Range{0x0E50, 0x0E59, 1},
Range{0x0ED0, 0x0ED9, 1},
Range{0x0F20, 0x0F29, 1},
Range{0x1040, 0x1049, 1},
Range{0x1090, 0x1099, 1},
Range{0x17E0, 0x17E9, 1},
Range{0x1810, 0x1819, 1},
Range{0x1946, 0x194F, 1},
Range{0x19D0, 0x19D9, 1},
Range{0x1B50, 0x1B59, 1},
Range{0x1BB0, 0x1BB9, 1},
Range{0x1C40, 0x1C49, 1},
Range{0x1C50, 0x1C59, 1},
Range{0xA620, 0xA629, 1},
Range{0xA8D0, 0xA8D9, 1},
Range{0xA900, 0xA909, 1},
Range{0xAA50, 0xAA59, 1},
Range{0xFF10, 0xFF19, 1},
}
// IsDecimalDigit reports whether the rune is a decimal digit.
func IsDecimalDigit(rune int) bool {
return Is(DecimalDigit, rune);

View File

@ -0,0 +1,43 @@
// Generated by running
// tables --digits=true --url=http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt
// DO NOT EDIT
package unicode
// DecimalDigit is the set of Unicode characters with the "decimal digit" property.
var DecimalDigit = decimalDigit
// decimalDigit is the generated table that the exported DecimalDigit
// variable is initialized from. Per the file header it is produced from
// UnicodeData.txt (Unicode 5.1.0) and must not be edited by hand.
//
// Every entry is a three-value Range literal. The last value is the Stride
// field; the first two are presumably the inclusive low and high code
// points of the range (confirm against the Range declaration). All entries
// here use stride 1 and are listed in increasing code point order.
var decimalDigit = []Range {
Range{0x0030, 0x0039, 1},
Range{0x0660, 0x0669, 1},
Range{0x06f0, 0x06f9, 1},
Range{0x07c0, 0x07c9, 1},
Range{0x0966, 0x096f, 1},
Range{0x09e6, 0x09ef, 1},
Range{0x0a66, 0x0a6f, 1},
Range{0x0ae6, 0x0aef, 1},
Range{0x0b66, 0x0b6f, 1},
Range{0x0be6, 0x0bef, 1},
Range{0x0c66, 0x0c6f, 1},
Range{0x0ce6, 0x0cef, 1},
Range{0x0d66, 0x0d6f, 1},
Range{0x0e50, 0x0e59, 1},
Range{0x0ed0, 0x0ed9, 1},
Range{0x0f20, 0x0f29, 1},
Range{0x1040, 0x1049, 1},
Range{0x1090, 0x1099, 1},
Range{0x17e0, 0x17e9, 1},
Range{0x1810, 0x1819, 1},
Range{0x1946, 0x194f, 1},
Range{0x19d0, 0x19d9, 1},
Range{0x1b50, 0x1b59, 1},
Range{0x1bb0, 0x1bb9, 1},
Range{0x1c40, 0x1c49, 1},
Range{0x1c50, 0x1c59, 1},
Range{0xa620, 0xa629, 1},
Range{0xa8d0, 0xa8d9, 1},
Range{0xa900, 0xa909, 1},
Range{0xaa50, 0xaa59, 1},
Range{0xff10, 0xff19, 1},
// The two entries below lie above 0xFFFF; the hand-written table this file
// replaces stopped at 0xff10-0xff19.
Range{0x104a0, 0x104a9, 1},
Range{0x1d7ce, 0x1d7ff, 1},
}

View File

@ -2,14 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Unicode table access. A quick start for now.
// TODO: Generated by hand.
// Should generate automatically from Unicode
// tables, expand to other properties, split into files,
// link in only the tables that are used by the program,
// etc.
// This package provides data and functions to test some properties of Unicode code points.
// It is rudimentary but will improve.
package unicode
@ -22,515 +14,6 @@ type Range struct {
Stride int;
}
// Upper is the set of Unicode upper case letters.
var Upper = []Range{
Range{0x0041, 0x005a, 1},
Range{0x00c0, 0x00d6, 1},
Range{0x00d8, 0x00de, 1},
Range{0x0100, 0x0136, 2},
Range{0x0139, 0x0147, 2},
Range{0x014a, 0x0176, 2},
Range{0x0178, 0x0179, 1},
Range{0x017b, 0x017d, 2},
Range{0x0181, 0x0182, 1},
Range{0x0184, 0x0184, 1},
Range{0x0186, 0x0187, 1},
Range{0x0189, 0x018b, 1},
Range{0x018e, 0x0191, 1},
Range{0x0193, 0x0194, 1},
Range{0x0196, 0x0198, 1},
Range{0x019c, 0x019d, 1},
Range{0x019f, 0x01a0, 1},
Range{0x01a2, 0x01a4, 2},
Range{0x01a6, 0x01a7, 1},
Range{0x01a9, 0x01ac, 3},
Range{0x01ae, 0x01af, 1},
Range{0x01b1, 0x01b3, 1},
Range{0x01b5, 0x01b5, 1},
Range{0x01b7, 0x01b8, 1},
Range{0x01bc, 0x01c4, 8},
Range{0x01c7, 0x01cd, 3},
Range{0x01cf, 0x01db, 2},
Range{0x01de, 0x01ee, 2},
Range{0x01f1, 0x01f4, 3},
Range{0x01f6, 0x01f8, 1},
Range{0x01fa, 0x0232, 2},
Range{0x023a, 0x023b, 1},
Range{0x023d, 0x023e, 1},
Range{0x0241, 0x0241, 1},
Range{0x0243, 0x0246, 1},
Range{0x0248, 0x024e, 2},
Range{0x0370, 0x0372, 2},
Range{0x0376, 0x0386, 16},
Range{0x0388, 0x038a, 1},
Range{0x038c, 0x038c, 1},
Range{0x038e, 0x038f, 1},
Range{0x0391, 0x03a1, 1},
Range{0x03a3, 0x03ab, 1},
Range{0x03cf, 0x03cf, 1},
Range{0x03d2, 0x03d4, 1},
Range{0x03d8, 0x03ee, 2},
Range{0x03f4, 0x03f7, 3},
Range{0x03f9, 0x03fa, 1},
Range{0x03fd, 0x042f, 1},
Range{0x0460, 0x0480, 2},
Range{0x048a, 0x04be, 2},
Range{0x04c0, 0x04c1, 1},
Range{0x04c3, 0x04cd, 2},
Range{0x04d0, 0x0522, 2},
Range{0x0531, 0x0556, 1},
Range{0x10a0, 0x10c5, 1},
Range{0x1e00, 0x1e94, 2},
Range{0x1e9e, 0x1efe, 2},
Range{0x1f08, 0x1f0f, 1},
Range{0x1f18, 0x1f1d, 1},
Range{0x1f28, 0x1f2f, 1},
Range{0x1f38, 0x1f3f, 1},
Range{0x1f48, 0x1f4d, 1},
Range{0x1f59, 0x1f5f, 2},
Range{0x1f68, 0x1f6f, 1},
Range{0x1fb8, 0x1fbb, 1},
Range{0x1fc8, 0x1fcb, 1},
Range{0x1fd8, 0x1fdb, 1},
Range{0x1fe8, 0x1fec, 1},
Range{0x1ff8, 0x1ffb, 1},
Range{0x2102, 0x2107, 5},
Range{0x210b, 0x210d, 1},
Range{0x2110, 0x2112, 1},
Range{0x2115, 0x2115, 1},
Range{0x2119, 0x211d, 1},
Range{0x2124, 0x2128, 2},
Range{0x212a, 0x212d, 1},
Range{0x2130, 0x2133, 1},
Range{0x213e, 0x213f, 1},
Range{0x2145, 0x2183, 62},
Range{0x2c00, 0x2c2e, 1},
Range{0x2c60, 0x2c60, 1},
Range{0x2c62, 0x2c64, 1},
Range{0x2c67, 0x2c6b, 2},
Range{0x2c6d, 0x2c6f, 1},
Range{0x2c72, 0x2c75, 3},
Range{0x2c80, 0x2ce2, 2},
Range{0xa640, 0xa65e, 2},
Range{0xa662, 0xa66c, 2},
Range{0xa680, 0xa696, 2},
Range{0xa722, 0xa72e, 2},
Range{0xa732, 0xa76e, 2},
Range{0xa779, 0xa77b, 2},
Range{0xa77d, 0xa77e, 1},
Range{0xa780, 0xa786, 2},
Range{0xa78b, 0xa78b, 1},
Range{0xff21, 0xff3a, 1},
Range{0x10400, 0x10427, 1},
Range{0x1d400, 0x1d419, 1},
Range{0x1d434, 0x1d44d, 1},
Range{0x1d468, 0x1d481, 1},
Range{0x1d49c, 0x1d49c, 1},
Range{0x1d49e, 0x1d49f, 1},
Range{0x1d4a2, 0x1d4a2, 1},
Range{0x1d4a5, 0x1d4a6, 1},
Range{0x1d4a9, 0x1d4ac, 1},
Range{0x1d4ae, 0x1d4b5, 1},
Range{0x1d4d0, 0x1d4e9, 1},
Range{0x1d504, 0x1d505, 1},
Range{0x1d507, 0x1d50a, 1},
Range{0x1d50d, 0x1d514, 1},
Range{0x1d516, 0x1d51c, 1},
Range{0x1d538, 0x1d539, 1},
Range{0x1d53b, 0x1d53e, 1},
Range{0x1d540, 0x1d544, 1},
Range{0x1d546, 0x1d546, 1},
Range{0x1d54a, 0x1d550, 1},
Range{0x1d56c, 0x1d585, 1},
Range{0x1d5a0, 0x1d5b9, 1},
Range{0x1d5d4, 0x1d5ed, 1},
Range{0x1d608, 0x1d621, 1},
Range{0x1d63c, 0x1d655, 1},
Range{0x1d670, 0x1d689, 1},
Range{0x1d6a8, 0x1d6c0, 1},
Range{0x1d6e2, 0x1d6fa, 1},
Range{0x1d71c, 0x1d734, 1},
Range{0x1d756, 0x1d76e, 1},
Range{0x1d790, 0x1d7a8, 1},
Range{0x1d7ca, 0x1d7ca, 1},
}
// Letter is the set of Unicode letters.
var Letter = []Range {
Range{0x0041, 0x005a, 1},
Range{0x0061, 0x007a, 1},
Range{0x00aa, 0x00b5, 11},
Range{0x00ba, 0x00ba, 1},
Range{0x00c0, 0x00d6, 1},
Range{0x00d8, 0x00f6, 1},
Range{0x00f8, 0x02c1, 1},
Range{0x02c6, 0x02d1, 1},
Range{0x02e0, 0x02e4, 1},
Range{0x02ec, 0x02ee, 2},
Range{0x0370, 0x0374, 1},
Range{0x0376, 0x0377, 1},
Range{0x037a, 0x037d, 1},
Range{0x0386, 0x0386, 1},
Range{0x0388, 0x038a, 1},
Range{0x038c, 0x038c, 1},
Range{0x038e, 0x03a1, 1},
Range{0x03a3, 0x03f5, 1},
Range{0x03f7, 0x0481, 1},
Range{0x048a, 0x0523, 1},
Range{0x0531, 0x0556, 1},
Range{0x0559, 0x0559, 1},
Range{0x0561, 0x0587, 1},
Range{0x05d0, 0x05ea, 1},
Range{0x05f0, 0x05f2, 1},
Range{0x0621, 0x064a, 1},
Range{0x066e, 0x066f, 1},
Range{0x0671, 0x06d3, 1},
Range{0x06d5, 0x06d5, 1},
Range{0x06e5, 0x06e6, 1},
Range{0x06ee, 0x06ef, 1},
Range{0x06fa, 0x06fc, 1},
Range{0x06ff, 0x0710, 17},
Range{0x0712, 0x072f, 1},
Range{0x074d, 0x07a5, 1},
Range{0x07b1, 0x07b1, 1},
Range{0x07ca, 0x07ea, 1},
Range{0x07f4, 0x07f5, 1},
Range{0x07fa, 0x07fa, 1},
Range{0x0904, 0x0939, 1},
Range{0x093d, 0x0950, 19},
Range{0x0958, 0x0961, 1},
Range{0x0971, 0x0972, 1},
Range{0x097b, 0x097f, 1},
Range{0x0985, 0x098c, 1},
Range{0x098f, 0x0990, 1},
Range{0x0993, 0x09a8, 1},
Range{0x09aa, 0x09b0, 1},
Range{0x09b2, 0x09b2, 1},
Range{0x09b6, 0x09b9, 1},
Range{0x09bd, 0x09ce, 17},
Range{0x09dc, 0x09dd, 1},
Range{0x09df, 0x09e1, 1},
Range{0x09f0, 0x09f1, 1},
Range{0x0a05, 0x0a0a, 1},
Range{0x0a0f, 0x0a10, 1},
Range{0x0a13, 0x0a28, 1},
Range{0x0a2a, 0x0a30, 1},
Range{0x0a32, 0x0a33, 1},
Range{0x0a35, 0x0a36, 1},
Range{0x0a38, 0x0a39, 1},
Range{0x0a59, 0x0a5c, 1},
Range{0x0a5e, 0x0a5e, 1},
Range{0x0a72, 0x0a74, 1},
Range{0x0a85, 0x0a8d, 1},
Range{0x0a8f, 0x0a91, 1},
Range{0x0a93, 0x0aa8, 1},
Range{0x0aaa, 0x0ab0, 1},
Range{0x0ab2, 0x0ab3, 1},
Range{0x0ab5, 0x0ab9, 1},
Range{0x0abd, 0x0ad0, 19},
Range{0x0ae0, 0x0ae1, 1},
Range{0x0b05, 0x0b0c, 1},
Range{0x0b0f, 0x0b10, 1},
Range{0x0b13, 0x0b28, 1},
Range{0x0b2a, 0x0b30, 1},
Range{0x0b32, 0x0b33, 1},
Range{0x0b35, 0x0b39, 1},
Range{0x0b3d, 0x0b3d, 1},
Range{0x0b5c, 0x0b5d, 1},
Range{0x0b5f, 0x0b61, 1},
Range{0x0b71, 0x0b83, 18},
Range{0x0b85, 0x0b8a, 1},
Range{0x0b8e, 0x0b90, 1},
Range{0x0b92, 0x0b95, 1},
Range{0x0b99, 0x0b9a, 1},
Range{0x0b9c, 0x0b9c, 1},
Range{0x0b9e, 0x0b9f, 1},
Range{0x0ba3, 0x0ba4, 1},
Range{0x0ba8, 0x0baa, 1},
Range{0x0bae, 0x0bb9, 1},
Range{0x0bd0, 0x0bd0, 1},
Range{0x0c05, 0x0c0c, 1},
Range{0x0c0e, 0x0c10, 1},
Range{0x0c12, 0x0c28, 1},
Range{0x0c2a, 0x0c33, 1},
Range{0x0c35, 0x0c39, 1},
Range{0x0c3d, 0x0c3d, 1},
Range{0x0c58, 0x0c59, 1},
Range{0x0c60, 0x0c61, 1},
Range{0x0c85, 0x0c8c, 1},
Range{0x0c8e, 0x0c90, 1},
Range{0x0c92, 0x0ca8, 1},
Range{0x0caa, 0x0cb3, 1},
Range{0x0cb5, 0x0cb9, 1},
Range{0x0cbd, 0x0cde, 33},
Range{0x0ce0, 0x0ce1, 1},
Range{0x0d05, 0x0d0c, 1},
Range{0x0d0e, 0x0d10, 1},
Range{0x0d12, 0x0d28, 1},
Range{0x0d2a, 0x0d39, 1},
Range{0x0d3d, 0x0d3d, 1},
Range{0x0d60, 0x0d61, 1},
Range{0x0d7a, 0x0d7f, 1},
Range{0x0d85, 0x0d96, 1},
Range{0x0d9a, 0x0db1, 1},
Range{0x0db3, 0x0dbb, 1},
Range{0x0dbd, 0x0dbd, 1},
Range{0x0dc0, 0x0dc6, 1},
Range{0x0e01, 0x0e30, 1},
Range{0x0e32, 0x0e33, 1},
Range{0x0e40, 0x0e46, 1},
Range{0x0e81, 0x0e82, 1},
Range{0x0e84, 0x0e84, 1},
Range{0x0e87, 0x0e88, 1},
Range{0x0e8a, 0x0e8d, 3},
Range{0x0e94, 0x0e97, 1},
Range{0x0e99, 0x0e9f, 1},
Range{0x0ea1, 0x0ea3, 1},
Range{0x0ea5, 0x0ea7, 2},
Range{0x0eaa, 0x0eab, 1},
Range{0x0ead, 0x0eb0, 1},
Range{0x0eb2, 0x0eb3, 1},
Range{0x0ebd, 0x0ebd, 1},
Range{0x0ec0, 0x0ec4, 1},
Range{0x0ec6, 0x0ec6, 1},
Range{0x0edc, 0x0edd, 1},
Range{0x0f00, 0x0f00, 1},
Range{0x0f40, 0x0f47, 1},
Range{0x0f49, 0x0f6c, 1},
Range{0x0f88, 0x0f8b, 1},
Range{0x1000, 0x102a, 1},
Range{0x103f, 0x103f, 1},
Range{0x1050, 0x1055, 1},
Range{0x105a, 0x105d, 1},
Range{0x1061, 0x1061, 1},
Range{0x1065, 0x1066, 1},
Range{0x106e, 0x1070, 1},
Range{0x1075, 0x1081, 1},
Range{0x108e, 0x108e, 1},
Range{0x10a0, 0x10c5, 1},
Range{0x10d0, 0x10fa, 1},
Range{0x10fc, 0x10fc, 1},
Range{0x1100, 0x1159, 1},
Range{0x115f, 0x11a2, 1},
Range{0x11a8, 0x11f9, 1},
Range{0x1200, 0x1248, 1},
Range{0x124a, 0x124d, 1},
Range{0x1250, 0x1256, 1},
Range{0x1258, 0x1258, 1},
Range{0x125a, 0x125d, 1},
Range{0x1260, 0x1288, 1},
Range{0x128a, 0x128d, 1},
Range{0x1290, 0x12b0, 1},
Range{0x12b2, 0x12b5, 1},
Range{0x12b8, 0x12be, 1},
Range{0x12c0, 0x12c0, 1},
Range{0x12c2, 0x12c5, 1},
Range{0x12c8, 0x12d6, 1},
Range{0x12d8, 0x1310, 1},
Range{0x1312, 0x1315, 1},
Range{0x1318, 0x135a, 1},
Range{0x1380, 0x138f, 1},
Range{0x13a0, 0x13f4, 1},
Range{0x1401, 0x166c, 1},
Range{0x166f, 0x1676, 1},
Range{0x1681, 0x169a, 1},
Range{0x16a0, 0x16ea, 1},
Range{0x1700, 0x170c, 1},
Range{0x170e, 0x1711, 1},
Range{0x1720, 0x1731, 1},
Range{0x1740, 0x1751, 1},
Range{0x1760, 0x176c, 1},
Range{0x176e, 0x1770, 1},
Range{0x1780, 0x17b3, 1},
Range{0x17d7, 0x17dc, 5},
Range{0x1820, 0x1877, 1},
Range{0x1880, 0x18a8, 1},
Range{0x18aa, 0x18aa, 1},
Range{0x1900, 0x191c, 1},
Range{0x1950, 0x196d, 1},
Range{0x1970, 0x1974, 1},
Range{0x1980, 0x19a9, 1},
Range{0x19c1, 0x19c7, 1},
Range{0x1a00, 0x1a16, 1},
Range{0x1b05, 0x1b33, 1},
Range{0x1b45, 0x1b4b, 1},
Range{0x1b83, 0x1ba0, 1},
Range{0x1bae, 0x1baf, 1},
Range{0x1c00, 0x1c23, 1},
Range{0x1c4d, 0x1c4f, 1},
Range{0x1c5a, 0x1c7d, 1},
Range{0x1d00, 0x1dbf, 1},
Range{0x1e00, 0x1f15, 1},
Range{0x1f18, 0x1f1d, 1},
Range{0x1f20, 0x1f45, 1},
Range{0x1f48, 0x1f4d, 1},
Range{0x1f50, 0x1f57, 1},
Range{0x1f59, 0x1f5d, 2},
Range{0x1f5f, 0x1f7d, 1},
Range{0x1f80, 0x1fb4, 1},
Range{0x1fb6, 0x1fbc, 1},
Range{0x1fbe, 0x1fbe, 1},
Range{0x1fc2, 0x1fc4, 1},
Range{0x1fc6, 0x1fcc, 1},
Range{0x1fd0, 0x1fd3, 1},
Range{0x1fd6, 0x1fdb, 1},
Range{0x1fe0, 0x1fec, 1},
Range{0x1ff2, 0x1ff4, 1},
Range{0x1ff6, 0x1ffc, 1},
Range{0x2071, 0x207f, 14},
Range{0x2090, 0x2094, 1},
Range{0x2102, 0x2107, 5},
Range{0x210a, 0x2113, 1},
Range{0x2115, 0x2115, 1},
Range{0x2119, 0x211d, 1},
Range{0x2124, 0x2128, 2},
Range{0x212a, 0x212d, 1},
Range{0x212f, 0x2139, 1},
Range{0x213c, 0x213f, 1},
Range{0x2145, 0x2149, 1},
Range{0x214e, 0x214e, 1},
Range{0x2183, 0x2184, 1},
Range{0x2c00, 0x2c2e, 1},
Range{0x2c30, 0x2c5e, 1},
Range{0x2c60, 0x2c6f, 1},
Range{0x2c71, 0x2c7d, 1},
Range{0x2c80, 0x2ce4, 1},
Range{0x2d00, 0x2d25, 1},
Range{0x2d30, 0x2d65, 1},
Range{0x2d6f, 0x2d6f, 1},
Range{0x2d80, 0x2d96, 1},
Range{0x2da0, 0x2da6, 1},
Range{0x2da8, 0x2dae, 1},
Range{0x2db0, 0x2db6, 1},
Range{0x2db8, 0x2dbe, 1},
Range{0x2dc0, 0x2dc6, 1},
Range{0x2dc8, 0x2dce, 1},
Range{0x2dd0, 0x2dd6, 1},
Range{0x2dd8, 0x2dde, 1},
Range{0x2e2f, 0x2e2f, 1},
Range{0x3005, 0x3006, 1},
Range{0x3031, 0x3035, 1},
Range{0x303b, 0x303c, 1},
Range{0x3041, 0x3096, 1},
Range{0x309d, 0x309f, 1},
Range{0x30a1, 0x30fa, 1},
Range{0x30fc, 0x30ff, 1},
Range{0x3105, 0x312d, 1},
Range{0x3131, 0x318e, 1},
Range{0x31a0, 0x31b7, 1},
Range{0x31f0, 0x31ff, 1},
Range{0x3400, 0x4db5, 1},
Range{0x4e00, 0x9fc3, 1},
Range{0xa000, 0xa48c, 1},
Range{0xa500, 0xa60c, 1},
Range{0xa610, 0xa61f, 1},
Range{0xa62a, 0xa62b, 1},
Range{0xa640, 0xa65f, 1},
Range{0xa662, 0xa66e, 1},
Range{0xa67f, 0xa697, 1},
Range{0xa717, 0xa71f, 1},
Range{0xa722, 0xa788, 1},
Range{0xa78b, 0xa78c, 1},
Range{0xa7fb, 0xa801, 1},
Range{0xa803, 0xa805, 1},
Range{0xa807, 0xa80a, 1},
Range{0xa80c, 0xa822, 1},
Range{0xa840, 0xa873, 1},
Range{0xa882, 0xa8b3, 1},
Range{0xa90a, 0xa925, 1},
Range{0xa930, 0xa946, 1},
Range{0xaa00, 0xaa28, 1},
Range{0xaa40, 0xaa42, 1},
Range{0xaa44, 0xaa4b, 1},
Range{0xac00, 0xd7a3, 1},
Range{0xf900, 0xfa2d, 1},
Range{0xfa30, 0xfa6a, 1},
Range{0xfa70, 0xfad9, 1},
Range{0xfb00, 0xfb06, 1},
Range{0xfb13, 0xfb17, 1},
Range{0xfb1d, 0xfb1d, 1},
Range{0xfb1f, 0xfb28, 1},
Range{0xfb2a, 0xfb36, 1},
Range{0xfb38, 0xfb3c, 1},
Range{0xfb3e, 0xfb3e, 1},
Range{0xfb40, 0xfb41, 1},
Range{0xfb43, 0xfb44, 1},
Range{0xfb46, 0xfbb1, 1},
Range{0xfbd3, 0xfd3d, 1},
Range{0xfd50, 0xfd8f, 1},
Range{0xfd92, 0xfdc7, 1},
Range{0xfdf0, 0xfdfb, 1},
Range{0xfe70, 0xfe74, 1},
Range{0xfe76, 0xfefc, 1},
Range{0xff21, 0xff3a, 1},
Range{0xff41, 0xff5a, 1},
Range{0xff66, 0xffbe, 1},
Range{0xffc2, 0xffc7, 1},
Range{0xffca, 0xffcf, 1},
Range{0xffd2, 0xffd7, 1},
Range{0xffda, 0xffdc, 1},
Range{0x10000, 0x1000b, 1},
Range{0x1000d, 0x10026, 1},
Range{0x10028, 0x1003a, 1},
Range{0x1003c, 0x1003d, 1},
Range{0x1003f, 0x1004d, 1},
Range{0x10050, 0x1005d, 1},
Range{0x10080, 0x100fa, 1},
Range{0x10280, 0x1029c, 1},
Range{0x102a0, 0x102d0, 1},
Range{0x10300, 0x1031e, 1},
Range{0x10330, 0x10340, 1},
Range{0x10342, 0x10349, 1},
Range{0x10380, 0x1039d, 1},
Range{0x103a0, 0x103c3, 1},
Range{0x103c8, 0x103cf, 1},
Range{0x10400, 0x1049d, 1},
Range{0x10800, 0x10805, 1},
Range{0x10808, 0x10808, 1},
Range{0x1080a, 0x10835, 1},
Range{0x10837, 0x10838, 1},
Range{0x1083c, 0x1083f, 3},
Range{0x10900, 0x10915, 1},
Range{0x10920, 0x10939, 1},
Range{0x10a00, 0x10a00, 1},
Range{0x10a10, 0x10a13, 1},
Range{0x10a15, 0x10a17, 1},
Range{0x10a19, 0x10a33, 1},
Range{0x12000, 0x1236e, 1},
Range{0x1d400, 0x1d454, 1},
Range{0x1d456, 0x1d49c, 1},
Range{0x1d49e, 0x1d49f, 1},
Range{0x1d4a2, 0x1d4a2, 1},
Range{0x1d4a5, 0x1d4a6, 1},
Range{0x1d4a9, 0x1d4ac, 1},
Range{0x1d4ae, 0x1d4b9, 1},
Range{0x1d4bb, 0x1d4bb, 1},
Range{0x1d4bd, 0x1d4c3, 1},
Range{0x1d4c5, 0x1d505, 1},
Range{0x1d507, 0x1d50a, 1},
Range{0x1d50d, 0x1d514, 1},
Range{0x1d516, 0x1d51c, 1},
Range{0x1d51e, 0x1d539, 1},
Range{0x1d53b, 0x1d53e, 1},
Range{0x1d540, 0x1d544, 1},
Range{0x1d546, 0x1d546, 1},
Range{0x1d54a, 0x1d550, 1},
Range{0x1d552, 0x1d6a5, 1},
Range{0x1d6a8, 0x1d6c0, 1},
Range{0x1d6c2, 0x1d6da, 1},
Range{0x1d6dc, 0x1d6fa, 1},
Range{0x1d6fc, 0x1d714, 1},
Range{0x1d716, 0x1d734, 1},
Range{0x1d736, 0x1d74e, 1},
Range{0x1d750, 0x1d76e, 1},
Range{0x1d770, 0x1d788, 1},
Range{0x1d78a, 0x1d7a8, 1},
Range{0x1d7aa, 0x1d7c2, 1},
Range{0x1d7c4, 0x1d7cb, 1},
Range{0x20000, 0x2a6d6, 1},
Range{0x2f800, 0x2fa1d, 1},
}
// Is tests whether rune is in the specified table of ranges.
func Is(ranges []Range, rune int) bool {
// common case: rune is ASCII or Latin-1
@ -566,13 +49,22 @@ func Is(ranges []Range, rune int) bool {
return false;
}
// IsLetter reports whether the rune is an upper case letter.
// IsUpper tells whether rune is an upper case letter, that is, whether it
// is a member of the Upper table.
func IsUpper(rune int) bool {
	found := Is(Upper, rune);
	return found
}
// IsLower tells whether rune is a lower case letter, that is, whether it
// is a member of the Lower table.
func IsLower(rune int) bool {
	found := Is(Lower, rune);
	return found
}
// IsTitle tells whether rune is a title case letter, that is, whether it
// is a member of the Title table.
func IsTitle(rune int) bool {
	found := Is(Title, rune);
	return found
}
// IsLetter tells whether rune is a letter, that is, whether it is a member
// of the Letter table.
func IsLetter(rune int) bool {
	found := Is(Letter, rune);
	return found
}

View File

@ -6,7 +6,7 @@ package unicode
import "testing"
var upper = []int{
var upperTest = []int{
0x41,
0xc0,
0xd8,
@ -30,7 +30,7 @@ var upper = []int{
0x1d7ca,
}
var notupper = []int{
var notupperTest = []int{
0x40,
0x5b,
0x61,
@ -43,7 +43,7 @@ var notupper = []int{
0x10000,
}
var letter = []int{
var letterTest = []int{
0x41,
0x61,
0xaa,
@ -78,7 +78,7 @@ var letter = []int{
0x2fa1d,
}
var notletter = []int{
var notletterTest = []int{
0x20,
0x35,
0x375,
@ -90,17 +90,17 @@ var notletter = []int{
}
func TestIsLetter(t *testing.T) {
for i, r := range upper {
for i, r := range upperTest {
if !IsLetter(r) {
t.Errorf("IsLetter(%#x) = false, want true\n", r);
}
}
for i, r := range letter {
for i, r := range letterTest {
if !IsLetter(r) {
t.Errorf("IsLetter(%#x) = false, want true\n", r);
}
}
for i, r := range notletter {
for i, r := range notletterTest {
if IsLetter(r) {
t.Errorf("IsLetter(%#x) = true, want false\n", r);
}
@ -108,17 +108,17 @@ func TestIsLetter(t *testing.T) {
}
func TestIsUpper(t *testing.T) {
for i, r := range upper {
for i, r := range upperTest {
if !IsUpper(r) {
t.Errorf("IsUpper(%#x) = false, want true\n", r);
}
}
for i, r := range notupper {
for i, r := range notupperTest {
if IsUpper(r) {
t.Errorf("IsUpper(%#x) = true, want false\n", r);
}
}
for i, r := range notletter {
for i, r := range notletterTest {
if IsUpper(r) {
t.Errorf("IsUpper(%#x) = true, want false\n", r);
}

View File

@ -0,0 +1,664 @@
// Generated by running
// tables --digits=false --url=http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt
// DO NOT EDIT
package unicode
// Letter is the set of Unicode letters.
var Letter = letter
// letter is the table that the exported Letter variable is initialized
// from. It was produced from UnicodeData.txt (Unicode 5.1.0) by the table
// generator named in the file header.
//
// Every entry is a three-value Range literal whose last value is the Stride
// field. Entries with a stride greater than 1 pack isolated code points
// that happen to be evenly spaced (often just two of them) into one Range.
//
// NOTE(review): four entries were emitted with Stride == Hi-Lo, which keeps
// only the two end points of what are really large contiguous blocks (CJK
// Extension A, CJK Unified Ideographs, Hangul Syllables, CJK Extension B).
// UnicodeData.txt lists such blocks as a "<..., First>" / "<..., Last>"
// pair rather than one line per code point, and the generator apparently
// treated the pair as two unrelated characters. They are corrected to
// stride 1 below, matching the hand-written table this file replaces. The
// generator itself needs the same fix, otherwise regeneration will
// reintroduce the bug.
var letter = []Range {
	Range{0x0041, 0x005a, 1},
	Range{0x0061, 0x007a, 1},
	Range{0x00aa, 0x00b5, 11},
	Range{0x00ba, 0x00c0, 6},
	Range{0x00c1, 0x00d6, 1},
	Range{0x00d8, 0x00f6, 1},
	Range{0x00f8, 0x02c1, 1},
	Range{0x02c6, 0x02d1, 1},
	Range{0x02e0, 0x02e4, 1},
	Range{0x02ec, 0x02ee, 2},
	Range{0x0370, 0x0374, 1},
	Range{0x0376, 0x0377, 1},
	Range{0x037a, 0x037d, 1},
	Range{0x0386, 0x0388, 2},
	Range{0x0389, 0x038a, 1},
	Range{0x038c, 0x038e, 2},
	Range{0x038f, 0x03a1, 1},
	Range{0x03a3, 0x03f5, 1},
	Range{0x03f7, 0x0481, 1},
	Range{0x048a, 0x0523, 1},
	Range{0x0531, 0x0556, 1},
	Range{0x0559, 0x0561, 8},
	Range{0x0562, 0x0587, 1},
	Range{0x05d0, 0x05ea, 1},
	Range{0x05f0, 0x05f2, 1},
	Range{0x0621, 0x064a, 1},
	Range{0x066e, 0x066f, 1},
	Range{0x0671, 0x06d3, 1},
	Range{0x06d5, 0x06e5, 16},
	Range{0x06e6, 0x06ee, 8},
	Range{0x06ef, 0x06fa, 11},
	Range{0x06fb, 0x06fc, 1},
	Range{0x06ff, 0x0710, 17},
	Range{0x0712, 0x072f, 1},
	Range{0x074d, 0x07a5, 1},
	Range{0x07b1, 0x07ca, 25},
	Range{0x07cb, 0x07ea, 1},
	Range{0x07f4, 0x07f5, 1},
	Range{0x07fa, 0x0904, 266},
	Range{0x0905, 0x0939, 1},
	Range{0x093d, 0x0950, 19},
	Range{0x0958, 0x0961, 1},
	Range{0x0971, 0x0972, 1},
	Range{0x097b, 0x097f, 1},
	Range{0x0985, 0x098c, 1},
	Range{0x098f, 0x0990, 1},
	Range{0x0993, 0x09a8, 1},
	Range{0x09aa, 0x09b0, 1},
	Range{0x09b2, 0x09b6, 4},
	Range{0x09b7, 0x09b9, 1},
	Range{0x09bd, 0x09ce, 17},
	Range{0x09dc, 0x09dd, 1},
	Range{0x09df, 0x09e1, 1},
	Range{0x09f0, 0x09f1, 1},
	Range{0x0a05, 0x0a0a, 1},
	Range{0x0a0f, 0x0a10, 1},
	Range{0x0a13, 0x0a28, 1},
	Range{0x0a2a, 0x0a30, 1},
	Range{0x0a32, 0x0a33, 1},
	Range{0x0a35, 0x0a36, 1},
	Range{0x0a38, 0x0a39, 1},
	Range{0x0a59, 0x0a5c, 1},
	Range{0x0a5e, 0x0a72, 20},
	Range{0x0a73, 0x0a74, 1},
	Range{0x0a85, 0x0a8d, 1},
	Range{0x0a8f, 0x0a91, 1},
	Range{0x0a93, 0x0aa8, 1},
	Range{0x0aaa, 0x0ab0, 1},
	Range{0x0ab2, 0x0ab3, 1},
	Range{0x0ab5, 0x0ab9, 1},
	Range{0x0abd, 0x0ad0, 19},
	Range{0x0ae0, 0x0ae1, 1},
	Range{0x0b05, 0x0b0c, 1},
	Range{0x0b0f, 0x0b10, 1},
	Range{0x0b13, 0x0b28, 1},
	Range{0x0b2a, 0x0b30, 1},
	Range{0x0b32, 0x0b33, 1},
	Range{0x0b35, 0x0b39, 1},
	Range{0x0b3d, 0x0b5c, 31},
	Range{0x0b5d, 0x0b5f, 2},
	Range{0x0b60, 0x0b61, 1},
	Range{0x0b71, 0x0b83, 18},
	Range{0x0b85, 0x0b8a, 1},
	Range{0x0b8e, 0x0b90, 1},
	Range{0x0b92, 0x0b95, 1},
	Range{0x0b99, 0x0b9a, 1},
	Range{0x0b9c, 0x0b9e, 2},
	Range{0x0b9f, 0x0ba3, 4},
	Range{0x0ba4, 0x0ba8, 4},
	Range{0x0ba9, 0x0baa, 1},
	Range{0x0bae, 0x0bb9, 1},
	Range{0x0bd0, 0x0c05, 53},
	Range{0x0c06, 0x0c0c, 1},
	Range{0x0c0e, 0x0c10, 1},
	Range{0x0c12, 0x0c28, 1},
	Range{0x0c2a, 0x0c33, 1},
	Range{0x0c35, 0x0c39, 1},
	Range{0x0c3d, 0x0c58, 27},
	Range{0x0c59, 0x0c60, 7},
	Range{0x0c61, 0x0c85, 36},
	Range{0x0c86, 0x0c8c, 1},
	Range{0x0c8e, 0x0c90, 1},
	Range{0x0c92, 0x0ca8, 1},
	Range{0x0caa, 0x0cb3, 1},
	Range{0x0cb5, 0x0cb9, 1},
	Range{0x0cbd, 0x0cde, 33},
	Range{0x0ce0, 0x0ce1, 1},
	Range{0x0d05, 0x0d0c, 1},
	Range{0x0d0e, 0x0d10, 1},
	Range{0x0d12, 0x0d28, 1},
	Range{0x0d2a, 0x0d39, 1},
	Range{0x0d3d, 0x0d60, 35},
	Range{0x0d61, 0x0d7a, 25},
	Range{0x0d7b, 0x0d7f, 1},
	Range{0x0d85, 0x0d96, 1},
	Range{0x0d9a, 0x0db1, 1},
	Range{0x0db3, 0x0dbb, 1},
	Range{0x0dbd, 0x0dc0, 3},
	Range{0x0dc1, 0x0dc6, 1},
	Range{0x0e01, 0x0e30, 1},
	Range{0x0e32, 0x0e33, 1},
	Range{0x0e40, 0x0e46, 1},
	Range{0x0e81, 0x0e82, 1},
	Range{0x0e84, 0x0e87, 3},
	Range{0x0e88, 0x0e8a, 2},
	Range{0x0e8d, 0x0e94, 7},
	Range{0x0e95, 0x0e97, 1},
	Range{0x0e99, 0x0e9f, 1},
	Range{0x0ea1, 0x0ea3, 1},
	Range{0x0ea5, 0x0ea7, 2},
	Range{0x0eaa, 0x0eab, 1},
	Range{0x0ead, 0x0eb0, 1},
	Range{0x0eb2, 0x0eb3, 1},
	Range{0x0ebd, 0x0ec0, 3},
	Range{0x0ec1, 0x0ec4, 1},
	Range{0x0ec6, 0x0edc, 22},
	Range{0x0edd, 0x0f00, 35},
	Range{0x0f40, 0x0f47, 1},
	Range{0x0f49, 0x0f6c, 1},
	Range{0x0f88, 0x0f8b, 1},
	Range{0x1000, 0x102a, 1},
	Range{0x103f, 0x1050, 17},
	Range{0x1051, 0x1055, 1},
	Range{0x105a, 0x105d, 1},
	Range{0x1061, 0x1065, 4},
	Range{0x1066, 0x106e, 8},
	Range{0x106f, 0x1070, 1},
	Range{0x1075, 0x1081, 1},
	Range{0x108e, 0x10a0, 18},
	Range{0x10a1, 0x10c5, 1},
	Range{0x10d0, 0x10fa, 1},
	Range{0x10fc, 0x1100, 4},
	Range{0x1101, 0x1159, 1},
	Range{0x115f, 0x11a2, 1},
	Range{0x11a8, 0x11f9, 1},
	Range{0x1200, 0x1248, 1},
	Range{0x124a, 0x124d, 1},
	Range{0x1250, 0x1256, 1},
	Range{0x1258, 0x125a, 2},
	Range{0x125b, 0x125d, 1},
	Range{0x1260, 0x1288, 1},
	Range{0x128a, 0x128d, 1},
	Range{0x1290, 0x12b0, 1},
	Range{0x12b2, 0x12b5, 1},
	Range{0x12b8, 0x12be, 1},
	Range{0x12c0, 0x12c2, 2},
	Range{0x12c3, 0x12c5, 1},
	Range{0x12c8, 0x12d6, 1},
	Range{0x12d8, 0x1310, 1},
	Range{0x1312, 0x1315, 1},
	Range{0x1318, 0x135a, 1},
	Range{0x1380, 0x138f, 1},
	Range{0x13a0, 0x13f4, 1},
	Range{0x1401, 0x166c, 1},
	Range{0x166f, 0x1676, 1},
	Range{0x1681, 0x169a, 1},
	Range{0x16a0, 0x16ea, 1},
	Range{0x1700, 0x170c, 1},
	Range{0x170e, 0x1711, 1},
	Range{0x1720, 0x1731, 1},
	Range{0x1740, 0x1751, 1},
	Range{0x1760, 0x176c, 1},
	Range{0x176e, 0x1770, 1},
	Range{0x1780, 0x17b3, 1},
	Range{0x17d7, 0x17dc, 5},
	Range{0x1820, 0x1877, 1},
	Range{0x1880, 0x18a8, 1},
	Range{0x18aa, 0x1900, 86},
	Range{0x1901, 0x191c, 1},
	Range{0x1950, 0x196d, 1},
	Range{0x1970, 0x1974, 1},
	Range{0x1980, 0x19a9, 1},
	Range{0x19c1, 0x19c7, 1},
	Range{0x1a00, 0x1a16, 1},
	Range{0x1b05, 0x1b33, 1},
	Range{0x1b45, 0x1b4b, 1},
	Range{0x1b83, 0x1ba0, 1},
	Range{0x1bae, 0x1baf, 1},
	Range{0x1c00, 0x1c23, 1},
	Range{0x1c4d, 0x1c4f, 1},
	Range{0x1c5a, 0x1c7d, 1},
	Range{0x1d00, 0x1dbf, 1},
	Range{0x1e00, 0x1f15, 1},
	Range{0x1f18, 0x1f1d, 1},
	Range{0x1f20, 0x1f45, 1},
	Range{0x1f48, 0x1f4d, 1},
	Range{0x1f50, 0x1f57, 1},
	Range{0x1f59, 0x1f5f, 2},
	Range{0x1f60, 0x1f7d, 1},
	Range{0x1f80, 0x1fb4, 1},
	Range{0x1fb6, 0x1fbc, 1},
	Range{0x1fbe, 0x1fc2, 4},
	Range{0x1fc3, 0x1fc4, 1},
	Range{0x1fc6, 0x1fcc, 1},
	Range{0x1fd0, 0x1fd3, 1},
	Range{0x1fd6, 0x1fdb, 1},
	Range{0x1fe0, 0x1fec, 1},
	Range{0x1ff2, 0x1ff4, 1},
	Range{0x1ff6, 0x1ffc, 1},
	Range{0x2071, 0x207f, 14},
	Range{0x2090, 0x2094, 1},
	Range{0x2102, 0x2107, 5},
	Range{0x210a, 0x2113, 1},
	Range{0x2115, 0x2119, 4},
	Range{0x211a, 0x211d, 1},
	Range{0x2124, 0x212a, 2},
	Range{0x212b, 0x212d, 1},
	Range{0x212f, 0x2139, 1},
	Range{0x213c, 0x213f, 1},
	Range{0x2145, 0x2149, 1},
	Range{0x214e, 0x2183, 53},
	Range{0x2184, 0x2c00, 2684},
	Range{0x2c01, 0x2c2e, 1},
	Range{0x2c30, 0x2c5e, 1},
	Range{0x2c60, 0x2c6f, 1},
	Range{0x2c71, 0x2c7d, 1},
	Range{0x2c80, 0x2ce4, 1},
	Range{0x2d00, 0x2d25, 1},
	Range{0x2d30, 0x2d65, 1},
	Range{0x2d6f, 0x2d80, 17},
	Range{0x2d81, 0x2d96, 1},
	Range{0x2da0, 0x2da6, 1},
	Range{0x2da8, 0x2dae, 1},
	Range{0x2db0, 0x2db6, 1},
	Range{0x2db8, 0x2dbe, 1},
	Range{0x2dc0, 0x2dc6, 1},
	Range{0x2dc8, 0x2dce, 1},
	Range{0x2dd0, 0x2dd6, 1},
	Range{0x2dd8, 0x2dde, 1},
	Range{0x2e2f, 0x3005, 470},
	Range{0x3006, 0x3031, 43},
	Range{0x3032, 0x3035, 1},
	Range{0x303b, 0x303c, 1},
	Range{0x3041, 0x3096, 1},
	Range{0x309d, 0x309f, 1},
	Range{0x30a1, 0x30fa, 1},
	Range{0x30fc, 0x30ff, 1},
	Range{0x3105, 0x312d, 1},
	Range{0x3131, 0x318e, 1},
	Range{0x31a0, 0x31b7, 1},
	Range{0x31f0, 0x31ff, 1},
	// CJK Ideograph Extension A: contiguous block (was stride 6581).
	Range{0x3400, 0x4db5, 1},
	// CJK Unified Ideographs: contiguous block (was stride 20931).
	Range{0x4e00, 0x9fc3, 1},
	Range{0xa000, 0xa48c, 1},
	Range{0xa500, 0xa60c, 1},
	Range{0xa610, 0xa61f, 1},
	Range{0xa62a, 0xa62b, 1},
	Range{0xa640, 0xa65f, 1},
	Range{0xa662, 0xa66e, 1},
	Range{0xa67f, 0xa697, 1},
	Range{0xa717, 0xa71f, 1},
	Range{0xa722, 0xa788, 1},
	Range{0xa78b, 0xa78c, 1},
	Range{0xa7fb, 0xa801, 1},
	Range{0xa803, 0xa805, 1},
	Range{0xa807, 0xa80a, 1},
	Range{0xa80c, 0xa822, 1},
	Range{0xa840, 0xa873, 1},
	Range{0xa882, 0xa8b3, 1},
	Range{0xa90a, 0xa925, 1},
	Range{0xa930, 0xa946, 1},
	Range{0xaa00, 0xaa28, 1},
	Range{0xaa40, 0xaa42, 1},
	Range{0xaa44, 0xaa4b, 1},
	// Hangul Syllables: contiguous block (was stride 11171).
	Range{0xac00, 0xd7a3, 1},
	Range{0xf900, 0xfa2d, 1},
	Range{0xfa30, 0xfa6a, 1},
	Range{0xfa70, 0xfad9, 1},
	Range{0xfb00, 0xfb06, 1},
	Range{0xfb13, 0xfb17, 1},
	Range{0xfb1d, 0xfb1f, 2},
	Range{0xfb20, 0xfb28, 1},
	Range{0xfb2a, 0xfb36, 1},
	Range{0xfb38, 0xfb3c, 1},
	Range{0xfb3e, 0xfb40, 2},
	Range{0xfb41, 0xfb43, 2},
	Range{0xfb44, 0xfb46, 2},
	Range{0xfb47, 0xfbb1, 1},
	Range{0xfbd3, 0xfd3d, 1},
	Range{0xfd50, 0xfd8f, 1},
	Range{0xfd92, 0xfdc7, 1},
	Range{0xfdf0, 0xfdfb, 1},
	Range{0xfe70, 0xfe74, 1},
	Range{0xfe76, 0xfefc, 1},
	Range{0xff21, 0xff3a, 1},
	Range{0xff41, 0xff5a, 1},
	Range{0xff66, 0xffbe, 1},
	Range{0xffc2, 0xffc7, 1},
	Range{0xffca, 0xffcf, 1},
	Range{0xffd2, 0xffd7, 1},
	Range{0xffda, 0xffdc, 1},
	Range{0x10000, 0x1000b, 1},
	Range{0x1000d, 0x10026, 1},
	Range{0x10028, 0x1003a, 1},
	Range{0x1003c, 0x1003d, 1},
	Range{0x1003f, 0x1004d, 1},
	Range{0x10050, 0x1005d, 1},
	Range{0x10080, 0x100fa, 1},
	Range{0x10280, 0x1029c, 1},
	Range{0x102a0, 0x102d0, 1},
	Range{0x10300, 0x1031e, 1},
	Range{0x10330, 0x10340, 1},
	Range{0x10342, 0x10349, 1},
	Range{0x10380, 0x1039d, 1},
	Range{0x103a0, 0x103c3, 1},
	Range{0x103c8, 0x103cf, 1},
	Range{0x10400, 0x1049d, 1},
	Range{0x10800, 0x10805, 1},
	Range{0x10808, 0x1080a, 2},
	Range{0x1080b, 0x10835, 1},
	Range{0x10837, 0x10838, 1},
	Range{0x1083c, 0x1083f, 3},
	Range{0x10900, 0x10915, 1},
	Range{0x10920, 0x10939, 1},
	Range{0x10a00, 0x10a10, 16},
	Range{0x10a11, 0x10a13, 1},
	Range{0x10a15, 0x10a17, 1},
	Range{0x10a19, 0x10a33, 1},
	Range{0x12000, 0x1236e, 1},
	Range{0x1d400, 0x1d454, 1},
	Range{0x1d456, 0x1d49c, 1},
	Range{0x1d49e, 0x1d49f, 1},
	Range{0x1d4a2, 0x1d4a5, 3},
	Range{0x1d4a6, 0x1d4a9, 3},
	Range{0x1d4aa, 0x1d4ac, 1},
	Range{0x1d4ae, 0x1d4b9, 1},
	Range{0x1d4bb, 0x1d4bd, 2},
	Range{0x1d4be, 0x1d4c3, 1},
	Range{0x1d4c5, 0x1d505, 1},
	Range{0x1d507, 0x1d50a, 1},
	Range{0x1d50d, 0x1d514, 1},
	Range{0x1d516, 0x1d51c, 1},
	Range{0x1d51e, 0x1d539, 1},
	Range{0x1d53b, 0x1d53e, 1},
	Range{0x1d540, 0x1d544, 1},
	Range{0x1d546, 0x1d54a, 4},
	Range{0x1d54b, 0x1d550, 1},
	Range{0x1d552, 0x1d6a5, 1},
	Range{0x1d6a8, 0x1d6c0, 1},
	Range{0x1d6c2, 0x1d6da, 1},
	Range{0x1d6dc, 0x1d6fa, 1},
	Range{0x1d6fc, 0x1d714, 1},
	Range{0x1d716, 0x1d734, 1},
	Range{0x1d736, 0x1d74e, 1},
	Range{0x1d750, 0x1d76e, 1},
	Range{0x1d770, 0x1d788, 1},
	Range{0x1d78a, 0x1d7a8, 1},
	Range{0x1d7aa, 0x1d7c2, 1},
	Range{0x1d7c4, 0x1d7cb, 1},
	// CJK Ideograph Extension B: contiguous block (was stride 42710).
	Range{0x20000, 0x2a6d6, 1},
	Range{0x2f800, 0x2fa1d, 1},
}
// Upper is the set of Unicode upper case letters.
var Upper = upper
// upper is the generated table that the exported Upper variable is
// initialized from. Per the file header it is produced from UnicodeData.txt
// (Unicode 5.1.0) and must not be edited by hand.
//
// Every entry is a three-value Range literal. The last value is the Stride
// field; the first two are presumably the inclusive low and high code
// points (confirm against the Range declaration). Entries are listed in
// increasing code point order. A stride greater than 1 either describes
// alternating upper/lower pairs (stride 2) or packs a few isolated, evenly
// spaced code points into a single entry.
var upper = []Range {
Range{0x0041, 0x005a, 1},
Range{0x00c0, 0x00d6, 1},
Range{0x00d8, 0x00de, 1},
Range{0x0100, 0x0136, 2},
Range{0x0139, 0x0147, 2},
Range{0x014a, 0x0178, 2},
Range{0x0179, 0x017d, 2},
Range{0x0181, 0x0182, 1},
Range{0x0184, 0x0186, 2},
Range{0x0187, 0x0189, 2},
Range{0x018a, 0x018b, 1},
Range{0x018e, 0x0191, 1},
Range{0x0193, 0x0194, 1},
Range{0x0196, 0x0198, 1},
Range{0x019c, 0x019d, 1},
Range{0x019f, 0x01a0, 1},
Range{0x01a2, 0x01a6, 2},
Range{0x01a7, 0x01a9, 2},
Range{0x01ac, 0x01ae, 2},
Range{0x01af, 0x01b1, 2},
Range{0x01b2, 0x01b3, 1},
Range{0x01b5, 0x01b7, 2},
Range{0x01b8, 0x01bc, 4},
Range{0x01c4, 0x01cd, 3},
Range{0x01cf, 0x01db, 2},
Range{0x01de, 0x01ee, 2},
Range{0x01f1, 0x01f4, 3},
Range{0x01f6, 0x01f8, 1},
Range{0x01fa, 0x0232, 2},
Range{0x023a, 0x023b, 1},
Range{0x023d, 0x023e, 1},
Range{0x0241, 0x0243, 2},
Range{0x0244, 0x0246, 1},
Range{0x0248, 0x024e, 2},
Range{0x0370, 0x0372, 2},
Range{0x0376, 0x0386, 16},
Range{0x0388, 0x038a, 1},
Range{0x038c, 0x038e, 2},
Range{0x038f, 0x0391, 2},
Range{0x0392, 0x03a1, 1},
Range{0x03a3, 0x03ab, 1},
Range{0x03cf, 0x03d2, 3},
Range{0x03d3, 0x03d4, 1},
Range{0x03d8, 0x03ee, 2},
Range{0x03f4, 0x03f7, 3},
Range{0x03f9, 0x03fa, 1},
Range{0x03fd, 0x042f, 1},
Range{0x0460, 0x0480, 2},
Range{0x048a, 0x04c0, 2},
Range{0x04c1, 0x04cd, 2},
Range{0x04d0, 0x0522, 2},
Range{0x0531, 0x0556, 1},
Range{0x10a0, 0x10c5, 1},
Range{0x1e00, 0x1e94, 2},
Range{0x1e9e, 0x1efe, 2},
Range{0x1f08, 0x1f0f, 1},
Range{0x1f18, 0x1f1d, 1},
Range{0x1f28, 0x1f2f, 1},
Range{0x1f38, 0x1f3f, 1},
Range{0x1f48, 0x1f4d, 1},
Range{0x1f59, 0x1f5f, 2},
Range{0x1f68, 0x1f6f, 1},
Range{0x1fb8, 0x1fbb, 1},
Range{0x1fc8, 0x1fcb, 1},
Range{0x1fd8, 0x1fdb, 1},
Range{0x1fe8, 0x1fec, 1},
Range{0x1ff8, 0x1ffb, 1},
Range{0x2102, 0x2107, 5},
Range{0x210b, 0x210d, 1},
Range{0x2110, 0x2112, 1},
Range{0x2115, 0x2119, 4},
Range{0x211a, 0x211d, 1},
Range{0x2124, 0x212a, 2},
Range{0x212b, 0x212d, 1},
Range{0x2130, 0x2133, 1},
Range{0x213e, 0x213f, 1},
Range{0x2145, 0x2183, 62},
Range{0x2c00, 0x2c2e, 1},
Range{0x2c60, 0x2c62, 2},
Range{0x2c63, 0x2c64, 1},
Range{0x2c67, 0x2c6d, 2},
Range{0x2c6e, 0x2c6f, 1},
Range{0x2c72, 0x2c75, 3},
Range{0x2c80, 0x2ce2, 2},
Range{0xa640, 0xa65e, 2},
Range{0xa662, 0xa66c, 2},
Range{0xa680, 0xa696, 2},
Range{0xa722, 0xa72e, 2},
Range{0xa732, 0xa76e, 2},
Range{0xa779, 0xa77d, 2},
Range{0xa77e, 0xa786, 2},
// Stride 22422 == 0xff21-0xa78b: this entry holds exactly two code points,
// 0xa78b and 0xff21, not a contiguous span.
Range{0xa78b, 0xff21, 22422},
Range{0xff22, 0xff3a, 1},
Range{0x10400, 0x10427, 1},
Range{0x1d400, 0x1d419, 1},
Range{0x1d434, 0x1d44d, 1},
Range{0x1d468, 0x1d481, 1},
Range{0x1d49c, 0x1d49e, 2},
Range{0x1d49f, 0x1d4a5, 3},
Range{0x1d4a6, 0x1d4a9, 3},
Range{0x1d4aa, 0x1d4ac, 1},
Range{0x1d4ae, 0x1d4b5, 1},
Range{0x1d4d0, 0x1d4e9, 1},
Range{0x1d504, 0x1d505, 1},
Range{0x1d507, 0x1d50a, 1},
Range{0x1d50d, 0x1d514, 1},
Range{0x1d516, 0x1d51c, 1},
Range{0x1d538, 0x1d539, 1},
Range{0x1d53b, 0x1d53e, 1},
Range{0x1d540, 0x1d544, 1},
Range{0x1d546, 0x1d54a, 4},
Range{0x1d54b, 0x1d550, 1},
Range{0x1d56c, 0x1d585, 1},
Range{0x1d5a0, 0x1d5b9, 1},
Range{0x1d5d4, 0x1d5ed, 1},
Range{0x1d608, 0x1d621, 1},
Range{0x1d63c, 0x1d655, 1},
Range{0x1d670, 0x1d689, 1},
Range{0x1d6a8, 0x1d6c0, 1},
Range{0x1d6e2, 0x1d6fa, 1},
Range{0x1d71c, 0x1d734, 1},
Range{0x1d756, 0x1d76e, 1},
Range{0x1d790, 0x1d7a8, 1},
Range{0x1d7ca, 0x1d7ca, 1},
}
// Lower is the set of Unicode lower case letters.
// It is an exported alias for the unexported table lower; the generator
// emits the pair so that the exported name documents the table while the
// data itself lives under the unexported name.
var Lower = lower
// lower is the generated data behind Lower. Each entry is written by the
// table generator as Range{lo, hi, stride}: the first code point, the last
// code point, and the step between successive members of the run.
// Generated by maketables; regenerate rather than editing by hand.
var lower = []Range {
Range{0x0061, 0x007a, 1},
Range{0x00aa, 0x00b5, 11},
Range{0x00ba, 0x00df, 37},
Range{0x00e0, 0x00f6, 1},
Range{0x00f8, 0x00ff, 1},
Range{0x0101, 0x0137, 2},
Range{0x0138, 0x0148, 2},
Range{0x0149, 0x0177, 2},
Range{0x017a, 0x017e, 2},
Range{0x017f, 0x0180, 1},
Range{0x0183, 0x0185, 2},
Range{0x0188, 0x018c, 4},
Range{0x018d, 0x0192, 5},
Range{0x0195, 0x0199, 4},
Range{0x019a, 0x019b, 1},
Range{0x019e, 0x01a1, 3},
Range{0x01a3, 0x01a5, 2},
Range{0x01a8, 0x01aa, 2},
Range{0x01ab, 0x01ad, 2},
Range{0x01b0, 0x01b4, 4},
Range{0x01b6, 0x01b9, 3},
Range{0x01ba, 0x01bd, 3},
Range{0x01be, 0x01bf, 1},
Range{0x01c6, 0x01cc, 3},
Range{0x01ce, 0x01dc, 2},
Range{0x01dd, 0x01ef, 2},
Range{0x01f0, 0x01f3, 3},
Range{0x01f5, 0x01f9, 4},
Range{0x01fb, 0x0233, 2},
Range{0x0234, 0x0239, 1},
Range{0x023c, 0x023f, 3},
Range{0x0240, 0x0242, 2},
Range{0x0247, 0x024f, 2},
Range{0x0250, 0x0293, 1},
Range{0x0295, 0x02af, 1},
Range{0x0371, 0x0373, 2},
Range{0x0377, 0x037b, 4},
Range{0x037c, 0x037d, 1},
Range{0x0390, 0x03ac, 28},
Range{0x03ad, 0x03ce, 1},
Range{0x03d0, 0x03d1, 1},
Range{0x03d5, 0x03d7, 1},
Range{0x03d9, 0x03ef, 2},
Range{0x03f0, 0x03f3, 1},
Range{0x03f5, 0x03fb, 3},
Range{0x03fc, 0x0430, 52},
Range{0x0431, 0x045f, 1},
Range{0x0461, 0x0481, 2},
Range{0x048b, 0x04bf, 2},
Range{0x04c2, 0x04ce, 2},
Range{0x04cf, 0x0523, 2},
Range{0x0561, 0x0587, 1},
Range{0x1d00, 0x1d2b, 1},
Range{0x1d62, 0x1d77, 1},
Range{0x1d79, 0x1d9a, 1},
Range{0x1e01, 0x1e95, 2},
Range{0x1e96, 0x1e9d, 1},
Range{0x1e9f, 0x1eff, 2},
Range{0x1f00, 0x1f07, 1},
Range{0x1f10, 0x1f15, 1},
Range{0x1f20, 0x1f27, 1},
Range{0x1f30, 0x1f37, 1},
Range{0x1f40, 0x1f45, 1},
Range{0x1f50, 0x1f57, 1},
Range{0x1f60, 0x1f67, 1},
Range{0x1f70, 0x1f7d, 1},
Range{0x1f80, 0x1f87, 1},
Range{0x1f90, 0x1f97, 1},
Range{0x1fa0, 0x1fa7, 1},
Range{0x1fb0, 0x1fb4, 1},
Range{0x1fb6, 0x1fb7, 1},
Range{0x1fbe, 0x1fc2, 4},
Range{0x1fc3, 0x1fc4, 1},
Range{0x1fc6, 0x1fc7, 1},
Range{0x1fd0, 0x1fd3, 1},
Range{0x1fd6, 0x1fd7, 1},
Range{0x1fe0, 0x1fe7, 1},
Range{0x1ff2, 0x1ff4, 1},
Range{0x1ff6, 0x1ff7, 1},
Range{0x2071, 0x207f, 14},
Range{0x210a, 0x210e, 4},
Range{0x210f, 0x2113, 4},
Range{0x212f, 0x2139, 5},
Range{0x213c, 0x213d, 1},
Range{0x2146, 0x2149, 1},
Range{0x214e, 0x2184, 54},
Range{0x2c30, 0x2c5e, 1},
Range{0x2c61, 0x2c65, 4},
Range{0x2c66, 0x2c6c, 2},
Range{0x2c71, 0x2c73, 2},
Range{0x2c74, 0x2c76, 2},
Range{0x2c77, 0x2c7c, 1},
Range{0x2c81, 0x2ce3, 2},
Range{0x2ce4, 0x2d00, 28},
Range{0x2d01, 0x2d25, 1},
Range{0xa641, 0xa65f, 2},
Range{0xa663, 0xa66d, 2},
Range{0xa681, 0xa697, 2},
Range{0xa723, 0xa72f, 2},
Range{0xa730, 0xa731, 1},
Range{0xa733, 0xa771, 2},
Range{0xa772, 0xa778, 1},
Range{0xa77a, 0xa77c, 2},
Range{0xa77f, 0xa787, 2},
Range{0xa78c, 0xfb00, 21364},
Range{0xfb01, 0xfb06, 1},
Range{0xfb13, 0xfb17, 1},
Range{0xff41, 0xff5a, 1},
Range{0x10428, 0x1044f, 1},
Range{0x1d41a, 0x1d433, 1},
Range{0x1d44e, 0x1d454, 1},
Range{0x1d456, 0x1d467, 1},
Range{0x1d482, 0x1d49b, 1},
Range{0x1d4b6, 0x1d4b9, 1},
Range{0x1d4bb, 0x1d4bd, 2},
Range{0x1d4be, 0x1d4c3, 1},
Range{0x1d4c5, 0x1d4cf, 1},
Range{0x1d4ea, 0x1d503, 1},
Range{0x1d51e, 0x1d537, 1},
Range{0x1d552, 0x1d56b, 1},
Range{0x1d586, 0x1d59f, 1},
Range{0x1d5ba, 0x1d5d3, 1},
Range{0x1d5ee, 0x1d607, 1},
Range{0x1d622, 0x1d63b, 1},
Range{0x1d656, 0x1d66f, 1},
Range{0x1d68a, 0x1d6a5, 1},
Range{0x1d6c2, 0x1d6da, 1},
Range{0x1d6dc, 0x1d6e1, 1},
Range{0x1d6fc, 0x1d714, 1},
Range{0x1d716, 0x1d71b, 1},
Range{0x1d736, 0x1d74e, 1},
Range{0x1d750, 0x1d755, 1},
Range{0x1d770, 0x1d788, 1},
Range{0x1d78a, 0x1d78f, 1},
Range{0x1d7aa, 0x1d7c2, 1},
Range{0x1d7c4, 0x1d7c9, 1},
Range{0x1d7cb, 0x1d7cb, 1},
}
// Title is the set of Unicode title case letters.
// It is an exported alias for the unexported table title.
var Title = title
// title is the generated data behind Title. Each entry is written by the
// table generator as Range{lo, hi, stride}. A large stride (such as the
// 0x01f2..0x1f88 entry) simply links two isolated code points into one
// two-element run.
// Generated by maketables; regenerate rather than editing by hand.
var title = []Range {
Range{0x01c5, 0x01cb, 3},
Range{0x01f2, 0x1f88, 7574},
Range{0x1f89, 0x1f8f, 1},
Range{0x1f98, 0x1f9f, 1},
Range{0x1fa8, 0x1faf, 1},
Range{0x1fbc, 0x1fcc, 16},
Range{0x1ffc, 0x1ffc, 1},
}

View File

@ -0,0 +1,281 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Unicode table generator.
// Data read from the web.
package main
import (
"bufio";
"flag";
"fmt";
"http";
"log";
"os";
"strconv";
"strings";
)
// url is the location of the UnicodeData.txt file that main fetches and parses.
var url = flag.String("url", "http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt", "URL of Unicode database")
// digits selects the output: the decimal digit table when set, the letter
// tables (Letter, Upper, Lower, Title) otherwise.
var digits = flag.Bool("digits", false, "whether to generate digit tables; default is letter tables");
// die is the logger used for fatal errors. It writes to standard error and is
// created with log.Lexit; the rest of this file relies on a call to die not
// returning (for example, parse indexes into field right after reporting a
// bad field count).
var die = log.New(os.Stderr, nil, "", log.Lexit|log.Lshortfile);
// Each line of the database is a list of semicolon-separated fields, e.g.:
//	0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;;
//	007A;LATIN SMALL LETTER Z;Ll;0;L;;;;;N;;;005A;;005A
// See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation

// The F* constants are the indexes of the fields within a split line, in
// file order. The order must match fieldName, which is indexed the same way.
const (
FCodePoint = iota;
FName;
FGeneralCategory;
FCanonicalCombiningClass;
FBidiClass;
FDecompositionType;
FDecompositionMapping;
FNumericType;
FNumericValue;
FBidiMirrored;
FUnicode1Name;
FISOComment;
FSimpleUppercaseMapping;
FSimpleLowercaseMapping;
FSimpleTitlecaseMapping;
NumField; // number of fields expected on every line
MaxChar = 0xF0000; // anything above this doesn't have useful properties
)
// fieldName gives a printable name for each field, indexed by the F*
// constants (FCodePoint, FName, ...). It is used by Char.dump to label
// the raw fields, so its order must stay in step with the const block.
var fieldName = []string{
"CodePoint",
"Name",
"GeneralCategory",
"CanonicalCombiningClass",
"BidiClass",
"DecompositionType",
"DecompositionMapping",
"NumericType",
"NumericValue",
"BidiMirrored",
"Unicode1Name",
"ISOComment",
"SimpleUppercaseMapping",
"SimpleLowercaseMapping",
"SimpleTitlecaseMapping"
}
// Char holds what parse records for a single code point.
// This contains only the properties we're interested in.
// A zero codePoint marks an entry that has not been filled in, and a zero
// case mapping means the corresponding database field was empty.
type Char struct {
field []string; // debugging only; could be deleted if we take out char.dump()
codePoint uint32; // redundant (it's the index in the chars table) but useful
category string; // general category field, e.g. "Nd", "Lu", "Ll"
numValue int; // numeric value field; parsed only for category "Nd"
upperCase uint32; // upper case mapping, or 0 if none was given
lowerCase uint32; // lower case mapping, or 0 if none was given
titleCase uint32; // title case mapping, or 0 if none was given
}
// chars is the table filled in by parse, indexed by code point. It has
// MaxChar entries; code points at or above MaxChar are not stored.
var chars = make([]Char, MaxChar)
// lastChar is the code point of the most recently parsed line. parse sets it
// before deciding whether to store the line, so it is updated even for
// code points that are skipped.
var lastChar uint32 = 0;
func parse(line string) {
field := strings.Split(line, ";", -1);
if len(field) != NumField {
die.Logf("%.5s...: %d fields (expected %d)\n", line, len(field), NumField);
}
point, err := strconv.Btoui64(field[FCodePoint], 16);
if err != nil {
die.Log("%.5s...:", err)
}
lastChar = uint32(point);
if point == 0 {
return // not interesting and we use 0 as unset
}
if point >= MaxChar {
fmt.Fprintf(os.Stderr, "ignoring char U+%04x\n", point);
return;
}
char := &chars[point];
char.field=field;
if char.codePoint != 0 {
die.Logf("point U+%04x reused\n");
}
char.codePoint = lastChar;
char.category = field[FGeneralCategory];
switch char.category {
case "Nd":
// Decimal digit
v, err := strconv.Atoi(field[FNumericValue]);
if err != nil {
die.Log("U+%04x: bad numeric field: %s", point, err);
}
char.numValue = v;
case "Lu":
char.letter(field[FCodePoint], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping]);
case "Ll":
char.letter(field[FSimpleUppercaseMapping], field[FCodePoint], field[FSimpleTitlecaseMapping]);
case "Lt":
char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FCodePoint]);
case "Lm", "Lo":
char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping]);
}
}
// dump prints the label s followed by every raw database field of char as
// name:"value" pairs, for debugging.
//
// The output goes to standard error: standard output carries the generated
// Go source (the Makefile redirects it into a .go file), so diagnostics
// written there would end up hidden inside the generated file.
func (char *Char) dump(s string) {
	fmt.Fprint(os.Stderr, s, " ");
	for i := 0; i < len(char.field); i++ {
		fmt.Fprintf(os.Stderr, "%s:%q ", fieldName[i], char.field[i]);
	}
	fmt.Fprint(os.Stderr, "\n");
}
// letter stores the case mappings of char. u, l and t are the hexadecimal
// text of the upper, lower and title case mappings; an empty string leaves
// the corresponding mapping at 0.
func (char *Char) letter(u, l, t string) {
	// Convert in upper, lower, title order so that a malformed field is
	// reported in that order.
	upper := char.letterValue(u, "U");
	lower := char.letterValue(l, "L");
	title := char.letterValue(t, "T");
	char.upperCase, char.lowerCase, char.titleCase = upper, lower, title;
}
// letterValue converts s, the hexadecimal text of a case mapping, to a code
// point. An empty s yields 0. If s is not valid hexadecimal, the character
// is dumped with the label cas and the error is reported through die.
func (char *Char) letterValue(s string, cas string) uint32 {
	if len(s) == 0 {
		return 0
	}
	code, err := strconv.Btoui64(s, 16);
	if err == nil {
		return uint32(code)
	}
	char.dump(cas);
	die.Logf("U+%04x: bad letter(%s): %s", char.codePoint, s, err);
	return uint32(code)
}
func main() {
flag.Parse();
resp, _, err := http.Get(*url);
if err != nil {
die.Log(err);
}
input := bufio.NewReader(resp.Body);
for {
line, err := input.ReadLineString('\n', false);
if err != nil {
if err == os.EOF {
break;
}
die.Log(err);
}
parse(line);
}
resp.Body.Close();
fmt.Printf(
"// Generated by running\n"
"// tables --digits=%t --url=%s\n"
"// DO NOT EDIT\n\n"
"package unicode\n",
*digits,
*url
);
// We generate an UpperCase name to serve as concise documentation and a lowerCase
// name to store the data. This stops godoc dumping all the tables but keeps them
// available to clients.
if *digits {
dumpRange(
"\n// DecimalDigit is the set of Unicode characters with the \"decimal digit\" property.\n"
"var DecimalDigit = decimalDigit\n"
"var decimalDigit = []Range {\n",
func(code int) bool { return chars[code].category == "Nd" },
"}\n"
);
} else {
dumpRange(
"\n// Letter is the set of Unicode letters.\n"
"var Letter = letter\n"
"var letter = []Range {\n",
func(code int) bool {
switch chars[code].category {
case "Lu", "Ll", "Lt", "Lm", "Lo":
return true
}
return false
},
"}\n"
);
dumpRange(
"\n// Upper is the set of Unicode upper case letters.\n"
"var Upper = upper\n"
"var upper = []Range {\n",
func(code int) bool { return chars[code].category == "Lu" },
"}\n"
);
dumpRange(
"\n// Lower is the set of Unicode lower case letters.\n"
"var Lower = lower\n"
"var lower = []Range {\n",
func(code int) bool { return chars[code].category == "Ll" },
"}\n"
);
dumpRange(
"\n// Title is the set of Unicode title case letters.\n"
"var Title = title\n"
"var title = []Range {\n",
func(code int) bool { return chars[code].category == "Lt" },
"}\n"
);
}
}
// Op reports whether the code point code belongs to the set being dumped.
// dumpRange calls it with indexes into the chars table.
type Op func(code int) bool
// dumpRange prints header, then one Range{lo, hi, stride} line for each run
// of code points accepted by inCategory, then trailer.
//
// A run starts at the first unclaimed member. The distance to the following
// member fixes the stride, and the run is extended for as long as membership
// and "lies on the stride" agree for every code point examined.
func dumpRange(header string, inCategory Op, trailer string) {
	const rangeFormat = "\tRange{0x%04x, 0x%04x, %d},\n";
	limit := len(chars);

	fmt.Print(header);
	pos := 0;
	for {
		// Skip ahead to the first member of the next run.
		for ; pos < limit && !inCategory(pos); pos++ {
		}
		if pos >= limit {
			// Nothing left to emit.
			break
		}
		first, last, step := pos, pos, 1;

		// Find the following member; its distance from first is the stride.
		pos++;
		for ; pos < limit && !inCategory(pos); pos++ {
		}
		if pos >= limit {
			// first was the final member: emit it as a one-element run.
			fmt.Printf(rangeFormat, first, last, step);
			break;
		}
		step = pos - first;

		// Extend the run while every code point is a member exactly when
		// it falls on the stride.
		for probe := pos; probe < limit; probe++ {
			member := inCategory(probe);
			onStride := (probe-first)%step == 0;
			if member != onStride {
				break
			}
			if member {
				last = probe
			}
		}
		fmt.Printf(rangeFormat, first, last, step);

		// Resume the search just past the run that was emitted.
		pos = last + 1;
	}
	fmt.Print(trailer);
}