1
0
mirror of https://github.com/golang/go synced 2024-09-25 15:10:11 -06:00

improve generated code for godocability

R=rsc
DELTA=459  (168 added, 279 deleted, 12 changed)
OCL=34005
CL=34007
This commit is contained in:
Rob Pike 2009-08-27 18:38:02 -07:00
parent f7e92c596c
commit 25caf18a8e
3 changed files with 180 additions and 291 deletions

View File

@ -14,6 +14,7 @@ import (
"http";
"log";
"os";
"sort";
"strconv";
"strings";
"regexp";
@ -235,7 +236,7 @@ func printCategories() {
die.Log(err);
}
if resp.StatusCode != 200 {
die.Log("bad GET status for UnicodeData.txt", resp.StatusCode);
die.Log("bad GET status for UnicodeData.txt", resp.Status);
}
input := bufio.NewReader(resp.Body);
for {
@ -279,6 +280,8 @@ func printCategories() {
fmt.Printf("}\n\n");
}
decl := make(sort.StringArray, len(list));
ndecl := 0;
for _, name := range list {
if _, ok := category[name]; !ok {
die.Log("unknown category", name);
@ -286,56 +289,52 @@ func printCategories() {
// We generate an UpperCase name to serve as concise documentation and an _UnderScored
// name to store the data. This stops godoc dumping all the tables but keeps them
// available to clients.
// Cases deserving special comments
varDecl := "";
switch name {
case "letter":
varDecl = "\tLetter = letter; // Letter is the set of Unicode letters.\n";
case "Nd":
varDecl = "\tDigit = _Nd; // Digit is the set of Unicode characters with the \"decimal digit\" property.\n";
case "Lu":
varDecl = "\tUpper = _Lu; // Upper is the set of Unicode upper case letters.\n";
case "Ll":
varDecl = "\tLower = _Ll; // Lower is the set of Unicode lower case letters.\n";
case "Lt":
varDecl = "\tTitle = _Lt; // Title is the set of Unicode title case letters.\n";
}
if name != "letter" {
varDecl += fmt.Sprintf(
"\t%s = _%s; // %s is the set of Unicode characters in category %s.\n",
name, name, name, name
);
}
decl[ndecl] = varDecl;
ndecl++;
if name == "letter" { // special case
dumpRange(
"\n// Letter is the set of Unicode letters.\n"
"var Letter = letter\n"
"var letter = []Range {\n",
letterOp,
"}\n"
letterOp
);
continue;
}
// Cases deserving special comments
switch name {
case "Nd":
fmt.Printf(
"\n// Digit is the set of Unicode characters with the \"decimal digit\" property.\n"
"var Digit = Nd\n\n"
)
case "Lu":
fmt.Printf(
"\n// Upper is the set of Unicode upper case letters.\n"
"var Upper = Lu\n\n"
)
case "Ll":
fmt.Printf(
"\n// Lower is the set of Unicode lower case letters.\n"
"var Lower = Ll\n\n"
)
case "Lt":
fmt.Printf(
"\n// Title is the set of Unicode title case letters.\n"
"var Title = Lt\n\n"
)
}
dumpRange(
fmt.Sprintf(
"// %s is the set of Unicode characters in category %s.\n"
"var %s = _%s\n"
"var _%s = []Range {\n",
name, name, name, name, name
),
func(code int) bool { return chars[code].category == name },
"}\n\n"
fmt.Sprintf("var _%s = []Range {\n", name),
func(code int) bool { return chars[code].category == name }
);
}
decl.Sort();
fmt.Println("var (");
for _, d := range decl {
fmt.Print(d);
}
fmt.Println(")\n");
}
type Op func(code int) bool
const format = "\tRange{0x%04x, 0x%04x, %d},\n";
func dumpRange(header string, inCategory Op, trailer string) {
func dumpRange(header string, inCategory Op) {
fmt.Print(header);
next := 0;
// one Range for each iteration
@ -382,7 +381,7 @@ func dumpRange(header string, inCategory Op, trailer string) {
// next range: start looking where this range ends
next = hi + 1;
}
fmt.Print(trailer);
fmt.Print("}\n\n");
}
func fullCategoryTest(list []string) {
@ -510,19 +509,27 @@ func printScripts() {
fmt.Printf("}\n\n");
}
decl := make(sort.StringArray, len(list));
ndecl := 0;
for _, name := range list {
fmt.Printf(
"// %s is the set of Unicode characters in script %s.\n"
"var %s = _%s\n"
"var _%s = []Range {\n",
name, name, name, name, name
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
name, name, name, name
);
ndecl++;
fmt.Printf("var _%s = []Range {\n", name);
ranges := foldAdjacent(scripts[name]);
for _, s := range ranges {
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
}
fmt.Printf("}\n\n");
}
decl.Sort();
fmt.Println("var (");
for _, d := range decl {
fmt.Print(d);
}
fmt.Println(")\n");
}
// The script tables have a lot of adjacent elements. Fold them together.

View File

@ -132,43 +132,38 @@ var inCategoryTest = []T {
}
func TestScripts(t *testing.T) {
notTested := make(map[string] bool);
for k := range Scripts {
notTested[k] = true
}
for i, test := range inTest {
if !Is(Scripts[test.script], test.rune) {
t.Errorf("IsScript(%#x, %s) = false, want true\n", test.rune, test.script);
}
notTested[test.script] = false, false
}
for i, test := range outTest {
if Is(Scripts[test.script], test.rune) {
t.Errorf("IsScript(%#x, %s) = true, want false\n", test.rune, test.script);
}
}
tested := make(map[string] bool);
for k := range Scripts {
tested[k] = true
}
for _, test := range inTest {
tested[test.script] = false, false
}
for k := range tested {
for k := range notTested {
t.Error("not tested:", k)
}
}
func TestCategories(t *testing.T) {
notTested := make(map[string] bool);
for k := range Categories {
notTested[k] = true
}
for i, test := range inCategoryTest {
if !Is(Categories[test.script], test.rune) {
t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script);
}
notTested[test.script] = false, false
}
tested := make(map[string] bool);
for k := range Categories {
tested[k] = true
}
for _, test := range inCategoryTest {
tested[test.script] = false, false
}
for k := range tested {
for k := range notTested {
t.Error("not tested:", k)
}
}

View File

@ -41,8 +41,6 @@ var Categories = map[string] []Range {
"Lo": Lo,
}
// Lm is the set of Unicode characters in category Lm.
var Lm = _Lm
var _Lm = []Range {
Range{0x02b0, 0x02c1, 1},
Range{0x02c6, 0x02d1, 1},
@ -75,12 +73,6 @@ var _Lm = []Range {
Range{0xff9f, 0xff9f, 1},
}
// Lower is the set of Unicode lower case letters.
var Lower = Ll
// Ll is the set of Unicode characters in category Ll.
var Ll = _Ll
var _Ll = []Range {
Range{0x0061, 0x007a, 1},
Range{0x00aa, 0x00b5, 11},
@ -221,8 +213,6 @@ var _Ll = []Range {
Range{0x1d7cb, 0x1d7cb, 1},
}
// Me is the set of Unicode characters in category Me.
var Me = _Me
var _Me = []Range {
Range{0x0488, 0x0489, 1},
Range{0x06de, 0x20dd, 6655},
@ -231,8 +221,6 @@ var _Me = []Range {
Range{0xa670, 0xa672, 1},
}
// Mc is the set of Unicode characters in category Mc.
var Mc = _Mc
var _Mc = []Range {
Range{0x0903, 0x093e, 59},
Range{0x093f, 0x0940, 1},
@ -314,8 +302,6 @@ var _Mc = []Range {
Range{0x1d16e, 0x1d172, 1},
}
// Mn is the set of Unicode characters in category Mn.
var Mn = _Mn
var _Mn = []Range {
Range{0x0300, 0x036f, 1},
Range{0x0483, 0x0487, 1},
@ -461,15 +447,10 @@ var _Mn = []Range {
Range{0xe0100, 0xe01ef, 1},
}
// Zl is the set of Unicode characters in category Zl.
var Zl = _Zl
var _Zl = []Range {
Range{0x2028, 0x2028, 1},
}
// Letter is the set of Unicode letters.
var Letter = letter
var letter = []Range {
Range{0x0041, 0x005a, 1},
Range{0x0061, 0x007a, 1},
@ -843,14 +824,11 @@ var letter = []Range {
Range{0x20000, 0x2a6d6, 42710},
Range{0x2f800, 0x2fa1d, 1},
}
// Zp is the set of Unicode characters in category Zp.
var Zp = _Zp
var _Zp = []Range {
Range{0x2029, 0x2029, 1},
}
// Zs is the set of Unicode characters in category Zs.
var Zs = _Zs
var _Zs = []Range {
Range{0x0020, 0x00a0, 128},
Range{0x1680, 0x180e, 398},
@ -859,24 +837,18 @@ var _Zs = []Range {
Range{0x3000, 0x3000, 1},
}
// Cs is the set of Unicode characters in category Cs.
var Cs = _Cs
var _Cs = []Range {
Range{0xd800, 0xdb7f, 895},
Range{0xdb80, 0xdbff, 127},
Range{0xdc00, 0xdfff, 1023},
}
// Co is the set of Unicode characters in category Co.
var Co = _Co
var _Co = []Range {
Range{0xe000, 0xf8ff, 6399},
Range{0xf0000, 0xffffd, 65533},
Range{0x100000, 0x10fffd, 65533},
}
// Cf is the set of Unicode characters in category Cf.
var Cf = _Cf
var _Cf = []Range {
Range{0x00ad, 0x0600, 1363},
Range{0x0601, 0x0603, 1},
@ -893,15 +865,11 @@ var _Cf = []Range {
Range{0xe0021, 0xe007f, 1},
}
// Cc is the set of Unicode characters in category Cc.
var Cc = _Cc
var _Cc = []Range {
Range{0x0001, 0x001f, 1},
Range{0x007f, 0x009f, 1},
}
// Po is the set of Unicode characters in category Po.
var Po = _Po
var _Po = []Range {
Range{0x0021, 0x0023, 1},
Range{0x0025, 0x0027, 1},
@ -999,8 +967,6 @@ var _Po = []Range {
Range{0x12470, 0x12473, 1},
}
// Pi is the set of Unicode characters in category Pi.
var Pi = _Pi
var _Pi = []Range {
Range{0x00ab, 0x2018, 8045},
Range{0x201b, 0x201c, 1},
@ -1010,8 +976,6 @@ var _Pi = []Range {
Range{0x2e1c, 0x2e20, 4},
}
// Pf is the set of Unicode characters in category Pf.
var Pf = _Pf
var _Pf = []Range {
Range{0x00bb, 0x2019, 8030},
Range{0x201d, 0x203a, 29},
@ -1020,8 +984,6 @@ var _Pf = []Range {
Range{0x2e1d, 0x2e21, 4},
}
// Pe is the set of Unicode characters in category Pe.
var Pe = _Pe
var _Pe = []Range {
Range{0x0029, 0x005d, 52},
Range{0x007d, 0x0f3b, 3774},
@ -1046,8 +1008,6 @@ var _Pe = []Range {
Range{0xff5d, 0xff63, 3},
}
// Pd is the set of Unicode characters in category Pd.
var Pd = _Pd
var _Pd = []Range {
Range{0x002d, 0x058a, 1373},
Range{0x05be, 0x1806, 4680},
@ -1059,8 +1019,6 @@ var _Pd = []Range {
Range{0xfe63, 0xff0d, 170},
}
// Pc is the set of Unicode characters in category Pc.
var Pc = _Pc
var _Pc = []Range {
Range{0x005f, 0x203f, 8160},
Range{0x2040, 0x2054, 20},
@ -1069,8 +1027,6 @@ var _Pc = []Range {
Range{0xff3f, 0xff3f, 1},
}
// Ps is the set of Unicode characters in category Ps.
var Ps = _Ps
var _Ps = []Range {
Range{0x0028, 0x005b, 51},
Range{0x007b, 0x0f3a, 3775},
@ -1097,12 +1053,6 @@ var _Ps = []Range {
Range{0xff62, 0xff62, 1},
}
// Digit is the set of Unicode characters with the "decimal digit" property.
var Digit = Nd
// Nd is the set of Unicode characters in category Nd.
var Nd = _Nd
var _Nd = []Range {
Range{0x0030, 0x0039, 1},
Range{0x0660, 0x0669, 1},
@ -1139,8 +1089,6 @@ var _Nd = []Range {
Range{0x1d7ce, 0x1d7ff, 1},
}
// Nl is the set of Unicode characters in category Nl.
var Nl = _Nl
var _Nl = []Range {
Range{0x16ee, 0x16f0, 1},
Range{0x2160, 0x2182, 1},
@ -1154,8 +1102,6 @@ var _Nl = []Range {
Range{0x12400, 0x12462, 1},
}
// No is the set of Unicode characters in category No.
var No = _No
var _No = []Range {
Range{0x00b2, 0x00b3, 1},
Range{0x00b9, 0x00bc, 3},
@ -1189,8 +1135,6 @@ var _No = []Range {
Range{0x1d360, 0x1d371, 1},
}
// So is the set of Unicode characters in category So.
var So = _So
var _So = []Range {
Range{0x00a6, 0x00a7, 1},
Range{0x00a9, 0x00ae, 5},
@ -1315,8 +1259,6 @@ var _So = []Range {
Range{0x1f030, 0x1f093, 1},
}
// Sm is the set of Unicode characters in category Sm.
var Sm = _Sm
var _Sm = []Range {
Range{0x002b, 0x003c, 17},
Range{0x003d, 0x003e, 1},
@ -1370,8 +1312,6 @@ var _Sm = []Range {
Range{0x1d7a9, 0x1d7c3, 26},
}
// Sk is the set of Unicode characters in category Sk.
var Sk = _Sk
var _Sk = []Range {
Range{0x005e, 0x0060, 2},
Range{0x00a8, 0x00af, 7},
@ -1396,8 +1336,6 @@ var _Sk = []Range {
Range{0xffe3, 0xffe3, 1},
}
// Sc is the set of Unicode characters in category Sc.
var Sc = _Sc
var _Sc = []Range {
Range{0x0024, 0x00a2, 126},
Range{0x00a3, 0x00a5, 1},
@ -1412,12 +1350,6 @@ var _Sc = []Range {
Range{0xffe6, 0xffe6, 1},
}
// Upper is the set of Unicode upper case letters.
var Upper = Lu
// Lu is the set of Unicode characters in category Lu.
var Lu = _Lu
var _Lu = []Range {
Range{0x0041, 0x005a, 1},
Range{0x00c0, 0x00d6, 1},
@ -1545,12 +1477,6 @@ var _Lu = []Range {
Range{0x1d7ca, 0x1d7ca, 1},
}
// Title is the set of Unicode title case letters.
var Title = Lt
// Lt is the set of Unicode characters in category Lt.
var Lt = _Lt
var _Lt = []Range {
Range{0x01c5, 0x01cb, 3},
Range{0x01f2, 0x1f88, 7574},
@ -1561,8 +1487,6 @@ var _Lt = []Range {
Range{0x1ffc, 0x1ffc, 1},
}
// Lo is the set of Unicode characters in category Lo.
var Lo = _Lo
var _Lo = []Range {
Range{0x01bb, 0x01c0, 5},
Range{0x01c1, 0x01c3, 1},
@ -1838,6 +1762,43 @@ var _Lo = []Range {
Range{0x2f800, 0x2fa1d, 1},
}
var (
Cc = _Cc; // Cc is the set of Unicode characters in category Cc.
Cf = _Cf; // Cf is the set of Unicode characters in category Cf.
Co = _Co; // Co is the set of Unicode characters in category Co.
Cs = _Cs; // Cs is the set of Unicode characters in category Cs.
Digit = _Nd; // Digit is the set of Unicode characters with the "decimal digit" property.
Nd = _Nd; // Nd is the set of Unicode characters in category Nd.
Letter = letter; // Letter is the set of Unicode letters.
Lm = _Lm; // Lm is the set of Unicode characters in category Lm.
Lo = _Lo; // Lo is the set of Unicode characters in category Lo.
Lower = _Ll; // Lower is the set of Unicode lower case letters.
Ll = _Ll; // Ll is the set of Unicode characters in category Ll.
Mc = _Mc; // Mc is the set of Unicode characters in category Mc.
Me = _Me; // Me is the set of Unicode characters in category Me.
Mn = _Mn; // Mn is the set of Unicode characters in category Mn.
Nl = _Nl; // Nl is the set of Unicode characters in category Nl.
No = _No; // No is the set of Unicode characters in category No.
Pc = _Pc; // Pc is the set of Unicode characters in category Pc.
Pd = _Pd; // Pd is the set of Unicode characters in category Pd.
Pe = _Pe; // Pe is the set of Unicode characters in category Pe.
Pf = _Pf; // Pf is the set of Unicode characters in category Pf.
Pi = _Pi; // Pi is the set of Unicode characters in category Pi.
Po = _Po; // Po is the set of Unicode characters in category Po.
Ps = _Ps; // Ps is the set of Unicode characters in category Ps.
Sc = _Sc; // Sc is the set of Unicode characters in category Sc.
Sk = _Sk; // Sk is the set of Unicode characters in category Sk.
Sm = _Sm; // Sm is the set of Unicode characters in category Sm.
So = _So; // So is the set of Unicode characters in category So.
Title = _Lt; // Title is the set of Unicode title case letters.
Lt = _Lt; // Lt is the set of Unicode characters in category Lt.
Upper = _Lu; // Upper is the set of Unicode upper case letters.
Lu = _Lu; // Lu is the set of Unicode characters in category Lu.
Zl = _Zl; // Zl is the set of Unicode characters in category Zl.
Zp = _Zp; // Zp is the set of Unicode characters in category Zp.
Zs = _Zs; // Zs is the set of Unicode characters in category Zs.
)
// Generated by running
// maketables --scripts=all --url=http://www.unicode.org/Public/5.1.0/ucd/
// DO NOT EDIT
@ -1923,8 +1884,6 @@ var Scripts = map[string] []Range {
"Gothic": Gothic,
}
// Katakana is the set of Unicode characters in script Katakana.
var Katakana = _Katakana
var _Katakana = []Range {
Range{0x30a1, 0x30fa, 1},
Range{0x30fd, 0x30ff, 1},
@ -1935,8 +1894,6 @@ var _Katakana = []Range {
Range{0xff71, 0xff9d, 1},
}
// Malayalam is the set of Unicode characters in script Malayalam.
var Malayalam = _Malayalam
var _Malayalam = []Range {
Range{0x0d02, 0x0d03, 1},
Range{0x0d05, 0x0d0c, 1},
@ -1952,14 +1909,10 @@ var _Malayalam = []Range {
Range{0x0d79, 0x0d7f, 1},
}
// Phags_Pa is the set of Unicode characters in script Phags_Pa.
var Phags_Pa = _Phags_Pa
var _Phags_Pa = []Range {
Range{0xa840, 0xa877, 1},
}
// Latin is the set of Unicode characters in script Latin.
var Latin = _Latin
var _Latin = []Range {
Range{0x0041, 0x005a, 1},
Range{0x0061, 0x007a, 1},
@ -1992,15 +1945,11 @@ var _Latin = []Range {
Range{0xff41, 0xff5a, 1},
}
// Osmanya is the set of Unicode characters in script Osmanya.
var Osmanya = _Osmanya
var _Osmanya = []Range {
Range{0x10480, 0x1049d, 1},
Range{0x104a0, 0x104a9, 1},
}
// Khmer is the set of Unicode characters in script Khmer.
var Khmer = _Khmer
var _Khmer = []Range {
Range{0x1780, 0x17dd, 1},
Range{0x17e0, 0x17e9, 1},
@ -2008,8 +1957,6 @@ var _Khmer = []Range {
Range{0x19e0, 0x19ff, 1},
}
// Inherited is the set of Unicode characters in script Inherited.
var Inherited = _Inherited
var _Inherited = []Range {
Range{0x0300, 0x036f, 1},
Range{0x064b, 0x0655, 1},
@ -2031,8 +1978,6 @@ var _Inherited = []Range {
Range{0xe0100, 0xe01ef, 1},
}
// Telugu is the set of Unicode characters in script Telugu.
var Telugu = _Telugu
var _Telugu = []Range {
Range{0x0c01, 0x0c03, 1},
Range{0x0c05, 0x0c0c, 1},
@ -2050,21 +1995,15 @@ var _Telugu = []Range {
Range{0x0c78, 0x0c7f, 1},
}
// Bopomofo is the set of Unicode characters in script Bopomofo.
var Bopomofo = _Bopomofo
var _Bopomofo = []Range {
Range{0x3105, 0x312d, 1},
Range{0x31a0, 0x31b7, 1},
}
// Kayah_Li is the set of Unicode characters in script Kayah_Li.
var Kayah_Li = _Kayah_Li
var _Kayah_Li = []Range {
Range{0xa900, 0xa92f, 1},
}
// New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue.
var New_Tai_Lue = _New_Tai_Lue
var _New_Tai_Lue = []Range {
Range{0x1980, 0x19a9, 1},
Range{0x19b0, 0x19c9, 1},
@ -2072,15 +2011,11 @@ var _New_Tai_Lue = []Range {
Range{0x19de, 0x19df, 1},
}
// Tai_Le is the set of Unicode characters in script Tai_Le.
var Tai_Le = _Tai_Le
var _Tai_Le = []Range {
Range{0x1950, 0x196d, 1},
Range{0x1970, 0x1974, 1},
}
// Kharoshthi is the set of Unicode characters in script Kharoshthi.
var Kharoshthi = _Kharoshthi
var _Kharoshthi = []Range {
Range{0x10a00, 0x10a03, 1},
Range{0x10a05, 0x10a06, 1},
@ -2092,8 +2027,6 @@ var _Kharoshthi = []Range {
Range{0x10a50, 0x10a58, 1},
}
// Common is the set of Unicode characters in script Common.
var Common = _Common
var _Common = []Range {
Range{0x0000, 0x0040, 1},
Range{0x005b, 0x0060, 1},
@ -2234,8 +2167,6 @@ var _Common = []Range {
Range{0xe0020, 0xe007f, 1},
}
// Kannada is the set of Unicode characters in script Kannada.
var Kannada = _Kannada
var _Kannada = []Range {
Range{0x0c82, 0x0c83, 1},
Range{0x0c85, 0x0c8c, 1},
@ -2252,8 +2183,6 @@ var _Kannada = []Range {
Range{0x0ce6, 0x0cef, 1},
}
// Tamil is the set of Unicode characters in script Tamil.
var Tamil = _Tamil
var _Tamil = []Range {
Range{0x0b82, 0x0b83, 1},
Range{0x0b85, 0x0b8a, 1},
@ -2273,15 +2202,11 @@ var _Tamil = []Range {
Range{0x0be6, 0x0bfa, 1},
}
// Tagalog is the set of Unicode characters in script Tagalog.
var Tagalog = _Tagalog
var _Tagalog = []Range {
Range{0x1700, 0x170c, 1},
Range{0x170e, 0x1714, 1},
}
// Arabic is the set of Unicode characters in script Arabic.
var Arabic = _Arabic
var _Arabic = []Range {
Range{0x0606, 0x060b, 1},
Range{0x060d, 0x061a, 1},
@ -2302,22 +2227,16 @@ var _Arabic = []Range {
Range{0xfe76, 0xfefc, 1},
}
// Tagbanwa is the set of Unicode characters in script Tagbanwa.
var Tagbanwa = _Tagbanwa
var _Tagbanwa = []Range {
Range{0x1760, 0x176c, 1},
Range{0x176e, 0x1770, 1},
Range{0x1772, 0x1773, 1},
}
// Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal.
var Canadian_Aboriginal = _Canadian_Aboriginal
var _Canadian_Aboriginal = []Range {
Range{0x1401, 0x1676, 1},
}
// Tibetan is the set of Unicode characters in script Tibetan.
var Tibetan = _Tibetan
var _Tibetan = []Range {
Range{0x0f00, 0x0f47, 1},
Range{0x0f49, 0x0f6c, 1},
@ -2328,23 +2247,17 @@ var _Tibetan = []Range {
Range{0x0fce, 0x0fd4, 1},
}
// Coptic is the set of Unicode characters in script Coptic.
var Coptic = _Coptic
var _Coptic = []Range {
Range{0x03e2, 0x03ef, 1},
Range{0x2c80, 0x2cea, 1},
Range{0x2cf9, 0x2cff, 1},
}
// Hiragana is the set of Unicode characters in script Hiragana.
var Hiragana = _Hiragana
var _Hiragana = []Range {
Range{0x3041, 0x3096, 1},
Range{0x309d, 0x309f, 1},
}
// Limbu is the set of Unicode characters in script Limbu.
var Limbu = _Limbu
var _Limbu = []Range {
Range{0x1900, 0x191c, 1},
Range{0x1920, 0x192b, 1},
@ -2353,15 +2266,11 @@ var _Limbu = []Range {
Range{0x1944, 0x194f, 1},
}
// Myanmar is the set of Unicode characters in script Myanmar.
var Myanmar = _Myanmar
var _Myanmar = []Range {
Range{0x1000, 0x1099, 1},
Range{0x109e, 0x109f, 1},
}
// Armenian is the set of Unicode characters in script Armenian.
var Armenian = _Armenian
var _Armenian = []Range {
Range{0x0531, 0x0556, 1},
Range{0x0559, 0x055f, 1},
@ -2370,8 +2279,6 @@ var _Armenian = []Range {
Range{0xfb13, 0xfb17, 1},
}
// Sinhala is the set of Unicode characters in script Sinhala.
var Sinhala = _Sinhala
var _Sinhala = []Range {
Range{0x0d82, 0x0d83, 1},
Range{0x0d85, 0x0d96, 1},
@ -2386,8 +2293,6 @@ var _Sinhala = []Range {
Range{0x0df2, 0x0df4, 1},
}
// Bengali is the set of Unicode characters in script Bengali.
var Bengali = _Bengali
var _Bengali = []Range {
Range{0x0981, 0x0983, 1},
Range{0x0985, 0x098c, 1},
@ -2405,8 +2310,6 @@ var _Bengali = []Range {
Range{0x09e6, 0x09fa, 1},
}
// Greek is the set of Unicode characters in script Greek.
var Greek = _Greek
var _Greek = []Range {
Range{0x0370, 0x0373, 1},
Range{0x0375, 0x0377, 1},
@ -2443,8 +2346,6 @@ var _Greek = []Range {
Range{0x1d200, 0x1d245, 1},
}
// Cham is the set of Unicode characters in script Cham.
var Cham = _Cham
var _Cham = []Range {
Range{0xaa00, 0xaa36, 1},
Range{0xaa40, 0xaa4d, 1},
@ -2452,8 +2353,6 @@ var _Cham = []Range {
Range{0xaa5c, 0xaa5f, 1},
}
// Hebrew is the set of Unicode characters in script Hebrew.
var Hebrew = _Hebrew
var _Hebrew = []Range {
Range{0x0591, 0x05c7, 1},
Range{0x05d0, 0x05ea, 1},
@ -2466,15 +2365,11 @@ var _Hebrew = []Range {
Range{0xfb46, 0xfb4f, 1},
}
// Saurashtra is the set of Unicode characters in script Saurashtra.
var Saurashtra = _Saurashtra
var _Saurashtra = []Range {
Range{0xa880, 0xa8c4, 1},
Range{0xa8ce, 0xa8d9, 1},
}
// Hangul is the set of Unicode characters in script Hangul.
var Hangul = _Hangul
var _Hangul = []Range {
Range{0x1100, 0x1159, 1},
Range{0x115f, 0x11a2, 1},
@ -2490,35 +2385,25 @@ var _Hangul = []Range {
Range{0xffda, 0xffdc, 1},
}
// Runic is the set of Unicode characters in script Runic.
var Runic = _Runic
var _Runic = []Range {
Range{0x16a0, 0x16ea, 1},
Range{0x16ee, 0x16f0, 1},
}
// Deseret is the set of Unicode characters in script Deseret.
var Deseret = _Deseret
var _Deseret = []Range {
Range{0x10400, 0x1044f, 1},
}
// Sundanese is the set of Unicode characters in script Sundanese.
var Sundanese = _Sundanese
var _Sundanese = []Range {
Range{0x1b80, 0x1baa, 1},
Range{0x1bae, 0x1bb9, 1},
}
// Glagolitic is the set of Unicode characters in script Glagolitic.
var Glagolitic = _Glagolitic
var _Glagolitic = []Range {
Range{0x2c00, 0x2c2e, 1},
Range{0x2c30, 0x2c5e, 1},
}
// Oriya is the set of Unicode characters in script Oriya.
var Oriya = _Oriya
var _Oriya = []Range {
Range{0x0b01, 0x0b03, 1},
Range{0x0b05, 0x0b0c, 1},
@ -2536,14 +2421,10 @@ var _Oriya = []Range {
Range{0x0b66, 0x0b71, 1},
}
// Buhid is the set of Unicode characters in script Buhid.
var Buhid = _Buhid
var _Buhid = []Range {
Range{0x1740, 0x1753, 1},
}
// Ethiopic is the set of Unicode characters in script Ethiopic.
var Ethiopic = _Ethiopic
var _Ethiopic = []Range {
Range{0x1200, 0x1248, 1},
Range{0x124a, 0x124d, 1},
@ -2574,40 +2455,28 @@ var _Ethiopic = []Range {
Range{0x2dd8, 0x2dde, 1},
}
// Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri.
var Syloti_Nagri = _Syloti_Nagri
var _Syloti_Nagri = []Range {
Range{0xa800, 0xa82b, 1},
}
// Vai is the set of Unicode characters in script Vai.
var Vai = _Vai
var _Vai = []Range {
Range{0xa500, 0xa62b, 1},
}
// Cherokee is the set of Unicode characters in script Cherokee.
var Cherokee = _Cherokee
var _Cherokee = []Range {
Range{0x13a0, 0x13f4, 1},
}
// Ogham is the set of Unicode characters in script Ogham.
var Ogham = _Ogham
var _Ogham = []Range {
Range{0x1680, 0x169c, 1},
}
// Syriac is the set of Unicode characters in script Syriac.
var Syriac = _Syriac
var _Syriac = []Range {
Range{0x0700, 0x070d, 1},
Range{0x070f, 0x074a, 1},
Range{0x074d, 0x074f, 1},
}
// Gurmukhi is the set of Unicode characters in script Gurmukhi.
var Gurmukhi = _Gurmukhi
var _Gurmukhi = []Range {
Range{0x0a01, 0x0a03, 1},
Range{0x0a05, 0x0a0a, 1},
@ -2627,14 +2496,10 @@ var _Gurmukhi = []Range {
Range{0x0a66, 0x0a75, 1},
}
// Ol_Chiki is the set of Unicode characters in script Ol_Chiki.
var Ol_Chiki = _Ol_Chiki
var _Ol_Chiki = []Range {
Range{0x1c50, 0x1c7f, 1},
}
// Mongolian is the set of Unicode characters in script Mongolian.
var Mongolian = _Mongolian
var _Mongolian = []Range {
Range{0x1800, 0x1801, 1},
Range{0x1804, 0x1804, 1},
@ -2644,14 +2509,10 @@ var _Mongolian = []Range {
Range{0x1880, 0x18aa, 1},
}
// Hanunoo is the set of Unicode characters in script Hanunoo.
var Hanunoo = _Hanunoo
var _Hanunoo = []Range {
Range{0x1720, 0x1734, 1},
}
// Cypriot is the set of Unicode characters in script Cypriot.
var Cypriot = _Cypriot
var _Cypriot = []Range {
Range{0x10800, 0x10805, 1},
Range{0x10808, 0x10808, 1},
@ -2661,51 +2522,37 @@ var _Cypriot = []Range {
Range{0x1083f, 0x1083f, 1},
}
// Buginese is the set of Unicode characters in script Buginese.
var Buginese = _Buginese
var _Buginese = []Range {
Range{0x1a00, 0x1a1b, 1},
Range{0x1a1e, 0x1a1f, 1},
}
// Lepcha is the set of Unicode characters in script Lepcha.
var Lepcha = _Lepcha
var _Lepcha = []Range {
Range{0x1c00, 0x1c37, 1},
Range{0x1c3b, 0x1c49, 1},
Range{0x1c4d, 0x1c4f, 1},
}
// Thaana is the set of Unicode characters in script Thaana.
var Thaana = _Thaana
var _Thaana = []Range {
Range{0x0780, 0x07b1, 1},
}
// Old_Persian is the set of Unicode characters in script Old_Persian.
var Old_Persian = _Old_Persian
var _Old_Persian = []Range {
Range{0x103a0, 0x103c3, 1},
Range{0x103c8, 0x103d5, 1},
}
// Cuneiform is the set of Unicode characters in script Cuneiform.
var Cuneiform = _Cuneiform
var _Cuneiform = []Range {
Range{0x12000, 0x1236e, 1},
Range{0x12400, 0x12462, 1},
Range{0x12470, 0x12473, 1},
}
// Rejang is the set of Unicode characters in script Rejang.
var Rejang = _Rejang
var _Rejang = []Range {
Range{0xa930, 0xa953, 1},
Range{0xa95f, 0xa95f, 1},
}
// Georgian is the set of Unicode characters in script Georgian.
var Georgian = _Georgian
var _Georgian = []Range {
Range{0x10a0, 0x10c5, 1},
Range{0x10d0, 0x10fa, 1},
@ -2713,33 +2560,23 @@ var _Georgian = []Range {
Range{0x2d00, 0x2d25, 1},
}
// Shavian is the set of Unicode characters in script Shavian.
var Shavian = _Shavian
var _Shavian = []Range {
Range{0x10450, 0x1047f, 1},
}
// Lycian is the set of Unicode characters in script Lycian.
var Lycian = _Lycian
var _Lycian = []Range {
Range{0x10280, 0x1029c, 1},
}
// Nko is the set of Unicode characters in script Nko.
var Nko = _Nko
var _Nko = []Range {
Range{0x07c0, 0x07fa, 1},
}
// Yi is the set of Unicode characters in script Yi.
var Yi = _Yi
var _Yi = []Range {
Range{0xa000, 0xa48c, 1},
Range{0xa490, 0xa4c6, 1},
}
// Lao is the set of Unicode characters in script Lao.
var Lao = _Lao
var _Lao = []Range {
Range{0x0e81, 0x0e82, 1},
Range{0x0e84, 0x0e84, 1},
@ -2761,8 +2598,6 @@ var _Lao = []Range {
Range{0x0edc, 0x0edd, 1},
}
// Linear_B is the set of Unicode characters in script Linear_B.
var Linear_B = _Linear_B
var _Linear_B = []Range {
Range{0x10000, 0x1000b, 1},
Range{0x1000d, 0x10026, 1},
@ -2773,15 +2608,11 @@ var _Linear_B = []Range {
Range{0x10080, 0x100fa, 1},
}
// Old_Italic is the set of Unicode characters in script Old_Italic.
var Old_Italic = _Old_Italic
var _Old_Italic = []Range {
Range{0x10300, 0x1031e, 1},
Range{0x10320, 0x10323, 1},
}
// Devanagari is the set of Unicode characters in script Devanagari.
var Devanagari = _Devanagari
var _Devanagari = []Range {
Range{0x0901, 0x0939, 1},
Range{0x093c, 0x094d, 1},
@ -2793,36 +2624,26 @@ var _Devanagari = []Range {
Range{0x097b, 0x097f, 1},
}
// Lydian is the set of Unicode characters in script Lydian.
var Lydian = _Lydian
var _Lydian = []Range {
Range{0x10920, 0x10939, 1},
Range{0x1093f, 0x1093f, 1},
}
// Tifinagh is the set of Unicode characters in script Tifinagh.
var Tifinagh = _Tifinagh
var _Tifinagh = []Range {
Range{0x2d30, 0x2d65, 1},
Range{0x2d6f, 0x2d6f, 1},
}
// Ugaritic is the set of Unicode characters in script Ugaritic.
var Ugaritic = _Ugaritic
var _Ugaritic = []Range {
Range{0x10380, 0x1039d, 1},
Range{0x1039f, 0x1039f, 1},
}
// Thai is the set of Unicode characters in script Thai.
var Thai = _Thai
var _Thai = []Range {
Range{0x0e01, 0x0e3a, 1},
Range{0x0e40, 0x0e5b, 1},
}
// Cyrillic is the set of Unicode characters in script Cyrillic.
var Cyrillic = _Cyrillic
var _Cyrillic = []Range {
Range{0x0400, 0x0523, 1},
Range{0x1d2b, 0x1d2b, 1},
@ -2833,8 +2654,6 @@ var _Cyrillic = []Range {
Range{0xa67c, 0xa697, 1},
}
// Gujarati is the set of Unicode characters in script Gujarati.
var Gujarati = _Gujarati
var _Gujarati = []Range {
Range{0x0a81, 0x0a83, 1},
Range{0x0a85, 0x0a8d, 1},
@ -2852,34 +2671,24 @@ var _Gujarati = []Range {
Range{0x0af1, 0x0af1, 1},
}
// Carian is the set of Unicode characters in script Carian.
var Carian = _Carian
var _Carian = []Range {
Range{0x102a0, 0x102d0, 1},
}
// Phoenician is the set of Unicode characters in script Phoenician.
var Phoenician = _Phoenician
var _Phoenician = []Range {
Range{0x10900, 0x10919, 1},
Range{0x1091f, 0x1091f, 1},
}
// Balinese is the set of Unicode characters in script Balinese.
var Balinese = _Balinese
var _Balinese = []Range {
Range{0x1b00, 0x1b4b, 1},
Range{0x1b50, 0x1b7c, 1},
}
// Braille is the set of Unicode characters in script Braille.
var Braille = _Braille
var _Braille = []Range {
Range{0x2800, 0x28ff, 1},
}
// Han is the set of Unicode characters in script Han.
var Han = _Han
var _Han = []Range {
Range{0x2e80, 0x2e99, 1},
Range{0x2e9b, 0x2ef3, 1},
@ -2897,9 +2706,87 @@ var _Han = []Range {
Range{0x2f800, 0x2fa1d, 1},
}
// Gothic is the set of Unicode characters in script Gothic.
var Gothic = _Gothic
var _Gothic = []Range {
Range{0x10330, 0x1034a, 1},
}
var (
Arabic = _Arabic; // Arabic is the set of Unicode characters in script Arabic.
Armenian = _Armenian; // Armenian is the set of Unicode characters in script Armenian.
Balinese = _Balinese; // Balinese is the set of Unicode characters in script Balinese.
Bengali = _Bengali; // Bengali is the set of Unicode characters in script Bengali.
Bopomofo = _Bopomofo; // Bopomofo is the set of Unicode characters in script Bopomofo.
Braille = _Braille; // Braille is the set of Unicode characters in script Braille.
Buginese = _Buginese; // Buginese is the set of Unicode characters in script Buginese.
Buhid = _Buhid; // Buhid is the set of Unicode characters in script Buhid.
Canadian_Aboriginal = _Canadian_Aboriginal; // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal.
Carian = _Carian; // Carian is the set of Unicode characters in script Carian.
Cham = _Cham; // Cham is the set of Unicode characters in script Cham.
Cherokee = _Cherokee; // Cherokee is the set of Unicode characters in script Cherokee.
Common = _Common; // Common is the set of Unicode characters in script Common.
Coptic = _Coptic; // Coptic is the set of Unicode characters in script Coptic.
Cuneiform = _Cuneiform; // Cuneiform is the set of Unicode characters in script Cuneiform.
Cypriot = _Cypriot; // Cypriot is the set of Unicode characters in script Cypriot.
Cyrillic = _Cyrillic; // Cyrillic is the set of Unicode characters in script Cyrillic.
Deseret = _Deseret; // Deseret is the set of Unicode characters in script Deseret.
Devanagari = _Devanagari; // Devanagari is the set of Unicode characters in script Devanagari.
Ethiopic = _Ethiopic; // Ethiopic is the set of Unicode characters in script Ethiopic.
Georgian = _Georgian; // Georgian is the set of Unicode characters in script Georgian.
Glagolitic = _Glagolitic; // Glagolitic is the set of Unicode characters in script Glagolitic.
Gothic = _Gothic; // Gothic is the set of Unicode characters in script Gothic.
Greek = _Greek; // Greek is the set of Unicode characters in script Greek.
Gujarati = _Gujarati; // Gujarati is the set of Unicode characters in script Gujarati.
Gurmukhi = _Gurmukhi; // Gurmukhi is the set of Unicode characters in script Gurmukhi.
Han = _Han; // Han is the set of Unicode characters in script Han.
Hangul = _Hangul; // Hangul is the set of Unicode characters in script Hangul.
Hanunoo = _Hanunoo; // Hanunoo is the set of Unicode characters in script Hanunoo.
Hebrew = _Hebrew; // Hebrew is the set of Unicode characters in script Hebrew.
Hiragana = _Hiragana; // Hiragana is the set of Unicode characters in script Hiragana.
Inherited = _Inherited; // Inherited is the set of Unicode characters in script Inherited.
Kannada = _Kannada; // Kannada is the set of Unicode characters in script Kannada.
Katakana = _Katakana; // Katakana is the set of Unicode characters in script Katakana.
Kayah_Li = _Kayah_Li; // Kayah_Li is the set of Unicode characters in script Kayah_Li.
Kharoshthi = _Kharoshthi; // Kharoshthi is the set of Unicode characters in script Kharoshthi.
Khmer = _Khmer; // Khmer is the set of Unicode characters in script Khmer.
Lao = _Lao; // Lao is the set of Unicode characters in script Lao.
Latin = _Latin; // Latin is the set of Unicode characters in script Latin.
Lepcha = _Lepcha; // Lepcha is the set of Unicode characters in script Lepcha.
Limbu = _Limbu; // Limbu is the set of Unicode characters in script Limbu.
Linear_B = _Linear_B; // Linear_B is the set of Unicode characters in script Linear_B.
Lycian = _Lycian; // Lycian is the set of Unicode characters in script Lycian.
Lydian = _Lydian; // Lydian is the set of Unicode characters in script Lydian.
Malayalam = _Malayalam; // Malayalam is the set of Unicode characters in script Malayalam.
Mongolian = _Mongolian; // Mongolian is the set of Unicode characters in script Mongolian.
Myanmar = _Myanmar; // Myanmar is the set of Unicode characters in script Myanmar.
New_Tai_Lue = _New_Tai_Lue; // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue.
Nko = _Nko; // Nko is the set of Unicode characters in script Nko.
Ogham = _Ogham; // Ogham is the set of Unicode characters in script Ogham.
Ol_Chiki = _Ol_Chiki; // Ol_Chiki is the set of Unicode characters in script Ol_Chiki.
Old_Italic = _Old_Italic; // Old_Italic is the set of Unicode characters in script Old_Italic.
Old_Persian = _Old_Persian; // Old_Persian is the set of Unicode characters in script Old_Persian.
Oriya = _Oriya; // Oriya is the set of Unicode characters in script Oriya.
Osmanya = _Osmanya; // Osmanya is the set of Unicode characters in script Osmanya.
Phags_Pa = _Phags_Pa; // Phags_Pa is the set of Unicode characters in script Phags_Pa.
Phoenician = _Phoenician; // Phoenician is the set of Unicode characters in script Phoenician.
Rejang = _Rejang; // Rejang is the set of Unicode characters in script Rejang.
Runic = _Runic; // Runic is the set of Unicode characters in script Runic.
Saurashtra = _Saurashtra; // Saurashtra is the set of Unicode characters in script Saurashtra.
Shavian = _Shavian; // Shavian is the set of Unicode characters in script Shavian.
Sinhala = _Sinhala; // Sinhala is the set of Unicode characters in script Sinhala.
Sundanese = _Sundanese; // Sundanese is the set of Unicode characters in script Sundanese.
Syloti_Nagri = _Syloti_Nagri; // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri.
Syriac = _Syriac; // Syriac is the set of Unicode characters in script Syriac.
Tagalog = _Tagalog; // Tagalog is the set of Unicode characters in script Tagalog.
Tagbanwa = _Tagbanwa; // Tagbanwa is the set of Unicode characters in script Tagbanwa.
Tai_Le = _Tai_Le; // Tai_Le is the set of Unicode characters in script Tai_Le.
Tamil = _Tamil; // Tamil is the set of Unicode characters in script Tamil.
Telugu = _Telugu; // Telugu is the set of Unicode characters in script Telugu.
Thaana = _Thaana; // Thaana is the set of Unicode characters in script Thaana.
Thai = _Thai; // Thai is the set of Unicode characters in script Thai.
Tibetan = _Tibetan; // Tibetan is the set of Unicode characters in script Tibetan.
Tifinagh = _Tifinagh; // Tifinagh is the set of Unicode characters in script Tifinagh.
Ugaritic = _Ugaritic; // Ugaritic is the set of Unicode characters in script Ugaritic.
Vai = _Vai; // Vai is the set of Unicode characters in script Vai.
Yi = _Yi; // Yi is the set of Unicode characters in script Yi.
)