diff --git a/src/regexp/exec_test.go b/src/regexp/exec_test.go index cfc1e147c1..f8f5f4020e 100644 --- a/src/regexp/exec_test.go +++ b/src/regexp/exec_test.go @@ -672,6 +672,7 @@ func benchmark(b *testing.B, re string, n int) { const ( easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" + easy0i = "(?i)ABCDEFGHIJklmnopqrstuvwxyz$" easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" @@ -682,6 +683,11 @@ func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) } func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) } func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) } func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) } +func BenchmarkMatchEasy0i_32(b *testing.B) { benchmark(b, easy0i, 32<<0) } +func BenchmarkMatchEasy0i_1K(b *testing.B) { benchmark(b, easy0i, 1<<10) } +func BenchmarkMatchEasy0i_32K(b *testing.B) { benchmark(b, easy0i, 32<<10) } +func BenchmarkMatchEasy0i_1M(b *testing.B) { benchmark(b, easy0i, 1<<20) } +func BenchmarkMatchEasy0i_32M(b *testing.B) { benchmark(b, easy0i, 32<<20) } func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) } func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) } func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) } diff --git a/src/unicode/letter.go b/src/unicode/letter.go index ffa083eb57..8aec920d22 100644 --- a/src/unicode/letter.go +++ b/src/unicode/letter.go @@ -332,6 +332,10 @@ type foldPair struct { // SimpleFold('1') = '1' // func SimpleFold(r rune) rune { + if int(r) < len(asciiFold) { + return rune(asciiFold[r]) + } + // Consult caseOrbit table for special cases. 
lo := 0 hi := len(caseOrbit) diff --git a/src/unicode/maketables.go b/src/unicode/maketables.go index 328c75ed63..f364515c90 100644 --- a/src/unicode/maketables.go +++ b/src/unicode/maketables.go @@ -1172,6 +1172,7 @@ func printCasefold() { } } + printAsciiFold() printCaseOrbit() // Tables of category and script folding exceptions: code points @@ -1269,6 +1270,25 @@ var comment = map[string]string{ "// If there is no entry for a script name, there are no such points.\n", } +func printAsciiFold() { + printf("var asciiFold = [MaxASCII + 1]uint16{\n") + for i := rune(0); i <= unicode.MaxASCII; i++ { + c := chars[i] + f := c.caseOrbit + if f == 0 { + if c.lowerCase != i && c.lowerCase != 0 { + f = c.lowerCase + } else if c.upperCase != i && c.upperCase != 0 { + f = c.upperCase + } else { + f = i + } + } + printf("\t0x%04X,\n", f) + } + printf("}\n\n") +} + func printCaseOrbit() { if *test { for j := range chars { diff --git a/src/unicode/tables.go b/src/unicode/tables.go index 8bb42062f9..c04d69a6ff 100644 --- a/src/unicode/tables.go +++ b/src/unicode/tables.go @@ -6834,6 +6834,137 @@ var properties = [MaxLatin1 + 1]uint8{ 0xFF: pLl | pp, // 'ÿ' } +var asciiFold = [MaxASCII + 1]uint16{ + 0x0000, + 0x0001, + 0x0002, + 0x0003, + 0x0004, + 0x0005, + 0x0006, + 0x0007, + 0x0008, + 0x0009, + 0x000A, + 0x000B, + 0x000C, + 0x000D, + 0x000E, + 0x000F, + 0x0010, + 0x0011, + 0x0012, + 0x0013, + 0x0014, + 0x0015, + 0x0016, + 0x0017, + 0x0018, + 0x0019, + 0x001A, + 0x001B, + 0x001C, + 0x001D, + 0x001E, + 0x001F, + 0x0020, + 0x0021, + 0x0022, + 0x0023, + 0x0024, + 0x0025, + 0x0026, + 0x0027, + 0x0028, + 0x0029, + 0x002A, + 0x002B, + 0x002C, + 0x002D, + 0x002E, + 0x002F, + 0x0030, + 0x0031, + 0x0032, + 0x0033, + 0x0034, + 0x0035, + 0x0036, + 0x0037, + 0x0038, + 0x0039, + 0x003A, + 0x003B, + 0x003C, + 0x003D, + 0x003E, + 0x003F, + 0x0040, + 0x0061, + 0x0062, + 0x0063, + 0x0064, + 0x0065, + 0x0066, + 0x0067, + 0x0068, + 0x0069, + 0x006A, + 0x006B, + 0x006C, + 0x006D, + 0x006E, +
0x006F, + 0x0070, + 0x0071, + 0x0072, + 0x0073, + 0x0074, + 0x0075, + 0x0076, + 0x0077, + 0x0078, + 0x0079, + 0x007A, + 0x005B, + 0x005C, + 0x005D, + 0x005E, + 0x005F, + 0x0060, + 0x0041, + 0x0042, + 0x0043, + 0x0044, + 0x0045, + 0x0046, + 0x0047, + 0x0048, + 0x0049, + 0x004A, + 0x212A, + 0x004C, + 0x004D, + 0x004E, + 0x004F, + 0x0050, + 0x0051, + 0x0052, + 0x017F, + 0x0054, + 0x0055, + 0x0056, + 0x0057, + 0x0058, + 0x0059, + 0x005A, + 0x007B, + 0x007C, + 0x007D, + 0x007E, + 0x007F, +} + var caseOrbit = []foldPair{ {0x004B, 0x006B}, {0x0053, 0x0073},