1
0
mirror of https://github.com/golang/go synced 2024-11-21 23:24:41 -07:00

strconv: add table-based isPrint

Not used yet for simpler merge.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5756048
This commit is contained in:
Russ Cox 2012-03-06 00:36:12 -05:00
parent 9ff00c8fc9
commit e9d5a641d7
3 changed files with 734 additions and 0 deletions

521
src/pkg/strconv/isprint.go Normal file
View File

@ -0,0 +1,521 @@
// DO NOT EDIT. GENERATED BY
// go run makeisprint.go >x && mv x isprint.go
package strconv
// (474+134)*2 + (180+42)*4 = 2104 bytes
var isPrint16 = []uint16{
0x0020, 0x007e,
0x00a1, 0x0377,
0x037a, 0x037e,
0x0384, 0x0527,
0x0531, 0x0556,
0x0559, 0x058a,
0x0591, 0x05c7,
0x05d0, 0x05ea,
0x05f0, 0x05f4,
0x0606, 0x061b,
0x061e, 0x070d,
0x0710, 0x074a,
0x074d, 0x07b1,
0x07c0, 0x07fa,
0x0800, 0x082d,
0x0830, 0x085b,
0x085e, 0x085e,
0x0900, 0x098c,
0x098f, 0x0990,
0x0993, 0x09b2,
0x09b6, 0x09b9,
0x09bc, 0x09c4,
0x09c7, 0x09c8,
0x09cb, 0x09ce,
0x09d7, 0x09d7,
0x09dc, 0x09e3,
0x09e6, 0x09fb,
0x0a01, 0x0a0a,
0x0a0f, 0x0a10,
0x0a13, 0x0a39,
0x0a3c, 0x0a42,
0x0a47, 0x0a48,
0x0a4b, 0x0a4d,
0x0a51, 0x0a51,
0x0a59, 0x0a5e,
0x0a66, 0x0a75,
0x0a81, 0x0ab9,
0x0abc, 0x0acd,
0x0ad0, 0x0ad0,
0x0ae0, 0x0ae3,
0x0ae6, 0x0af1,
0x0b01, 0x0b0c,
0x0b0f, 0x0b10,
0x0b13, 0x0b39,
0x0b3c, 0x0b44,
0x0b47, 0x0b48,
0x0b4b, 0x0b4d,
0x0b56, 0x0b57,
0x0b5c, 0x0b63,
0x0b66, 0x0b77,
0x0b82, 0x0b8a,
0x0b8e, 0x0b95,
0x0b99, 0x0b9f,
0x0ba3, 0x0ba4,
0x0ba8, 0x0baa,
0x0bae, 0x0bb9,
0x0bbe, 0x0bc2,
0x0bc6, 0x0bcd,
0x0bd0, 0x0bd0,
0x0bd7, 0x0bd7,
0x0be6, 0x0bfa,
0x0c01, 0x0c39,
0x0c3d, 0x0c4d,
0x0c55, 0x0c59,
0x0c60, 0x0c63,
0x0c66, 0x0c6f,
0x0c78, 0x0c7f,
0x0c82, 0x0cb9,
0x0cbc, 0x0ccd,
0x0cd5, 0x0cd6,
0x0cde, 0x0ce3,
0x0ce6, 0x0cf2,
0x0d02, 0x0d3a,
0x0d3d, 0x0d4e,
0x0d57, 0x0d57,
0x0d60, 0x0d63,
0x0d66, 0x0d75,
0x0d79, 0x0d7f,
0x0d82, 0x0d96,
0x0d9a, 0x0dbd,
0x0dc0, 0x0dc6,
0x0dca, 0x0dca,
0x0dcf, 0x0ddf,
0x0df2, 0x0df4,
0x0e01, 0x0e3a,
0x0e3f, 0x0e5b,
0x0e81, 0x0e84,
0x0e87, 0x0e8a,
0x0e8d, 0x0e8d,
0x0e94, 0x0ea7,
0x0eaa, 0x0ebd,
0x0ec0, 0x0ecd,
0x0ed0, 0x0ed9,
0x0edc, 0x0edd,
0x0f00, 0x0f6c,
0x0f71, 0x0fda,
0x1000, 0x10c5,
0x10d0, 0x10fc,
0x1100, 0x124d,
0x1250, 0x125d,
0x1260, 0x128d,
0x1290, 0x12b5,
0x12b8, 0x12c5,
0x12c8, 0x1315,
0x1318, 0x135a,
0x135d, 0x137c,
0x1380, 0x1399,
0x13a0, 0x13f4,
0x1400, 0x169c,
0x16a0, 0x16f0,
0x1700, 0x1714,
0x1720, 0x1736,
0x1740, 0x1753,
0x1760, 0x1773,
0x1780, 0x17b3,
0x17b6, 0x17dd,
0x17e0, 0x17e9,
0x17f0, 0x17f9,
0x1800, 0x180d,
0x1810, 0x1819,
0x1820, 0x1877,
0x1880, 0x18aa,
0x18b0, 0x18f5,
0x1900, 0x191c,
0x1920, 0x192b,
0x1930, 0x193b,
0x1940, 0x1940,
0x1944, 0x196d,
0x1970, 0x1974,
0x1980, 0x19ab,
0x19b0, 0x19c9,
0x19d0, 0x19da,
0x19de, 0x1a1b,
0x1a1e, 0x1a7c,
0x1a7f, 0x1a89,
0x1a90, 0x1a99,
0x1aa0, 0x1aad,
0x1b00, 0x1b4b,
0x1b50, 0x1b7c,
0x1b80, 0x1baa,
0x1bae, 0x1bb9,
0x1bc0, 0x1bf3,
0x1bfc, 0x1c37,
0x1c3b, 0x1c49,
0x1c4d, 0x1c7f,
0x1cd0, 0x1cf2,
0x1d00, 0x1de6,
0x1dfc, 0x1f15,
0x1f18, 0x1f1d,
0x1f20, 0x1f45,
0x1f48, 0x1f4d,
0x1f50, 0x1f7d,
0x1f80, 0x1fd3,
0x1fd6, 0x1fef,
0x1ff2, 0x1ffe,
0x2010, 0x2027,
0x2030, 0x205e,
0x2070, 0x2071,
0x2074, 0x209c,
0x20a0, 0x20b9,
0x20d0, 0x20f0,
0x2100, 0x2189,
0x2190, 0x23f3,
0x2400, 0x2426,
0x2440, 0x244a,
0x2460, 0x2b4c,
0x2b50, 0x2b59,
0x2c00, 0x2cf1,
0x2cf9, 0x2d25,
0x2d30, 0x2d65,
0x2d6f, 0x2d70,
0x2d7f, 0x2d96,
0x2da0, 0x2e31,
0x2e80, 0x2ef3,
0x2f00, 0x2fd5,
0x2ff0, 0x2ffb,
0x3001, 0x3096,
0x3099, 0x30ff,
0x3105, 0x312d,
0x3131, 0x31ba,
0x31c0, 0x31e3,
0x31f0, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
0xa640, 0xa673,
0xa67c, 0xa697,
0xa6a0, 0xa6f7,
0xa700, 0xa791,
0xa7a0, 0xa7a9,
0xa7fa, 0xa82b,
0xa830, 0xa839,
0xa840, 0xa877,
0xa880, 0xa8c4,
0xa8ce, 0xa8d9,
0xa8e0, 0xa8fb,
0xa900, 0xa953,
0xa95f, 0xa97c,
0xa980, 0xa9d9,
0xa9de, 0xa9df,
0xaa00, 0xaa36,
0xaa40, 0xaa4d,
0xaa50, 0xaa59,
0xaa5c, 0xaa7b,
0xaa80, 0xaac2,
0xaadb, 0xaadf,
0xab01, 0xab06,
0xab09, 0xab0e,
0xab11, 0xab16,
0xab20, 0xab2e,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
0xfb13, 0xfb17,
0xfb1d, 0xfbc1,
0xfbd3, 0xfd3f,
0xfd50, 0xfd8f,
0xfd92, 0xfdc7,
0xfdf0, 0xfdfd,
0xfe00, 0xfe19,
0xfe20, 0xfe26,
0xfe30, 0xfe6b,
0xfe70, 0xfefc,
0xff01, 0xffbe,
0xffc2, 0xffc7,
0xffca, 0xffcf,
0xffd2, 0xffd7,
0xffda, 0xffdc,
0xffe0, 0xffee,
0xfffc, 0xfffd,
}
var isNotPrint16 = []uint16{
0x00ad,
0x038b,
0x038d,
0x03a2,
0x0560,
0x0588,
0x06dd,
0x083f,
0x0978,
0x0980,
0x0984,
0x09a9,
0x09b1,
0x09de,
0x0a04,
0x0a29,
0x0a31,
0x0a34,
0x0a37,
0x0a3d,
0x0a5d,
0x0a84,
0x0a8e,
0x0a92,
0x0aa9,
0x0ab1,
0x0ab4,
0x0ac6,
0x0aca,
0x0af0,
0x0b04,
0x0b29,
0x0b31,
0x0b34,
0x0b5e,
0x0b84,
0x0b91,
0x0b9b,
0x0b9d,
0x0bc9,
0x0c04,
0x0c0d,
0x0c11,
0x0c29,
0x0c34,
0x0c45,
0x0c49,
0x0c57,
0x0c84,
0x0c8d,
0x0c91,
0x0ca9,
0x0cb4,
0x0cc5,
0x0cc9,
0x0cdf,
0x0cf0,
0x0d04,
0x0d0d,
0x0d11,
0x0d45,
0x0d49,
0x0d84,
0x0db2,
0x0dbc,
0x0dd5,
0x0dd7,
0x0e83,
0x0e89,
0x0e98,
0x0ea0,
0x0ea4,
0x0ea6,
0x0eac,
0x0eba,
0x0ec5,
0x0ec7,
0x0f48,
0x0f98,
0x0fbd,
0x0fcd,
0x1249,
0x1257,
0x1259,
0x1289,
0x12b1,
0x12bf,
0x12c1,
0x12d7,
0x1311,
0x1680,
0x170d,
0x176d,
0x1771,
0x1a5f,
0x1f58,
0x1f5a,
0x1f5c,
0x1f5e,
0x1fb5,
0x1fc5,
0x1fdc,
0x1ff5,
0x208f,
0x2700,
0x27cb,
0x27cd,
0x2c2f,
0x2c5f,
0x2da7,
0x2daf,
0x2db7,
0x2dbf,
0x2dc7,
0x2dcf,
0x2dd7,
0x2ddf,
0x2e9a,
0x3040,
0x318f,
0x321f,
0x32ff,
0xa78f,
0xa9ce,
0xab27,
0xfb37,
0xfb3d,
0xfb3f,
0xfb42,
0xfb45,
0xfe53,
0xfe67,
0xfe75,
0xffe7,
}
var isPrint32 = []uint32{
0x000020, 0x00007e,
0x0000a1, 0x000377,
0x00037a, 0x00037e,
0x000384, 0x000527,
0x000531, 0x000556,
0x000559, 0x00058a,
0x000591, 0x0005c7,
0x0005d0, 0x0005ea,
0x0005f0, 0x0005f4,
0x000606, 0x00061b,
0x00061e, 0x00070d,
0x000710, 0x00074a,
0x00074d, 0x0007b1,
0x0007c0, 0x0007fa,
0x000800, 0x00082d,
0x000830, 0x00085b,
0x00085e, 0x00085e,
0x000900, 0x00098c,
0x00098f, 0x000990,
0x000993, 0x0009b2,
0x0009b6, 0x0009b9,
0x0009bc, 0x0009c4,
0x0009c7, 0x0009c8,
0x0009cb, 0x0009ce,
0x0009d7, 0x0009d7,
0x0009dc, 0x0009e3,
0x0009e6, 0x0009fb,
0x000a01, 0x000a0a,
0x000a0f, 0x000a10,
0x000a13, 0x000a39,
0x000a3c, 0x000a42,
0x000a47, 0x000a48,
0x000a4b, 0x000a4d,
0x000a51, 0x000a51,
0x000a59, 0x000a5e,
0x000a66, 0x000a75,
0x000a81, 0x000ab9,
0x000abc, 0x000acd,
0x000ad0, 0x000ad0,
0x000ae0, 0x000ae3,
0x000ae6, 0x000af1,
0x000b01, 0x000b0c,
0x000b0f, 0x000b10,
0x000b13, 0x000b39,
0x000b3c, 0x000b44,
0x000b47, 0x000b48,
0x000b4b, 0x000b4d,
0x000b56, 0x000b57,
0x000b5c, 0x000b63,
0x000b66, 0x000b77,
0x000b82, 0x000b8a,
0x000b8e, 0x000b95,
0x000b99, 0x000b9f,
0x000ba3, 0x000ba4,
0x000ba8, 0x000baa,
0x000bae, 0x000bb9,
0x000bbe, 0x000bc2,
0x000bc6, 0x000bcd,
0x000bd0, 0x000bd0,
0x000bd7, 0x000bd7,
0x000be6, 0x000bfa,
0x000c01, 0x000c39,
0x000c3d, 0x000c4d,
0x000c55, 0x000c59,
0x000c60, 0x000c63,
0x000c66, 0x000c6f,
0x000c78, 0x000c7f,
0x000c82, 0x000cb9,
0x000cbc, 0x000ccd,
0x000cd5, 0x000cd6,
0x000cde, 0x000ce3,
0x000ce6, 0x000cf2,
0x000d02, 0x000d3a,
0x000d3d, 0x000d4e,
0x000d57, 0x000d57,
0x000d60, 0x000d63,
0x000d66, 0x000d75,
0x000d79, 0x000d7f,
0x000d82, 0x000d96,
0x000d9a, 0x000dbd,
0x000dc0, 0x000dc6,
0x000dca, 0x000dca,
0x000dcf, 0x000ddf,
0x000df2, 0x000df4,
0x000e01, 0x000e3a,
0x000e3f, 0x000e5b,
0x000e81, 0x000e84,
0x000e87, 0x000e8a,
0x000e8d, 0x000e8d,
0x000e94, 0x000ea7,
}
var isNotPrint32 = []uint32{
0x1000c,
0x10027,
0x1003b,
0x1003e,
0x1031f,
0x1039e,
0x10809,
0x10836,
0x10856,
0x10a04,
0x10a14,
0x10a18,
0x110bd,
0x1d455,
0x1d49d,
0x1d4ad,
0x1d4ba,
0x1d4bc,
0x1d4c4,
0x1d506,
0x1d515,
0x1d51d,
0x1d53a,
0x1d53f,
0x1d545,
0x1d551,
0x1f0d0,
0x1f12f,
0x1f336,
0x1f3c5,
0x1f43f,
0x1f441,
0x1f4f8,
0x1f600,
0x1f611,
0x1f615,
0x1f617,
0x1f619,
0x1f61b,
0x1f61f,
0x1f62c,
0x1f634,
}

View File

@ -0,0 +1,157 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// makeisprint generates the tables for strconv's compact isPrint.
package main
import (
"fmt"
"unicode"
)
var (
range16 []uint16
except16 []uint16
range32 []uint32
except32 []uint32
)
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
func bsearch16(a []uint16, x uint16) int {
i, j := 0, len(a)
for i < j {
h := i + (j-i)/2
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
func bsearch32(a []uint32, x uint32) int {
i, j := 0, len(a)
for i < j {
h := i + (j-i)/2
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
func isPrint(r rune) bool {
// Same algorithm, either on uint16 or uint32 value.
// First, find first i such that rang[i] >= x.
// This is the index of either the start or end of a pair that might span x.
// The start is even (rang[i&^1]) and the end is odd (rang[i|1]).
// If we find x in a range, make sure x is not in exception list.
if 0 <= r && r < 1<<16 {
rr, rang, except := uint16(r), range16, except16
i := bsearch16(rang, rr)
if i >= len(rang) || rr < rang[i&^1] || rang[i|1] < rr {
return false
}
j := bsearch16(except, rr)
return j >= len(except) || except[j] != rr
}
rr, rang, except := uint32(r), range32, except32
i := bsearch32(rang, rr)
if i >= len(rang) || rr < rang[i&^1] || rang[i|1] < rr {
return false
}
j := bsearch32(except, rr)
return j >= len(except) || except[j] != rr
}
func scan(min, max rune) (rang, except []uint32) {
lo := rune(-1)
for i := min; ; i++ {
if (i > max || !unicode.IsPrint(i)) && lo >= 0 {
// End range, but avoid flip flop.
if i+1 <= max && unicode.IsPrint(i+1) {
except = append(except, uint32(i))
continue
}
rang = append(rang, uint32(lo), uint32(i-1))
lo = -1
}
if i > max {
break
}
if lo < 0 && unicode.IsPrint(i) {
lo = i
}
}
return
}
func to16(x []uint32) []uint16 {
var y []uint16
for _, v := range x {
if uint32(uint16(v)) != v {
panic("bad 32->16 conversion")
}
y = append(y, uint16(v))
}
return y
}
func main() {
rang, except := scan(0, 0xFFFF)
range16 = to16(rang)
except16 = to16(except)
range32, except32 = scan(0x10000, unicode.MaxRune)
for i := rune(0); i <= unicode.MaxRune; i++ {
if isPrint(i) != unicode.IsPrint(i) {
fmt.Printf("%U: isPrint=%v, want %v\n", i, isPrint(i), unicode.IsPrint(i))
break
}
}
fmt.Printf("// DO NOT EDIT. GENERATED BY\n")
fmt.Printf("// go run makeisprint.go >x && mv x isprint.go\n\n")
fmt.Printf("package strconv\n\n")
fmt.Printf("// (%d+%d)*2 + (%d+%d)*4 = %d bytes\n\n",
len(range16), len(except16),
len(range32), len(except32),
(len(range16)+len(except16))*2+
(len(range32)+len(except32))*4)
fmt.Printf("var isPrint16 = []uint16{\n")
for i := 0; i < len(range16); i += 2 {
fmt.Printf("\t%#04x, %#04x,\n", range16[i], range16[i+1])
}
fmt.Printf("}\n\n")
fmt.Printf("var isNotPrint16 = []uint16{\n")
for _, r := range except16 {
fmt.Printf("\t%#04x,\n", r)
}
fmt.Printf("}\n\n")
fmt.Printf("var isPrint32 = []uint32{\n")
for i := 0; i < len(range32); i += 2 {
fmt.Printf("\t%#06x, %#06x,\n", range16[i], range16[i+1])
}
fmt.Printf("}\n\n")
fmt.Printf("var isNotPrint32 = []uint32{\n")
for _, r := range except32 {
fmt.Printf("\t%#04x,\n", r)
}
fmt.Printf("}\n")
}

View File

@ -351,3 +351,59 @@ func Unquote(s string) (t string, err error) {
}
return string(buf), nil
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
func bsearch16(a []uint16, x uint16) int {
i, j := 0, len(a)
for i < j {
h := i + (j-i)/2
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
func bsearch32(a []uint32, x uint32) int {
i, j := 0, len(a)
for i < j {
h := i + (j-i)/2
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
func isPrint(r rune) bool {
// Same algorithm, either on uint16 or uint32 value.
// First, find first i such that isPrint[i] >= x.
// This is the index of either the start or end of a pair that might span x.
// The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
// If we find x in a range, make sure x is not in isNotPrint list.
if 0 <= r && r < 1<<16 {
rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
i := bsearch16(isPrint, rr)
if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
return false
}
j := bsearch16(isNotPrint, rr)
return j >= len(isNotPrint) || isNotPrint[j] != rr
}
rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
i := bsearch32(isPrint, rr)
if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
return false
}
j := bsearch32(isNotPrint, rr)
return j >= len(isNotPrint) || isNotPrint[j] != rr
}