1
0
mirror of https://github.com/golang/go synced 2024-09-30 06:34:29 -06:00

strconv: remove dependence on unicode and strings

We need a compact, reasonably efficient IsPrint. That adds about 2K of data,
plus a modest amount of code, but now strconv is a near-leaf package.

R=r, bradfitz, adg, rsc, minux.ma
CC=golang-dev
https://golang.org/cl/5756050
This commit is contained in:
Rob Pike 2012-03-07 13:50:31 +11:00
parent 7db4384354
commit f91326b7b1
5 changed files with 215 additions and 163 deletions

View File

@ -52,7 +52,7 @@ var pkgDeps = map[string][]string{
"math/rand": {"L0", "math"},
"path": {"L0", "unicode/utf8", "strings"},
"sort": {"math"},
"strconv": {"L0", "unicode", "unicode/utf8", "math", "strings"},
"strconv": {"L0", "unicode/utf8", "math"},
"strings": {"L0", "unicode", "unicode/utf8"},
"unicode": {},
"unicode/utf16": {},

View File

@ -3,7 +3,7 @@
package strconv
// (474+134)*2 + (180+42)*4 = 2104 bytes
// (474+134+42)*2 + (180)*4 = 2020 bytes
var isPrint16 = []uint16{
0x0020, 0x007e,
@ -383,139 +383,139 @@ var isNotPrint16 = []uint16{
}
var isPrint32 = []uint32{
0x000020, 0x00007e,
0x0000a1, 0x000377,
0x00037a, 0x00037e,
0x000384, 0x000527,
0x000531, 0x000556,
0x000559, 0x00058a,
0x000591, 0x0005c7,
0x0005d0, 0x0005ea,
0x0005f0, 0x0005f4,
0x000606, 0x00061b,
0x00061e, 0x00070d,
0x000710, 0x00074a,
0x00074d, 0x0007b1,
0x0007c0, 0x0007fa,
0x000800, 0x00082d,
0x000830, 0x00085b,
0x00085e, 0x00085e,
0x000900, 0x00098c,
0x00098f, 0x000990,
0x000993, 0x0009b2,
0x0009b6, 0x0009b9,
0x0009bc, 0x0009c4,
0x0009c7, 0x0009c8,
0x0009cb, 0x0009ce,
0x0009d7, 0x0009d7,
0x0009dc, 0x0009e3,
0x0009e6, 0x0009fb,
0x000a01, 0x000a0a,
0x000a0f, 0x000a10,
0x000a13, 0x000a39,
0x000a3c, 0x000a42,
0x000a47, 0x000a48,
0x000a4b, 0x000a4d,
0x000a51, 0x000a51,
0x000a59, 0x000a5e,
0x000a66, 0x000a75,
0x000a81, 0x000ab9,
0x000abc, 0x000acd,
0x000ad0, 0x000ad0,
0x000ae0, 0x000ae3,
0x000ae6, 0x000af1,
0x000b01, 0x000b0c,
0x000b0f, 0x000b10,
0x000b13, 0x000b39,
0x000b3c, 0x000b44,
0x000b47, 0x000b48,
0x000b4b, 0x000b4d,
0x000b56, 0x000b57,
0x000b5c, 0x000b63,
0x000b66, 0x000b77,
0x000b82, 0x000b8a,
0x000b8e, 0x000b95,
0x000b99, 0x000b9f,
0x000ba3, 0x000ba4,
0x000ba8, 0x000baa,
0x000bae, 0x000bb9,
0x000bbe, 0x000bc2,
0x000bc6, 0x000bcd,
0x000bd0, 0x000bd0,
0x000bd7, 0x000bd7,
0x000be6, 0x000bfa,
0x000c01, 0x000c39,
0x000c3d, 0x000c4d,
0x000c55, 0x000c59,
0x000c60, 0x000c63,
0x000c66, 0x000c6f,
0x000c78, 0x000c7f,
0x000c82, 0x000cb9,
0x000cbc, 0x000ccd,
0x000cd5, 0x000cd6,
0x000cde, 0x000ce3,
0x000ce6, 0x000cf2,
0x000d02, 0x000d3a,
0x000d3d, 0x000d4e,
0x000d57, 0x000d57,
0x000d60, 0x000d63,
0x000d66, 0x000d75,
0x000d79, 0x000d7f,
0x000d82, 0x000d96,
0x000d9a, 0x000dbd,
0x000dc0, 0x000dc6,
0x000dca, 0x000dca,
0x000dcf, 0x000ddf,
0x000df2, 0x000df4,
0x000e01, 0x000e3a,
0x000e3f, 0x000e5b,
0x000e81, 0x000e84,
0x000e87, 0x000e8a,
0x000e8d, 0x000e8d,
0x000e94, 0x000ea7,
0x010000, 0x01004d,
0x010050, 0x01005d,
0x010080, 0x0100fa,
0x010100, 0x010102,
0x010107, 0x010133,
0x010137, 0x01018a,
0x010190, 0x01019b,
0x0101d0, 0x0101fd,
0x010280, 0x01029c,
0x0102a0, 0x0102d0,
0x010300, 0x010323,
0x010330, 0x01034a,
0x010380, 0x0103c3,
0x0103c8, 0x0103d5,
0x010400, 0x01049d,
0x0104a0, 0x0104a9,
0x010800, 0x010805,
0x010808, 0x010838,
0x01083c, 0x01083c,
0x01083f, 0x01085f,
0x010900, 0x01091b,
0x01091f, 0x010939,
0x01093f, 0x01093f,
0x010a00, 0x010a06,
0x010a0c, 0x010a33,
0x010a38, 0x010a3a,
0x010a3f, 0x010a47,
0x010a50, 0x010a58,
0x010a60, 0x010a7f,
0x010b00, 0x010b35,
0x010b39, 0x010b55,
0x010b58, 0x010b72,
0x010b78, 0x010b7f,
0x010c00, 0x010c48,
0x010e60, 0x010e7e,
0x011000, 0x01104d,
0x011052, 0x01106f,
0x011080, 0x0110c1,
0x012000, 0x01236e,
0x012400, 0x012462,
0x012470, 0x012473,
0x013000, 0x01342e,
0x016800, 0x016a38,
0x01b000, 0x01b001,
0x01d000, 0x01d0f5,
0x01d100, 0x01d126,
0x01d129, 0x01d172,
0x01d17b, 0x01d1dd,
0x01d200, 0x01d245,
0x01d300, 0x01d356,
0x01d360, 0x01d371,
0x01d400, 0x01d49f,
0x01d4a2, 0x01d4a2,
0x01d4a5, 0x01d4a6,
0x01d4a9, 0x01d50a,
0x01d50d, 0x01d546,
0x01d54a, 0x01d6a5,
0x01d6a8, 0x01d7cb,
0x01d7ce, 0x01d7ff,
0x01f000, 0x01f02b,
0x01f030, 0x01f093,
0x01f0a0, 0x01f0ae,
0x01f0b1, 0x01f0be,
0x01f0c1, 0x01f0df,
0x01f100, 0x01f10a,
0x01f110, 0x01f169,
0x01f170, 0x01f19a,
0x01f1e6, 0x01f202,
0x01f210, 0x01f23a,
0x01f240, 0x01f248,
0x01f250, 0x01f251,
0x01f300, 0x01f320,
0x01f330, 0x01f37c,
0x01f380, 0x01f393,
0x01f3a0, 0x01f3ca,
0x01f3e0, 0x01f3f0,
0x01f400, 0x01f4fc,
0x01f500, 0x01f53d,
0x01f550, 0x01f567,
0x01f5fb, 0x01f625,
0x01f628, 0x01f62d,
0x01f630, 0x01f640,
0x01f645, 0x01f64f,
0x01f680, 0x01f6c5,
0x01f700, 0x01f773,
0x020000, 0x02a6d6,
0x02a700, 0x02b734,
0x02b740, 0x02b81d,
0x02f800, 0x02fa1d,
0x0e0100, 0x0e01ef,
}
var isNotPrint32 = []uint32{
0x1000c,
0x10027,
0x1003b,
0x1003e,
0x1031f,
0x1039e,
0x10809,
0x10836,
0x10856,
0x10a04,
0x10a14,
0x10a18,
0x110bd,
0x1d455,
0x1d49d,
0x1d4ad,
0x1d4ba,
0x1d4bc,
0x1d4c4,
0x1d506,
0x1d515,
0x1d51d,
0x1d53a,
0x1d53f,
0x1d545,
0x1d551,
0x1f0d0,
0x1f12f,
0x1f336,
0x1f3c5,
0x1f43f,
0x1f441,
0x1f4f8,
0x1f600,
0x1f611,
0x1f615,
0x1f617,
0x1f619,
0x1f61b,
0x1f61f,
0x1f62c,
0x1f634,
// isNotPrint32 lists the exception code points for the 32-bit tables, each
// stored with 0x10000 subtracted so that it fits in a uint16. The generator
// (makeisprint.go) refuses to emit any exception at or above 0x20000, which
// is what makes the narrower element type safe.
// NOTE(review): the lookup in IsPrint appears to binary-search this table
// via bsearch16, so entries presumably must remain in ascending order —
// confirm against the full IsPrint body before editing by hand.
var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x000c,
0x0027,
0x003b,
0x003e,
0x031f,
0x039e,
0x0809,
0x0836,
0x0856,
0x0a04,
0x0a14,
0x0a18,
0x10bd,
0xd455,
0xd49d,
0xd4ad,
0xd4ba,
0xd4bc,
0xd4c4,
0xd506,
0xd515,
0xd51d,
0xd53a,
0xd53f,
0xd545,
0xd551,
0xf0d0,
0xf12f,
0xf336,
0xf3c5,
0xf43f,
0xf441,
0xf4f8,
0xf600,
0xf611,
0xf615,
0xf617,
0xf619,
0xf61b,
0xf61f,
0xf62c,
0xf634,
}

View File

@ -9,6 +9,7 @@ package main
import (
"fmt"
"os"
"unicode"
)
@ -116,8 +117,8 @@ func main() {
for i := rune(0); i <= unicode.MaxRune; i++ {
if isPrint(i) != unicode.IsPrint(i) {
fmt.Printf("%U: isPrint=%v, want %v\n", i, isPrint(i), unicode.IsPrint(i))
break
fmt.Fprintf(os.Stderr, "%U: isPrint=%v, want %v\n", i, isPrint(i), unicode.IsPrint(i))
return
}
}
@ -125,11 +126,11 @@ func main() {
fmt.Printf("// go run makeisprint.go >x && mv x isprint.go\n\n")
fmt.Printf("package strconv\n\n")
fmt.Printf("// (%d+%d)*2 + (%d+%d)*4 = %d bytes\n\n",
len(range16), len(except16),
len(range32), len(except32),
(len(range16)+len(except16))*2+
(len(range32)+len(except32))*4)
fmt.Printf("// (%d+%d+%d)*2 + (%d)*4 = %d bytes\n\n",
len(range16), len(except16), len(except32),
len(range32),
(len(range16)+len(except16)+len(except32))*2+
(len(range32))*4)
fmt.Printf("var isPrint16 = []uint16{\n")
for i := 0; i < len(range16); i += 2 {
@ -145,13 +146,17 @@ func main() {
fmt.Printf("var isPrint32 = []uint32{\n")
for i := 0; i < len(range32); i += 2 {
fmt.Printf("\t%#06x, %#06x,\n", range16[i], range16[i+1])
fmt.Printf("\t%#06x, %#06x,\n", range32[i], range32[i+1])
}
fmt.Printf("}\n\n")
fmt.Printf("var isNotPrint32 = []uint32{\n")
fmt.Printf("var isNotPrint32 = []uint16{ // add 0x10000 to each entry\n")
for _, r := range except32 {
fmt.Printf("\t%#04x,\n", r)
if r >= 0x20000 {
fmt.Fprintf(os.Stderr, "%U too big for isNotPrint32\n", r)
return
}
fmt.Printf("\t%#04x,\n", r-0x10000)
}
fmt.Printf("}\n")
}

View File

@ -5,8 +5,6 @@
package strconv
import (
"strings"
"unicode"
"unicode/utf8"
)
@ -34,11 +32,11 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
continue
}
if ASCIIonly {
if r <= unicode.MaxASCII && unicode.IsPrint(r) {
if r < utf8.RuneSelf && IsPrint(r) {
buf = append(buf, byte(r))
continue
}
} else if unicode.IsPrint(r) {
} else if IsPrint(r) {
n := utf8.EncodeRune(runeTmp[:], r)
buf = append(buf, runeTmp[:n]...)
continue
@ -64,7 +62,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4])
buf = append(buf, lowerhex[s[0]&0xF])
case r > unicode.MaxRune:
case r > utf8.MaxRune:
r = 0xFFFD
fallthrough
case r < 0x10000:
@ -88,7 +86,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
// Quote returns a double-quoted Go string literal representing s. The
// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// control characters and non-printable characters as defined by
// unicode.IsPrint.
// IsPrint.
func Quote(s string) string {
return quoteWith(s, '"', false)
}
@ -101,8 +99,7 @@ func AppendQuote(dst []byte, s string) []byte {
// QuoteToASCII returns a double-quoted Go string literal representing s.
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// non-ASCII characters and non-printable characters as defined by
// unicode.IsPrint.
// non-ASCII characters and non-printable characters as defined by IsPrint.
func QuoteToASCII(s string) string {
return quoteWith(s, '"', true)
}
@ -115,8 +112,7 @@ func AppendQuoteToASCII(dst []byte, s string) []byte {
// QuoteRune returns a single-quoted Go character literal representing the
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
// for control characters and non-printable characters as defined by
// unicode.IsPrint.
// for control characters and non-printable characters as defined by IsPrint.
func QuoteRune(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', false)
@ -131,7 +127,7 @@ func AppendQuoteRune(dst []byte, r rune) []byte {
// QuoteRuneToASCII returns a single-quoted Go character literal representing
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
// \u0100) for non-ASCII characters and non-printable characters as defined
// by unicode.IsPrint.
// by IsPrint.
func QuoteRuneToASCII(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', true)
@ -246,7 +242,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
value = v
break
}
if v > unicode.MaxRune {
if v > utf8.MaxRune {
err = ErrSyntax
return
}
@ -305,7 +301,7 @@ func Unquote(s string) (t string, err error) {
s = s[1 : n-1]
if quote == '`' {
if strings.Contains(s, "`") {
if contains(s, '`') {
return "", ErrSyntax
}
return s, nil
@ -313,12 +309,12 @@ func Unquote(s string) (t string, err error) {
if quote != '"' && quote != '\'' {
return "", ErrSyntax
}
if strings.Index(s, "\n") >= 0 {
if contains(s, '\n') {
return "", ErrSyntax
}
// Is it trivial? Avoid allocation.
if strings.Index(s, `\`) < 0 && strings.IndexRune(s, rune(quote)) < 0 {
if !contains(s, '\\') && !contains(s, quote) {
switch quote {
case '"':
return s, nil
@ -352,6 +348,16 @@ func Unquote(s string) (t string, err error) {
return string(buf), nil
}
// contains reports whether the byte c occurs anywhere in s.
// The comparison is byte-wise, not rune-wise, so c is matched against
// every byte of s, including the bytes of multi-byte UTF-8 sequences.
func contains(s string, c byte) bool {
	for _, b := range []byte(s) {
		if b == c {
			return true
		}
	}
	return false
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
func bsearch16(a []uint16, x uint16) int {
@ -382,7 +388,29 @@ func bsearch32(a []uint32, x uint32) int {
return i
}
func isPrint(r rune) bool {
// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
// to give the same answer. It allows this package not to depend on unicode,
// and therefore not pull in all the Unicode tables. If the linker were better
// at tossing unused tables, we could get rid of this implementation.
// That would be nice.
// IsPrint reports whether the rune is defined as printable by Go, with
// the same definition as unicode.IsPrint: letters, numbers, punctuation,
// symbols and ASCII space.
func IsPrint(r rune) bool {
// Fast check for Latin-1
if r <= 0xFF {
if 0x20 <= r && r <= 0x7E {
// All the ASCII is printable from space through DEL-1.
return true
}
if 0xA1 <= r && r <= 0xFF {
// Similarly for ¡ through ÿ...
return r != 0xAD // ...except for the bizarre soft hyphen.
}
return false
}
// Same algorithm, either on uint16 or uint32 value.
// First, find first i such that isPrint[i] >= x.
// This is the index of either the start or end of a pair that might span x.
@ -404,6 +432,10 @@ func isPrint(r rune) bool {
if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
return false
}
j := bsearch32(isNotPrint, rr)
return j >= len(isNotPrint) || isNotPrint[j] != rr
if r >= 0x20000 {
return true
}
r -= 0x10000
j := bsearch16(isNotPrint, uint16(r))
return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
}

View File

@ -7,8 +7,23 @@ package strconv_test
import (
. "strconv"
"testing"
"unicode"
)
// TestIsPrint checks that the package's IsPrint gives the same answer as
// unicode.IsPrint for every rune from 0 through unicode.MaxRune. It gives
// up once more than ten mismatches have been reported, to keep the failure
// output short.
func TestIsPrint(t *testing.T) {
	const maxReports = 10
	mismatches := 0
	for r := rune(0); r <= unicode.MaxRune; r++ {
		got := IsPrint(r)
		if got == unicode.IsPrint(r) {
			continue
		}
		t.Errorf("IsPrint(%U)=%t incorrect", r, got)
		mismatches++
		if mismatches > maxReports {
			return
		}
	}
}
type quoteTest struct {
in string
out string