mirror of
https://github.com/golang/go
synced 2024-11-24 13:00:15 -07:00
strconv: change Quote to be Unicode-friendly,
add QuoteToASCII. The Quote and QuoteRune functions now let printable runes (as defined by unicode.IsPrint) through. When true 7-bit clean stuff is necessary, there are now two new functions: QuoteToASCII and QuoteRuneToASCII. Printf("%q") uses Quote. To get the old behavior, it will now be necessary to say Printf("%s", strconv.QuoteToASCII(s)) but that should rarely be necessary. R=golang-dev, gri, r CC=golang-dev https://golang.org/cl/4561061
This commit is contained in:
parent
05348ab0c8
commit
f2f3b8fa99
@ -132,15 +132,15 @@ var fmttests = []struct {
|
||||
{"%q", `"`, `"\""`},
|
||||
{"%q", "\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`},
|
||||
{"%q", "abc\xffdef", `"abc\xffdef"`},
|
||||
{"%q", "\u263a", `"\u263a"`},
|
||||
{"%q", "\u263a", `"☺"`},
|
||||
{"%q", "\U0010ffff", `"\U0010ffff"`},
|
||||
|
||||
// escaped characters
|
||||
{"%q", 'x', `'x'`},
|
||||
{"%q", 0, `'\x00'`},
|
||||
{"%q", '\n', `'\n'`},
|
||||
{"%q", '\u1234', `'\u1234'`},
|
||||
{"%q", '\U00012345', `'\U00012345'`},
|
||||
{"%q", '\u0e00', `'\u0e00'`}, // not a printable rune.
|
||||
{"%q", '\U000c2345', `'\U000c2345'`}, // not a printable rune.
|
||||
{"%q", int64(0x7FFFFFFF), `%!q(int64=2147483647)`},
|
||||
{"%q", uint64(0xFFFFFFFF), `%!q(uint64=4294967295)`},
|
||||
{"%q", '"', `'"'`},
|
||||
@ -148,7 +148,7 @@ var fmttests = []struct {
|
||||
|
||||
// width
|
||||
{"%5s", "abc", " abc"},
|
||||
{"%2s", "\u263a", " \u263a"},
|
||||
{"%2s", "\u263a", " ☺"},
|
||||
{"%-5s", "abc", "abc "},
|
||||
{"%-8q", "abc", `"abc" `},
|
||||
{"%05s", "abc", "00abc"},
|
||||
@ -158,9 +158,9 @@ var fmttests = []struct {
|
||||
{"%.5s", "日本語日本語", "日本語日本"},
|
||||
{"%.5s", []byte("日本語日本語"), "日本語日本"},
|
||||
{"%.5q", "abcdefghijklmnopqrstuvwxyz", `"abcde"`},
|
||||
{"%.3q", "日本語日本語", `"\u65e5\u672c\u8a9e"`},
|
||||
{"%.3q", []byte("日本語日本語"), `"\u65e5\u672c\u8a9e"`},
|
||||
{"%10.1q", "日本語日本語", ` "\u65e5"`},
|
||||
{"%.3q", "日本語日本語", `"日本語"`},
|
||||
{"%.3q", []byte("日本語日本語"), `"日本語"`},
|
||||
{"%10.1q", "日本語日本語", ` "日"`},
|
||||
|
||||
// integers
|
||||
{"%d", 12345, "12345"},
|
||||
|
@ -652,7 +652,7 @@ var errors = []struct {
|
||||
}{
|
||||
{"\a", token.ILLEGAL, 0, "illegal character '\\a'"},
|
||||
{`#`, token.ILLEGAL, 0, "illegal character '#'"},
|
||||
{`…`, token.ILLEGAL, 0, "illegal character '\\u2026'"},
|
||||
{`…`, token.ILLEGAL, 0, "illegal character '…'"},
|
||||
{`' '`, token.CHAR, 0, ""},
|
||||
{`''`, token.CHAR, 0, "illegal character literal"},
|
||||
{`'\8'`, token.CHAR, 2, "unknown escape sequence"},
|
||||
|
@ -14,56 +14,68 @@ import (
|
||||
|
||||
const lowerhex = "0123456789abcdef"
|
||||
|
||||
func quoteWith(s string, quote byte) string {
|
||||
func quoteWith(s string, quote byte, ASCIIonly bool) string {
|
||||
var buf bytes.Buffer
|
||||
buf.WriteByte(quote)
|
||||
for ; len(s) > 0; s = s[1:] {
|
||||
switch c := s[0]; {
|
||||
case c == quote:
|
||||
for width := 0; len(s) > 0; s = s[width:] {
|
||||
rune := int(s[0])
|
||||
width = 1
|
||||
if rune >= utf8.RuneSelf {
|
||||
rune, width = utf8.DecodeRuneInString(s)
|
||||
}
|
||||
if width == 1 && rune == utf8.RuneError {
|
||||
goto printEscX
|
||||
}
|
||||
if rune == int(quote) || rune == '\\' { // always backslashed
|
||||
buf.WriteByte('\\')
|
||||
buf.WriteByte(quote)
|
||||
case c == '\\':
|
||||
buf.WriteString(`\\`)
|
||||
case ' ' <= c && c <= '~':
|
||||
buf.WriteString(string(c))
|
||||
case c == '\a':
|
||||
buf.WriteByte(byte(rune))
|
||||
continue
|
||||
}
|
||||
if ASCIIonly {
|
||||
if rune <= unicode.MaxASCII && unicode.IsPrint(rune) {
|
||||
buf.WriteRune(rune)
|
||||
continue
|
||||
}
|
||||
} else if unicode.IsPrint(rune) {
|
||||
buf.WriteRune(rune)
|
||||
continue
|
||||
}
|
||||
switch rune {
|
||||
case '\a':
|
||||
buf.WriteString(`\a`)
|
||||
case c == '\b':
|
||||
case '\b':
|
||||
buf.WriteString(`\b`)
|
||||
case c == '\f':
|
||||
case '\f':
|
||||
buf.WriteString(`\f`)
|
||||
case c == '\n':
|
||||
case '\n':
|
||||
buf.WriteString(`\n`)
|
||||
case c == '\r':
|
||||
case '\r':
|
||||
buf.WriteString(`\r`)
|
||||
case c == '\t':
|
||||
case '\t':
|
||||
buf.WriteString(`\t`)
|
||||
case c == '\v':
|
||||
case '\v':
|
||||
buf.WriteString(`\v`)
|
||||
|
||||
case c >= utf8.RuneSelf && utf8.FullRuneInString(s):
|
||||
r, size := utf8.DecodeRuneInString(s)
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
goto EscX
|
||||
}
|
||||
s = s[size-1:] // next iteration will slice off 1 more
|
||||
if r < 0x10000 {
|
||||
buf.WriteString(`\u`)
|
||||
for j := uint(0); j < 4; j++ {
|
||||
buf.WriteByte(lowerhex[(r>>(12-4*j))&0xF])
|
||||
}
|
||||
} else {
|
||||
buf.WriteString(`\U`)
|
||||
for j := uint(0); j < 8; j++ {
|
||||
buf.WriteByte(lowerhex[(r>>(28-4*j))&0xF])
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
EscX:
|
||||
buf.WriteString(`\x`)
|
||||
buf.WriteByte(lowerhex[c>>4])
|
||||
buf.WriteByte(lowerhex[c&0xF])
|
||||
switch {
|
||||
case rune < ' ':
|
||||
printEscX:
|
||||
buf.WriteString(`\x`)
|
||||
buf.WriteByte(lowerhex[s[0]>>4])
|
||||
buf.WriteByte(lowerhex[s[0]&0xF])
|
||||
case rune > unicode.MaxRune:
|
||||
rune = 0xFFFD
|
||||
fallthrough
|
||||
case rune < 0x10000:
|
||||
buf.WriteString(`\u`)
|
||||
for s := 12; s >= 0; s -= 4 {
|
||||
buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
|
||||
}
|
||||
default:
|
||||
buf.WriteString(`\U`)
|
||||
for s := 28; s >= 0; s -= 4 {
|
||||
buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
buf.WriteByte(quote)
|
||||
@ -71,21 +83,38 @@ func quoteWith(s string, quote byte) string {
|
||||
|
||||
}
|
||||
|
||||
// Quote returns a double-quoted Go string literal
|
||||
// representing s. The returned string uses Go escape
|
||||
// sequences (\t, \n, \xFF, \u0100) for control characters
|
||||
// and non-ASCII characters.
|
||||
// Quote returns a double-quoted Go string literal representing s. The
|
||||
// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
|
||||
// control characters and non-printable characters as defined by
|
||||
// unicode.IsPrint.
|
||||
func Quote(s string) string {
|
||||
return quoteWith(s, '"')
|
||||
return quoteWith(s, '"', false)
|
||||
}
|
||||
|
||||
// QuoteRune returns a single-quoted Go character literal
|
||||
// representing the rune. The returned string uses Go escape
|
||||
// sequences (\t, \n, \xFF, \u0100) for control characters
|
||||
// and non-ASCII characters.
|
||||
// QuoteToASCII returns a double-quoted Go string literal representing s.
|
||||
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
|
||||
// non-ASCII characters and non-printable characters as defined by
|
||||
// unicode.IsPrint.
|
||||
func QuoteToASCII(s string) string {
|
||||
return quoteWith(s, '"', true)
|
||||
}
|
||||
|
||||
// QuoteRune returns a single-quoted Go character literal representing the
|
||||
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
|
||||
// for control characters and non-printable characters as defined by
|
||||
// unicode.IsPrint.
|
||||
func QuoteRune(rune int) string {
|
||||
// TODO: avoid the allocation here.
|
||||
return quoteWith(string(rune), '\'')
|
||||
return quoteWith(string(rune), '\'', false)
|
||||
}
|
||||
|
||||
// QuoteRuneToASCII returns a single-quoted Go character literal representing
|
||||
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
|
||||
// \u0100) for non-ASCII characters and non-printable characters as defined
|
||||
// by unicode.IsPrint.
|
||||
func QuoteRuneToASCII(rune int) string {
|
||||
// TODO: avoid the allocation here.
|
||||
return quoteWith(string(rune), '\'', true)
|
||||
}
|
||||
|
||||
// CanBackquote returns whether the string s would be
|
||||
|
@ -11,17 +11,18 @@ import (
|
||||
)
|
||||
|
||||
type quoteTest struct {
|
||||
in string
|
||||
out string
|
||||
in string
|
||||
out string
|
||||
ascii string
|
||||
}
|
||||
|
||||
var quotetests = []quoteTest{
|
||||
{"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`},
|
||||
{"\\", `"\\"`},
|
||||
{"abc\xffdef", `"abc\xffdef"`},
|
||||
{"\u263a", `"\u263a"`},
|
||||
{"\U0010ffff", `"\U0010ffff"`},
|
||||
{"\x04", `"\x04"`},
|
||||
{"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`},
|
||||
{"\\", `"\\"`, `"\\"`},
|
||||
{"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`},
|
||||
{"\u263a", `"☺"`, `"\u263a"`},
|
||||
{"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`},
|
||||
{"\x04", `"\x04"`, `"\x04"`},
|
||||
}
|
||||
|
||||
func TestQuote(t *testing.T) {
|
||||
@ -32,20 +33,30 @@ func TestQuote(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestQuoteToASCII(t *testing.T) {
|
||||
for _, tt := range quotetests {
|
||||
if out := QuoteToASCII(tt.in); out != tt.ascii {
|
||||
t.Errorf("QuoteToASCII(%s) = %s, want %s", tt.in, out, tt.ascii)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type quoteRuneTest struct {
|
||||
in int
|
||||
out string
|
||||
in int
|
||||
out string
|
||||
ascii string
|
||||
}
|
||||
|
||||
var quoterunetests = []quoteRuneTest{
|
||||
{'a', `'a'`},
|
||||
{'\a', `'\a'`},
|
||||
{'\\', `'\\'`},
|
||||
{0xFF, `'\u00ff'`},
|
||||
{0x263a, `'\u263a'`},
|
||||
{0x0010ffff, `'\U0010ffff'`},
|
||||
{0x0010ffff + 1, `'\ufffd'`},
|
||||
{0x04, `'\x04'`},
|
||||
{'a', `'a'`, `'a'`},
|
||||
{'\a', `'\a'`, `'\a'`},
|
||||
{'\\', `'\\'`, `'\\'`},
|
||||
{0xFF, `'ÿ'`, `'\u00ff'`},
|
||||
{0x263a, `'☺'`, `'\u263a'`},
|
||||
{0xfffd, `'<27>'`, `'\ufffd'`},
|
||||
{0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`},
|
||||
{0x0010ffff + 1, `'<27>'`, `'\ufffd'`},
|
||||
{0x04, `'\x04'`, `'\x04'`},
|
||||
}
|
||||
|
||||
func TestQuoteRune(t *testing.T) {
|
||||
@ -56,6 +67,14 @@ func TestQuoteRune(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestQuoteRuneToASCII(t *testing.T) {
|
||||
for _, tt := range quoterunetests {
|
||||
if out := QuoteRuneToASCII(tt.in); out != tt.ascii {
|
||||
t.Errorf("QuoteRuneToASCII(%U) = %s, want %s", tt.in, out, tt.ascii)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type canBackquoteTest struct {
|
||||
in string
|
||||
out bool
|
||||
@ -110,7 +129,12 @@ func TestCanBackquote(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
var unquotetests = []quoteTest{
|
||||
type unQuoteTest struct {
|
||||
in string
|
||||
out string
|
||||
}
|
||||
|
||||
var unquotetests = []unQuoteTest{
|
||||
{`""`, ""},
|
||||
{`"a"`, "a"},
|
||||
{`"abc"`, "abc"},
|
||||
|
Loading…
Reference in New Issue
Block a user