1
0
mirror of https://github.com/golang/go synced 2024-11-26 04:07:59 -07:00

add strconv.Unquote

R=r
DELTA=229  (227 added, 0 deleted, 2 changed)
OCL=27200
CL=27366
This commit is contained in:
Russ Cox 2009-04-13 13:27:39 -07:00
parent a62467af93
commit 73aadff8eb
2 changed files with 229 additions and 2 deletions

View File

@ -5,6 +5,7 @@
package strconv package strconv
import ( import (
"os";
"utf8"; "utf8";
) )
@ -15,6 +16,7 @@ const lowerhex = "0123456789abcdef"
// sequences (\t, \n, \xFF, \u0100) for control characters // sequences (\t, \n, \xFF, \u0100) for control characters
// and non-ASCII characters. // and non-ASCII characters.
func Quote(s string) string { func Quote(s string) string {
// TODO(rsc): String accumulation could be more efficient.
t := `"`; t := `"`;
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
switch { switch {
@ -75,10 +77,153 @@ func Quote(s string) string {
// a valid Go string literal if enclosed in backquotes. // a valid Go string literal if enclosed in backquotes.
func CanBackquote(s string) bool { func CanBackquote(s string) bool {
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
if s[i] < ' ' || s[i] == '`' { if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' {
return false; return false;
} }
} }
return true; return true;
} }
func unhex(b byte) (v int, ok bool) {
c := int(b);
switch {
case '0' <= c && c <= '9':
return c - '0', true;
case 'a' <= c && c <= 'f':
return c - 'a' + 10, true;
case 'A' <= c && c <= 'F':
return c - 'A' + 10, true;
}
return;
}
func unquoteChar(s string, i int, q byte) (t string, ii int, err *os.Error) {
err = os.EINVAL; // assume error for easy return
// easy cases
switch c := s[i]; {
case c >= utf8.RuneSelf:
r, size := utf8.DecodeRuneInString(s, i);
return s[i:i+size], i+size, nil;
case c == q:
return;
case c != '\\':
return s[i:i+1], i+1, nil;
}
// hard case: c is backslash
if i+1 >= len(s) {
return;
}
c := s[i+1];
i += 2;
switch c {
case 'a':
return "\a", i, nil;
case 'b':
return "\b", i, nil;
case 'f':
return "\f", i, nil;
case 'n':
return "\n", i, nil;
case 'r':
return "\r", i, nil;
case 't':
return "\t", i, nil;
case 'v':
return "\v", i, nil;
case 'x', 'u', 'U':
n := 0;
switch c {
case 'x':
n = 2;
case 'u':
n = 4;
case 'U':
n = 8;
}
v := 0;
for j := 0; j < n; j++ {
if i+j >= len(s) {
return;
}
x, ok := unhex(s[i+j]);
if !ok {
return;
}
v = v<<4 | x;
}
if c == 'x' {
return string([]byte{byte(v)}), i+n, nil;
}
if v > utf8.RuneMax {
return;
}
return string(v), i+n, nil;
case '0', '1', '2', '3', '4', '5', '6', '7':
v := 0;
i--;
for j := 0; j < 3; j++ {
if i+j >= len(s) {
return;
}
x := int(s[i+j]) - '0';
if x < 0 || x > 7 {
return;
}
v = (v<<3) | x;
}
if v > 255 {
return;
}
return string(v), i+3, nil;
case '\\', q:
return string(c), i, nil;
}
return;
}
// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes. (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
func Unquote(s string) (t string, err *os.Error) {
err = os.EINVAL; // assume error for easy return
n := len(s);
if n < 2 || s[0] != s[n-1] {
return;
}
switch s[0] {
case '`':
t := s[1:n-1];
return t, nil;
case '"', '\'':
// TODO(rsc): String accumulation could be more efficient.
t := "";
q := s[0];
var c string;
var err *os.Error;
for i := 1; i < n-1; {
c, i, err = unquoteChar(s, i, q);
if err != nil {
return "", err;
}
t += c;
if q == '\'' && i != n-1 {
// single-quoted must be single character
return;
}
if i > n-1 {
// read too far
return;
}
}
return t, nil
}
return;
}

View File

@ -5,6 +5,7 @@
package strconv package strconv
import ( import (
"os";
"strconv"; "strconv";
"testing"; "testing";
) )
@ -48,7 +49,7 @@ var canbackquotetests = []canBackquoteTest {
canBackquoteTest{ string(6), false }, canBackquoteTest{ string(6), false },
canBackquoteTest{ string(7), false }, canBackquoteTest{ string(7), false },
canBackquoteTest{ string(8), false }, canBackquoteTest{ string(8), false },
canBackquoteTest{ string(9), false }, canBackquoteTest{ string(9), true }, // \t
canBackquoteTest{ string(10), false }, canBackquoteTest{ string(10), false },
canBackquoteTest{ string(11), false }, canBackquoteTest{ string(11), false },
canBackquoteTest{ string(12), false }, canBackquoteTest{ string(12), false },
@ -86,3 +87,84 @@ func TestCanBackquote(t *testing.T) {
} }
} }
} }
var unquotetests = []quoteTest {
quoteTest{ `""`, "" },
quoteTest{ `"a"`, "a" },
quoteTest{ `"abc"`, "abc" },
quoteTest{ `"☺"`, "☺" },
quoteTest{ `"hello world"`, "hello world" },
quoteTest{ `"\xFF"`, "\xFF" },
quoteTest{ `"\377"`, "\377" },
quoteTest{ `"\u1234"`, "\u1234" },
quoteTest{ `"\U00010111"`, "\U00010111" },
quoteTest{ `"\U0001011111"`, "\U0001011111" },
quoteTest{ `"\a\b\f\n\r\t\v\\\""`, "\a\b\f\n\r\t\v\\\"" },
quoteTest{ `"'"`, "'" },
quoteTest{ `'a'`, "a" },
quoteTest{ `'☹'`, "☹" },
quoteTest{ `'\a'`, "\a" },
quoteTest{ `'\x10'`, "\x10" },
quoteTest{ `'\377'`, "\377" },
quoteTest{ `'\u1234'`, "\u1234" },
quoteTest{ `'\U00010111'`, "\U00010111" },
quoteTest{ `'\t'`, "\t" },
quoteTest{ `' '`, " " },
quoteTest{ `'\''`, "'" },
quoteTest{ `'"'`, "\"" },
quoteTest{ "``", `` },
quoteTest{ "`a`", `a` },
quoteTest{ "`abc`", `abc` },
quoteTest{ "`☺`", `` },
quoteTest{ "`hello world`", `hello world` },
quoteTest{ "`\\xFF`", `\xFF` },
quoteTest{ "`\\377`", `\377` },
quoteTest{ "`\\`", `\` },
quoteTest{ "` `", ` ` },
quoteTest{ "` `", ` ` },
}
var misquoted = []string {
``,
`"`,
`"a`,
`"'`,
`b"`,
`"\"`,
`'\'`,
`'ab'`,
`"\x1!"`,
`"\U12345678"`,
`"\z"`,
"`",
"`xxx",
"`\"",
`"\'"`,
`'\"'`,
}
func TestUnquote(t *testing.T) {
for i := 0; i < len(unquotetests); i++ {
tt := unquotetests[i];
if out, err := Unquote(tt.in); err != nil && out != tt.out {
t.Errorf("Unquote(%s) = %q, %s want %q, nil", tt.in, out, err, tt.out);
}
}
// run the quote tests too, backward
for i := 0; i < len(quotetests); i++ {
tt := quotetests[i];
if in, err := Unquote(tt.out); in != tt.in {
t.Errorf("Unquote(%s) = %q, %s, want %q, nil", tt.out, in, err, tt.in);
}
}
for i := 0; i < len(misquoted); i++ {
s := misquoted[i];
if out, err := Unquote(s); out != "" || err != os.EINVAL {
t.Errorf("Unquote(%q) = %q, %s want %q, %s", s, out, err, "", os.EINVAL);
}
}
}