From 73aadff8ebc44dbaa8e6c1635f725f4da95a4c09 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 13 Apr 2009 13:27:39 -0700 Subject: [PATCH] add strconv.Unquote R=r DELTA=229 (227 added, 0 deleted, 2 changed) OCL=27200 CL=27366 --- src/lib/strconv/quote.go | 147 +++++++++++++++++++++++++++++++++- src/lib/strconv/quote_test.go | 84 ++++++++++++++++++- 2 files changed, 229 insertions(+), 2 deletions(-) diff --git a/src/lib/strconv/quote.go b/src/lib/strconv/quote.go index 442821a53da..c06204013b9 100644 --- a/src/lib/strconv/quote.go +++ b/src/lib/strconv/quote.go @@ -5,6 +5,7 @@ package strconv import ( + "os"; "utf8"; ) @@ -15,6 +16,7 @@ const lowerhex = "0123456789abcdef" // sequences (\t, \n, \xFF, \u0100) for control characters // and non-ASCII characters. func Quote(s string) string { + // TODO(rsc): String accumulation could be more efficient. t := `"`; for i := 0; i < len(s); i++ { switch { @@ -75,10 +77,153 @@ func Quote(s string) string { // a valid Go string literal if enclosed in backquotes. func CanBackquote(s string) bool { for i := 0; i < len(s); i++ { - if s[i] < ' ' || s[i] == '`' { + if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' { return false; } } return true; } +func unhex(b byte) (v int, ok bool) { + c := int(b); + switch { + case '0' <= c && c <= '9': + return c - '0', true; + case 'a' <= c && c <= 'f': + return c - 'a' + 10, true; + case 'A' <= c && c <= 'F': + return c - 'A' + 10, true; + } + return; +} + +func unquoteChar(s string, i int, q byte) (t string, ii int, err *os.Error) { + err = os.EINVAL; // assume error for easy return + + // easy cases + switch c := s[i]; { + case c >= utf8.RuneSelf: + r, size := utf8.DecodeRuneInString(s, i); + return s[i:i+size], i+size, nil; + case c == q: + return; + case c != '\\': + return s[i:i+1], i+1, nil; + } + + // hard case: c is backslash + if i+1 >= len(s) { + return; + } + c := s[i+1]; + i += 2; + + switch c { + case 'a': + return "\a", i, nil; + case 'b': + return "\b", i, nil; + case 'f': + return "\f", i, nil; + case 'n': + return "\n", i, nil; + case 'r': + return "\r", i, nil; + case 't': + return "\t", i, nil; + case 'v': + return "\v", i, nil; + case 'x', 'u', 'U': + n := 0; + switch c { + case 'x': + n = 2; + case 'u': + n = 4; + case 'U': + n = 8; + } + v := 0; + for j := 0; j < n; j++ { + if i+j >= len(s) { + return; + } + x, ok := unhex(s[i+j]); + if !ok { + return; + } + v = v<<4 | x; + } + if c == 'x' { + return string([]byte{byte(v)}), i+n, nil; + } + if v > utf8.RuneMax { + return; + } + return string(v), i+n, nil; + case '0', '1', '2', '3', '4', '5', '6', '7': + v := 0; + i--; + for j := 0; j < 3; j++ { + if i+j >= len(s) { + return; + } + x := int(s[i+j]) - '0'; + if x < 0 || x > 7 { + return; + } + v = (v<<3) | x; + } + if v > 255 { + return; + } + return string(v), i+3, nil; + + case '\\', q: + return string(c), i, nil; + } + return; +} + +// Unquote interprets s as a single-quoted, double-quoted, +// or backquoted Go string literal, returning the string value +// that s quotes. (If s is single-quoted, it would be a Go +// character literal; Unquote returns the corresponding +// one-character string.) +func Unquote(s string) (t string, err *os.Error) { + err = os.EINVAL; // assume error for easy return + n := len(s); + if n < 2 || s[0] != s[n-1] { + return; + } + + switch s[0] { + case '`': + t := s[1:n-1]; + return t, nil; + + case '"', '\'': + // TODO(rsc): String accumulation could be more efficient. + t := ""; + q := s[0]; + var c string; + var err *os.Error; + for i := 1; i < n-1; { + c, i, err = unquoteChar(s, i, q); + if err != nil { + return "", err; + } + t += c; + if q == '\'' && i != n-1 { + // single-quoted must be single character + return; + } + if i > n-1 { + // read too far + return; + } + } + return t, nil + } + return; +} diff --git a/src/lib/strconv/quote_test.go b/src/lib/strconv/quote_test.go index 8421fcde497..0fc01ebae3b 100644 --- a/src/lib/strconv/quote_test.go +++ b/src/lib/strconv/quote_test.go @@ -5,6 +5,7 @@ package strconv import ( + "os"; "strconv"; "testing"; ) @@ -48,7 +49,7 @@ var canbackquotetests = []canBackquoteTest { canBackquoteTest{ string(6), false }, canBackquoteTest{ string(7), false }, canBackquoteTest{ string(8), false }, - canBackquoteTest{ string(9), false }, + canBackquoteTest{ string(9), true }, // \t canBackquoteTest{ string(10), false }, canBackquoteTest{ string(11), false }, canBackquoteTest{ string(12), false }, @@ -86,3 +87,84 @@ func TestCanBackquote(t *testing.T) { } } } + +var unquotetests = []quoteTest { + quoteTest{ `""`, "" }, + quoteTest{ `"a"`, "a" }, + quoteTest{ `"abc"`, "abc" }, + quoteTest{ `"☺"`, "☺" }, + quoteTest{ `"hello world"`, "hello world" }, + quoteTest{ `"\xFF"`, "\xFF" }, + quoteTest{ `"\377"`, "\377" }, + quoteTest{ `"\u1234"`, "\u1234" }, + quoteTest{ `"\U00010111"`, "\U00010111" }, + quoteTest{ `"\U0001011111"`, "\U0001011111" }, + quoteTest{ `"\a\b\f\n\r\t\v\\\""`, "\a\b\f\n\r\t\v\\\"" }, + quoteTest{ `"'"`, "'" }, + + quoteTest{ `'a'`, "a" }, + quoteTest{ `'☹'`, "☹" }, + quoteTest{ `'\a'`, "\a" }, + quoteTest{ `'\x10'`, "\x10" }, + quoteTest{ `'\377'`, "\377" }, + quoteTest{ `'\u1234'`, "\u1234" }, + quoteTest{ `'\U00010111'`, "\U00010111" }, + quoteTest{ `'\t'`, "\t" }, + quoteTest{ `' '`, " " }, + quoteTest{ `'\''`, "'" }, + quoteTest{ `'"'`, "\"" }, + + quoteTest{ "``", `` }, + quoteTest{ "`a`", `a` }, + quoteTest{ "`abc`", `abc` }, + quoteTest{ "`☺`", `☺` }, + quoteTest{ "`hello world`", `hello world` }, + quoteTest{ "`\\xFF`", `\xFF` }, + quoteTest{ "`\\377`", `\377` }, + quoteTest{ "`\\`", `\` }, + quoteTest{ "` `", ` ` }, + quoteTest{ "` `", ` ` }, +} + +var misquoted = []string { + ``, + `"`, + `"a`, + `"'`, + `b"`, + `"\"`, + `'\'`, + `'ab'`, + `"\x1!"`, + `"\U12345678"`, + `"\z"`, + "`", + "`xxx", + "`\"", + `"\'"`, + `'\"'`, +} + +func TestUnquote(t *testing.T) { + for i := 0; i < len(unquotetests); i++ { + tt := unquotetests[i]; + if out, err := Unquote(tt.in); err != nil && out != tt.out { + t.Errorf("Unquote(%s) = %q, %s want %q, nil", tt.in, out, err, tt.out); + } + } + + // run the quote tests too, backward + for i := 0; i < len(quotetests); i++ { + tt := quotetests[i]; + if in, err := Unquote(tt.out); in != tt.in { + t.Errorf("Unquote(%s) = %q, %s, want %q, nil", tt.out, in, err, tt.in); + } + } + + for i := 0; i < len(misquoted); i++ { + s := misquoted[i]; + if out, err := Unquote(s); out != "" || err != os.EINVAL { + t.Errorf("Unquote(%q) = %q, %s want %q, %s", s, out, err, "", os.EINVAL); + } + } +}