mirror of
https://github.com/golang/go
synced 2024-11-07 01:36:13 -07:00
strconv: add QuotedPrefix
QuotedPrefix is similar to Unquote, but returns the quoted string verbatim
and ignores any data after the quoted string.

Fixes #45033

Change-Id: I9f69fe9e3e45cbe9e63581cf1b457facb625045d
Reviewed-on: https://go-review.googlesource.com/c/go/+/314775
Trust: Joe Tsai <joetsai@digital-static.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
This commit is contained in:
parent
2422c5eae5
commit
e3769299cd
@ -9,7 +9,7 @@ package strconv
|
||||
|
||||
import "internal/bytealg"
|
||||
|
||||
// contains reports whether the string contains the byte c.
|
||||
func contains(s string, c byte) bool {
|
||||
return bytealg.IndexByteString(s, c) != -1
|
||||
// index returns the index of the first instance of c in s, or -1 if missing.
|
||||
func index(s string, c byte) int {
|
||||
return bytealg.IndexByteString(s, c)
|
||||
}
|
||||
|
@ -7,12 +7,12 @@
|
||||
|
||||
package strconv
|
||||
|
||||
// contains reports whether the string contains the byte c.
|
||||
func contains(s string, c byte) bool {
|
||||
// index returns the index of the first instance of c in s, or -1 if missing.
|
||||
func index(s string, c byte) int {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] == c {
|
||||
return true
|
||||
return i
|
||||
}
|
||||
}
|
||||
return false
|
||||
return -1
|
||||
}
|
||||
|
@ -15,6 +15,11 @@ const (
|
||||
upperhex = "0123456789ABCDEF"
|
||||
)
|
||||
|
||||
// contains reports whether the string contains the byte c.
|
||||
func contains(s string, c byte) bool {
|
||||
return index(s, c) != -1
|
||||
}
|
||||
|
||||
func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
|
||||
return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
|
||||
}
|
||||
@ -359,80 +364,132 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
|
||||
return
|
||||
}
|
||||
|
||||
// QuotedPrefix returns the quoted string (as understood by Unquote) at the prefix of s.
|
||||
// If s does not start with a valid quoted string, QuotedPrefix returns an error.
|
||||
func QuotedPrefix(s string) (string, error) {
|
||||
out, _, err := unquote(s, false)
|
||||
return out, err
|
||||
}
|
||||
|
||||
// Unquote interprets s as a single-quoted, double-quoted,
|
||||
// or backquoted Go string literal, returning the string value
|
||||
// that s quotes. (If s is single-quoted, it would be a Go
|
||||
// character literal; Unquote returns the corresponding
|
||||
// one-character string.)
|
||||
func Unquote(s string) (string, error) {
|
||||
n := len(s)
|
||||
if n < 2 {
|
||||
out, rem, err := unquote(s, true)
|
||||
if len(rem) > 0 {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
quote := s[0]
|
||||
if quote != s[n-1] {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
s = s[1 : n-1]
|
||||
return out, err
|
||||
}
|
||||
|
||||
if quote == '`' {
|
||||
if contains(s, '`') {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
if contains(s, '\r') {
|
||||
// -1 because we know there is at least one \r to remove.
|
||||
buf := make([]byte, 0, len(s)-1)
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] != '\r' {
|
||||
buf = append(buf, s[i])
|
||||
// unquote parses a quoted string at the start of the input,
|
||||
// returning the parsed prefix, the remaining suffix, and any parse errors.
|
||||
// If unescape is true, the parsed prefix is unescaped,
|
||||
// otherwise the input prefix is provided verbatim.
|
||||
func unquote(in string, unescape bool) (out, rem string, err error) {
|
||||
// Determine the quote form and optimistically find the terminating quote.
|
||||
if len(in) < 2 {
|
||||
return "", in, ErrSyntax
|
||||
}
|
||||
quote := in[0]
|
||||
end := index(in[1:], quote)
|
||||
if end < 0 {
|
||||
return "", in, ErrSyntax
|
||||
}
|
||||
end += 2 // position after terminating quote; may be wrong if escape sequences are present
|
||||
|
||||
switch quote {
|
||||
case '`':
|
||||
switch {
|
||||
case !unescape:
|
||||
out = in[:end] // include quotes
|
||||
case !contains(in[:end], '\r'):
|
||||
out = in[len("`") : end-len("`")] // exclude quotes
|
||||
default:
|
||||
// Carriage return characters ('\r') inside raw string literals
|
||||
// are discarded from the raw string value.
|
||||
buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
|
||||
for i := len("`"); i < end-len("`"); i++ {
|
||||
if in[i] != '\r' {
|
||||
buf = append(buf, in[i])
|
||||
}
|
||||
}
|
||||
return string(buf), nil
|
||||
out = string(buf)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
if quote != '"' && quote != '\'' {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
if contains(s, '\n') {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
|
||||
// Is it trivial? Avoid allocation.
|
||||
if !contains(s, '\\') && !contains(s, quote) {
|
||||
switch quote {
|
||||
case '"':
|
||||
if utf8.ValidString(s) {
|
||||
return s, nil
|
||||
// NOTE: Prior implementations did not verify that raw strings consist
|
||||
// of valid UTF-8 characters and we continue to not verify it as such.
|
||||
// The Go specification does not explicitly require valid UTF-8,
|
||||
// but only mentions that it is implicitly valid for Go source code
|
||||
// (which must be valid UTF-8).
|
||||
return out, in[end:], nil
|
||||
case '"', '\'':
|
||||
// Handle quoted strings without any escape sequences.
|
||||
if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
|
||||
var valid bool
|
||||
switch quote {
|
||||
case '"':
|
||||
valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
|
||||
case '\'':
|
||||
r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
|
||||
valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
|
||||
}
|
||||
case '\'':
|
||||
r, size := utf8.DecodeRuneInString(s)
|
||||
if size == len(s) && (r != utf8.RuneError || size != 1) {
|
||||
return s, nil
|
||||
if valid {
|
||||
out = in[:end]
|
||||
if unescape {
|
||||
out = out[1 : end-1] // exclude quotes
|
||||
}
|
||||
return out, in[end:], nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var runeTmp [utf8.UTFMax]byte
|
||||
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
|
||||
for len(s) > 0 {
|
||||
c, multibyte, ss, err := UnquoteChar(s, quote)
|
||||
if err != nil {
|
||||
return "", err
|
||||
// Handle quoted strings with escape sequences.
|
||||
var buf []byte
|
||||
in0 := in
|
||||
in = in[1:] // skip starting quote
|
||||
if unescape {
|
||||
buf = make([]byte, 0, 3*end/2) // try to avoid more allocations
|
||||
}
|
||||
s = ss
|
||||
if c < utf8.RuneSelf || !multibyte {
|
||||
buf = append(buf, byte(c))
|
||||
} else {
|
||||
n := utf8.EncodeRune(runeTmp[:], c)
|
||||
buf = append(buf, runeTmp[:n]...)
|
||||
for len(in) > 0 && in[0] != quote {
|
||||
// Process the next character,
|
||||
// rejecting any unescaped newline characters which are invalid.
|
||||
r, multibyte, rem, err := UnquoteChar(in, quote)
|
||||
if in[0] == '\n' || err != nil {
|
||||
return "", in0, ErrSyntax
|
||||
}
|
||||
in = rem
|
||||
|
||||
// Append the character if unescaping the input.
|
||||
if unescape {
|
||||
if r < utf8.RuneSelf || !multibyte {
|
||||
buf = append(buf, byte(r))
|
||||
} else {
|
||||
var arr [utf8.UTFMax]byte
|
||||
n := utf8.EncodeRune(arr[:], r)
|
||||
buf = append(buf, arr[:n]...)
|
||||
}
|
||||
}
|
||||
|
||||
// Single quoted strings must be a single character.
|
||||
if quote == '\'' {
|
||||
break
|
||||
}
|
||||
}
|
||||
if quote == '\'' && len(s) != 0 {
|
||||
// single-quoted must be single character
|
||||
return "", ErrSyntax
|
||||
|
||||
// Verify that the string ends with a terminating quote.
|
||||
if !(len(in) > 0 && in[0] == quote) {
|
||||
return "", in0, ErrSyntax
|
||||
}
|
||||
in = in[1:] // skip terminating quote
|
||||
|
||||
if unescape {
|
||||
return string(buf), in, nil
|
||||
}
|
||||
return in0[:len(in0)-len(in)], in, nil
|
||||
default:
|
||||
return "", in, ErrSyntax
|
||||
}
|
||||
return string(buf), nil
|
||||
}
|
||||
|
||||
// bsearch16 returns the smallest i such that a[i] >= x.
|
||||
|
@ -6,6 +6,7 @@ package strconv_test
|
||||
|
||||
import (
|
||||
. "strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
@ -297,6 +298,7 @@ var misquoted = []string{
|
||||
`"\z"`,
|
||||
"`",
|
||||
"`xxx",
|
||||
"``x\r",
|
||||
"`\"",
|
||||
`"\'"`,
|
||||
`'\"'`,
|
||||
@ -307,22 +309,13 @@ var misquoted = []string{
|
||||
|
||||
func TestUnquote(t *testing.T) {
|
||||
for _, tt := range unquotetests {
|
||||
if out, err := Unquote(tt.in); err != nil || out != tt.out {
|
||||
t.Errorf("Unquote(%#q) = %q, %v want %q, nil", tt.in, out, err, tt.out)
|
||||
}
|
||||
testUnquote(t, tt.in, tt.out, nil)
|
||||
}
|
||||
|
||||
// run the quote tests too, backward
|
||||
for _, tt := range quotetests {
|
||||
if in, err := Unquote(tt.out); in != tt.in {
|
||||
t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in)
|
||||
}
|
||||
testUnquote(t, tt.out, tt.in, nil)
|
||||
}
|
||||
|
||||
for _, s := range misquoted {
|
||||
if out, err := Unquote(s); out != "" || err != ErrSyntax {
|
||||
t.Errorf("Unquote(%#q) = %q, %v want %q, %v", s, out, err, "", ErrSyntax)
|
||||
}
|
||||
testUnquote(t, s, "", ErrSyntax)
|
||||
}
|
||||
}
|
||||
|
||||
@ -333,26 +326,44 @@ func TestUnquoteInvalidUTF8(t *testing.T) {
|
||||
|
||||
// one of:
|
||||
want string
|
||||
wantErr string
|
||||
wantErr error
|
||||
}{
|
||||
{in: `"foo"`, want: "foo"},
|
||||
{in: `"foo`, wantErr: "invalid syntax"},
|
||||
{in: `"foo`, wantErr: ErrSyntax},
|
||||
{in: `"` + "\xc0" + `"`, want: "\xef\xbf\xbd"},
|
||||
{in: `"a` + "\xc0" + `"`, want: "a\xef\xbf\xbd"},
|
||||
{in: `"\t` + "\xc0" + `"`, want: "\t\xef\xbf\xbd"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
got, err := Unquote(tt.in)
|
||||
var gotErr string
|
||||
if err != nil {
|
||||
gotErr = err.Error()
|
||||
}
|
||||
if gotErr != tt.wantErr {
|
||||
t.Errorf("%d. Unquote(%q) = err %v; want %q", i, tt.in, err, tt.wantErr)
|
||||
}
|
||||
if tt.wantErr == "" && err == nil && got != tt.want {
|
||||
t.Errorf("%d. Unquote(%q) = %02x; want %02x", i, tt.in, []byte(got), []byte(tt.want))
|
||||
}
|
||||
for _, tt := range tests {
|
||||
testUnquote(t, tt.in, tt.want, tt.wantErr)
|
||||
}
|
||||
}
|
||||
|
||||
func testUnquote(t *testing.T, in, want string, wantErr error) {
|
||||
// Test Unquote.
|
||||
got, gotErr := Unquote(in)
|
||||
if got != want || gotErr != wantErr {
|
||||
t.Errorf("Unquote(%q) = (%q, %v), want (%q, %v)", in, got, gotErr, want, wantErr)
|
||||
}
|
||||
|
||||
// Test QuotedPrefix.
|
||||
// Adding an arbitrary suffix should not change the result of QuotedPrefix
|
||||
// assuming that the suffix doesn't accidentally terminate a truncated input.
|
||||
if gotErr == nil {
|
||||
want = in
|
||||
}
|
||||
suffix := "\n\r\\\"`'" // special characters for quoted strings
|
||||
if len(in) > 0 {
|
||||
suffix = strings.ReplaceAll(suffix, in[:1], "")
|
||||
}
|
||||
in += suffix
|
||||
got, gotErr = QuotedPrefix(in)
|
||||
if gotErr == nil && wantErr != nil {
|
||||
_, wantErr = Unquote(got) // original input had trailing junk, reparse with only valid prefix
|
||||
want = got
|
||||
}
|
||||
if got != want || gotErr != wantErr {
|
||||
t.Errorf("QuotedPrefix(%q) = (%q, %v), want (%q, %v)", in, got, gotErr, want, wantErr)
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user