1
0
mirror of https://github.com/golang/go synced 2024-09-30 07:18:34 -06:00

cmd/go: fuzz test splitPkgConfigOutput and fix minor bugs

In reviewing CL 466875, I noticed that the implementation of
splitPkgConfigOutput from CL 86541 referred to another specific
implementation, and that implementation has had recent changes to fix
deviations from the POSIX specification for shell argument parsing.

Curious about those changes, I decided to fuzz the function to check
whether it agreed in practice with the way a real shell parses
arguments in POSIX mode. It turned out to deviate in several edge
cases, such as backslash-escapes within single quotes, quoted empty
strings, and carriage returns. (We do not expect to see carriage
returns in pkg-config output anyway, but the quote handling might
matter.)

This change updates the implementation to refer to the POSIX
documentation instead of another implementation, and confirms the
behavior with a fuzz test. It may introduce minor deviations from the
pkgconf implementation that was previously used as a reference, but if
so it is plausible that those could be fixed upstream in pkgconf
(like the other recent changes there).

For #35262.
Updates ##23373.

Change-Id: Ifab76e94af0ca9a6d826379f4a6e2028561e615c
Reviewed-on: https://go-review.googlesource.com/c/go/+/466864
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Bryan Mills <bcmills@google.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
This commit is contained in:
Bryan C. Mills 2023-02-09 16:35:04 -05:00 committed by Gopher Robot
parent a7b75972f2
commit 30151f2aac
3 changed files with 228 additions and 42 deletions

View File

@ -41,19 +41,20 @@ func TestSplitPkgConfigOutput(t *testing.T) {
}{
{[]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}},
{[]byte(`-lextra\ fun\ arg\\`), []string{`-lextra fun arg\`}},
{[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace"}},
{[]byte(" \r\n "), nil},
{[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace\r"}},
{[]byte(" \r\n "), []string{"\r"}},
{[]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}},
{[]byte(`"-lextra fun arg\\"`), []string{`-lextra fun arg\`}},
{[]byte(`" \r\n\ "`), []string{` \r\n\ `}},
{[]byte(`""`), nil},
{[]byte(`""`), []string{""}},
{[]byte(``), nil},
{[]byte(`"\\"`), []string{`\`}},
{[]byte(`"\x"`), []string{`\x`}},
{[]byte(`"\\x"`), []string{`\x`}},
{[]byte(`'\\'`), []string{`\`}},
{[]byte(`'\\'`), []string{`\\`}},
{[]byte(`'\x'`), []string{`\x`}},
{[]byte(`"\\x"`), []string{`\x`}},
{[]byte("\\\n"), nil},
{[]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}},
{[]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`), []string{"-fPIC", "-I/test/include/foo", "-DQUOTED=/test/share/doc"}},
{[]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}},
@ -64,11 +65,11 @@ func TestSplitPkgConfigOutput(t *testing.T) {
} {
got, err := splitPkgConfigOutput(test.in)
if err != nil {
t.Errorf("splitPkgConfigOutput on %v failed with error %v", test.in, err)
t.Errorf("splitPkgConfigOutput on %#q failed with error %v", test.in, err)
continue
}
if !reflect.DeepEqual(got, test.want) {
t.Errorf("splitPkgConfigOutput(%v) = %v; want %v", test.in, got, test.want)
t.Errorf("splitPkgConfigOutput(%#q) = %#q; want %#q", test.in, got, test.want)
}
}

View File

@ -1446,64 +1446,110 @@ func (b *Builder) PkgconfigCmd() string {
return envList("PKG_CONFIG", cfg.DefaultPkgConfig)[0]
}
// splitPkgConfigOutput parses the pkg-config output into a slice of
// flags. This implements the algorithm from pkgconf/libpkgconf/argvsplit.c.
// splitPkgConfigOutput parses the pkg-config output into a slice of flags.
// This implements the shell quoting semantics described in
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02,
// except that it does not support parameter or arithmetic expansion or command
// substitution and hard-codes the <blank> delimiters instead of reading them
// from LC_LOCALE.
func splitPkgConfigOutput(out []byte) ([]string, error) {
if len(out) == 0 {
return nil, nil
}
var flags []string
flag := make([]byte, 0, len(out))
escaped := false
quote := byte(0)
didQuote := false // was the current flag parsed from a quoted string?
escaped := false // did we just read `\` in a non-single-quoted context?
quote := byte(0) // what is the quote character around the current string?
for _, c := range out {
if escaped {
if quote != 0 {
if quote == '"' {
// “The <backslash> shall retain its special meaning as an escape
// character … only when followed by one of the following characters
// when considered special:”
switch c {
case '$', '`', '"', '\\':
case '$', '`', '"', '\\', '\n':
// Handle the escaped character normally.
default:
flag = append(flag, '\\')
// Not an escape character after all.
flag = append(flag, '\\', c)
escaped = false
continue
}
flag = append(flag, c)
}
if c == '\n' {
// “If a <newline> follows the <backslash>, the shell shall interpret
// this as line continuation.”
} else {
flag = append(flag, c)
}
escaped = false
} else if quote != 0 {
if c == quote {
quote = 0
} else {
switch c {
case '\\':
escaped = true
default:
flag = append(flag, c)
}
}
} else if strings.IndexByte(" \t\n\v\f\r", c) < 0 {
continue
}
if quote != 0 && c == quote {
quote = 0
continue
}
switch quote {
case '\'':
// “preserve the literal value of each character”
flag = append(flag, c)
continue
case '"':
// “preserve the literal value of all characters within the double-quotes,
// with the exception of …”
switch c {
case '\\':
escaped = true
case '\'', '"':
quote = c
case '`', '$', '\\':
default:
flag = append(flag, c)
continue
}
} else if len(flag) != 0 {
flags = append(flags, string(flag))
flag = flag[:0]
}
}
if escaped {
return nil, errors.New("broken character escaping in pkgconf output ")
}
if quote != 0 {
return nil, errors.New("unterminated quoted string in pkgconf output ")
} else if len(flag) != 0 {
flags = append(flags, string(flag))
// “The application shall quote the following characters if they are to
// represent themselves:”
switch c {
case '|', '&', ';', '<', '>', '(', ')', '$', '`':
return nil, fmt.Errorf("unexpected shell character %q in pkgconf output", c)
case '\\':
// “A <backslash> that is not quoted shall preserve the literal value of
// the following character, with the exception of a <newline>.”
escaped = true
continue
case '"', '\'':
quote = c
didQuote = true
continue
case ' ', '\t', '\n':
if len(flag) > 0 || didQuote {
flags = append(flags, string(flag))
}
flag, didQuote = flag[:0], false
continue
}
flag = append(flag, c)
}
// Prefer to report a missing quote instead of a missing escape. If the string
// is something like `"foo\`, it's ambiguous as to whether the trailing
// backslash is really an escape at all.
if quote != 0 {
return nil, errors.New("unterminated quoted string in pkgconf output")
}
if escaped {
return nil, errors.New("broken character escaping in pkgconf output")
}
if len(flag) > 0 || didQuote {
flags = append(flags, string(flag))
}
return flags, nil
}
@ -1535,7 +1581,7 @@ func (b *Builder) getPkgConfigFlags(p *load.Package) (cflags, ldflags []string,
return nil, nil, err
}
if len(out) > 0 {
cflags, err = splitPkgConfigOutput(out)
cflags, err = splitPkgConfigOutput(bytes.TrimSpace(out))
if err != nil {
return nil, nil, err
}

View File

@ -0,0 +1,139 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix
package work
import (
"bytes"
"internal/testenv"
"strings"
"testing"
"unicode"
)
func FuzzSplitPkgConfigOutput(f *testing.F) {
testenv.MustHaveExecPath(f, "/bin/sh")
f.Add([]byte(`$FOO`))
f.Add([]byte(`\$FOO`))
f.Add([]byte(`${FOO}`))
f.Add([]byte(`\${FOO}`))
f.Add([]byte(`$(/bin/false)`))
f.Add([]byte(`\$(/bin/false)`))
f.Add([]byte(`$((0))`))
f.Add([]byte(`\$((0))`))
f.Add([]byte(`unescaped space`))
f.Add([]byte(`escaped\ space`))
f.Add([]byte(`"unterminated quote`))
f.Add([]byte(`'unterminated quote`))
f.Add([]byte(`unterminated escape\`))
f.Add([]byte(`"quote with unterminated escape\`))
f.Add([]byte(`'quoted "double quotes"'`))
f.Add([]byte(`"quoted 'single quotes'"`))
f.Add([]byte(`"\$0"`))
f.Add([]byte(`"\$\0"`))
f.Add([]byte(`"\$"`))
f.Add([]byte(`"\$ "`))
// Example positive inputs from TestSplitPkgConfigOutput.
// Some bare newlines have been removed so that the inputs
// are valid in the shell script we use for comparison.
f.Add([]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`))
f.Add([]byte(`-lextra\ fun\ arg\\`))
f.Add([]byte("\textra whitespace\r"))
f.Add([]byte(" \r "))
f.Add([]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`))
f.Add([]byte(`"-lextra fun arg\\"`))
f.Add([]byte(`" \r\n\ "`))
f.Add([]byte(`""`))
f.Add([]byte(``))
f.Add([]byte(`"\\"`))
f.Add([]byte(`"\x"`))
f.Add([]byte(`"\\x"`))
f.Add([]byte(`'\\'`))
f.Add([]byte(`'\x'`))
f.Add([]byte(`"\\x"`))
f.Add([]byte("\\\n"))
f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`))
f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`))
f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`))
f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='/test/share/doc'`))
f.Add([]byte(`-DQUOTED='/te\st/share/d\oc'`))
f.Add([]byte(`-Dhello=10 -Dworld=+32 -DDEFINED_FROM_PKG_CONFIG=hello\ world`))
f.Add([]byte(`"broken\"" \\\a "a"`))
// Example negative inputs from TestSplitPkgConfigOutput.
f.Add([]byte(`" \r\n `))
f.Add([]byte(`"-r:foo" "-L/usr/white space/lib "-lfoo bar" "-lbar baz"`))
f.Add([]byte(`"-lextra fun arg\\`))
f.Add([]byte(`broken flag\`))
f.Add([]byte(`extra broken flag \`))
f.Add([]byte(`\`))
f.Add([]byte(`"broken\"" "extra" \`))
f.Fuzz(func(t *testing.T, b []byte) {
t.Parallel()
if bytes.ContainsAny(b, "*?[#~%\x00{}!") {
t.Skipf("skipping %#q: contains a sometimes-quoted character", b)
}
// splitPkgConfigOutput itself rejects inputs that contain unquoted
// shell operator characters. (Quoted shell characters are fine.)
for _, c := range b {
if c > unicode.MaxASCII {
t.Skipf("skipping %#q: contains a non-ASCII character %q", b, c)
}
if !unicode.IsGraphic(rune(c)) && !unicode.IsSpace(rune(c)) {
t.Skipf("skipping %#q: contains non-graphic character %q", b, c)
}
}
args, err := splitPkgConfigOutput(b)
if err != nil {
// We haven't checked that the shell would actually reject this input too,
// but if splitPkgConfigOutput rejected it it's probably too dangerous to
// run in the script.
t.Logf("%#q: %v", b, err)
return
}
t.Logf("splitPkgConfigOutput(%#q) = %#q", b, args)
if len(args) == 0 {
t.Skipf("skipping %#q: contains no arguments", b)
}
var buf strings.Builder
for _, arg := range args {
buf.WriteString(arg)
buf.WriteString("\n")
}
wantOut := buf.String()
if strings.Count(wantOut, "\n") != len(args)+bytes.Count(b, []byte("\n")) {
// One of the newlines in b was treated as a delimiter and not part of an
// argument. Our bash test script would interpret that as a syntax error.
t.Skipf("skipping %#q: contains a bare newline", b)
}
// We use the printf shell command to echo the arguments because, per
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/echo.html#tag_20_37_16:
// “It is not possible to use echo portably across all POSIX systems unless
// both -n (as the first argument) and escape sequences are omitted.”
cmd := testenv.Command(t, "/bin/sh", "-c", "printf '%s\n' "+string(b))
cmd.Env = append(cmd.Environ(), "LC_ALL=POSIX", "POSIXLY_CORRECT=1")
cmd.Stderr = new(strings.Builder)
out, err := cmd.Output()
if err != nil {
t.Fatalf("%#q: %v\n%s", cmd.Args, err, cmd.Stderr)
}
if string(out) != wantOut {
t.Logf("%#q:\n%#q", cmd.Args, out)
t.Logf("want:\n%#q", wantOut)
t.Errorf("parsed args do not match")
}
})
}