From 30151f2aac2882dcb369019acfec95d16fea3c02 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Thu, 9 Feb 2023 16:35:04 -0500 Subject: [PATCH] cmd/go: fuzz test splitPkgConfigOutput and fix minor bugs In reviewing CL 466875, I noticed that the implementation of splitPkgConfigOutput from CL 86541 referred to another specific implementation, and that implementation has had recent changes to fix deviations from the POSIX specification for shell argument parsing. Curious about those changes, I decided to fuzz the function to check whether it agreed in practice with the way a real shell parses arguments in POSIX mode. It turned out to deviate in several edge cases, such as backslash-escapes within single quotes, quoted empty strings, and carriage returns. (We do not expect to see carriage returns in pkg-config output anyway, but the quote handling might matter.) This change updates the implementation to refer to the POSIX documentation instead of another implementation, and confirms the behavior with a fuzz test. It may introduce minor deviations from the pkgconf implementation that was previously used as a reference, but if so it is plausible that those could be fixed upstream in pkgconf (like the other recent changes there). For #35262. Updates ##23373. Change-Id: Ifab76e94af0ca9a6d826379f4a6e2028561e615c Reviewed-on: https://go-review.googlesource.com/c/go/+/466864 Reviewed-by: Ian Lance Taylor TryBot-Result: Gopher Robot Run-TryBot: Bryan Mills Auto-Submit: Bryan Mills --- src/cmd/go/internal/work/build_test.go | 13 +-- src/cmd/go/internal/work/exec.go | 118 ++++++++++++++------- src/cmd/go/internal/work/shell_test.go | 139 +++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 42 deletions(-) create mode 100644 src/cmd/go/internal/work/shell_test.go diff --git a/src/cmd/go/internal/work/build_test.go b/src/cmd/go/internal/work/build_test.go index 9e32b39c39..b9c16eea74 100644 --- a/src/cmd/go/internal/work/build_test.go +++ b/src/cmd/go/internal/work/build_test.go @@ -41,19 +41,20 @@ func TestSplitPkgConfigOutput(t *testing.T) { }{ {[]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}}, {[]byte(`-lextra\ fun\ arg\\`), []string{`-lextra fun arg\`}}, - {[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace"}}, - {[]byte(" \r\n "), nil}, + {[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace\r"}}, + {[]byte(" \r\n "), []string{"\r"}}, {[]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}}, {[]byte(`"-lextra fun arg\\"`), []string{`-lextra fun arg\`}}, {[]byte(`" \r\n\ "`), []string{` \r\n\ `}}, - {[]byte(`""`), nil}, + {[]byte(`""`), []string{""}}, {[]byte(``), nil}, {[]byte(`"\\"`), []string{`\`}}, {[]byte(`"\x"`), []string{`\x`}}, {[]byte(`"\\x"`), []string{`\x`}}, - {[]byte(`'\\'`), []string{`\`}}, + {[]byte(`'\\'`), []string{`\\`}}, {[]byte(`'\x'`), []string{`\x`}}, {[]byte(`"\\x"`), []string{`\x`}}, + {[]byte("\\\n"), nil}, {[]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}}, {[]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`), []string{"-fPIC", "-I/test/include/foo", "-DQUOTED=/test/share/doc"}}, {[]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}}, @@ -64,11 +65,11 @@ func TestSplitPkgConfigOutput(t *testing.T) { } { got, err := splitPkgConfigOutput(test.in) if err != nil { - t.Errorf("splitPkgConfigOutput on %v failed with error %v", test.in, err) + t.Errorf("splitPkgConfigOutput on %#q failed with error %v", test.in, err) continue } if !reflect.DeepEqual(got, test.want) { - t.Errorf("splitPkgConfigOutput(%v) = %v; want %v", test.in, got, test.want) + t.Errorf("splitPkgConfigOutput(%#q) = %#q; want %#q", test.in, got, test.want) } } diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go index c1476f8757..b211680e1c 100644 --- a/src/cmd/go/internal/work/exec.go +++ b/src/cmd/go/internal/work/exec.go @@ -1446,64 +1446,110 @@ func (b *Builder) PkgconfigCmd() string { return envList("PKG_CONFIG", cfg.DefaultPkgConfig)[0] } -// splitPkgConfigOutput parses the pkg-config output into a slice of -// flags. This implements the algorithm from pkgconf/libpkgconf/argvsplit.c. +// splitPkgConfigOutput parses the pkg-config output into a slice of flags. +// This implements the shell quoting semantics described in +// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02, +// except that it does not support parameter or arithmetic expansion or command +// substitution and hard-codes the delimiters instead of reading them +// from LC_LOCALE. func splitPkgConfigOutput(out []byte) ([]string, error) { if len(out) == 0 { return nil, nil } var flags []string flag := make([]byte, 0, len(out)) - escaped := false - quote := byte(0) + didQuote := false // was the current flag parsed from a quoted string? + escaped := false // did we just read `\` in a non-single-quoted context? + quote := byte(0) // what is the quote character around the current string? for _, c := range out { if escaped { - if quote != 0 { + if quote == '"' { + // “The shall retain its special meaning as an escape + // character … only when followed by one of the following characters + // when considered special:” switch c { - case '$', '`', '"', '\\': + case '$', '`', '"', '\\', '\n': + // Handle the escaped character normally. default: - flag = append(flag, '\\') + // Not an escape character after all. + flag = append(flag, '\\', c) + escaped = false + continue } - flag = append(flag, c) + } + + if c == '\n' { + // “If a follows the , the shell shall interpret + // this as line continuation.” } else { flag = append(flag, c) } escaped = false - } else if quote != 0 { - if c == quote { - quote = 0 - } else { - switch c { - case '\\': - escaped = true - default: - flag = append(flag, c) - } - } - } else if strings.IndexByte(" \t\n\v\f\r", c) < 0 { + continue + } + + if quote != 0 && c == quote { + quote = 0 + continue + } + switch quote { + case '\'': + // “preserve the literal value of each character” + flag = append(flag, c) + continue + case '"': + // “preserve the literal value of all characters within the double-quotes, + // with the exception of …” switch c { - case '\\': - escaped = true - case '\'', '"': - quote = c + case '`', '$', '\\': default: flag = append(flag, c) + continue } - } else if len(flag) != 0 { - flags = append(flags, string(flag)) - flag = flag[:0] } - } - if escaped { - return nil, errors.New("broken character escaping in pkgconf output ") - } - if quote != 0 { - return nil, errors.New("unterminated quoted string in pkgconf output ") - } else if len(flag) != 0 { - flags = append(flags, string(flag)) + + // “The application shall quote the following characters if they are to + // represent themselves:” + switch c { + case '|', '&', ';', '<', '>', '(', ')', '$', '`': + return nil, fmt.Errorf("unexpected shell character %q in pkgconf output", c) + + case '\\': + // “A that is not quoted shall preserve the literal value of + // the following character, with the exception of a .” + escaped = true + continue + + case '"', '\'': + quote = c + didQuote = true + continue + + case ' ', '\t', '\n': + if len(flag) > 0 || didQuote { + flags = append(flags, string(flag)) + } + flag, didQuote = flag[:0], false + continue + } + + flag = append(flag, c) } + // Prefer to report a missing quote instead of a missing escape. If the string + // is something like `"foo\`, it's ambiguous as to whether the trailing + // backslash is really an escape at all. + if quote != 0 { + return nil, errors.New("unterminated quoted string in pkgconf output") + } + if escaped { + return nil, errors.New("broken character escaping in pkgconf output") + } + + if len(flag) > 0 || didQuote { + flags = append(flags, string(flag)) + } return flags, nil } @@ -1535,7 +1581,7 @@ func (b *Builder) getPkgConfigFlags(p *load.Package) (cflags, ldflags []string, return nil, nil, err } if len(out) > 0 { - cflags, err = splitPkgConfigOutput(out) + cflags, err = splitPkgConfigOutput(bytes.TrimSpace(out)) if err != nil { return nil, nil, err } diff --git a/src/cmd/go/internal/work/shell_test.go b/src/cmd/go/internal/work/shell_test.go new file mode 100644 index 0000000000..24bef4e684 --- /dev/null +++ b/src/cmd/go/internal/work/shell_test.go @@ -0,0 +1,139 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build unix + +package work + +import ( + "bytes" + "internal/testenv" + "strings" + "testing" + "unicode" +) + +func FuzzSplitPkgConfigOutput(f *testing.F) { + testenv.MustHaveExecPath(f, "/bin/sh") + + f.Add([]byte(`$FOO`)) + f.Add([]byte(`\$FOO`)) + f.Add([]byte(`${FOO}`)) + f.Add([]byte(`\${FOO}`)) + f.Add([]byte(`$(/bin/false)`)) + f.Add([]byte(`\$(/bin/false)`)) + f.Add([]byte(`$((0))`)) + f.Add([]byte(`\$((0))`)) + f.Add([]byte(`unescaped space`)) + f.Add([]byte(`escaped\ space`)) + f.Add([]byte(`"unterminated quote`)) + f.Add([]byte(`'unterminated quote`)) + f.Add([]byte(`unterminated escape\`)) + f.Add([]byte(`"quote with unterminated escape\`)) + f.Add([]byte(`'quoted "double quotes"'`)) + f.Add([]byte(`"quoted 'single quotes'"`)) + f.Add([]byte(`"\$0"`)) + f.Add([]byte(`"\$\0"`)) + f.Add([]byte(`"\$"`)) + f.Add([]byte(`"\$ "`)) + + // Example positive inputs from TestSplitPkgConfigOutput. + // Some bare newlines have been removed so that the inputs + // are valid in the shell script we use for comparison. + f.Add([]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`)) + f.Add([]byte(`-lextra\ fun\ arg\\`)) + f.Add([]byte("\textra whitespace\r")) + f.Add([]byte(" \r ")) + f.Add([]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`)) + f.Add([]byte(`"-lextra fun arg\\"`)) + f.Add([]byte(`" \r\n\ "`)) + f.Add([]byte(`""`)) + f.Add([]byte(``)) + f.Add([]byte(`"\\"`)) + f.Add([]byte(`"\x"`)) + f.Add([]byte(`"\\x"`)) + f.Add([]byte(`'\\'`)) + f.Add([]byte(`'\x'`)) + f.Add([]byte(`"\\x"`)) + f.Add([]byte("\\\n")) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`)) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`)) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`)) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='/test/share/doc'`)) + f.Add([]byte(`-DQUOTED='/te\st/share/d\oc'`)) + f.Add([]byte(`-Dhello=10 -Dworld=+32 -DDEFINED_FROM_PKG_CONFIG=hello\ world`)) + f.Add([]byte(`"broken\"" \\\a "a"`)) + + // Example negative inputs from TestSplitPkgConfigOutput. + f.Add([]byte(`" \r\n `)) + f.Add([]byte(`"-r:foo" "-L/usr/white space/lib "-lfoo bar" "-lbar baz"`)) + f.Add([]byte(`"-lextra fun arg\\`)) + f.Add([]byte(`broken flag\`)) + f.Add([]byte(`extra broken flag \`)) + f.Add([]byte(`\`)) + f.Add([]byte(`"broken\"" "extra" \`)) + + f.Fuzz(func(t *testing.T, b []byte) { + t.Parallel() + + if bytes.ContainsAny(b, "*?[#~%\x00{}!") { + t.Skipf("skipping %#q: contains a sometimes-quoted character", b) + } + // splitPkgConfigOutput itself rejects inputs that contain unquoted + // shell operator characters. (Quoted shell characters are fine.) + + for _, c := range b { + if c > unicode.MaxASCII { + t.Skipf("skipping %#q: contains a non-ASCII character %q", b, c) + } + if !unicode.IsGraphic(rune(c)) && !unicode.IsSpace(rune(c)) { + t.Skipf("skipping %#q: contains non-graphic character %q", b, c) + } + } + + args, err := splitPkgConfigOutput(b) + if err != nil { + // We haven't checked that the shell would actually reject this input too, + // but if splitPkgConfigOutput rejected it it's probably too dangerous to + // run in the script. + t.Logf("%#q: %v", b, err) + return + } + t.Logf("splitPkgConfigOutput(%#q) = %#q", b, args) + if len(args) == 0 { + t.Skipf("skipping %#q: contains no arguments", b) + } + + var buf strings.Builder + for _, arg := range args { + buf.WriteString(arg) + buf.WriteString("\n") + } + wantOut := buf.String() + + if strings.Count(wantOut, "\n") != len(args)+bytes.Count(b, []byte("\n")) { + // One of the newlines in b was treated as a delimiter and not part of an + // argument. Our bash test script would interpret that as a syntax error. + t.Skipf("skipping %#q: contains a bare newline", b) + } + + // We use the printf shell command to echo the arguments because, per + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/echo.html#tag_20_37_16: + // “It is not possible to use echo portably across all POSIX systems unless + // both -n (as the first argument) and escape sequences are omitted.” + cmd := testenv.Command(t, "/bin/sh", "-c", "printf '%s\n' "+string(b)) + cmd.Env = append(cmd.Environ(), "LC_ALL=POSIX", "POSIXLY_CORRECT=1") + cmd.Stderr = new(strings.Builder) + out, err := cmd.Output() + if err != nil { + t.Fatalf("%#q: %v\n%s", cmd.Args, err, cmd.Stderr) + } + + if string(out) != wantOut { + t.Logf("%#q:\n%#q", cmd.Args, out) + t.Logf("want:\n%#q", wantOut) + t.Errorf("parsed args do not match") + } + }) +}