diff --git a/src/cmd/go/internal/work/build_test.go b/src/cmd/go/internal/work/build_test.go index 9e32b39c39..b9c16eea74 100644 --- a/src/cmd/go/internal/work/build_test.go +++ b/src/cmd/go/internal/work/build_test.go @@ -41,19 +41,20 @@ func TestSplitPkgConfigOutput(t *testing.T) { }{ {[]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}}, {[]byte(`-lextra\ fun\ arg\\`), []string{`-lextra fun arg\`}}, - {[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace"}}, - {[]byte(" \r\n "), nil}, + {[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace\r"}}, + {[]byte(" \r\n "), []string{"\r"}}, {[]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}}, {[]byte(`"-lextra fun arg\\"`), []string{`-lextra fun arg\`}}, {[]byte(`" \r\n\ "`), []string{` \r\n\ `}}, - {[]byte(`""`), nil}, + {[]byte(`""`), []string{""}}, {[]byte(``), nil}, {[]byte(`"\\"`), []string{`\`}}, {[]byte(`"\x"`), []string{`\x`}}, {[]byte(`"\\x"`), []string{`\x`}}, - {[]byte(`'\\'`), []string{`\`}}, + {[]byte(`'\\'`), []string{`\\`}}, {[]byte(`'\x'`), []string{`\x`}}, {[]byte(`"\\x"`), []string{`\x`}}, + {[]byte("\\\n"), nil}, {[]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}}, {[]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`), []string{"-fPIC", "-I/test/include/foo", "-DQUOTED=/test/share/doc"}}, {[]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}}, @@ -64,11 +65,11 @@ func TestSplitPkgConfigOutput(t *testing.T) { } { got, err := splitPkgConfigOutput(test.in) if err != nil { - t.Errorf("splitPkgConfigOutput on %v failed with error %v", test.in, err) + t.Errorf("splitPkgConfigOutput on %#q failed with error %v", test.in, err) continue } if !reflect.DeepEqual(got, test.want) { - t.Errorf("splitPkgConfigOutput(%v) = %v; want %v", test.in, got, test.want) + t.Errorf("splitPkgConfigOutput(%#q) = %#q; want %#q", test.in, got, test.want) } } diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go index c1476f8757..b211680e1c 100644 --- a/src/cmd/go/internal/work/exec.go +++ b/src/cmd/go/internal/work/exec.go @@ -1446,64 +1446,110 @@ func (b *Builder) PkgconfigCmd() string { return envList("PKG_CONFIG", cfg.DefaultPkgConfig)[0] } -// splitPkgConfigOutput parses the pkg-config output into a slice of -// flags. This implements the algorithm from pkgconf/libpkgconf/argvsplit.c. +// splitPkgConfigOutput parses the pkg-config output into a slice of flags. +// This implements the shell quoting semantics described in +// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02, +// except that it does not support parameter or arithmetic expansion or command +// substitution and hard-codes the delimiters instead of reading them +// from LC_LOCALE. func splitPkgConfigOutput(out []byte) ([]string, error) { if len(out) == 0 { return nil, nil } var flags []string flag := make([]byte, 0, len(out)) - escaped := false - quote := byte(0) + didQuote := false // was the current flag parsed from a quoted string? + escaped := false // did we just read `\` in a non-single-quoted context? + quote := byte(0) // what is the quote character around the current string? for _, c := range out { if escaped { - if quote != 0 { + if quote == '"' { + // “The shall retain its special meaning as an escape + // character … only when followed by one of the following characters + // when considered special:” switch c { - case '$', '`', '"', '\\': + case '$', '`', '"', '\\', '\n': + // Handle the escaped character normally. default: - flag = append(flag, '\\') + // Not an escape character after all. + flag = append(flag, '\\', c) + escaped = false + continue } - flag = append(flag, c) + } + + if c == '\n' { + // “If a follows the , the shell shall interpret + // this as line continuation.” } else { flag = append(flag, c) } escaped = false - } else if quote != 0 { - if c == quote { - quote = 0 - } else { - switch c { - case '\\': - escaped = true - default: - flag = append(flag, c) - } - } - } else if strings.IndexByte(" \t\n\v\f\r", c) < 0 { + continue + } + + if quote != 0 && c == quote { + quote = 0 + continue + } + switch quote { + case '\'': + // “preserve the literal value of each character” + flag = append(flag, c) + continue + case '"': + // “preserve the literal value of all characters within the double-quotes, + // with the exception of …” switch c { - case '\\': - escaped = true - case '\'', '"': - quote = c + case '`', '$', '\\': default: flag = append(flag, c) + continue } - } else if len(flag) != 0 { - flags = append(flags, string(flag)) - flag = flag[:0] } - } - if escaped { - return nil, errors.New("broken character escaping in pkgconf output ") - } - if quote != 0 { - return nil, errors.New("unterminated quoted string in pkgconf output ") - } else if len(flag) != 0 { - flags = append(flags, string(flag)) + + // “The application shall quote the following characters if they are to + // represent themselves:” + switch c { + case '|', '&', ';', '<', '>', '(', ')', '$', '`': + return nil, fmt.Errorf("unexpected shell character %q in pkgconf output", c) + + case '\\': + // “A that is not quoted shall preserve the literal value of + // the following character, with the exception of a .” + escaped = true + continue + + case '"', '\'': + quote = c + didQuote = true + continue + + case ' ', '\t', '\n': + if len(flag) > 0 || didQuote { + flags = append(flags, string(flag)) + } + flag, didQuote = flag[:0], false + continue + } + + flag = append(flag, c) } + // Prefer to report a missing quote instead of a missing escape. If the string + // is something like `"foo\`, it's ambiguous as to whether the trailing + // backslash is really an escape at all. + if quote != 0 { + return nil, errors.New("unterminated quoted string in pkgconf output") + } + if escaped { + return nil, errors.New("broken character escaping in pkgconf output") + } + + if len(flag) > 0 || didQuote { + flags = append(flags, string(flag)) + } return flags, nil } @@ -1535,7 +1581,7 @@ func (b *Builder) getPkgConfigFlags(p *load.Package) (cflags, ldflags []string, return nil, nil, err } if len(out) > 0 { - cflags, err = splitPkgConfigOutput(out) + cflags, err = splitPkgConfigOutput(bytes.TrimSpace(out)) if err != nil { return nil, nil, err } diff --git a/src/cmd/go/internal/work/shell_test.go b/src/cmd/go/internal/work/shell_test.go new file mode 100644 index 0000000000..24bef4e684 --- /dev/null +++ b/src/cmd/go/internal/work/shell_test.go @@ -0,0 +1,139 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build unix + +package work + +import ( + "bytes" + "internal/testenv" + "strings" + "testing" + "unicode" +) + +func FuzzSplitPkgConfigOutput(f *testing.F) { + testenv.MustHaveExecPath(f, "/bin/sh") + + f.Add([]byte(`$FOO`)) + f.Add([]byte(`\$FOO`)) + f.Add([]byte(`${FOO}`)) + f.Add([]byte(`\${FOO}`)) + f.Add([]byte(`$(/bin/false)`)) + f.Add([]byte(`\$(/bin/false)`)) + f.Add([]byte(`$((0))`)) + f.Add([]byte(`\$((0))`)) + f.Add([]byte(`unescaped space`)) + f.Add([]byte(`escaped\ space`)) + f.Add([]byte(`"unterminated quote`)) + f.Add([]byte(`'unterminated quote`)) + f.Add([]byte(`unterminated escape\`)) + f.Add([]byte(`"quote with unterminated escape\`)) + f.Add([]byte(`'quoted "double quotes"'`)) + f.Add([]byte(`"quoted 'single quotes'"`)) + f.Add([]byte(`"\$0"`)) + f.Add([]byte(`"\$\0"`)) + f.Add([]byte(`"\$"`)) + f.Add([]byte(`"\$ "`)) + + // Example positive inputs from TestSplitPkgConfigOutput. + // Some bare newlines have been removed so that the inputs + // are valid in the shell script we use for comparison. + f.Add([]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`)) + f.Add([]byte(`-lextra\ fun\ arg\\`)) + f.Add([]byte("\textra whitespace\r")) + f.Add([]byte(" \r ")) + f.Add([]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`)) + f.Add([]byte(`"-lextra fun arg\\"`)) + f.Add([]byte(`" \r\n\ "`)) + f.Add([]byte(`""`)) + f.Add([]byte(``)) + f.Add([]byte(`"\\"`)) + f.Add([]byte(`"\x"`)) + f.Add([]byte(`"\\x"`)) + f.Add([]byte(`'\\'`)) + f.Add([]byte(`'\x'`)) + f.Add([]byte(`"\\x"`)) + f.Add([]byte("\\\n")) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`)) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`)) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`)) + f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='/test/share/doc'`)) + f.Add([]byte(`-DQUOTED='/te\st/share/d\oc'`)) + f.Add([]byte(`-Dhello=10 -Dworld=+32 -DDEFINED_FROM_PKG_CONFIG=hello\ world`)) + f.Add([]byte(`"broken\"" \\\a "a"`)) + + // Example negative inputs from TestSplitPkgConfigOutput. + f.Add([]byte(`" \r\n `)) + f.Add([]byte(`"-r:foo" "-L/usr/white space/lib "-lfoo bar" "-lbar baz"`)) + f.Add([]byte(`"-lextra fun arg\\`)) + f.Add([]byte(`broken flag\`)) + f.Add([]byte(`extra broken flag \`)) + f.Add([]byte(`\`)) + f.Add([]byte(`"broken\"" "extra" \`)) + + f.Fuzz(func(t *testing.T, b []byte) { + t.Parallel() + + if bytes.ContainsAny(b, "*?[#~%\x00{}!") { + t.Skipf("skipping %#q: contains a sometimes-quoted character", b) + } + // splitPkgConfigOutput itself rejects inputs that contain unquoted + // shell operator characters. (Quoted shell characters are fine.) + + for _, c := range b { + if c > unicode.MaxASCII { + t.Skipf("skipping %#q: contains a non-ASCII character %q", b, c) + } + if !unicode.IsGraphic(rune(c)) && !unicode.IsSpace(rune(c)) { + t.Skipf("skipping %#q: contains non-graphic character %q", b, c) + } + } + + args, err := splitPkgConfigOutput(b) + if err != nil { + // We haven't checked that the shell would actually reject this input too, + // but if splitPkgConfigOutput rejected it it's probably too dangerous to + // run in the script. + t.Logf("%#q: %v", b, err) + return + } + t.Logf("splitPkgConfigOutput(%#q) = %#q", b, args) + if len(args) == 0 { + t.Skipf("skipping %#q: contains no arguments", b) + } + + var buf strings.Builder + for _, arg := range args { + buf.WriteString(arg) + buf.WriteString("\n") + } + wantOut := buf.String() + + if strings.Count(wantOut, "\n") != len(args)+bytes.Count(b, []byte("\n")) { + // One of the newlines in b was treated as a delimiter and not part of an + // argument. Our bash test script would interpret that as a syntax error. + t.Skipf("skipping %#q: contains a bare newline", b) + } + + // We use the printf shell command to echo the arguments because, per + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/echo.html#tag_20_37_16: + // “It is not possible to use echo portably across all POSIX systems unless + // both -n (as the first argument) and escape sequences are omitted.” + cmd := testenv.Command(t, "/bin/sh", "-c", "printf '%s\n' "+string(b)) + cmd.Env = append(cmd.Environ(), "LC_ALL=POSIX", "POSIXLY_CORRECT=1") + cmd.Stderr = new(strings.Builder) + out, err := cmd.Output() + if err != nil { + t.Fatalf("%#q: %v\n%s", cmd.Args, err, cmd.Stderr) + } + + if string(out) != wantOut { + t.Logf("%#q:\n%#q", cmd.Args, out) + t.Logf("want:\n%#q", wantOut) + t.Errorf("parsed args do not match") + } + }) +}