From 265ff83af662c3d81551531bbff3454e1d5d66fb Mon Sep 17 00:00:00 2001 From: Andrew Gerrand Date: Fri, 1 Nov 2013 11:05:17 +1100 Subject: [PATCH] [release-branch.go1.2] cmd/cgo: stop using compiler error message text to analyze C names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ««« CL 15070043 / 90a628ac54ed cmd/cgo: stop using compiler error message text to analyze C names The old approach to determining whether "name" was a type, constant, or expression was to compile the C program name; and scan the errors and warnings generated by the compiler. This requires looking for specific substrings in the errors and warnings, which ties the implementation to specific compiler versions. As compilers change their errors or drop warnings, cgo breaks. This happens slowly but it does happen. Clang in particular (now required on OS X) has a significant churn rate. The new approach compiles a slightly more complex program that is either valid C or not valid C depending on what kind of thing "name" is. It uses only the presence or absence of an error message on a particular line, not the error text itself. The program is: // error if and only if name is undeclared void f1(void) { typeof(name) *x; } // error if and only if name is not a type void f2(void) { name *x; } // error if and only if name is not an integer constant void f3(void) { enum { x = (name)*1 }; } I had not been planning to do this until Go 1.3, because it is a non-trivial change, but it fixes a real Xcode 5 problem in Go 1.2, and the new code is easier to understand than the old code. It should be significantly more robust. Fixes #6596. Fixes #6612. R=golang-dev, r, james, iant CC=golang-dev https://golang.org/cl/15070043 »»» R=golang-dev CC=golang-dev https://golang.org/cl/20060044 --- misc/cgo/test/cgo_test.go | 1 + misc/cgo/test/issue6612.go | 93 +++++++++++++++ src/cmd/cgo/doc.go | 41 +++---- src/cmd/cgo/gcc.go | 235 +++++++++++++++++-------------------- 4 files changed, 225 insertions(+), 145 deletions(-) create mode 100644 misc/cgo/test/issue6612.go diff --git a/misc/cgo/test/cgo_test.go b/misc/cgo/test/cgo_test.go index 45572bad1a..b7c6d28769 100644 --- a/misc/cgo/test/cgo_test.go +++ b/misc/cgo/test/cgo_test.go @@ -49,5 +49,6 @@ func TestFpVar(t *testing.T) { testFpVar(t) } func Test4339(t *testing.T) { test4339(t) } func Test6390(t *testing.T) { test6390(t) } func Test5986(t *testing.T) { test5986(t) } +func TestNaming(t *testing.T) { testNaming(t) } func BenchmarkCgoCall(b *testing.B) { benchCgoCall(b) } diff --git a/misc/cgo/test/issue6612.go b/misc/cgo/test/issue6612.go new file mode 100644 index 0000000000..902878c259 --- /dev/null +++ b/misc/cgo/test/issue6612.go @@ -0,0 +1,93 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// golang.org/issue/6612 +// Test new scheme for deciding whether C.name is an expression, type, constant. +// Clang silences some warnings when the name is a #defined macro, so test those too +// (even though we now use errors exclusively, not warnings). + +package cgotest + +/* +void myfunc(void) {} +int myvar = 5; +const char *mytext = "abcdef"; +typedef int mytype; +enum { + myenum = 1234, +}; + +#define myfunc_def myfunc +#define myvar_def myvar +#define mytext_def mytext +#define mytype_def mytype +#define myenum_def myenum +#define myint_def 12345 +#define myfloat_def 1.5 +#define mystring_def "hello" +*/ +import "C" + +import "testing" + +func testNaming(t *testing.T) { + C.myfunc() + C.myfunc_def() + if v := C.myvar; v != 5 { + t.Errorf("C.myvar = %d, want 5", v) + } + if v := C.myvar_def; v != 5 { + t.Errorf("C.myvar_def = %d, want 5", v) + } + if s := C.GoString(C.mytext); s != "abcdef" { + t.Errorf("C.mytext = %q, want %q", s, "abcdef") + } + if s := C.GoString(C.mytext_def); s != "abcdef" { + t.Errorf("C.mytext_def = %q, want %q", s, "abcdef") + } + if c := C.myenum; c != 1234 { + t.Errorf("C.myenum = %v, want 1234", c) + } + if c := C.myenum_def; c != 1234 { + t.Errorf("C.myenum_def = %v, want 1234", c) + } + { + const c = C.myenum + if c != 1234 { + t.Errorf("C.myenum as const = %v, want 1234", c) + } + } + { + const c = C.myenum_def + if c != 1234 { + t.Errorf("C.myenum as const = %v, want 1234", c) + } + } + if c := C.myint_def; c != 12345 { + t.Errorf("C.myint_def = %v, want 12345", c) + } + { + const c = C.myint_def + if c != 12345 { + t.Errorf("C.myint as const = %v, want 12345", c) + } + } + + // This would be nice, but it has never worked. + /* + if c := C.myfloat_def; c != 1.5 { + t.Errorf("C.myint_def = %v, want 1.5", c) + } + { + const c = C.myfloat_def + if c != 1.5 { + t.Errorf("C.myint as const = %v, want 1.5", c) + } + } + */ + + if s := C.mystring_def; s != "hello" { + t.Errorf("C.mystring_def = %q, want %q", s, "hello") + } +} diff --git a/src/cmd/cgo/doc.go b/src/cmd/cgo/doc.go index 2758726323..24ab61102c 100644 --- a/src/cmd/cgo/doc.go +++ b/src/cmd/cgo/doc.go @@ -269,29 +269,30 @@ Next, cgo needs to identify the kinds for each identifier. For the identifiers C.foo and C.bar, cgo generates this C program: - void __cgo__f__(void) { - #line 1 "cgo-test" - foo; - enum { _cgo_enum_0 = foo }; - bar; - enum { _cgo_enum_1 = bar }; - } + #line 1 "not-declared" + void __cgo_f_xxx_1(void) { typeof(foo) *__cgo_undefined__; } + #line 1 "not-type" + void __cgo_f_xxx_2(void) { foo *__cgo_undefined__; } + #line 1 "not-const" + void __cgo_f_xxx_3(void) { enum { __cgo_undefined__ = (foo)*1 }; } + #line 2 "not-declared" + void __cgo_f_xxx_1(void) { typeof(bar) *__cgo_undefined__; } + #line 2 "not-type" + void __cgo_f_xxx_2(void) { bar *__cgo_undefined__; } + #line 2 "not-const" + void __cgo_f_xxx_3(void) { enum { __cgo_undefined__ = (bar)*1 }; } -This program will not compile, but cgo can look at the error messages -to infer the kind of each identifier. The line number given in the -error tells cgo which identifier is involved. +This program will not compile, but cgo can use the presence or absence +of an error message on a given line to deduce the information it +needs. The program is syntactically valid regardless of whether each +name is a type or an ordinary identifier, so there will be no syntax +errors that might stop parsing early. -An error like "unexpected type name" or "useless type name in empty -declaration" or "declaration does not declare anything" tells cgo that -the identifier is a type. +An error on not-declared:1 indicates that foo is undeclared. +An error on not-type:1 indicates that foo is not a type (if declared at all, it is an identifier). +An error on not-const:1 indicates that foo is not an integer constant. -An error like "statement with no effect" or "expression result unused" -tells cgo that the identifier is not a type, but not whether it is a -constant, function, or global variable. - -An error like "not an integer constant" tells cgo that the identifier -is not a constant. If it is also not a type, it must be a function or -global variable. For now, those can be treated the same. +The line number specifies the name involved. In the example, 1 is foo and 2 is bar. Next, cgo must learn the details of each type, variable, function, or constant. It can do this by reading object files. If cgo has decided diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go index b7e416213a..47531c7e2e 100644 --- a/src/cmd/cgo/gcc.go +++ b/src/cmd/cgo/gcc.go @@ -226,42 +226,22 @@ func (p *Package) loadDefines(f *File) { // name xxx for the references C.xxx in the Go input. // The kind is either a constant, type, or variable. func (p *Package) guessKinds(f *File) []*Name { - // Coerce gcc into telling us whether each name is - // a type, a value, or undeclared. We compile a function - // containing the line: - // name; - // If name is a type, gcc will print: - // cgo-test:2: warning: useless type name in empty declaration - // If name is a value, gcc will print - // cgo-test:2: warning: statement with no effect - // If name is undeclared, gcc will print - // cgo-test:2: error: 'name' undeclared (first use in this function) - // A line number directive causes the line number to - // correspond to the index in the names array. - // - // The line also has an enum declaration: - // name; enum { _cgo_enum_1 = name }; - // If name is not a constant, gcc will print: - // cgo-test:4: error: enumerator value for '_cgo_enum_4' is not an integer constant - // we assume lines without that error are constants. - - // Make list of names that need sniffing, type lookup. - toSniff := make([]*Name, 0, len(f.Name)) - needType := make([]*Name, 0, len(f.Name)) - + // Determine kinds for names we already know about, + // like #defines or 'struct foo', before bothering with gcc. + var names, needType []*Name for _, n := range f.Name { // If we've already found this name as a #define // and we can translate it as a constant value, do so. if n.Define != "" { - ok := false + isConst := false if _, err := strconv.Atoi(n.Define); err == nil { - ok = true + isConst = true } else if n.Define[0] == '"' || n.Define[0] == '\'' { if _, err := parser.ParseExpr(n.Define); err == nil { - ok = true + isConst = true } } - if ok { + if isConst { n.Kind = "const" // Turn decimal into hex, just for consistency // with enum-derived constants. Otherwise @@ -281,126 +261,139 @@ func (p *Package) guessKinds(f *File) []*Name { } } - // If this is a struct, union, or enum type name, - // record the kind but also that we need type information. + needType = append(needType, n) + + // If this is a struct, union, or enum type name, no need to guess the kind. if strings.HasPrefix(n.C, "struct ") || strings.HasPrefix(n.C, "union ") || strings.HasPrefix(n.C, "enum ") { n.Kind = "type" - i := len(needType) - needType = needType[0 : i+1] - needType[i] = n continue } - i := len(toSniff) - toSniff = toSniff[0 : i+1] - toSniff[i] = n + // Otherwise, we'll need to find out from gcc. + names = append(names, n) } - if len(toSniff) == 0 { + // Bypass gcc if there's nothing left to find out. + if len(names) == 0 { return needType } + // Coerce gcc into telling us whether each name is a type, a value, or undeclared. + // For names, find out whether they are integer constants. + // We used to look at specific warning or error messages here, but that tied the + // behavior too closely to specific versions of the compilers. + // Instead, arrange that we can infer what we need from only the presence or absence + // of an error on a specific line. + // + // For each name, we generate these lines, where xxx is the index in toSniff plus one. + // + // #line xxx "not-declared" + // void __cgo_f_xxx_1(void) { typeof(name) *__cgo_undefined__; } + // #line xxx "not-type" + // void __cgo_f_xxx_2(void) { name *__cgo_undefined__; } + // #line xxx "not-const" + // void __cgo_f_xxx_3(void) { enum { __cgo_undefined__ = (name)*1 }; } + // + // If we see an error at not-declared:xxx, the corresponding name is not declared. + // If we see an error at not-type:xxx, the corresponding name is a type. + // If we see an error at not-const:xxx, the corresponding name is not an integer constant. + // If we see no errors, we assume the name is an expression but not a constant + // (so a variable or a function). + // + // The specific input forms are chosen so that they are valid C syntax regardless of + // whether name denotes a type or an expression. + var b bytes.Buffer b.WriteString(f.Preamble) b.WriteString(builtinProlog) - b.WriteString("void __cgo__f__(void) {\n") - // For a #defined expression, clang silences the warning about "unused expression". - // http://llvm.org/viewvc/llvm-project?view=revision&revision=172696 - // Silencing the warning is not a big deal, because our default assumption is that - // (in the absence of other evidence) names are expressions. - // However, if all the C names we are investigating are #defined expressions, - // clang will print no warnings at all and then exit successfully. - // We want clang to print warnings, so seed the code with a function - // that is guaranteed to provoke a warning (that we will ignore). - // This way, if clang becomes even more broken, we'll find out. - // See golang.org/issue/6128. - fmt.Fprintf(&b, "1;\n") - - b.WriteString("#line 1 \"cgo-test\"\n") - for i, n := range toSniff { - fmt.Fprintf(&b, "%s; /* #%d */\nenum { _cgo_enum_%d = %s }; /* #%d */\n", n.C, i, i, n.C, i) + for i, n := range names { + fmt.Fprintf(&b, "#line %d \"not-declared\"\n"+ + "void __cgo_f_%d_1(void) { typeof(%s) *__cgo_undefined__; }\n"+ + "#line %d \"not-type\"\n"+ + "void __cgo_f_%d_2(void) { %s *__cgo_undefined__; }\n"+ + "#line %d \"not-const\"\n"+ + "void __cgo_f_%d_3(void) { enum { __cgo__undefined__ = (%s)*1 }; }\n", + i+1, i+1, n.C, + i+1, i+1, n.C, + i+1, i+1, n.C) } - b.WriteString("}\n") + fmt.Fprintf(&b, "#line 1 \"completed\"\n"+ + "int __cgo__1 = __cgo__2;\n") + stderr := p.gccErrors(b.Bytes()) if stderr == "" { fatalf("%s produced no output\non input:\n%s", p.gccBaseCmd()[0], b.Bytes()) } - names := make([]*Name, len(toSniff)) - copy(names, toSniff) - - isConst := make([]bool, len(toSniff)) - for i := range isConst { - isConst[i] = true // until proven otherwise - } - + completed := false + sniff := make([]int, len(names)) + const ( + notType = 1 << iota + notConst + ) for _, line := range strings.Split(stderr, "\n") { - if len(line) < 9 || line[0:9] != "cgo-test:" { - // the user will see any compiler errors when the code is compiled later. + if !strings.Contains(line, ": error:") { + // we only care about errors. + // we tried to turn off warnings on the command line, but one never knows. continue } - line = line[9:] - colon := strings.Index(line, ":") - if colon < 0 { - continue - } - i, err := strconv.Atoi(line[0:colon]) - if err != nil { - continue - } - i = (i - 1) / 2 - what := "" - switch { - default: - continue - case strings.Contains(line, ": useless type name in empty declaration"), - strings.Contains(line, ": declaration does not declare anything"), - strings.Contains(line, ": unexpected type name"): - what = "type" - isConst[i] = false - case strings.Contains(line, ": statement with no effect"), - strings.Contains(line, ": expression result unused"): - what = "not-type" // const or func or var - case strings.Contains(line, "undeclared"): - error_(token.NoPos, "%s", strings.TrimSpace(line[colon+1:])) - case strings.Contains(line, "is not an integer constant"): - isConst[i] = false - continue - } - n := toSniff[i] - if n == nil { - continue - } - toSniff[i] = nil - n.Kind = what - j := len(needType) - needType = needType[0 : j+1] - needType[j] = n - } - for i, b := range isConst { - if b { - names[i].Kind = "const" - if toSniff[i] != nil && names[i].Const == "" { - j := len(needType) - needType = needType[0 : j+1] - needType[j] = names[i] - } - } - } - for _, n := range toSniff { - if n == nil { + c1 := strings.Index(line, ":") + if c1 < 0 { continue } - if n.Kind != "" { + c2 := strings.Index(line[c1+1:], ":") + if c2 < 0 { continue } - error_(token.NoPos, "could not determine kind of name for C.%s", fixGo(n.Go)) + c2 += c1 + 1 + + filename := line[:c1] + i, _ := strconv.Atoi(line[c1+1 : c2]) + i-- + if i < 0 || i >= len(names) { + continue + } + + switch filename { + case "completed": + // Strictly speaking, there is no guarantee that seeing the error at completed:1 + // (at the end of the file) means we've seen all the errors from earlier in the file, + // but usually it does. Certainly if we don't see the completed:1 error, we did + // not get all the errors we expected. + completed = true + + case "not-declared": + error_(token.NoPos, "%s", strings.TrimSpace(line[c2+1:])) + case "not-type": + sniff[i] |= notType + case "not-const": + sniff[i] |= notConst + } + } + + if !completed { + fatalf("%s did not produce error at completed:1\non input:\n%s", p.gccBaseCmd()[0], b.Bytes()) + } + + for i, n := range names { + switch sniff[i] { + case 0: + error_(token.NoPos, "could not determine kind of name for C.%s", fixGo(n.Go)) + case notType: + n.Kind = "const" + case notConst: + n.Kind = "type" + case notConst | notType: + n.Kind = "not-type" + } } if nerrors > 0 { fatalf("unresolved names") } + + needType = append(needType, names...) return needType } @@ -739,8 +732,8 @@ func gccTmp() string { // the input. func (p *Package) gccCmd() []string { c := append(p.gccBaseCmd(), - "-Wall", // many warnings - "-Werror", // warnings are errors + "-Wno-all", // no warnings + "-Wno-error", // warnings are not errors "-o"+gccTmp(), // write object to tmp "-gdwarf-2", // generate DWARF v2 debugging symbols "-fno-eliminate-unused-debug-types", // gets rid of e.g. untyped enum otherwise @@ -876,14 +869,6 @@ func (p *Package) gccErrors(stdin []byte) string { // TODO(rsc): require failure args := p.gccCmd() - // GCC 4.8.0 has a bug: it sometimes does not apply - // -Wunused-value to values that are macros defined in system - // headers. See issue 5118. Adding -Wsystem-headers avoids - // that problem. This will produce additional errors, but it - // doesn't matter because we will ignore all errors that are - // not marked for the cgo-test file. - args = append(args, "-Wsystem-headers") - if *debugGcc { fmt.Fprintf(os.Stderr, "$ %s <