// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Annotate Crefs in Prog with C types by parsing gcc debug output. // Conversion of debug output to Go types. package main import ( "bytes"; "debug/dwarf"; "debug/elf"; "debug/macho"; "fmt"; "go/ast"; "go/token"; "os"; "strconv"; "strings"; ) func (p *Prog) loadDebugInfo() { // Construct a slice of unique names from p.Crefs. m := make(map[string]int); for _, c := range p.Crefs { m[c.Name] = -1 } names := make([]string, 0, len(m)); for name, _ := range m { i := len(names); names = names[0 : i+1]; names[i] = name; m[name] = i; } // Coerce gcc into telling us whether each name is // a type, a value, or undeclared. We compile a function // containing the line: // name; // If name is a type, gcc will print: // x.c:2: warning: useless type name in empty declaration // If name is a value, gcc will print // x.c:2: warning: statement with no effect // If name is undeclared, gcc will print // x.c:2: error: 'name' undeclared (first use in this function) // A line number directive causes the line number to // correspond to the index in the names array. 
var b bytes.Buffer; b.WriteString(p.Preamble); b.WriteString("void f(void) {\n"); b.WriteString("#line 0 \"cgo-test\"\n"); for _, n := range names { b.WriteString(n); b.WriteString(";\n"); } b.WriteString("}\n"); kind := make(map[string]string); _, stderr := p.gccDebug(b.Bytes()); if stderr == "" { fatal("gcc produced no output") } for _, line := range strings.Split(stderr, "\n", 0) { if len(line) < 9 || line[0:9] != "cgo-test:" { continue } line = line[9:len(line)]; colon := strings.Index(line, ":"); if colon < 0 { continue } i, err := strconv.Atoi(line[0:colon]); if err != nil { continue } what := ""; switch { default: continue case strings.Index(line, ": useless type name in empty declaration") >= 0: what = "type" case strings.Index(line, ": statement with no effect") >= 0: what = "value" case strings.Index(line, "undeclared") >= 0: what = "error" } if old, ok := kind[names[i]]; ok && old != what { error(noPos, "inconsistent gcc output about C.%s", names[i]) } kind[names[i]] = what; } for _, n := range names { if _, ok := kind[n]; !ok { error(noPos, "could not determine kind of name for C.%s", n) } } // Extract the types from the DWARF section of an object // from a well-formed C program. Gcc only generates DWARF info // for symbols in the object file, so it is not enough to print the // preamble and hope the symbols we care about will be there. // Instead, emit // typeof(names[i]) *__cgo__i; // for each entry in names and then dereference the type we // learn for __cgo__i. b.Reset(); b.WriteString(p.Preamble); for i, n := range names { fmt.Fprintf(&b, "typeof(%s) *__cgo__%d;\n", n, i) } d, stderr := p.gccDebug(b.Bytes()); if d == nil { fatal("gcc failed:\n%s\non input:\n%s", stderr, b.Bytes()) } // Scan DWARF info for top-level TagVariable entries with AttrName __cgo__i. 
types := make([]dwarf.Type, len(names)); r := d.Reader(); for { e, err := r.Next(); if err != nil { fatal("reading DWARF entry: %s", err) } if e == nil { break } if e.Tag != dwarf.TagVariable { goto Continue } name, _ := e.Val(dwarf.AttrName).(string); typOff, _ := e.Val(dwarf.AttrType).(dwarf.Offset); if name == "" || typOff == 0 { fatal("malformed DWARF TagVariable entry") } if !strings.HasPrefix(name, "__cgo__") { goto Continue } typ, err := d.Type(typOff); if err != nil { fatal("loading DWARF type: %s", err) } t, ok := typ.(*dwarf.PtrType); if !ok || t == nil { fatal("internal error: %s has non-pointer type", name) } i, err := strconv.Atoi(name[7:len(name)]); if err != nil { fatal("malformed __cgo__ name: %s", name) } types[i] = t.Type; Continue: if e.Tag != dwarf.TagCompileUnit { r.SkipChildren() } } // Record types and typedef information in Crefs. var conv typeConv; conv.Init(p.PtrSize); for _, c := range p.Crefs { i := m[c.Name]; c.TypeName = kind[c.Name] == "type"; f, fok := types[i].(*dwarf.FuncType); if c.Context == "call" && !c.TypeName && fok { c.FuncType = conv.FuncType(f) } else { c.Type = conv.Type(types[i]) } } p.Typedef = conv.typedef; } func concat(a, b []string) []string { c := make([]string, len(a)+len(b)); for i, s := range a { c[i] = s } for i, s := range b { c[i+len(a)] = s } return c; } // gccDebug runs gcc -gdwarf-2 over the C program stdin and // returns the corresponding DWARF data and any messages // printed to standard error. 
func (p *Prog) gccDebug(stdin []byte) (*dwarf.Data, string) { machine := "-m32"; if p.PtrSize == 8 { machine = "-m64" } tmp := "_cgo_.o"; base := []string{ "gcc", machine, "-Wall", // many warnings "-Werror", // warnings are errors "-o" + tmp, // write object to tmp "-gdwarf-2", // generate DWARF v2 debugging symbols "-c", // do not link "-xc", // input language is C "-", // read input from standard input }; _, stderr, ok := run(stdin, concat(base, p.GccOptions)); if !ok { return nil, string(stderr) } // Try to parse f as ELF and Mach-O and hope one works. var f interface { DWARF() (*dwarf.Data, os.Error); } var err os.Error; if f, err = elf.Open(tmp); err != nil { if f, err = macho.Open(tmp); err != nil { fatal("cannot parse gcc output %s as ELF or Mach-O object", tmp) } } d, err := f.DWARF(); if err != nil { fatal("cannot load DWARF debug information from %s: %s", tmp, err) } return d, ""; } // A typeConv is a translator from dwarf types to Go types // with equivalent memory layout. type typeConv struct { // Cache of already-translated or in-progress types. m map[dwarf.Type]*Type; typedef map[string]ast.Expr; // Predeclared types. 
byte ast.Expr; // denotes padding int8, int16, int32, int64 ast.Expr; uint8, uint16, uint32, uint64, uintptr ast.Expr; float32, float64 ast.Expr; void ast.Expr; unsafePointer ast.Expr; string ast.Expr; ptrSize int64; tagGen int; } func (c *typeConv) Init(ptrSize int64) { c.ptrSize = ptrSize; c.m = make(map[dwarf.Type]*Type); c.typedef = make(map[string]ast.Expr); c.byte = c.Ident("byte"); c.int8 = c.Ident("int8"); c.int16 = c.Ident("int16"); c.int32 = c.Ident("int32"); c.int64 = c.Ident("int64"); c.uint8 = c.Ident("uint8"); c.uint16 = c.Ident("uint16"); c.uint32 = c.Ident("uint32"); c.uint64 = c.Ident("uint64"); c.uintptr = c.Ident("uintptr"); c.float32 = c.Ident("float32"); c.float64 = c.Ident("float64"); c.unsafePointer = c.Ident("unsafe.Pointer"); c.void = c.Ident("void"); c.string = c.Ident("string"); } // base strips away qualifiers and typedefs to get the underlying type func base(dt dwarf.Type) dwarf.Type { for { if d, ok := dt.(*dwarf.QualType); ok { dt = d.Type; continue; } if d, ok := dt.(*dwarf.TypedefType); ok { dt = d.Type; continue; } break; } return dt; } // Map from dwarf text names to aliases we use in package "C". var cnameMap = map[string]string{ "long int": "long", "long unsigned int": "ulong", "unsigned int": "uint", "short unsigned int": "ushort", "short int": "short", "long long int": "longlong", "long long unsigned int": "ulonglong", "signed char": "schar", } // Type returns a *Type with the same memory layout as // dtype when used as the type of a variable or a struct field. 
func (c *typeConv) Type(dtype dwarf.Type) *Type { if t, ok := c.m[dtype]; ok { if t.Go == nil { fatal("type conversion loop at %s", dtype) } return t; } t := new(Type); t.Size = dtype.Size(); t.Align = -1; t.C = dtype.Common().Name; if t.Size < 0 { fatal("dwarf.Type %s reports unknown size", dtype) } c.m[dtype] = t; switch dt := dtype.(type) { default: fatal("unexpected type: %s", dtype) case *dwarf.AddrType: if t.Size != c.ptrSize { fatal("unexpected: %d-byte address type - %s", t.Size, dtype) } t.Go = c.uintptr; t.Align = t.Size; case *dwarf.ArrayType: if dt.StrideBitSize > 0 { // Cannot represent bit-sized elements in Go. t.Go = c.Opaque(t.Size); break; } gt := &ast.ArrayType{ Len: c.intExpr(dt.Count), }; t.Go = gt; // publish before recursive call sub := c.Type(dt.Type); t.Align = sub.Align; gt.Elt = sub.Go; t.C = fmt.Sprintf("typeof(%s[%d])", sub.C, dt.Count); case *dwarf.CharType: if t.Size != 1 { fatal("unexpected: %d-byte char type - %s", t.Size, dtype) } t.Go = c.int8; t.Align = 1; case *dwarf.EnumType: switch t.Size { default: fatal("unexpected: %d-byte enum type - %s", t.Size, dtype) case 1: t.Go = c.uint8 case 2: t.Go = c.uint16 case 4: t.Go = c.uint32 case 8: t.Go = c.uint64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } t.C = "enum " + dt.EnumName; case *dwarf.FloatType: switch t.Size { default: fatal("unexpected: %d-byte float type - %s", t.Size, dtype) case 4: t.Go = c.float32 case 8: t.Go = c.float64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } case *dwarf.FuncType: // No attempt at translation: would enable calls // directly between worlds, but we need to moderate those. 
t.Go = c.uintptr; t.Align = c.ptrSize; case *dwarf.IntType: if dt.BitSize > 0 { fatal("unexpected: %d-bit int type - %s", dt.BitSize, dtype) } switch t.Size { default: fatal("unexpected: %d-byte int type - %s", t.Size, dtype) case 1: t.Go = c.int8 case 2: t.Go = c.int16 case 4: t.Go = c.int32 case 8: t.Go = c.int64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } case *dwarf.PtrType: t.Align = c.ptrSize; // Translate void* as unsafe.Pointer if _, ok := base(dt.Type).(*dwarf.VoidType); ok { t.Go = c.unsafePointer; t.C = "void*"; break; } gt := &ast.StarExpr{}; t.Go = gt; // publish before recursive call sub := c.Type(dt.Type); gt.X = sub.Go; t.C = sub.C + "*"; case *dwarf.QualType: // Ignore qualifier. t = c.Type(dt.Type); c.m[dtype] = t; return t; case *dwarf.StructType: // Convert to Go struct, being careful about alignment. // Have to give it a name to simulate C "struct foo" references. tag := dt.StructName; if tag == "" { tag = "__" + strconv.Itoa(c.tagGen); c.tagGen++; } else if t.C == "" { t.C = dt.Kind + " " + tag } name := c.Ident("_C" + dt.Kind + "_" + tag); t.Go = name; // publish before recursive calls switch dt.Kind { case "union", "class": c.typedef[name.Value] = c.Opaque(t.Size); if t.C == "" { t.C = fmt.Sprintf("typeof(unsigned char[%d])", t.Size) } case "struct": g, csyntax, align := c.Struct(dt); if t.C == "" { t.C = csyntax } t.Align = align; c.typedef[name.Value] = g; } case *dwarf.TypedefType: // Record typedef for printing. if dt.Name == "_GoString_" { // Special C name for Go string type. // Knows string layout used by compilers: pointer plus length, // which rounds up to 2 pointers after alignment. 
t.Go = c.string; t.Size = c.ptrSize * 2; t.Align = c.ptrSize; break; } name := c.Ident("_C_" + dt.Name); t.Go = name; // publish before recursive call sub := c.Type(dt.Type); t.Size = sub.Size; t.Align = sub.Align; if _, ok := c.typedef[name.Value]; !ok { c.typedef[name.Value] = sub.Go } case *dwarf.UcharType: if t.Size != 1 { fatal("unexpected: %d-byte uchar type - %s", t.Size, dtype) } t.Go = c.uint8; t.Align = 1; case *dwarf.UintType: if dt.BitSize > 0 { fatal("unexpected: %d-bit uint type - %s", dt.BitSize, dtype) } switch t.Size { default: fatal("unexpected: %d-byte uint type - %s", t.Size, dtype) case 1: t.Go = c.uint8 case 2: t.Go = c.uint16 case 4: t.Go = c.uint32 case 8: t.Go = c.uint64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } case *dwarf.VoidType: t.Go = c.void; t.C = "void"; } switch dtype.(type) { case *dwarf.AddrType, *dwarf.CharType, *dwarf.IntType, *dwarf.FloatType, *dwarf.UcharType, *dwarf.UintType: s := dtype.Common().Name; if s != "" { if ss, ok := cnameMap[s]; ok { s = ss } s = strings.Join(strings.Split(s, " ", 0), ""); // strip spaces name := c.Ident("_C_" + s); c.typedef[name.Value] = t.Go; t.Go = name; } } if t.C == "" { fatal("internal error: did not create C name for %s", dtype) } return t; } // FuncArg returns a Go type with the same memory layout as // dtype when used as the type of a C function argument. func (c *typeConv) FuncArg(dtype dwarf.Type) *Type { t := c.Type(dtype); switch dt := dtype.(type) { case *dwarf.ArrayType: // Arrays are passed implicitly as pointers in C. // In Go, we must be explicit. return &Type{ Size: c.ptrSize, Align: c.ptrSize, Go: &ast.StarExpr{X: t.Go}, C: t.C + "*", } case *dwarf.TypedefType: // C has much more relaxed rules than Go for // implicit type conversions. When the parameter // is type T defined as *X, simulate a little of the // laxness of C by making the argument *X instead of T. 
if ptr, ok := base(dt.Type).(*dwarf.PtrType); ok { return c.Type(ptr) } } return t; } // FuncType returns the Go type analogous to dtype. // There is no guarantee about matching memory layout. func (c *typeConv) FuncType(dtype *dwarf.FuncType) *FuncType { p := make([]*Type, len(dtype.ParamType)); gp := make([]*ast.Field, len(dtype.ParamType)); for i, f := range dtype.ParamType { p[i] = c.FuncArg(f); gp[i] = &ast.Field{Type: p[i].Go}; } var r *Type; var gr []*ast.Field; if _, ok := dtype.ReturnType.(*dwarf.VoidType); !ok && dtype.ReturnType != nil { r = c.Type(dtype.ReturnType); gr = []*ast.Field{&ast.Field{Type: r.Go}}; } return &FuncType{ Params: p, Result: r, Go: &ast.FuncType{ Params: gp, Results: gr, }, }; } // Identifier func (c *typeConv) Ident(s string) *ast.Ident { return &ast.Ident{Value: s} } // Opaque type of n bytes. func (c *typeConv) Opaque(n int64) ast.Expr { return &ast.ArrayType{ Len: c.intExpr(n), Elt: c.byte, } } // Expr for integer n. func (c *typeConv) intExpr(n int64) ast.Expr { return &ast.BasicLit{ Kind: token.INT, Value: strings.Bytes(strconv.Itoa64(n)), } } // Add padding of given size to fld. 
func (c *typeConv) pad(fld []*ast.Field, size int64) []*ast.Field {
	// Extend within the existing capacity; the caller is expected
	// to have reserved room for the extra element.
	last := len(fld);
	fld = fld[0 : last+1];
	blank := []*ast.Ident{c.Ident("_")};
	fld[last] = &ast.Field{Names: blank, Type: c.Opaque(size)};
	return fld;
}

// Struct conversion.
// It returns the Go struct type for dt, C syntax describing the same
// field list, and the largest alignment found among the fields.
// Gaps between fields and after the last field become "_" padding.
func (c *typeConv) Struct(dt *dwarf.StructType) (expr *ast.StructType, csyntax string, align int64) {
	csyntax = "struct { ";
	fields := make([]*ast.Field, 0, 2*len(dt.Field)+1);	// enough for padding around every field
	var pos int64;	// bytes laid out so far
	for _, f := range dt.Field {
		// Fill any gap before this field's recorded offset.
		if pos < f.ByteOffset {
			fields = c.pad(fields, f.ByteOffset-pos);
			pos = f.ByteOffset;
		}

		ft := c.Type(f.Type);
		k := len(fields);
		fields = fields[0 : k+1];
		fields[k] = &ast.Field{Names: []*ast.Ident{c.Ident(f.Name)}, Type: ft.Go};
		pos += ft.Size;
		csyntax += ft.C + " " + f.Name + "; ";
		if align < ft.Align {
			align = ft.Align;
		}
	}

	// Pad the tail up to the recorded size; running past it
	// means the layout above is wrong.
	switch {
	case pos < dt.ByteSize:
		fields = c.pad(fields, dt.ByteSize-pos);
	case pos > dt.ByteSize:
		fatal("struct size calculation error");
	}

	csyntax += "}";
	expr = &ast.StructType{Fields: fields};
	return expr, csyntax, align;
}