1
0
mirror of https://github.com/golang/go synced 2024-11-11 22:20:22 -07:00

new tool godefs.

uses gcc to determine system header layouts and
emits simple C or Go.  see comment in main.c.

R=r
DELTA=1069  (1067 added, 0 deleted, 2 changed)
OCL=26682
CL=26880
This commit is contained in:
Russ Cox 2009-03-30 00:21:25 -07:00
parent a2cbc2998d
commit 47fbb7639a
7 changed files with 1078 additions and 2 deletions

View File

@ -3,7 +3,7 @@
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
for i in cc 6l 6a 6c 8l 8a 8c 5l 5a 5c gc 6g ar db nm acid cov gobuild prof gotest
for i in cc 6l 6a 6c 8l 8a 8c 5l 5a 5c gc 6g ar db nm acid cov gobuild godefs prof gotest
do
cd $i
make clean

24
src/cmd/godefs/Makefile Normal file
View File

@ -0,0 +1,24 @@
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include ../../Make.conf
TARG=godefs
OFILES=\
main.$O\
stabs.$O\
util.$O\
HFILES=a.h
$(TARG): $(OFILES)
$(LD) -o $(TARG) -L$(GOROOT)/lib $(OFILES) -lbio -l9
clean:
rm -f $(OFILES) $(TARG)
install: $(TARG)
cp $(TARG) $(BIN)/$(TARG)
$(OFILES): $(HFILES)

101
src/cmd/godefs/a.h Normal file
View File

@ -0,0 +1,101 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <u.h>
#include <libc.h>
#include <bio.h>
enum
{
Void = 1,
Int8,
Uint8,
Int16,
Uint16,
Int32,
Uint32,
Int64,
Uint64,
Float32,
Float64,
Ptr,
Struct,
Array,
Union,
Typedef,
};
typedef struct Field Field;
typedef struct Type Type;
struct Type
{
Type *next; // next in hash table
// stabs name and two-integer id
char *name;
int n1;
int n2;
// int kind
int kind;
// sub-type for ptr, array
Type *type;
// struct fields
Field *f;
int nf;
int size;
int saved; // recorded in typ array
int warned; // warned about needing type
};
struct Field
{
char *name;
Type *type;
int offset;
int size;
};
// Constants
typedef struct Const Const;
struct Const
{
char *name;
vlong value;
};
// Recorded constants and types, to be printed.
extern Const *con;
extern int ncon;
extern Type **typ;
extern int ntyp;
// Language output
typedef struct Lang Lang;
struct Lang
{
char *constbegin;
char *constfmt;
char *constend;
char *typdef;
char *structbegin;
char *unionbegin;
char *structpadfmt;
char *structend;
int (*typefmt)(Fmt*);
};
extern Lang go, c;
void* emalloc(int);
char* estrdup(char*);
void* erealloc(void*, int);
void parsestabtype(char*);

497
src/cmd/godefs/main.c Normal file
View File

@ -0,0 +1,497 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Godefs takes as input a host-compilable C file that includes
// standard system headers. From that input file, it generates
// a standalone (no #includes) C or Go file containing equivalent
// definitions.
//
// The input C file is expected to define new types and enumerated
// constants whose names begin with $ (a legal identifier character
// in gcc). The output is the standalone definitions of those names,
// with the $ removed.
//
// For example, if this is x.c:
//
// #include <sys/stat.h>
//
// typedef struct timespec $Timespec;
// typedef struct stat $Stat;
// enum {
// $S_IFMT = S_IFMT,
// $S_IFIFO = S_IFIFO,
// $S_IFCHR = S_IFCHR,
// };
//
// then "godefs x.c" generates:
//
// // godefs x.c
//
// // MACHINE GENERATED - DO NOT EDIT.
//
// // Constants
// enum {
// S_IFMT = 0xf000,
// S_IFIFO = 0x1000,
// S_IFCHR = 0x2000,
// };
//
// // Types
// #pragma pack on
//
// typedef struct Timespec Timespec;
// struct Timespec {
// int32 tv_sec;
// int32 tv_nsec;
// };
//
// typedef struct Stat Stat;
// struct Stat {
// int32 st_dev;
// uint32 st_ino;
// uint16 st_mode;
// uint16 st_nlink;
// uint32 st_uid;
// uint32 st_gid;
// int32 st_rdev;
// Timespec st_atimespec;
// Timespec st_mtimespec;
// Timespec st_ctimespec;
// int64 st_size;
// int64 st_blocks;
// int32 st_blksize;
// uint32 st_flags;
// uint32 st_gen;
// int32 st_lspare;
// int64 st_qspare[2];
// };
// #pragma pack off
//
// The -g flag to godefs causes it to generate Go output, not C.
// In the Go output, struct fields have leading xx_ prefixes removed
// and the first character capitalized (exported).
//
// Godefs works by invoking gcc to compile the given input file
// and then parses the debug info embedded in the assembly output.
// This is far easier than reading system headers on most machines.
//
// The -c flag sets the compiler (default "gcc").
//
// The -f flag adds a flag to pass to the compiler (e.g., -f -m64).
#include "a.h"
void
usage(void)
{
fprint(2, "usage: godefs [-g] [-c cc] [-f cc-flag] defs.c\n");
exit(1);
}
int gotypefmt(Fmt*);
int ctypefmt(Fmt*);
int prefixlen(Type*);
Lang go =
{
"const (\n",
"\t%s = %#llx;\n",
")\n",
"type",
"type %s struct {\n",
"type %s union {\n", // not really, but readable
"\tpad%d [%d]byte;\n",
"}\n",
gotypefmt,
};
Lang c =
{
"enum {\n",
"\t%s = %#llx,\n",
"};\n",
"typedef",
"typedef struct %s %s;\nstruct %s {\n",
"typedef union %s %s;\nunion %s {\n",
"\tbyte pad%d[%d];\n",
"};\n",
ctypefmt,
};
int oargc;
char **oargv;
Lang *lang = &c;
Const *con;
int ncon;
Type **typ;
int ntyp;
void
main(int argc, char **argv)
{
int p[2], pid, i, j, n, off, npad, prefix;
char *av[30], *q, *r, *tofree, *name;
Biobuf *bin, *bout;
Type *t;
Field *f;
quotefmtinstall();
oargc = argc;
oargv = argv;
n = 0;
av[n++] = "gcc";
av[n++] = "-S"; // write assembly
av[n++] = "-gstabs"; // include stabs info
av[n++] = "-o-"; // to stdout
ARGBEGIN{
case 'g':
lang = &go;
break;
case 'c':
av[0] = EARGF(usage());
break;
case 'f':
if(n+2 >= nelem(av))
sysfatal("too many -f options");
av[n++] = EARGF(usage());
break;
default:
usage();
}ARGEND
if(argc != 1)
usage();
av[n++] = argv[0];
av[n] = nil;
// Run gcc writing assembly and stabs debugging to p[1].
if(pipe(p) < 0)
sysfatal("pipe: %r");
pid = fork();
if(pid < 0)
sysfatal("fork: %r");
if(pid == 0) {
close(p[0]);
dup(p[1], 1);
close(0);
open("/dev/null", OREAD);
exec(av[0], av);
fprint(2, "exec gcc: %r\n");
exit(1);
}
close(p[1]);
// Read assembly, pulling out .stabs lines.
bin = Bfdopen(p[0], OREAD);
while((q = Brdstr(bin, '\n', 1)) != nil) {
// .stabs "float:t(0,12)=r(0,1);4;0;",128,0,0,0
tofree = q;
while(*q == ' ' || *q == '\t')
q++;
if(strncmp(q, ".stabs", 6) != 0)
goto Continue;
q += 6;
while(*q == ' ' || *q == '\t')
q++;
if(*q++ != '\"') {
Bad:
sysfatal("cannot parse .stabs line:\n%s", tofree);
}
r = strchr(q, '\"');
if(r == nil)
goto Bad;
*r++ = '\0';
if(*r++ != ',')
goto Bad;
if(*r < '0' || *r > '9')
goto Bad;
if(atoi(r) != 128) // stabs kind = local symbol
goto Continue;
parsestabtype(q);
Continue:
free(tofree);
}
Bterm(bin);
waitpid();
// Write defs to standard output.
bout = Bfdopen(1, OWRITE);
fmtinstall('T', lang->typefmt);
// Echo original command line in header.
Bprint(bout, "//");
for(i=0; i<oargc; i++)
Bprint(bout, " %q", oargv[i]);
Bprint(bout, "\n");
Bprint(bout, "\n");
Bprint(bout, "// MACHINE GENERATED - DO NOT EDIT.\n");
Bprint(bout, "\n");
// Constants.
Bprint(bout, "// Constants\n");
if(ncon > 0) {
Bprint(bout, lang->constbegin);
for(i=0; i<ncon; i++)
Bprint(bout, lang->constfmt, con[i].name, con[i].value);
Bprint(bout, lang->constend);
}
Bprint(bout, "\n");
// Types
// push our names down
for(i=0; i<ntyp; i++) {
t = typ[i];
name = t->name;
while(t && t->kind == Typedef)
t = t->type;
if(t)
t->name = name;
}
Bprint(bout, "// Types\n");
// Have to turn off structure padding in Plan 9 compiler,
// mainly because it is more aggressive than gcc tends to be.
if(lang == &c)
Bprint(bout, "#pragma pack on\n");
for(i=0; i<ntyp; i++) {
Bprint(bout, "\n");
t = typ[i];
while(t && t->kind == Typedef)
t = t->type;
name = t->name;
if(name[0] == '$')
name++;
npad = 0;
off = 0;
switch(t->kind) {
case 0:
fprint(2, "unknown type definition for %s\n", name);
break;
default: // numeric, array, or pointer
case Array:
case Ptr:
Bprint(bout, "%s %lT\n", lang->typdef, name, t);
break;
case Union:
if(lang == &go) {
fprint(2, "%s: cannot emit unions in go\n", name);
continue;
}
Bprint(bout, lang->unionbegin, name, name, name);
goto StructBody;
case Struct:
Bprint(bout, lang->structbegin, name, name, name);
StructBody:
prefix = 0;
if(lang == &go)
prefix = prefixlen(t);
for(j=0; j<t->nf; j++) {
f = &t->f[j];
// padding
if(t->kind == Struct) {
if(f->offset%8 != 0 || f->size%8 != 0) {
fprint(2, "ignoring bitfield %s.%s\n", t->name, f->name);
continue;
}
if(f->offset < off)
sysfatal("%s: struct fields went backward", t->name);
if(off < f->offset) {
Bprint(bout, lang->structpadfmt, npad++, (f->offset - off) / 8);
off = f->offset;
}
off += f->size;
}
Bprint(bout, "\t%lT;\n", f->name+prefix, f->type);
}
// final padding
if(t->kind == Struct) {
if(off/8 < t->size)
Bprint(bout, lang->structpadfmt, npad++, t->size - off/8);
}
Bprint(bout, lang->structend);
}
}
if(lang == &c)
Bprint(bout, "#pragma pack off\n");
Bterm(bout);
exit(0);
}
char *kindnames[] = {
"void", // actually unknown, but byte is good for pointers
"void",
"int8",
"uint8",
"int16",
"uint16",
"int32",
"uint32",
"int64",
"uint64",
"float32",
"float64",
"ptr",
"struct",
"array",
"union",
"typedef",
};
int
ctypefmt(Fmt *f)
{
char *name, *s;
Type *t;
name = nil;
if(f->flags & FmtLong) {
name = va_arg(f->args, char*);
if(name == nil || name[0] == '\0')
name = "_anon_";
}
t = va_arg(f->args, Type*);
while(t && t->kind == Typedef)
t = t->type;
switch(t->kind) {
case Struct:
case Union:
// must be named
s = t->name;
if(s == nil) {
fprint(2, "need name for anonymous struct\n");
goto bad;
}
else if(s[0] != '$')
fprint(2, "need name for struct %s\n", s);
else
s++;
fmtprint(f, "%s", s);
if(name)
fmtprint(f, " %s", name);
break;
case Array:
if(name)
fmtprint(f, "%T %s[%d]", t->type, name, t->size);
else
fmtprint(f, "%T[%d]", t->type, t->size);
break;
case Ptr:
if(name)
fmtprint(f, "%T *%s", t->type, name);
else
fmtprint(f, "%T*", t->type);
break;
default:
fmtprint(f, "%s", kindnames[t->kind]);
if(name)
fmtprint(f, " %s", name);
break;
bad:
if(name)
fmtprint(f, "byte %s[%d]", name, t->size);
else
fmtprint(f, "byte[%d]", t->size);
break;
}
return 0;
}
int
gotypefmt(Fmt *f)
{
char *name, *s;
Type *t;
if(f->flags & FmtLong) {
name = va_arg(f->args, char*);
if('a' <= name[0] && name[0] <= 'z')
name[0] += 'A' - 'a';
fmtprint(f, "%s ", name);
}
t = va_arg(f->args, Type*);
while(t && t->kind == Typedef)
t = t->type;
switch(t->kind) {
case Struct:
case Union:
// must be named
s = t->name;
if(s == nil) {
fprint(2, "need name for anonymous struct\n");
s = "STRUCT";
}
else if(s[0] != '$')
fprint(2, "need name for struct %s\n", s);
else
s++;
fmtprint(f, "%s", s);
break;
case Array:
fmtprint(f, "[%d]%T", t->size, t->type);
break;
case Ptr:
fmtprint(f, "*%T", t->type);
break;
default:
s = kindnames[t->kind];
if(strcmp(s, "void") == 0)
s = "byte";
fmtprint(f, "%s", s);
}
return 0;
}
// Figure out common struct prefix len
int
prefixlen(Type *t)
{
int i;
int len;
char *p, *name;
Field *f;
len = 0;
name = nil;
for(i=0; i<t->nf; i++) {
f = &t->f[i];
p = strchr(f->name, '_');
if(p == nil)
return 0;
if(name == nil) {
name = f->name;
len = p+1 - name;
}
else if(strncmp(f->name, name, len) != 0)
return 0;
}
return len;
}

418
src/cmd/godefs/stabs.c Normal file
View File

@ -0,0 +1,418 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Parse stabs debug info.
#include "a.h"
int stabsdebug = 1;
// Hash table for type lookup by number.
Type *hash[1024];
// Look up type by number pair.
// TODO(rsc): Iant points out that n1 and n2 are always small and dense,
// so an array of arrays would be a better representation.
Type*
typebynum(uint n1, uint n2)
{
uint h;
Type *t;
h = (n1*53+n2) % nelem(hash);
for(t=hash[h]; t; t=t->next)
if(t->n1 == n1 && t->n2 == n2)
return t;
t = emalloc(sizeof *t);
t->next = hash[h];
hash[h] = t;
t->n1 = n1;
t->n2 = n2;
return t;
}
// Parse name and colon from *pp, leaving copy in *sp.
static int
parsename(char **pp, char **sp)
{
char *p;
char *s;
p = *pp;
while(*p != '\0' && *p != ':')
p++;
if(*p == '\0') {
fprint(2, "parsename expected colon\n");
return -1;
}
s = emalloc(p - *pp + 1);
memmove(s, *pp, p - *pp);
*sp = s;
*pp = p+1;
return 0;
}
// Parse single number from *pp.
static int
parsenum1(char **pp, vlong *np)
{
char *p;
p = *pp;
if(*p != '-' && (*p < '0' || *p > '9')) {
fprint(2, "parsenum expected minus or digit\n");
return -1;
}
*np = strtoll(p, pp, 10);
return 0;
}
// Parse type number - either single number or (n1, n2).
static int
parsetypenum(char **pp, vlong *n1p, vlong *n2p)
{
char *p;
p = *pp;
if(*p == '(') {
p++;
if(parsenum1(&p, n1p) < 0)
return -1;
if(*p++ != ',') {
if(stabsdebug)
fprint(2, "parsetypenum expected comma\n");
return -1;
}
if(parsenum1(&p, n2p) < 0)
return -1;
if(*p++ != ')') {
if(stabsdebug)
fprint(2, "parsetypenum expected right paren\n");
return -1;
}
*pp = p;
return 0;
}
if(parsenum1(&p, n1p) < 0)
return -1;
*n2p = 0;
*pp = p;
return 0;
}
// Integer types are represented in stabs as a "range"
// type with a lo and a hi value. The lo and hi used to
// be lo and hi for the type, but there are now odd
// extensions for floating point and 64-bit numbers.
//
// Have to keep signs separate from values because
// Int64's lo is -0.
typedef struct Intrange Intrange;
struct Intrange
{
int signlo; // sign of lo
vlong lo;
int signhi; // sign of hi
vlong hi;
int kind;
};
// NOTE(rsc): Iant says that these might be different depending
// on the gcc mode, though I haven't observed this yet.
Intrange intranges[] = {
'+', 0, '+', 127, Int8, // char
'-', 128, '+', 127, Int8, // signed char
'+', 0, '+', 255, Uint8,
'-', 32768, '+', 32767, Int16,
'+', 0, '+', 65535, Uint16,
'-', 2147483648LL, '+', 2147483647LL, Int32,
'+', 0, '+', 4294967295LL, Uint32,
// abnormal cases
'-', 0, '+', 4294967295LL, Int64,
'+', 0, '-', 1, Uint64,
'+', 4, '+', 0, Float32,
'+', 8, '+', 0, Float64,
'+', 16, '+', 0, Void,
};
static int kindsize[] = {
0,
8,
8,
16,
16,
32,
32,
64,
64,
};
// Parse a single type definition from *pp.
static Type*
parsedef(char **pp, char *name)
{
char *p;
Type *t, *tt;
int i, signlo, signhi;
vlong n1, n2, lo, hi;
Field *f;
Intrange *r;
p = *pp;
// reference to another type?
if(isdigit(*p) || *p == '(') {
if(parsetypenum(&p, &n1, &n2) < 0)
return nil;
t = typebynum(n1, n2);
if(name && t->name == nil) {
t->name = name;
// save definitions of names beginning with $
if(name[0] == '$' && !t->saved) {
typ = erealloc(typ, (ntyp+1)*sizeof typ[0]);
typ[ntyp] = t;
ntyp++;
}
}
// is there an =def suffix?
if(*p == '=') {
p++;
tt = parsedef(&p, name);
if(tt == nil)
return nil;
if(tt == t) {
tt->kind = Void;
} else {
t->type = tt;
t->kind = Typedef;
}
// assign given name, but do not record in typ.
// assume the name came from a typedef
// which will be recorded.
if(name)
tt->name = name;
}
*pp = p;
return t;
}
// otherwise a type literal. first letter identifies kind
t = emalloc(sizeof *t);
switch(*p) {
default:
*pp = "";
return t;
case '*': // pointer
p++;
t->kind = Ptr;
tt = parsedef(&p, nil);
if(tt == nil)
return nil;
t->type = tt;
break;
case 'a': // array
p++;
t->kind = Array;
// index type
tt = parsedef(&p, nil);
if(tt == nil)
return nil;
t->size = tt->size;
// element type
tt = parsedef(&p, nil);
if(tt == nil)
return nil;
t->type = tt;
break;
case 'e': // enum type - record $names in con array.
p++;
for(;;) {
if(*p == '\0')
return nil;
if(*p == ';') {
p++;
break;
}
if(parsename(&p, &name) < 0)
return nil;
if(parsenum1(&p, &n1) < 0)
return nil;
if(name[0] == '$') {
con = erealloc(con, (ncon+1)*sizeof con[0]);
name++;
con[ncon].name = name;
con[ncon].value = n1;
ncon++;
}
if(*p != ',')
return nil;
p++;
}
break;
case 'f': // function
p++;
if(parsedef(&p, nil) == nil)
return nil;
break;
case 'r': // sub-range (used for integers)
p++;
if(parsedef(&p, nil) == nil)
return nil;
// usually, the return from parsedef == t, but not always.
if(*p != ';' || *++p == ';') {
if(stabsdebug)
fprint(2, "range expected number: %s\n", p);
return nil;
}
if(*p == '-') {
signlo = '-';
p++;
} else
signlo = '+';
lo = strtoll(p, &p, 10);
if(*p != ';' || *++p == ';') {
if(stabsdebug)
fprint(2, "range expected number: %s\n", p);
return nil;
}
if(*p == '-') {
signhi = '-';
p++;
} else
signhi = '+';
hi = strtoll(p, &p, 10);
if(*p != ';') {
if(stabsdebug)
fprint(2, "range expected trailing semi: %s\n", p);
return nil;
}
p++;
t->size = hi+1; // might be array size
for(i=0; i<nelem(intranges); i++) {
r = &intranges[i];
if(r->signlo == signlo && r->signhi == signhi && r->lo == lo && r->hi == hi) {
t->kind = r->kind;
break;
}
}
break;
case 's': // struct
case 'u': // union
t->kind = Struct;
if(*p == 'u')
t->kind = Union;
// assign given name, but do not record in typ.
// assume the name came from a typedef
// which will be recorded.
if(name)
t->name = name;
p++;
if(parsenum1(&p, &n1) < 0)
return nil;
t->size = n1;
for(;;) {
if(*p == '\0')
return nil;
if(*p == ';') {
p++;
break;
}
t->f = erealloc(t->f, (t->nf+1)*sizeof t->f[0]);
f = &t->f[t->nf];
if(parsename(&p, &f->name) < 0)
return nil;
f->type = parsedef(&p, nil);
if(f->type == nil)
return nil;
if(*p != ',') {
fprint(2, "expected comma after def of %s:\n%s\n", f->name, p);
return nil;
}
p++;
if(parsenum1(&p, &n1) < 0)
return nil;
f->offset = n1;
if(*p != ',') {
fprint(2, "expected comma after offset of %s:\n%s\n", f->name, p);
return nil;
}
p++;
if(parsenum1(&p, &n1) < 0)
return nil;
f->size = n1;
if(*p != ';') {
fprint(2, "expected semi after size of %s:\n%s\n", f->name, p);
return nil;
}
// rewrite
// uint32 x : 8;
// into
// uint8 x;
// hooray for bitfields.
while(Int16 <= f->type->kind && f->type->kind <= Uint64 && kindsize[f->type->kind] > f->size) {
t = emalloc(sizeof *t);
*t = *f->type;
f->type = t;
f->type->kind -= 2;
}
p++;
t->nf++;
}
break;
}
*pp = p;
return t;
}
// Parse a stab type in p, saving info in the type hash table
// and also in the list of recorded types if appropriate.
void
parsestabtype(char *p)
{
char *p0, *name;
p0 = p;
// p is the quoted string output from gcc -gstabs on a .stabs line.
// name:t(1,2)
// name:t(1,2)=def
if(parsename(&p, &name) < 0) {
Bad:
// Use fprint instead of sysfatal to avoid
// sysfatal's internal buffer size limit.
fprint(2, "cannot parse stabs type:\n%s\n(at %s)\n", p0, p);
sysfatal("stabs parse");
}
if(*p != 't' && *p != 'T')
goto Bad;
p++;
// parse the definition.
if(name[0] == '\0')
name = nil;
if(parsedef(&p, name) == nil)
goto Bad;
if(*p != '\0')
goto Bad;
}

36
src/cmd/godefs/util.c Normal file
View File

@ -0,0 +1,36 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "a.h"
void*
emalloc(int n)
{
void *p;
p = malloc(n);
if(p == nil)
sysfatal("out of memory");
memset(p, 0, n);
return p;
}
char*
estrdup(char *s)
{
s = strdup(s);
if(s == nil)
sysfatal("out of memory");
return s;
}
void*
erealloc(void *v, int n)
{
v = realloc(v, n);
if(v == nil)
sysfatal("out of memory");
return v;
}

View File

@ -12,7 +12,7 @@ bash mkenam
make enam.o
cd ..
for i in cc 6l 6a 6c gc 6g ar db nm acid cov gobuild prof gotest
for i in cc 6l 6a 6c gc 6g ar db nm acid cov gobuild godefs prof gotest
do
echo; echo; echo %%%% making $i %%%%; echo
cd $i