1
0
mirror of https://github.com/golang/go synced 2024-11-22 01:14:40 -07:00

arm floating point simulation

R=rsc
CC=golang-dev
https://golang.org/cl/3565041
This commit is contained in:
Ken Thompson 2010-12-09 14:45:27 -08:00
parent aa9c213e56
commit ae60526848
3 changed files with 432 additions and 339 deletions

View File

@ -29,6 +29,11 @@ softfloat(void)
p->cond->mark |= LABEL;
for(p = cursym->text; p != P; p = p->link) {
switch(p->as) {
case AMOVW:
if(p->to.type == D_FREG || p->from.type == D_FREG)
goto soft;
goto notsoft;
case AMOVWD:
case AMOVWF:
case AMOVDW:
@ -37,6 +42,7 @@ softfloat(void)
case AMOVDF:
case AMOVF:
case AMOVD:
case ACMPF:
case ACMPD:
case AADDF:
@ -47,13 +53,19 @@ softfloat(void)
case AMULD:
case ADIVF:
case ADIVD:
goto soft;
default:
goto notsoft;
soft:
if (psfloat == P)
diag("floats used with _sfloat not defined");
if (!wasfloat || (p->mark&LABEL)) {
next = prg();
*next = *p;
// BL _sfloat(SB)
// BL _sfloat(SB)
*p = zprg;
p->link = next;
p->as = ABL;
@ -65,7 +77,8 @@ softfloat(void)
wasfloat = 1;
}
break;
default:
notsoft:
wasfloat = 0;
}
}

View File

@ -8,7 +8,15 @@
#include "runtime.h"
void abort(void);
#define CPSR 14
#define FLAGS_N (1 << 31)
#define FLAGS_Z (1 << 30)
#define FLAGS_C (1 << 29)
#define FLAGS_V (1 << 28)
void runtime·abort(void);
static uint32 trace = 0;
static void
fabort(void)
@ -19,101 +27,36 @@ fabort(void)
}
}
static uint32 doabort = 0;
static uint32 trace = 0;
static const int8* opnames[] = {
// binary
"adf",
"muf",
"suf",
"rsf",
"dvf",
"rdf",
"pow",
"rpw",
"rmf",
"fml",
"fdv",
"frd",
"pol",
"UNDEFINED",
"UNDEFINED",
"UNDEFINED",
// unary
"mvf",
"mnf",
"abs",
"rnd",
"sqt",
"log",
"lgn",
"exp",
"sin",
"cos",
"tan",
"asn",
"acs",
"atn",
"urd",
"nrm"
};
static const int8* fpconst[] = {
"0.0", "1.0", "2.0", "3.0", "4.0", "5.0", "0.5", "10.0",
};
static const uint64 fpdconst[] = {
0x0000000000000000ll,
0x3ff0000000000000ll,
0x4000000000000000ll,
0x4008000000000000ll,
0x4010000000000000ll,
0x4014000000000000ll,
0x3fe0000000000000ll,
0x4024000000000000ll
};
static const int8* fpprec[] = {
"s", "d", "e", "?"
};
static uint32
precision(uint32 i)
static void
putf(uint32 reg, uint32 val)
{
switch (i&0x00080080) {
case 0:
return 0;
case 0x80:
return 1;
default:
fabort();
}
return 0;
m->freglo[reg] = val;
}
static void
putd(uint32 reg, uint64 val)
{
m->freglo[reg] = (uint32)val;
m->freghi[reg] = (uint32)(val>>32);
}
static uint64
frhs(uint32 rhs)
getd(uint32 reg)
{
if (rhs & 0x8) {
return fpdconst[rhs&0x7];
} else {
return m->freg[rhs&0x7];
}
return (uint64)m->freglo[reg] | ((uint64)m->freghi[reg]<<32);
}
static void
fprint(void)
{
uint32 i;
for (i = 0; i < 8; i++) {
runtime·printf("\tf%d:\t%X\n", i, m->freg[i]);
for (i = 0; i < 16; i++) {
runtime·printf("\tf%d:\t%X %X\n", i, m->freghi[i], m->freglo[i]);
}
}
static uint32
d2s(uint64 d)
d2f(uint64 d)
{
uint32 x;
@ -122,289 +65,420 @@ d2s(uint64 d)
}
static uint64
s2d(uint32 s)
f2d(uint32 f)
{
uint64 x;
runtime·f32to64c(s, &x);
runtime·f32to64c(f, &x);
return x;
}
// cdp, data processing instructions
static void
dataprocess(uint32* pc)
static uint32
fstatus(bool nan, int32 cmp)
{
uint32 i, opcode, unary, dest, lhs, rhs, prec;
uint64 l, r;
uint64 fd;
i = *pc;
// data processing
opcode = i>>20 & 15;
unary = i>>15 & 1;
dest = i>>12 & 7;
lhs = i>>16 & 7;
rhs = i & 15;
prec = precision(i);
// if (prec != 1)
// goto undef;
if (unary) {
switch (opcode) {
case 0: // mvf
fd = frhs(rhs);
if(prec == 0)
fd = s2d(d2s(fd));
m->freg[dest] = fd;
goto ret;
default:
goto undef;
}
} else {
l = m->freg[lhs];
r = frhs(rhs);
switch (opcode) {
default:
goto undef;
case 0:
runtime·fadd64c(l, r, &m->freg[dest]);
break;
case 1:
runtime·fmul64c(l, r, &m->freg[dest]);
break;
case 2:
runtime·fsub64c(l, r, &m->freg[dest]);
break;
case 4:
runtime·fdiv64c(l, r, &m->freg[dest]);
break;
}
goto ret;
}
undef:
doabort = 1;
ret:
if (trace || doabort) {
runtime·printf(" %p %x\t%s%s\tf%d, ", pc, *pc, opnames[opcode | unary<<4],
fpprec[prec], dest);
if (!unary)
runtime·printf("f%d, ", lhs);
if (rhs & 0x8)
runtime·printf("#%s\n", fpconst[rhs&0x7]);
else
runtime·printf("f%d\n", rhs&0x7);
fprint();
}
if (doabort)
fabort();
if(nan)
return FLAGS_C | FLAGS_V;
if(cmp == 0)
return FLAGS_Z | FLAGS_C;
if(cmp < 0)
return FLAGS_N;
return FLAGS_C;
}
#define CPSR 14
#define FLAGS_N (1 << 31)
#define FLAGS_Z (1 << 30)
#define FLAGS_C (1 << 29)
#define FLAGS_V (1 << 28)
// cmf, compare floating point
static void
compare(uint32 *pc, uint32 *regs)
{
uint32 i, flags, lhs, rhs;
uint64 l, r;
int32 cmp;
bool nan;
i = *pc;
flags = 0;
lhs = i>>16 & 0x7;
rhs = i & 0xf;
l = m->freg[lhs];
r = frhs(rhs);
runtime·fcmp64c(l, r, &cmp, &nan);
if (nan)
flags = FLAGS_C | FLAGS_V;
else if (cmp == 0)
flags = FLAGS_Z | FLAGS_C;
else if (cmp < 0)
flags = FLAGS_N;
else
flags = FLAGS_C;
if (trace) {
runtime·printf(" %p %x\tcmf\tf%d, ", pc, *pc, lhs);
if (rhs & 0x8)
runtime·printf("#%s\n", fpconst[rhs&0x7]);
else
runtime·printf("f%d\n", rhs&0x7);
}
regs[CPSR] = regs[CPSR] & 0x0fffffff | flags;
}
// ldf/stf, load/store floating
static void
loadstore(uint32 *pc, uint32 *regs)
{
uint32 i, isload, coproc, ud, wb, tlen, p, reg, freg, offset;
uint32 addr;
i = *pc;
coproc = i>>8&0xf;
isload = i>>20&1;
p = i>>24&1;
ud = i>>23&1;
tlen = i>>(22 - 1)&2 | i>>15&1;
wb = i>>21&1;
reg = i>>16 &0xf;
freg = i>>12 &0x7;
offset = (i&0xff) << 2;
if (coproc != 1 || p != 1 || wb != 0 || tlen > 1)
goto undef;
if (reg > 13)
goto undef;
if (ud)
addr = regs[reg] + offset;
else
addr = regs[reg] - offset;
if (isload)
if (tlen)
m->freg[freg] = *((uint64*)addr);
else
m->freg[freg] = s2d(*((uint32*)addr));
else
if (tlen)
*((uint64*)addr) = m->freg[freg];
else
*((uint32*)addr) = d2s(m->freg[freg]);
goto ret;
undef:
doabort = 1;
ret:
if (trace || doabort) {
if (isload)
runtime·printf(" %p %x\tldf", pc, *pc);
else
runtime·printf(" %p %x\tstf", pc, *pc);
runtime·printf("%s\t\tf%d, %s%d(r%d)", fpprec[tlen], freg, ud ? "" : "-", offset, reg);
runtime·printf("\t\t// %p", regs[reg] + (ud ? offset : -offset));
if (coproc != 1 || p != 1 || wb != 0)
runtime·printf(" coproc: %d pre: %d wb %d", coproc, p, wb);
runtime·printf("\n");
fprint();
}
if (doabort)
fabort();
}
static void
fltfix(uint32 *pc, uint32 *regs)
{
uint32 i, toarm, freg, reg, prec;
int64 val;
uint64 f0;
bool ok;
i = *pc;
toarm = i>>20 & 0x1;
freg = i>>16 & 0x7;
reg = i>>12 & 0xf;
prec = precision(i);
if (toarm) { // fix
f0 = m->freg[freg];
runtime·f64tointc(f0, &val, &ok);
if (!ok || (int32)val != val)
val = 0;
regs[reg] = val;
} else { // flt
runtime·fintto64c((int32)regs[reg], &f0);
m->freg[freg] = f0;
}
goto ret;
ret:
if (trace || doabort) {
if (toarm)
runtime·printf(" %p %x\tfix%s\t\tr%d, f%d\n", pc, *pc, fpprec[prec], reg, freg);
else
runtime·printf(" %p %x\tflt%s\t\tf%d, r%d\n", pc, *pc, fpprec[prec], freg, reg);
fprint();
}
if (doabort)
fabort();
}
// returns number of words that the fp instruction is occupying, 0 if next instruction isn't float.
// TODO(kaib): insert sanity checks for coproc 1
// returns number of words that the fp instruction
// is occupying, 0 if next instruction isn't float.
static uint32
stepflt(uint32 *pc, uint32 *regs)
{
uint32 i, c;
//printf("stepflt %p %p\n", pc, *pc);
uint32 i, regd, regm, regn;
uint32 *addr;
uint64 uval;
int64 sval;
bool nan, ok;
int32 cmp;
i = *pc;
// unconditional forward branches.
// inserted by linker after we instrument the code.
if ((i & 0xff000000) == 0xea000000) {
if (i & 0x00800000) {
return 0;
}
return (i & 0x007fffff) + 2;
}
c = i >> 25 & 7;
switch(c) {
case 6: // 110
loadstore(pc, regs);
return 1;
case 7: // 111
if (i>>24 & 1)
return 0; // ignore swi
if (i>>4 & 1) { //data transfer
if ((i&0x00f0ff00) == 0x0090f100) {
compare(pc, regs);
} else if ((i&0x00e00f10) == 0x00000110) {
fltfix(pc, regs);
} else {
runtime·printf(" %p %x\t// case 7 fail\n", pc, i);
fabort();
}
} else {
dataprocess(pc);
}
return 1;
}
if(trace)
runtime·printf("stepflt %p %x\n", pc, i);
// special cases
if((i&0xfffff000) == 0xe59fb000) {
// load r11 from pc-relative address.
// might be part of a floating point move
// (or might not, but no harm in simulating
// one instruction too many).
regs[11] = *(uint32*)((uint8*)pc + (i&0xfff) + 8);
addr = (uint32*)((uint8*)pc + (i&0xfff) + 8);
regs[11] = addr[0];
if(trace)
runtime·printf("*** cpu R[%d] = *(%p) %x\n",
11, addr, regs[11]);
return 1;
}
if(i == 0xe08bb00d) {
// add sp to 11.
// might be part of a large stack offset address
// (or might not, but again no harm done).
regs[11] += regs[13];
if(trace)
runtime·printf("*** cpu R[%d] += R[%d] %x\n",
11, 13, regs[11]);
return 1;
}
if(i == 0xeef1fa10) {
regs[CPSR] = (regs[CPSR]&0x0fffffff) | m->fflag;
if(trace)
runtime·printf("*** fpsr R[CPSR] = F[CPSR] %x\n", regs[CPSR]);
return 1;
}
goto stage1;
stage1: // load/store regn is cpureg, regm is 8bit offset
regd = i>>12 & 0xf;
regn = i>>16 & 0xf;
regm = (i & 0xff) << 2; // PLUS or MINUS ??
switch(i & 0xfff00f00) {
default:
goto stage2;
case 0xed900a00: // single load
addr = (uint32*)(regs[regn] + regm);
m->freglo[regd] = addr[0];
if(trace)
runtime·printf("*** load F[%d] = %x\n",
regd, m->freglo[regd]);
break;
case 0xed900b00: // double load
addr = (uint32*)(regs[regn] + regm);
m->freglo[regd] = addr[0];
m->freghi[regd] = addr[1];
if(trace)
runtime·printf("*** load D[%d] = %x-%x\n",
regd, m->freghi[regd], m->freglo[regd]);
break;
case 0xed800a00: // single store
addr = (uint32*)(regs[regn] + regm);
addr[0] = m->freglo[regd];
if(trace)
runtime·printf("*** *(%p) = %x\n",
addr, addr[0]);
break;
case 0xed800b00: // double store
addr = (uint32*)(regs[regn] + regm);
addr[0] = m->freglo[regd];
addr[1] = m->freghi[regd];
if(trace)
runtime·printf("*** *(%p) = %x-%x\n",
addr, addr[1], addr[0]);
break;
}
return 1;
stage2: // regd, regm, regn are 4bit variables
regm = i>>0 & 0xf;
switch(i & 0xfff00ff0) {
default:
goto stage3;
case 0xf3000110: // veor
m->freglo[regd] = m->freglo[regm]^m->freglo[regn];
m->freghi[regd] = m->freghi[regm]^m->freghi[regn];
if(trace)
runtime·printf("*** veor D[%d] = %x-%x\n",
regd, m->freghi[regd], m->freglo[regd]);
break;
case 0xeeb00b00: // D[regd] = const(regn,regm)
regn = (regn<<4) | regm;
regm = 0x40000000UL;
if(regn & 0x80)
regm |= 0x80000000UL;
if(regn & 0x40)
regm ^= 0x7fc00000UL;
regm |= (regn & 0x3f) << 16;
m->freglo[regd] = 0;
m->freghi[regd] = regm;
if(trace)
runtime·printf("*** immed D[%d] = %x-%x\n",
regd, m->freghi[regd], m->freglo[regd]);
break;
case 0xeeb00a00: // F[regd] = const(regn,regm)
regn = (regn<<4) | regm;
regm = 0x40000000UL;
if(regn & 0x80)
regm |= 0x80000000UL;
if(regn & 0x40)
regm ^= 0x7e000000UL;
regm |= (regn & 0x3f) << 19;
m->freglo[regd] = regm;
if(trace)
runtime·printf("*** immed D[%d] = %x\n",
regd, m->freglo[regd]);
break;
case 0xee300b00: // D[regd] = D[regn]+D[regm]
runtime·fadd64c(getd(regn), getd(regm), &uval);
putd(regd, uval);
if(trace)
runtime·printf("*** add D[%d] = D[%d]+D[%d] %x-%x\n",
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xee300a00: // F[regd] = F[regn]+F[regm]
runtime·fadd64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
m->freglo[regd] = d2f(uval);
if(trace)
runtime·printf("*** add F[%d] = F[%d]+F[%d] %x\n",
regd, regn, regm, m->freglo[regd]);
break;
case 0xee300b40: // D[regd] = D[regn]-D[regm]
runtime·fsub64c(getd(regn), getd(regm), &uval);
putd(regd, uval);
if(trace)
runtime·printf("*** sub D[%d] = D[%d]-D[%d] %x-%x\n",
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xee300a40: // F[regd] = F[regn]-F[regm]
runtime·fsub64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
m->freglo[regd] = d2f(uval);
if(trace)
runtime·printf("*** sub F[%d] = F[%d]-F[%d] %x\n",
regd, regn, regm, m->freglo[regd]);
break;
case 0xee200b00: // D[regd] = D[regn]*D[regm]
runtime·fmul64c(getd(regn), getd(regm), &uval);
putd(regd, uval);
if(trace)
runtime·printf("*** mul D[%d] = D[%d]*D[%d] %x-%x\n",
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xee200a00: // F[regd] = F[regn]*F[regm]
runtime·fmul64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
m->freglo[regd] = d2f(uval);
if(trace)
runtime·printf("*** mul F[%d] = F[%d]*F[%d] %x\n",
regd, regn, regm, m->freglo[regd]);
break;
case 0xee800b00: // D[regd] = D[regn]/D[regm]
runtime·fdiv64c(getd(regn), getd(regm), &uval);
putd(regd, uval);
if(trace)
runtime·printf("*** div D[%d] = D[%d]/D[%d] %x-%x\n",
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xee800a00: // F[regd] = F[regn]/F[regm]
runtime·fdiv64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
m->freglo[regd] = d2f(uval);
if(trace)
runtime·printf("*** div F[%d] = F[%d]/F[%d] %x\n",
regd, regn, regm, m->freglo[regd]);
break;
case 0xee000b10: // S[regn] = R[regd] (MOVW) (regm ignored)
m->freglo[regn] = regs[regd];
if(trace)
runtime·printf("*** cpy S[%d] = R[%d] %x\n",
regn, regd, m->freglo[regn]);
break;
case 0xee100b10: // R[regd] = S[regn] (MOVW) (regm ignored)
regs[regd] = m->freglo[regn];
if(trace)
runtime·printf("*** cpy R[%d] = S[%d] %x\n",
regd, regn, regs[regd]);
break;
}
return 1;
stage3: // regd, regm are 4bit variables
switch(i & 0xffff0ff0) {
default:
goto done;
case 0xeeb00a40: // F[regd] = F[regm] (MOVF)
m->freglo[regd] = m->freglo[regm];
if(trace)
runtime·printf("*** F[%d] = F[%d] %x\n",
regd, regm, m->freglo[regd]);
break;
case 0xeeb00b40: // D[regd] = D[regm] (MOVD)
m->freglo[regd] = m->freglo[regm];
m->freghi[regd] = m->freghi[regm];
if(trace)
runtime·printf("*** D[%d] = D[%d] %x-%x\n",
regd, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xeeb40bc0: // D[regd] :: D[regm] (CMPD)
runtime·fcmp64c(getd(regd), getd(regm), &cmp, &nan);
m->fflag = fstatus(nan, cmp);
if(trace)
runtime·printf("*** cmp D[%d]::D[%d] %x\n",
regd, regm, m->fflag);
break;
case 0xeeb40ac0: // F[regd] :: F[regm] (CMPF)
runtime·fcmp64c(f2d(m->freglo[regd]), f2d(m->freglo[regm]), &cmp, &nan);
m->fflag = fstatus(nan, cmp);
if(trace)
runtime·printf("*** cmp F[%d]::F[%d] %x\n",
regd, regm, m->fflag);
break;
case 0xeeb70ac0: // D[regd] = F[regm] (MOVFD)
putd(regd, f2d(m->freglo[regm]));
if(trace)
runtime·printf("*** f2d D[%d]=F[%d] %x-%x\n",
regd, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xeeb70bc0: // F[regd] = D[regm] (MOVDF)
m->freglo[regd] = d2f(getd(regm));
if(trace)
runtime·printf("*** d2f F[%d]=D[%d] %x-%x\n",
regd, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xeebd0ac0: // S[regd] = F[regm] (MOVFW)
runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
if(!ok || (int32)sval != sval)
sval = 0;
m->freglo[regd] = sval;
if(trace)
runtime·printf("*** fix S[%d]=F[%d] %x\n",
regd, regm, m->freglo[regd]);
break;
case 0xeebc0ac0: // S[regd] = F[regm] (MOVFW.U)
runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
if(!ok || (uint32)sval != sval)
sval = 0;
m->freglo[regd] = sval;
if(trace)
runtime·printf("*** fix unsigned S[%d]=F[%d] %x\n",
regd, regm, m->freglo[regd]);
break;
case 0xeebd0bc0: // S[regd] = D[regm] (MOVDW)
runtime·f64tointc(getd(regm), &sval, &ok);
if(!ok || (int32)sval != sval)
sval = 0;
m->freglo[regd] = sval;
if(trace)
runtime·printf("*** fix S[%d]=D[%d] %x\n",
regd, regm, m->freglo[regd]);
break;
case 0xeebc0bc0: // S[regd] = D[regm] (MOVDW.U)
runtime·f64tointc(getd(regm), &sval, &ok);
if(!ok || (uint32)sval != sval)
sval = 0;
m->freglo[regd] = sval;
if(trace)
runtime·printf("*** fix unsigned S[%d]=D[%d] %x\n",
regd, regm, m->freglo[regd]);
break;
case 0xeeb80ac0: // D[regd] = S[regm] (MOVWF)
cmp = m->freglo[regm];
if(cmp < 0) {
runtime·fintto64c(-cmp, &uval);
putf(regd, d2f(uval));
m->freglo[regd] ^= 0x80000000;
} else {
runtime·fintto64c(cmp, &uval);
putf(regd, d2f(uval));
}
if(trace)
runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
regd, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xeeb80a40: // D[regd] = S[regm] (MOVWF.U)
runtime·fintto64c(m->freglo[regm], &uval);
putf(regd, d2f(uval));
if(trace)
runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
regd, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xeeb80bc0: // D[regd] = S[regm] (MOVWD)
cmp = m->freglo[regm];
if(cmp < 0) {
runtime·fintto64c(-cmp, &uval);
putd(regd, uval);
m->freghi[regd] ^= 0x80000000;
} else {
runtime·fintto64c(cmp, &uval);
putd(regd, uval);
}
if(trace)
runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
regd, regm, m->freghi[regd], m->freglo[regd]);
break;
case 0xeeb80b40: // D[regd] = S[regm] (MOVWD.U)
runtime·fintto64c(m->freglo[regm], &uval);
putd(regd, uval);
if(trace)
runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
regd, regm, m->freghi[regd], m->freglo[regd]);
break;
}
return 1;
done:
if((i&0xff000000) == 0xee000000 ||
(i&0xff000000) == 0xed000000) {
runtime·printf("stepflt %p %x\n", pc, i);
fabort();
}
return 0;
}
@ -414,8 +488,12 @@ runtime·_sfloat2(uint32 *lr, uint32 r0)
{
uint32 skip;
skip = stepflt(lr, &r0);
if(skip == 0)
fabort(); // not ok to fail first instruction
lr += skip;
while(skip = stepflt(lr, &r0))
lr += skip;
return lr;
}

View File

@ -230,7 +230,9 @@ struct M
uint32 machport; // Return address for Mach IPC (OS X)
MCache *mcache;
G* lockedg;
uint64 freg[8]; // Floating point register storage used by ARM software fp routines
uint32 freglo[16]; // D[i] lsb and F[i]
uint32 freghi[16]; // D[i] msb and F[i+16]
uint32 fflag; // floating point compare flags
#ifdef __WINDOWS__
void* gostack; // bookmark to keep track of go stack during stdcall
#endif