2009-12-17 17:08:42 -07:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
2010-10-25 18:55:50 -06:00
|
|
|
// Software floating point interpretation of ARM 7500 FP instructions.
|
|
|
|
// The interpretation is not bit compatible with the 7500.
|
|
|
|
// It uses true little-endian doubles, while the 7500 used mixed-endian.
|
|
|
|
|
2009-12-17 17:08:42 -07:00
|
|
|
#include "runtime.h"
|
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
// Index into the regs array handed to stepflt at which the CPSR word
// is read and written.
#define CPSR 14
|
2012-05-22 12:00:40 -06:00
|
|
|
// Condition flag bits N, Z, C and V, at the positions they occupy in
// the CPSR word (bits 31 down to 28).
#define FLAGS_N (1U << 31)
|
|
|
|
#define FLAGS_Z (1U << 30)
|
|
|
|
#define FLAGS_C (1U << 29)
|
|
|
|
#define FLAGS_V (1U << 28)
|
2010-12-09 15:45:27 -07:00
|
|
|
|
|
|
|
void runtime·abort(void);
|
math: regularize build
This will be nicer to the automatic tools.
It requires a few more assembly stubs
but fewer Go files.
There are a few instances where it looks like
there are new blobs of code, but they are just
being copied out of deleted files.
There is no new code here.
Suppose you have a portable implementation for Sin
and a 386-specific assembly one. The old way to
do this was to write three files
sin_decl.go
func Sin(x float64) float64 // declaration only
sin_386.s
assembly implementation
sin_port.go
func Sin(x float64) float64 { ... } // pure-Go impl
and then link in either sin_decl.go+sin_386.s or
just sin_port.go. The Makefile actually did the magic
of linking in only the _port.go files for those without
assembly and only the _decl.go files for those with
assembly, or at least some of that magic.
The biggest problem with this, beyond being hard
to explain to the build system, is that once you do
explain it to the build system, godoc knows which
of sin_port.go or sin_decl.go are involved on a given
architecture, and it (correctly) ignores the other.
That means you have to put identical doc comments
in both files.
The new approach, which is more like what we did
in the later packages math/big and sync/atomic,
is to have
sin.go
func Sin(x float64) float64 // decl only
func sin(x float64) float64 {...} // pure-Go impl
sin_386.s
// assembly for Sin (ignores sin)
sin_amd64.s
// assembly for Sin: jmp sin
sin_arm.s
// assembly for Sin: jmp sin
Once we abandon Makefiles we can put all the assembly
stubs in one source file, so the number of files will
actually go down.
Chris asked whether the branches cost anything.
Given that they are branching to pure-Go implementations
that are not typically known for their speed, the single
direct branch is not going to be noticeable. That is,
it's on the slow path.
An alternative would have been to preserve the old
"only write assembly files when there's an implementation"
and still have just one copy of the declaration of Sin
(and thus one doc comment) by doing:
sin.go
func Sin(x float64) float64 { return sin(x) }
sin_decl.go
func sin(x float64) float64 // declaration only
sin_386.s
// assembly for sin
sin_port.go
func sin(x float64) float64 { portable code }
In this version everyone would link in sin.go and
then either sin_decl.go+sin_386.s or sin_port.go.
This has an extra function call on all paths, including
the "fast path" to get to assembly, and it triples the
number of Go files involved compared to what I did
in this CL. On the other hand you don't have to
write assembly stubs. After starting down this path
I decided that the assembly stubs were the easier
approach.
As for generating the assembly stubs on the fly, much
of the goal here is to eliminate magic from the build
process, so that zero-configuration tools like goinstall
or the new go tool can handle this package.
R=golang-dev, r, cw, iant, r
CC=golang-dev
https://golang.org/cl/5488057
2011-12-13 13:20:12 -07:00
|
|
|
void math·sqrtC(uint64, uint64*);
|
2010-12-09 15:45:27 -07:00
|
|
|
|
|
|
|
// When non-zero, every simulated instruction is logged via runtime·printf.
static uint32 trace = 0;
|
2010-04-15 03:43:49 -06:00
|
|
|
|
|
|
|
// fabort prints a diagnostic for a floating point instruction the
// interpreter cannot handle and then calls runtime·abort.
static void
fabort(void)
{
	runtime·printf("Unsupported floating point instruction\n");
	runtime·abort();
}
|
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
static void
|
|
|
|
putf(uint32 reg, uint32 val)
|
|
|
|
{
|
|
|
|
m->freglo[reg] = val;
|
|
|
|
}
|
2010-04-15 03:43:49 -06:00
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
static void
|
|
|
|
putd(uint32 reg, uint64 val)
|
2010-04-15 03:43:49 -06:00
|
|
|
{
|
2010-12-09 15:45:27 -07:00
|
|
|
m->freglo[reg] = (uint32)val;
|
|
|
|
m->freghi[reg] = (uint32)(val>>32);
|
2010-04-15 03:43:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
static uint64
|
2010-12-09 15:45:27 -07:00
|
|
|
getd(uint32 reg)
|
2010-04-15 03:43:49 -06:00
|
|
|
{
|
2010-12-09 15:45:27 -07:00
|
|
|
return (uint64)m->freglo[reg] | ((uint64)m->freghi[reg]<<32);
|
2010-04-15 03:43:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2010-07-20 06:53:16 -06:00
|
|
|
fprint(void)
|
2009-12-17 17:08:42 -07:00
|
|
|
{
|
|
|
|
uint32 i;
|
2010-12-09 15:45:27 -07:00
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
runtime·printf("\tf%d:\t%X %X\n", i, m->freghi[i], m->freglo[i]);
|
2010-04-15 03:43:49 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32
|
2010-12-09 15:45:27 -07:00
|
|
|
d2f(uint64 d)
|
2010-04-15 03:43:49 -06:00
|
|
|
{
|
2010-10-25 18:55:50 -06:00
|
|
|
uint32 x;
|
2010-12-09 15:45:27 -07:00
|
|
|
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·f64to32c(d, &x);
|
2010-10-25 18:55:50 -06:00
|
|
|
return x;
|
2010-04-15 03:43:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
static uint64
|
2010-12-09 15:45:27 -07:00
|
|
|
f2d(uint32 f)
|
2010-04-15 03:43:49 -06:00
|
|
|
{
|
2010-10-25 18:55:50 -06:00
|
|
|
uint64 x;
|
2010-04-15 03:43:49 -06:00
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
runtime·f32to64c(f, &x);
|
|
|
|
return x;
|
2010-04-15 03:43:49 -06:00
|
|
|
}
|
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
static uint32
|
|
|
|
fstatus(bool nan, int32 cmp)
|
2010-07-20 06:53:16 -06:00
|
|
|
{
|
2010-12-09 15:45:27 -07:00
|
|
|
if(nan)
|
|
|
|
return FLAGS_C | FLAGS_V;
|
|
|
|
if(cmp == 0)
|
|
|
|
return FLAGS_Z | FLAGS_C;
|
|
|
|
if(cmp < 0)
|
|
|
|
return FLAGS_N;
|
|
|
|
return FLAGS_C;
|
2010-07-20 06:53:16 -06:00
|
|
|
}
|
2010-04-15 03:43:49 -06:00
|
|
|
|
2012-05-22 12:00:40 -06:00
|
|
|
// conditions array record the required CPSR cond field for the
|
|
|
|
// first 5 pairs of conditional execution opcodes
|
|
|
|
// higher 4 bits are must set, lower 4 bits are must clear
|
|
|
|
static const uint8 conditions[10/2] = {
|
|
|
|
[0/2] = (FLAGS_Z >> 24) | 0, // 0: EQ (Z set), 1: NE (Z clear)
|
|
|
|
[2/2] = (FLAGS_C >> 24) | 0, // 2: CS/HS (C set), 3: CC/LO (C clear)
|
|
|
|
[4/2] = (FLAGS_N >> 24) | 0, // 4: MI (N set), 5: PL (N clear)
|
|
|
|
[6/2] = (FLAGS_V >> 24) | 0, // 6: VS (V set), 7: VC (V clear)
|
|
|
|
[8/2] = (FLAGS_C >> 24) |
|
|
|
|
(FLAGS_Z >> 28), // 8: HI (C set and Z clear), 9: LS (C clear and Z set)
|
|
|
|
};
|
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
// returns number of words that the fp instruction
|
|
|
|
// is occupying, 0 if next instruction isn't float.
|
2010-04-15 03:43:49 -06:00
|
|
|
static uint32
|
|
|
|
stepflt(uint32 *pc, uint32 *regs)
|
|
|
|
{
|
2012-05-22 12:00:40 -06:00
|
|
|
uint32 i, opc, regd, regm, regn, cpsr;
|
2011-04-17 12:16:26 -06:00
|
|
|
int32 delta;
|
2010-12-09 15:45:27 -07:00
|
|
|
uint32 *addr;
|
|
|
|
uint64 uval;
|
|
|
|
int64 sval;
|
|
|
|
bool nan, ok;
|
|
|
|
int32 cmp;
|
2010-10-18 10:24:59 -06:00
|
|
|
|
2009-12-17 17:08:42 -07:00
|
|
|
i = *pc;
|
2010-04-05 13:51:09 -06:00
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
if(trace)
|
2012-05-22 12:00:40 -06:00
|
|
|
runtime·printf("stepflt %p %x (cpsr %x)\n", pc, i, regs[CPSR] >> 28);
|
|
|
|
|
|
|
|
opc = i >> 28;
|
|
|
|
if(opc == 14) // common case first
|
|
|
|
goto execute;
|
|
|
|
cpsr = regs[CPSR] >> 28;
|
|
|
|
switch(opc) {
|
|
|
|
case 0: case 1: case 2: case 3: case 4:
|
|
|
|
case 5: case 6: case 7: case 8: case 9:
|
|
|
|
if(((cpsr & (conditions[opc/2] >> 4)) == (conditions[opc/2] >> 4)) &&
|
|
|
|
((cpsr & (conditions[opc/2] & 0xf)) == 0)) {
|
|
|
|
if(opc & 1) return 1;
|
|
|
|
} else {
|
|
|
|
if(!(opc & 1)) return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 10: // GE (N == V)
|
|
|
|
case 11: // LT (N != V)
|
|
|
|
if((cpsr & (FLAGS_N >> 28)) == (cpsr & (FLAGS_V >> 28))) {
|
|
|
|
if(opc & 1) return 1;
|
|
|
|
} else {
|
|
|
|
if(!(opc & 1)) return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 12: // GT (N == V and Z == 0)
|
|
|
|
case 13: // LE (N != V or Z == 1)
|
|
|
|
if((cpsr & (FLAGS_N >> 28)) == (cpsr & (FLAGS_V >> 28)) &&
|
|
|
|
(cpsr & (FLAGS_Z >> 28)) == 0) {
|
|
|
|
if(opc & 1) return 1;
|
|
|
|
} else {
|
|
|
|
if(!(opc & 1)) return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 14: // AL
|
|
|
|
break;
|
|
|
|
case 15: // shouldn't happen
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("conditional %x (cpsr %x) pass\n", opc, cpsr);
|
|
|
|
i = (0xeU << 28) | (i & 0xfffffff);
|
2009-12-17 17:08:42 -07:00
|
|
|
|
2012-05-22 12:00:40 -06:00
|
|
|
execute:
|
2010-12-09 15:45:27 -07:00
|
|
|
// special cases
|
2010-10-18 11:24:19 -06:00
|
|
|
if((i&0xfffff000) == 0xe59fb000) {
|
|
|
|
// load r11 from pc-relative address.
|
|
|
|
// might be part of a floating point move
|
|
|
|
// (or might not, but no harm in simulating
|
|
|
|
// one instruction too many).
|
2010-12-09 15:45:27 -07:00
|
|
|
addr = (uint32*)((uint8*)pc + (i&0xfff) + 8);
|
|
|
|
regs[11] = addr[0];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** cpu R[%d] = *(%p) %x\n",
|
|
|
|
11, addr, regs[11]);
|
2010-10-18 11:24:19 -06:00
|
|
|
return 1;
|
2009-12-17 17:08:42 -07:00
|
|
|
}
|
2010-10-25 18:55:50 -06:00
|
|
|
if(i == 0xe08bb00d) {
|
2011-04-17 12:16:26 -06:00
|
|
|
// add sp to r11.
|
2010-10-25 18:55:50 -06:00
|
|
|
// might be part of a large stack offset address
|
|
|
|
// (or might not, but again no harm done).
|
|
|
|
regs[11] += regs[13];
|
2010-12-09 15:45:27 -07:00
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** cpu R[%d] += R[%d] %x\n",
|
|
|
|
11, 13, regs[11]);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if(i == 0xeef1fa10) {
|
|
|
|
regs[CPSR] = (regs[CPSR]&0x0fffffff) | m->fflag;
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** fpsr R[CPSR] = F[CPSR] %x\n", regs[CPSR]);
|
2010-10-25 18:55:50 -06:00
|
|
|
return 1;
|
|
|
|
}
|
2011-04-17 12:16:26 -06:00
|
|
|
if((i&0xff000000) == 0xea000000) {
|
|
|
|
// unconditional branch
|
|
|
|
// can happen in the middle of floating point
|
|
|
|
// if the linker decides it is time to lay down
|
|
|
|
// a sequence of instruction stream constants.
|
|
|
|
delta = i&0xffffff;
|
|
|
|
delta = (delta<<8) >> 8; // sign extend
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** cpu PC += %x\n", (delta+2)*4);
|
|
|
|
return delta+2;
|
|
|
|
}
|
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
goto stage1;
|
|
|
|
|
|
|
|
stage1: // load/store regn is cpureg, regm is 8bit offset
|
|
|
|
regd = i>>12 & 0xf;
|
|
|
|
regn = i>>16 & 0xf;
|
|
|
|
regm = (i & 0xff) << 2; // PLUS or MINUS ??
|
|
|
|
|
|
|
|
switch(i & 0xfff00f00) {
|
|
|
|
default:
|
|
|
|
goto stage2;
|
|
|
|
|
|
|
|
case 0xed900a00: // single load
|
|
|
|
addr = (uint32*)(regs[regn] + regm);
|
|
|
|
m->freglo[regd] = addr[0];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** load F[%d] = %x\n",
|
|
|
|
regd, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xed900b00: // double load
|
|
|
|
addr = (uint32*)(regs[regn] + regm);
|
|
|
|
m->freglo[regd] = addr[0];
|
|
|
|
m->freghi[regd] = addr[1];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** load D[%d] = %x-%x\n",
|
|
|
|
regd, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xed800a00: // single store
|
|
|
|
addr = (uint32*)(regs[regn] + regm);
|
|
|
|
addr[0] = m->freglo[regd];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** *(%p) = %x\n",
|
|
|
|
addr, addr[0]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xed800b00: // double store
|
|
|
|
addr = (uint32*)(regs[regn] + regm);
|
|
|
|
addr[0] = m->freglo[regd];
|
|
|
|
addr[1] = m->freghi[regd];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** *(%p) = %x-%x\n",
|
|
|
|
addr, addr[1], addr[0]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
stage2: // regd, regm, regn are 4bit variables
|
|
|
|
regm = i>>0 & 0xf;
|
|
|
|
switch(i & 0xfff00ff0) {
|
|
|
|
default:
|
|
|
|
goto stage3;
|
|
|
|
|
|
|
|
case 0xf3000110: // veor
|
|
|
|
m->freglo[regd] = m->freglo[regm]^m->freglo[regn];
|
|
|
|
m->freghi[regd] = m->freghi[regm]^m->freghi[regn];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** veor D[%d] = %x-%x\n",
|
|
|
|
regd, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb00b00: // D[regd] = const(regn,regm)
|
|
|
|
regn = (regn<<4) | regm;
|
|
|
|
regm = 0x40000000UL;
|
|
|
|
if(regn & 0x80)
|
|
|
|
regm |= 0x80000000UL;
|
|
|
|
if(regn & 0x40)
|
|
|
|
regm ^= 0x7fc00000UL;
|
|
|
|
regm |= (regn & 0x3f) << 16;
|
|
|
|
m->freglo[regd] = 0;
|
|
|
|
m->freghi[regd] = regm;
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** immed D[%d] = %x-%x\n",
|
|
|
|
regd, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb00a00: // F[regd] = const(regn,regm)
|
|
|
|
regn = (regn<<4) | regm;
|
|
|
|
regm = 0x40000000UL;
|
|
|
|
if(regn & 0x80)
|
|
|
|
regm |= 0x80000000UL;
|
|
|
|
if(regn & 0x40)
|
|
|
|
regm ^= 0x7e000000UL;
|
|
|
|
regm |= (regn & 0x3f) << 19;
|
|
|
|
m->freglo[regd] = regm;
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** immed D[%d] = %x\n",
|
|
|
|
regd, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee300b00: // D[regd] = D[regn]+D[regm]
|
|
|
|
runtime·fadd64c(getd(regn), getd(regm), &uval);
|
|
|
|
putd(regd, uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** add D[%d] = D[%d]+D[%d] %x-%x\n",
|
|
|
|
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee300a00: // F[regd] = F[regn]+F[regm]
|
|
|
|
runtime·fadd64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
|
|
|
|
m->freglo[regd] = d2f(uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** add F[%d] = F[%d]+F[%d] %x\n",
|
|
|
|
regd, regn, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee300b40: // D[regd] = D[regn]-D[regm]
|
|
|
|
runtime·fsub64c(getd(regn), getd(regm), &uval);
|
|
|
|
putd(regd, uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** sub D[%d] = D[%d]-D[%d] %x-%x\n",
|
|
|
|
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee300a40: // F[regd] = F[regn]-F[regm]
|
|
|
|
runtime·fsub64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
|
|
|
|
m->freglo[regd] = d2f(uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** sub F[%d] = F[%d]-F[%d] %x\n",
|
|
|
|
regd, regn, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee200b00: // D[regd] = D[regn]*D[regm]
|
|
|
|
runtime·fmul64c(getd(regn), getd(regm), &uval);
|
|
|
|
putd(regd, uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** mul D[%d] = D[%d]*D[%d] %x-%x\n",
|
|
|
|
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee200a00: // F[regd] = F[regn]*F[regm]
|
|
|
|
runtime·fmul64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
|
|
|
|
m->freglo[regd] = d2f(uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** mul F[%d] = F[%d]*F[%d] %x\n",
|
|
|
|
regd, regn, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee800b00: // D[regd] = D[regn]/D[regm]
|
|
|
|
runtime·fdiv64c(getd(regn), getd(regm), &uval);
|
|
|
|
putd(regd, uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** div D[%d] = D[%d]/D[%d] %x-%x\n",
|
|
|
|
regd, regn, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee800a00: // F[regd] = F[regn]/F[regm]
|
|
|
|
runtime·fdiv64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
|
|
|
|
m->freglo[regd] = d2f(uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** div F[%d] = F[%d]/F[%d] %x\n",
|
|
|
|
regd, regn, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee000b10: // S[regn] = R[regd] (MOVW) (regm ignored)
|
|
|
|
m->freglo[regn] = regs[regd];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** cpy S[%d] = R[%d] %x\n",
|
|
|
|
regn, regd, m->freglo[regn]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xee100b10: // R[regd] = S[regn] (MOVW) (regm ignored)
|
|
|
|
regs[regd] = m->freglo[regn];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** cpy R[%d] = S[%d] %x\n",
|
|
|
|
regd, regn, regs[regd]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
stage3: // regd, regm are 4bit variables
|
|
|
|
switch(i & 0xffff0ff0) {
|
|
|
|
default:
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
case 0xeeb00a40: // F[regd] = F[regm] (MOVF)
|
|
|
|
m->freglo[regd] = m->freglo[regm];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** F[%d] = F[%d] %x\n",
|
|
|
|
regd, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb00b40: // D[regd] = D[regm] (MOVD)
|
|
|
|
m->freglo[regd] = m->freglo[regm];
|
|
|
|
m->freghi[regd] = m->freghi[regm];
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** D[%d] = D[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
2011-06-09 15:19:08 -06:00
|
|
|
case 0xeeb10bc0: // D[regd] = sqrt D[regm]
|
math: regularize build
This will be nicer to the automatic tools.
It requires a few more assembly stubs
but fewer Go files.
There are a few instances where it looks like
there are new blobs of code, but they are just
being copied out of deleted files.
There is no new code here.
Suppose you have a portable implementation for Sin
and a 386-specific assembly one. The old way to
do this was to write three files
sin_decl.go
func Sin(x float64) float64 // declaration only
sin_386.s
assembly implementation
sin_port.go
func Sin(x float64) float64 { ... } // pure-Go impl
and then link in either sin_decl.go+sin_386.s or
just sin_port.go. The Makefile actually did the magic
of linking in only the _port.go files for those without
assembly and only the _decl.go files for those with
assembly, or at least some of that magic.
The biggest problem with this, beyond being hard
to explain to the build system, is that once you do
explain it to the build system, godoc knows which
of sin_port.go or sin_decl.go are involved on a given
architecture, and it (correctly) ignores the other.
That means you have to put identical doc comments
in both files.
The new approach, which is more like what we did
in the later packages math/big and sync/atomic,
is to have
sin.go
func Sin(x float64) float64 // decl only
func sin(x float64) float64 {...} // pure-Go impl
sin_386.s
// assembly for Sin (ignores sin)
sin_amd64.s
// assembly for Sin: jmp sin
sin_arm.s
// assembly for Sin: jmp sin
Once we abandon Makefiles we can put all the assembly
stubs in one source file, so the number of files will
actually go down.
Chris asked whether the branches cost anything.
Given that they are branching to pure-Go implementations
that are not typically known for their speed, the single
direct branch is not going to be noticeable. That is,
it's on the slow path.
An alternative would have been to preserve the old
"only write assembly files when there's an implementation"
and still have just one copy of the declaration of Sin
(and thus one doc comment) by doing:
sin.go
func Sin(x float64) float64 { return sin(x) }
sin_decl.go
func sin(x float64) float64 // declaration only
sin_386.s
// assembly for sin
sin_port.go
func sin(x float64) float64 { portable code }
In this version everyone would link in sin.go and
then either sin_decl.go+sin_386.s or sin_port.go.
This has an extra function call on all paths, including
the "fast path" to get to assembly, and it triples the
number of Go files involved compared to what I did
in this CL. On the other hand you don't have to
write assembly stubs. After starting down this path
I decided that the assembly stubs were the easier
approach.
As for generating the assembly stubs on the fly, much
of the goal here is to eliminate magic from the build
process, so that zero-configuration tools like goinstall
or the new go tool can handle this package.
R=golang-dev, r, cw, iant, r
CC=golang-dev
https://golang.org/cl/5488057
2011-12-13 13:20:12 -07:00
|
|
|
math·sqrtC(getd(regm), &uval);
|
2011-06-09 15:19:08 -06:00
|
|
|
putd(regd, uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** D[%d] = sqrt D[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
case 0xeeb40bc0: // D[regd] :: D[regm] (CMPD)
|
|
|
|
runtime·fcmp64c(getd(regd), getd(regm), &cmp, &nan);
|
|
|
|
m->fflag = fstatus(nan, cmp);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** cmp D[%d]::D[%d] %x\n",
|
|
|
|
regd, regm, m->fflag);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb40ac0: // F[regd] :: F[regm] (CMPF)
|
|
|
|
runtime·fcmp64c(f2d(m->freglo[regd]), f2d(m->freglo[regm]), &cmp, &nan);
|
|
|
|
m->fflag = fstatus(nan, cmp);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** cmp F[%d]::F[%d] %x\n",
|
|
|
|
regd, regm, m->fflag);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb70ac0: // D[regd] = F[regm] (MOVFD)
|
|
|
|
putd(regd, f2d(m->freglo[regm]));
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** f2d D[%d]=F[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb70bc0: // F[regd] = D[regm] (MOVDF)
|
|
|
|
m->freglo[regd] = d2f(getd(regm));
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** d2f F[%d]=D[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeebd0ac0: // S[regd] = F[regm] (MOVFW)
|
|
|
|
runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
|
|
|
|
if(!ok || (int32)sval != sval)
|
|
|
|
sval = 0;
|
|
|
|
m->freglo[regd] = sval;
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** fix S[%d]=F[%d] %x\n",
|
|
|
|
regd, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeebc0ac0: // S[regd] = F[regm] (MOVFW.U)
|
|
|
|
runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
|
|
|
|
if(!ok || (uint32)sval != sval)
|
|
|
|
sval = 0;
|
|
|
|
m->freglo[regd] = sval;
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** fix unsigned S[%d]=F[%d] %x\n",
|
|
|
|
regd, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeebd0bc0: // S[regd] = D[regm] (MOVDW)
|
|
|
|
runtime·f64tointc(getd(regm), &sval, &ok);
|
|
|
|
if(!ok || (int32)sval != sval)
|
|
|
|
sval = 0;
|
|
|
|
m->freglo[regd] = sval;
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** fix S[%d]=D[%d] %x\n",
|
|
|
|
regd, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeebc0bc0: // S[regd] = D[regm] (MOVDW.U)
|
|
|
|
runtime·f64tointc(getd(regm), &sval, &ok);
|
|
|
|
if(!ok || (uint32)sval != sval)
|
|
|
|
sval = 0;
|
|
|
|
m->freglo[regd] = sval;
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** fix unsigned S[%d]=D[%d] %x\n",
|
|
|
|
regd, regm, m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb80ac0: // D[regd] = S[regm] (MOVWF)
|
|
|
|
cmp = m->freglo[regm];
|
|
|
|
if(cmp < 0) {
|
|
|
|
runtime·fintto64c(-cmp, &uval);
|
|
|
|
putf(regd, d2f(uval));
|
|
|
|
m->freglo[regd] ^= 0x80000000;
|
|
|
|
} else {
|
|
|
|
runtime·fintto64c(cmp, &uval);
|
|
|
|
putf(regd, d2f(uval));
|
|
|
|
}
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb80a40: // D[regd] = S[regm] (MOVWF.U)
|
|
|
|
runtime·fintto64c(m->freglo[regm], &uval);
|
|
|
|
putf(regd, d2f(uval));
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb80bc0: // D[regd] = S[regm] (MOVWD)
|
|
|
|
cmp = m->freglo[regm];
|
|
|
|
if(cmp < 0) {
|
|
|
|
runtime·fintto64c(-cmp, &uval);
|
|
|
|
putd(regd, uval);
|
|
|
|
m->freghi[regd] ^= 0x80000000;
|
|
|
|
} else {
|
|
|
|
runtime·fintto64c(cmp, &uval);
|
|
|
|
putd(regd, uval);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xeeb80b40: // D[regd] = S[regm] (MOVWD.U)
|
|
|
|
runtime·fintto64c(m->freglo[regm], &uval);
|
|
|
|
putd(regd, uval);
|
|
|
|
|
|
|
|
if(trace)
|
|
|
|
runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
|
|
|
|
regd, regm, m->freghi[regd], m->freglo[regd]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 1;
|
2009-12-17 17:08:42 -07:00
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
done:
|
|
|
|
if((i&0xff000000) == 0xee000000 ||
|
|
|
|
(i&0xff000000) == 0xed000000) {
|
|
|
|
runtime·printf("stepflt %p %x\n", pc, i);
|
|
|
|
fabort();
|
|
|
|
}
|
2009-12-17 17:08:42 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#pragma textflag 7
|
|
|
|
uint32*
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·_sfloat2(uint32 *lr, uint32 r0)
|
2009-12-17 17:08:42 -07:00
|
|
|
{
|
|
|
|
uint32 skip;
|
2010-04-05 13:51:09 -06:00
|
|
|
|
2010-12-09 15:45:27 -07:00
|
|
|
skip = stepflt(lr, &r0);
|
2011-04-17 12:16:26 -06:00
|
|
|
if(skip == 0) {
|
|
|
|
runtime·printf("sfloat2 %p %x\n", lr, *lr);
|
2010-12-09 15:45:27 -07:00
|
|
|
fabort(); // not ok to fail first instruction
|
2011-04-17 12:16:26 -06:00
|
|
|
}
|
2010-12-09 15:45:27 -07:00
|
|
|
|
|
|
|
lr += skip;
|
2010-11-10 16:23:20 -07:00
|
|
|
while(skip = stepflt(lr, &r0))
|
2009-12-17 17:08:42 -07:00
|
|
|
lr += skip;
|
|
|
|
return lr;
|
|
|
|
}
|