1
0
mirror of https://github.com/golang/go synced 2024-11-14 20:20:30 -07:00
go/src/cmd/5g/cgen.c

1387 lines
27 KiB
C
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <u.h>
#include <libc.h>
#include "gg.h"
/*
* generate:
* res = n;
* simplifies and calls gmove.
*/
void
cgen(Node *n, Node *res)
{
Node *nl, *nr, *r;
Node n1, n2, n3, f0, f1;
int a, w;
Prog *p1, *p2, *p3;
Addr addr;
if(debug['g']) {
dump("\ncgen-n", n);
dump("cgen-res", res);
}
if(n == N || n->type == T)
goto ret;
if(res == N || res->type == T)
fatal("cgen: res nil");
switch(n->op) {
case OSLICE:
case OSLICEARR:
case OSLICESTR:
if (res->op != ONAME || !res->addable) {
tempname(&n1, n->type);
cgen_slice(n, &n1);
cgen(&n1, res);
} else
cgen_slice(n, res);
return;
}
while(n->op == OCONVNOP)
n = n->left;
if(n->ullman >= UINF) {
if(n->op == OINDREG)
fatal("cgen: this is going to misscompile");
if(res->ullman >= UINF) {
tempname(&n1, n->type);
cgen(n, &n1);
cgen(&n1, res);
goto ret;
}
}
if(isfat(n->type)) {
if(n->type->width < 0)
fatal("forgot to compute width for %T", n->type);
sgen(n, res, n->type->width);
goto ret;
}
// update addressability for string, slice
// can't do in walk because n->left->addable
// changes if n->left is an escaping local variable.
switch(n->op) {
case OLEN:
if(isslice(n->left->type) || istype(n->left->type, TSTRING))
n->addable = n->left->addable;
break;
case OCAP:
if(isslice(n->left->type))
n->addable = n->left->addable;
break;
case OITAB:
n->addable = n->left->addable;
break;
}
// if both are addressable, move
if(n->addable && res->addable) {
if(is64(n->type) || is64(res->type) ||
n->op == OREGISTER || res->op == OREGISTER ||
iscomplex[n->type->etype] || iscomplex[res->type->etype]) {
gmove(n, res);
} else {
regalloc(&n1, n->type, N);
gmove(n, &n1);
cgen(&n1, res);
regfree(&n1);
}
goto ret;
}
// if both are not addressable, use a temporary.
if(!n->addable && !res->addable) {
// could use regalloc here sometimes,
// but have to check for ullman >= UINF.
tempname(&n1, n->type);
cgen(n, &n1);
cgen(&n1, res);
return;
}
// if result is not addressable directly but n is,
// compute its address and then store via the address.
if(!res->addable) {
igen(res, &n1, N);
cgen(n, &n1);
regfree(&n1);
return;
}
if(complexop(n, res)) {
complexgen(n, res);
return;
}
// if n is sudoaddable generate addr and move
if (!is64(n->type) && !is64(res->type) && !iscomplex[n->type->etype] && !iscomplex[res->type->etype]) {
a = optoas(OAS, n->type);
if(sudoaddable(a, n, &addr, &w)) {
if (res->op != OREGISTER) {
regalloc(&n2, res->type, N);
p1 = gins(a, N, &n2);
p1->from = addr;
if(debug['g'])
print("%P [ignore previous line]\n", p1);
gmove(&n2, res);
regfree(&n2);
} else {
p1 = gins(a, N, res);
p1->from = addr;
if(debug['g'])
print("%P [ignore previous line]\n", p1);
}
sudoclean();
goto ret;
}
}
// otherwise, the result is addressable but n is not.
// let's do some computation.
nl = n->left;
nr = n->right;
if(nl != N && nl->ullman >= UINF)
if(nr != N && nr->ullman >= UINF) {
tempname(&n1, nl->type);
cgen(nl, &n1);
n2 = *n;
n2.left = &n1;
cgen(&n2, res);
goto ret;
}
// 64-bit ops are hard on 32-bit machine.
if(is64(n->type) || is64(res->type) || n->left != N && is64(n->left->type)) {
switch(n->op) {
// math goes to cgen64.
case OMINUS:
case OCOM:
case OADD:
case OSUB:
case OMUL:
case OLROT:
case OLSH:
case ORSH:
case OAND:
case OOR:
case OXOR:
cgen64(n, res);
return;
}
}
if(nl != N && isfloat[n->type->etype] && isfloat[nl->type->etype])
goto flt;
switch(n->op) {
default:
dump("cgen", n);
fatal("cgen: unknown op %N", n);
break;
case OREAL:
case OIMAG:
case OCOMPLEX:
fatal("unexpected complex");
break;
// these call bgen to get a bool value
case OOROR:
case OANDAND:
case OEQ:
case ONE:
case OLT:
case OLE:
case OGE:
case OGT:
case ONOT:
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p1 = gbranch(AB, T, 0);
p2 = pc;
gmove(nodbool(1), res);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p3 = gbranch(AB, T, 0);
patch(p1, pc);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
bgen(n, 1, 0, p2);
gmove(nodbool(0), res);
patch(p3, pc);
goto ret;
case OPLUS:
cgen(nl, res);
goto ret;
// unary
case OCOM:
a = optoas(OXOR, nl->type);
regalloc(&n1, nl->type, N);
cgen(nl, &n1);
nodconst(&n2, nl->type, -1);
gins(a, &n2, &n1);
gmove(&n1, res);
regfree(&n1);
goto ret;
case OMINUS:
regalloc(&n1, nl->type, N);
cgen(nl, &n1);
nodconst(&n3, nl->type, 0);
regalloc(&n2, nl->type, res);
gmove(&n3, &n2);
gins(optoas(OSUB, nl->type), &n1, &n2);
gmove(&n2, res);
regfree(&n1);
regfree(&n2);
goto ret;
// symmetric binary
case OAND:
case OOR:
case OXOR:
case OADD:
case OMUL:
a = optoas(n->op, nl->type);
goto sbop;
// asymmetric binary
case OSUB:
a = optoas(n->op, nl->type);
goto abop;
case OLROT:
case OLSH:
case ORSH:
cgen_shift(n->op, n->bounded, nl, nr, res);
break;
case OCONV:
if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
cgen(nl, res);
break;
}
if(nl->addable && !is64(nl->type)) {
regalloc(&n1, nl->type, res);
gmove(nl, &n1);
} else {
if(n->type->width > widthptr || is64(nl->type) || isfloat[nl->type->etype])
tempname(&n1, nl->type);
else
regalloc(&n1, nl->type, res);
cgen(nl, &n1);
}
if(n->type->width > widthptr || is64(n->type) || isfloat[n->type->etype])
tempname(&n2, n->type);
else
regalloc(&n2, n->type, N);
gmove(&n1, &n2);
gmove(&n2, res);
if(n1.op == OREGISTER)
regfree(&n1);
if(n2.op == OREGISTER)
regfree(&n2);
break;
case ODOT:
case ODOTPTR:
case OINDEX:
case OIND:
case ONAME: // PHEAP or PPARAMREF var
igen(n, &n1, res);
gmove(&n1, res);
regfree(&n1);
break;
case OITAB:
// itable of interface value
igen(nl, &n1, res);
n1.op = OREGISTER; // was OINDREG
regalloc(&n2, n->type, &n1);
n1.op = OINDREG;
n1.type = n->type;
n1.xoffset = 0;
gmove(&n1, &n2);
gmove(&n2, res);
regfree(&n1);
regfree(&n2);
break;
case OLEN:
if(istype(nl->type, TMAP) || istype(nl->type, TCHAN)) {
// map has len in the first 32-bit word.
// a zero pointer means zero length
regalloc(&n1, types[tptr], res);
cgen(nl, &n1);
nodconst(&n2, types[tptr], 0);
regalloc(&n3, n2.type, N);
gmove(&n2, &n3);
gcmp(optoas(OCMP, types[tptr]), &n1, &n3);
regfree(&n3);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p1 = gbranch(optoas(OEQ, types[tptr]), T, -1);
n2 = n1;
n2.op = OINDREG;
n2.type = types[TINT32];
gmove(&n2, &n1);
patch(p1, pc);
gmove(&n1, res);
regfree(&n1);
break;
}
if(istype(nl->type, TSTRING) || isslice(nl->type)) {
// both slice and string have len one pointer into the struct.
igen(nl, &n1, res);
n1.op = OREGISTER; // was OINDREG
regalloc(&n2, types[TUINT32], &n1);
n1.op = OINDREG;
n1.type = types[TUINT32];
n1.xoffset = Array_nel;
gmove(&n1, &n2);
gmove(&n2, res);
regfree(&n1);
regfree(&n2);
break;
}
fatal("cgen: OLEN: unknown type %lT", nl->type);
break;
case OCAP:
if(istype(nl->type, TCHAN)) {
// chan has cap in the second 32-bit word.
// a zero pointer means zero length
regalloc(&n1, types[tptr], res);
cgen(nl, &n1);
nodconst(&n2, types[tptr], 0);
regalloc(&n3, n2.type, N);
gmove(&n2, &n3);
gcmp(optoas(OCMP, types[tptr]), &n1, &n3);
regfree(&n3);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p1 = gbranch(optoas(OEQ, types[tptr]), T, -1);
n2 = n1;
n2.op = OINDREG;
n2.xoffset = 4;
n2.type = types[TINT32];
gmove(&n2, &n1);
patch(p1, pc);
gmove(&n1, res);
regfree(&n1);
break;
}
if(isslice(nl->type)) {
regalloc(&n1, types[tptr], res);
agen(nl, &n1);
n1.op = OINDREG;
n1.type = types[TUINT32];
n1.xoffset = Array_cap;
gmove(&n1, res);
regfree(&n1);
break;
}
fatal("cgen: OCAP: unknown type %lT", nl->type);
break;
case OADDR:
agen(nl, res);
break;
case OCALLMETH:
cgen_callmeth(n, 0);
cgen_callret(n, res);
break;
case OCALLINTER:
cgen_callinter(n, res, 0);
cgen_callret(n, res);
break;
case OCALLFUNC:
cgen_call(n, 0);
cgen_callret(n, res);
break;
case OMOD:
case ODIV:
a = optoas(n->op, nl->type);
goto abop;
}
goto ret;
sbop: // symmetric binary
if(nl->ullman < nr->ullman) {
r = nl;
nl = nr;
nr = r;
}
abop: // asymmetric binary
// TODO(kaib): use fewer registers here.
if(nl->ullman >= nr->ullman) {
regalloc(&n1, nl->type, res);
cgen(nl, &n1);
regalloc(&n2, nr->type, N);
cgen(nr, &n2);
} else {
regalloc(&n2, nr->type, res);
cgen(nr, &n2);
regalloc(&n1, nl->type, N);
cgen(nl, &n1);
}
gins(a, &n2, &n1);
gmove(&n1, res);
regfree(&n1);
regfree(&n2);
goto ret;
flt: // floating-point.
regalloc(&f0, nl->type, res);
if(nr != N)
goto flt2;
if(n->op == OMINUS) {
nr = nodintconst(-1);
convlit(&nr, n->type);
n->op = OMUL;
goto flt2;
}
// unary
cgen(nl, &f0);
if(n->op != OCONV && n->op != OPLUS)
gins(optoas(n->op, n->type), &f0, &f0);
gmove(&f0, res);
regfree(&f0);
goto ret;
flt2: // binary
if(nl->ullman >= nr->ullman) {
cgen(nl, &f0);
regalloc(&f1, n->type, N);
gmove(&f0, &f1);
cgen(nr, &f0);
gins(optoas(n->op, n->type), &f0, &f1);
} else {
cgen(nr, &f0);
regalloc(&f1, n->type, N);
cgen(nl, &f1);
gins(optoas(n->op, n->type), &f0, &f1);
}
gmove(&f1, res);
regfree(&f0);
regfree(&f1);
goto ret;
ret:
;
}
/*
* generate array index into res.
* n might be any size; res is 32-bit.
* returns Prog* to patch to panic call.
*/
Prog*
cgenindex(Node *n, Node *res)
{
Node tmp, lo, hi, zero, n1, n2;
if(!is64(n->type)) {
cgen(n, res);
return nil;
}
tempname(&tmp, types[TINT64]);
cgen(n, &tmp);
split64(&tmp, &lo, &hi);
gmove(&lo, res);
if(debug['B']) {
splitclean();
return nil;
}
regalloc(&n1, types[TINT32], N);
regalloc(&n2, types[TINT32], N);
nodconst(&zero, types[TINT32], 0);
gmove(&hi, &n1);
gmove(&zero, &n2);
gcmp(ACMP, &n1, &n2);
regfree(&n2);
regfree(&n1);
splitclean();
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
return gbranch(ABNE, T, -1);
}
/*
* generate:
* res = &n;
*/
void
agen(Node *n, Node *res)
{
Node *nl, *nr;
Node n1, n2, n3, n4, n5, tmp;
Prog *p1, *p2;
uint32 w;
uint64 v;
int r;
if(debug['g']) {
dump("\nagen-res", res);
dump("agen-r", n);
}
if(n == N || n->type == T || res == N || res->type == T)
fatal("agen");
while(n->op == OCONVNOP)
n = n->left;
if(n->addable) {
memset(&n1, 0, sizeof n1);
n1.op = OADDR;
n1.left = n;
regalloc(&n2, types[tptr], res);
gins(AMOVW, &n1, &n2);
gmove(&n2, res);
regfree(&n2);
goto ret;
}
nl = n->left;
nr = n->right;
switch(n->op) {
default:
fatal("agen: unknown op %N", n);
break;
case OCALLMETH:
case OCALLFUNC:
// Release res so that it is available for cgen_call.
// Pick it up again after the call.
r = -1;
if(n->ullman >= UINF) {
if(res->op == OREGISTER || res->op == OINDREG) {
r = res->val.u.reg;
reg[r]--;
}
}
if(n->op == OCALLMETH)
cgen_callmeth(n, 0);
else
cgen_call(n, 0);
if(r >= 0)
reg[r]++;
cgen_aret(n, res);
break;
case OCALLINTER:
cgen_callinter(n, res, 0);
cgen_aret(n, res);
break;
case OSLICE:
case OSLICEARR:
case OSLICESTR:
tempname(&n1, n->type);
cgen_slice(n, &n1);
agen(&n1, res);
break;
case OINDEX:
p2 = nil; // to be patched to panicindex.
w = n->type->width;
if(nr->addable) {
if(!isconst(nr, CTINT))
tempname(&tmp, types[TINT32]);
if(!isconst(nl, CTSTR))
agenr(nl, &n3, res);
if(!isconst(nr, CTINT)) {
p2 = cgenindex(nr, &tmp);
regalloc(&n1, tmp.type, N);
gmove(&tmp, &n1);
}
} else
if(nl->addable) {
if(!isconst(nr, CTINT)) {
tempname(&tmp, types[TINT32]);
p2 = cgenindex(nr, &tmp);
regalloc(&n1, tmp.type, N);
gmove(&tmp, &n1);
}
if(!isconst(nl, CTSTR)) {
regalloc(&n3, types[tptr], res);
agen(nl, &n3);
}
} else {
tempname(&tmp, types[TINT32]);
p2 = cgenindex(nr, &tmp);
nr = &tmp;
if(!isconst(nl, CTSTR))
agenr(nl, &n3, res);
regalloc(&n1, tmp.type, N);
gins(optoas(OAS, tmp.type), &tmp, &n1);
}
// &a is in &n3 (allocated in res)
// i is in &n1 (if not constant)
// w is width
// constant index
if(isconst(nr, CTINT)) {
if(isconst(nl, CTSTR))
fatal("constant string constant index");
v = mpgetfix(nr->val.u.xval);
if(isslice(nl->type) || nl->type->etype == TSTRING) {
if(!debug['B'] && !n->bounded) {
n1 = n3;
n1.op = OINDREG;
n1.type = types[tptr];
n1.xoffset = Array_nel;
regalloc(&n4, n1.type, N);
cgen(&n1, &n4);
nodconst(&n2, types[TUINT32], v);
regalloc(&n5, n2.type, N);
gmove(&n2, &n5);
gcmp(optoas(OCMP, types[TUINT32]), &n4, &n5);
regfree(&n4);
regfree(&n5);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p1 = gbranch(optoas(OGT, types[TUINT32]), T, +1);
ginscall(panicindex, 0);
patch(p1, pc);
}
n1 = n3;
n1.op = OINDREG;
n1.type = types[tptr];
n1.xoffset = Array_array;
gmove(&n1, &n3);
}
nodconst(&n2, types[tptr], v*w);
regalloc(&n4, n2.type, N);
gmove(&n2, &n4);
gins(optoas(OADD, types[tptr]), &n4, &n3);
regfree(&n4);
gmove(&n3, res);
regfree(&n3);
break;
}
regalloc(&n2, types[TINT32], &n1); // i
gmove(&n1, &n2);
regfree(&n1);
if(!debug['B'] && !n->bounded) {
// check bounds
regalloc(&n4, types[TUINT32], N);
if(isconst(nl, CTSTR)) {
nodconst(&n1, types[TUINT32], nl->val.u.sval->len);
gmove(&n1, &n4);
} else if(isslice(nl->type) || nl->type->etype == TSTRING) {
n1 = n3;
n1.op = OINDREG;
n1.type = types[tptr];
n1.xoffset = Array_nel;
cgen(&n1, &n4);
} else {
nodconst(&n1, types[TUINT32], nl->type->bound);
gmove(&n1, &n4);
}
gcmp(optoas(OCMP, types[TUINT32]), &n2, &n4);
regfree(&n4);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
if(p2)
patch(p2, pc);
ginscall(panicindex, 0);
patch(p1, pc);
}
if(isconst(nl, CTSTR)) {
regalloc(&n3, types[tptr], res);
p1 = gins(AMOVW, N, &n3);
datastring(nl->val.u.sval->s, nl->val.u.sval->len, &p1->from);
p1->from.type = D_CONST;
} else
if(isslice(nl->type) || nl->type->etype == TSTRING) {
n1 = n3;
n1.op = OINDREG;
n1.type = types[tptr];
n1.xoffset = Array_array;
gmove(&n1, &n3);
}
if(w == 0) {
// nothing to do
} else if(w == 1 || w == 2 || w == 4 || w == 8) {
memset(&n4, 0, sizeof n4);
n4.op = OADDR;
n4.left = &n2;
cgen(&n4, &n3);
if (w == 1)
gins(AADD, &n2, &n3);
else if(w == 2)
gshift(AADD, &n2, SHIFT_LL, 1, &n3);
else if(w == 4)
gshift(AADD, &n2, SHIFT_LL, 2, &n3);
else if(w == 8)
gshift(AADD, &n2, SHIFT_LL, 3, &n3);
} else {
regalloc(&n4, types[TUINT32], N);
nodconst(&n1, types[TUINT32], w);
gmove(&n1, &n4);
gins(optoas(OMUL, types[TUINT32]), &n4, &n2);
gins(optoas(OADD, types[tptr]), &n2, &n3);
regfree(&n4);
gmove(&n3, res);
}
gmove(&n3, res);
regfree(&n2);
regfree(&n3);
break;
case ONAME:
// should only get here with names in this func.
if(n->funcdepth > 0 && n->funcdepth != funcdepth) {
dump("bad agen", n);
fatal("agen: bad ONAME funcdepth %d != %d",
n->funcdepth, funcdepth);
}
// should only get here for heap vars or paramref
if(!(n->class & PHEAP) && n->class != PPARAMREF) {
dump("bad agen", n);
fatal("agen: bad ONAME class %#x", n->class);
}
cgen(n->heapaddr, res);
if(n->xoffset != 0) {
nodconst(&n1, types[TINT32], n->xoffset);
regalloc(&n2, n1.type, N);
regalloc(&n3, types[TINT32], N);
gmove(&n1, &n2);
gmove(res, &n3);
gins(optoas(OADD, types[tptr]), &n2, &n3);
gmove(&n3, res);
regfree(&n2);
regfree(&n3);
}
break;
case OIND:
cgen(nl, res);
break;
case ODOT:
agen(nl, res);
if(n->xoffset != 0) {
nodconst(&n1, types[TINT32], n->xoffset);
regalloc(&n2, n1.type, N);
regalloc(&n3, types[TINT32], N);
gmove(&n1, &n2);
gmove(res, &n3);
gins(optoas(OADD, types[tptr]), &n2, &n3);
gmove(&n3, res);
regfree(&n2);
regfree(&n3);
}
break;
case ODOTPTR:
cgen(nl, res);
if(n->xoffset != 0) {
// explicit check for nil if struct is large enough
// that we might derive too big a pointer.
if(nl->type->type->width >= unmappedzero) {
regalloc(&n1, types[tptr], N);
gmove(res, &n1);
p1 = gins(AMOVW, &n1, &n1);
p1->from.type = D_OREG;
p1->from.offset = 0;
regfree(&n1);
}
nodconst(&n1, types[TINT32], n->xoffset);
regalloc(&n2, n1.type, N);
regalloc(&n3, types[tptr], N);
gmove(&n1, &n2);
gmove(res, &n3);
gins(optoas(OADD, types[tptr]), &n2, &n3);
gmove(&n3, res);
regfree(&n2);
regfree(&n3);
}
break;
}
ret:
;
}
/*
* generate:
* newreg = &n;
* res = newreg
*
* on exit, a has been changed to be *newreg.
* caller must regfree(a).
*/
void
igen(Node *n, Node *a, Node *res)
{
regalloc(a, types[tptr], res);
agen(n, a);
a->op = OINDREG;
a->type = n->type;
}
/*
* generate:
* newreg = &n;
*
* caller must regfree(a).
*/
void
agenr(Node *n, Node *a, Node *res)
{
regalloc(a, types[tptr], res);
agen(n, a);
}
void
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
gencmp0(Node *n, Type *t, int o, int likely, Prog *to)
{
Node n1, n2, n3;
int a;
regalloc(&n1, t, N);
cgen(n, &n1);
a = optoas(OCMP, t);
if(a != ACMP) {
nodconst(&n2, t, 0);
regalloc(&n3, t, N);
gmove(&n2, &n3);
gcmp(a, &n1, &n3);
regfree(&n3);
} else
gins(ATST, &n1, N);
a = optoas(o, t);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(a, t, likely), to);
regfree(&n1);
}
/*
* generate:
* if(n == true) goto to;
*/
void
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
bgen(Node *n, int true, int likely, Prog *to)
{
int et, a;
Node *nl, *nr, *r;
Node n1, n2, n3, n4, tmp;
NodeList *ll;
Prog *p1, *p2;
USED(n4); // in unreachable code below
if(debug['g']) {
dump("\nbgen", n);
}
if(n == N)
n = nodbool(1);
if(n->ninit != nil)
genlist(n->ninit);
if(n->type == T) {
convlit(&n, types[TBOOL]);
if(n->type == T)
goto ret;
}
et = n->type->etype;
if(et != TBOOL) {
yyerror("cgen: bad type %T for %O", n->type, n->op);
patch(gins(AEND, N, N), to);
goto ret;
}
nr = N;
switch(n->op) {
default:
a = ONE;
if(!true)
a = OEQ;
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
gencmp0(n, n->type, a, likely, to);
goto ret;
case OLITERAL:
// need to ask if it is bool?
if(!true == !n->val.u.bval)
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(AB, T, 0), to);
goto ret;
case OANDAND:
if(!true)
goto caseor;
caseand:
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p1 = gbranch(AB, T, 0);
p2 = gbranch(AB, T, 0);
patch(p1, pc);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
bgen(n->left, !true, -likely, p2);
bgen(n->right, !true, -likely, p2);
p1 = gbranch(AB, T, 0);
patch(p1, to);
patch(p2, pc);
goto ret;
case OOROR:
if(!true)
goto caseand;
caseor:
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
bgen(n->left, true, likely, to);
bgen(n->right, true, likely, to);
goto ret;
case OEQ:
case ONE:
case OLT:
case OGT:
case OLE:
case OGE:
nr = n->right;
if(nr == N || nr->type == T)
goto ret;
case ONOT: // unary
nl = n->left;
if(nl == N || nl->type == T)
goto ret;
}
switch(n->op) {
case ONOT:
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
bgen(nl, !true, likely, to);
goto ret;
case OEQ:
case ONE:
case OLT:
case OGT:
case OLE:
case OGE:
a = n->op;
if(!true) {
if(isfloat[nl->type->etype]) {
// brcom is not valid on floats when NaN is involved.
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
p1 = gbranch(AB, T, 0);
p2 = gbranch(AB, T, 0);
patch(p1, pc);
ll = n->ninit;
n->ninit = nil;
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
bgen(n, 1, -likely, p2);
n->ninit = ll;
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(AB, T, 0), to);
patch(p2, pc);
goto ret;
}
a = brcom(a);
true = !true;
}
// make simplest on right
if(nl->op == OLITERAL || (nl->ullman < UINF && nl->ullman < nr->ullman)) {
a = brrev(a);
r = nl;
nl = nr;
nr = r;
}
if(isslice(nl->type)) {
// only valid to cmp darray to literal nil
if((a != OEQ && a != ONE) || nr->op != OLITERAL) {
yyerror("illegal array comparison");
break;
}
regalloc(&n1, types[tptr], N);
agen(nl, &n1);
n2 = n1;
n2.op = OINDREG;
n2.xoffset = Array_array;
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
gencmp0(&n2, types[tptr], a, likely, to);
regfree(&n1);
break;
#ifdef NOTDEF
a = optoas(a, types[tptr]);
regalloc(&n1, types[tptr], N);
regalloc(&n3, types[tptr], N);
regalloc(&n4, types[tptr], N);
agen(nl, &n1);
n2 = n1;
n2.op = OINDREG;
n2.xoffset = Array_array;
gmove(&n2, &n4);
nodconst(&tmp, types[tptr], 0);
gmove(&tmp, &n3);
gcmp(optoas(OCMP, types[tptr]), &n4, &n3);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(a, types[tptr], likely), to);
regfree(&n4);
regfree(&n3);
regfree(&n1);
break;
#endif
}
if(isinter(nl->type)) {
// front end shold only leave cmp to literal nil
if((a != OEQ && a != ONE) || nr->op != OLITERAL) {
yyerror("illegal interface comparison");
break;
}
regalloc(&n1, types[tptr], N);
agen(nl, &n1);
n2 = n1;
n2.op = OINDREG;
n2.xoffset = 0;
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
gencmp0(&n2, types[tptr], a, likely, to);
regfree(&n1);
break;
#ifdef NOTDEF
a = optoas(a, types[tptr]);
regalloc(&n1, types[tptr], N);
regalloc(&n3, types[tptr], N);
regalloc(&n4, types[tptr], N);
agen(nl, &n1);
n2 = n1;
n2.op = OINDREG;
n2.xoffset = 0;
gmove(&n2, &n4);
nodconst(&tmp, types[tptr], 0);
gmove(&tmp, &n3);
gcmp(optoas(OCMP, types[tptr]), &n4, &n3);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(a, types[tptr], likely), to);
regfree(&n1);
regfree(&n3);
regfree(&n4);
break;
#endif
}
if(iscomplex[nl->type->etype]) {
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
complexbool(a, nl, nr, true, likely, to);
break;
}
if(is64(nr->type)) {
if(!nl->addable) {
tempname(&n1, nl->type);
cgen(nl, &n1);
nl = &n1;
}
if(!nr->addable) {
tempname(&n2, nr->type);
cgen(nr, &n2);
nr = &n2;
}
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
cmp64(nl, nr, a, likely, to);
break;
}
if(nr->op == OLITERAL) {
if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) == 0) {
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
gencmp0(nl, nl->type, a, likely, to);
break;
}
if(nr->val.ctype == CTNIL) {
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
gencmp0(nl, nl->type, a, likely, to);
break;
}
}
a = optoas(a, nr->type);
if(nr->ullman >= UINF) {
regalloc(&n1, nl->type, N);
cgen(nl, &n1);
tempname(&tmp, nl->type);
gmove(&n1, &tmp);
regfree(&n1);
regalloc(&n2, nr->type, N);
cgen(nr, &n2);
regalloc(&n1, nl->type, N);
cgen(&tmp, &n1);
gcmp(optoas(OCMP, nr->type), &n1, &n2);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(a, nr->type, likely), to);
regfree(&n1);
regfree(&n2);
break;
}
tempname(&n3, nl->type);
cgen(nl, &n3);
tempname(&tmp, nr->type);
cgen(nr, &tmp);
regalloc(&n1, nl->type, N);
gmove(&n3, &n1);
regalloc(&n2, nr->type, N);
gmove(&tmp, &n2);
gcmp(optoas(OCMP, nr->type), &n1, &n2);
if(isfloat[nl->type->etype]) {
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
if(n->op == ONE) {
p1 = gbranch(ABVS, nr->type, likely);
patch(gbranch(a, nr->type, likely), to);
patch(p1, to);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
} else {
p1 = gbranch(ABVS, nr->type, -likely);
patch(gbranch(a, nr->type, likely), to);
patch(p1, pc);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
}
} else {
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(a, nr->type, likely), to);
}
regfree(&n1);
regfree(&n2);
break;
}
goto ret;
ret:
;
}
/*
* n is on stack, either local variable
* or return value from function call.
* return n's offset from SP.
*/
int32
stkof(Node *n)
{
Type *t;
Iter flist;
int32 off;
switch(n->op) {
case OINDREG:
return n->xoffset;
case ODOT:
t = n->left->type;
if(isptr[t->etype])
break;
off = stkof(n->left);
if(off == -1000 || off == 1000)
return off;
return off + n->xoffset;
case OINDEX:
t = n->left->type;
if(!isfixedarray(t))
break;
off = stkof(n->left);
if(off == -1000 || off == 1000)
return off;
if(isconst(n->right, CTINT))
return off + t->type->width * mpgetfix(n->right->val.u.xval);
return 1000;
case OCALLMETH:
case OCALLINTER:
case OCALLFUNC:
t = n->left->type;
if(isptr[t->etype])
t = t->type;
t = structfirst(&flist, getoutarg(t));
if(t != T)
return t->width + 4; // correct for LR
break;
}
// botch - probably failing to recognize address
// arithmetic on the above. eg INDEX and DOT
return -1000;
}
/*
* block copy:
* memmove(&res, &n, w);
* NB: character copy assumed little endian architecture
*/
void
sgen(Node *n, Node *res, int64 w)
{
Node dst, src, tmp, nend;
int32 c, odst, osrc;
int dir, align, op;
Prog *p, *ploop;
if(debug['g']) {
print("\nsgen w=%lld\n", w);
dump("r", n);
dump("res", res);
}
if(n->ullman >= UINF && res->ullman >= UINF)
fatal("sgen UINF");
if(w < 0 || (int32)w != w)
fatal("sgen copy %lld", w);
if(n->type == T)
fatal("sgen: missing type");
if(w == 0) {
// evaluate side effects only.
regalloc(&dst, types[tptr], N);
agen(res, &dst);
agen(n, &dst);
regfree(&dst);
return;
}
// determine alignment.
// want to avoid unaligned access, so have to use
// smaller operations for less aligned types.
// for example moving [4]byte must use 4 MOVB not 1 MOVW.
align = n->type->align;
switch(align) {
default:
fatal("sgen: invalid alignment %d for %T", align, n->type);
case 1:
op = AMOVB;
break;
case 2:
op = AMOVH;
break;
case 4:
op = AMOVW;
break;
}
if(w%align)
fatal("sgen: unaligned size %lld (align=%d) for %T", w, align, n->type);
c = w / align;
// offset on the stack
osrc = stkof(n);
odst = stkof(res);
if(osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000)) {
// osrc and odst both on stack, and at least one is in
// an unknown position. Could generate code to test
// for forward/backward copy, but instead just copy
// to a temporary location first.
tempname(&tmp, n->type);
sgen(n, &tmp, w);
sgen(&tmp, res, w);
return;
}
if(osrc%align != 0 || odst%align != 0)
fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align);
// if we are copying forward on the stack and
// the src and dst overlap, then reverse direction
dir = align;
if(osrc < odst && odst < osrc+w)
dir = -dir;
regalloc(&dst, types[tptr], res);
if(n->ullman >= res->ullman) {
agen(n, &dst); // temporarily use dst
regalloc(&src, types[tptr], N);
gins(AMOVW, &dst, &src);
agen(res, &dst);
} else {
agen(res, &dst);
regalloc(&src, types[tptr], N);
agen(n, &src);
}
regalloc(&tmp, types[TUINT32], N);
// set up end marker
memset(&nend, 0, sizeof nend);
if(c >= 4) {
regalloc(&nend, types[TUINT32], N);
p = gins(AMOVW, &src, &nend);
p->from.type = D_CONST;
if(dir < 0)
p->from.offset = dir;
else
p->from.offset = w;
}
// move src and dest to the end of block if necessary
if(dir < 0) {
p = gins(AMOVW, &src, &src);
p->from.type = D_CONST;
p->from.offset = w + dir;
p = gins(AMOVW, &dst, &dst);
p->from.type = D_CONST;
p->from.offset = w + dir;
}
// move
if(c >= 4) {
p = gins(op, &src, &tmp);
p->from.type = D_OREG;
p->from.offset = dir;
p->scond |= C_PBIT;
ploop = p;
p = gins(op, &tmp, &dst);
p->to.type = D_OREG;
p->to.offset = dir;
p->scond |= C_PBIT;
p = gins(ACMP, &src, N);
raddr(&nend, p);
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
patch(gbranch(ABNE, T, 0), ploop);
regfree(&nend);
} else {
while(c-- > 0) {
p = gins(op, &src, &tmp);
p->from.type = D_OREG;
p->from.offset = dir;
p->scond |= C_PBIT;
p = gins(op, &tmp, &dst);
p->to.type = D_OREG;
p->to.offset = dir;
p->scond |= C_PBIT;
}
}
regfree(&dst);
regfree(&src);
regfree(&tmp);
}