liblink, cmd/ld: reenable nosplit checking and test

The new code is adapted from the Go 1.2 nosplit code, but it does not have the bug reported in issue 7623: g% go run nosplit.go g% go1.2 run nosplit.go BUG rejected incorrectly: main 0 call f; f 120 linker output: # _/tmp/go-test-nosplit021064539 main.main: nosplit stack overflow 120 guaranteed after split check in main.main 112 on entry to main.f -8 after main.f uses 120 g% Fixes #6931. Fixes #7623. LGTM=iant R=golang-codereviews, iant, ality CC=golang-codereviews, r https://golang.org/cl/88190043
2024-11-23 02:00:03 -07:00 · 2014-04-16 22:08:00 -04:00 · 2014-04-16 22:08:00 -04:00 · 5e8c922625
commit 5e8c922625
parent 877e0a135f
26 changed files with 487 additions and 146 deletions
--- a/include/link.h
+++ b/include/link.h
@ -124,6 +124,8 @@ struct	LSym
 	short	type;
 	short	version;
 	uchar	dupok;
+	uchar	external;
+	uchar	nosplit;
 	uchar	reachable;
 	uchar	cgoexport;
 	uchar	special;
@ -229,7 +231,9 @@ enum
 {
 	R_ADDR = 1,
 	R_SIZE,
-	R_CALL,
+	R_CALL, // relocation for direct PC-relative call
+	R_CALLARM, // relocation for ARM direct call
+	R_CALLIND, // marker for indirect call (no actual relocating necessary)
 	R_CONST,
 	R_PCREL,
 	R_TLS,
@ -313,19 +317,20 @@ struct Pcln
 };

 // Pcdata iterator.
-//	for(pciterinit(&it, &pcd); !it.done; pciternext(&it)) { it.value holds in [it.pc, it.nextpc) }
+//	for(pciterinit(ctxt, &it, &pcd); !it.done; pciternext(&it)) { it.value holds in [it.pc, it.nextpc) }
 struct Pciter
 {
 	Pcdata d;
 	uchar *p;
 	uint32 pc;
 	uint32 nextpc;
+	uint32 pcscale;
 	int32 value;
 	int start;
 	int done;
 };

-void	pciterinit(Pciter*, Pcdata*);
+void	pciterinit(Link*, Pciter*, Pcdata*);
 void	pciternext(Pciter*);

 // symbol version, incremented each time a file is loaded.
--- a/src/cmd/5l/asm.c
+++ b/src/cmd/5l/asm.c
@ -102,7 +102,7 @@ adddynrel(LSym *s, Reloc *r)

 	// Handle relocations found in ELF object files.
 	case 256 + R_ARM_PLT32:
-		r->type = R_CALL;
+		r->type = R_CALLARM;
 		if(targ->type == SDYNIMPORT) {
 			addpltsym(ctxt, targ);
 			r->sym = linklookup(ctxt, ".plt", 0);
@ -148,7 +148,7 @@ adddynrel(LSym *s, Reloc *r)
 		return;

 	case 256 + R_ARM_CALL:
-		r->type = R_CALL;
+		r->type = R_CALLARM;
 		if(targ->type == SDYNIMPORT) {
 			addpltsym(ctxt, targ);
 			r->sym = linklookup(ctxt, ".plt", 0);
@ -178,7 +178,7 @@ adddynrel(LSym *s, Reloc *r)

 	case 256 + R_ARM_PC24:
 	case 256 + R_ARM_JUMP24:
-		r->type = R_CALL;
+		r->type = R_CALLARM;
 		if(targ->type == SDYNIMPORT) {
 			addpltsym(ctxt, targ);
 			r->sym = linklookup(ctxt, ".plt", 0);
@ -192,7 +192,7 @@ adddynrel(LSym *s, Reloc *r)
 		return;

 	switch(r->type) {
-	case R_PCREL:
+	case R_CALLARM:
 		addpltsym(ctxt, targ);
 		r->sym = linklookup(ctxt, ".plt", 0);
 		r->add = targ->plt;
@ -243,7 +243,7 @@ elfreloc1(Reloc *r, vlong sectoff)
 			return -1;
 		break;

-	case R_CALL:
+	case R_CALLARM:
 		if(r->siz == 4) {
 			if((r->add & 0xff000000) == 0xeb000000) // BL
 				LPUT(R_ARM_CALL | elfsym<<8);
@ -310,7 +310,7 @@ archreloc(Reloc *r, LSym *s, vlong *val)

 	if(linkmode == LinkExternal) {
 		switch(r->type) {
-		case R_CALL:
+		case R_CALLARM:
 			r->done = 0;

 			// set up addend for eventual relocation via outer symbol.
@ -357,7 +357,7 @@ archreloc(Reloc *r, LSym *s, vlong *val)
 		*val = 0xe5bcf000U +
 			(0xfff & (uint32)(symaddr(r->sym) - (symaddr(linklookup(ctxt, ".plt", 0)) + r->off) + r->add + 8));
 		return 0;
-	case R_CALL: // bl XXXXXX or b YYYYYY
+	case R_CALLARM: // bl XXXXXX or b YYYYYY
 		*val = braddoff((0xff000000U & (uint32)r->add), 
 		                (0xffffff & (uint32)
 		                   ((symaddr(r->sym) + ((uint32)r->add) * 4 - (s->value + r->off)) / 4)));
--- a/src/cmd/6l/asm.c
+++ b/src/cmd/6l/asm.c
@ -200,6 +200,7 @@ adddynrel(LSym *s, Reloc *r)
 		return;

 	switch(r->type) {
+	case R_CALL:
 	case R_PCREL:
 		addpltsym(targ);
 		r->sym = linklookup(ctxt, ".plt", 0);
@ -288,6 +289,7 @@ elfreloc1(Reloc *r, vlong sectoff)
 			return -1;
 		break;
 		
+	case R_CALL:
 	case R_PCREL:
 		if(r->siz == 4) {
 			if(r->xsym->type == SDYNIMPORT)
@ -342,6 +344,7 @@ machoreloc1(Reloc *r, vlong sectoff)
 	case R_ADDR:
 		v |= MACHO_X86_64_RELOC_UNSIGNED<<28;
 		break;
+	case R_CALL:
 	case R_PCREL:
 		v |= 1<<24; // pc-relative bit
 		v |= MACHO_X86_64_RELOC_BRANCH<<28;
--- a/src/cmd/8l/asm.c
+++ b/src/cmd/8l/asm.c
@ -189,6 +189,7 @@ adddynrel(LSym *s, Reloc *r)
 		return;

 	switch(r->type) {
+	case R_CALL:
 	case R_PCREL:
 		addpltsym(ctxt, targ);
 		r->sym = linklookup(ctxt, ".plt", 0);
@ -256,6 +257,7 @@ elfreloc1(Reloc *r, vlong sectoff)
 			return -1;
 		break;

+	case R_CALL:
 	case R_PCREL:
 		if(r->siz == 4)
 			LPUT(R_386_PC32 | elfsym<<8);
@ -303,6 +305,7 @@ machoreloc1(Reloc *r, vlong sectoff)
 	case R_ADDR:
 		v |= MACHO_GENERIC_RELOC_VANILLA<<28;
 		break;
+	case R_CALL:
 	case R_PCREL:
 		v |= 1<<24; // pc-relative bit
 		v |= MACHO_GENERIC_RELOC_VANILLA<<28;
--- a/src/cmd/ld/data.c
+++ b/src/cmd/ld/data.c
@ -132,13 +132,11 @@ relocsym(LSym *s)
 {
 	Reloc *r;
 	LSym *rs;
-	Prog p;
 	int32 i, off, siz, fl;
 	vlong o;
 	uchar *cast;

 	ctxt->cursym = s;
-	memset(&p, 0, sizeof p);
 	for(r=s->r; r<s->r+s->nr; r++) {
 		r->done = 1;
 		off = r->off;
@ -153,6 +151,8 @@ relocsym(LSym *s)
 		}
 		if(r->type >= 256)
 			continue;
+		if(r->siz == 0) // informational relocation - no work to do
+			continue;

 		// Solaris needs the ability to reference dynimport symbols.
 		if(HEADTYPE != Hsolaris && r->sym != S && r->sym->type == SDYNIMPORT)
@ -244,6 +244,7 @@ relocsym(LSym *s)
 			}
 			o = symaddr(r->sym) + r->add;
 			break;
+		case R_CALL:
 		case R_PCREL:
 			// r->sym can be null when CALL $(constant) is transformed from absolute PC to relative PC call.
 			if(linkmode == LinkExternal && r->sym && r->sym->type != SCONST && r->sym->sect != ctxt->cursym->sect) {
@ -299,7 +300,7 @@ relocsym(LSym *s)
 			s->p[off] = (int8)o;
 			break;
 		case 4:
-			if(r->type == R_PCREL) {
+			if(r->type == R_PCREL || r->type == R_CALL) {
 				if(o != (int32)o)
 					diag("pc-relative relocation address is too big: %#llx", o);
 			} else {
@ -568,6 +569,9 @@ datblk(int32 addr, int32 size)
 				case R_PCREL:
 					typ = "pcrel";
 					break;
+				case R_CALL:
+					typ = "call";
+					break;
 				}
 				Bprint(&bso, "\treloc %.8ux/%d %s %s+%#llx [%#llx]\n",
 					(uint)(sym->value+r->off), r->siz, typ, rsname, (vlong)r->add, (vlong)(r->sym->value+r->add));
--- a/src/cmd/ld/dwarf.c
+++ b/src/cmd/ld/dwarf.c
@ -1604,8 +1604,8 @@ writelines(void)

 		finddebugruntimepath(s);

-		pciterinit(&pcfile, &s->pcln->pcfile);
-		pciterinit(&pcline, &s->pcln->pcline);
+		pciterinit(ctxt, &pcfile, &s->pcln->pcfile);
+		pciterinit(ctxt, &pcline, &s->pcln->pcline);
 		epc = pc;
 		while(!pcfile.done && !pcline.done) {
 			if(epc - s->value >= pcfile.nextpc) {
@ -1761,7 +1761,7 @@ writeframes(void)
 		addrput(0);	// initial location
 		addrput(0);	// address range

-		for(pciterinit(&pcsp, &s->pcln->pcsp); !pcsp.done; pciternext(&pcsp))
+		for(pciterinit(ctxt, &pcsp, &s->pcln->pcsp); !pcsp.done; pciternext(&pcsp))
 			putpccfadelta(pcsp.nextpc - pcsp.pc, PtrSize + pcsp.value);

 		fdesize = cpos() - fdeo - 4;	// exclude the length field.
--- a/src/cmd/ld/ldelf.c
+++ b/src/cmd/ld/ldelf.c
@ -601,24 +601,9 @@ ldelf(Biobuf *f, char *pkg, int64 len, char *pn)
 		s->size = sym.size;
 		s->outer = sect->sym;
 		if(sect->sym->type == STEXT) {
-			Prog *p;
-
-			if(s->text != P) {
-				if(!s->dupok)
+			if(s->external && !s->dupok)
 					diag("%s: duplicate definition of %s", pn, s->name);
-			} else {
-				// build a TEXT instruction with a unique pc
-				// just to make the rest of the linker happy.
-				p = ctxt->arch->prg();
-				p->as = ATEXT;
-				p->from.type = D_EXTERN;
-				p->from.sym = s;
-				ctxt->arch->settextflag(p, 7);
-				p->to.type = D_CONST;
-				p->link = nil;
-				p->pc = ctxt->pc++;
-				s->text = p;
-			}
+			s->external = 1;
 		}
 	}
 	
--- a/src/cmd/ld/ldmacho.c
+++ b/src/cmd/ld/ldmacho.c
@ -641,22 +641,9 @@ ldmacho(Biobuf *f, char *pkg, int64 len, char *pn)
 		if(!(s->cgoexport & CgoExportDynamic))
 			s->dynimplib = nil;	// satisfy dynimport
 		if(outer->type == STEXT) {
-			Prog *p;
-
-			if(s->text != P)
-				diag("%s sym#%d: duplicate definition of %s", pn, i, s->name);
-			// build a TEXT instruction with a unique pc
-			// just to make the rest of the linker happy.
-			// TODO: this is too 6l-specific ?
-			p = ctxt->arch->prg();
-			p->as = ATEXT;
-			p->from.type = D_EXTERN;
-			p->from.sym = s;
-			ctxt->arch->settextflag(p, 7);
-			p->to.type = D_CONST;
-			p->link = nil;
-			p->pc = ctxt->pc++;
-			s->text = p;
+			if(s->external && !s->dupok)
+				diag("%s: duplicate definition of %s", pn, s->name);
+			s->external = 1;
 		}
 		sym->sym = s;
 	}
--- a/src/cmd/ld/ldpe.c
+++ b/src/cmd/ld/ldpe.c
@ -366,21 +366,9 @@ ldpe(Biobuf *f, char *pkg, int64 len, char *pn)
 		s->size = 4;
 		s->outer = sect->sym;
 		if(sect->sym->type == STEXT) {
-			Prog *p;
-	
-			if(s->text != P)
+			if(s->external && !s->dupok)
 				diag("%s: duplicate definition of %s", pn, s->name);
-			// build a TEXT instruction with a unique pc
-			// just to make the rest of the linker happy.
-			p = ctxt->arch->prg();
-			p->as = ATEXT;
-			p->from.type = D_EXTERN;
-			p->from.sym = s;
-			ctxt->arch->settextflag(p, 7);
-			p->to.type = D_CONST;
-			p->link = nil;
-			p->pc = ctxt->pc++;
-			s->text = p;
+			s->external = 1;
 		}
 	}

--- a/src/cmd/ld/lib.c
+++ b/src/cmd/ld/lib.c
@ -1021,50 +1021,59 @@ dostkcheck(void)
 	morestack = linklookup(ctxt, "runtime.morestack", 0);
 	newstack = linklookup(ctxt, "runtime.newstack", 0);

-	// TODO
-	// First the nosplits on their own.
-	for(s = ctxt->textp; s != nil; s = s->next) {
-		if(s->text == nil || s->text->link == nil || (ctxt->arch->textflag(s->text) & NOSPLIT) == 0)
-			continue;
-		ctxt->cursym = s;
-		ch.up = nil;
-		ch.sym = s;
-		ch.limit = StackLimit - callsize();
-		stkcheck(&ch, 0);
-		s->stkcheck = 1;
-	}
+	// Every splitting function ensures that there are at least StackLimit
+	// bytes available below SP when the splitting prologue finishes.
+	// If the splitting function calls F, then F begins execution with
+	// at least StackLimit - callsize() bytes available.
+	// Check that every function behaves correctly with this amount
+	// of stack, following direct calls in order to piece together chains
+	// of non-splitting functions.
+	ch.up = nil;
+	ch.limit = StackLimit - callsize();

-	// Check calling contexts.
-	// Some nosplits get called a little further down,
-	// like newproc and deferproc.	We could hard-code
-	// that knowledge but it's more robust to look at
-	// the actual call sites.
+	// Check every function, but do the nosplit functions in a first pass,
+	// to make the printed failure chains as short as possible.
 	for(s = ctxt->textp; s != nil; s = s->next) {
-		if(s->text == nil || s->text->link == nil || (ctxt->arch->textflag(s->text) & NOSPLIT) != 0)
+		// runtime.racesymbolizethunk is called from gcc-compiled C
+		// code running on the operating system thread stack.
+		// It uses more than the usual amount of stack but that's okay.
+		if(strcmp(s->name, "runtime.racesymbolizethunk") == 0)
 			continue;
+
+		if(s->nosplit) {
 		ctxt->cursym = s;
-		ch.up = nil;
 		ch.sym = s;
-		ch.limit = StackLimit - callsize();
 		stkcheck(&ch, 0);
 	}
+	}
+	for(s = ctxt->textp; s != nil; s = s->next) {
+		if(!s->nosplit) {
+		ctxt->cursym = s;
+		ch.sym = s;
+		stkcheck(&ch, 0);
+	}
+}
 }

 static int
 stkcheck(Chain *up, int depth)
 {
 	Chain ch, ch1;
-	Prog *p;
 	LSym *s;
-	int limit, prolog;
+	int limit;
+	Reloc *r, *endr;
+	Pciter pcsp;
 	
 	limit = up->limit;
 	s = up->sym;
-	p = s->text;
 	
-	// Small optimization: don't repeat work at top.
-	if(s->stkcheck && limit == StackLimit-callsize())
+	// Don't duplicate work: only need to consider each
+	// function at top of safe zone once.
+	if(limit == StackLimit-callsize()) {
+		if(s->stkcheck)
 		return 0;
+		s->stkcheck = 1;
+	}
 	
 	if(depth > 100) {
 		diag("nosplit stack check too deep");
@ -1072,7 +1081,7 @@ stkcheck(Chain *up, int depth)
 		return -1;
 	}

-	if(p == nil || p->link == nil) {
+	if(s->external || s->pcln == nil) {
 		// external function.
 		// should never be called directly.
 		// only diagnose the direct caller.
@ -1092,50 +1101,56 @@ stkcheck(Chain *up, int depth)
 		return 0;

 	ch.up = up;
-	prolog = (ctxt->arch->textflag(s->text) & NOSPLIT) == 0;
-	for(p = s->text; p != P; p = p->link) {
-		limit -= p->spadj;
-		if(prolog && p->spadj != 0) {
-			// The first stack adjustment in a function with a
-			// split-checking prologue marks the end of the
-			// prologue.  Assuming the split check is correct,
-			// after the adjustment there should still be at least
-			// StackLimit bytes available below the stack pointer.
-			// If this is not the top call in the chain, no need
-			// to duplicate effort, so just stop.
-			if(depth > 0)
-				return 0;
-			prolog = 0;
-			limit = StackLimit;
-		}
-		if(limit < 0) {
-			stkbroke(up, limit);
+	
+	// Walk through sp adjustments in function, consuming relocs.
+	r = s->r;
+	endr = r + s->nr;
+	for(pciterinit(ctxt, &pcsp, &s->pcln->pcsp); !pcsp.done; pciternext(&pcsp)) {
+		// pcsp.value is in effect for [pcsp.pc, pcsp.nextpc).
+
+		// Check stack size in effect for this span.
+		if(limit - pcsp.value < 0) {
+			stkbroke(up, limit - pcsp.value);
 			return -1;
 		}
-		if(ctxt->arch->iscall(p)) {
-			limit -= callsize();
-			ch.limit = limit;
-			if(p->to.type == D_BRANCH) {
+
+		// Process calls in this span.
+		for(; r < endr && r->off < pcsp.nextpc; r++) {
+			switch(r->type) {
+			case R_CALL:
+			case R_CALLARM:
 				// Direct call.
-				ch.sym = p->to.sym;
+				ch.limit = limit - pcsp.value - callsize();
+				ch.sym = r->sym;
 				if(stkcheck(&ch, depth+1) < 0)
 					return -1;
-			} else {
-				// Indirect call.  Assume it is a splitting function,
+
+				// If this is a call to morestack, we've just raised our limit back
+				// to StackLimit beyond the frame size.
+				if(strncmp(r->sym->name, "runtime.morestack", 17) == 0) {
+					limit = StackLimit + s->locals;
+					if(thechar == '5')
+						limit += 4; // saved LR
+				}
+				break;
+
+			case R_CALLIND:
+				// Indirect call.  Assume it is a call to a splitting function,
 				// so we have to make sure it can call morestack.
-				limit -= callsize();
+				// Arrange the data structures to report both calls, so that
+				// if there is an error, stkprint shows all the steps involved.
+				ch.limit = limit - pcsp.value - callsize();
 				ch.sym = nil;
-				ch1.limit = limit;
+				ch1.limit = ch.limit - callsize(); // for morestack in called prologue
 				ch1.up = &ch;
 				ch1.sym = morestack;
 				if(stkcheck(&ch1, depth+2) < 0)
 					return -1;
-				limit += callsize();
+				break;
 			}
-			limit += callsize();
+		}
 		}
 		
-	}
 	return 0;
 }

@ -1158,7 +1173,7 @@ stkprint(Chain *ch, int limit)

 	if(ch->up == nil) {
 		// top of chain.  ch->sym != nil.
-		if(ctxt->arch->textflag(ch->sym->text) & NOSPLIT)
+		if(ch->sym->nosplit)
 			print("\t%d\tassumed on entry to %s\n", ch->limit, name);
 		else
 			print("\t%d\tguaranteed after split check in %s\n", ch->limit, name);
--- a/src/cmd/ld/pcln.c
+++ b/src/cmd/ld/pcln.c
@ -55,7 +55,7 @@ ftabaddstring(LSym *ftab, char *s)
 }

 static void
-renumberfiles(LSym **files, int nfiles, Pcdata *d)
+renumberfiles(Link *ctxt, LSym **files, int nfiles, Pcdata *d)
 {
 	int i;
 	LSym *f;
@ -78,7 +78,7 @@ renumberfiles(LSym **files, int nfiles, Pcdata *d)
 	newval = -1;
 	memset(&out, 0, sizeof out);

-	for(pciterinit(&it, d); !it.done; pciternext(&it)) {
+	for(pciterinit(ctxt, &it, d); !it.done; pciternext(&it)) {
 		// value delta
 		oldval = it.value;
 		if(oldval == -1)
@ -94,7 +94,7 @@ renumberfiles(LSym **files, int nfiles, Pcdata *d)
 		addvarint(&out, v);

 		// pc delta
-		addvarint(&out, it.nextpc - it.pc);
+		addvarint(&out, (it.nextpc - it.pc) / it.pcscale);
 	}
 	
 	// terminating value delta
@ -179,10 +179,10 @@ pclntab(void)
 		off = setuint32(ctxt, ftab, off, ctxt->cursym->locals + frameptrsize);
 		
 		if(pcln != &zpcln) {
-			renumberfiles(pcln->file, pcln->nfile, &pcln->pcfile);
+			renumberfiles(ctxt, pcln->file, pcln->nfile, &pcln->pcfile);
 			if(0) {
 				// Sanity check the new numbering
-				for(pciterinit(&it, &pcln->pcfile); !it.done; pciternext(&it)) {
+				for(pciterinit(ctxt, &it, &pcln->pcfile); !it.done; pciternext(&it)) {
 					if(it.value < 1 || it.value > ctxt->nhistfile) {
 						diag("bad file number in pcfile: %d not in range [1, %d]\n", it.value, ctxt->nhistfile);
 						errorexit();
--- a/src/cmd/link/load.go
+++ b/src/cmd/link/load.go
@ -75,9 +75,13 @@ func (p *Prog) loadPackage(pkg *Package) {

 // TODO(rsc): Define full enumeration for relocation types.
 const (
-	R_ADDR  = 1
-	R_SIZE  = 2
-	R_PCREL = 5
+	R_ADDR    = 1
+	R_SIZE    = 2
+	R_CALL    = 3
+	R_CALLARM = 4
+	R_CALLIND = 5
+	R_CONST   = 6
+	R_PCREL   = 7
 )

 // relocateSym applies relocations to sym's data.
@ -93,9 +97,9 @@ func (p *Prog) relocateSym(sym *Sym, data []byte) {
 		switch r.Type {
 		default:
 			p.errorf("%v: unknown relocation type %d", sym, r.Type)
-		case R_ADDR:
+		case R_ADDR, R_CALLIND:
 			// ok
-		case R_PCREL:
+		case R_PCREL, R_CALL:
 			val -= sym.Addr + Addr(r.Offset+r.Size)
 		}
 		frag := data[r.Offset : r.Offset+r.Size]
--- a/src/cmd/link/testdata/autosection.6
+++ b/src/cmd/link/testdata/autosection.6
--- a/src/cmd/link/testdata/autoweak.6
+++ b/src/cmd/link/testdata/autoweak.6
--- a/src/cmd/link/testdata/dead.6
+++ b/src/cmd/link/testdata/dead.6
--- a/src/cmd/link/testdata/hello.6
+++ b/src/cmd/link/testdata/hello.6
--- a/src/cmd/link/testdata/layout.6
+++ b/src/cmd/link/testdata/layout.6
--- a/src/cmd/link/testdata/pclntab.6
+++ b/src/cmd/link/testdata/pclntab.6
--- a/src/liblink/asm5.c
+++ b/src/liblink/asm5.c
@ -1302,7 +1302,7 @@ if(0 /*debug['G']*/) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->na
 			rel->siz = 4;
 			rel->sym = p->to.sym;
 			rel->add = o1 | ((v >> 2) & 0xffffff);
-			rel->type = R_CALL;
+			rel->type = R_CALLARM;
 			break;
 		}
 		if(p->pcond != nil)
@ -1324,6 +1324,10 @@ if(0 /*debug['G']*/) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->na
 			ctxt->diag("%P: doesn't support BL offset(REG) where offset != 0", p);
 		o1 = oprrr(ctxt, ABL, p->scond);
 		o1 |= p->to.reg;
+		rel = addrel(ctxt->cursym);
+		rel->off = ctxt->pc;
+		rel->siz = 0;
+		rel->type = R_CALLIND;
 		break;

 	case 8:		/* sll $c,[R],R -> mov (R<<$c),R */
--- a/src/liblink/asm6.c
+++ b/src/liblink/asm6.c
@ -124,6 +124,7 @@ enum
 	Z_rp,
 	Zbr,
 	Zcall,
+	Zcallindreg,
 	Zib_,
 	Zib_rp,
 	Zibo_m,
@ -503,8 +504,8 @@ static uchar	yloop[] =
 };
 static uchar	ycall[] =
 {
-	Ynone,	Yml,	Zo_m64,	0,
-	Yrx,	Yrx,	Zo_m64,	2,
+	Ynone,	Yml,	Zcallindreg,	0,
+	Yrx,	Yrx,	Zcallindreg,	2,
 	Ynone,	Ybr,	Zcall,	1,
 	0
 };
@ -2903,6 +2904,7 @@ found:
 		break;

 	case Zo_m64:
+	case_Zo_m64:
 		*ctxt->andptr++ = op;
 		asmandsz(ctxt, &p->to, o->op[z+1], 0, 1);
 		break;
@ -3074,11 +3076,18 @@ found:
 		r->off = p->pc + ctxt->andptr - ctxt->and;
 		r->sym = p->to.sym;
 		r->add = p->to.offset;
-		r->type = R_PCREL;
+		r->type = R_CALL;
 		r->siz = 4;
 		put4(ctxt, 0);
 		break;

+	case Zcallindreg:
+		r = addrel(ctxt->cursym);
+		r->off = p->pc;
+		r->type = R_CALLIND;
+		r->siz = 0;
+		goto case_Zo_m64;
+
 	case Zbr:
 	case Zjmp:
 	case Zloop:
@ -3548,7 +3557,7 @@ asmins(Link *ctxt, Prog *p)
 			break;
 		if(ctxt->rexflag)
 			r->off++;
-		if(r->type == R_PCREL)
+		if(r->type == R_PCREL || r->type == R_CALL)
 			r->add -= p->pc + n - (r->off + r->siz);
 	}

--- a/src/liblink/asm8.c
+++ b/src/liblink/asm8.c
@ -98,6 +98,7 @@ enum
 	Zcall,
 	Zcallcon,
 	Zcallind,
+	Zcallindreg,
 	Zib_,
 	Zib_rp,
 	Zibo_m,
@ -414,8 +415,8 @@ static uchar	yloop[] =
 };
 static uchar	ycall[] =
 {
-	Ynone,	Yml,	Zo_m,	0,
-	Yrx,	Yrx,	Zo_m,	2,
+	Ynone,	Yml,	Zcallindreg,	0,
+	Yrx,	Yrx,	Zcallindreg,	2,
 	Ynone,	Ycol,	Zcallind,	2,
 	Ynone,	Ybr,	Zcall,	0,
 	Ynone,	Yi32,	Zcallcon,	1,
@ -2289,6 +2290,7 @@ found:
 		break;

 	case Zo_m:
+	case_Zo_m:
 		*ctxt->andptr++ = op;
 		asmand(ctxt, &p->to, o->op[z+1]);
 		break;
@ -2406,7 +2408,7 @@ found:
 		*ctxt->andptr++ = op;
 		r = addrel(ctxt->cursym);
 		r->off = p->pc + ctxt->andptr - ctxt->and;
-		r->type = R_PCREL;
+		r->type = R_CALL;
 		r->siz = 4;
 		r->sym = p->to.sym;
 		r->add = p->to.offset;
@ -2509,6 +2511,13 @@ found:
 		put4(ctxt, 0);
 		break;

+	case Zcallindreg:
+		r = addrel(ctxt->cursym);
+		r->off = p->pc;
+		r->type = R_CALLIND;
+		r->siz = 0;
+		goto case_Zo_m;
+
 	case Zbyte:
 		v = vaddr(ctxt, &p->from, &rel);
 		if(rel.siz != 0) {
--- a/src/liblink/obj6.c
+++ b/src/liblink/obj6.c
@ -417,16 +417,16 @@ addstacksplit(Link *ctxt, LSym *cursym)
 	cursym->locals = textstksiz;

 	if(autoffset < StackSmall && !(p->from.scale & NOSPLIT)) {
-		for(q = p; q != nil; q = q->link)
+		for(q = p; q != nil; q = q->link) {
 			if(q->as == ACALL)
 				goto noleaf;
+			if((q->as == ADUFFCOPY || q->as == ADUFFZERO) && autoffset >= StackSmall - 8)
+				goto noleaf;
+		}
 		p->from.scale |= NOSPLIT;
 	noleaf:;
 	}

-	if((p->from.scale & NOSPLIT) && autoffset >= StackLimit)
-		ctxt->diag("nosplit func likely to overflow stack");
-
 	q = nil;
 	if(!(p->from.scale & NOSPLIT) || (p->from.scale & WRAPPER)) {
 		p = appendp(ctxt, p);
--- a/src/liblink/objfile.c
+++ b/src/liblink/objfile.c
@ -43,12 +43,13 @@
 //	- gotype [symbol reference]
 //	- p [data block]
 //	- nr [int]
-//	- r [nr relocations]
+//	- r [nr relocations, sorted by off]
 //
 // If type == STEXT, there are a few more fields:
 //
 //	- args [int]
 //	- locals [int]
+//	- nosplit [int]
 //	- leaf [int]
 //	- nlocal [int]
 //	- local [nlocal automatics]
@ -226,6 +227,8 @@ writeobj(Link *ctxt, Biobuf *b)
 					flag = p->from.scale;
 				if(flag & DUPOK)
 					s->dupok = 1;
+				if(flag & NOSPLIT)
+					s->nosplit = 1;
 				s->next = nil;
 				s->type = STEXT;
 				s->text = p;
@ -294,6 +297,8 @@ writesym(Link *ctxt, Biobuf *b, LSym *s)
 			Bprint(ctxt->bso, "t=%d ", s->type);
 		if(s->dupok)
 			Bprint(ctxt->bso, "dupok ");
+		if(s->nosplit)
+			Bprint(ctxt->bso, "nosplit ");
 		Bprint(ctxt->bso, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
 		if(s->type == STEXT) {
 			Bprint(ctxt->bso, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
@ -353,6 +358,7 @@ writesym(Link *ctxt, Biobuf *b, LSym *s)
 	if(s->type == STEXT) {
 		wrint(b, s->args);
 		wrint(b, s->locals);
+		wrint(b, s->nosplit);
 		wrint(b, s->leaf);
 		n = 0;
 		for(a = s->autom; a != nil; a = a->link)
@ -574,6 +580,7 @@ readsym(Link *ctxt, Biobuf *f, char *pkg, char *pn)
 	if(s->type == STEXT) {
 		s->args = rdint(f);
 		s->locals = rdint(f);
+		s->nosplit = rdint(f);
 		s->leaf = rdint(f);
 		n = rdint(f);
 		for(i=0; i<n; i++) {
@ -630,6 +637,8 @@ readsym(Link *ctxt, Biobuf *f, char *pkg, char *pn)
 			Bprint(ctxt->bso, "t=%d ", s->type);
 		if(s->dupok)
 			Bprint(ctxt->bso, "dupok ");
+		if(s->nosplit)
+			Bprint(ctxt->bso, "nosplit ");
 		Bprint(ctxt->bso, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
 		if(s->type == STEXT)
 			Bprint(ctxt->bso, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
--- a/src/liblink/pcln.c
+++ b/src/liblink/pcln.c
@ -347,11 +347,11 @@ pciternext(Pciter *it)
 	
 	// pc delta
 	v = getvarint(&it->p);
-	it->nextpc = it->pc + v;
+	it->nextpc = it->pc + v*it->pcscale;
 }

 void
-pciterinit(Pciter *it, Pcdata *d)
+pciterinit(Link *ctxt, Pciter *it, Pcdata *d)
 {
 	it->d = *d;
 	it->p = it->d.p;
@ -360,5 +360,6 @@ pciterinit(Pciter *it, Pcdata *d)
 	it->value = -1;
 	it->start = 1;
 	it->done = 0;
+	it->pcscale = ctxt->arch->minlc;
 	pciternext(it);
 }
--- a/src/pkg/debug/goobj/read.go
+++ b/src/pkg/debug/goobj/read.go
@ -191,6 +191,7 @@ type Func struct {
 	Args     int        // size in bytes of argument frame: inputs and outputs
 	Frame    int        // size in bytes of local variable frame
 	Leaf     bool       // function omits save of link register (ARM)
+	NoSplit  bool       // function omits stack split prologue
 	Var      []Var      // detail about local variables
 	PCSP     Data       // PC → SP offset map
 	PCFile   Data       // PC → file number map (index into File)
@ -623,6 +624,7 @@ func (r *objReader) parseObject(prefix []byte) error {
 			f.Args = r.readInt()
 			f.Frame = r.readInt()
 			f.Leaf = r.readInt() != 0
+			f.NoSplit = r.readInt() != 0
 			f.Var = make([]Var, r.readInt())
 			for i := range f.Var {
 				v := &f.Var[i]
--- a/test/nosplit.go
+++ b/test/nosplit.go
@ -0,0 +1,313 @@
+// run
+
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"runtime"
+	"strconv"
+	"strings"
+)
+
+var tests = `
+# These are test cases for the linker analysis that detects chains of
+# nosplit functions that would cause a stack overflow.
+#
+# Lines beginning with # are comments.
+#
+# Each test case describes a sequence of functions, one per line.
+# Each function definition is the function name, then the frame size,
+# then optionally the keyword 'nosplit', then the body of the function.
+# The body is assembly code, with some shorthands.
+# The shorthand 'call x' stands for CALL x(SB).
+# The shorthand 'callind' stands for 'CALL R0', where R0 is a register.
+# Each test case must define a function named main, and it must be first.
+# That is, a line beginning "main " indicates the start of a new test case.
+# Within a stanza, ; can be used instead of \n to separate lines.
+#
+# After the function definition, the test case ends with an optional
+# REJECT line, specifying the architectures on which the case should
+# be rejected. "REJECT" without any architectures means reject on all architectures.
+# The linker should accept the test case on systems not explicitly rejected.
+#
+# 64-bit systems do not attempt to execute test cases with frame sizes
+# that are only 32-bit aligned.
+
+# Ordinary function should work
+main 0
+
+# Large frame marked nosplit is always wrong.
+main 10000 nosplit
+REJECT
+
+# Calling a large frame is okay.
+main 0 call big
+big 10000
+
+# But not if the frame is nosplit.
+main 0 call big
+big 10000 nosplit
+REJECT
+
+# Recursion is okay.
+main 0 call main
+
+# Recursive nosplit runs out of space.
+main 0 nosplit call main
+REJECT
+
+# Chains of ordinary functions okay.
+main 0 call f1
+f1 80 call f2
+f2 80
+
+# Chains of nosplit must fit in the stack limit, 128 bytes.
+main 0 call f1
+f1 80 nosplit call f2
+f2 80 nosplit
+REJECT
+
+# Larger chains.
+main 0 call f1
+f1 16 call f2
+f2 16 call f3
+f3 16 call f4
+f4 16 call f5
+f5 16 call f6
+f6 16 call f7
+f7 16 call f8
+f8 16 call end
+end 1000
+
+main 0 call f1
+f1 16 nosplit call f2
+f2 16 nosplit call f3
+f3 16 nosplit call f4
+f4 16 nosplit call f5
+f5 16 nosplit call f6
+f6 16 nosplit call f7
+f7 16 nosplit call f8
+f8 16 nosplit call end
+end 1000
+REJECT
+
+# Test cases near the 128-byte limit.
+
+# Ordinary stack split frame is always okay.
+main 112
+main 116
+main 120
+main 124
+main 128
+main 132
+main 136
+
+# A nosplit leaf can use the whole 128-CallSize bytes available on entry.
+main 112 nosplit
+main 116 nosplit
+main 120 nosplit
+main 124 nosplit
+main 128 nosplit; REJECT
+main 132 nosplit; REJECT
+main 136 nosplit; REJECT
+
+# Calling a nosplit function from a nosplit function requires
+# having room for the saved caller PC and the called frame.
+# Because ARM doesn't save LR in the leaf, it gets an extra 4 bytes.
+main 112 nosplit call f; f 0 nosplit
+main 116 nosplit call f; f 0 nosplit; REJECT amd64
+main 120 nosplit call f; f 0 nosplit; REJECT amd64
+main 124 nosplit call f; f 0 nosplit; REJECT amd64 386
+main 128 nosplit call f; f 0 nosplit; REJECT
+main 132 nosplit call f; f 0 nosplit; REJECT
+main 136 nosplit call f; f 0 nosplit; REJECT
+
+# Calling a splitting function from a nosplit function requires
+# having room for the saved caller PC of the call but also the
+# saved caller PC for the call to morestack. Again the ARM works
+# in less space.
+main 104 nosplit call f; f 0 call f
+main 108 nosplit call f; f 0 call f
+main 112 nosplit call f; f 0 call f; REJECT amd64
+main 116 nosplit call f; f 0 call f; REJECT amd64
+main 120 nosplit call f; f 0 call f; REJECT amd64 386
+main 124 nosplit call f; f 0 call f; REJECT amd64 386
+main 128 nosplit call f; f 0 call f; REJECT
+main 132 nosplit call f; f 0 call f; REJECT
+main 136 nosplit call f; f 0 call f; REJECT
+
+# Indirect calls are assumed to be splitting functions.
+main 104 nosplit callind
+main 108 nosplit callind
+main 112 nosplit callind; REJECT amd64
+main 116 nosplit callind; REJECT amd64
+main 120 nosplit callind; REJECT amd64 386
+main 124 nosplit callind; REJECT amd64 386
+main 128 nosplit callind; REJECT
+main 132 nosplit callind; REJECT
+main 136 nosplit callind; REJECT
+
+# Issue 7623
+main 0 call f; f 112
+main 0 call f; f 116
+main 0 call f; f 120
+main 0 call f; f 124
+main 0 call f; f 128
+main 0 call f; f 132
+main 0 call f; f 136
+`
+
+var (
+	commentRE = regexp.MustCompile(`(?m)^#.*`)
+	rejectRE  = regexp.MustCompile(`(?s)\A(.+?)((\n|; *)REJECT(.*))?\z`)
+	lineRE    = regexp.MustCompile(`(\w+) (\d+)( nosplit)?(.*)`)
+	callRE    = regexp.MustCompile(`\bcall (\w+)\b`)
+	callindRE = regexp.MustCompile(`\bcallind\b`)
+)
+
+func main() {
+	goarch := os.Getenv("GOARCH")
+	if goarch == "" {
+		goarch = runtime.GOARCH
+	}
+
+	dir, err := ioutil.TempDir("", "go-test-nosplit")
+	if err != nil {
+		bug()
+		fmt.Printf("creating temp dir: %v\n", err)
+		return
+	}
+	defer os.RemoveAll(dir)
+	ioutil.WriteFile(filepath.Join(dir, "main.go"), []byte("package main\nfunc main()\n"), 0666)
+
+	tests = strings.Replace(tests, "\t", " ", -1)
+	tests = commentRE.ReplaceAllString(tests, "")
+
+	nok := 0
+	nfail := 0
+TestCases:
+	for len(tests) > 0 {
+		var stanza string
+		i := strings.Index(tests, "\nmain ")
+		if i < 0 {
+			stanza, tests = tests, ""
+		} else {
+			stanza, tests = tests[:i], tests[i+1:]
+		}
+
+		m := rejectRE.FindStringSubmatch(stanza)
+		if m == nil {
+			bug()
+			fmt.Printf("invalid stanza:\n\t%s\n", indent(stanza))
+			continue
+		}
+		lines := strings.TrimSpace(m[1])
+		reject := false
+		if m[2] != "" {
+			if strings.TrimSpace(m[4]) == "" {
+				reject = true
+			} else {
+				for _, rej := range strings.Fields(m[4]) {
+					if rej == goarch {
+						reject = true
+					}
+				}
+			}
+		}
+		if lines == "" && !reject {
+			continue
+		}
+
+		var buf bytes.Buffer
+		if goarch == "arm" {
+			fmt.Fprintf(&buf, "#define CALL BL\n#define REGISTER (R0)\n")
+		} else {
+			fmt.Fprintf(&buf, "#define REGISTER AX\n")
+		}
+
+		for _, line := range strings.Split(lines, "\n") {
+			line = strings.TrimSpace(line)
+			if line == "" {
+				continue
+			}
+			for _, subline := range strings.Split(line, ";") {
+				subline = strings.TrimSpace(subline)
+				if subline == "" {
+					continue
+				}
+				m := lineRE.FindStringSubmatch(subline)
+				if m == nil {
+					bug()
+					fmt.Printf("invalid function line: %s\n", subline)
+					continue TestCases
+				}
+				name := m[1]
+				size, _ := strconv.Atoi(m[2])
+				if goarch == "amd64" && size%8 == 4 {
+					continue TestCases
+				}
+				nosplit := m[3]
+				body := m[4]
+
+				if nosplit != "" {
+					nosplit = ",7"
+				} else {
+					nosplit = ",0"
+				}
+				body = callRE.ReplaceAllString(body, "CALL ·$1(SB);")
+				body = callindRE.ReplaceAllString(body, "CALL REGISTER;")
+
+				fmt.Fprintf(&buf, "TEXT ·%s(SB)%s,$%d-0\n\t%s\n\tRET\n\n", name, nosplit, size, body)
+			}
+		}
+
+		ioutil.WriteFile(filepath.Join(dir, "asm.s"), buf.Bytes(), 0666)
+
+
+		cmd := exec.Command("go", "build")
+		cmd.Dir = dir
+		output, err := cmd.CombinedOutput()
+		if err == nil {
+			nok++
+			if reject {
+				bug()
+				fmt.Printf("accepted incorrectly:\n\t%s\n", indent(strings.TrimSpace(stanza)))
+			}
+		} else {
+			nfail++
+			if !reject {
+				bug()
+				fmt.Printf("rejected incorrectly:\n\t%s\n", indent(strings.TrimSpace(stanza)))
+				fmt.Printf("\n\tlinker output:\n\t%s\n", indent(string(output)))
+			}
+		}
+	}
+
+	if !bugged && (nok == 0 || nfail == 0) {
+		bug()
+		fmt.Printf("not enough test cases run\n")
+	}
+}
+
+func indent(s string) string {
+	return strings.Replace(s, "\n", "\n\t", -1)
+}
+
+var bugged = false
+
+func bug() {
+	if !bugged {
+		bugged = true
+		fmt.Printf("BUG\n")
+	}
+}