1
0
mirror of https://github.com/golang/go synced 2024-11-17 08:14:48 -07:00

[dev.link] cmd/link: parallelize asmb on amd64

Introduces a parallel OutBuf implementation, and POC on amd64. Due to
some of the weird behaviors I saw on MacOS (SIGBUS while calling msync),
I will wait for feedback to port to other architectures.

On my mac, sped up Asmb by ~78% for cmd/compile (below). Will likely
have an appreciable speedup on kubelet benchmark.

Asmb                      39.1ms ±11%     8.5ms ±10%     -78.17%  (p=0.000 n=10+9)
TotalTime                  596ms ± 2%     577ms ± 8%      -3.07%  (p=0.034 n=8+10)

Change-Id: Id2a2577c3f4da155d8dccc862897f43b941877ac
Reviewed-on: https://go-review.googlesource.com/c/go/+/223742
Run-TryBot: Jeremy Faller <jeremy@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Than McIntosh <thanm@google.com>
This commit is contained in:
Jeremy Faller 2020-03-17 10:24:40 -04:00
parent 84111acd35
commit c46632a2e0
9 changed files with 245 additions and 59 deletions

View File

@ -38,6 +38,7 @@ import (
"cmd/link/internal/sym"
"debug/elf"
"log"
"sync"
)
func PADDR(x uint32) uint32 {
@ -693,29 +694,32 @@ func asmb(ctxt *ld.Link) {
ld.Asmbelfsetup()
}
var wg sync.WaitGroup
sect := ld.Segtext.Sections[0]
ctxt.Out.SeekSet(int64(sect.Vaddr - ld.Segtext.Vaddr + ld.Segtext.Fileoff))
// 0xCC is INT $3 - breakpoint instruction
ld.CodeblkPad(ctxt, int64(sect.Vaddr), int64(sect.Length), []byte{0xCC})
for _, sect = range ld.Segtext.Sections[1:] {
ctxt.Out.SeekSet(int64(sect.Vaddr - ld.Segtext.Vaddr + ld.Segtext.Fileoff))
ld.Datblk(ctxt, int64(sect.Vaddr), int64(sect.Length))
offset := sect.Vaddr - ld.Segtext.Vaddr + ld.Segtext.Fileoff
f := func(ctxt *ld.Link, out *ld.OutBuf, start, length int64) {
// 0xCC is INT $3 - breakpoint instruction
ld.CodeblkPad(ctxt, out, start, length, []byte{0xCC})
}
ld.WriteParallel(&wg, f, ctxt, offset, sect.Vaddr, sect.Length)
for _, sect := range ld.Segtext.Sections[1:] {
offset := sect.Vaddr - ld.Segtext.Vaddr + ld.Segtext.Fileoff
ld.WriteParallel(&wg, ld.Datblk2, ctxt, offset, sect.Vaddr, sect.Length)
}
if ld.Segrodata.Filelen > 0 {
ctxt.Out.SeekSet(int64(ld.Segrodata.Fileoff))
ld.Datblk(ctxt, int64(ld.Segrodata.Vaddr), int64(ld.Segrodata.Filelen))
ld.WriteParallel(&wg, ld.Datblk2, ctxt, ld.Segrodata.Fileoff, ld.Segrodata.Vaddr, ld.Segrodata.Filelen)
}
if ld.Segrelrodata.Filelen > 0 {
ctxt.Out.SeekSet(int64(ld.Segrelrodata.Fileoff))
ld.Datblk(ctxt, int64(ld.Segrelrodata.Vaddr), int64(ld.Segrelrodata.Filelen))
ld.WriteParallel(&wg, ld.Datblk2, ctxt, ld.Segrelrodata.Fileoff, ld.Segrelrodata.Vaddr, ld.Segrelrodata.Filelen)
}
ctxt.Out.SeekSet(int64(ld.Segdata.Fileoff))
ld.Datblk(ctxt, int64(ld.Segdata.Vaddr), int64(ld.Segdata.Filelen))
ld.WriteParallel(&wg, ld.Datblk2, ctxt, ld.Segdata.Fileoff, ld.Segdata.Vaddr, ld.Segdata.Filelen)
ctxt.Out.SeekSet(int64(ld.Segdwarf.Fileoff))
ld.Dwarfblk(ctxt, int64(ld.Segdwarf.Vaddr), int64(ld.Segdwarf.Filelen))
ld.WriteParallel(&wg, ld.Dwarfblk2, ctxt, ld.Segdwarf.Fileoff, ld.Segdwarf.Vaddr, ld.Segdwarf.Filelen)
wg.Wait()
}
func asmb2(ctxt *ld.Link) {

View File

@ -730,14 +730,15 @@ func dynreloc(ctxt *Link, data *[sym.SXREF][]*sym.Symbol) {
}
func Codeblk(ctxt *Link, addr int64, size int64) {
CodeblkPad(ctxt, addr, size, zeros[:])
CodeblkPad(ctxt, ctxt.Out, addr, size, zeros[:])
}
func CodeblkPad(ctxt *Link, addr int64, size int64, pad []byte) {
func CodeblkPad(ctxt *Link, out *OutBuf, addr int64, size int64, pad []byte) {
if *flagA {
ctxt.Logf("codeblk [%#x,%#x) at offset %#x\n", addr, addr+size, ctxt.Out.Offset())
}
blk(ctxt.Out, ctxt.Textp, addr, size, pad)
writeBlocks(out, ctxt.outSem, ctxt.Textp, addr, size, pad)
/* again for printing */
if !*flagA {
@ -795,9 +796,75 @@ func CodeblkPad(ctxt *Link, addr int64, size int64, pad []byte) {
}
}
func blk(out *OutBuf, syms []*sym.Symbol, addr, size int64, pad []byte) {
const blockSize = 1 << 20 // 1MB chunks written at a time.
// writeBlocks writes a specified chunk of symbols to the output buffer. It
// breaks the write up into ≥blockSize chunks to write them out, and schedules
// as many goroutines as necessary to accomplish this task. This call then
// blocks, waiting on the writes to complete. Note that we use the sem parameter
// to limit the number of concurrent writes taking place.
func writeBlocks(out *OutBuf, sem chan int, syms []*sym.Symbol, addr, size int64, pad []byte) {
for i, s := range syms {
if !s.Attr.SubSymbol() && s.Value >= addr {
if s.Value >= addr && !s.Attr.SubSymbol() {
syms = syms[i:]
break
}
}
var wg sync.WaitGroup
max, lastAddr, written := int64(blockSize), addr+size, int64(0)
for addr < lastAddr {
// Find the last symbol we'd write.
idx := -1
length := int64(0)
for i, s := range syms {
// If the next symbol's size would put us out of bounds on the total length,
// stop looking.
if s.Value+s.Size > lastAddr {
break
}
// We're gonna write this symbol.
idx = i
length = s.Value + s.Size - addr
// If we cross over the max size, we've got enough symbols.
if s.Value+s.Size > addr+max {
break
}
}
// If we didn't find any symbols to write, we're done here.
if idx < 0 {
break
}
// Start the block output operator.
if o, err := out.View(uint64(out.Offset() + written)); err == nil {
sem <- 1
wg.Add(1)
go func(o *OutBuf, syms []*sym.Symbol, addr, size int64, pad []byte) {
writeBlock(o, syms, addr, size, pad)
wg.Done()
<-sem
}(o, syms, addr, length, pad)
} else { // output not mmaped, don't parallelize.
writeBlock(out, syms, addr, length, pad)
}
// Prepare for the next loop.
if idx != -1 {
syms = syms[idx+1:]
}
written += length
addr += length
}
wg.Wait()
}
func writeBlock(out *OutBuf, syms []*sym.Symbol, addr, size int64, pad []byte) {
for i, s := range syms {
if s.Value >= addr && !s.Attr.SubSymbol() {
syms = syms[i:]
break
}
@ -841,13 +908,32 @@ func blk(out *OutBuf, syms []*sym.Symbol, addr, size int64, pad []byte) {
if addr < eaddr {
out.WriteStringPad("", int(eaddr-addr), pad)
}
out.Flush()
}
type writeFn func(*Link, *OutBuf, int64, int64)
// WriteParallel handles scheduling parallel execution of data write functions.
func WriteParallel(wg *sync.WaitGroup, fn writeFn, ctxt *Link, seek, vaddr, length uint64) {
if out, err := ctxt.Out.View(seek); err != nil {
ctxt.Out.SeekSet(int64(seek))
fn(ctxt, ctxt.Out, int64(vaddr), int64(length))
} else {
wg.Add(1)
go func() {
defer wg.Done()
fn(ctxt, out, int64(vaddr), int64(length))
}()
}
}
func Datblk(ctxt *Link, addr int64, size int64) {
writeDatblkToOutBuf(ctxt, ctxt.Out, addr, size)
}
func Datblk2(ctxt *Link, out *OutBuf, addr, size int64) {
writeDatblkToOutBuf(ctxt, out, addr, size)
}
// Used only on Wasm for now.
func DatblkBytes(ctxt *Link, addr int64, size int64) []byte {
buf := bytes.NewBuffer(make([]byte, 0, size))
@ -862,7 +948,7 @@ func writeDatblkToOutBuf(ctxt *Link, out *OutBuf, addr int64, size int64) {
ctxt.Logf("datblk [%#x,%#x) at offset %#x\n", addr, addr+size, ctxt.Out.Offset())
}
blk(out, ctxt.datap, addr, size, zeros[:])
writeBlocks(out, ctxt.outSem, ctxt.datap, addr, size, zeros[:])
/* again for printing */
if !*flagA {
@ -931,12 +1017,20 @@ func writeDatblkToOutBuf(ctxt *Link, out *OutBuf, addr int64, size int64) {
ctxt.Logf("\t%.8x|\n", uint(eaddr))
}
func Dwarfblk2(ctxt *Link, out *OutBuf, addr int64, size int64) {
if *flagA {
ctxt.Logf("dwarfblk [%#x,%#x) at offset %#x\n", addr, addr+size, ctxt.Out.Offset())
}
writeBlocks(out, ctxt.outSem, dwarfp, addr, size, zeros[:])
}
func Dwarfblk(ctxt *Link, addr int64, size int64) {
if *flagA {
ctxt.Logf("dwarfblk [%#x,%#x) at offset %#x\n", addr, addr+size, ctxt.Out.Offset())
}
blk(ctxt.Out, dwarfp, addr, size, zeros[:])
writeBlock(ctxt.Out, dwarfp, addr, size, zeros[:])
}
var zeros [512]byte

View File

@ -31,7 +31,6 @@
package ld
import (
"bufio"
"bytes"
"cmd/internal/bio"
"cmd/internal/obj"
@ -385,14 +384,11 @@ func libinit(ctxt *Link) {
Lflag(ctxt, filepath.Join(objabi.GOROOT, "pkg", fmt.Sprintf("%s_%s%s%s", objabi.GOOS, objabi.GOARCH, suffixsep, suffix)))
mayberemoveoutfile()
f, err := os.OpenFile(*flagOutfile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0775)
if err != nil {
if err := ctxt.Out.Open(*flagOutfile); err != nil {
Exitf("cannot create %s: %v", *flagOutfile, err)
}
ctxt.Out.w = bufio.NewWriter(f)
ctxt.Out.f = f
if *flagEntrySymbol == "" {
switch ctxt.BuildMode {
case BuildModeCShared, BuildModeCArchive:
@ -1203,25 +1199,21 @@ func hostlinksetup(ctxt *Link) {
*flagTmpdir = dir
ownTmpDir = true
AtExit(func() {
ctxt.Out.f.Close()
ctxt.Out.Close()
os.RemoveAll(*flagTmpdir)
})
}
// change our output to temporary object file
ctxt.Out.f.Close()
if err := ctxt.Out.Close(); err != nil {
Exitf("error closing output file")
}
mayberemoveoutfile()
p := filepath.Join(*flagTmpdir, "go.o")
var err error
f, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0775)
if err != nil {
if err := ctxt.Out.Open(p); err != nil {
Exitf("cannot create %s: %v", p, err)
}
ctxt.Out.w = bufio.NewWriter(f)
ctxt.Out.f = f
ctxt.Out.off = 0
}
// hostobjCopy creates a copy of the object files in hostobj in a
@ -1305,11 +1297,9 @@ func (ctxt *Link) archive() {
// Force the buffer to flush here so that external
// tools will see a complete file.
ctxt.Out.Flush()
if err := ctxt.Out.f.Close(); err != nil {
Exitf("close: %v", err)
if err := ctxt.Out.Close(); err != nil {
Exitf("error closing %v", *flagOutfile)
}
ctxt.Out.f = nil
argv := []string{*flagExtar, "-q", "-c", "-s"}
if ctxt.HeadType == objabi.Haix {

View File

@ -53,7 +53,9 @@ type Link struct {
Target
ErrorReporter
ArchSyms
Out *OutBuf
outSem chan int // limits the number of output writers
Out *OutBuf
Syms *sym.Symbols

View File

@ -9,6 +9,7 @@ import (
"cmd/internal/sys"
"cmd/link/internal/sym"
"encoding/binary"
"errors"
"log"
"os"
)
@ -23,19 +24,100 @@ import (
// Second, it provides a very cheap offset counter that doesn't require
// any system calls to read the value.
//
// It also mmaps the output file (if available). The intended usage is:
// Third, it also mmaps the output file (if available). The intended usage is:
// - Mmap the output file
// - Write the content
// - possibly apply any edits in the output buffer
// - Munmap the output file
// - possibly write more content to the file, which will not be edited later.
//
// And finally, it provides a mechanism by which you can multithread the
// writing of output files. This mechanism is accomplished by copying a OutBuf,
// and using it in the thread/goroutine.
//
// Parallel OutBuf is intended to be used like:
//
// func write(out *OutBuf) {
// var wg sync.WaitGroup
// for i := 0; i < 10; i++ {
// wg.Add(1)
// view, err := out.View(start[i])
// if err != nil {
// // handle output
// continue
// }
// go func(out *OutBuf, i int) {
// // do output
// wg.Done()
// }(view, i)
// }
// wg.Wait()
// }
type OutBuf struct {
arch *sys.Arch
off int64
w *bufio.Writer
buf []byte // backing store of mmap'd output file
f *os.File
encbuf [8]byte // temp buffer used by WriteN methods
arch *sys.Arch
off int64
w *bufio.Writer
buf []byte // backing store of mmap'd output file
name string
f *os.File
encbuf [8]byte // temp buffer used by WriteN methods
isView bool // true if created from View()
start, length uint64 // start and length mmaped data.
}
func (out *OutBuf) Open(name string) error {
if out.f != nil {
return errors.New("cannont open more than one file")
}
f, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0775)
if err != nil {
return err
}
out.off = 0
out.name = name
out.w = bufio.NewWriter(f)
out.f = f
return nil
}
func NewOutBuf(arch *sys.Arch) *OutBuf {
return &OutBuf{
arch: arch,
}
}
var viewError = errors.New("output not mmapped")
func (out *OutBuf) View(start uint64) (*OutBuf, error) {
if out.buf == nil {
return nil, viewError
}
return &OutBuf{
arch: out.arch,
name: out.name,
buf: out.buf,
off: int64(start),
start: start,
length: out.length,
isView: true,
}, nil
}
var viewCloseError = errors.New("cannot Close OutBuf from View")
func (out *OutBuf) Close() error {
if out.isView {
return viewCloseError
}
out.Flush()
if out.f == nil {
return nil
}
if err := out.f.Close(); err != nil {
return err
}
out.f = nil
return nil
}
func (out *OutBuf) SeekSet(p int64) {
@ -45,7 +127,7 @@ func (out *OutBuf) SeekSet(p int64) {
if out.buf == nil {
out.Flush()
if _, err := out.f.Seek(p, 0); err != nil {
Exitf("seeking to %d in %s: %v", p, out.f.Name(), err)
Exitf("seeking to %d in %s: %v", p, out.name, err)
}
}
out.off = p
@ -154,13 +236,16 @@ func (out *OutBuf) WriteStringPad(s string, n int, pad []byte) {
// edit to the symbol content.
// If the output file is not Mmap'd, just writes the content.
func (out *OutBuf) WriteSym(s *sym.Symbol) {
// NB: We inline the Write call for speediness.
if out.buf != nil {
start := out.off
out.Write(s.P)
n := copy(out.buf[out.off:], s.P)
out.off += int64(n)
s.P = out.buf[start:out.off]
s.Attr.Set(sym.AttrReadOnly, false)
} else {
out.Write(s.P)
n, _ := out.w.Write(s.P)
out.off += int64(n)
}
}
@ -168,10 +253,11 @@ func (out *OutBuf) Flush() {
var err error
if out.buf != nil {
err = out.Msync()
} else {
}
if out.w != nil {
err = out.w.Flush()
}
if err != nil {
Exitf("flushing %s: %v", out.f.Name(), err)
Exitf("flushing %s: %v", out.name, err)
}
}

View File

@ -16,11 +16,15 @@ func (out *OutBuf) Mmap(filesize uint64) error {
if err != nil {
Exitf("resize output file failed: %v", err)
}
out.length = filesize
out.buf, err = syscall.Mmap(int(out.f.Fd()), 0, int(filesize), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED|syscall.MAP_FILE)
return err
}
func (out *OutBuf) Munmap() {
if out.buf == nil {
return
}
err := out.Msync()
if err != nil {
Exitf("msync output file failed: %v", err)
@ -34,9 +38,12 @@ func (out *OutBuf) Munmap() {
}
func (out *OutBuf) Msync() error {
if out.buf == nil || out.length <= 0 {
return nil
}
// TODO: netbsd supports mmap and msync, but the syscall package doesn't define MSYNC.
// It is excluded from the build tag for now.
_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(&out.buf[0])), uintptr(len(out.buf)), syscall.MS_SYNC)
_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(&out.buf[0])), uintptr(out.length), syscall.MS_SYNC)
if errno != 0 {
return errno
}

View File

@ -36,6 +36,7 @@ func (out *OutBuf) Munmap() {
return
}
err := syscall.UnmapViewOfFile(uintptr(unsafe.Pointer(&out.buf[0])))
out.buf = nil
if err != nil {
Exitf("UnmapViewOfFile failed: %v", err)
}

View File

@ -36,13 +36,15 @@ import (
"cmd/internal/sys"
"cmd/link/internal/sym"
"log"
"runtime"
)
func linknew(arch *sys.Arch) *Link {
ctxt := &Link{
Target: Target{Arch: arch},
Syms: sym.NewSymbols(),
Out: &OutBuf{arch: arch},
outSem: make(chan int, 2*runtime.GOMAXPROCS(0)),
Out: NewOutBuf(arch),
LibraryByPkg: make(map[string]*sym.Library),
}
@ -51,8 +53,8 @@ func linknew(arch *sys.Arch) *Link {
}
AtExit(func() {
if nerrors > 0 && ctxt.Out.f != nil {
ctxt.Out.f.Close()
if nerrors > 0 {
ctxt.Out.Close()
mayberemoveoutfile()
}
})

View File

@ -626,7 +626,7 @@ func asmb(ctxt *ld.Link) {
sect := ld.Segtext.Sections[0]
ctxt.Out.SeekSet(int64(sect.Vaddr - ld.Segtext.Vaddr + ld.Segtext.Fileoff))
// 0xCC is INT $3 - breakpoint instruction
ld.CodeblkPad(ctxt, int64(sect.Vaddr), int64(sect.Length), []byte{0xCC})
ld.CodeblkPad(ctxt, ctxt.Out, int64(sect.Vaddr), int64(sect.Length), []byte{0xCC})
for _, sect = range ld.Segtext.Sections[1:] {
ctxt.Out.SeekSet(int64(sect.Vaddr - ld.Segtext.Vaddr + ld.Segtext.Fileoff))
ld.Datblk(ctxt, int64(sect.Vaddr), int64(sect.Length))