1
0
mirror of https://github.com/golang/go synced 2024-11-19 12:14:42 -07:00

cmd/link: deduplicate read-only string data

Many read-only strings in Go binaries are substrings of other read-only
strings. A common source is the text form of type information, which
will include both "struct { X int }" and "*struct { X int }" or
"*bytes.Reader" and "func(*bytes.Reader)" in the same binary.

Because this character data is referred to by separate string headers,
we can skip writing the smaller string and modify the pointer
relocation to point to the larger string. This CL does this
deduplication in the linker after the reachable set of strings has
been determined.

This removes 765KB from juju (1.4% without DWARF).

Link time goes at tip goes form 4.6s to 6.3s, but note that this CL
is part of a series that recently reduced link time from 9.6s.

For #6853.

Change-Id: Ib2087cf627c9f1e9a1181f9b4c8f81d1a3f42191
Reviewed-on: https://go-review.googlesource.com/19987
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: David Crawshaw <crawshaw@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
David Crawshaw 2016-02-27 08:33:14 -09:00
parent 69285a8b46
commit afd835434c

View File

@ -5,7 +5,10 @@
package ld
import (
"bytes"
"cmd/internal/obj"
"index/suffixarray"
"sort"
"strings"
)
@ -48,12 +51,87 @@ func mergestrings() {
}
}
// Sort the strings, shortest first.
//
// Ordering by length lets us use the largest matching substring
// index when there are multiple matches. This means we will not
// use a substring of a string that we will later in the pass
// mark as unreachable.
sort.Sort(strSymsByLen(strs))
// Build a suffix array.
dataOff := make([]int, len(strs))
data := make([]byte, 0, size)
for i := range strs {
dataOff[i] = len(data)
data = append(data, strs[i].P...)
}
index := suffixarray.New(data)
// Search for substring replacements.
type replacement struct {
str *LSym
off int
}
replacements := make(map[*LSym]replacement)
for i, s := range strs {
results := index.Lookup(s.P, -1)
if len(results) == 0 {
continue
}
var res int
for _, result := range results {
if result > res {
res = result
}
}
var off int
x := sort.SearchInts(dataOff, res)
if x == len(dataOff) || dataOff[x] > res {
x--
off = res - dataOff[x]
}
if x == i {
continue // found ourself
}
if len(s.P) > len(strs[x].P[off:]) {
// Do not use substrings that match across strings.
// In theory it is possible, but it would
// complicate accounting for which future strings
// are already used. It doesn't appear to be common
// enough to do the extra work.
continue
}
if off%Thearch.Minalign != 0 {
continue // Cannot relcate to this substring.
}
replacements[s] = replacement{
str: strs[x],
off: off,
}
}
// Put all string data into a single symbol and update the relocations.
alldata := Linklookup(Ctxt, "go.string.alldata", 0)
alldata.Type = obj.SGOSTRING
alldata.Attr |= AttrReachable
alldata.P = make([]byte, 0, size)
for _, str := range strs {
str.Attr.Set(AttrReachable, false)
if rep, isReplaced := replacements[str]; isReplaced {
// As strs is sorted, the replacement string
// is always later in the strs range. Shift the
// relocations to the replacement string symbol
// and process then.
relocs := relocsToStrs[rep.str]
for _, r := range relocsToStrs[str] {
r.Add += int64(rep.off)
relocs = append(relocs, r)
}
relocsToStrs[rep.str] = relocs
continue
}
off := len(alldata.P)
alldata.P = append(alldata.P, str.P...)
// Architectures with Minalign > 1 cannot have relocations pointing
@ -62,7 +140,6 @@ func mergestrings() {
for r := len(alldata.P) % Thearch.Minalign; r > 0; r-- {
alldata.P = append(alldata.P, 0)
}
str.Attr.Set(AttrReachable, false)
for _, r := range relocsToStrs[str] {
r.Add += int64(off)
r.Sym = alldata
@ -70,3 +147,15 @@ func mergestrings() {
}
alldata.Size = int64(len(alldata.P))
}
// strSymsByLen implements sort.Interface. It sorts *LSym by the length of P.
type strSymsByLen []*LSym
func (s strSymsByLen) Len() int { return len(s) }
func (s strSymsByLen) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s strSymsByLen) Less(i, j int) bool {
if len(s[i].P) == len(s[j].P) {
return bytes.Compare(s[i].P, s[j].P) == -1
}
return len(s[i].P) < len(s[j].P)
}