mirror of
https://github.com/golang/go
synced 2024-11-19 13:14:42 -07:00
cmd/link: deduplicate read-only string data
Many read-only strings in Go binaries are substrings of other read-only strings. A common source is the text form of type information, which will include both "struct { X int }" and "*struct { X int }" or "*bytes.Reader" and "func(*bytes.Reader)" in the same binary. Because this character data is referred to by separate string headers, we can skip writing the smaller string and modify the pointer relocation to point to the larger string. This CL does this deduplication in the linker after the reachable set of strings has been determined. This removes 765KB from juju (1.4% without DWARF). Link time at tip goes from 4.6s to 6.3s, but note that this CL is part of a series that recently reduced link time from 9.6s. For #6853. Change-Id: Ib2087cf627c9f1e9a1181f9b4c8f81d1a3f42191 Reviewed-on: https://go-review.googlesource.com/19987 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: David Crawshaw <crawshaw@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
69285a8b46
commit
afd835434c
@ -5,7 +5,10 @@
|
|||||||
package ld
|
package ld
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"cmd/internal/obj"
|
"cmd/internal/obj"
|
||||||
|
"index/suffixarray"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -48,12 +51,87 @@ func mergestrings() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sort the strings, shortest first.
|
||||||
|
//
|
||||||
|
// Ordering by length lets us use the largest matching substring
|
||||||
|
// index when there are multiple matches. This means we will not
|
||||||
|
// use a substring of a string that we will later in the pass
|
||||||
|
// mark as unreachable.
|
||||||
|
sort.Sort(strSymsByLen(strs))
|
||||||
|
|
||||||
|
// Build a suffix array.
|
||||||
|
dataOff := make([]int, len(strs))
|
||||||
|
data := make([]byte, 0, size)
|
||||||
|
for i := range strs {
|
||||||
|
dataOff[i] = len(data)
|
||||||
|
data = append(data, strs[i].P...)
|
||||||
|
}
|
||||||
|
index := suffixarray.New(data)
|
||||||
|
|
||||||
|
// Search for substring replacements.
|
||||||
|
type replacement struct {
|
||||||
|
str *LSym
|
||||||
|
off int
|
||||||
|
}
|
||||||
|
replacements := make(map[*LSym]replacement)
|
||||||
|
for i, s := range strs {
|
||||||
|
results := index.Lookup(s.P, -1)
|
||||||
|
if len(results) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var res int
|
||||||
|
for _, result := range results {
|
||||||
|
if result > res {
|
||||||
|
res = result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var off int
|
||||||
|
x := sort.SearchInts(dataOff, res)
|
||||||
|
if x == len(dataOff) || dataOff[x] > res {
|
||||||
|
x--
|
||||||
|
off = res - dataOff[x]
|
||||||
|
}
|
||||||
|
if x == i {
|
||||||
|
continue // found ourself
|
||||||
|
}
|
||||||
|
if len(s.P) > len(strs[x].P[off:]) {
|
||||||
|
// Do not use substrings that match across strings.
|
||||||
|
// In theory it is possible, but it would
|
||||||
|
// complicate accounting for which future strings
|
||||||
|
// are already used. It doesn't appear to be common
|
||||||
|
// enough to do the extra work.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if off%Thearch.Minalign != 0 {
|
||||||
|
continue // Cannot relocate to this substring.
|
||||||
|
}
|
||||||
|
replacements[s] = replacement{
|
||||||
|
str: strs[x],
|
||||||
|
off: off,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Put all string data into a single symbol and update the relocations.
|
// Put all string data into a single symbol and update the relocations.
|
||||||
alldata := Linklookup(Ctxt, "go.string.alldata", 0)
|
alldata := Linklookup(Ctxt, "go.string.alldata", 0)
|
||||||
alldata.Type = obj.SGOSTRING
|
alldata.Type = obj.SGOSTRING
|
||||||
alldata.Attr |= AttrReachable
|
alldata.Attr |= AttrReachable
|
||||||
alldata.P = make([]byte, 0, size)
|
alldata.P = make([]byte, 0, size)
|
||||||
for _, str := range strs {
|
for _, str := range strs {
|
||||||
|
str.Attr.Set(AttrReachable, false)
|
||||||
|
if rep, isReplaced := replacements[str]; isReplaced {
|
||||||
|
// As strs is sorted, the replacement string
|
||||||
|
// is always later in the strs range. Shift the
|
||||||
|
// relocations to the replacement string symbol
|
||||||
|
// and process them.
|
||||||
|
relocs := relocsToStrs[rep.str]
|
||||||
|
for _, r := range relocsToStrs[str] {
|
||||||
|
r.Add += int64(rep.off)
|
||||||
|
relocs = append(relocs, r)
|
||||||
|
}
|
||||||
|
relocsToStrs[rep.str] = relocs
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
off := len(alldata.P)
|
off := len(alldata.P)
|
||||||
alldata.P = append(alldata.P, str.P...)
|
alldata.P = append(alldata.P, str.P...)
|
||||||
// Architectures with Minalign > 1 cannot have relocations pointing
|
// Architectures with Minalign > 1 cannot have relocations pointing
|
||||||
@ -62,7 +140,6 @@ func mergestrings() {
|
|||||||
for r := len(alldata.P) % Thearch.Minalign; r > 0; r-- {
|
for r := len(alldata.P) % Thearch.Minalign; r > 0; r-- {
|
||||||
alldata.P = append(alldata.P, 0)
|
alldata.P = append(alldata.P, 0)
|
||||||
}
|
}
|
||||||
str.Attr.Set(AttrReachable, false)
|
|
||||||
for _, r := range relocsToStrs[str] {
|
for _, r := range relocsToStrs[str] {
|
||||||
r.Add += int64(off)
|
r.Add += int64(off)
|
||||||
r.Sym = alldata
|
r.Sym = alldata
|
||||||
@ -70,3 +147,15 @@ func mergestrings() {
|
|||||||
}
|
}
|
||||||
alldata.Size = int64(len(alldata.P))
|
alldata.Size = int64(len(alldata.P))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// strSymsByLen implements sort.Interface. It sorts *LSym by the length of P.
|
||||||
|
type strSymsByLen []*LSym
|
||||||
|
|
||||||
|
func (s strSymsByLen) Len() int { return len(s) }
|
||||||
|
func (s strSymsByLen) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
func (s strSymsByLen) Less(i, j int) bool {
|
||||||
|
if len(s[i].P) == len(s[j].P) {
|
||||||
|
return bytes.Compare(s[i].P, s[j].P) == -1
|
||||||
|
}
|
||||||
|
return len(s[i].P) < len(s[j].P)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user