mirror of
https://github.com/golang/go
synced 2024-11-18 09:04:49 -07:00
cover: significantly improve the speed of cover.ParseProfiles
ParseProfiles currently uses a regex to parse each line. This is not very fast, and can lead to ParseProfiles being excessively slow on certain pathological inputs. This change substantially improves the performance by parsing manually instead. On an input of about 3 GB of data containing about 36 million lines, the time spent in ParseProfiles drops from 72 seconds to 11 seconds, with actual string parsing time dropping from 61 seconds to 2 seconds. Since this change completely changes the parsing, it also adds some tests for ParseProfiles to help ensure the new parsing is correct. A benchmark for parseLine is also included. Here is a comparison of the old regex implementation versus the new manual one: name old time/op new time/op delta ParseLine-12 2.43µs ± 2% 0.05µs ± 8% -97.98% (p=0.000 n=10+9) name old speed new speed delta ParseLine-12 42.5MB/s ± 2% 2103.2MB/s ± 7% +4853.14% (p=0.000 n=10+9) Fixes golang/go#32211 Change-Id: If8f91ecbda776c08243de4e423de4eea55f0082b Reviewed-on: https://go-review.googlesource.com/c/tools/+/179377 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
be03d4f470
commit
7defa796fe
@ -8,10 +8,10 @@ package cover // import "golang.org/x/tools/cover"
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -64,11 +64,10 @@ func ParseProfiles(fileName string) ([]*Profile, error) {
|
||||
mode = line[len(p):]
|
||||
continue
|
||||
}
|
||||
m := lineRe.FindStringSubmatch(line)
|
||||
if m == nil {
|
||||
return nil, fmt.Errorf("line %q doesn't match expected format: %v", line, lineRe)
|
||||
fn, b, err := parseLine(line)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("line %q doesn't match expected format: %v", line, err)
|
||||
}
|
||||
fn := m[1]
|
||||
p := files[fn]
|
||||
if p == nil {
|
||||
p = &Profile{
|
||||
@ -77,14 +76,7 @@ func ParseProfiles(fileName string) ([]*Profile, error) {
|
||||
}
|
||||
files[fn] = p
|
||||
}
|
||||
p.Blocks = append(p.Blocks, ProfileBlock{
|
||||
StartLine: toInt(m[2]),
|
||||
StartCol: toInt(m[3]),
|
||||
EndLine: toInt(m[4]),
|
||||
EndCol: toInt(m[5]),
|
||||
NumStmt: toInt(m[6]),
|
||||
Count: toInt(m[7]),
|
||||
})
|
||||
p.Blocks = append(p.Blocks, b)
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
@ -124,6 +116,64 @@ func ParseProfiles(fileName string) ([]*Profile, error) {
|
||||
return profiles, nil
|
||||
}
|
||||
|
||||
// parseLine parses a line from a coverage file.
|
||||
// It is equivalent to the regex
|
||||
// ^(.+):([0-9]+)\.([0-9]+),([0-9]+)\.([0-9]+) ([0-9]+) ([0-9]+)$
|
||||
//
|
||||
// However, it is much faster: https://golang.org/cl/179377
|
||||
func parseLine(l string) (fileName string, block ProfileBlock, err error) {
|
||||
end := len(l)
|
||||
|
||||
b := ProfileBlock{}
|
||||
b.Count, end, err = seekBack(l, ' ', end, "Count")
|
||||
if err != nil {
|
||||
return "", b, err
|
||||
}
|
||||
b.NumStmt, end, err = seekBack(l, ' ', end, "NumStmt")
|
||||
if err != nil {
|
||||
return "", b, err
|
||||
}
|
||||
b.EndCol, end, err = seekBack(l, '.', end, "EndCol")
|
||||
if err != nil {
|
||||
return "", b, err
|
||||
}
|
||||
b.EndLine, end, err = seekBack(l, ',', end, "EndLine")
|
||||
if err != nil {
|
||||
return "", b, err
|
||||
}
|
||||
b.StartCol, end, err = seekBack(l, '.', end, "StartCol")
|
||||
if err != nil {
|
||||
return "", b, err
|
||||
}
|
||||
b.StartLine, end, err = seekBack(l, ':', end, "StartLine")
|
||||
if err != nil {
|
||||
return "", b, err
|
||||
}
|
||||
fn := l[0:end]
|
||||
if fn == "" {
|
||||
return "", b, errors.New("a FileName cannot be blank")
|
||||
}
|
||||
return fn, b, nil
|
||||
}
|
||||
|
||||
// seekBack searches backwards from end to find sep in l, then returns the
|
||||
// value between sep and end as an integer.
|
||||
// If seekBack fails, the returned error will reference what.
|
||||
func seekBack(l string, sep byte, end int, what string) (value int, nextSep int, err error) {
|
||||
// Since we're seeking backwards and we know only ASCII is legal for these values,
|
||||
// we can ignore the possibility of non-ASCII characters.
|
||||
for start := end - 1; start >= 0; start-- {
|
||||
if l[start] == sep {
|
||||
i, err := strconv.Atoi(l[start+1 : end])
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("couldn't parse %q: %v", what, err)
|
||||
}
|
||||
return i, start, nil
|
||||
}
|
||||
}
|
||||
return 0, 0, fmt.Errorf("couldn't find a %s before %s", string(sep), what)
|
||||
}
|
||||
|
||||
type blocksByStart []ProfileBlock
|
||||
|
||||
func (b blocksByStart) Len() int { return len(b) }
|
||||
@ -133,16 +183,6 @@ func (b blocksByStart) Less(i, j int) bool {
|
||||
return bi.StartLine < bj.StartLine || bi.StartLine == bj.StartLine && bi.StartCol < bj.StartCol
|
||||
}
|
||||
|
||||
var lineRe = regexp.MustCompile(`^(.+):([0-9]+).([0-9]+),([0-9]+).([0-9]+) ([0-9]+) ([0-9]+)$`)
|
||||
|
||||
func toInt(s string) int {
|
||||
i, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// Boundary represents the position in a source file of the beginning or end of a
|
||||
// block as reported by the coverage profile. In HTML mode, it will correspond to
|
||||
// the opening or closing of a <span> tag and will be used to colorize the source
|
||||
|
255
cover/profile_test.go
Normal file
255
cover/profile_test.go
Normal file
@ -0,0 +1,255 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseProfiles(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
output []*Profile
|
||||
expectErr bool
|
||||
}{
|
||||
{
|
||||
name: "parsing an empty file produces empty output",
|
||||
input: `mode: set`,
|
||||
output: []*Profile{},
|
||||
},
|
||||
{
|
||||
name: "simple valid file produces expected output",
|
||||
input: `mode: set
|
||||
some/fancy/path:42.69,44.16 2 1`,
|
||||
output: []*Profile{
|
||||
{
|
||||
FileName: "some/fancy/path",
|
||||
Mode: "set",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 42, StartCol: 69,
|
||||
EndLine: 44, EndCol: 16,
|
||||
NumStmt: 2, Count: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "file with syntax characters in path produces expected output",
|
||||
input: `mode: set
|
||||
some fancy:path/some,file.go:42.69,44.16 2 1`,
|
||||
output: []*Profile{
|
||||
{
|
||||
FileName: "some fancy:path/some,file.go",
|
||||
Mode: "set",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 42, StartCol: 69,
|
||||
EndLine: 44, EndCol: 16,
|
||||
NumStmt: 2, Count: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "file with multiple blocks in one file produces expected output",
|
||||
input: `mode: set
|
||||
some/fancy/path:42.69,44.16 2 1
|
||||
some/fancy/path:44.16,46.3 1 0`,
|
||||
output: []*Profile{
|
||||
{
|
||||
FileName: "some/fancy/path",
|
||||
Mode: "set",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 42, StartCol: 69,
|
||||
EndLine: 44, EndCol: 16,
|
||||
NumStmt: 2, Count: 1,
|
||||
},
|
||||
{
|
||||
StartLine: 44, StartCol: 16,
|
||||
EndLine: 46, EndCol: 3,
|
||||
NumStmt: 1, Count: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "file with multiple files produces expected output",
|
||||
input: `mode: set
|
||||
another/fancy/path:44.16,46.3 1 0
|
||||
some/fancy/path:42.69,44.16 2 1`,
|
||||
output: []*Profile{
|
||||
{
|
||||
FileName: "another/fancy/path",
|
||||
Mode: "set",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 44, StartCol: 16,
|
||||
EndLine: 46, EndCol: 3,
|
||||
NumStmt: 1, Count: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
FileName: "some/fancy/path",
|
||||
Mode: "set",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 42, StartCol: 69,
|
||||
EndLine: 44, EndCol: 16,
|
||||
NumStmt: 2, Count: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "intertwined files are merged correctly",
|
||||
input: `mode: set
|
||||
some/fancy/path:42.69,44.16 2 1
|
||||
another/fancy/path:47.2,47.13 1 1
|
||||
some/fancy/path:44.16,46.3 1 0`,
|
||||
output: []*Profile{
|
||||
{
|
||||
FileName: "another/fancy/path",
|
||||
Mode: "set",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 47, StartCol: 2,
|
||||
EndLine: 47, EndCol: 13,
|
||||
NumStmt: 1, Count: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
FileName: "some/fancy/path",
|
||||
Mode: "set",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 42, StartCol: 69,
|
||||
EndLine: 44, EndCol: 16,
|
||||
NumStmt: 2, Count: 1,
|
||||
},
|
||||
{
|
||||
StartLine: 44, StartCol: 16,
|
||||
EndLine: 46, EndCol: 3,
|
||||
NumStmt: 1, Count: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "duplicate blocks are merged correctly",
|
||||
input: `mode: count
|
||||
some/fancy/path:42.69,44.16 2 4
|
||||
some/fancy/path:42.69,44.16 2 3`,
|
||||
output: []*Profile{
|
||||
{
|
||||
FileName: "some/fancy/path",
|
||||
Mode: "count",
|
||||
Blocks: []ProfileBlock{
|
||||
{
|
||||
StartLine: 42, StartCol: 69,
|
||||
EndLine: 44, EndCol: 16,
|
||||
NumStmt: 2, Count: 7,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "an invalid mode line is an error",
|
||||
input: `mode:count`,
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "a missing field is an error",
|
||||
input: `mode: count
|
||||
some/fancy/path:42.69,44.16 2`,
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "a missing path field is an error",
|
||||
input: `mode: count
|
||||
42.69,44.16 2 3`,
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "a non-numeric count is an error",
|
||||
input: `mode: count
|
||||
42.69,44.16 2 nope`,
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "an empty path is an error",
|
||||
input: `mode: count
|
||||
:42.69,44.16 2 3`,
|
||||
expectErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
f, err := ioutil.TempFile("", "")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create a temp file: %v.", err)
|
||||
}
|
||||
defer func() {
|
||||
f.Close()
|
||||
os.Remove(f.Name())
|
||||
}()
|
||||
n, err := f.WriteString(tc.input)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to write to temp file: %v", err)
|
||||
}
|
||||
if n < len(tc.input) {
|
||||
t.Fatalf("Didn't write enough bytes to temp file (wrote %d, expected %d).", n, len(tc.input))
|
||||
}
|
||||
if err := f.Sync(); err != nil {
|
||||
t.Fatalf("Failed to sync temp file: %v", err)
|
||||
}
|
||||
|
||||
result, err := ParseProfiles(f.Name())
|
||||
if err != nil {
|
||||
if !tc.expectErr {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
if tc.expectErr {
|
||||
t.Errorf("Expected an error, but got value %q", stringifyProfileArray(result))
|
||||
}
|
||||
if !reflect.DeepEqual(result, tc.output) {
|
||||
t.Errorf("Mismatched results.\nExpected: %s\nActual: %s", stringifyProfileArray(tc.output), stringifyProfileArray(result))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func stringifyProfileArray(profiles []*Profile) string {
|
||||
deref := make([]Profile, 0, len(profiles))
|
||||
for _, p := range profiles {
|
||||
deref = append(deref, *p)
|
||||
}
|
||||
return fmt.Sprintf("%#v", deref)
|
||||
}
|
||||
|
||||
func BenchmarkParseLine(b *testing.B) {
|
||||
const line = "k8s.io/kubernetes/cmd/kube-controller-manager/app/options/ttlafterfinishedcontroller.go:31.73,32.14 1 1"
|
||||
b.SetBytes(int64(len(line)))
|
||||
for n := 0; n < b.N; n++ {
|
||||
parseLine(line)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user