1
0
mirror of https://github.com/golang/go synced 2024-11-22 15:24:42 -07:00

internal/trace: add task analysis for v2 traces

For v1 traces, cmd/trace contains code for analyzing tasks separately
from the goroutine analysis code present in internal/trace. As I started
to look into porting that code to v2 traces, I noticed that it wouldn't
be too hard to just generalize the existing v2 goroutine summary code to
generate exactly the same information.

This change does exactly that.

For #60773.
For #63960.

Change-Id: I0cdd9bf9ba11fb292a9ffc37dbf18c2a6a2483b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/542076
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Michael Anthony Knyszek 2023-11-13 21:06:40 +00:00 committed by Gopher Robot
parent 5dde69fd40
commit 816ee0be38
6 changed files with 1554 additions and 37 deletions

View File

@ -75,8 +75,8 @@ func Main(traceFile, httpAddr, pprof string, debug int) error {
mux.Handle("/static/", traceviewer.StaticHandler())
// Goroutines handlers.
mux.HandleFunc("/goroutines", GoroutinesHandlerFunc(parsed.gSummaries))
mux.HandleFunc("/goroutine", GoroutineHandler(parsed.gSummaries))
mux.HandleFunc("/goroutines", GoroutinesHandlerFunc(parsed.summary.Goroutines))
mux.HandleFunc("/goroutine", GoroutineHandler(parsed.summary.Goroutines))
// MMU handler.
mux.HandleFunc("/mmu", traceviewer.MMUHandlerFunc(ranges, mutatorUtil))
@ -98,8 +98,8 @@ func Main(traceFile, httpAddr, pprof string, debug int) error {
}
type parsedTrace struct {
events []tracev2.Event
gSummaries map[tracev2.GoID]*trace.GoroutineSummary
events []tracev2.Event
summary *trace.Summary
}
func parseTrace(tr io.Reader) (*parsedTrace, error) {
@ -107,7 +107,7 @@ func parseTrace(tr io.Reader) (*parsedTrace, error) {
if err != nil {
return nil, fmt.Errorf("failed to create trace reader: %w", err)
}
s := trace.NewGoroutineSummarizer()
s := trace.NewSummarizer()
t := new(parsedTrace)
for {
ev, err := r.ReadEvent()
@ -119,7 +119,7 @@ func parseTrace(tr io.Reader) (*parsedTrace, error) {
t.events = append(t.events, ev)
s.Event(&t.events[len(t.events)-1])
}
t.gSummaries = s.Finalize()
t.summary = s.Finalize()
return t, nil
}

View File

@ -57,7 +57,7 @@ func pprofMatchingGoroutines(id string, t *parsedTrace) (map[tracev2.GoID][]inte
return nil, fmt.Errorf("invalid goroutine type: %v", id)
}
res := make(map[tracev2.GoID][]interval)
for _, g := range t.gSummaries {
for _, g := range t.summary.Goroutines {
if g.PC != pc {
continue
}
@ -81,7 +81,7 @@ func pprofMatchingRegions(filter *regionFilter, t *parsedTrace) (map[tracev2.GoI
}
gToIntervals := make(map[tracev2.GoID][]interval)
for _, g := range t.gSummaries {
for _, g := range t.summary.Goroutines {
for _, r := range g.Regions {
if !filter.match(t, r) {
continue

View File

@ -11,6 +11,12 @@ import (
"time"
)
// Summary is the analysis result produced by the summarizer.
type Summary struct {
Goroutines map[tracev2.GoID]*GoroutineSummary
Tasks map[tracev2.TaskID]*UserTaskSummary
}
// GoroutineSummary contains statistics and execution details of a single goroutine.
// (For v2 traces.)
type GoroutineSummary struct {
@ -35,6 +41,29 @@ type GoroutineSummary struct {
*goroutineSummary
}
// UserTaskSummary represents a task in the trace.
type UserTaskSummary struct {
ID tracev2.TaskID
Name string
Parent *UserTaskSummary // nil if the parent is unknown.
Children []*UserTaskSummary
// Task begin event. An EventTaskBegin event or nil.
Start *tracev2.Event
// End end event. Normally EventTaskEnd event or nil,
End *tracev2.Event
// Logs is a list of tracev2.EventLog events associated with the task.
Logs []*tracev2.Event
// List of regions in the task, sorted based on the start time.
Regions []*UserRegionSummary
// Goroutines is the set of goroutines associated with this task.
Goroutines map[tracev2.GoID]*GoroutineSummary
}
// UserRegionSummary represents a region and goroutine execution stats
// while the region was active. (For v2 traces.)
type UserRegionSummary struct {
@ -209,11 +238,14 @@ type goroutineSummary struct {
activeRegions []*UserRegionSummary // stack of active regions
}
// GoroutineSummarizer constructs per-goroutine time statistics for v2 traces.
type GoroutineSummarizer struct {
// Summarizer constructs per-goroutine time statistics for v2 traces.
type Summarizer struct {
// gs contains the map of goroutine summaries we're building up to return to the caller.
gs map[tracev2.GoID]*GoroutineSummary
// tasks contains the map of task summaries we're building up to return to the caller.
tasks map[tracev2.TaskID]*UserTaskSummary
// syscallingP and syscallingG represent a binding between a P and G in a syscall.
// Used to correctly identify and clean up after syscalls (blocking or otherwise).
syscallingP map[tracev2.ProcID]tracev2.GoID
@ -229,10 +261,11 @@ type GoroutineSummarizer struct {
syncTs tracev2.Time // timestamp of the last sync event processed (or the first timestamp in the trace).
}
// NewGoroutineSummarizer creates a new struct to build goroutine stats from a trace.
func NewGoroutineSummarizer() *GoroutineSummarizer {
return &GoroutineSummarizer{
// NewSummarizer creates a new struct to build goroutine stats from a trace.
func NewSummarizer() *Summarizer {
return &Summarizer{
gs: make(map[tracev2.GoID]*GoroutineSummary),
tasks: make(map[tracev2.TaskID]*UserTaskSummary),
syscallingP: make(map[tracev2.ProcID]tracev2.GoID),
syscallingG: make(map[tracev2.GoID]tracev2.ProcID),
rangesP: make(map[rangeP]tracev2.GoID),
@ -245,7 +278,7 @@ type rangeP struct {
}
// Event feeds a single event into the stats summarizer.
func (s *GoroutineSummarizer) Event(ev *tracev2.Event) {
func (s *Summarizer) Event(ev *tracev2.Event) {
if s.syncTs == 0 {
s.syncTs = ev.Time()
}
@ -460,12 +493,17 @@ func (s *GoroutineSummarizer) Event(ev *tracev2.Event) {
case tracev2.EventRegionBegin:
g := s.gs[ev.Goroutine()]
r := ev.Region()
g.activeRegions = append(g.activeRegions, &UserRegionSummary{
region := &UserRegionSummary{
Name: r.Type,
TaskID: r.Task,
Start: ev,
GoroutineExecStats: g.snapshotStat(ev.Time()),
})
}
g.activeRegions = append(g.activeRegions, region)
// Associate the region and current goroutine to the task.
task := s.getOrAddTask(r.Task)
task.Regions = append(task.Regions, region)
task.Goroutines[g.ID] = g
case tracev2.EventRegionEnd:
g := s.gs[ev.Goroutine()]
r := ev.Region()
@ -476,19 +514,61 @@ func (s *GoroutineSummarizer) Event(ev *tracev2.Event) {
sd = regionStk[n-1]
regionStk = regionStk[:n-1]
g.activeRegions = regionStk
// N.B. No need to add the region to a task; the EventRegionBegin already handled it.
} else {
// This is an "end" without a start. Just fabricate the region now.
sd = &UserRegionSummary{Name: r.Type, TaskID: r.Task}
// Associate the region and current goroutine to the task.
task := s.getOrAddTask(r.Task)
task.Goroutines[g.ID] = g
task.Regions = append(task.Regions, sd)
}
sd.GoroutineExecStats = g.snapshotStat(ev.Time()).sub(sd.GoroutineExecStats)
sd.End = ev
g.Regions = append(g.Regions, sd)
// Handle tasks and logs.
case tracev2.EventTaskBegin, tracev2.EventTaskEnd:
// Initialize the task.
t := ev.Task()
task := s.getOrAddTask(t.ID)
task.Name = t.Type
task.Goroutines[ev.Goroutine()] = s.gs[ev.Goroutine()]
if ev.Kind() == tracev2.EventTaskBegin {
task.Start = ev
} else {
task.End = ev
}
// Initialize the parent, if one exists and it hasn't been done yet.
// We need to avoid doing it twice, otherwise we could appear twice
// in the parent's Children list.
if t.Parent != tracev2.NoTask && task.Parent == nil {
parent := s.getOrAddTask(t.Parent)
task.Parent = parent
parent.Children = append(parent.Children, task)
}
case tracev2.EventLog:
log := ev.Log()
// Just add the log to the task. We'll create the task if it
// doesn't exist (it's just been mentioned now).
task := s.getOrAddTask(log.Task)
task.Goroutines[ev.Goroutine()] = s.gs[ev.Goroutine()]
task.Logs = append(task.Logs, ev)
}
}
func (s *Summarizer) getOrAddTask(id tracev2.TaskID) *UserTaskSummary {
task := s.tasks[id]
if task == nil {
task = &UserTaskSummary{ID: id, Goroutines: make(map[tracev2.GoID]*GoroutineSummary)}
s.tasks[id] = task
}
return task
}
// Finalize indicates to the summarizer that we're done processing the trace.
// It cleans up any remaining state and returns the full summary.
func (s *GoroutineSummarizer) Finalize() map[tracev2.GoID]*GoroutineSummary {
func (s *Summarizer) Finalize() *Summary {
for _, g := range s.gs {
g.finalize(s.lastTs, nil)
@ -506,7 +586,10 @@ func (s *GoroutineSummarizer) Finalize() map[tracev2.GoID]*GoroutineSummary {
})
g.goroutineSummary = nil
}
return s.gs
return &Summary{
Goroutines: s.gs,
Tasks: s.tasks,
}
}
// RelatedGoroutinesV2 finds a set of goroutines related to goroutine goid for v2 traces.

View File

@ -12,14 +12,14 @@ import (
)
func TestSummarizeGoroutinesTrace(t *testing.T) {
summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-gc-stress.test")
summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-gc-stress.test").Goroutines
var (
hasSchedWaitTime bool
hasSyncBlockTime bool
hasGCMarkAssistTime bool
)
for _, summary := range summaries {
basicSummaryChecks(t, summary)
basicGoroutineSummaryChecks(t, summary)
hasSchedWaitTime = hasSchedWaitTime || summary.SchedWaitTime > 0
if dt, ok := summary.BlockTimeByReason["sync"]; ok && dt > 0 {
hasSyncBlockTime = true
@ -40,7 +40,7 @@ func TestSummarizeGoroutinesTrace(t *testing.T) {
}
func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {
summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-annotations.test")
summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-annotations.test").Goroutines
type region struct {
startKind tracev2.EventKind
endKind tracev2.EventKind
@ -58,7 +58,7 @@ func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {
"post-existing region": {tracev2.EventRegionBegin, tracev2.EventBad},
}
for _, summary := range summaries {
basicSummaryChecks(t, summary)
basicGoroutineSummaryChecks(t, summary)
for _, region := range summary.Regions {
want, ok := wantRegions[region.Name]
if !ok {
@ -73,7 +73,166 @@ func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {
}
}
func basicSummaryChecks(t *testing.T, summary *GoroutineSummary) {
func TestSummarizeTasksTrace(t *testing.T) {
summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-annotations-stress.test").Tasks
type task struct {
name string
parent *tracev2.TaskID
children []tracev2.TaskID
logs []tracev2.Log
goroutines []tracev2.GoID
}
parent := func(id tracev2.TaskID) *tracev2.TaskID {
p := new(tracev2.TaskID)
*p = id
return p
}
wantTasks := map[tracev2.TaskID]task{
tracev2.BackgroundTask: {
// The background task (0) is never any task's parent.
logs: []tracev2.Log{
{Task: tracev2.BackgroundTask, Category: "log", Message: "before do"},
{Task: tracev2.BackgroundTask, Category: "log", Message: "before do"},
},
goroutines: []tracev2.GoID{1},
},
1: {
// This started before tracing started and has no parents.
// Task 2 is technically a child, but we lost that information.
children: []tracev2.TaskID{3, 7, 16},
logs: []tracev2.Log{
{Task: 1, Category: "log", Message: "before do"},
{Task: 1, Category: "log", Message: "before do"},
},
goroutines: []tracev2.GoID{1},
},
2: {
// This started before tracing started and its parent is technically (1), but that information was lost.
children: []tracev2.TaskID{8, 17},
logs: []tracev2.Log{
{Task: 2, Category: "log", Message: "before do"},
{Task: 2, Category: "log", Message: "before do"},
},
goroutines: []tracev2.GoID{1},
},
3: {
parent: parent(1),
children: []tracev2.TaskID{10, 19},
logs: []tracev2.Log{
{Task: 3, Category: "log", Message: "before do"},
{Task: 3, Category: "log", Message: "before do"},
},
goroutines: []tracev2.GoID{1},
},
4: {
// Explicitly, no parent.
children: []tracev2.TaskID{12, 21},
logs: []tracev2.Log{
{Task: 4, Category: "log", Message: "before do"},
{Task: 4, Category: "log", Message: "before do"},
},
goroutines: []tracev2.GoID{1},
},
12: {
parent: parent(4),
children: []tracev2.TaskID{13},
logs: []tracev2.Log{
// TODO(mknyszek): This is computed asynchronously in the trace,
// which makes regenerating this test very annoying, since it will
// likely break this test. Resolve this by making the order not matter.
{Task: 12, Category: "log2", Message: "do"},
{Task: 12, Category: "log", Message: "fanout region4"},
{Task: 12, Category: "log", Message: "fanout region0"},
{Task: 12, Category: "log", Message: "fanout region1"},
{Task: 12, Category: "log", Message: "fanout region2"},
{Task: 12, Category: "log", Message: "before do"},
{Task: 12, Category: "log", Message: "fanout region3"},
},
goroutines: []tracev2.GoID{1, 5, 6, 7, 8, 9},
},
13: {
// Explicitly, no children.
parent: parent(12),
logs: []tracev2.Log{
{Task: 13, Category: "log2", Message: "do"},
},
goroutines: []tracev2.GoID{7},
},
}
for id, summary := range summaries {
want, ok := wantTasks[id]
if !ok {
continue
}
if id != summary.ID {
t.Errorf("ambiguous task %d (or %d?): field likely set incorrectly", id, summary.ID)
}
// Check parent.
if want.parent != nil {
if summary.Parent == nil {
t.Errorf("expected parent %d for task %d without a parent", *want.parent, id)
} else if summary.Parent.ID != *want.parent {
t.Errorf("bad parent for task %d: want %d, got %d", id, *want.parent, summary.Parent.ID)
}
} else if summary.Parent != nil {
t.Errorf("unexpected parent %d for task %d", summary.Parent.ID, id)
}
// Check children.
gotChildren := make(map[tracev2.TaskID]struct{})
for _, child := range summary.Children {
gotChildren[child.ID] = struct{}{}
}
for _, wantChild := range want.children {
if _, ok := gotChildren[wantChild]; ok {
delete(gotChildren, wantChild)
} else {
t.Errorf("expected child task %d for task %d not found", wantChild, id)
}
}
if len(gotChildren) != 0 {
for child := range gotChildren {
t.Errorf("unexpected child task %d for task %d", child, id)
}
}
// Check logs.
if len(want.logs) != len(summary.Logs) {
t.Errorf("wanted %d logs for task %d, got %d logs instead", len(want.logs), id, len(summary.Logs))
} else {
for i := range want.logs {
if want.logs[i] != summary.Logs[i].Log() {
t.Errorf("log mismatch: want %#v, got %#v", want.logs[i], summary.Logs[i].Log())
}
}
}
// Check goroutines.
if len(want.goroutines) != len(summary.Goroutines) {
t.Errorf("wanted %d goroutines for task %d, got %d goroutines instead", len(want.goroutines), id, len(summary.Goroutines))
} else {
for _, goid := range want.goroutines {
g, ok := summary.Goroutines[goid]
if !ok {
t.Errorf("want goroutine %d for task %d, not found", goid, id)
continue
}
if g.ID != goid {
t.Errorf("goroutine summary for %d does not match task %d listing of %d", g.ID, id, goid)
}
}
}
// Marked as seen.
delete(wantTasks, id)
}
if len(wantTasks) != 0 {
t.Errorf("failed to find tasks: %#v", wantTasks)
}
}
func basicGoroutineSummaryChecks(t *testing.T, summary *GoroutineSummary) {
if summary.ID == tracev2.NoGoroutine {
t.Error("summary found for no goroutine")
return
@ -91,13 +250,13 @@ func basicSummaryChecks(t *testing.T, summary *GoroutineSummary) {
}
}
func summarizeTraceTest(t *testing.T, testPath string) map[tracev2.GoID]*GoroutineSummary {
func summarizeTraceTest(t *testing.T, testPath string) *Summary {
trace, _, err := testtrace.ParseFile(testPath)
if err != nil {
t.Fatalf("malformed test %s: bad trace file: %v", testPath, err)
}
// Create the analysis state.
s := NewGoroutineSummarizer()
s := NewSummarizer()
// Create a reader.
r, err := tracev2.NewReader(trace)

View File

@ -7,7 +7,12 @@
package main
import (
"bytes"
"fmt"
"internal/trace/v2/raw"
"internal/trace/v2/version"
"internal/txtar"
"io"
"log"
"os"
"os/exec"
@ -17,27 +22,58 @@ import (
func main() {
log.SetFlags(0)
if err := run(); err != nil {
ctx, err := newContext()
if err != nil {
log.Fatal(err)
}
if err := ctx.runGenerators(); err != nil {
log.Fatal(err)
}
if err := ctx.runTestProg("./testprog/annotations.go"); err != nil {
log.Fatal(err)
}
if err := ctx.runTestProg("./testprog/annotations-stress.go"); err != nil {
log.Fatal(err)
}
}
func run() error {
type context struct {
testNames map[string]struct{}
filter *regexp.Regexp
}
func newContext() (*context, error) {
var filter *regexp.Regexp
var err error
if pattern := os.Getenv("GOTRACETEST"); pattern != "" {
filter, err = regexp.Compile(pattern)
if err != nil {
return nil, fmt.Errorf("compiling regexp %q for GOTRACETEST: %v", pattern, err)
}
}
return &context{
testNames: make(map[string]struct{}),
filter: filter,
}, nil
}
func (ctx *context) register(testName string) (skip bool, err error) {
if _, ok := ctx.testNames[testName]; ok {
return true, fmt.Errorf("duplicate test %s found", testName)
}
if ctx.filter != nil {
return !ctx.filter.MatchString(testName), nil
}
return false, nil
}
func (ctx *context) runGenerators() error {
generators, err := filepath.Glob("./generators/*.go")
if err != nil {
return fmt.Errorf("reading generators: %v", err)
}
genroot := "./tests"
// Grab a pattern, if any.
var re *regexp.Regexp
if pattern := os.Getenv("GOTRACETEST"); pattern != "" {
re, err = regexp.Compile(pattern)
if err != nil {
return fmt.Errorf("compiling regexp %q for GOTRACETEST: %v", pattern, err)
}
}
if err := os.MkdirAll(genroot, 0777); err != nil {
return fmt.Errorf("creating generated root: %v", err)
}
@ -46,7 +82,11 @@ func run() error {
name = name[:len(name)-len(filepath.Ext(name))]
// Skip if we have a pattern and this test doesn't match.
if re != nil && !re.MatchString(name) {
skip, err := ctx.register(name)
if err != nil {
return err
}
if skip {
continue
}
@ -64,3 +104,59 @@ func run() error {
}
return nil
}
func (ctx *context) runTestProg(progPath string) error {
name := filepath.Base(progPath)
name = name[:len(name)-len(filepath.Ext(name))]
name = fmt.Sprintf("go1%d-%s", version.Current, name)
// Skip if we have a pattern and this test doesn't match.
skip, err := ctx.register(name)
if err != nil {
return err
}
if skip {
return nil
}
// Create command.
var trace, stderr bytes.Buffer
cmd := exec.Command("go", "run", progPath)
// TODO(mknyszek): Remove if goexperiment.Exectracer2 becomes the default.
cmd.Env = append(os.Environ(), "GOEXPERIMENT=exectracer2")
cmd.Stdout = &trace
cmd.Stderr = &stderr
// Run trace program; the trace will appear in stdout.
fmt.Fprintf(os.Stderr, "running trace program %s...\n", name)
if err := cmd.Run(); err != nil {
log.Fatalf("running trace program: %v:\n%s", err, stderr.String())
}
// Write out the trace.
var textTrace bytes.Buffer
r, err := raw.NewReader(&trace)
if err != nil {
log.Fatalf("reading trace: %v", err)
}
w, err := raw.NewTextWriter(&textTrace, version.Current)
for {
ev, err := r.ReadEvent()
if err == io.EOF {
break
}
if err != nil {
log.Fatalf("reading trace: %v", err)
}
if err := w.WriteEvent(ev); err != nil {
log.Fatalf("writing trace: %v", err)
}
}
testData := txtar.Format(&txtar.Archive{
Files: []txtar.File{
{Name: "expect", Data: []byte("SUCCESS")},
{Name: "trace", Data: textTrace.Bytes()},
},
})
return os.WriteFile(fmt.Sprintf("./tests/%s.test", name), testData, 0o664)
}

File diff suppressed because it is too large Load Diff