mirror of
https://github.com/golang/go
synced 2024-11-21 23:34:42 -07:00
runtime, pprof: add profiling of thread creation
Same idea as heap profile: how did each thread get created? Low memory (256 bytes per OS thread), high reward for programs that suddenly have many threads running. Fixes #1477. R=golang-dev, r, dvyukov CC=golang-dev https://golang.org/cl/5639059
This commit is contained in:
parent
fff732ea2c
commit
5b93fc9da6
6
src/cmd/dist/build.c
vendored
6
src/cmd/dist/build.c
vendored
@ -684,6 +684,12 @@ install(char *dir)
|
||||
copy(bpathf(&b, "%s/signals_GOOS.h", workdir),
|
||||
bpathf(&b1, "%s/signals_%s.h", bstr(&path), goos));
|
||||
}
|
||||
|
||||
// For cmd/prof, copy pprof into the tool directory.
|
||||
if(streq(dir, "cmd/prof")) {
|
||||
copy(bpathf(&b, "%s/bin/tool/pprof", goroot),
|
||||
bpathf(&b, "%s/src/cmd/prof/pprof", goroot));
|
||||
}
|
||||
|
||||
// Generate any missing files; regenerate existing ones.
|
||||
for(i=0; i<files.len; i++) {
|
||||
|
@ -103,6 +103,7 @@ my $CURL = "curl";
|
||||
|
||||
# These are the web pages that servers need to support for dynamic profiles
|
||||
my $HEAP_PAGE = "/pprof/heap";
|
||||
my $THREAD_PAGE = "/pprof/thread";
|
||||
my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#"
|
||||
my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param
|
||||
# ?seconds=#&event=x&period=n
|
||||
@ -149,7 +150,7 @@ pprof [options] <profile>
|
||||
|
||||
The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile,
|
||||
$GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
|
||||
or /pprof/filteredprofile.
|
||||
$THREAD_PAGE, or /pprof/filteredprofile.
|
||||
For instance:
|
||||
pprof http://myserver.com:80$HEAP_PAGE
|
||||
If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
|
||||
@ -2451,6 +2452,8 @@ sub Units {
|
||||
}
|
||||
} elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) {
|
||||
return "seconds";
|
||||
} elsif ($main::profile_type eq 'thread') {
|
||||
return "threads";
|
||||
} else {
|
||||
return "samples";
|
||||
}
|
||||
@ -2968,7 +2971,7 @@ sub IsProfileURL {
|
||||
sub ParseProfileURL {
|
||||
my $profile_name = shift;
|
||||
if (defined($profile_name) &&
|
||||
$profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|(.*?)($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) {
|
||||
$profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|(.*?)($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$THREAD_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) {
|
||||
# $7 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after
|
||||
# the hostname, as long as that everything is the empty string,
|
||||
# a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc.
|
||||
@ -3065,30 +3068,20 @@ sub FetchSymbols {
|
||||
|
||||
if (!defined($symbol_map)) {
|
||||
$symbol_map = {};
|
||||
my @toask = @pcs;
|
||||
while (@toask > 0) {
|
||||
my $n = @toask;
|
||||
# NOTE(rsc): Limiting the number of PCs requested per round
|
||||
# used to be necessary, but I think it was a bug in
|
||||
# debug/pprof/symbol's implementation. Leaving here
|
||||
# in case I am wrong.
|
||||
# if ($n > 49) { $n = 49; }
|
||||
my @thisround = @toask[0..$n];
|
||||
@toask = @toask[($n+1)..(@toask-1)];
|
||||
my $post_data = join("+", sort((map {"0x" . "$_"} @thisround)));
|
||||
open(POSTFILE, ">$main::tmpfile_sym");
|
||||
print POSTFILE $post_data;
|
||||
close(POSTFILE);
|
||||
|
||||
my $url = SymbolPageURL();
|
||||
$url = ResolveRedirectionForCurl($url);
|
||||
my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
|
||||
# We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
|
||||
my $cppfilt = $obj_tool_map{"c++filt"};
|
||||
open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
|
||||
ReadSymbols(*SYMBOL{IO}, $symbol_map);
|
||||
close(SYMBOL);
|
||||
}
|
||||
my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));
|
||||
open(POSTFILE, ">$main::tmpfile_sym");
|
||||
print POSTFILE $post_data;
|
||||
close(POSTFILE);
|
||||
|
||||
my $url = SymbolPageURL();
|
||||
$url = ResolveRedirectionForCurl($url);
|
||||
my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
|
||||
# We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
|
||||
my $cppfilt = $obj_tool_map{"c++filt"};
|
||||
open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
|
||||
ReadSymbols(*SYMBOL{IO}, $symbol_map);
|
||||
close(SYMBOL);
|
||||
}
|
||||
|
||||
my $symbols = {};
|
||||
@ -3462,6 +3455,9 @@ sub ReadProfile {
|
||||
"condition variable signals as lock contentions.\n";
|
||||
$main::profile_type = 'contention';
|
||||
$result = ReadSynchProfile($prog, $fname);
|
||||
} elsif ($header =~ m/^thread creation profile:/) {
|
||||
$main::profile_type = 'thread';
|
||||
$result = ReadThreadProfile($prog, $fname);
|
||||
} elsif ($header =~ m/^--- *$profile_marker/) {
|
||||
# the binary cpu profile data starts immediately after this line
|
||||
$main::profile_type = 'cpu';
|
||||
@ -3777,6 +3773,61 @@ sub ReadHeapProfile {
|
||||
return $r;
|
||||
}
|
||||
|
||||
sub ReadThreadProfile {
|
||||
my $prog = shift;
|
||||
my $fname = shift;
|
||||
|
||||
my $profile = {};
|
||||
my $pcs = {};
|
||||
my $map = "";
|
||||
|
||||
while (<PROFILE>) {
|
||||
s/\r//g; # turn windows-looking lines into unix-looking lines
|
||||
if (/^MAPPED_LIBRARIES:/) {
|
||||
# Read the /proc/self/maps data
|
||||
while (<PROFILE>) {
|
||||
s/\r//g; # turn windows-looking lines into unix-looking lines
|
||||
$map .= $_;
|
||||
}
|
||||
last;
|
||||
}
|
||||
|
||||
if (/^--- Memory map:/) {
|
||||
# Read /proc/self/maps data as formatted by DumpAddressMap()
|
||||
my $buildvar = "";
|
||||
while (<PROFILE>) {
|
||||
s/\r//g; # turn windows-looking lines into unix-looking lines
|
||||
# Parse "build=<dir>" specification if supplied
|
||||
if (m/^\s*build=(.*)\n/) {
|
||||
$buildvar = $1;
|
||||
}
|
||||
|
||||
# Expand "$build" variable if available
|
||||
$_ =~ s/\$build\b/$buildvar/g;
|
||||
|
||||
$map .= $_;
|
||||
}
|
||||
last;
|
||||
}
|
||||
|
||||
# Read entry of the form:
|
||||
# @ a1 a2 a3 ... an
|
||||
s/^\s*//;
|
||||
s/\s*$//;
|
||||
if (m/^@\s+(.*)$/) {
|
||||
AddEntries($profile, $pcs, FixCallerAddresses($1), 1);
|
||||
}
|
||||
}
|
||||
|
||||
my $r = {};
|
||||
$r->{version} = "thread";
|
||||
$r->{period} = 1;
|
||||
$r->{profile} = $profile;
|
||||
$r->{libs} = ParseLibraries($prog, $map, $pcs);
|
||||
$r->{pcs} = $pcs;
|
||||
return $r;
|
||||
}
|
||||
|
||||
sub ReadSynchProfile {
|
||||
my ($prog, $fname, $header) = @_;
|
||||
|
||||
|
@ -16,11 +16,15 @@
|
||||
//
|
||||
// Then use the pprof tool to look at the heap profile:
|
||||
//
|
||||
// pprof http://localhost:6060/debug/pprof/heap
|
||||
// go tool pprof http://localhost:6060/debug/pprof/heap
|
||||
//
|
||||
// Or to look at a 30-second CPU profile:
|
||||
//
|
||||
// pprof http://localhost:6060/debug/pprof/profile
|
||||
// go tool pprof http://localhost:6060/debug/pprof/profile
|
||||
//
|
||||
// Or to look at the thread creation profile:
|
||||
//
|
||||
// go tool pprof http://localhost:6060/debug/pprof/thread
|
||||
//
|
||||
package pprof
|
||||
|
||||
@ -43,6 +47,7 @@ func init() {
|
||||
http.Handle("/debug/pprof/profile", http.HandlerFunc(Profile))
|
||||
http.Handle("/debug/pprof/heap", http.HandlerFunc(Heap))
|
||||
http.Handle("/debug/pprof/symbol", http.HandlerFunc(Symbol))
|
||||
http.Handle("/debug/pprof/thread", http.HandlerFunc(Thread))
|
||||
}
|
||||
|
||||
// Cmdline responds with the running program's
|
||||
@ -60,6 +65,13 @@ func Heap(w http.ResponseWriter, r *http.Request) {
|
||||
pprof.WriteHeapProfile(w)
|
||||
}
|
||||
|
||||
// Thread responds with the pprof-formatted thread creation profile.
|
||||
// The package initialization registers it as /debug/pprof/thread.
|
||||
func Thread(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
pprof.WriteThreadProfile(w)
|
||||
}
|
||||
|
||||
// Profile responds with the pprof-formatted cpu profile.
|
||||
// The package initialization registers it as /debug/pprof/profile.
|
||||
func Profile(w http.ResponseWriter, r *http.Request) {
|
||||
|
@ -2,42 +2,4 @@
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
include ../../Make.inc
|
||||
|
||||
AUTOHFILES=\
|
||||
arch_GOARCH.h\
|
||||
os_GOOS.h\
|
||||
signals_GOOS.h\
|
||||
defs_GOOS_GOARCH.h\
|
||||
zasm_GOOS_GOARCH.h\
|
||||
|
||||
all:
|
||||
@echo use make install, make clean
|
||||
|
||||
pkgdir=$(QUOTED_GOROOT)/pkg/$(GOOS)_$(GOARCH)
|
||||
|
||||
$(pkgdir)/%.h: %.h
|
||||
@test -d $(QUOTED_GOROOT)/pkg && mkdir -p $(pkgdir)
|
||||
cp $< "$@"
|
||||
|
||||
install: $(pkgdir)/runtime.h $(pkgdir)/cgocall.h
|
||||
|
||||
clean: clean-local
|
||||
|
||||
clean-local:
|
||||
rm -f $(AUTOHFILES) runtime_defs.go version*.go asm_*.h
|
||||
|
||||
arch_GOARCH.h: arch_$(GOARCH).h
|
||||
cp $^ $@
|
||||
|
||||
defs_GOOS_GOARCH.h: defs_$(GOOS)_$(GOARCH).h
|
||||
cp $^ $@
|
||||
|
||||
os_GOOS.h: os_$(GOOS).h
|
||||
cp $^ $@
|
||||
|
||||
signals_GOOS.h: signals_$(GOOS).h
|
||||
cp $^ $@
|
||||
|
||||
zasm_GOOS_GOARCH.h: zasm_$(GOOS)_$(GOARCH).h
|
||||
cp $^ $@
|
||||
include ../../Make.dist
|
||||
|
@ -95,11 +95,37 @@ func (r *MemProfileRecord) Stack() []uintptr {
|
||||
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
|
||||
// These are sites where memory was allocated, but it has all
|
||||
// been released back to the runtime.
|
||||
//
|
||||
// Most clients should use the runtime/pprof package or
|
||||
// the testing package's -test.memprofile flag instead
|
||||
// of calling MemProfile directly.
|
||||
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool)
|
||||
|
||||
// A ThreadProfileRecord describes the execution stack that
|
||||
// caused a new thread to be created.
|
||||
type ThreadProfileRecord struct {
|
||||
Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
|
||||
}
|
||||
|
||||
// Stack returns the stack trace associated with the record,
|
||||
// a prefix of r.Stack0.
|
||||
func (r *ThreadProfileRecord) Stack() []uintptr {
|
||||
for i, v := range r.Stack0 {
|
||||
if v == 0 {
|
||||
return r.Stack0[0:i]
|
||||
}
|
||||
}
|
||||
return r.Stack0[0:]
|
||||
}
|
||||
|
||||
// ThreadProfile returns n, the number of records in the current thread profile.
|
||||
// If len(p) >= n, ThreadProfile copies the profile into p and returns n, true.
|
||||
// If len(p) < n, ThreadProfile does not change p and returns n, false.
|
||||
//
|
||||
// Most clients should use the runtime/pprof package instead
|
||||
// of calling ThreadProfile directly.
|
||||
func ThreadProfile(p []ThreadProfileRecord) (n int, ok bool)
|
||||
|
||||
// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
|
||||
// blocking until data is available. If profiling is turned off and all the profile
|
||||
// data accumulated while it was on has been returned, CPUProfile returns nil.
|
||||
|
@ -230,7 +230,7 @@ runtime·MProf_Free(void *p, uintptr size)
|
||||
// Go interface to profile data. (Declared in extern.go)
|
||||
// Assumes Go sizeof(int) == sizeof(int32)
|
||||
|
||||
// Must match MemProfileRecord in extern.go.
|
||||
// Must match MemProfileRecord in debug.go.
|
||||
typedef struct Record Record;
|
||||
struct Record {
|
||||
int64 alloc_bytes, free_bytes;
|
||||
@ -273,3 +273,28 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
|
||||
}
|
||||
runtime·unlock(&proflock);
|
||||
}
|
||||
|
||||
// Must match ThreadProfileRecord in debug.go.
|
||||
typedef struct TRecord TRecord;
|
||||
struct TRecord {
|
||||
uintptr stk[32];
|
||||
};
|
||||
|
||||
func ThreadProfile(p Slice) (n int32, ok bool) {
|
||||
TRecord *r;
|
||||
M *first, *m;
|
||||
|
||||
first = runtime·atomicloadp(&runtime·allm);
|
||||
n = 0;
|
||||
for(m=first; m; m=m->alllink)
|
||||
n++;
|
||||
ok = false;
|
||||
if(n <= p.len) {
|
||||
ok = true;
|
||||
r = (TRecord*)p.array;
|
||||
for(m=first; m; m=m->alllink) {
|
||||
runtime·memmove(r->stk, m->createstack, sizeof r->stk);
|
||||
r++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -110,6 +110,44 @@ func WriteHeapProfile(w io.Writer) error {
|
||||
return b.Flush()
|
||||
}
|
||||
|
||||
// WriteThreadProfile writes a pprof-formatted thread creation profile to w.
|
||||
// If a write to w returns an error, WriteThreadProfile returns that error.
|
||||
// Otherwise, WriteThreadProfile returns nil.
|
||||
func WriteThreadProfile(w io.Writer) error {
|
||||
// Find out how many records there are (ThreadProfile(nil)),
|
||||
// allocate that many records, and get the data.
|
||||
// There's a race—more records (threads) might be added between
|
||||
// the two calls—so allocate a few extra records for safety
|
||||
// and also try again if we're very unlucky.
|
||||
// The loop should only execute one iteration in the common case.
|
||||
var p []runtime.ThreadProfileRecord
|
||||
n, ok := runtime.ThreadProfile(nil)
|
||||
for {
|
||||
// Allocate room for a slightly bigger profile,
|
||||
// in case a few more entries have been added
|
||||
// since the call to ThreadProfile.
|
||||
p = make([]runtime.ThreadProfileRecord, n+10)
|
||||
n, ok = runtime.ThreadProfile(p)
|
||||
if ok {
|
||||
p = p[0:n]
|
||||
break
|
||||
}
|
||||
// Profile grew; try again.
|
||||
}
|
||||
|
||||
b := bufio.NewWriter(w)
|
||||
fmt.Fprintf(b, "thread creation profile: %d threads\n", n)
|
||||
for i := range p {
|
||||
r := &p[i]
|
||||
fmt.Fprintf(b, "@")
|
||||
for _, pc := range r.Stack() {
|
||||
fmt.Fprintf(b, " %#x", pc)
|
||||
}
|
||||
fmt.Fprintf(b, "\n")
|
||||
}
|
||||
return b.Flush()
|
||||
}
|
||||
|
||||
var cpu struct {
|
||||
sync.Mutex
|
||||
profiling bool
|
||||
|
@ -324,13 +324,6 @@ runtime·idlegoroutine(void)
|
||||
static void
|
||||
mcommoninit(M *m)
|
||||
{
|
||||
// Add to runtime·allm so garbage collector doesn't free m
|
||||
// when it is just in a register or thread-local storage.
|
||||
m->alllink = runtime·allm;
|
||||
// runtime·Cgocalls() iterates over allm w/o schedlock,
|
||||
// so we need to publish it safely.
|
||||
runtime·atomicstorep(&runtime·allm, m);
|
||||
|
||||
m->id = runtime·sched.mcount++;
|
||||
m->fastrand = 0x49f6428aUL + m->id + runtime·cputicks();
|
||||
m->stackalloc = runtime·malloc(sizeof(*m->stackalloc));
|
||||
@ -338,6 +331,15 @@ mcommoninit(M *m)
|
||||
|
||||
if(m->mcache == nil)
|
||||
m->mcache = runtime·allocmcache();
|
||||
|
||||
runtime·callers(1, m->createstack, nelem(m->createstack));
|
||||
|
||||
// Add to runtime·allm so garbage collector doesn't free m
|
||||
// when it is just in a register or thread-local storage.
|
||||
m->alllink = runtime·allm;
|
||||
// runtime·Cgocalls() iterates over allm w/o schedlock,
|
||||
// so we need to publish it safely.
|
||||
runtime·atomicstorep(&runtime·allm, m);
|
||||
}
|
||||
|
||||
// Try to increment mcpu. Report whether succeeded.
|
||||
|
@ -233,6 +233,7 @@ struct M
|
||||
FixAlloc *stackalloc;
|
||||
G* lockedg;
|
||||
G* idleg;
|
||||
uintptr createstack[32]; // Stack that created this thread.
|
||||
uint32 freglo[16]; // D[i] lsb and F[i]
|
||||
uint32 freghi[16]; // D[i] msb and F[i+16]
|
||||
uint32 fflag; // floating point compare flags
|
||||
|
Loading…
Reference in New Issue
Block a user