From e83d1bcf33c313f4b6c754799646fe038b9fad0f Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Wed, 21 Jun 2017 23:16:11 -0700 Subject: [PATCH] improve accuracy of java/js color palettes with --jit annotations --- README.md | 4 +-- flamegraph.pl | 69 +++++++++++++++++++++++++++++++------------ stackcollapse-perf.pl | 32 ++++++++++++++++---- 3 files changed, 78 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 7cd597b1..7924ddbe 100644 --- a/README.md +++ b/README.md @@ -126,12 +126,12 @@ An example output from Linux "perf script" is included, gzip'd, as example-perf- You can create this using: ``` -$ gunzip -c example-perf-stacks.txt.gz | ./stackcollapse-perf.pl --kernel | ./flamegraph.pl --color=java --hash > example-perf.svg +$ gunzip -c example-perf-stacks.txt.gz | ./stackcollapse-perf.pl --all | ./flamegraph.pl --color=java --hash > example-perf.svg ``` This shows my typical workflow: I'll gzip profiles on the target, then copy them to my laptop for analysis. Since I have hundreds of profiles, I leave them gzip'd! -Since this profile included Java, I used the flamegraph.pl --color=java palette. I've also used stackcollapse-perf.pl --kernel, which allows a separate color to be used for kernel code. The resulting flame graph uses: green == Java, yellow == C++, red == user-mode native, orange == kernel. +Since this profile included Java, I used the flamegraph.pl --color=java palette. I've also used stackcollapse-perf.pl --all, which includes all annotations that help flamegraph.pl use separate colors for kernel and user level code. The resulting flame graph uses: green == Java, yellow == C++, red == user-mode native, orange == kernel. This profile was from an analysis of vert.x performance. The benchmark client, wrk, is also visible in the flame graph. 
diff --git a/flamegraph.pl b/flamegraph.pl index ce5e899c..318433da 100755 --- a/flamegraph.pl +++ b/flamegraph.pl @@ -17,17 +17,31 @@ # # The input is stack frames and sample counts formatted as single lines. Each # frame in the stack is semicolon separated, with a space and count at the end -# of the line. These can be generated using DTrace with stackcollapse.pl, -# and other tools using the stackcollapse variants. +# of the line. These can be generated for Linux perf script output using +# stackcollapse-perf.pl, for DTrace using stackcollapse.pl, and for other tools +# using the other stackcollapse programs. Example input: +# +# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1 # # An optional extra column of counts can be provided to generate a differential # flame graph of the counts, colored red for more, and blue for less. This # can be useful when using flame graphs for non-regression testing. # See the header comment in the difffolded.pl program for instructions. # -# The output graph shows relative presence of functions in stack samples. The -# ordering on the x-axis has no meaning; since the data is samples, time order -# of events is not known. The order used sorts function names alphabetically. +# The input functions can optionally have annotations at the end of each +# function name, following a precedent by some tools (Linux perf's _[k]): +# _[k] for kernel +# _[i] for inlined +# _[j] for jit +# _[w] for waker +# Some of the stackcollapse programs support adding these annotations, eg, +# stackcollapse-perf.pl --kernel --jit. They are used merely for colors by +# some palettes, eg, flamegraph.pl --color=java. +# +# The output flame graph shows relative presence of functions in stack samples. +# The ordering on the x-axis has no meaning; since the data is samples, time +# order of events is not known. The order used sorts function names +# alphabetically. 
# # While intended to process stack samples, this can also process stack traces. # For example, tracing stacks for memory allocation, or resource usage. You @@ -343,13 +357,20 @@ sub color { # multi palettes if (defined $type and $type eq "java") { - if ($name =~ m:/:) { # Java (match "/" in path) + # Handle both annotations (_[j], _[i], ...; which are + # accurate), as well as input that lacks any annotations, as + # best as possible. Without annotations, we get a little hacky + # and match on java|org|com, etc. + if ($name =~ m:_\[j\]$:) { # jit annotation + $type = "green"; + } elsif ($name =~ m:_\[i\]$:) { # inline annotation + $type = "aqua"; + } elsif ($name =~ m:^L?(java|org|com|io|sun)/:) { # Java $type = "green"; - $type = "aqua" if $name =~ m/_\[i\]/; #inline } elsif ($name =~ /::/) { # C++ $type = "yellow"; - } elsif ($name =~ m:_\[k\]:) { # kernel - $type = "orange" + } elsif ($name =~ m:_\[k\]$:) { # kernel annotation + $type = "orange"; } else { # system $type = "red"; } @@ -360,24 +381,34 @@ sub color { $type = "yellow"; } elsif ($name =~ m:Perl: or $name =~ m:\.pl:) { # Perl $type = "green"; - } elsif ($name =~ m:_\[k\]:) { # kernel - $type = "orange" + } elsif ($name =~ m:_\[k\]$:) { # kernel + $type = "orange"; } else { # system $type = "red"; } # fall-through to color palettes } if (defined $type and $type eq "js") { - if ($name =~ /::/) { # C++ + # Handle both annotations (_[j], _[i], ...; which are + # accurate), as well as input that lacks any annotations, as + # best as possible. Without annotations, we get a little hacky, + # and match on a "/" with a ".js", etc. 
+ if ($name =~ m:_\[j\]$:) { # jit annotation + if ($name =~ m:/:) { + $type = "green"; # source + } else { + $type = "aqua"; # builtin + } + } elsif ($name =~ /::/) { # C++ $type = "yellow"; - } elsif ($name =~ m:/:) { # JavaScript (match "/" in path) - $type = "green" + } elsif ($name =~ m:/.*\.js:) { # JavaScript (match "/" in path) + $type = "green"; } elsif ($name =~ m/:/) { # JavaScript (match ":" in builtin) - $type = "aqua" + $type = "aqua"; } elsif ($name =~ m/^ $/) { # Missing symbol - $type = "green" + $type = "green"; } elsif ($name =~ m:_\[k\]:) { # kernel - $type = "orange" + $type = "orange"; } else { # system $type = "red"; } @@ -1023,7 +1054,7 @@ sub flow { $escaped_func =~ s/</&lt;/g; $escaped_func =~ s/>/&gt;/g; $escaped_func =~ s/"/&quot;/g; - $escaped_func =~ s/_\[[kwi]\]$//; # strip any annotation + $escaped_func =~ s/_\[[kwij]\]$//; # strip any annotation unless (defined $delta) { $info = "$escaped_func ($samples_txt $countname, $pct%)"; } else { @@ -1059,7 +1090,7 @@ sub flow { my $chars = int( ($x2 - $x1) / ($fontsize * $fontwidth)); my $text = ""; if ($chars >= 3) { # room for one char plus two dots - $func =~ s/_\[[kwi]\]$//; # strip any annotation + $func =~ s/_\[[kwij]\]$//; # strip any annotation $text = substr $func, 0, $chars; substr($text, -2, 2) = ".." 
if $chars < length $func; $text =~ s/&/&amp;/g; diff --git a/stackcollapse-perf.pl b/stackcollapse-perf.pl index 89c7b962..e3dbe0c0 100755 --- a/stackcollapse-perf.pl +++ b/stackcollapse-perf.pl @@ -73,6 +73,8 @@ sub remember_stack { $collapsed{$stack} += $count; } my $annotate_kernel = 0; # put an annotation on kernel function +my $annotate_jit = 0; # put an annotation on jit symbols +my $annotate_all = 0; # enable all annotations my $include_pname = 1; # include process names in stacks my $include_pid = 0; # include process ID with process name my $include_tid = 0; # include process & thread ID with process name @@ -86,18 +88,26 @@ sub remember_stack { 'context' => \$show_context, 'pid' => \$include_pid, 'kernel' => \$annotate_kernel, + 'jit' => \$annotate_jit, + 'all' => \$annotate_all, 'tid' => \$include_tid) or die <<USAGE_END; USAGE: $0 [options] infile > outfile\n --pid # include PID with process names [1] --tid # include TID and PID with process names [1] --inline # un-inline using addr2line + --all # all annotations (--kernel --jit) --kernel # annotate kernel functions with a _[k] + --jit # annotate jit functions with a _[j] --context # adds source context to --inline\n [1] perf script must emit both PID and TIDs for these to work; eg: perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace USAGE_END +if ($annotate_all) { + $annotate_kernel = $annotate_jit = 1; +} + # for the --inline option sub inline { my ($pc, $mod) = @_; @@ -211,11 +221,6 @@ sub inline { # strip these off: $rawfunc =~ s/\+0x[\da-f]+$//; - # detect kernel from the module name; eg, frames to parse include: - # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms]) - # 8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux) - $rawfunc.="_[k]" if ($annotate_kernel == 1 && $mod =~ m/(kernel\.|vmlinux$)/); - if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) { unshift @stack, inline($pc, $mod); next; @@ -260,7 +265,22 @@ sub inline { $func =~ s/^L// if $func =~ m:/:; } - $func .= "_[i]" if 
scalar(@inline) > 0; #inlined + # + # Annotations + # + # detect inlined from the @inline array + # detect kernel from the module name; eg, frames to parse include: + # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms]) + # 8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux) + # detect jit from the module name; eg: + # 7f722d142778 Ljava/io/PrintStream;::print (/tmp/perf-19982.map) + if (scalar(@inline) > 0) { + $func .= "_[i]"; # inlined + } elsif ($annotate_kernel == 1 && $mod =~ m/(kernel\.|vmlinux$)/) { + $func .= "_[k]"; # kernel + } elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) { + $func .= "_[j]"; # jitted + } push @inline, $func; }