summaryrefslogtreecommitdiffstats
path: root/usr.sbin/pmcstat
diff options
context:
space:
mode:
authorjkoshy <jkoshy@FreeBSD.org>2007-12-07 08:26:21 +0000
committerjkoshy <jkoshy@FreeBSD.org>2007-12-07 08:26:21 +0000
commit2aef7957ec96361c7cf73dee65ee4512b39e1de5 (patch)
tree6048997529363b686719d6493260bacb261191c8 /usr.sbin/pmcstat
parent72c27d71d82569aec187c30f6ff208631abc02f4 (diff)
downloadFreeBSD-src-2aef7957ec96361c7cf73dee65ee4512b39e1de5.zip
FreeBSD-src-2aef7957ec96361c7cf73dee65ee4512b39e1de5.tar.gz
Introduce pmcstat(8) changes for summarizing hwpmc(4) callchain records
in textual form and in gmon.out format. Update manual page. Sponsored by: FreeBSD Foundation and Google Inc.
Diffstat (limited to 'usr.sbin/pmcstat')
-rw-r--r--usr.sbin/pmcstat/Makefile2
-rw-r--r--usr.sbin/pmcstat/pmcstat.859
-rw-r--r--usr.sbin/pmcstat/pmcstat.c121
-rw-r--r--usr.sbin/pmcstat/pmcstat.h16
-rw-r--r--usr.sbin/pmcstat/pmcstat_log.c1086
5 files changed, 1075 insertions, 209 deletions
diff --git a/usr.sbin/pmcstat/Makefile b/usr.sbin/pmcstat/Makefile
index 819a0c0..9b992b2 100644
--- a/usr.sbin/pmcstat/Makefile
+++ b/usr.sbin/pmcstat/Makefile
@@ -6,7 +6,7 @@ PROG= pmcstat
MAN= pmcstat.8
DPADD= ${LIBKVM} ${LIBPMC} ${LIBM}
-LDADD= -lkvm -lpmc -lm
+LDADD= -lelf -lkvm -lpmc -lm
WARNS?= 6
diff --git a/usr.sbin/pmcstat/pmcstat.8 b/usr.sbin/pmcstat/pmcstat.8
index 951dba6..16cd876 100644
--- a/usr.sbin/pmcstat/pmcstat.8
+++ b/usr.sbin/pmcstat/pmcstat.8
@@ -1,4 +1,6 @@
-.\" Copyright (c) 2003-2007 Joseph Koshy. All rights reserved.
+.\" Copyright (c) 2003-2007 Joseph Koshy
+.\" Copyright (c) 2007 The FreeBSD Foundation
+.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
@@ -34,7 +36,9 @@
.Op Fl C
.Op Fl D Ar pathname
.Op Fl E
+.Op Fl G Ar pathname
.Op Fl M Ar mapfilename
+.Op Fl N
.Op Fl O Ar logfilename
.Op Fl P Ar event-spec
.Op Fl R Ar logfilename
@@ -53,6 +57,7 @@
.Op Fl t Ar process-spec
.Op Fl v
.Op Fl w Ar secs
+.Op Fl z Ar graphdepth
.Op Ar command Op Ar args
.Sh DESCRIPTION
The
@@ -123,6 +128,16 @@ complex pipeline of processes when used in conjunction with the
.Fl d
option.
The default is to not to enable per-process tracking.
+.It Fl G Ar pathname
+Print callchain information to file
+.Ar pathname .
+If argument
+.Ar pathname
+is a
+.Dq Li -
+this information is sent to the output file specified by the
+.Fl o
+option.
.It Fl M Ar mapfilename
Write the mapping between executable objects encountered in the event
log and the abbreviated pathnames used for
@@ -138,6 +153,9 @@ in which case this mapping information is sent to the output
file configured by the
.Fl o
option.
+.It Fl N
+Toggle capturing callchain information for subsequent sampling PMCs.
+The default is for sampling PMCs to capture callchain information.
.It Fl O Ar logfilename
Send logging output to file
.Ar logfilename .
@@ -192,14 +210,15 @@ Argument
is a comma separated list of CPU numbers, or the literal
.Sq *
denoting all CPUs.
-The default is to allocate system mode PMCs on all CPUs.
+The default is to allocate system mode PMCs on all active CPUs in
+the system.
.It Fl d
Toggle between process mode PMCs measuring events for the target
process' current and future children or only measuring events for
the target process.
The default is to measure events for the target process alone.
.It Fl g
-Produce flat execution profiles in a format compatible with
+Produce profiles in a format compatible with
.Xr gprof 1 .
A separate profile file is generated for each executable object
encountered.
@@ -223,7 +242,10 @@ Send counter readings and textual representations of logged data
to file
.Ar outputfile .
The default is to send output to
-.Pa stderr .
+.Pa stderr
+when collecting live data and to
+.Pa stdout
+when processing a pre-existing logfile.
.It Fl p Ar event-spec
Allocate a process mode counting PMC measuring hardware events
specified in
@@ -257,6 +279,10 @@ The argument
.Ar secs
may be a fractional value.
The default interval is 5 seconds.
+.It Fl z Ar graphdepth
+When printing system-wide callgraphs, limit callgraphs to the depth
+specified by argument
+.Ar graphdepth .
.El
.Pp
If
@@ -286,9 +312,15 @@ To count instruction tlb-misses on CPUs 0 and 2 on a Intel
Pentium Pro/Pentium III SMP system use:
.Dl "pmcstat -c 0,2 -s p6-itlb-miss"
.Pp
+To collect profiling information for a specific process with pid 1234
+based on instruction cache misses seen by it use:
+.Dl "pmcstat -P ic-misses -t 1234 -O /tmp/sample.out"
+.Pp
To perform system-wide sampling on all configured processors
based on processor instructions retired use:
.Dl "pmcstat -S instructions -O /tmp/sample.out"
+If callgraph capture is not desired use:
+.Dl "pmcstat -N -S instructions -O /tmp/sample.out"
.Pp
To send the generated event log to a remote machine use:
.Dl "pmcstat -S instructions -O remotehost:port"
@@ -298,10 +330,27 @@ On the remote machine, the sample log can be collected using
.Pp
To generate
.Xr gprof 1
-compatible flat profiles from a sample file use:
+compatible profiles from a sample file use:
.Dl "pmcstat -R /tmp/sample.out -g"
+.Pp
+To print a system-wide profile with callgraphs to file
+.Pa "foo.graph"
+use:
+.Dl "pmcstat -R /tmp/sample.out -G foo.graph"
.Sh DIAGNOSTICS
.Ex -std
+.Sh COMPATIBILITY
+Due to the limitations of the
+.Pa gmon.out
+file format,
+.Xr gprof 1
+compatible profiles generated by the
+.Fl g
+option do not contain information about calls that cross executable
+boundaries.
+The generated
+.Pa gmon.out
+files are also only meaningful for native executables.
.Sh SEE ALSO
.Xr gprof 1 ,
.Xr nc 1 ,
diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c
index b0c2600..1d7724e 100644
--- a/usr.sbin/pmcstat/pmcstat.c
+++ b/usr.sbin/pmcstat/pmcstat.c
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2007, Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -486,7 +490,9 @@ pmcstat_show_usage(void)
"\t -C\t\t (toggle) show cumulative counts\n"
"\t -D path\t create profiles in directory \"path\"\n"
"\t -E\t\t (toggle) show counts at process exit\n"
+ "\t -G file\t write a system-wide callgraph to \"file\"\n"
"\t -M file\t print executable/gmon file map to \"file\"\n"
+ "\t -N\t\t (toggle) capture callchains\n"
"\t -O file\t send log output to \"file\"\n"
"\t -P spec\t allocate a process-private sampling PMC\n"
"\t -R file\t read events from \"file\"\n"
@@ -504,7 +510,8 @@ pmcstat_show_usage(void)
"\t -s spec\t allocate a system-wide counting PMC\n"
"\t -t pid\t\t attach to running process with pid \"pid\"\n"
"\t -v\t\t increase verbosity\n"
- "\t -w secs\t set printing time interval"
+ "\t -w secs\t set printing time interval\n"
+ "\t -z depth\t limit callchain display depth"
);
}
@@ -516,16 +523,17 @@ int
main(int argc, char **argv)
{
double interval;
- int option, npmc, ncpu;
+ int option, npmc, ncpu, haltedcpus;
int c, check_driver_stats, current_cpu, current_sampling_count;
- int do_print, do_descendants;
- int do_logproccsw, do_logprocexit;
+ int do_callchain, do_descendants, do_logproccsw, do_logprocexit;
+ int do_print;
size_t dummy;
+ int graphdepth;
int pipefd[2];
int use_cumulative_counts;
uint32_t cpumask;
char *end, *tmp;
- const char *errmsg;
+ const char *errmsg, *graphfilename;
enum pmcstat_state runstate;
struct pmc_driverstats ds_start, ds_end;
struct pmcstat_ev *ev;
@@ -538,10 +546,12 @@ main(int argc, char **argv)
check_driver_stats = 0;
current_cpu = 0;
current_sampling_count = DEFAULT_SAMPLE_COUNT;
+ do_callchain = 1;
do_descendants = 0;
do_logproccsw = 0;
do_logprocexit = 0;
use_cumulative_counts = 0;
+ graphfilename = "-";
args.pa_required = 0;
args.pa_flags = 0;
args.pa_verbosity = 1;
@@ -550,21 +560,33 @@ main(int argc, char **argv)
args.pa_kernel = strdup("/boot/kernel");
args.pa_samplesdir = ".";
args.pa_printfile = stderr;
+ args.pa_graphdepth = DEFAULT_CALLGRAPH_DEPTH;
+ args.pa_graphfile = NULL;
args.pa_interval = DEFAULT_WAIT_INTERVAL;
args.pa_mapfilename = NULL;
+ args.pa_inputpath = NULL;
+ args.pa_outputpath = NULL;
STAILQ_INIT(&args.pa_events);
SLIST_INIT(&args.pa_targets);
bzero(&ds_start, sizeof(ds_start));
bzero(&ds_end, sizeof(ds_end));
ev = NULL;
- dummy = sizeof(ncpu);
+ /*
+ * The initial CPU mask specifies all non-halted CPUS in the
+ * system.
+ */
+ dummy = sizeof(int);
if (sysctlbyname("hw.ncpu", &ncpu, &dummy, NULL, 0) < 0)
- err(EX_OSERR, "ERROR: Cannot determine #cpus");
+ err(EX_OSERR, "ERROR: Cannot determine the number of CPUs");
cpumask = (1 << ncpu) - 1;
+ if (sysctlbyname("machdep.hlt_cpus", &haltedcpus, &dummy,
+ NULL, 0) < 0)
+ err(EX_OSERR, "ERROR: Cannot determine which CPUs are halted");
+ cpumask &= ~haltedcpus;
while ((option = getopt(argc, argv,
- "CD:EM:O:P:R:S:Wc:dgk:n:o:p:qr:s:t:vw:")) != -1)
+ "CD:EG:M:NO:P:R:S:Wc:dgk:n:o:p:qr:s:t:vw:z:")) != -1)
switch (option) {
case 'C': /* cumulative values */
use_cumulative_counts = !use_cumulative_counts;
@@ -598,6 +620,11 @@ main(int argc, char **argv)
args.pa_required |= FLAG_HAS_PROCESS_PMCS;
break;
+ case 'G': /* produce a system-wide callgraph */
+ args.pa_flags |= FLAG_DO_CALLGRAPHS;
+ graphfilename = optarg;
+ break;
+
case 'g': /* produce gprof compatible profiles */
args.pa_flags |= FLAG_DO_GPROF;
break;
@@ -605,7 +632,7 @@ main(int argc, char **argv)
case 'k': /* pathname to the kernel */
free(args.pa_kernel);
args.pa_kernel = strdup(optarg);
- args.pa_required |= FLAG_DO_GPROF;
+ args.pa_required |= FLAG_DO_ANALYSIS;
args.pa_flags |= FLAG_HAS_KERNELPATH;
break;
@@ -619,6 +646,11 @@ main(int argc, char **argv)
args.pa_mapfilename = optarg;
break;
+ case 'N':
+ do_callchain = !do_callchain;
+ args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
+ break;
+
case 'p': /* process virtual counting PMC */
case 's': /* system-wide counting PMC */
case 'P': /* process virtual sampling PMC */
@@ -664,6 +696,8 @@ main(int argc, char **argv)
ev->ev_cpu = PMC_CPU_ANY;
ev->ev_flags = 0;
+ if (do_callchain)
+ ev->ev_flags |= PMC_F_CALLCHAIN;
if (do_descendants)
ev->ev_flags |= PMC_F_DESCENDANTS;
if (do_logprocexit)
@@ -725,7 +759,7 @@ main(int argc, char **argv)
break;
case 'R': /* read an existing log file */
- if (args.pa_logparser != NULL)
+ if (args.pa_inputpath != NULL)
errx(EX_USAGE, "ERROR: option -R may only be "
"specified once.");
args.pa_inputpath = optarg;
@@ -761,6 +795,15 @@ main(int argc, char **argv)
FLAG_HAS_COUNTING_PMCS | FLAG_HAS_OUTPUT_LOGFILE);
break;
+		case 'z': {	/* limit the depth of printed callgraphs */
+			long depth;
+
+			/*
+			 * The depth is an integer, so parse it with strtol()
+			 * rather than strtod(): fractional or exponent forms
+			 * such as "3.7" or "1e2" are now rejected instead of
+			 * being silently truncated, and an out-of-range value
+			 * can no longer be converted to 'int' with undefined
+			 * behaviour.
+			 */
+			depth = strtol(optarg, &end, 10);
+			graphdepth = (int) depth;
+			/* The round-trip comparison rejects values > INT_MAX. */
+			if (*end != '\0' || depth <= 0 ||
+			    (long) graphdepth != depth)
+				errx(EX_USAGE, "ERROR: Illegal callchain "
+				    "depth \"%s\".", optarg);
+			args.pa_graphdepth = graphdepth;
+			args.pa_required |= FLAG_DO_CALLGRAPHS;
+			break;
+		}
+
case '?':
default:
pmcstat_show_usage();
@@ -771,9 +814,14 @@ main(int argc, char **argv)
args.pa_argc = (argc -= optind);
args.pa_argv = (argv += optind);
+ args.pa_cpumask = cpumask; /* For selecting CPUs using -R. */
+
if (argc) /* command line present */
args.pa_flags |= FLAG_HAS_COMMANDLINE;
+ if (args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS))
+ args.pa_flags |= FLAG_DO_ANALYSIS;
+
/*
* Check invocation syntax.
*/
@@ -822,9 +870,10 @@ main(int argc, char **argv)
errx(EX_USAGE, "ERROR: options -d, -E, and -W require a "
"process mode PMC to be specified.");
- /* check for -c cpu and not system mode PMCs */
+ /* check for -c cpu with no system mode PMCs or logfile. */
if ((args.pa_required & FLAG_HAS_SYSTEM_PMCS) &&
- (args.pa_flags & FLAG_HAS_SYSTEM_PMCS) == 0)
+ (args.pa_flags & FLAG_HAS_SYSTEM_PMCS) == 0 &&
+ (args.pa_flags & FLAG_READ_LOGFILE) == 0)
errx(EX_USAGE, "ERROR: option -c requires at least one "
"system mode PMC to be specified.");
@@ -837,14 +886,14 @@ main(int argc, char **argv)
/* check for sampling mode options without a sampling PMC spec */
if ((args.pa_required & FLAG_HAS_SAMPLING_PMCS) &&
(args.pa_flags & FLAG_HAS_SAMPLING_PMCS) == 0)
- errx(EX_USAGE, "ERROR: options -n and -O require at least "
- "one sampling mode PMC to be specified.");
+ errx(EX_USAGE, "ERROR: options -N, -n and -O require at "
+ "least one sampling mode PMC to be specified.");
- /* check if -g is being used correctly */
- if ((args.pa_flags & FLAG_DO_GPROF) &&
+ /* check if -g/-G are being used correctly */
+ if ((args.pa_flags & FLAG_DO_ANALYSIS) &&
!(args.pa_flags & (FLAG_HAS_SAMPLING_PMCS|FLAG_READ_LOGFILE)))
- errx(EX_USAGE, "ERROR: option -g requires sampling PMCs or -R "
- "to be specified.");
+ errx(EX_USAGE, "ERROR: options -g/-G require sampling PMCs "
+ "or -R to be specified.");
/* check if -O was spuriously specified */
if ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) &&
@@ -853,16 +902,16 @@ main(int argc, char **argv)
"ERROR: option -O is used only with options "
"-E, -P, -S and -W.");
- /* -D dir and -k kernel path require -g or -R */
+ /* -k kernel path requires -g/-G or -R */
if ((args.pa_flags & FLAG_HAS_KERNELPATH) &&
- (args.pa_flags & FLAG_DO_GPROF) == 0 &&
+ (args.pa_flags & FLAG_DO_ANALYSIS) == 0 &&
(args.pa_flags & FLAG_READ_LOGFILE) == 0)
errx(EX_USAGE, "ERROR: option -k is only used with -g/-R.");
+ /* -D only applies to gprof output mode (-g) */
if ((args.pa_flags & FLAG_HAS_SAMPLESDIR) &&
- (args.pa_flags & FLAG_DO_GPROF) == 0 &&
- (args.pa_flags & FLAG_READ_LOGFILE) == 0)
- errx(EX_USAGE, "ERROR: option -D is only used with -g/-R.");
+ (args.pa_flags & FLAG_DO_GPROF) == 0)
+ errx(EX_USAGE, "ERROR: option -D is only used with -g.");
/* -M mapfile requires -g or -R */
if (args.pa_mapfilename != NULL &&
@@ -882,9 +931,9 @@ main(int argc, char **argv)
"sampling PMCs are specified together.");
/*
- * Check if "-k kerneldir" was specified, and if whether 'kerneldir'
- * actually refers to a a file. If so, use `dirname path` to determine
- * the kernel directory.
+ * Check if "-k kerneldir" was specified, and if so, whether
+ * 'kerneldir' actually refers to a file. In that case, use
+ * `dirname path` to determine the kernel directory.
*/
if (args.pa_flags & FLAG_HAS_KERNELPATH) {
(void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot,
@@ -910,13 +959,27 @@ main(int argc, char **argv)
}
}
+ /*
+ * If a callgraph is to be created, select its output file.
+ */
+ if (args.pa_flags & FLAG_DO_CALLGRAPHS) {
+ if (strcmp(graphfilename, "-") == 0)
+ args.pa_graphfile = args.pa_printfile;
+ else {
+ args.pa_graphfile = fopen(graphfilename, "w");
+ if (args.pa_graphfile == NULL)
+ err(EX_OSERR, "ERROR: cannot open \"%s\" "
+ "for writing", graphfilename);
+ }
+ }
+
/* if we've been asked to process a log file, do that and exit */
if (args.pa_flags & FLAG_READ_LOGFILE) {
/*
* Print the log in textual form if we haven't been
- * asked to generate gmon.out files.
+ * asked to generate profiling information.
*/
- if ((args.pa_flags & FLAG_DO_GPROF) == 0)
+ if ((args.pa_flags & FLAG_DO_ANALYSIS) == 0)
args.pa_flags |= FLAG_DO_PRINT;
pmcstat_initialize_logging(&args);
@@ -1162,7 +1225,7 @@ main(int argc, char **argv)
FLAG_HAS_PIPE)) {
runstate = pmcstat_close_log(&args);
if (args.pa_flags &
- (FLAG_DO_PRINT|FLAG_DO_GPROF))
+ (FLAG_DO_PRINT|FLAG_DO_ANALYSIS))
pmcstat_process_log(&args);
}
do_print = 1; /* print PMCs at exit */
diff --git a/usr.sbin/pmcstat/pmcstat.h b/usr.sbin/pmcstat/pmcstat.h
index 5b062a9..e46109a 100644
--- a/usr.sbin/pmcstat/pmcstat.h
+++ b/usr.sbin/pmcstat/pmcstat.h
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2005-2007, Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -43,11 +47,14 @@
#define FLAG_HAS_SAMPLESDIR 0x00000800 /* -D dir */
#define FLAG_HAS_KERNELPATH 0x00001000 /* -k kernel */
#define FLAG_DO_PRINT 0x00002000 /* -o */
+#define FLAG_DO_CALLGRAPHS 0x00004000 /* -G */
+#define FLAG_DO_ANALYSIS 0x00008000 /* -g or -G */
#define DEFAULT_SAMPLE_COUNT 65536
#define DEFAULT_WAIT_INTERVAL 5.0
#define DEFAULT_DISPLAY_HEIGHT 23
#define DEFAULT_BUFFER_SIZE 4096
+#define DEFAULT_CALLGRAPH_DEPTH 4
#define PRINT_HEADER_PREFIX "# "
#define READPIPEFD 0
@@ -68,9 +75,9 @@
#define PMCSTAT_LDD_COMMAND "/usr/bin/ldd"
#define PMCSTAT_PRINT_ENTRY(A,T,...) do { \
- fprintf((A)->pa_printfile, "%-8s", T); \
- fprintf((A)->pa_printfile, " " __VA_ARGS__); \
- fprintf((A)->pa_printfile, "\n"); \
+ (void) fprintf((A)->pa_printfile, "%-9s", T); \
+ (void) fprintf((A)->pa_printfile, " " __VA_ARGS__); \
+ (void) fprintf((A)->pa_printfile, "\n"); \
} while (0)
enum pmcstat_state {
@@ -112,7 +119,10 @@ struct pmcstat_args {
char *pa_kernel; /* pathname of the kernel */
const char *pa_samplesdir; /* directory for profile files */
const char *pa_mapfilename;/* mapfile name */
+ FILE *pa_graphfile; /* where to send the callgraph */
+ int pa_graphdepth; /* print depth for callgraphs */
double pa_interval; /* printing interval in seconds */
+ uint32_t pa_cpumask; /* filter for CPUs analysed */
int pa_argc;
char **pa_argv;
STAILQ_HEAD(, pmcstat_ev) pa_events;
diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c
index 90cd058..d895ab7 100644
--- a/usr.sbin/pmcstat/pmcstat_log.c
+++ b/usr.sbin/pmcstat/pmcstat_log.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2005-2006, Joseph Koshy
+ * Copyright (c) 2005-2007, Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -50,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include <errno.h>
#include <fcntl.h>
+#include <gelf.h>
#include <libgen.h>
#include <limits.h>
#include <netdb.h>
@@ -67,6 +72,8 @@ __FBSDID("$FreeBSD$");
#define min(A,B) ((A) < (B) ? (A) : (B))
#define max(A,B) ((A) > (B) ? (A) : (B))
+#define PMCSTAT_ALLOCATE 1
+
/*
* PUBLIC INTERFACES
*
@@ -76,22 +83,17 @@ __FBSDID("$FreeBSD$");
* pmcstat_process_log() print/convert an event log
* pmcstat_close_log() finish processing an event log
*
- * IMPLEMENTATION OF GMON OUTPUT
+ * IMPLEMENTATION NOTES
*
- * We correlate each 'sample' seen in the event log back to an
- * executable object in the system. Executable objects include:
+ * We correlate each 'callchain' or 'sample' entry seen in the event
+ * log back to an executable object in the system. Executable objects
+ * include:
* - program executables,
* - shared libraries loaded by the runtime loader,
* - dlopen()'ed objects loaded by the program,
* - the runtime loader itself,
* - the kernel and kernel modules.
*
- * Each such executable object gets one 'gmon.out' profile, per PMC in
- * use. Creation of 'gmon.out' profiles is done lazily. The
- * 'gmon.out' profiles generated for a given sampling PMC are
- * aggregates of all the samples for that particular executable
- * object.
- *
* Each process that we know about is treated as a set of regions that
* map to executable objects. Processes are described by
* 'pmcstat_process' structures. Executable objects are tracked by
@@ -106,6 +108,23 @@ __FBSDID("$FreeBSD$");
*
* The sample log could have samples from multiple PMCs; we
* generate one 'gmon.out' profile per PMC.
+ *
+ * IMPLEMENTATION OF GMON OUTPUT
+ *
+ * Each executable object gets one 'gmon.out' profile, per PMC in
+ * use. Creation of 'gmon.out' profiles is done lazily. The
+ * 'gmon.out' profiles generated for a given sampling PMC are
+ * aggregates of all the samples for that particular executable
+ * object.
+ *
+ * IMPLEMENTATION OF SYSTEM-WIDE CALLGRAPH OUTPUT
+ *
+ * Each active pmcid has its own callgraph structure, described by a
+ * 'struct pmcstat_callgraph'. Given a process id and a list of pc
+ * values, we map each pc value to a tuple (image, symbol), where
+ * 'image' denotes an executable object and 'symbol' is the closest
+ * symbol that precedes the pc value. Each pc value in the list is
+ * also given a 'rank' that reflects its depth in the call stack.
*/
typedef const void *pmcstat_interned_string;
@@ -139,6 +158,7 @@ struct pmcstat_gmonfile {
pmcstat_interned_string pgf_name; /* pathname of gmon.out file */
size_t pgf_ndatabytes; /* number of bytes mapped */
void *pgf_gmondata; /* pointer to mmap'ed data */
+ FILE *pgf_file; /* used when writing gmon arcs */
};
/*
@@ -161,8 +181,9 @@ enum pmcstat_image_type {
struct pmcstat_image {
LIST_ENTRY(pmcstat_image) pi_next; /* hash link */
TAILQ_ENTRY(pmcstat_image) pi_lru; /* LRU list */
- pmcstat_interned_string pi_execpath;/* cookie */
+ pmcstat_interned_string pi_execpath; /* cookie */
pmcstat_interned_string pi_samplename; /* sample path name */
+ pmcstat_interned_string pi_fullpath; /* path to FS object */
enum pmcstat_image_type pi_type; /* executable type */
@@ -170,15 +191,18 @@ struct pmcstat_image {
* Executables have pi_start and pi_end; these are zero
* for shared libraries.
*/
- uintfptr_t pi_start; /* start address (inclusive) */
- uintfptr_t pi_end; /* end address (exclusive) */
- uintfptr_t pi_entry; /* entry address */
- uintfptr_t pi_vaddr; /* virtual address where loaded */
- int pi_isdynamic; /* whether a dynamic
- * object */
+ uintfptr_t pi_start; /* start address (inclusive) */
+ uintfptr_t pi_end; /* end address (exclusive) */
+ uintfptr_t pi_entry; /* entry address */
+ uintfptr_t pi_vaddr; /* virtual address where loaded */
+ int pi_isdynamic; /* whether a dynamic object */
int pi_iskernelmodule;
pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */
+ /* All symbols associated with this object. */
+ struct pmcstat_symbol *pi_symbols;
+ size_t pi_symcount;
+
/*
* An image can be associated with one or more gmon.out files;
* one per PMC.
@@ -190,8 +214,6 @@ struct pmcstat_image {
* All image descriptors are kept in a hash table.
*/
static LIST_HEAD(,pmcstat_image) pmcstat_image_hash[PMCSTAT_NHASH];
-static TAILQ_HEAD(,pmcstat_image) pmcstat_image_lru =
- TAILQ_HEAD_INITIALIZER(pmcstat_image_lru);
/*
* A 'pmcstat_pcmap' structure maps a virtual address range to an
@@ -224,8 +246,6 @@ struct pmcstat_process {
TAILQ_HEAD(,pmcstat_pcmap) pp_map; /* address range map */
};
-#define PMCSTAT_ALLOCATE 1
-
/*
* All process descriptors are kept in a hash table.
*/
@@ -233,6 +253,44 @@ static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH];
static struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */
+/*
+ * Each function symbol tracked by pmcstat(8).
+ */
+
+struct pmcstat_symbol {
+ pmcstat_interned_string ps_name; /* interned function name */
+ uint64_t ps_start; /* start address, stored relative to the image's pi_vaddr */
+ uint64_t ps_end; /* end address (exclusive): ps_start + symbol size */
+};
+
+/*
+ * Each call graph node is tracked by a pmcstat_cgnode struct.
+ */
+
+struct pmcstat_cgnode {
+ struct pmcstat_image *pcg_image; /* presumably the image containing the function -- confirm */
+ uintfptr_t pcg_func; /* presumably the function's address -- confirm */
+ uint32_t pcg_count; /* NOTE(review): looks like a sample/hit count -- confirm */
+ uint32_t pcg_nchildren; /* presumably the number of entries on pcg_children -- confirm */
+ LIST_ENTRY(pmcstat_cgnode) pcg_sibling; /* list linkage to other cgnodes */
+ LIST_HEAD(,pmcstat_cgnode) pcg_children; /* head of a list of cgnodes */
+};
+
+struct pmcstat_cgnode_hash {
+ struct pmcstat_cgnode *pch_cgnode; /* the call graph node this entry refers to */
+ uint32_t pch_pmcid; /* presumably the PMC id owning this callgraph -- confirm */
+ LIST_ENTRY(pmcstat_cgnode_hash) pch_next; /* hash bucket chain linkage */
+};
+
+static int pmcstat_cgnode_hash_count;
+static pmcstat_interned_string pmcstat_previous_filename_printed;
+
+/*
+ * The toplevel CG nodes (i.e., with rank == 0) are placed in a hash table.
+ */
+
+static LIST_HEAD(,pmcstat_cgnode_hash) pmcstat_cgnode_hash[PMCSTAT_NHASH];
+
/* Misc. statistics */
static struct pmcstat_stats {
int ps_exec_aout; /* # a.out executables seen */
@@ -240,10 +298,13 @@ static struct pmcstat_stats {
int ps_exec_errors; /* # errors processing executables */
int ps_exec_indeterminable; /* # unknown executables seen */
int ps_samples_total; /* total number of samples processed */
- int ps_samples_unknown_offset; /* #samples not in any map */
+ int ps_samples_skipped; /* #samples filtered out for any reason */
+ int ps_samples_unknown_offset; /* #samples of rank 0 not in a map */
int ps_samples_indeterminable; /* #samples in indeterminable images */
+ int ps_callchain_dubious_frames;/* #dubious frame pointers seen */
} pmcstat_stats;
+
/*
* Prototypes
*/
@@ -257,6 +318,8 @@ static void pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf);
static void pmcstat_image_determine_type(struct pmcstat_image *_image,
struct pmcstat_args *_a);
+static struct pmcstat_gmonfile *pmcstat_image_find_gmonfile(struct
+ pmcstat_image *_i, pmc_id_t _id);
static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string
_path, int _iskernelmodule);
static void pmcstat_image_get_aout_params(struct pmcstat_image *_image,
@@ -464,8 +527,6 @@ pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf,
if (write(fd, &buffer, count) < 0)
goto error;
- /* TODO size the arc table */
-
(void) close(fd);
return;
@@ -534,6 +595,42 @@ pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf)
pgf->pgf_gmondata = NULL;
}
+/*
+ * Append one call-graph arc record to the gmon.out file associated
+ * with the given image and PMC.  image->pi_vaddr is added to both
+ * 'rawfrom' and 'rawto' before the record is written.  Nothing is
+ * written if the image has no gmon file for 'pmcid' or if that file
+ * cannot be opened for appending.
+ */
+static void
+pmcstat_gmon_append_arc(struct pmcstat_image *image, pmc_id_t pmcid,
+    uintptr_t rawfrom, uintptr_t rawto, uint32_t count)
+{
+	struct pmcstat_gmonfile *gmon;
+	struct rawarc record;	/* from <sys/gmon.h> */
+
+	gmon = pmcstat_image_find_gmonfile(image, pmcid);
+	if (gmon == NULL)
+		return;
+
+	/* Open the append stream on first use; it is cached in pgf_file. */
+	if (gmon->pgf_file == NULL) {
+		gmon->pgf_file = fopen(
+		    pmcstat_string_unintern(gmon->pgf_name), "a");
+		if (gmon->pgf_file == NULL)
+			return;
+	}
+
+	record.raw_count = count;
+	record.raw_selfpc = rawto + image->pi_vaddr;
+	record.raw_frompc = rawfrom + image->pi_vaddr;
+
+	(void) fwrite(&record, sizeof(record), 1, gmon->pgf_file);
+}
+
+/*
+ * Look up the gmon file descriptor that an image keeps for a given
+ * PMC.  Returns NULL if the image's pi_gmlist has no entry whose
+ * pgf_pmcid equals 'pmcid'.
+ */
+static struct pmcstat_gmonfile *
+pmcstat_image_find_gmonfile(struct pmcstat_image *image, pmc_id_t pmcid)
+{
+	struct pmcstat_gmonfile *cur;
+
+	/* LIST_FOREACH leaves 'cur' NULL once the list is exhausted. */
+	LIST_FOREACH(cur, &image->pi_gmlist, pgf_next) {
+		if (cur->pgf_pmcid == pmcid)
+			break;
+	}
+
+	return (cur);
+}
+
+
/*
* Determine whether a given executable image is an A.OUT object, and
* if so, fill in its parameters from the text file.
@@ -583,6 +680,145 @@ pmcstat_image_get_aout_params(struct pmcstat_image *image,
}
/*
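+ * Helper function: order two symbols by address range for qsort(3)
+ * and bsearch(3). Symbols whose ranges overlap compare as equal.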
+ */
+
+static int
+pmcstat_symbol_compare(const void *a, const void *b)
+{
+	const struct pmcstat_symbol *lhs = a;
+	const struct pmcstat_symbol *rhs = b;
+
+	/*
+	 * ps_end is treated as exclusive: a range that ends where the
+	 * other begins sorts strictly before it.  Any overlap yields 0.
+	 */
+	if (lhs->ps_end <= rhs->ps_start)
+		return (-1);
+	return (lhs->ps_start >= rhs->ps_end ? 1 : 0);
+}
+
+/*
+ * Find the symbol of an image whose address range contains 'addr'.
+ * Returns NULL if the image has no symbol table or no range matches.
+ */
+
+static struct pmcstat_symbol *
+pmcstat_symbol_search(struct pmcstat_image *image, uintfptr_t addr)
+{
+	struct pmcstat_symbol key;
+
+	if (image->pi_symbols == NULL)
+		return (NULL);
+
+	/* Probe with the one-address range [addr, addr + 1). */
+	key.ps_start = addr;
+	key.ps_end = addr + 1;
+	key.ps_name = NULL;
+
+	return (bsearch(&key, image->pi_symbols, image->pi_symcount,
+	    sizeof(key), pmcstat_symbol_compare));
+}
+
+/*
+ * Add the function symbols found in the given ELF symbol table
+ * section to the sorted symbol table associated with the image.
+ *
+ * 'e' is the ELF descriptor of the image, 'scn' the symbol table
+ * section and 'sh' its section header.  Only STT_FUNC symbols are
+ * recorded, with addresses stored relative to image->pi_vaddr.  If
+ * the section cannot be read or memory cannot be allocated, the
+ * symbols already recorded for the image are preserved.
+ */
+static void
+pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e,
+    Elf_Scn *scn, GElf_Shdr *sh)
+{
+	int firsttime;
+	size_t n, newsyms, nshsyms, nfuncsyms;
+	struct pmcstat_symbol *symptr, *shrunk;
+	char *fnname;
+	GElf_Sym sym;
+	Elf_Data *data;
+
+	if ((data = elf_getdata(scn, NULL)) == NULL)
+		return;
+
+	/* A malformed header with a zero entry size would divide by 0. */
+	if (sh->sh_entsize == 0)
+		return;
+
+	/*
+	 * Determine the number of functions named in this
+	 * section.
+	 */
+	nshsyms = sh->sh_size / sh->sh_entsize;
+	for (n = nfuncsyms = 0; n < nshsyms; n++) {
+		if (gelf_getsym(data, (int) n, &sym) != &sym)
+			return;
+		if (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
+			nfuncsyms++;
+	}
+
+	if (nfuncsyms == 0)
+		return;
+
+	/*
+	 * Allocate space for the new entries.  realloc() reports
+	 * failure by returning NULL (leaving the old table valid); it
+	 * may legitimately return the old pointer on success, so the
+	 * result must be compared against NULL, not the old pointer.
+	 */
+	firsttime = image->pi_symbols == NULL;
+	symptr = realloc(image->pi_symbols,
+	    sizeof(*symptr) * (image->pi_symcount + nfuncsyms));
+	if (symptr == NULL)	/* realloc() failed. */
+		return;
+	image->pi_symbols = symptr;
+
+	/*
+	 * Append new symbols to the end of the current table.
+	 */
+	symptr += image->pi_symcount;
+
+	for (n = newsyms = 0; n < nshsyms; n++) {
+		if (gelf_getsym(data, (int) n, &sym) != &sym)
+			return;
+		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+			continue;
+
+		/*
+		 * pi_symcount is not updated until after this loop, so
+		 * the search only sees the previously sorted entries.
+		 * NOTE(review): the lookup uses the raw st_value while
+		 * entries are stored relative to pi_vaddr -- confirm
+		 * this is intended for images with a non-zero pi_vaddr.
+		 */
+		if (!firsttime && pmcstat_symbol_search(image, sym.st_value))
+			continue; /* We've seen this symbol already. */
+
+		if ((fnname = elf_strptr(e, sh->sh_link, sym.st_name))
+		    == NULL)
+			continue;
+
+		symptr->ps_name = pmcstat_string_intern(fnname);
+		symptr->ps_start = sym.st_value - image->pi_vaddr;
+		symptr->ps_end = symptr->ps_start + sym.st_size;
+		symptr++;
+
+		newsyms++;
+	}
+
+	image->pi_symcount += newsyms;
+
+	assert(newsyms <= nfuncsyms);
+
+	/*
+	 * If nothing usable was found at all, drop the allocation
+	 * rather than calling realloc() with a size of zero, and skip
+	 * the passes below (pi_symcount - 1 would wrap around).
+	 */
+	if (image->pi_symcount == 0) {
+		free(image->pi_symbols);
+		image->pi_symbols = NULL;
+		return;
+	}
+
+	/*
+	 * Return space to the system if there were duplicates.  If
+	 * the shrinking realloc() fails the larger table is still
+	 * valid, so keep using it.
+	 */
+	if (newsyms < nfuncsyms) {
+		shrunk = realloc(image->pi_symbols,
+		    sizeof(*symptr) * image->pi_symcount);
+		if (shrunk != NULL)
+			image->pi_symbols = shrunk;
+	}
+
+	/*
+	 * Keep the list of symbols sorted.
+	 */
+	qsort(image->pi_symbols, image->pi_symcount, sizeof(*symptr),
+	    pmcstat_symbol_compare);
+
+	/*
+	 * Deal with function symbols that have a size of 'zero' by
+	 * making them extend to the next higher address.  These
+	 * symbols are usually defined in assembly code.
+	 */
+	for (symptr = image->pi_symbols;
+	    symptr < image->pi_symbols + (image->pi_symcount - 1);
+	    symptr++)
+		if (symptr->ps_start == symptr->ps_end)
+			symptr->ps_end = (symptr+1)->ps_start;
+}
+
+/*
* Examine an ELF file to determine the size of its text segment.
* Sets image->pi_type if anything conclusive can be determined about
* this image.
@@ -592,28 +828,28 @@ static void
pmcstat_image_get_elf_params(struct pmcstat_image *image,
struct pmcstat_args *a)
{
- int fd, i;
- const char *path;
- void *mapbase;
+ int fd;
+ size_t i, nph, nsh;
+ const char *path, *elfbase;
uintfptr_t minva, maxva;
- const Elf_Ehdr *h;
- const Elf_Phdr *ph;
- const Elf_Shdr *sh;
-#if defined(__amd64__)
- const Elf32_Ehdr *h32;
- const Elf32_Phdr *ph32;
- const Elf32_Shdr *sh32;
-#endif
+ Elf *e;
+ Elf_Scn *scn;
+ GElf_Ehdr eh;
+ GElf_Phdr ph;
+ GElf_Shdr sh;
enum pmcstat_image_type image_type;
- struct stat st;
char buffer[PATH_MAX];
assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
- minva = ~(uintfptr_t) 0;
- maxva = (uintfptr_t) 0;
- path = pmcstat_string_unintern(image->pi_execpath);
+ image->pi_start = minva = ~(uintfptr_t) 0;
+ image->pi_end = maxva = (uintfptr_t) 0;
+ image->pi_type = image_type = PMCSTAT_IMAGE_INDETERMINABLE;
+ image->pi_isdynamic = 0;
+ image->pi_dynlinkerpath = NULL;
+ image->pi_vaddr = 0;
+ path = pmcstat_string_unintern(image->pi_execpath);
assert(path != NULL);
/*
@@ -627,112 +863,107 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image,
(void) snprintf(buffer, sizeof(buffer), "%s%s",
a->pa_fsroot, path);
+ e = NULL;
if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
- fstat(fd, &st) < 0 ||
- (mapbase = mmap(0, st.st_size, PROT_READ, MAP_SHARED,
- fd, 0)) == MAP_FAILED) {
- warn("WARNING: Cannot determine type of \"%s\"", buffer);
- image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
- if (fd != -1)
- (void) close(fd);
- return;
+ (e = elf_begin(fd, ELF_C_READ, NULL)) == NULL ||
+ (elf_kind(e) != ELF_K_ELF)) {
+ warnx("WARNING: Cannot determine the type of \"%s\".",
+ buffer);
+ goto done;
}
- (void) close(fd);
+ if (gelf_getehdr(e, &eh) != &eh) {
+ warnx("WARNING: Cannot retrieve the ELF Header for "
+ "\"%s\": %s.", buffer, elf_errmsg(-1));
+ goto done;
+ }
- /* Punt on non-ELF objects */
- h = (const Elf_Ehdr *) mapbase;
- if (!IS_ELF(*h))
- return;
+ if (eh.e_type != ET_EXEC && eh.e_type != ET_DYN &&
+ !(image->pi_iskernelmodule && eh.e_type == ET_REL)) {
+ warnx("WARNING: \"%s\" is of an unsupported ELF type.",
+ buffer);
+ goto done;
+ }
+
+ image_type = eh.e_ident[EI_CLASS] == ELFCLASS32 ?
+ PMCSTAT_IMAGE_ELF32 : PMCSTAT_IMAGE_ELF64;
/*
- * We only handle executable ELF objects and kernel
- * modules.
+ * Determine the virtual address where an executable would be
+ * loaded. Additionally, for dynamically linked executables,
+ * save the pathname to the runtime linker.
*/
- if (h->e_type != ET_EXEC && h->e_type != ET_DYN &&
- !(image->pi_iskernelmodule && h->e_type == ET_REL))
- return;
-
- image->pi_isdynamic = 0;
- image->pi_dynlinkerpath = NULL;
- image->pi_vaddr = 0;
-
-#define GET_VA(H, SH, MINVA, MAXVA) do { \
- for (i = 0; i < (H)->e_shnum; i++) \
- if ((SH)[i].sh_flags & SHF_EXECINSTR) { \
- (MINVA) = min((MINVA),(SH)[i].sh_addr); \
- (MAXVA) = max((MAXVA),(SH)[i].sh_addr + \
- (SH)[i].sh_size); \
- } \
- } while (0)
-
-
-#define GET_PHDR_INFO(H, PH, IMAGE) do { \
- for (i = 0; i < (H)->e_phnum; i++) { \
- switch ((PH)[i].p_type) { \
- case PT_DYNAMIC: \
- image->pi_isdynamic = 1; \
- break; \
- case PT_INTERP: \
- image->pi_dynlinkerpath = \
- pmcstat_string_intern( \
- (char *) mapbase + \
- (PH)[i].p_offset); \
- break; \
- case PT_LOAD: \
- if ((PH)[i].p_offset == 0) \
- image->pi_vaddr = \
- (PH)[i].p_vaddr; \
- break; \
- } \
- } \
- } while (0)
-
- switch (h->e_machine) {
- case EM_386:
- case EM_486:
-#if defined(__amd64__)
- /* a 32 bit executable */
- h32 = (const Elf32_Ehdr *) h;
- sh32 = (const Elf32_Shdr *)((uintptr_t) mapbase + h32->e_shoff);
-
- GET_VA(h32, sh32, minva, maxva);
-
- image->pi_entry = h32->e_entry;
-
- if (h32->e_type == ET_EXEC) {
- ph32 = (const Elf32_Phdr *)((uintptr_t) mapbase +
- h32->e_phoff);
- GET_PHDR_INFO(h32, ph32, image);
+ if (eh.e_type == ET_EXEC) {
+ if (elf_getphnum(e, &nph) == 0) {
+ warnx("WARNING: Could not determine the number of "
+ "program headers in \"%s\": %s.", buffer,
+ elf_errmsg(-1));
+ goto done;
}
- image_type = PMCSTAT_IMAGE_ELF32;
- break;
-#endif
- default:
- sh = (const Elf_Shdr *)((uintptr_t) mapbase + h->e_shoff);
-
- GET_VA(h, sh, minva, maxva);
+ for (i = 0; i < eh.e_phnum; i++) {
+ if (gelf_getphdr(e, i, &ph) != &ph) {
+ warnx("WARNING: Retrieval of PHDR entry #%ju "
+ "in \"%s\" failed: %s.", (uintmax_t) i,
+ buffer, elf_errmsg(-1));
+ goto done;
+ }
+ switch (ph.p_type) {
+ case PT_DYNAMIC:
+ image->pi_isdynamic = 1;
+ break;
+ case PT_INTERP:
+ if ((elfbase = elf_rawfile(e, NULL)) == NULL) {
+ warnx("WARNING: Cannot retrieve the "
+ "interpreter for \"%s\": %s.",
+ buffer, elf_errmsg(-1));
+ goto done;
+ }
+ image->pi_dynlinkerpath =
+ pmcstat_string_intern(elfbase +
+ ph.p_offset);
+ break;
+ case PT_LOAD:
+ if (ph.p_offset == 0)
+ image->pi_vaddr = ph.p_vaddr;
+ break;
+ }
+ }
+ }
- image->pi_entry = h->e_entry;
+ /*
+ * Get the min and max VA associated with this ELF object.
+ */
+ if (elf_getshnum(e, &nsh) == 0) {
+ warnx("WARNING: Could not determine the number of sections "
+ "for \"%s\": %s.", buffer, elf_errmsg(-1));
+ goto done;
+ }
- if (h->e_type == ET_EXEC) {
- ph = (const Elf_Phdr *)((uintptr_t) mapbase +
- h->e_phoff);
- GET_PHDR_INFO(h, ph, image);
+ for (i = 0; i < nsh; i++) {
+ if ((scn = elf_getscn(e, i)) == NULL ||
+ gelf_getshdr(scn, &sh) != &sh) {
+ warnx("WARNING: Could not retrieve section header "
+ "#%ju in \"%s\": %s.", (uintmax_t) i, buffer,
+ elf_errmsg(-1));
+ goto done;
}
- image_type = PMCSTAT_IMAGE_ELF64;
- break;
+ if (sh.sh_flags & SHF_EXECINSTR) {
+ minva = min(minva, sh.sh_addr);
+ maxva = max(maxva, sh.sh_addr + sh.sh_size);
+ }
+ if (sh.sh_type == SHT_SYMTAB || sh.sh_type == SHT_DYNSYM)
+ pmcstat_image_add_symbols(image, e, scn, &sh);
}
-#undef GET_PHDR_INFO
-#undef GET_VA
-
image->pi_start = minva;
image->pi_end = maxva;
image->pi_type = image_type;
+ image->pi_fullpath = pmcstat_string_intern(buffer);
- if (munmap(mapbase, st.st_size) < 0)
- err(EX_OSERR, "ERROR: Cannot unmap \"%s\"", path);
+ done:
+ (void) elf_end(e);
+ if (fd >= 0)
+ (void) close(fd);
return;
}
@@ -785,16 +1016,12 @@ pmcstat_image_from_path(pmcstat_interned_string internedpath,
/* First, look for an existing entry. */
LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
if (pi->pi_execpath == internedpath &&
- pi->pi_iskernelmodule == iskernelmodule) {
- /* move descriptor to the head of the lru list */
- TAILQ_REMOVE(&pmcstat_image_lru, pi, pi_lru);
- TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru);
+ pi->pi_iskernelmodule == iskernelmodule)
return (pi);
- }
/*
- * Allocate a new entry and place at the head of the hash and
- * LRU lists.
+ * Allocate a new entry and place it at the head of the hash
+ * and LRU lists.
*/
pi = malloc(sizeof(*pi));
if (pi == NULL)
@@ -803,9 +1030,14 @@ pmcstat_image_from_path(pmcstat_interned_string internedpath,
pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
pi->pi_execpath = internedpath;
pi->pi_start = ~0;
- pi->pi_entry = ~0;
pi->pi_end = 0;
+ pi->pi_entry = 0;
+ pi->pi_vaddr = 0;
+ pi->pi_isdynamic = 0;
pi->pi_iskernelmodule = iskernelmodule;
+ pi->pi_dynlinkerpath = NULL;
+ pi->pi_symbols = NULL;
+ pi->pi_symcount = 0;
/*
* Look for a suitable name for the sample files associated
@@ -836,12 +1068,13 @@ pmcstat_image_from_path(pmcstat_interned_string internedpath,
count = 0;
do {
if (++count > 999)
- errx(EX_CANTCREAT, "ERROR: cannot create a gmon "
- "file for \"%s\"", name);
+ errx(EX_CANTCREAT, "ERROR: cannot create a "
+ "gmon file for \"%s\"", name);
snprintf(name, sizeof(name), "%.*s~%3.3d.gmon",
nlen, sn, count);
if (pmcstat_string_lookup(name) == NULL) {
- pi->pi_samplename = pmcstat_string_intern(name);
+ pi->pi_samplename =
+ pmcstat_string_intern(name);
count = 0;
}
} while (count > 0);
@@ -851,7 +1084,6 @@ pmcstat_image_from_path(pmcstat_interned_string internedpath,
LIST_INIT(&pi->pi_gmlist);
LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);
- TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru);
return (pi);
}
@@ -893,11 +1125,7 @@ pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc,
* Find the gmon file corresponding to 'pmcid', creating it if
* needed.
*/
- LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next)
- if (pgf->pgf_pmcid == pmcid)
- break;
-
- /* If we don't have a gmon.out file for this PMCid, create one */
+ pgf = pmcstat_image_find_gmonfile(image, pmcid);
if (pgf == NULL) {
if ((pgf = calloc(1, sizeof(*pgf))) == NULL)
err(EX_OSERR, "ERROR:");
@@ -912,6 +1140,7 @@ pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc,
pgf->pgf_ndatabytes = sizeof(struct gmonhdr) +
pgf->pgf_nbuckets * sizeof(HISTCOUNTER);
pgf->pgf_nsamples = 0;
+ pgf->pgf_file = NULL;
pmcstat_gmon_create_file(pgf, image);
@@ -1012,12 +1241,11 @@ pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
* - we could have either 'start' or 'end' falling in the
* middle of a pcmap; in this case shorten the entry.
*/
-
TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) {
assert(pcm->ppm_lowpc < pcm->ppm_highpc);
if (pcm->ppm_highpc <= start)
continue;
- if (pcm->ppm_lowpc > end)
+ if (pcm->ppm_lowpc >= end)
return;
if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) {
/*
@@ -1046,10 +1274,10 @@ pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next);
return;
- } else if (pcm->ppm_lowpc < start)
- pcm->ppm_lowpc = start;
- else if (pcm->ppm_highpc > end)
- pcm->ppm_highpc = end;
+ } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc <= end)
+ pcm->ppm_highpc = start;
+ else if (pcm->ppm_lowpc >= start && pcm->ppm_highpc > end)
+ pcm->ppm_lowpc = end;
else
assert(0);
}
@@ -1067,12 +1295,17 @@ pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps,
struct stat st;
char fullpath[PATH_MAX];
+ /* Replace an existing name for the PMC. */
LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
if (pr->pr_pmcid == pmcid) {
pr->pr_pmcname = ps;
return;
}
+ /*
+ * Otherwise, allocate a new descriptor and create the
+ * appropriate directory to hold gmon.out files.
+ */
if ((pr = malloc(sizeof(*pr))) == NULL)
err(EX_OSERR, "ERROR: Cannot allocate pmc record");
@@ -1154,11 +1387,11 @@ pmcstat_process_elf_exec(struct pmcstat_process *pp,
pmcstat_image_link(pp, image, image->pi_vaddr);
/*
- * For dynamically linked executables we need to:
- * (a) find where the dynamic linker was mapped to for this
- * process,
- * (b) find all the executable objects that the dynamic linker
- * brought in.
+ * For dynamically linked executables we need to determine
+ * where the dynamic linker was mapped to for this process.
+ * Subsequent executable objects that are mapped in by the
+ * dynamic linker will be tracked by log events of type
+ * PMCLOG_TYPE_MAP_IN.
*/
if (image->pi_isdynamic) {
@@ -1319,11 +1552,423 @@ pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc)
return (NULL);
}
+/*
+ * Create a callgraph node for address `pc' inside `image'.  The new
+ * node starts with a zero sample count and an empty child list.
+ * Allocation failure is fatal.
+ */
+static struct pmcstat_cgnode *
+pmcstat_cgnode_allocate(struct pmcstat_image *image, uintfptr_t pc)
+{
+	struct pmcstat_cgnode *node;
+
+	node = malloc(sizeof(struct pmcstat_cgnode));
+	if (node == NULL)
+		err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
+
+	/* The (image, address) pair identifies the node. */
+	node->pcg_func = pc;
+	node->pcg_image = image;
+
+	/* No samples and no children have been recorded yet. */
+	LIST_INIT(&node->pcg_children);
+	node->pcg_nchildren = 0;
+	node->pcg_count = 0;
+
+	return (node);
+}
+
+/*
+ * Release the callgraph node `cg' together with every node reachable
+ * through its child list.
+ */
+static void
+pmcstat_cgnode_free(struct pmcstat_cgnode *cg)
+{
+	struct pmcstat_cgnode *child, *next;
+
+	/*
+	 * Fetch the successor before recursing: the recursive call
+	 * frees `child', so its link field cannot be read afterwards.
+	 */
+	for (child = LIST_FIRST(&cg->pcg_children); child != NULL;
+	    child = next) {
+		next = LIST_NEXT(child, pcg_sibling);
+		pmcstat_cgnode_free(child);
+	}
+
+	free(cg);
+}
+
+/*
+ * Look up, creating it if necessary, the top-level callgraph node for
+ * PMC `pmcid' that corresponds to the address `pc'.
+ *
+ * `pc' is resolved against the address map of process `pp' when
+ * `usermode' is non-zero and against that of the kernel process
+ * otherwise.  It is then converted to an offset inside the mapped
+ * image and, if a symbol covers that offset, replaced by the start
+ * address of that symbol.
+ *
+ * Returns NULL if no mapping contains `pc'; otherwise returns the node
+ * found in (or newly added to) the global hash table.  Allocation
+ * failure is fatal.
+ */
+static struct pmcstat_cgnode *
+pmcstat_cgnode_hash_lookup_pc(struct pmcstat_process *pp, uint32_t pmcid,
+    uintfptr_t pc, int usermode)
+{
+	struct pmcstat_pcmap *ppm;
+	struct pmcstat_symbol *sym;
+	struct pmcstat_image *image;
+	struct pmcstat_cgnode *cg;
+	struct pmcstat_cgnode_hash *h;
+	uintfptr_t loadaddress;
+	unsigned int i, hash;
+
+	ppm = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, pc);
+	if (ppm == NULL)
+		return (NULL);
+
+	image = ppm->ppm_image;
+
+	loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start;
+	pc -= loadaddress;	/* Convert to an offset in the image. */
+
+	/*
+	 * Try to determine the function at this offset.  If no
+	 * function is found, leave the `pc' value alone.
+	 */
+	if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
+		pc = sym->ps_start;
+
+	/*
+	 * Fold every byte of the address into the hash.  The shift
+	 * must advance by whole bytes; shifting by `i' bits would
+	 * only ever look at the low-order bits of `pc' and ignore the
+	 * rest of the address.
+	 */
+	for (hash = i = 0; i < sizeof(uintfptr_t); i++)
+		hash += (pc >> (i * 8)) & 0xFF;
+
+	hash &= PMCSTAT_HASH_MASK;
+
+	cg = NULL;
+	LIST_FOREACH(h, &pmcstat_cgnode_hash[hash], pch_next)
+	{
+		if (h->pch_pmcid != pmcid)
+			continue;
+
+		cg = h->pch_cgnode;
+
+		assert(cg != NULL);
+
+		if (cg->pcg_image == image && cg->pcg_func == pc)
+			return (cg);
+	}
+
+	/*
+	 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
+	 * new callgraph node and a new hash table entry for it.
+	 */
+	cg = pmcstat_cgnode_allocate(image, pc);
+	if ((h = malloc(sizeof(*h))) == NULL)
+		err(EX_OSERR, "ERROR: Could not allocate callgraph node");
+
+	h->pch_pmcid = pmcid;
+	h->pch_cgnode = cg;
+	LIST_INSERT_HEAD(&pmcstat_cgnode_hash[hash], h, pch_next);
+
+	pmcstat_cgnode_hash_count++;
+
+	return (cg);
+}
+
+/*
+ * qsort(3) comparison callback for arrays of callgraph node pointers.
+ * `a' and `b' each point at an array element, i.e. at a pointer to a
+ * node.  Nodes with larger sample counts sort first.
+ */
+static int
+pmcstat_cgnode_compare(const void *a, const void *b)
+{
+	const struct pmcstat_cgnode *lhs, *rhs;
+
+	lhs = *(const struct pmcstat_cgnode *const *) a;
+	rhs = *(const struct pmcstat_cgnode *const *) b;
+
+	if (lhs->pcg_count == rhs->pcg_count)
+		return (0);
+
+	/* Descending order: the smaller count compares as "greater". */
+	return (lhs->pcg_count < rhs->pcg_count ? 1 : -1);
+}
+
+/*
+ * Return the child of `parent' that matches the (image, pcoffset)
+ * pair.  If `parent' has no such child, a new node is allocated,
+ * linked at the head of the parent's child list and returned.
+ */
+static struct pmcstat_cgnode *
+pmcstat_cgnode_find(struct pmcstat_cgnode *parent, struct pmcstat_image *image,
+    uintfptr_t pcoffset)
+{
+	struct pmcstat_cgnode *node;
+
+	/* Reuse an existing child when one matches. */
+	for (node = LIST_FIRST(&parent->pcg_children); node != NULL;
+	    node = LIST_NEXT(node, pcg_sibling)) {
+		if (node->pcg_image != image)
+			continue;
+		if (node->pcg_func == pcoffset)
+			return (node);
+	}
+
+	/* No match: create the node and account for it in the parent. */
+	node = pmcstat_cgnode_allocate(image, pcoffset);
+	parent->pcg_nchildren++;
+	LIST_INSERT_HEAD(&parent->pcg_children, node, pcg_sibling);
+
+	return (node);
+}
+
+/*
+ * Print one callgraph node. The output format is:
+ *
+ * indentation %(parent's samples) #nsamples function@object
+ *
+ * The line is written to a->pa_graphfile.  `depth' is the nesting
+ * level of `cg' and is used as the width of the indentation that is
+ * emitted both before the percentage and before the function name.
+ * `total' is the sample count that cg->pcg_count is expressed as a
+ * percentage of; the recursive calls below pass the sample count of
+ * `cg' itself as the `total' for its children.
+ *
+ * After the node's own line, its children are printed recursively at
+ * depth + 1, ordered by pmcstat_cgnode_compare().
+ */
+static void
+pmcstat_cgnode_print(struct pmcstat_args *a, struct pmcstat_cgnode *cg,
+ int depth, uint32_t total)
+{
+ uint32_t n;
+ const char *space;
+ struct pmcstat_symbol *sym;
+ struct pmcstat_cgnode **sortbuffer, **cgn, *pcg;
+
+ space = " "; /* padded out to the requested width by "%*s" below */
+
+ if (depth > 0)
+ (void) fprintf(a->pa_graphfile, "%*s", depth, space);
+
+ if (cg->pcg_count == total)
+ (void) fprintf(a->pa_graphfile, "100.0%% ");
+ else
+ (void) fprintf(a->pa_graphfile, "%05.2f%% ",
+ 100.0 * cg->pcg_count / total);
+
+ n = fprintf(a->pa_graphfile, " [%u] ", cg->pcg_count); /* n: characters written for the count */
+
+ /* #samples is a 12 character wide field. */
+ if (n < 12)
+ (void) fprintf(a->pa_graphfile, "%*s", 12 - n, space);
+
+ if (depth > 0)
+ (void) fprintf(a->pa_graphfile, "%*s", depth, space);
+
+ sym = pmcstat_symbol_search(cg->pcg_image, cg->pcg_func);
+ if (sym)
+ (void) fprintf(a->pa_graphfile, "%s",
+ pmcstat_string_unintern(sym->ps_name));
+ else /* no symbol found: print pi_vaddr + pcg_func as a raw address */
+ (void) fprintf(a->pa_graphfile, "%p",
+ (void *) (cg->pcg_image->pi_vaddr + cg->pcg_func));
+
+ if (pmcstat_previous_filename_printed !=
+ cg->pcg_image->pi_fullpath) { /* name the object only when it differs from the last one printed */
+ pmcstat_previous_filename_printed = cg->pcg_image->pi_fullpath;
+ (void) fprintf(a->pa_graphfile, " @ %s\n",
+ pmcstat_string_unintern(
+ pmcstat_previous_filename_printed));
+ } else
+ (void) fprintf(a->pa_graphfile, "\n");
+
+ if (cg->pcg_nchildren == 0)
+ return;
+
+ if ((sortbuffer = (struct pmcstat_cgnode **)
+ malloc(sizeof(struct pmcstat_cgnode *) *
+ cg->pcg_nchildren)) == NULL)
+ err(EX_OSERR, "ERROR: Cannot print callgraph");
+ cgn = sortbuffer;
+
+ LIST_FOREACH(pcg, &cg->pcg_children, pcg_sibling)
+ *cgn++ = pcg; /* collect the child pointers into a flat array for qsort() */
+
+ assert(cgn - sortbuffer == (int) cg->pcg_nchildren);
+
+ qsort(sortbuffer, cg->pcg_nchildren, sizeof(struct pmcstat_cgnode *),
+ pmcstat_cgnode_compare);
+
+ for (cgn = sortbuffer, n = 0; n < cg->pcg_nchildren; n++, cgn++)
+ pmcstat_cgnode_print(a, *cgn, depth+1, cg->pcg_count);
+
+ free(sortbuffer);
+}
+
+/*
+ * Record one callchain sample.
+ *
+ * cc[0] selects (or creates) the top-level callgraph node for `pmcid'
+ * in the global hash table; if it cannot be resolved, the sample is
+ * counted as a dubious frame and dropped.  Each of the remaining
+ * entries of `cc', up to `nsamples' entries and a->pa_graphdepth
+ * levels, is then chained below the node of the preceding entry.
+ *
+ * Addresses are resolved against process `pp' when `usermode' is
+ * non-zero and against the kernel process otherwise.
+ */
+static void
+pmcstat_record_callchain(struct pmcstat_process *pp, uint32_t pmcid,
+    uint32_t nsamples, uintfptr_t *cc, int usermode, struct pmcstat_args *a)
+{
+	uint32_t frame, maxdepth;
+	uintfptr_t base, offset;
+	struct pmcstat_cgnode *node;
+	struct pmcstat_image *image;
+	struct pmcstat_pcmap *map;
+	struct pmcstat_process *owner;
+	struct pmcstat_symbol *sym;
+
+	/* The first entry is looked up in the global hash table. */
+	node = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, cc[0], usermode);
+	if (node == NULL) {
+		pmcstat_stats.ps_callchain_dubious_frames++;
+		return;
+	}
+	node->pcg_count++;
+
+	owner = usermode ? pp : pmcstat_kernproc;
+	maxdepth = (uint32_t) a->pa_graphdepth;
+
+	/*
+	 * For every further entry in the record:
+	 *   - find the mapping that contains the address, giving up
+	 *     on the rest of the chain if there is none;
+	 *   - convert the address to an offset in the mapped image;
+	 *   - if a symbol covers the offset, use the symbol's start
+	 *     address instead; otherwise keep the offset unchanged;
+	 *   - find or create the matching child of the current node,
+	 *     count the sample against it and descend into it.
+	 */
+	for (frame = 1; frame < maxdepth && frame < nsamples; frame++) {
+		map = pmcstat_process_find_map(owner, cc[frame]);
+		if (map == NULL)
+			return;
+
+		image = map->ppm_image;
+		base = map->ppm_lowpc + image->pi_vaddr - image->pi_start;
+		offset = cc[frame] - base;
+
+		sym = pmcstat_symbol_search(image, offset);
+		if (sym != NULL)
+			offset = sym->ps_start;
+
+		node = pmcstat_cgnode_find(node, image, offset);
+		node->pcg_count++;
+	}
+}
+
+/*
+ * Print the callgraph for one PMC.
+ *
+ * All top-level callgraph nodes recorded for pmcr->pr_pmcid are
+ * gathered from the global hash table, sorted by decreasing sample
+ * count and printed to a->pa_graphfile, preceded by a header line
+ * giving the PMC's name and its total number of samples.  Nothing is
+ * printed if the PMC has no callgraph nodes.
+ */
+static void
+pmcstat_callgraph_print_for_pmcid(struct pmcstat_args *a,
+    struct pmcstat_pmcrecord *pmcr)
+{
+	int n, nentries;
+	uint32_t nsamples, pmcid;
+	struct pmcstat_cgnode **sortbuffer, **cgn;
+	struct pmcstat_cgnode_hash *pch;
+
+	/*
+	 * With an empty hash table there is nothing to print.  Bail
+	 * out before allocating: a zero-sized malloc() is permitted
+	 * to return NULL, which would be mistaken for a failure.
+	 */
+	if (pmcstat_cgnode_hash_count == 0)
+		return;
+
+	/*
+	 * We pull out all callgraph nodes in the top-level hash table
+	 * with a matching PMC id.  We then sort these based on the
+	 * frequency of occurrence.  Each callgraph node is then
+	 * printed.
+	 */
+
+	nsamples = 0;
+	pmcid = pmcr->pr_pmcid;
+	if ((sortbuffer = malloc(sizeof(*sortbuffer) *
+	    pmcstat_cgnode_hash_count)) == NULL)
+		err(EX_OSERR, "ERROR: Cannot sort callgraph");
+	cgn = sortbuffer;
+
+	/* Fill with 0xFF, presumably to make unset slots conspicuous. */
+	memset(sortbuffer, 0xFF, pmcstat_cgnode_hash_count *
+	    sizeof(*sortbuffer));
+
+	for (n = 0; n < PMCSTAT_NHASH; n++)
+		LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next)
+			if (pch->pch_pmcid == pmcid) {
+				nsamples += pch->pch_cgnode->pcg_count;
+				*cgn++ = pch->pch_cgnode;
+			}
+
+	nentries = cgn - sortbuffer;
+	assert(nentries <= pmcstat_cgnode_hash_count);
+
+	if (nentries > 0) {
+		qsort(sortbuffer, nentries, sizeof(*sortbuffer),
+		    pmcstat_cgnode_compare);
+
+		(void) fprintf(a->pa_graphfile,
+		    "@ %s [%u samples]\n\n",
+		    pmcstat_string_unintern(pmcr->pr_pmcname),
+		    nsamples);
+
+		for (cgn = sortbuffer, n = 0; n < nentries; n++, cgn++) {
+			/* Make each top-level tree name its object file. */
+			pmcstat_previous_filename_printed = NULL;
+			pmcstat_cgnode_print(a, *cgn, 0, nsamples);
+			(void) fprintf(a->pa_graphfile, "\n");
+		}
+	}
+
+	/* Released on every path, including when no node matched. */
+	free(sortbuffer);
+}
+
+/*
+ * Print the callgraph of every PMC on the global `pmcstat_pmcs' list.
+ */
+static void
+pmcstat_callgraph_print(struct pmcstat_args *a)
+{
+	struct pmcstat_pmcrecord *rec;
+
+	for (rec = LIST_FIRST(&pmcstat_pmcs); rec != NULL;
+	    rec = LIST_NEXT(rec, pr_next))
+		pmcstat_callgraph_print_for_pmcid(a, rec);
+}
+/*
+ * Walk the subtree below `cg', calling pmcstat_gmon_append_arc() for
+ * every node/child pair in which both nodes belong to the same image.
+ */
+static void
+pmcstat_cgnode_do_gmon_arcs(struct pmcstat_cgnode *cg, pmc_id_t pmcid)
+{
+	struct pmcstat_cgnode *child;
+
+	for (child = LIST_FIRST(&cg->pcg_children); child != NULL;
+	    child = LIST_NEXT(child, pcg_sibling)) {
+		/* Pairs that straddle two images are not reported. */
+		if (child->pcg_image == cg->pcg_image)
+			pmcstat_gmon_append_arc(cg->pcg_image, pmcid,
+			    child->pcg_func, cg->pcg_func,
+			    child->pcg_count);
+
+		/* Descend whether or not an arc was added above. */
+		if (child->pcg_nchildren > 0)
+			pmcstat_cgnode_do_gmon_arcs(child, pmcid);
+	}
+}
+
+/*
+ * Run pmcstat_cgnode_do_gmon_arcs() on the top-level callgraph nodes
+ * that were recorded for `pmcid'.
+ *
+ * NOTE(review): only top-level nodes with more than one child are
+ * processed, whereas the recursive walker descends into any node
+ * with at least one child.  Confirm that skipping single-child
+ * top-level nodes is intended.
+ */
+static void
+pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmc_id_t pmcid)
+{
+	int bucket;
+	struct pmcstat_cgnode *root;
+	struct pmcstat_cgnode_hash *entry;
+
+	for (bucket = 0; bucket < PMCSTAT_NHASH; bucket++) {
+		LIST_FOREACH(entry, &pmcstat_cgnode_hash[bucket], pch_next) {
+			if (entry->pch_pmcid != pmcid)
+				continue;
+
+			root = entry->pch_cgnode;
+			if (root->pcg_nchildren > 1)
+				pmcstat_cgnode_do_gmon_arcs(root, pmcid);
+		}
+	}
+}
+
+
+/*
+ * Generate gmon arcs for every PMC on the global `pmcstat_pmcs' list.
+ */
+static void
+pmcstat_callgraph_do_gmon_arcs(void)
+{
+	struct pmcstat_pmcrecord *rec;
+
+	for (rec = LIST_FIRST(&pmcstat_pmcs); rec != NULL;
+	    rec = LIST_NEXT(rec, pr_next))
+		pmcstat_callgraph_do_gmon_arcs_for_pmcid(rec->pr_pmcid);
+}
+
+/*
+ * Convert a hwpmc(4) log to profile information. A system-wide
+ * callgraph is generated if FLAG_DO_CALLGRAPHS is set. gmon.out
+ * files usable by gprof(1) are created if FLAG_DO_GPROF is set.
+ */
static int
-pmcstat_convert_log(struct pmcstat_args *a)
+pmcstat_analyze_log(struct pmcstat_args *a)
{
+ uint32_t cpu, cpuflags;
uintfptr_t pc;
pid_t pid;
struct pmcstat_image *image;
@@ -1332,6 +1977,11 @@ pmcstat_convert_log(struct pmcstat_args *a)
struct pmclog_ev ev;
pmcstat_interned_string image_path;
+ assert(a->pa_flags & FLAG_DO_ANALYSIS);
+
+ if (elf_version(EV_CURRENT) == EV_NONE)
+ err(EX_UNAVAILABLE, "Elf library intialization failed");
+
while (pmclog_read(a->pa_logparser, &ev) == 0) {
assert(ev.pl_state == PMCLOG_OK);
@@ -1344,6 +1994,7 @@ pmcstat_convert_log(struct pmcstat_args *a)
ev.pl_u.pl_i.pl_version,
PMC_VERSION_MAJOR);
break;
+
case PMCLOG_TYPE_MAP_IN:
/*
* Introduce an address range mapping for a
@@ -1391,6 +2042,10 @@ pmcstat_convert_log(struct pmcstat_args *a)
break;
case PMCLOG_TYPE_PCSAMPLE:
+ /*
+ * Note: the `PCSAMPLE' log entry is not
+ * generated by hwpmc(4) after version 2.
+ */
/*
* We bring in the gmon file for the image
@@ -1415,6 +2070,42 @@ pmcstat_convert_log(struct pmcstat_args *a)
break;
+ case PMCLOG_TYPE_CALLCHAIN:
+ pmcstat_stats.ps_samples_total++;
+
+ cpuflags = ev.pl_u.pl_cc.pl_cpuflags;
+ cpu = PMC_CALLCHAIN_CPUFLAGS_TO_CPU(cpuflags);
+
+ /* Filter on the CPU id. */
+ if ((a->pa_cpumask & (1 << cpu)) == 0) {
+ pmcstat_stats.ps_samples_skipped++;
+ break;
+ }
+
+ pp = pmcstat_process_lookup(ev.pl_u.pl_cc.pl_pid,
+ PMCSTAT_ALLOCATE);
+
+ pmcstat_record_callchain(pp,
+ ev.pl_u.pl_cc.pl_pmcid, ev.pl_u.pl_cc.pl_npc,
+ ev.pl_u.pl_cc.pl_pc,
+ PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags), a);
+
+ if ((a->pa_flags & FLAG_DO_GPROF) == 0)
+ break;
+
+ pc = ev.pl_u.pl_cc.pl_pc[0];
+ if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL &&
+ (ppm = pmcstat_process_find_map(pmcstat_kernproc,
+ pc)) == NULL) { /* unknown offset */
+ pmcstat_stats.ps_samples_unknown_offset++;
+ break;
+ }
+
+ pmcstat_image_increment_bucket(ppm, pc,
+ ev.pl_u.pl_cc.pl_pmcid, a);
+
+ break;
+
case PMCLOG_TYPE_PMCALLOCATE:
/*
* Record the association pmc id between this
@@ -1515,10 +2206,23 @@ static int
pmcstat_print_log(struct pmcstat_args *a)
{
struct pmclog_ev ev;
+ uint32_t npc;
while (pmclog_read(a->pa_logparser, &ev) == 0) {
assert(ev.pl_state == PMCLOG_OK);
switch (ev.pl_type) {
+ case PMCLOG_TYPE_CALLCHAIN:
+ PMCSTAT_PRINT_ENTRY(a, "callchain",
+ "%d 0x%x %d %d %c", ev.pl_u.pl_cc.pl_pid,
+ ev.pl_u.pl_cc.pl_pmcid,
+ PMC_CALLCHAIN_CPUFLAGS_TO_CPU(ev.pl_u.pl_cc. \
+ pl_cpuflags), ev.pl_u.pl_cc.pl_npc,
+ PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\
+ pl_cpuflags) ? 'u' : 's');
+ for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++)
+ PMCSTAT_PRINT_ENTRY(a, "...", "%p",
+ (void *) ev.pl_u.pl_cc.pl_pc[npc]);
+ break;
case PMCLOG_TYPE_CLOSELOG:
PMCSTAT_PRINT_ENTRY(a,"closelog",);
break;
@@ -1663,7 +2367,7 @@ pmcstat_open_log(const char *path, int mode)
/*
* If 'path' is "-" then open one of stdin or stdout depending
* on the value of 'mode'.
- *
+ *
* If 'path' contains a ':' and does not start with a '/' or '.',
* and is being opened for writing, treat it as a "host:port"
* specification and open a network socket.
@@ -1717,7 +2421,7 @@ pmcstat_open_log(const char *path, int mode)
S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
errstr = strerror(errno);
- done:
+ done:
if (errstr)
errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path,
(mode == PMCSTAT_OPEN_FOR_READ ? "reading" : "writing"),
@@ -1735,14 +2439,13 @@ pmcstat_process_log(struct pmcstat_args *a)
{
/*
- * If gprof style profiles haven't been asked for, just print the
- * log to the current output file.
+ * If analysis has not been asked for, just print the log to
+ * the current output file.
*/
if (a->pa_flags & FLAG_DO_PRINT)
return (pmcstat_print_log(a));
else
- /* convert the log to gprof compatible profiles */
- return (pmcstat_convert_log(a));
+ return (pmcstat_analyze_log(a));
}
/*
@@ -1789,6 +2492,7 @@ pmcstat_shutdown_logging(struct pmcstat_args *a)
struct pmcstat_gmonfile *pgf, *pgftmp;
struct pmcstat_image *pi, *pitmp;
struct pmcstat_process *pp, *pptmp;
+ struct pmcstat_cgnode_hash *pch, *pchtmp;
/* determine where to send the map file */
mf = NULL;
@@ -1803,37 +2507,75 @@ pmcstat_shutdown_logging(struct pmcstat_args *a)
if (mf)
(void) fprintf(mf, "MAP:\n");
- for (i = 0; i < PMCSTAT_NHASH; i++) {
- LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp) {
+ if (a->pa_flags & FLAG_DO_CALLGRAPHS)
+ pmcstat_callgraph_print(a);
+
+ /*
+ * Sync back all gprof flat profile data.
+ */
+ for (i = 0; i < PMCSTAT_NHASH; i++) {
+ LIST_FOREACH(pi, &pmcstat_image_hash[i], pi_next) {
if (mf)
(void) fprintf(mf, " \"%s\" => \"%s\"",
pmcstat_string_unintern(pi->pi_execpath),
- pmcstat_string_unintern(pi->pi_samplename));
+ pmcstat_string_unintern(
+ pi->pi_samplename));
/* flush gmon.out data to disk */
- LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next,
- pgftmp) {
+ LIST_FOREACH(pgf, &pi->pi_gmlist, pgf_next) {
pmcstat_gmon_unmap_file(pgf);
- LIST_REMOVE(pgf, pgf_next);
if (mf)
(void) fprintf(mf, " %s/%d",
- pmcstat_pmcid_to_name(pgf->pgf_pmcid),
+ pmcstat_pmcid_to_name(
+ pgf->pgf_pmcid),
pgf->pgf_nsamples);
if (pgf->pgf_overflow && a->pa_verbosity >= 1)
warnx("WARNING: profile \"%s\" "
"overflowed.",
pmcstat_string_unintern(
pgf->pgf_name));
- free(pgf);
}
if (mf)
(void) fprintf(mf, "\n");
+ }
+ }
+
+ /*
+ * Compute arcs and add these to the gprof files.
+ */
+ if (a->pa_flags & FLAG_DO_GPROF && a->pa_graphdepth > 1)
+ pmcstat_callgraph_do_gmon_arcs();
+
+ /*
+ * Free memory.
+ */
+ for (i = 0; i < PMCSTAT_NHASH; i++) {
+ LIST_FOREACH_SAFE(pch, &pmcstat_cgnode_hash[i], pch_next,
+ pchtmp) {
+ pmcstat_cgnode_free(pch->pch_cgnode);
+ free(pch);
+ }
+ }
+
+ for (i = 0; i < PMCSTAT_NHASH; i++) {
+ LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp)
+ {
+ LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next,
+ pgftmp) {
+ if (pgf->pgf_file)
+ (void) fclose(pgf->pgf_file);
+ LIST_REMOVE(pgf, pgf_next);
+ free(pgf);
+ }
+ if (pi->pi_symbols)
+ free(pi->pi_symbols);
LIST_REMOVE(pi, pi_next);
free(pi);
}
+
LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next,
pptmp) {
LIST_REMOVE(pp, pp_next);
@@ -1862,6 +2604,8 @@ pmcstat_shutdown_logging(struct pmcstat_args *a)
PRINT("#samples/total", samples_total, a);
PRINT("#samples/unclaimed", samples_unknown_offset, a);
PRINT("#samples/unknown-object", samples_indeterminable, a);
+ PRINT("#callchain/dubious-frames", callchain_dubious_frames,
+ a);
}
if (mf)
OpenPOWER on IntegriCloud