Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 86a9eee0 authored by Arnaldo Carvalho de Melo's avatar Arnaldo Carvalho de Melo Committed by Ingo Molnar
Browse files

perf diff: Introduce tool to show performance difference



I guess it is enough to show some examples:

[root@doppio linux-2.6-tip]# rm -f perf.data*
[root@doppio linux-2.6-tip]# ls -la perf.data*
ls: cannot access perf.data*: No such file or directory
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2692 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74280 2009-12-14 20:03 perf.data
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data.old
[root@doppio linux-2.6-tip]# perf diff | head -5
   1        -34994580     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        -15307806         [kernel.kallsyms]   __kmalloc
   3    +1   +3665941     /lib64/libc-2.10.1.so   __GI_memmove
   4    +4  +23508995     /lib64/libc-2.10.1.so   _int_malloc
   5    +7  +38538813         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -p | head -5
   1        +1.00%     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2                       [kernel.kallsyms]   __kmalloc
   3    +1             /lib64/libc-2.10.1.so   __GI_memmove
   4    +4             /lib64/libc-2.10.1.so   _int_malloc
   5    +7  -1.00%         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -v | head -5
   1        361449551 326454971 -34994580     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        151009241 135701435 -15307806         [kernel.kallsyms]   __kmalloc
   3    +1  101805328 105471269  +3665941     /lib64/libc-2.10.1.so   __GI_memmove
   4    +4   78041440 101550435 +23508995     /lib64/libc-2.10.1.so   _int_malloc
   5    +7   59536172  98074985 +38538813         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -vp | head -5
   1        9.00% 8.00% +1.00%     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        3.00% 3.00%                [kernel.kallsyms]   __kmalloc
   3    +1  2.00% 2.00%            /lib64/libc-2.10.1.so   __GI_memmove
   4    +4  2.00% 2.00%            /lib64/libc-2.10.1.so   _int_malloc
   5    +7  1.00% 2.00% -1.00%         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]#

This should be enough for diffs where the system is non
volatile, i.e. when one doesn't update binaries.

For volatile environments, stay tuned for the next perf tool
feature: a buildid cache populated by 'perf record', managed by
'perf buildid-cache' a-la ccache, and used by all the report
tools.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
LKML-Reference: <1260828571-3613-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b38d3464
Loading
Loading
Loading
Loading
+31 −0
Original line number Diff line number Diff line
perf-diff(1)
==============

NAME
----
perf-diff - Read two perf.data files (created by perf record) and display the differential profile

SYNOPSIS
--------
[verse]
'perf diff' [oldfile] [newfile]

DESCRIPTION
-----------
This command displays the performance difference between two perf.data files
captured via perf record.

If no parameters are passed it will assume perf.data.old and perf.data.

OPTIONS
-------
-p::
--percentages::
	Show percentages instead of raw counters
-v::
--verbose::
	Be verbose, for instance, show the raw counters in addition to the
	diff.
SEE ALSO
--------
linkperf:perf-record[1]
+1 −0
Original line number Diff line number Diff line
@@ -427,6 +427,7 @@ BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-diff.o
BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
BUILTIN_OBJS += builtin-buildid-list.o
+288 −0
Original line number Diff line number Diff line
/*
 * builtin-diff.c
 *
 * Builtin diff command: Analyze two perf.data input files, look up and read
 * DSOs and symbol information, sort them and produce a diff.
 */
#include "builtin.h"

#include "util/debug.h"
#include "util/event.h"
#include "util/hist.h"
#include "util/session.h"
#include "util/sort.h"
#include "util/symbol.h"
#include "util/util.h"

#include <stdlib.h>

/*
 * Input files to compare.  These are only the defaults: cmd_diff() replaces
 * them with the positional command line arguments when any are given.
 */
static char	   const *input_old = "perf.data.old",
			 *input_new = "perf.data";
/* Set by -f/--force and handed to perf_session__new() for both inputs. */
static int	   force;
/* Set by -p/--percentages: print percentages instead of raw counters. */
static bool 	   show_percent;

/*
 * Symbol configuration: initialised by symbol__init() in cmd_diff(), passed
 * to perf_session__new(), and written to by the -m/--modules option.
 */
struct symbol_conf symbol_conf;

/*
 * Account @count against the histogram entry of @self that corresponds to
 * the resolved location @al.
 *
 * The entry is obtained from __perf_session__add_hist_entry(); when that
 * helper flags a hit, @count is added to the entry's running count here.
 *
 * Returns 0 on success, or -ENOMEM when the helper hands back no entry.
 */
static int perf_session__add_hist_entry(struct perf_session *self,
					struct addr_location *al, u64 count)
{
	bool found;
	struct hist_entry *entry;

	entry = __perf_session__add_hist_entry(self, al, NULL, count, &found);
	if (!entry)
		return -ENOMEM;

	if (found)
		entry->count += count;

	return 0;
}

/*
 * Sample handler installed in event_ops.
 *
 * Resolves the sample with event__preprocess_sample(), extracts its period
 * with event__parse_sample() (the period defaults to 1) and, when a symbol
 * was resolved, adds the period to that symbol's histogram entry.  The
 * period is added to the session total whether or not a symbol was found.
 *
 * Returns 0 on success and -1 when the sample could not be preprocessed or
 * accounted.
 */
static int diff__process_sample_event(event_t *event, struct perf_session *session)
{
	struct sample_data sample = { .period = 1, };
	struct addr_location al;

	dump_printf("(IP, %d): %d: %p\n", event->header.misc,
		    event->ip.pid, (void *)(long)event->ip.ip);

	if (event__preprocess_sample(event, session, &al, NULL) < 0) {
		pr_warning("problem processing %d event, skipping it.\n",
			   event->header.type);
		return -1;
	}

	event__parse_sample(event, session->sample_type, &sample);

	/* Only samples that resolved to a symbol get a histogram entry. */
	if (al.sym != NULL) {
		if (perf_session__add_hist_entry(session, &al, sample.period) != 0) {
			pr_warning("problem incrementing symbol count, skipping event\n");
			return -1;
		}
	}

	session->events_stats.total += sample.period;
	return 0;
}

/*
 * Event handlers handed to perf_session__process_events() for each input
 * file.  Samples are handled in this file by diff__process_sample_event();
 * the other event types go to event__process_*() handlers defined elsewhere.
 *
 * The table is not const because the -P/--full-paths option writes
 * event_ops.full_paths directly.
 */
static struct perf_event_ops event_ops = {
	.process_sample_event = diff__process_sample_event,
	.process_mmap_event   = event__process_mmap,
	.process_comm_event   = event__process_comm,
	.process_exit_event   = event__process_task,
	.process_fork_event   = event__process_task,
	.process_lost_event   = event__process_lost,
};

/*
 * Link @he into @root, a tree keyed by (DSO name, symbol name).
 *
 * The DSO name is the primary key and the symbol name breaks ties.  An
 * entry whose key compares greater than the node being visited descends to
 * the left, anything else (including an equal key) descends to the right;
 * perf_session__find_hist_entry_by_name() walks the tree the same way.
 */
static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
						    struct hist_entry *he)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;

	while (*link != NULL) {
		struct hist_entry *cur;
		int cmp;

		parent = *link;
		cur = rb_entry(parent, struct hist_entry, rb_node);

		cmp = strcmp(he->map->dso->name, cur->map->dso->name);
		if (cmp == 0)
			cmp = strcmp(he->sym->name, cur->sym->name);

		link = cmp > 0 ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&he->rb_node, parent, link);
	rb_insert_color(&he->rb_node, root);
}

static void perf_session__resort_by_name(struct perf_session *self)
{
	unsigned long position = 1;
	struct rb_root tmp = RB_ROOT;
	struct rb_node *next = rb_first(&self->hists);

	while (next != NULL) {
		struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);

		next = rb_next(&n->rb_node);
		rb_erase(&n->rb_node, &self->hists);
		n->position = position++;
		perf_session__insert_hist_entry_by_name(&tmp, n);
	}

	self->hists = tmp;
}

/*
 * Look up in @self the histogram entry that has the same DSO name and
 * symbol name as @he.
 *
 * Walks self->hists with the ordering used by
 * perf_session__insert_hist_entry_by_name() (greater keys to the left), so
 * the tree must have been re-keyed with perf_session__resort_by_name().
 *
 * Returns the matching entry, or NULL when there is none.
 */
static struct hist_entry *
perf_session__find_hist_entry_by_name(struct perf_session *self,
				      struct hist_entry *he)
{
	struct rb_node *nd = self->hists.rb_node;

	while (nd != NULL) {
		struct hist_entry *cur = rb_entry(nd, struct hist_entry, rb_node);
		int cmp = strcmp(he->map->dso->name, cur->map->dso->name);

		if (cmp == 0)
			cmp = strcmp(he->sym->name, cur->sym->name);

		if (cmp == 0)
			return cur;

		nd = cmp > 0 ? nd->rb_left : nd->rb_right;
	}

	return NULL;
}

/*
 * Pair every histogram entry of @new_session with the entry of
 * @old_session that has the same DSO and symbol name.
 *
 * @old_session is first re-keyed by name (which also records each old
 * entry's rank in ->position).  Afterwards ->pair of each new entry points
 * at its old counterpart, or is NULL when the symbol is absent from the old
 * session.
 */
static void perf_session__match_hists(struct perf_session *old_session,
				      struct perf_session *new_session)
{
	struct rb_node *nd;

	perf_session__resort_by_name(old_session);

	nd = rb_first(&new_session->hists);
	while (nd != NULL) {
		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);

		he->pair = perf_session__find_hist_entry_by_name(old_session, he);
		nd = rb_next(nd);
	}
}

/*
 * Print one line of the diff for @self, an entry of the new session.
 *
 * @self:         entry of the new session; ->pair is its counterpart in the
 *                old session, or NULL when the symbol was not seen there
 * @pos:          1-based rank of @self in the new session
 * @session:      the new session (supplies the total for new percentages)
 * @pair_session: the old session (supplies the total for old percentages)
 * @fp:           output stream
 *
 * Columns: rank, rank displacement relative to the old session (blank when
 * unchanged or unpaired), then either percentages (show_percent) or raw
 * counters, then DSO and symbol name.  With verbose set the old and new
 * values are printed in front of the delta.
 *
 * The delta is always "new - old", for both raw counters and percentages.
 *
 * Returns the number of characters printed.
 */
static size_t hist_entry__fprintf_matched(struct hist_entry *self,
					  unsigned long pos,
					  struct perf_session *session,
					  struct perf_session *pair_session,
					  FILE *fp)
{
	u64 old_count = 0;
	/* Empty string: "%5.5s" below pads it to a blank column. */
	char displacement[16] = "";
	size_t printed;

	if (self->pair != NULL) {
		long pdiff = (long)self->pair->position - (long)pos;

		old_count = self->pair->count;
		if (pdiff != 0)
			snprintf(displacement, sizeof(displacement), "%+4ld", pdiff);
	}

	printed = fprintf(fp, "%4lu %5.5s ", pos, displacement);

	if (show_percent) {
		double old_percent = 0.0, new_percent = 0.0, diff;

		/*
		 * Compute in floating point so fractions of a percent are
		 * kept, and leave the value at 0 for a session without any
		 * accounted events instead of dividing by zero.
		 */
		if (pair_session->events_stats.total != 0)
			old_percent = (old_count * 100.0) /
				      pair_session->events_stats.total;
		if (session->events_stats.total != 0)
			new_percent = (self->count * 100.0) /
				      session->events_stats.total;

		diff = new_percent - old_percent;

		if (verbose)
			printed += fprintf(fp, " %3.2f%% %3.2f%%", old_percent, new_percent);

		/* Changes below 0.01 percentage points are left blank. */
		if (diff >= 0.01 || diff <= -0.01)
			printed += fprintf(fp, " %+4.2F%%", diff);
		else
			printed += fprintf(fp, "       ");
	} else {
		if (verbose)
			printed += fprintf(fp, " %9llu %9llu",
					   (unsigned long long)old_count,
					   (unsigned long long)self->count);
		printed += fprintf(fp, " %+9lld",
				   (long long)((s64)self->count - (s64)old_count));
	}

	return printed + fprintf(fp, " %25.25s   %s\n",
				 self->map->dso->name, self->sym->name);
}

/*
 * Print the diff line of every histogram entry of @self (the new session)
 * to @fp, in the iteration order of self->hists.  @pair is the old session
 * the entries were matched against.  Entries are numbered from 1.
 *
 * Returns the total number of characters printed.
 */
static size_t perf_session__fprintf_matched_hists(struct perf_session *self,
						  struct perf_session *pair,
						  FILE *fp)
{
	size_t total = 0;
	unsigned long rank = 0;
	struct rb_node *nd = rb_first(&self->hists);

	while (nd != NULL) {
		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);

		total += hist_entry__fprintf_matched(he, ++rank, self, pair, fp);
		nd = rb_next(nd);
	}

	return total;
}

/*
 * Run the diff: open input_old and input_new, process the events of both,
 * sort each histogram, pair the entries by name and print the result to
 * stdout.
 *
 * Returns 0 on success, -ENOMEM when either session could not be created,
 * or the error returned by perf_session__process_events().
 */
static int __cmd_diff(void)
{
	int ret, i;
	struct perf_session *session[2];

	session[0] = perf_session__new(input_old, O_RDONLY, force, &symbol_conf);
	session[1] = perf_session__new(input_new, O_RDONLY, force, &symbol_conf);
	if (session[0] == NULL || session[1] == NULL) {
		/* Release whichever session was created; do not leak it. */
		ret = -ENOMEM;
		goto out_delete;
	}

	for (i = 0; i < 2; ++i) {
		ret = perf_session__process_events(session[i], &event_ops);
		if (ret)
			goto out_delete;
		perf_session__output_resort(session[i], session[i]->events_stats.total);
	}

	/* session[0] is the old profile, session[1] the new one. */
	perf_session__match_hists(session[0], session[1]);
	perf_session__fprintf_matched_hists(session[1], session[0], stdout);
out_delete:
	for (i = 0; i < 2; ++i) {
		if (session[i] != NULL)
			perf_session__delete(session[i]);
	}
	return ret;
}

/*
 * Usage strings handed to parse_options() and usage_with_options().
 *
 * The array ends with a NULL entry so that code walking it has a sentinel
 * to stop at.
 * NOTE(review): the parse-options code is expected to iterate until that
 * sentinel - confirm against util/parse-options.c.
 */
static const char *const diff_usage[] = {
	"perf diff [<options>] [old_file] [new_file]",
	NULL,
};

/*
 * Command line options of 'perf diff'.  Each entry stores straight into the
 * variable it names; -P writes event_ops.full_paths and -m writes
 * symbol_conf.use_modules.
 */
static const struct option options[] = {
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
	/* Help text used to be a copy of the --full-paths description. */
	OPT_BOOLEAN('p', "percentages", &show_percent,
		    "Show percentages instead of raw counters"),
	OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
		    "Don't shorten the pathnames taking into account the cwd"),
	OPT_END()
};

/*
 * Entry point of 'perf diff'.
 *
 * After option parsing up to two positional arguments may remain:
 *   - none: keep the default input_old and input_new;
 *   - one:  it names the new file, the old file keeps its default;
 *   - two:  old file followed by new file.
 * More than two arguments prints the usage via usage_with_options().
 *
 * Returns -1 when symbol__init() fails, otherwise the result of
 * __cmd_diff().
 */
int cmd_diff(int argc, const char **argv, const char *prefix __used)
{
	if (symbol__init(&symbol_conf) < 0)
		return -1;

	setup_sorting(diff_usage, options);

	argc = parse_options(argc, argv, options, diff_usage, 0);

	if (argc > 2)
		usage_with_options(diff_usage, options);

	if (argc == 2) {
		input_old = argv[0];
		input_new = argv[1];
	} else if (argc != 0) {
		input_new = argv[0];
	}

	setup_pager();
	return __cmd_diff();
}
+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ extern int check_pager_config(const char *cmd);
extern int cmd_annotate(int argc, const char **argv, const char *prefix);
extern int cmd_bench(int argc, const char **argv, const char *prefix);
extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
extern int cmd_diff(int argc, const char **argv, const char *prefix);
extern int cmd_help(int argc, const char **argv, const char *prefix);
extern int cmd_sched(int argc, const char **argv, const char *prefix);
extern int cmd_list(int argc, const char **argv, const char *prefix);
+1 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
perf-annotate			mainporcelain common
perf-bench			mainporcelain common
perf-buildid-list		mainporcelain common
perf-diff			mainporcelain common
perf-list			mainporcelain common
perf-sched			mainporcelain common
perf-record			mainporcelain common
Loading