Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cdce9d73 authored by Frederic Weisbecker's avatar Frederic Weisbecker Committed by Ingo Molnar
Browse files

perf sched: Add sched latency profiling



Add the -l --latency option that reports statistics about the
scheduler latencies.

For now, the latencies are measured in the following sequence
scope:

- task A is sleeping (D or S state)
- task B wakes up A
         ^
         |
         |

   latency timeframe

         |
         |
         v
- task A is scheduled in

Start by recording every scheduler events:

	perf record -e sched:*

and then fetch the results:

	perf sched -l

 Tasks                     count          total              avg            max

migration/0                  2             39849            19924           28826
ksoftirqd/0                  7            756383           108054          373014
migration/1                  5             45391             9078           10452
ksoftirqd/1                  2            399055           199527          359130
events/0                     8           4780110           597513         4500250
events/1                     9           6353057           705895         2986012
kblockd/0                   42          37805097           900121         5077684

The snapshot are in nanoseconds.

- Count: number of snapshots taken for the given task
- Total: total latencies in nanosec
- Avg  : average of latency between wake up and sched in
- Max  : max snapshot latency

Signed-off-by: default avatarFrederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 419ab0d6
Loading
Loading
Loading
Loading
+285 −11
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@ static struct perf_header *header;
static u64			sample_type;

static int			replay_mode;
static int			lat_mode;


/*
@@ -847,6 +848,269 @@ static struct trace_sched_handler replay_ops = {
	.fork_event = replay_fork_event,
};

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING,
	THREAD_WAKED_UP,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct lat_snapshot {
	struct list_head	list;
	enum thread_state	state;
	u64			wake_up_time;
	u64			sched_in_time;
};

struct thread_latency {
	struct list_head	snapshot_list;
	struct thread		*thread;
	struct rb_node		node;
};

static struct rb_root lat_snapshot_root;

static struct thread_latency *
thread_latency_search(struct rb_root *root, struct thread *thread)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct thread_latency *lat;

		lat = container_of(node, struct thread_latency, node);
		if (thread->pid < lat->thread->pid)
			node = node->rb_left;
		else if (thread->pid > lat->thread->pid)
			node = node->rb_right;
		else {
			return lat;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct thread_latency *data)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct thread_latency *this;

		this = container_of(*new, struct thread_latency, node);
		parent = *new;
		if (data->thread->pid < this->thread->pid)
			new = &((*new)->rb_left);
		else if (data->thread->pid > this->thread->pid)
			new = &((*new)->rb_right);
		else
			die("Double thread insertion\n");
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_latency_insert(struct thread *thread)
{
	struct thread_latency *lat;
	lat = calloc(sizeof(*lat), 1);
	if (!lat)
		die("No memory");

	lat->thread = thread;
	INIT_LIST_HEAD(&lat->snapshot_list);
	__thread_latency_insert(&lat_snapshot_root, lat);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
lat_sched_out(struct thread_latency *lat,
	     struct trace_switch_event *switch_event)
{
	struct lat_snapshot *snapshot;

	if (sched_out_state(switch_event) == 'R')
		return;

	snapshot = calloc(sizeof(*snapshot), 1);
	if (!snapshot)
		die("Non memory");

	list_add_tail(&snapshot->list, &lat->snapshot_list);
}

static void
lat_sched_in(struct thread_latency *lat, u64 timestamp)
{
	struct lat_snapshot *snapshot;

	if (list_empty(&lat->snapshot_list))
		return;

	snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
			      list);

	if (snapshot->state != THREAD_WAKED_UP)
		return;

	if (timestamp < snapshot->wake_up_time) {
		snapshot->state = THREAD_IGNORE;
		return;
	}

	snapshot->state = THREAD_SCHED_IN;
	snapshot->sched_in_time = timestamp;
}


static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct event *event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct thread_latency *out_lat, *in_lat;
	struct thread *sched_out, *sched_in;

	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);

	in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
	if (!in_lat) {
		thread_latency_insert(sched_in);
		in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
		if (!in_lat)
			die("Internal latency tree error");
	}

	out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
	if (!out_lat) {
		thread_latency_insert(sched_out);
		out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
		if (!out_lat)
			die("Internal latency tree error");
	}

	lat_sched_in(in_lat, timestamp);
	lat_sched_out(out_lat, switch_event);
}

static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct event *event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct thread_latency *lat;
	struct lat_snapshot *snapshot;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
	lat = thread_latency_search(&lat_snapshot_root, wakee);
	if (!lat) {
		thread_latency_insert(wakee);
		return;
	}

	if (list_empty(&lat->snapshot_list))
		return;

	snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
			      list);

	if (snapshot->state != THREAD_SLEEPING)
		return;

	snapshot->state = THREAD_WAKED_UP;
	snapshot->wake_up_time = timestamp;
}

static struct trace_sched_handler lat_ops  = {
	.wakeup_event = latency_wakeup_event,
	.switch_event = latency_switch_event,
	.fork_event = latency_fork_event,
};

static void output_lat_thread(struct thread_latency *lat)
{
	struct lat_snapshot *shot;
	int count = 0;
	int i;
	int ret;
	u64 max = 0, avg;
	u64 total = 0, delta;

	list_for_each_entry(shot, &lat->snapshot_list, list) {
		if (shot->state != THREAD_SCHED_IN)
			continue;

		count++;

		delta = shot->sched_in_time - shot->wake_up_time;
		if (delta > max)
			max = delta;
		total += delta;
	}

	if (!count)
		return;

	ret = printf("%s", lat->thread->comm);

	for (i = 0; i < 25 - ret; i++)
		printf(" ");

	avg = total / count;

	printf("%5d        %10llu       %10llu      %10llu\n", count, total, avg, max);
}

static void output_lat_results(void)
{
	struct rb_node *next;

	printf(" Tasks");
	printf("                     count");
	printf("          total");
	printf("              avg");
	printf("            max\n\n");

	next = rb_first(&lat_snapshot_root);

	while (next) {
		struct thread_latency *lat;

		lat = rb_entry(next, struct thread_latency, node);
		output_lat_thread(lat);
		next = rb_next(next);
	}
}

static struct trace_sched_handler *trace_handler;

@@ -1154,6 +1418,8 @@ static const struct option options[] = {
		    "dump raw trace in ASCII"),
	OPT_BOOLEAN('r', "replay", &replay_mode,
		    "replay sched behaviour from traces"),
	OPT_BOOLEAN('l', "latency", &lat_mode,
		    "measure various latencies"),
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_END()
@@ -1180,9 +1446,12 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)

	if (replay_mode)
		trace_handler = &replay_ops;
	else /* We may need a default subcommand */
	else if (lat_mode)
		trace_handler = &lat_ops;
	else /* We may need a default subcommand (perf trace?) */
		die("Please select a sub command (-r)\n");

	if (replay_mode) {
		calibrate_run_measurement_overhead();
		calibrate_sleep_measurement_overhead();

@@ -1196,6 +1465,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
		printf("------------------------------------------------------------\n");
		for (i = 0; i < nr_iterations; i++)
			run_one_test();
	} else if (lat_mode) {
		setup_pager();
		__cmd_sched();
		output_lat_results();
	}

	return 0;
}