Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 28397bab authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-fixes-for-linus' of...

Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  perf, amd: Use kmalloc_node(,__GFP_ZERO) for northbridge structure allocation
  perf_events: Fix time tracking in samples
  perf trace: update usage
  perf trace: update Documentation with new perf trace variants
  perf trace: live-mode command-line cleanup
  perf trace record: handle commands correctly
  perf record: make the record options available outside perf record
  perf trace scripting: remove system-wide param from shell scripts
  perf trace scripting: fix some small memory leaks and missing error checks
  perf: Fix usages of profile_cpu in builtin-top.c to use cpu_list
  perf, ui: Eliminate stack-smashing protection compiler complaint
parents 99efb936 034c6efa
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
	struct amd_nb *nb;
	int i;

	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
	nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
			  cpu_to_node(cpu));
	if (!nb)
		return NULL;

	memset(nb, 0, sizeof(*nb));
	nb->nb_id = nb_id;

	/*
+10 −0
Original line number Diff line number Diff line
@@ -747,6 +747,16 @@ struct perf_event {
	u64				tstamp_running;
	u64				tstamp_stopped;

	/*
	 * shadow_ctx_time shadows the actual context timing but it can
	 * be safely used in NMI interrupt context. It reflects the
	 * context time as it was when the event was last scheduled in.
	 *
	 * shadow_ctx_time already accounts for ctx->timestamp. Therefore to
	 * compute ctx_time for a sample, simply add perf_clock().
	 */
	u64				shadow_ctx_time;

	struct perf_event_attr		attr;
	struct hw_perf_event		hw;

+34 −8
Original line number Diff line number Diff line
@@ -674,6 +674,8 @@ event_sched_in(struct perf_event *event,

	event->tstamp_running += ctx->time - event->tstamp_stopped;

	event->shadow_ctx_time = ctx->time - ctx->timestamp;

	if (!is_software_event(event))
		cpuctx->active_oncpu++;
	ctx->nr_active++;
@@ -3396,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
}

static void perf_output_read_one(struct perf_output_handle *handle,
				 struct perf_event *event)
				 struct perf_event *event,
				 u64 enabled, u64 running)
{
	u64 read_format = event->attr.read_format;
	u64 values[4];
@@ -3404,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,

	values[n++] = perf_event_count(event);
	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
		values[n++] = event->total_time_enabled +
		values[n++] = enabled +
			atomic64_read(&event->child_total_time_enabled);
	}
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
		values[n++] = event->total_time_running +
		values[n++] = running +
			atomic64_read(&event->child_total_time_running);
	}
	if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
 */
static void perf_output_read_group(struct perf_output_handle *handle,
			    struct perf_event *event)
			    struct perf_event *event,
			    u64 enabled, u64 running)
{
	struct perf_event *leader = event->group_leader, *sub;
	u64 read_format = event->attr.read_format;
@@ -3431,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
	values[n++] = 1 + leader->nr_siblings;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		values[n++] = leader->total_time_enabled;
		values[n++] = enabled;

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		values[n++] = leader->total_time_running;
		values[n++] = running;

	if (leader != event)
		leader->pmu->read(leader);
@@ -3459,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
	}
}

#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
				 PERF_FORMAT_TOTAL_TIME_RUNNING)

/*
 * Prepare the enabled/running time values for @event and hand them,
 * together with @handle, to the group or single-event read helper.
 *
 * If neither PERF_FORMAT_TOTAL_TIME_ENABLED nor
 * PERF_FORMAT_TOTAL_TIME_RUNNING is set in the event's read_format,
 * both values are passed down as 0.
 */
static void perf_output_read(struct perf_output_handle *handle,
			     struct perf_event *event)
{
	u64 enabled = 0, running = 0, now, ctx_time;
	u64 read_format = event->attr.read_format;

	/*
	 * Compute total_time_enabled and total_time_running
	 * based on snapshot values taken when the event
	 * was last scheduled in.
	 *
	 * We cannot simply call update_context_time() here
	 * because of locking issues: this function is called
	 * in NMI context.
	 */
	if (read_format & PERF_FORMAT_TOTAL_TIMES) {
		now = perf_clock();
		/* shadow_ctx_time + current clock gives the context time now */
		ctx_time = event->shadow_ctx_time + now;
		enabled = ctx_time - event->tstamp_enabled;
		running = ctx_time - event->tstamp_running;
	}

	/* Group reads and single-event reads use different helpers. */
	if (event->attr.read_format & PERF_FORMAT_GROUP)
		perf_output_read_group(handle, event);
		perf_output_read_group(handle, event, enabled, running);
	else
		perf_output_read_one(handle, event);
		perf_output_read_one(handle, event, enabled, running);
}

void perf_output_sample(struct perf_output_handle *handle,
+49 −8
Original line number Diff line number Diff line
@@ -8,7 +8,11 @@ perf-trace - Read perf.data (created by perf record) and display trace output
SYNOPSIS
--------
[verse]
'perf trace' {record <script> | report <script> [args] }
'perf trace' [<options>]
'perf trace' [<options>] record <script> [<record-options>] <command>
'perf trace' [<options>] report <script> [script-args]
'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command>
'perf trace' [<options>] <top-script> [script-args]

DESCRIPTION
-----------
@@ -24,23 +28,53 @@ There are several variants of perf trace:
  available via 'perf trace -l').  The following variants allow you to
  record and run those scripts:

  'perf trace record <script>' to record the events required for 'perf
  trace report'.  <script> is the name displayed in the output of
  'perf trace --list' i.e. the actual script name minus any language
  extension.
  'perf trace record <script> <command>' to record the events required
  for 'perf trace report'.  <script> is the name displayed in the
  output of 'perf trace --list' i.e. the actual script name minus any
  language extension.  If <command> is not specified, the events are
  recorded using the -a (system-wide) 'perf record' option.

  'perf trace report <script>' to run and display the results of
  <script>.  <script> is the name displayed in the output of 'perf
  'perf trace report <script> [args]' to run and display the results
  of <script>.  <script> is the name displayed in the output of 'perf
  trace --list' i.e. the actual script name minus any language
  extension.  The perf.data output from a previous run of 'perf trace
  record <script>' is used and should be present for this command to
  succeed.
  succeed.  [args] refers to the (mainly optional) args expected by
  the script.

  'perf trace <script> <required-script-args> <command>' to both
  record the events required for <script> and to run the <script>
  using 'live-mode' i.e. without writing anything to disk.  <script>
  is the name displayed in the output of 'perf trace --list' i.e. the
  actual script name minus any language extension.  If <command> is
  not specified, the events are recorded using the -a (system-wide)
  'perf record' option.  If <script> has any required args, they
  should be specified before <command>.  This mode doesn't allow for
  optional script args to be specified; if optional script args are
  desired, they can be specified using separate 'perf trace record'
  and 'perf trace report' commands, with the stdout of the record step
  piped to the stdin of the report script, using the '-o -' and '-i -'
  options of the corresponding commands.

  'perf trace <top-script>' to both record the events required for
  <top-script> and to run the <top-script> using 'live-mode'
  i.e. without writing anything to disk.  <top-script> is the name
  displayed in the output of 'perf trace --list' i.e. the actual
  script name minus any language extension; a <top-script> is defined
  as any script name ending with the string 'top'.

  [<record-options>] can be passed to the record steps of 'perf trace
  record' and 'live-mode' variants; this isn't possible however for
  <top-script> 'live-mode' or 'perf trace report' variants.

  See the 'SEE ALSO' section for links to language-specific
  information on how to write and run your own trace scripts.

OPTIONS
-------
<command>...::
	Any command you can specify in a shell.

-D::
--dump-raw-trace::
        Display verbose dump of the trace data.
@@ -64,6 +98,13 @@ OPTIONS
        Generate perf-trace.[ext] starter script for given language,
        using current perf.data.

-a::
        Force system-wide collection.  Scripts run without a <command>
        normally use -a by default, while scripts run with a <command>
        normally don't - this option allows the latter to be run in
        system-wide mode.


SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-trace-perl[1],
+5 −5
Original line number Diff line number Diff line
@@ -790,7 +790,7 @@ static const char * const record_usage[] = {

static bool force, append_file;

static const struct option options[] = {
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", NULL, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events),
@@ -839,16 +839,16 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int i, j, err = -ENOMEM;

	argc = parse_options(argc, argv, options, record_usage,
	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, options);
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, options);
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
@@ -871,7 +871,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
		if (thread_num <= 0) {
			fprintf(stderr, "Can't find all threads of pid %d\n",
					target_pid);
			usage_with_options(record_usage, options);
			usage_with_options(record_usage, record_options);
		}
	} else {
		all_tids=malloc(sizeof(pid_t));
Loading