Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 28397bab authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-fixes-for-linus' of...

Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  perf, amd: Use kmalloc_node(,__GFP_ZERO) for northbridge structure allocation
  perf_events: Fix time tracking in samples
  perf trace: update usage
  perf trace: update Documentation with new perf trace variants
  perf trace: live-mode command-line cleanup
  perf trace record: handle commands correctly
  perf record: make the record options available outside perf record
  perf trace scripting: remove system-wide param from shell scripts
  perf trace scripting: fix some small memory leaks and missing error checks
  perf: Fix usages of profile_cpu in builtin-top.c to use cpu_list
  perf, ui: Eliminate stack-smashing protection compiler complaint
parents 99efb936 034c6efa
Loading
Loading
Loading
Loading
+2 −2
Original line number Original line Diff line number Diff line
@@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
	struct amd_nb *nb;
	struct amd_nb *nb;
	int i;
	int i;


	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
	nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
			  cpu_to_node(cpu));
	if (!nb)
	if (!nb)
		return NULL;
		return NULL;


	memset(nb, 0, sizeof(*nb));
	nb->nb_id = nb_id;
	nb->nb_id = nb_id;


	/*
	/*
+10 −0
Original line number Original line Diff line number Diff line
@@ -747,6 +747,16 @@ struct perf_event {
	u64				tstamp_running;
	u64				tstamp_running;
	u64				tstamp_stopped;
	u64				tstamp_stopped;


	/*
	 * shadow_ctx_time shadows the actual context timing but it can
	 * be safely used in NMI interrupt context. It reflects the
	 * context time as it was when the event was last scheduled in.
	 *
	 * shadow_ctx_time already accounts for ctx->timestamp. Therefore to
	 * compute ctx_time for a sample, simply add perf_clock().
	 */
	u64				shadow_ctx_time;

	struct perf_event_attr		attr;
	struct perf_event_attr		attr;
	struct hw_perf_event		hw;
	struct hw_perf_event		hw;


+34 −8
Original line number Original line Diff line number Diff line
@@ -674,6 +674,8 @@ event_sched_in(struct perf_event *event,


	event->tstamp_running += ctx->time - event->tstamp_stopped;
	event->tstamp_running += ctx->time - event->tstamp_stopped;


	event->shadow_ctx_time = ctx->time - ctx->timestamp;

	if (!is_software_event(event))
	if (!is_software_event(event))
		cpuctx->active_oncpu++;
		cpuctx->active_oncpu++;
	ctx->nr_active++;
	ctx->nr_active++;
@@ -3396,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
}
}


static void perf_output_read_one(struct perf_output_handle *handle,
static void perf_output_read_one(struct perf_output_handle *handle,
				 struct perf_event *event)
				 struct perf_event *event,
				 u64 enabled, u64 running)
{
{
	u64 read_format = event->attr.read_format;
	u64 read_format = event->attr.read_format;
	u64 values[4];
	u64 values[4];
@@ -3404,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,


	values[n++] = perf_event_count(event);
	values[n++] = perf_event_count(event);
	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
		values[n++] = event->total_time_enabled +
		values[n++] = enabled +
			atomic64_read(&event->child_total_time_enabled);
			atomic64_read(&event->child_total_time_enabled);
	}
	}
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
		values[n++] = event->total_time_running +
		values[n++] = running +
			atomic64_read(&event->child_total_time_running);
			atomic64_read(&event->child_total_time_running);
	}
	}
	if (read_format & PERF_FORMAT_ID)
	if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
 * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
 */
 */
static void perf_output_read_group(struct perf_output_handle *handle,
static void perf_output_read_group(struct perf_output_handle *handle,
			    struct perf_event *event)
			    struct perf_event *event,
			    u64 enabled, u64 running)
{
{
	struct perf_event *leader = event->group_leader, *sub;
	struct perf_event *leader = event->group_leader, *sub;
	u64 read_format = event->attr.read_format;
	u64 read_format = event->attr.read_format;
@@ -3431,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
	values[n++] = 1 + leader->nr_siblings;
	values[n++] = 1 + leader->nr_siblings;


	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		values[n++] = leader->total_time_enabled;
		values[n++] = enabled;


	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		values[n++] = leader->total_time_running;
		values[n++] = running;


	if (leader != event)
	if (leader != event)
		leader->pmu->read(leader);
		leader->pmu->read(leader);
@@ -3459,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
	}
	}
}
}


#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
				 PERF_FORMAT_TOTAL_TIME_RUNNING)

static void perf_output_read(struct perf_output_handle *handle,
static void perf_output_read(struct perf_output_handle *handle,
			     struct perf_event *event)
			     struct perf_event *event)
{
{
	u64 enabled = 0, running = 0, now, ctx_time;
	u64 read_format = event->attr.read_format;

	/*
	 * compute total_time_enabled, total_time_running
	 * based on snapshot values taken when the event
	 * was last scheduled in.
	 *
	 * we cannot simply call update_context_time()
	 * because of locking issues, as we are called in
	 * NMI context
	 */
	if (read_format & PERF_FORMAT_TOTAL_TIMES) {
		now = perf_clock();
		ctx_time = event->shadow_ctx_time + now;
		enabled = ctx_time - event->tstamp_enabled;
		running = ctx_time - event->tstamp_running;
	}

	if (event->attr.read_format & PERF_FORMAT_GROUP)
	if (event->attr.read_format & PERF_FORMAT_GROUP)
		perf_output_read_group(handle, event);
		perf_output_read_group(handle, event, enabled, running);
	else
	else
		perf_output_read_one(handle, event);
		perf_output_read_one(handle, event, enabled, running);
}
}


void perf_output_sample(struct perf_output_handle *handle,
void perf_output_sample(struct perf_output_handle *handle,
+49 −8
Original line number Original line Diff line number Diff line
@@ -8,7 +8,11 @@ perf-trace - Read perf.data (created by perf record) and display trace output
SYNOPSIS
SYNOPSIS
--------
--------
[verse]
[verse]
'perf trace' {record <script> | report <script> [args] }
'perf trace' [<options>]
'perf trace' [<options>] record <script> [<record-options>] <command>
'perf trace' [<options>] report <script> [script-args]
'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command>
'perf trace' [<options>] <top-script> [script-args]


DESCRIPTION
DESCRIPTION
-----------
-----------
@@ -24,23 +28,53 @@ There are several variants of perf trace:
  available via 'perf trace -l').  The following variants allow you to
  available via 'perf trace -l').  The following variants allow you to
  record and run those scripts:
  record and run those scripts:


  'perf trace record <script>' to record the events required for 'perf
  'perf trace record <script> <command>' to record the events required
  trace report'.  <script> is the name displayed in the output of
  for 'perf trace report'.  <script> is the name displayed in the
  'perf trace --list' i.e. the actual script name minus any language
  output of 'perf trace --list' i.e. the actual script name minus any
  extension.
  language extension.  If <command> is not specified, the events are
  recorded using the -a (system-wide) 'perf record' option.


  'perf trace report <script>' to run and display the results of
  'perf trace report <script> [args]' to run and display the results
  <script>.  <script> is the name displayed in the output of 'perf
  of <script>.  <script> is the name displayed in the output of 'perf
  trace --list' i.e. the actual script name minus any language
  trace --list' i.e. the actual script name minus any language
  extension.  The perf.data output from a previous run of 'perf trace
  extension.  The perf.data output from a previous run of 'perf trace
  record <script>' is used and should be present for this command to
  record <script>' is used and should be present for this command to
  succeed.
  succeed.  [args] refers to the (mainly optional) args expected by
  the script.

  'perf trace <script> <required-script-args> <command>' to both
  record the events required for <script> and to run the <script>
  using 'live-mode' i.e. without writing anything to disk.  <script>
  is the name displayed in the output of 'perf trace --list' i.e. the
  actual script name minus any language extension.  If <command> is
  not specified, the events are recorded using the -a (system-wide)
  'perf record' option.  If <script> has any required args, they
  should be specified before <command>.  This mode doesn't allow for
  optional script args to be specified; if optional script args are
  desired, they can be specified using separate 'perf trace record'
  and 'perf trace report' commands, with the stdout of the record step
  piped to the stdin of the report script, using the '-o -' and '-i -'
  options of the corresponding commands.

  'perf trace <top-script>' to both record the events required for
  <top-script> and to run the <top-script> using 'live-mode'
  i.e. without writing anything to disk.  <top-script> is the name
  displayed in the output of 'perf trace --list' i.e. the actual
  script name minus any language extension; a <top-script> is defined
  as any script name ending with the string 'top'.

  [<record-options>] can be passed to the record steps of the 'perf
  trace record' and 'live-mode' variants; this isn't possible, however,
  for the <top-script> 'live-mode' or 'perf trace report' variants.


  See the 'SEE ALSO' section for links to language-specific
  See the 'SEE ALSO' section for links to language-specific
  information on how to write and run your own trace scripts.
  information on how to write and run your own trace scripts.


OPTIONS
OPTIONS
-------
-------
<command>...::
	Any command you can specify in a shell.

-D::
-D::
--dump-raw-trace=::
--dump-raw-trace=::
        Display verbose dump of the trace data.
        Display verbose dump of the trace data.
@@ -64,6 +98,13 @@ OPTIONS
        Generate perf-trace.[ext] starter script for given language,
        Generate perf-trace.[ext] starter script for given language,
        using current perf.data.
        using current perf.data.


-a::
        Force system-wide collection.  Scripts run without a <command>
        normally use -a by default, while scripts run with a <command>
        normally don't - this option allows the latter to be run in
        system-wide mode.


SEE ALSO
SEE ALSO
--------
--------
linkperf:perf-record[1], linkperf:perf-trace-perl[1],
linkperf:perf-record[1], linkperf:perf-trace-perl[1],
+5 −5
Original line number Original line Diff line number Diff line
@@ -790,7 +790,7 @@ static const char * const record_usage[] = {


static bool force, append_file;
static bool force, append_file;


static const struct option options[] = {
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", NULL, "event",
	OPT_CALLBACK('e', "event", NULL, "event",
		     "event selector. use 'perf list' to list available events",
		     "event selector. use 'perf list' to list available events",
		     parse_events),
		     parse_events),
@@ -839,16 +839,16 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
{
{
	int i, j, err = -ENOMEM;
	int i, j, err = -ENOMEM;


	argc = parse_options(argc, argv, options, record_usage,
	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target_pid == -1 && target_tid == -1 &&
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		!system_wide && !cpu_list)
		usage_with_options(record_usage, options);
		usage_with_options(record_usage, record_options);


	if (force && append_file) {
	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
				" You need to choose between -f and -A");
		usage_with_options(record_usage, options);
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
	} else if (append_file) {
		write_mode = WRITE_APPEND;
		write_mode = WRITE_APPEND;
	} else {
	} else {
@@ -871,7 +871,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
		if (thread_num <= 0) {
		if (thread_num <= 0) {
			fprintf(stderr, "Can't find all threads of pid %d\n",
			fprintf(stderr, "Can't find all threads of pid %d\n",
					target_pid);
					target_pid);
			usage_with_options(record_usage, options);
			usage_with_options(record_usage, record_options);
		}
		}
	} else {
	} else {
		all_tids=malloc(sizeof(pid_t));
		all_tids=malloc(sizeof(pid_t));
Loading