Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit daefd0bc authored by Kan Liang's avatar Kan Liang Committed by Arnaldo Carvalho de Melo
Browse files

perf stat: Add support to measure SMI cost



Implementing a new --smi-cost mode in perf stat to measure SMI cost.

During the measurement, /sys/devices/cpu/freeze_on_smi will be set.

The measurement can be done with one counter (unhalted core cycles), and
two free running MSR counters (IA32_APERF and SMI_COUNT).

In practice, the percentages of SMI core cycles should be more useful
than absolute value. So the output will be the percentage of SMI core
cycles and SMI#. metric_only will be set by default.

SMI cycles% = (aperf - unhalted core cycles) / aperf

Here is an example output.

 Performance counter stats for 'sudo echo ':

SMI cycles%          SMI#
    0.1%              1

       0.010858678 seconds time elapsed

Users who want to get the actual values can additionally apply
--no-metric-only.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Elliott <elliott@hpe.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1495825538-5230-3-git-send-email-kan.liang@intel.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 3b00ea93
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -239,6 +239,20 @@ taskset.
--no-merge::
Do not merge results from same PMUs.

--smi-cost::
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.

During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
freeze core counters on SMI.
The aperf counter will not be affected by the setting.
The cost of SMI can be measured by (aperf - unhalted core cycles).

In practice, the percentage of SMI cycles is very useful for performance
oriented analysis. --metric-only will be applied by default.
The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf.

Users who want to get the actual values can apply --no-metric-only.

EXAMPLES
--------

+49 −0
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@
#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"

static void print_counters(struct timespec *ts, int argc, const char **argv);

@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
	NULL,
};

/*
 * Event string handed to parse_events() when --smi-cost is requested:
 * msr/aperf/, msr/smi/ and cycles, wrapped in a single pair of braces.
 */
static const char *smi_cost_attrs = "{msr/aperf/,msr/smi/,cycles}";

static struct perf_evlist	*evsel_list;

static struct target target = {
@@ -137,6 +146,8 @@ static bool null_run = false;
static int			detailed_run			=  0;
static bool			transaction_run;
static bool			topdown_run			= false;
static bool			smi_cost			= false;
static bool			smi_reset			= false;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
			"Only print computed metrics. No raw values", enable_metric_only),
	OPT_BOOLEAN(0, "topdown", &topdown_run,
			"measure topdown level 1 statistics"),
	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
			"measure SMI cost"),
	OPT_END()
};

@@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
		return 0;
	}

	/*
	 * --smi-cost: make sure freeze_on_smi is enabled, then add the
	 * smi_cost_attrs events to evsel_list. Every failure path prints a
	 * message to stderr and returns -1; success returns 0 without adding
	 * any further default events.
	 */
	if (smi_cost) {
		int smi;

		/* A failed read is reported as freeze_on_smi being unsupported. */
		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
			fprintf(stderr, "freeze_on_smi is not supported.\n");
			return -1;
		}

		/*
		 * Only write the knob when it is currently 0, and remember
		 * (smi_reset) that this run changed it.
		 */
		if (!smi) {
			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
				fprintf(stderr, "Failed to set freeze_on_smi.\n");
				return -1;
			}
			smi_reset = true;
		}

		/*
		 * Only msr/aperf/ and msr/smi/ are probed here; cycles is not
		 * checked separately before parsing.
		 */
		if (pmu_have_event("msr", "aperf") &&
		    pmu_have_event("msr", "smi")) {
			/*
			 * Default to metric-only output unless force_metric_only
			 * is set (presumably an explicit user choice on the
			 * command line - confirm where it is assigned).
			 */
			if (!force_metric_only)
				metric_only = true;
			err = parse_events(evsel_list, smi_cost_attrs, NULL);
		} else {
			fprintf(stderr, "To measure SMI cost, it needs "
				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
			return -1;
		}
		/* err holds the parse_events() result from the branch above. */
		if (err) {
			fprintf(stderr, "Cannot set up SMI cost events\n");
			return -1;
		}
		return 0;
	}

	if (topdown_run) {
		char *str = NULL;
		bool warn = false;
@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
	perf_stat__exit_aggr_mode();
	perf_evlist__free_stats(evsel_list);
out:
	if (smi_cost && smi_reset)
		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);

	perf_evlist__delete(evsel_list);
	return status;
}
+33 −0
Original line number Diff line number Diff line
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values;
static bool have_frontend_stalled;

@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));

	next = rb_first(&runtime_saved_values.entries);
	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, APERF))
		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
	return sanitize_val(1.0 - sum);
}

/*
 * Emit the two SMI cost metrics for @cpu through @out:
 *
 *   "SMI cycles%" - (aperf - cycles) / aperf * 100, using the averaged
 *                   aperf and cycles stats for the context of @evsel;
 *                   reported as 0 when the averaged SMI count is zero
 *                   and shown in red when it exceeds 10.
 *   "SMI#"        - the averaged SMI count.
 *
 * Nothing is printed when either the cycles or the aperf average is zero.
 */
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out)
{
	const int ctx = evsel_context(evsel);
	const double nr_smi = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
	const double aperf_avg = avg_stats(&runtime_aperf_stats[ctx][cpu]);
	const double cycles_avg = avg_stats(&runtime_cycles_stats[ctx][cpu]);
	const char *color;
	double pct;

	/* Without both counters there is no ratio to report. */
	if (cycles_avg == 0 || aperf_avg == 0)
		return;

	/* No SMI seen: report a cost of zero rather than counter skew. */
	pct = nr_smi ? (aperf_avg - cycles_avg) / aperf_avg * 100.00 : 0.0;

	color = (pct > 10) ? PERF_COLOR_RED : NULL;

	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", pct);
	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", nr_smi);
}

void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(cpu, evsel, out);
	} else {
		print_metric(ctxp, NULL, NULL, NULL, 0);
	}
+2 −0
Original line number Diff line number Diff line
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
	ID(SMI_NUM, msr/smi/),
	ID(APERF, msr/aperf/),
};
#undef ID

+2 −0
Original line number Diff line number Diff line
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
	PERF_STAT_EVSEL_ID__SMI_NUM,
	PERF_STAT_EVSEL_ID__APERF,
	PERF_STAT_EVSEL_ID__MAX,
};