Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2e6553aa authored by Thiago Jung Bauermann, committed by Michael Ellerman
Browse files

powerpc/perf/hv-24x7: Support v2 of the hypervisor API



POWER9 introduces a new version of the hypervisor API to access the 24x7
perf counters. The new version changed some of the structures used for
requests and results.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent ebd4a5a3
Loading
Loading
Loading
Loading
+110 −25
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
@@ -27,6 +28,9 @@
#include "hv-24x7-catalog.h"
#include "hv-common.h"

/* Version of the 24x7 hypervisor API that we should use in this machine. */
static int interface_version;

static bool domain_is_valid(unsigned domain)
{
	switch (domain) {
@@ -74,7 +78,11 @@ static const char *domain_name(unsigned domain)

/*
 * Tell whether a catalog entry that refers to @domain can be used with the
 * interface version selected for this machine.
 */
static bool catalog_entry_domain_is_valid(unsigned domain)
{
	/* Version 1 is the POWER8 interface, which lacks virtual domains. */
	const bool physical_only = (interface_version == 1);

	return physical_only ? is_physical_domain(domain)
			     : domain_is_valid(domain);
}

/*
@@ -166,9 +174,11 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);

#define MAX_NUM_REQUESTS	((H24x7_DATA_BUFFER_SIZE -		       \
					sizeof(struct hv_24x7_request_buffer)) \
					/ sizeof(struct hv_24x7_request))
/*
 * Number of requests that fit in one data buffer for the given interface
 * version: the space left after the request buffer header, divided by the
 * version-specific size of a single request.
 */
static unsigned int max_num_requests(int interface_version)
{
	const size_t payload_bytes = H24x7_DATA_BUFFER_SIZE -
				     sizeof(struct hv_24x7_request_buffer);
	const size_t request_bytes = H24x7_REQUEST_SIZE(interface_version);

	return payload_bytes / request_bytes;
}

static char *event_name(struct hv_24x7_event_data *ev, int *len)
{
@@ -1052,7 +1062,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
	memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
	memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);

	request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
	request_buffer->interface_version = interface_version;
	/* memset above set request_buffer->num_requests to 0 */
}

@@ -1077,7 +1087,7 @@ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
	if (ret) {
		struct hv_24x7_request *req;

		req = &request_buffer->requests[0];
		req = request_buffer->requests;
		pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
				      req->performance_domain, req->data_offset,
				      req->starting_ix, req->starting_lpar_ix,
@@ -1101,9 +1111,11 @@ static int add_event_to_24x7_request(struct perf_event *event,
{
	u16 idx;
	int i;
	size_t req_size;
	struct hv_24x7_request *req;

	if (request_buffer->num_requests >= MAX_NUM_REQUESTS) {
	if (request_buffer->num_requests >=
	    max_num_requests(request_buffer->interface_version)) {
		pr_devel("Too many requests for 24x7 HCALL %d\n",
				request_buffer->num_requests);
		return -EINVAL;
@@ -1120,8 +1132,10 @@ static int add_event_to_24x7_request(struct perf_event *event,
		idx = event_get_vcpu(event);
	}

	req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);

	i = request_buffer->num_requests++;
	req = &request_buffer->requests[i];
	req = (void *) request_buffer->requests + i * req_size;

	req->performance_domain = event_get_domain(event);
	req->data_size = cpu_to_be16(8);
@@ -1131,14 +1145,86 @@ static int add_event_to_24x7_request(struct perf_event *event,
	req->starting_ix = cpu_to_be16(idx);
	req->max_ix = cpu_to_be16(1);

	if (request_buffer->interface_version > 1 &&
	    req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
		req->starting_thread_group_ix = idx % 2;
		req->max_num_thread_groups = 1;
	}

	return 0;
}

/**
 * get_count_from_result - extract the counter value carried by one result
 *
 * @event:	Event associated with @res.
 * @resb:	Result buffer that holds @res; its interface_version selects
 *		the result element layout.
 * @res:	Result to decode.
 * @countp:	Receives the event count on success.
 * @next:	If non-NULL, receives the address of the result following @res
 *		in @resb. Only written on success and for an empty result.
 *
 * Return: 0 on success, -ENODATA when @res holds no elements, -EIO when it
 * holds more than one element, -ENOTSUPP when the element data is not
 * sizeof(u64) bytes long.
 */
static int get_count_from_result(struct perf_event *event,
				 struct hv_24x7_data_result_buffer *resb,
				 struct hv_24x7_result *res, u64 *countp,
				 struct hv_24x7_result **next)
{
	const u16 nr_elements = be16_to_cpu(res->num_elements_returned);
	const u16 elem_data_len = be16_to_cpu(res->result_element_data_size);
	unsigned int payload_offset;
	char *payload;

	/* An empty result has no count; the next result follows directly. */
	if (nr_elements == 0) {
		pr_debug("Result of request %hhu is empty, nothing to do\n",
			 res->result_ix);

		if (next != NULL)
			*next = (struct hv_24x7_result *) res->elements;

		return -ENODATA;
	}

	/*
	 * Since we always specify 1 as the maximum for the smallest resource
	 * we're requesting, there should be only one element per result.
	 */
	if (nr_elements > 1) {
		pr_err("Error: result of request %hhu has %hu elements\n",
		       res->result_ix, nr_elements);

		return -EIO;
	}

	/* Only 64-bit counter values are understood. */
	if (elem_data_len != sizeof(u64)) {
		pr_debug("Error: result of request %hhu has data of %hu bytes\n",
			 res->result_ix, elem_data_len);

		return -ENOTSUPP;
	}

	/* The counter data sits at a version-dependent offset in the element. */
	payload_offset = (resb->interface_version == 1) ?
		offsetof(struct hv_24x7_result_element_v1, element_data) :
		offsetof(struct hv_24x7_result_element_v2, element_data);

	payload = res->elements + payload_offset;

	*countp = be64_to_cpu(*((u64 *) payload));

	/* The next result starts right after this element's data. */
	if (next != NULL)
		*next = (struct hv_24x7_result *) (payload + elem_data_len);

	return 0;
}

static int single_24x7_request(struct perf_event *event, u64 *count)
{
	int ret;
	u16 num_elements;
	struct hv_24x7_result *result;
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;

@@ -1158,14 +1244,9 @@ static int single_24x7_request(struct perf_event *event, u64 *count)
	if (ret)
		goto out;

	result = result_buffer->results;

	/* This code assumes that a result has only one element. */
	num_elements = be16_to_cpu(result->num_elements_returned);
	WARN_ON_ONCE(num_elements != 1);

	/* process result from hcall */
	*count = be64_to_cpu(result->elements[0].element_data[0]);
	ret = get_count_from_result(event, result_buffer,
				    result_buffer->results, count, NULL);

out:
	put_cpu_var(hv_24x7_reqb);
@@ -1425,16 +1506,13 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
	for (i = 0, res = result_buffer->results;
	     i < result_buffer->num_results; i++, res = next_res) {
		struct perf_event *event = h24x7hw->events[res->result_ix];
		u16 num_elements = be16_to_cpu(res->num_elements_returned);
		u16 data_size = be16_to_cpu(res->result_element_data_size);

		/* This code assumes that a result has only one element. */
		WARN_ON_ONCE(num_elements != 1);
		ret = get_count_from_result(event, result_buffer, res, &count,
					    &next_res);
		if (ret)
			break;

		count = be64_to_cpu(res->elements[0].element_data[0]);
		update_event_count(event, count);

		next_res = (void *) res->elements[0].element_data + data_size;
	}

	put_cpu_var(hv_24x7_hw);
@@ -1484,7 +1562,14 @@ static int hv_24x7_init(void)
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		pr_debug("not a virtualized system, not enabling\n");
		return -ENODEV;
	}
	} else if (!cur_cpu_spec->oprofile_cpu_type)
		return -ENODEV;

	/* POWER8 only supports v1, while POWER9 only supports v2. */
	if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
		interface_version = 1;
	else
		interface_version = 2;

	hret = hv_perf_caps_get(&caps);
	if (hret) {
+49 −9
Original line number Diff line number Diff line
@@ -10,6 +10,8 @@ enum hv_perf_domains {
	HV_PERF_DOMAIN_MAX,
};

/*
 * Size in bytes of one request entry in the request buffer: 16 for version 1
 * of the interface, 32 for any other version. The argument is parenthesized
 * so that expressions with operators of lower precedence than == (such as
 * a & b) are compared as a whole.
 */
#define H24x7_REQUEST_SIZE(iface_version)	((iface_version) == 1 ? 16 : 32)

struct hv_24x7_request {
	/* PHYSICAL domains require enabling via phyp/hmc. */
	__u8 performance_domain;
@@ -42,19 +44,47 @@ struct hv_24x7_request {
	/* chip, core, or virtual processor based on @performance_domain */
	__be16 starting_ix;
	__be16 max_ix;

	/* The following fields were added in v2 of the 24x7 interface. */

	__u8 starting_thread_group_ix;

	/* -1 means all thread groups starting at @starting_thread_group_ix */
	__u8 max_num_thread_groups;

	__u8 reserved2[0xE];
} __packed;

struct hv_24x7_request_buffer {
	/* 0 - ? */
	/* 1 - ? */
#define HV_24X7_IF_VERSION_CURRENT 0x01
	__u8 interface_version;
	__u8 num_requests;
	__u8 reserved[0xE];
	struct hv_24x7_request requests[1];
	struct hv_24x7_request requests[];
} __packed;

/*
 * Layout of a result element as used with version 1 of the 24x7 interface.
 * The struct is packed, so @element_data starts right after the three
 * fixed-size fields (8 bytes into the element).
 */
struct hv_24x7_result_element_v1 {
	__be16 lpar_ix;

	/*
	 * represents the core, chip, or virtual processor based on the
	 * request's @performance_domain
	 */
	__be16 domain_ix;

	/* -1 if @performance_domain does not refer to a virtual processor */
	__be32 lpar_cfg_instance_id;

	/*
	 * Counter payload; size = @result_element_data_size of containing
	 * result. Declared as a flexible array because that size is only
	 * known from the containing result.
	 */
	__u64 element_data[];
} __packed;

struct hv_24x7_result_element {
/*
 * We need a separate struct for v2 because the offset of @element_data changed
 * between versions.
 */
struct hv_24x7_result_element_v2 {
	__be16 lpar_ix;

	/*
@@ -66,8 +96,12 @@ struct hv_24x7_result_element {
	/* -1 if @performance_domain does not refer to a virtual processor */
	__be32 lpar_cfg_instance_id;

	__u8 thread_group_ix;

	__u8 reserved[7];

	/* size = @result_element_data_size of containing result. */
	__u64 element_data[1];
	__u64 element_data[];
} __packed;

struct hv_24x7_result {
@@ -94,10 +128,16 @@ struct hv_24x7_result {
	__be16 result_element_data_size;
	__u8 reserved[0x2];

	/* WARNING: only valid for first result element due to variable sizes
	 *          of result elements */
	/* struct hv_24x7_result_element[@num_elements_returned] */
	struct hv_24x7_result_element elements[1];
	/*
	 * Either
	 *	struct hv_24x7_result_element_v1[@num_elements_returned]
	 * or
	 *	struct hv_24x7_result_element_v2[@num_elements_returned]
	 *
	 * depending on the interface_version field of the
	 * struct hv_24x7_data_result_buffer containing this result.
	 */
	char elements[];
} __packed;

struct hv_24x7_data_result_buffer {
@@ -113,7 +153,7 @@ struct hv_24x7_data_result_buffer {
	__u8 reserved2[0x8];
	/* WARNING: only valid for the first result due to variable sizes of
	 *	    results */
	struct hv_24x7_result results[1]; /* [@num_results] */
	struct hv_24x7_result results[]; /* [@num_results] */
} __packed;

#endif
+1 −1
Original line number Diff line number Diff line
@@ -124,7 +124,7 @@ config HV_PERF_CTRS
	  Enable access to hypervisor supplied counters in perf. Currently,
	  this enables code that uses the hcall GetPerfCounterInfo and 24x7
	  interfaces to retrieve counters. GPCI exists on Power 6 and later
	  systems. 24x7 is available on Power 8 systems.
	  systems. 24x7 is available on Power 8 and later systems.

          If unsure, select Y.