Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 974802ea authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar
Browse files

perf_counter: Add forward/backward attribute ABI compatibility



Provide a means of extending the perf_counter_attr in a 'natural' way.

We allow growing the structure by appending fields at the end by specifying
the full structure size inside it.

When a new kernel sees a smaller (old) structure, it will 0 pad the tail.
When an old kernel sees a larger (new) structure, it will verify the tail
consists of 0s, otherwise fail.

If we fail due to a size-mismatch, we return -E2BIG and write the kernel's
native attribute size back into the provided structure.

Furthermore, add some attribute verification, so that we'll fail counter
creation when unknown bits are present (PERF_SAMPLE, PERF_FORMAT, or in
the __reserved fields).

(This ABI detail is introduced while keeping the existing syscall ABI.)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent bbd36e5e
Loading
Loading
Loading
Loading
+15 −4
Original line number Original line Diff line number Diff line
@@ -120,6 +120,8 @@ enum perf_counter_sample_format {
	PERF_SAMPLE_ID				= 1U << 6,
	PERF_SAMPLE_ID				= 1U << 6,
	PERF_SAMPLE_CPU				= 1U << 7,
	PERF_SAMPLE_CPU				= 1U << 7,
	PERF_SAMPLE_PERIOD			= 1U << 8,
	PERF_SAMPLE_PERIOD			= 1U << 8,

	PERF_SAMPLE_MAX = 1U << 9,		/* non-ABI */
};
};


/*
/*
@@ -131,17 +133,26 @@ enum perf_counter_read_format {
	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
	PERF_FORMAT_ID				= 1U << 2,
	PERF_FORMAT_ID				= 1U << 2,

	PERF_FORMAT_MAX = 1U << 3, 		/* non-ABI */
};
};


#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */

/*
/*
 * Hardware event to monitor via a performance monitoring counter:
 * Hardware event to monitor via a performance monitoring counter:
 */
 */
struct perf_counter_attr {
struct perf_counter_attr {

	/*
	/*
	 * Major type: hardware/software/tracepoint/etc.
	 * Major type: hardware/software/tracepoint/etc.
	 */
	 */
	__u32			type;
	__u32			type;
	__u32			__reserved_1;

	/*
	 * Size of the attr structure, for fwd/bwd compat.
	 */
	__u32			size;


	/*
	/*
	 * Type specific configuration information.
	 * Type specific configuration information.
@@ -168,12 +179,12 @@ struct perf_counter_attr {
				comm	       :  1, /* include comm data     */
				comm	       :  1, /* include comm data     */
				freq           :  1, /* use freq, not period  */
				freq           :  1, /* use freq, not period  */


				__reserved_2   : 53;
				__reserved_1   : 53;


	__u32			wakeup_events;	/* wakeup every n events */
	__u32			wakeup_events;	/* wakeup every n events */
	__u32			__reserved_3;
	__u32			__reserved_2;


	__u64			__reserved_4;
	__u64			__reserved_3;
};
};


/*
/*
+1 −1
Original line number Original line Diff line number Diff line
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);




asmlinkage long sys_perf_counter_open(
asmlinkage long sys_perf_counter_open(
		const struct perf_counter_attr __user *attr_uptr,
		struct perf_counter_attr __user *attr_uptr,
		pid_t pid, int cpu, int group_fd, unsigned long flags);
		pid_t pid, int cpu, int group_fd, unsigned long flags);
#endif
#endif
+86 −3
Original line number Original line Diff line number Diff line
@@ -3584,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
	case PERF_TYPE_TRACEPOINT:
	case PERF_TYPE_TRACEPOINT:
		pmu = tp_perf_counter_init(counter);
		pmu = tp_perf_counter_init(counter);
		break;
		break;

	default:
		break;
	}
	}
done:
done:
	err = 0;
	err = 0;
@@ -3610,6 +3613,85 @@ perf_counter_alloc(struct perf_counter_attr *attr,
	return counter;
	return counter;
}
}


/*
 * perf_copy_attr - copy a perf_counter_attr from user space, honouring the
 * size field so that older and newer user-space layouts both work.
 *
 * @uattr: user-space attribute structure; its ->size field may be rewritten
 *         with the kernel's sizeof(*attr) when the size is rejected.
 * @attr:  kernel-side destination; fully zeroed before anything is copied.
 *
 * A user size of 0 is treated as PERF_ATTR_SIZE_VER0.  A structure smaller
 * than the kernel's is zero padded (via the initial memset); a structure
 * larger than the kernel's is accepted only if every byte past
 * sizeof(*attr) is zero, and only sizeof(*attr) bytes are then copied.
 *
 * Returns 0 on success, -EFAULT (or the get_user() error) when user memory
 * cannot be accessed, -E2BIG on a size mismatch or non-zero unknown tail,
 * and -EINVAL when the type, reserved fields, sample_type or read_format
 * contain values this kernel does not know.
 */
static int perf_copy_attr(struct perf_counter_attr __user *uattr,
			  struct perf_counter_attr *attr)
{
	int ret;
	u32 size;

	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
		return -EFAULT;

	/*
	 * Zero the full structure, so that a short copy leaves the fields
	 * user-space did not provide cleared.
	 */
	memset(attr, 0, sizeof(*attr));

	ret = get_user(size, &uattr->size);
	if (ret)
		return ret;

	if (size > PAGE_SIZE)	/* silly large */
		goto err_size;

	if (!size)		/* abi compat */
		size = PERF_ATTR_SIZE_VER0;

	if (size < PERF_ATTR_SIZE_VER0)
		goto err_size;

	/*
	 * If we're handed a bigger struct than we know of, ensure all the
	 * unknown bytes are 0.  Walk the tail byte-wise so that every byte in
	 * [sizeof(*attr), size) is inspected exactly once, independent of the
	 * alignment of uattr, and nothing beyond 'size' is read.
	 */
	if (size > sizeof(*attr)) {
		unsigned char val;
		unsigned char __user *addr;
		unsigned char __user *end;

		addr = (void __user *)uattr + sizeof(*attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			ret = get_user(val, addr);
			if (ret)
				return ret;
			if (val)
				goto err_size;
		}

		/*
		 * The tail is verified to be zero; never copy more than the
		 * kernel structure can hold, or we would overrun *attr.
		 */
		size = sizeof(*attr);
	}

	ret = copy_from_user(attr, uattr, size);
	if (ret)
		return -EFAULT;

	/*
	 * If the type exists, the corresponding creation will verify
	 * the attr->config.
	 */
	if (attr->type >= PERF_TYPE_MAX)
		return -EINVAL;

	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
		return -EINVAL;

	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
		return -EINVAL;

	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
		return -EINVAL;

out:
	return ret;

err_size:
	/*
	 * Best effort: tell user-space which size this kernel expects.  The
	 * result of put_user() is deliberately not checked; the caller gets
	 * -E2BIG either way.
	 */
	put_user(sizeof(*attr), &uattr->size);
	ret = -E2BIG;
	goto out;
}

/**
/**
 * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
 * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
 *
 *
@@ -3619,7 +3701,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 * @group_fd:		group leader counter fd
 * @group_fd:		group leader counter fd
 */
 */
SYSCALL_DEFINE5(perf_counter_open,
SYSCALL_DEFINE5(perf_counter_open,
		const struct perf_counter_attr __user *, attr_uptr,
		struct perf_counter_attr __user *, attr_uptr,
		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
{
	struct perf_counter *counter, *group_leader;
	struct perf_counter *counter, *group_leader;
@@ -3635,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open,
	if (flags)
	if (flags)
		return -EINVAL;
		return -EINVAL;


	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
	ret = perf_copy_attr(attr_uptr, &attr);
		return -EFAULT;
	if (ret)
		return ret;


	if (!attr.exclude_kernel) {
	if (!attr.exclude_kernel) {
		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+3 −2
Original line number Original line Diff line number Diff line
@@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void)
	_min1 < _min2 ? _min1 : _min2; })
	_min1 < _min2 ? _min1 : _min2; })


static inline int
static inline int
sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
sys_perf_counter_open(struct perf_counter_attr *attr,
		      pid_t pid, int cpu, int group_fd,
		      pid_t pid, int cpu, int group_fd,
		      unsigned long flags)
		      unsigned long flags)
{
{
	return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
	attr->size = sizeof(*attr);
	return syscall(__NR_perf_counter_open, attr, pid, cpu,
		       group_fd, flags);
		       group_fd, flags);
}
}