Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e338d263 authored by Andrew Morgan's avatar Andrew Morgan Committed by Linus Torvalds
Browse files

Add 64-bit capability support to the kernel

The patch supports legacy (32-bit) capability userspace, and where possible
translates 32-bit capabilities to/from userspace and the VFS to 64-bit
kernel space capabilities.  If a capability set cannot be compressed into
32-bits for consumption by user space, the system call fails, with -ERANGE.

FWIW libcap-2.00 supports this change (and earlier capability formats)

 http://www.kernel.org/pub/linux/libs/security/linux-privs/kernel-2.6/



[akpm@linux-foundation.org: coding-syle fixes]
[akpm@linux-foundation.org: use get_task_comm()]
[ezk@cs.sunysb.edu: build fix]
[akpm@linux-foundation.org: do not initialise statics to 0 or NULL]
[akpm@linux-foundation.org: unused var]
[serue@us.ibm.com: export __cap_ symbols]
Signed-off-by: default avatarAndrew G. Morgan <morgan@kernel.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: default avatarSerge Hallyn <serue@us.ibm.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: James Morris <jmorris@namei.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: default avatarErez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 8f6936f4
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -11,8 +11,6 @@
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/export.h>

#define	CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))

int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
{
	struct exp_flavor_info *f;
@@ -69,9 +67,11 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
	ret = set_current_groups(cred.cr_group_info);
	put_group_info(cred.cr_group_info);
	if ((cred.cr_uid)) {
		cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
		current->cap_effective =
			cap_drop_nfsd_set(current->cap_effective);
	} else {
		cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
		current->cap_effective =
			cap_raise_nfsd_set(current->cap_effective,
					   current->cap_permitted);
	}
	return ret;
+15 −6
Original line number Diff line number Diff line
@@ -281,14 +281,23 @@ static inline char *task_sig(struct task_struct *p, char *buffer)
	return buffer;
}

static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer)
{
	unsigned __capi;

	buffer += sprintf(buffer, "%s", header);
	CAP_FOR_EACH_U32(__capi) {
		buffer += sprintf(buffer, "%08x",
				  a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
	}
	return buffer + sprintf(buffer, "\n");
}

static inline char *task_cap(struct task_struct *p, char *buffer)
{
    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
			    "CapPrm:\t%016x\n"
			    "CapEff:\t%016x\n",
			    cap_t(p->cap_inheritable),
			    cap_t(p->cap_permitted),
			    cap_t(p->cap_effective));
	buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer);
	buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer);
	return render_cap_t("CapEff:\t", &p->cap_effective, buffer);
}

static inline char *task_context_switch_counts(struct task_struct *p,
+155 −66
Original line number Diff line number Diff line
@@ -23,13 +23,20 @@ struct task_struct;
   kernel might be somewhat backwards compatible, but don't bet on
   it. */

/* XXX - Note, cap_t, is defined by POSIX to be an "opaque" pointer to
/* Note, cap_t, is defined by POSIX (draft) to be an "opaque" pointer to
   a set of three capability sets.  The transposition of 3*the
   following structure to such a composite is better handled in a user
   library since the draft standard requires the use of malloc/free
   etc.. */

#define _LINUX_CAPABILITY_VERSION  0x19980330
#define _LINUX_CAPABILITY_VERSION_1  0x19980330
#define _LINUX_CAPABILITY_U32S_1     1

#define _LINUX_CAPABILITY_VERSION_2  0x20071026
#define _LINUX_CAPABILITY_U32S_2     2

#define _LINUX_CAPABILITY_VERSION    _LINUX_CAPABILITY_VERSION_2
#define _LINUX_CAPABILITY_U32S       _LINUX_CAPABILITY_U32S_2

typedef struct __user_cap_header_struct {
	__u32 version;
@@ -42,43 +49,42 @@ typedef struct __user_cap_data_struct {
        __u32 inheritable;
} __user *cap_user_data_t;


#define XATTR_CAPS_SUFFIX "capability"
#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX

#define XATTR_CAPS_SZ (3*sizeof(__le32))
#define VFS_CAP_REVISION_MASK	0xFF000000
#define VFS_CAP_FLAGS_MASK	~VFS_CAP_REVISION_MASK
#define VFS_CAP_FLAGS_EFFECTIVE	0x000001

#define VFS_CAP_REVISION_1	0x01000000
#define VFS_CAP_U32_1           1
#define XATTR_CAPS_SZ_1         (sizeof(__le32)*(1 + 2*VFS_CAP_U32_1))

#define VFS_CAP_REVISION	VFS_CAP_REVISION_1
#define VFS_CAP_REVISION_2	0x02000000
#define VFS_CAP_U32_2           2
#define XATTR_CAPS_SZ_2         (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2))

#define XATTR_CAPS_SZ           XATTR_CAPS_SZ_2
#define VFS_CAP_U32             VFS_CAP_U32_2
#define VFS_CAP_REVISION	VFS_CAP_REVISION_2

#define VFS_CAP_FLAGS_MASK	~VFS_CAP_REVISION_MASK
#define VFS_CAP_FLAGS_EFFECTIVE	0x000001

struct vfs_cap_data {
	__u32 magic_etc;  /* Little endian */
	__le32 magic_etc;            /* Little endian */
	struct {
		__u32 permitted;    /* Little endian */
		__u32 inheritable;  /* Little endian */
	} data[1];
		__le32 permitted;    /* Little endian */
		__le32 inheritable;  /* Little endian */
	} data[VFS_CAP_U32];
};

#ifdef __KERNEL__

/* #define STRICT_CAP_T_TYPECHECKS */

#ifdef STRICT_CAP_T_TYPECHECKS

typedef struct kernel_cap_struct {
	__u32 cap;
	__u32 cap[_LINUX_CAPABILITY_U32S];
} kernel_cap_t;

#else

typedef __u32 kernel_cap_t;

#endif

#define _USER_CAP_HEADER_SIZE  (2*sizeof(__u32))
#define _USER_CAP_HEADER_SIZE  (sizeof(struct __user_cap_header_struct))
#define _KERNEL_CAP_T_SIZE     (sizeof(kernel_cap_t))

#endif
@@ -121,10 +127,6 @@ typedef __u32 kernel_cap_t;

#define CAP_FSETID           4

/* Used to decide between falling back on the old suser() or fsuser(). */

#define CAP_FS_MASK          0x1f

/* Overrides the restriction that the real or effective user ID of a
   process sending a signal must match the real or effective user ID
   of the process receiving the signal. */
@@ -147,8 +149,12 @@ typedef __u32 kernel_cap_t;
 ** Linux-specific capabilities
 **/

/* Transfer any capability in your permitted set to any pid,
   remove any capability in your permitted set from any pid */
/* Without VFS support for capabilities:
 *   Transfer any capability in your permitted set to any pid,
 *   remove any capability in your permitted set from any pid
 * With VFS support for capabilities (neither of above, but)
 *   Add any capability to the current process' inheritable set
 */

#define CAP_SETPCAP          8

@@ -309,70 +315,153 @@ typedef __u32 kernel_cap_t;

#define CAP_SETFCAP	     31

#ifdef __KERNEL__

/*
 * Internal kernel functions only
 * Bit location of each capability (used by user-space library and kernel)
 */

#ifdef STRICT_CAP_T_TYPECHECKS

#define to_cap_t(x) { x }
#define cap_t(x) (x).cap

#else

#define to_cap_t(x) (x)
#define cap_t(x) (x)

#endif
#define CAP_TO_INDEX(x)     ((x) >> 5)        /* 1 << 5 == bits in __u32 */
#define CAP_TO_MASK(x)      (1 << ((x) & 31)) /* mask for indexed __u32 */

#define CAP_EMPTY_SET       to_cap_t(0)
#define CAP_FULL_SET        to_cap_t(~0)
#define CAP_INIT_EFF_SET    to_cap_t(~0 & ~CAP_TO_MASK(CAP_SETPCAP))
#define CAP_INIT_INH_SET    to_cap_t(0)
#ifdef __KERNEL__

#define CAP_TO_MASK(x) (1 << (x))
#define cap_raise(c, flag)   (cap_t(c) |=  CAP_TO_MASK(flag))
#define cap_lower(c, flag)   (cap_t(c) &= ~CAP_TO_MASK(flag))
#define cap_raised(c, flag)  (cap_t(c) & CAP_TO_MASK(flag))
/*
 * Internal kernel functions only
 */

static inline kernel_cap_t cap_combine(kernel_cap_t a, kernel_cap_t b)
#define CAP_FOR_EACH_U32(__capi)  \
	for (__capi = 0; __capi < _LINUX_CAPABILITY_U32S; ++__capi)

# define CAP_FS_MASK_B0     (CAP_TO_MASK(CAP_CHOWN)		\
			    | CAP_TO_MASK(CAP_DAC_OVERRIDE)	\
			    | CAP_TO_MASK(CAP_DAC_READ_SEARCH)	\
			    | CAP_TO_MASK(CAP_FOWNER)		\
			    | CAP_TO_MASK(CAP_FSETID))

#if _LINUX_CAPABILITY_U32S != 2
# error Fix up hand-coded capability macro initializers
#else /* HAND-CODED capability initializers */

# define CAP_EMPTY_SET    {{ 0, 0 }}
# define CAP_FULL_SET     {{ ~0, ~0 }}
# define CAP_INIT_EFF_SET {{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}
# define CAP_FS_SET       {{ CAP_FS_MASK_B0, 0 }}
# define CAP_NFSD_SET     {{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), 0 }}

#endif /* _LINUX_CAPABILITY_U32S != 2 */

#define CAP_INIT_INH_SET    CAP_EMPTY_SET

# define cap_clear(c)         do { (c) = __cap_empty_set; } while (0)
# define cap_set_full(c)      do { (c) = __cap_full_set; } while (0)
# define cap_set_init_eff(c)  do { (c) = __cap_init_eff_set; } while (0)

#define cap_raise(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
#define cap_lower(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag))
#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag))

#define CAP_BOP_ALL(c, a, b, OP)                                    \
do {                                                                \
	unsigned __capi;                                            \
	CAP_FOR_EACH_U32(__capi) {                                  \
		c.cap[__capi] = a.cap[__capi] OP b.cap[__capi];     \
	}                                                           \
} while (0)

#define CAP_UOP_ALL(c, a, OP)                                       \
do {                                                                \
	unsigned __capi;                                            \
	CAP_FOR_EACH_U32(__capi) {                                  \
		c.cap[__capi] = OP a.cap[__capi];                   \
	}                                                           \
} while (0)

static inline kernel_cap_t cap_combine(const kernel_cap_t a,
				       const kernel_cap_t b)
{
	kernel_cap_t dest;
     cap_t(dest) = cap_t(a) | cap_t(b);
	CAP_BOP_ALL(dest, a, b, |);
	return dest;
}

static inline kernel_cap_t cap_intersect(kernel_cap_t a, kernel_cap_t b)
static inline kernel_cap_t cap_intersect(const kernel_cap_t a,
					 const kernel_cap_t b)
{
	kernel_cap_t dest;
     cap_t(dest) = cap_t(a) & cap_t(b);
	CAP_BOP_ALL(dest, a, b, &);
	return dest;
}

static inline kernel_cap_t cap_drop(kernel_cap_t a, kernel_cap_t drop)
static inline kernel_cap_t cap_drop(const kernel_cap_t a,
				    const kernel_cap_t drop)
{
	kernel_cap_t dest;
     cap_t(dest) = cap_t(a) & ~cap_t(drop);
	CAP_BOP_ALL(dest, a, drop, &~);
	return dest;
}

static inline kernel_cap_t cap_invert(kernel_cap_t c)
static inline kernel_cap_t cap_invert(const kernel_cap_t c)
{
	kernel_cap_t dest;
     cap_t(dest) = ~cap_t(c);
	CAP_UOP_ALL(dest, c, ~);
	return dest;
}

#define cap_isclear(c)       (!cap_t(c))
#define cap_issubset(a,set)  (!(cap_t(a) & ~cap_t(set)))
static inline int cap_isclear(const kernel_cap_t a)
{
	unsigned __capi;
	CAP_FOR_EACH_U32(__capi) {
		if (a.cap[__capi] != 0)
			return 0;
	}
	return 1;
}

static inline int cap_issubset(const kernel_cap_t a, const kernel_cap_t set)
{
	kernel_cap_t dest;
	dest = cap_drop(a, set);
	return cap_isclear(dest);
}

#define cap_clear(c)         do { cap_t(c) =  0; } while(0)
#define cap_set_full(c)      do { cap_t(c) = ~0; } while(0)
#define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
/* Used to decide between falling back on the old suser() or fsuser(). */

static inline int cap_is_fs_cap(int cap)
{
	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
	return !!(CAP_TO_MASK(cap) & __cap_fs_set.cap[CAP_TO_INDEX(cap)]);
}

static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a)
{
	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
	return cap_drop(a, __cap_fs_set);
}

static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a,
					    const kernel_cap_t permitted)
{
	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
	return cap_combine(a,
			   cap_intersect(permitted, __cap_fs_set));
}

static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a)
{
	const kernel_cap_t __cap_fs_set = CAP_NFSD_SET;
	return cap_drop(a, __cap_fs_set);
}

static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a,
					      const kernel_cap_t permitted)
{
	const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET;
	return cap_combine(a,
			   cap_intersect(permitted, __cap_nfsd_set));
}

#define cap_is_fs_cap(c)     (CAP_TO_MASK(c) & CAP_FS_MASK)
extern const kernel_cap_t __cap_empty_set;
extern const kernel_cap_t __cap_full_set;
extern const kernel_cap_t __cap_init_eff_set;

int capable(int cap);
int __capable(struct task_struct *t, int cap);
+104 −9
Original line number Diff line number Diff line
@@ -21,6 +21,37 @@
 */
static DEFINE_SPINLOCK(task_capability_lock);

/*
 * Leveraged for setting/resetting capabilities
 */

const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
const kernel_cap_t __cap_full_set = CAP_FULL_SET;
const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET;

EXPORT_SYMBOL(__cap_empty_set);
EXPORT_SYMBOL(__cap_full_set);
EXPORT_SYMBOL(__cap_init_eff_set);

/*
 * More recent versions of libcap are available from:
 *
 *   http://www.kernel.org/pub/linux/libs/security/linux-privs/
 */

static void warn_legacy_capability_use(void)
{
	static int warned;
	if (!warned) {
		char name[sizeof(current->comm)];

		printk(KERN_INFO "warning: `%s' uses 32-bit capabilities"
		       " (legacy support in use)\n",
		       get_task_comm(name, current));
		warned = 1;
	}
}

/*
 * For sys_getproccap() and sys_setproccap(), any of the three
 * capability set pointers may be NULL -- indicating that that set is
@@ -42,12 +73,21 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
	pid_t pid;
	__u32 version;
	struct task_struct *target;
	struct __user_cap_data_struct data;
	unsigned tocopy;
	kernel_cap_t pE, pI, pP;

	if (get_user(version, &header->version))
		return -EFAULT;

	if (version != _LINUX_CAPABILITY_VERSION) {
	switch (version) {
	case _LINUX_CAPABILITY_VERSION_1:
		warn_legacy_capability_use();
		tocopy = _LINUX_CAPABILITY_U32S_1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
		tocopy = _LINUX_CAPABILITY_U32S_2;
		break;
	default:
		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
			return -EFAULT;
		return -EINVAL;
@@ -71,14 +111,47 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
	} else
		target = current;

	ret = security_capget(target, &data.effective, &data.inheritable, &data.permitted);
	ret = security_capget(target, &pE, &pI, &pP);

out:
	read_unlock(&tasklist_lock);
	spin_unlock(&task_capability_lock);

	if (!ret && copy_to_user(dataptr, &data, sizeof data))
	if (!ret) {
		struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S];
		unsigned i;

		for (i = 0; i < tocopy; i++) {
			kdata[i].effective = pE.cap[i];
			kdata[i].permitted = pP.cap[i];
			kdata[i].inheritable = pI.cap[i];
		}

		/*
		 * Note, in the case, tocopy < _LINUX_CAPABILITY_U32S,
		 * we silently drop the upper capabilities here. This
		 * has the effect of making older libcap
		 * implementations implicitly drop upper capability
		 * bits when they perform a: capget/modify/capset
		 * sequence.
		 *
		 * This behavior is considered fail-safe
		 * behavior. Upgrading the application to a newer
		 * version of libcap will enable access to the newer
		 * capabilities.
		 *
		 * An alternative would be to return an error here
		 * (-ERANGE), but that causes legacy applications to
		 * unexpectidly fail; the capget/modify/capset aborts
		 * before modification is attempted and the application
		 * fails.
		 */

		if (copy_to_user(dataptr, kdata, tocopy
				 * sizeof(struct __user_cap_data_struct))) {
			return -EFAULT;
		}
	}

	return ret;
}
@@ -167,6 +240,8 @@ static inline int cap_set_all(kernel_cap_t *effective,
 */
asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
{
	struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S];
	unsigned i, tocopy;
	kernel_cap_t inheritable, permitted, effective;
	__u32 version;
	struct task_struct *target;
@@ -176,7 +251,15 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
	if (get_user(version, &header->version))
		return -EFAULT;

	if (version != _LINUX_CAPABILITY_VERSION) {
	switch (version) {
	case _LINUX_CAPABILITY_VERSION_1:
		warn_legacy_capability_use();
		tocopy = _LINUX_CAPABILITY_U32S_1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
		tocopy = _LINUX_CAPABILITY_U32S_2;
		break;
	default:
		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
			return -EFAULT;
		return -EINVAL;
@@ -188,10 +271,22 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
	if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
		return -EPERM;

	if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
	    copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
	    copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
	if (copy_from_user(&kdata, data, tocopy
			   * sizeof(struct __user_cap_data_struct))) {
		return -EFAULT;
	}

	for (i = 0; i < tocopy; i++) {
		effective.cap[i] = kdata[i].effective;
		permitted.cap[i] = kdata[i].permitted;
		inheritable.cap[i] = kdata[i].inheritable;
	}
	while (i < _LINUX_CAPABILITY_U32S) {
		effective.cap[i] = 0;
		permitted.cap[i] = 0;
		inheritable.cap[i] = 0;
		i++;
	}

	spin_lock(&task_capability_lock);
	read_lock(&tasklist_lock);
+2 −3
Original line number Diff line number Diff line
@@ -125,8 +125,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
	 * Superuser processes are usually more important, so we make it
	 * less likely that we kill those.
	 */
	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) ||
				p->uid == 0 || p->euid == 0)
	if (__capable(p, CAP_SYS_ADMIN) || p->uid == 0 || p->euid == 0)
		points /= 4;

	/*
@@ -135,7 +134,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
	 * tend to only have this flag set on applications they think
	 * of as important.
	 */
	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
	if (__capable(p, CAP_SYS_RAWIO))
		points /= 4;

	/*
Loading