Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26a41cd1 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull cgroup fixes from Tejun Heo:
 "During recent restructuring, device_cgroup unified config input check
  and enforcement logic; unfortunately, it turned out to share too much.
  Aristeu's patches fix the breakage and marked for -stable backport.

  The other two patches are fallouts from kernfs conversion.  The blkcg
  change is temporary and will go away once kernfs internal locking gets
  simplified (patches pending)"

* 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  blkcg: use trylock on blkcg_pol_mutex in blkcg_reset_stats()
  device_cgroup: check if exception removal is allowed
  device_cgroup: fix the comment format for recently added functions
  device_cgroup: rework device access check and exception checking
  cgroup: fix the retry path of cgroup_mount()
parents 619b5891 36c38fb7
Loading
Loading
Loading
Loading
+14 −1
Original line number Diff line number Diff line
@@ -451,7 +451,20 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
	struct blkcg_gq *blkg;
	int i;

	mutex_lock(&blkcg_pol_mutex);
	/*
	 * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
	 * which ends up putting cgroup's internal cgroup_tree_mutex under
	 * it; however, cgroup_tree_mutex is nested above cgroup file
	 * active protection and grabbing blkcg_pol_mutex from a cgroup
	 * file operation creates a possible circular dependency.  cgroup
	 * internal locking is planned to go through further simplification
	 * and this issue should go away soon.  For now, let's trylock
	 * blkcg_pol_mutex and restart the write on failure.
	 *
	 * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
	 */
	if (!mutex_trylock(&blkcg_pol_mutex))
		return restart_syscall();
	spin_lock_irq(&blkcg->lock);

	/*
+4 −4
Original line number Diff line number Diff line
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();
retry:

	mutex_lock(&cgroup_tree_mutex);
	mutex_lock(&cgroup_mutex);

@@ -1503,7 +1503,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

retry:
	/* look for a matching existing root */
	if (!opts.subsys_mask && !opts.none && !opts.name) {
		cgrp_dfl_root_visible = true;
@@ -1562,9 +1562,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
		if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&cgroup_tree_mutex);
			kfree(opts.release_agent);
			kfree(opts.name);
			msleep(10);
			mutex_lock(&cgroup_tree_mutex);
			mutex_lock(&cgroup_mutex);
			goto retry;
		}

+159 −43
Original line number Diff line number Diff line
@@ -306,57 +306,138 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
}

/**
 * may_access - verifies if a new exception is part of what is allowed
 *		by a dev cgroup based on the default policy +
 *		exceptions. This is used to make sure a child cgroup
 *		won't have more privileges than its parent or to
 *		verify if a certain access is allowed.
 * @dev_cgroup: dev cgroup to be tested against
 * @refex: new exception
 * @behavior: behavior of the exception
 * match_exception	- iterates the exception list trying to find a complete match
 * @exceptions: list of exceptions
 * @type: device type (DEV_BLOCK or DEV_CHAR)
 * @major: device file major number, ~0 to match all
 * @minor: device file minor number, ~0 to match all
 * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
 *
 * It is considered a complete match if an exception is found that will
 * contain the entire range of provided parameters.
 *
 * Return: true in case it matches an exception completely
 */
static bool may_access(struct dev_cgroup *dev_cgroup,
		       struct dev_exception_item *refex,
		       enum devcg_behavior behavior)
static bool match_exception(struct list_head *exceptions, short type,
			    u32 major, u32 minor, short access)
{
	struct dev_exception_item *ex;
	bool match = false;

	rcu_lockdep_assert(rcu_read_lock_held() ||
			   lockdep_is_held(&devcgroup_mutex),
			   "device_cgroup::may_access() called without proper synchronization");
	list_for_each_entry_rcu(ex, exceptions, list) {
		if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
			continue;
		if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
			continue;
		if (ex->major != ~0 && ex->major != major)
			continue;
		if (ex->minor != ~0 && ex->minor != minor)
			continue;
		/* provided access cannot have more than the exception rule */
		if (access & (~ex->access))
			continue;
		return true;
	}
	return false;
}

/**
 * match_exception_partial - iterates the exception list trying to find a partial match
 * @exceptions: list of exceptions
 * @type: device type (DEV_BLOCK or DEV_CHAR)
 * @major: device file major number, ~0 to match all
 * @minor: device file minor number, ~0 to match all
 * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
 *
 * It is considered a partial match if an exception's range is found to
 * contain *any* of the devices specified by provided parameters. This is
 * used to make sure no extra access is being granted that is forbidden by
 * any of the exception list.
 *
 * Return: true in case the provided range mat matches an exception completely
 */
static bool match_exception_partial(struct list_head *exceptions, short type,
				    u32 major, u32 minor, short access)
{
	struct dev_exception_item *ex;

	list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
		if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
	list_for_each_entry_rcu(ex, exceptions, list) {
		if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
			continue;
		if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
		if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
			continue;
		if (ex->major != ~0 && ex->major != refex->major)
		/*
		 * We must be sure that both the exception and the provided
		 * range aren't masking all devices
		 */
		if (ex->major != ~0 && major != ~0 && ex->major != major)
			continue;
		if (ex->minor != ~0 && ex->minor != refex->minor)
		if (ex->minor != ~0 && minor != ~0 && ex->minor != minor)
			continue;
		if (refex->access & (~ex->access))
		/*
		 * In order to make sure the provided range isn't matching
		 * an exception, all its access bits shouldn't match the
		 * exception's access bits
		 */
		if (!(access & ex->access))
			continue;
		match = true;
		break;
		return true;
	}
	return false;
}

/**
 * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions
 * @dev_cgroup: dev cgroup to be tested against
 * @refex: new exception
 * @behavior: behavior of the exception's dev_cgroup
 *
 * This is used to make sure a child cgroup won't have more privileges
 * than its parent
 */
static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
		          struct dev_exception_item *refex,
		          enum devcg_behavior behavior)
{
	bool match = false;

	rcu_lockdep_assert(rcu_read_lock_held() ||
			   lockdep_is_held(&devcgroup_mutex),
			   "device_cgroup:verify_new_ex called without proper synchronization");

	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
		if (behavior == DEVCG_DEFAULT_ALLOW) {
			/* the exception will deny access to certain devices */
			/*
			 * new exception in the child doesn't matter, only
			 * adding extra restrictions
			 */ 
			return true;
		} else {
			/* the exception will allow access to certain devices */
			if (match)
			/*
				 * a new exception allowing access shouldn't
				 * match an parent's exception
			 * new exception in the child will add more devices
			 * that can be acessed, so it can't match any of
			 * parent's exceptions, even slightly
			 */ 
			match = match_exception_partial(&dev_cgroup->exceptions,
							refex->type,
							refex->major,
							refex->minor,
							refex->access);

			if (match)
				return false;
			return true;
		}
	} else {
		/* only behavior == DEVCG_DEFAULT_DENY allowed here */
		/*
		 * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
		 * the new exception will add access to more devices and must
		 * be contained completely in an parent's exception to be
		 * allowed
		 */
		match = match_exception(&dev_cgroup->exceptions, refex->type,
					refex->major, refex->minor,
					refex->access);

		if (match)
			/* parent has an exception that matches the proposed */
			return true;
@@ -378,7 +459,38 @@ static int parent_has_perm(struct dev_cgroup *childcg,

	if (!parent)
		return 1;
	return may_access(parent, ex, childcg->behavior);
	return verify_new_ex(parent, ex, childcg->behavior);
}

/**
 * parent_allows_removal - verify if it's ok to remove an exception
 * @childcg: child cgroup from where the exception will be removed
 * @ex: exception being removed
 *
 * When removing an exception in cgroups with default ALLOW policy, it must
 * be checked if removing it will give the child cgroup more access than the
 * parent.
 *
 * Return: true if it's ok to remove exception, false otherwise
 */
static bool parent_allows_removal(struct dev_cgroup *childcg,
				  struct dev_exception_item *ex)
{
	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));

	if (!parent)
		return true;

	/* It's always allowed to remove access to devices */
	if (childcg->behavior == DEVCG_DEFAULT_DENY)
		return true;

	/*
	 * Make sure you're not removing part or a whole exception existing in
	 * the parent cgroup
	 */
	return !match_exception_partial(&parent->exceptions, ex->type,
					ex->major, ex->minor, ex->access);
}

/**
@@ -616,17 +728,21 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,

	switch (filetype) {
	case DEVCG_ALLOW:
		if (!parent_has_perm(devcgroup, &ex))
			return -EPERM;
		/*
		 * If the default policy is to allow by default, try to remove
		 * an matching exception instead. And be silent about it: we
		 * don't want to break compatibility
		 */
		if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
			/* Check if the parent allows removing it first */
			if (!parent_allows_removal(devcgroup, &ex))
				return -EPERM;
			dev_exception_rm(devcgroup, &ex);
			return 0;
			break;
		}

		if (!parent_has_perm(devcgroup, &ex))
			return -EPERM;
		rc = dev_exception_add(devcgroup, &ex);
		break;
	case DEVCG_DENY:
@@ -704,18 +820,18 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
				        short access)
{
	struct dev_cgroup *dev_cgroup;
	struct dev_exception_item ex;
	int rc;

	memset(&ex, 0, sizeof(ex));
	ex.type = type;
	ex.major = major;
	ex.minor = minor;
	ex.access = access;
	bool rc;

	rcu_read_lock();
	dev_cgroup = task_devcgroup(current);
	rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior);
	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
		/* Can't match any of the exceptions, even partially */
		rc = !match_exception_partial(&dev_cgroup->exceptions,
					      type, major, minor, access);
	else
		/* Need to match completely one exception to be allowed */
		rc = match_exception(&dev_cgroup->exceptions, type, major,
				     minor, access);
	rcu_read_unlock();

	if (!rc)