Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 35c2e967 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  Switch !O_CREAT case to use of do_last()
  Get rid of symlink body copying
  Finish pulling of -ESTALE handling to upper level in do_filp_open()
  Turn do_link spaghetty into a normal loop
  Unify exits in O_CREAT handling
  Kill is_link argument of do_last()
  Pull handling of LAST_BIND into do_last(), clean up ok: part in do_filp_open()
  Leave mangled flag only for setting nd.intent.open.flag
  Get rid of passing mangled flag to do_last()
  Don't pass mangled open_flag to finish_open()
  pull more into do_last()
  bail out with ELOOP earlier in do_link loop
  pull the common predecessors into do_last()
  postpone __putname() until after do_last()
  unroll do_last: loop in do_filp_open()
  Shift releasing nd->root from do_last() to its caller
  gut do_filp_open() a bit more (do_last separation)
  beginning to untangle do_filp_open()
parents 6c550ee4 1f36f774
Loading
Loading
Loading
Loading
+262 −259
Original line number Diff line number Diff line
@@ -498,8 +498,6 @@ static int link_path_walk(const char *, struct nameidata *);

static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = 0;
	char *name;
	if (IS_ERR(link))
		goto fail;

@@ -510,22 +508,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
		path_get(&nd->root);
	}

	res = link_path_walk(link, nd);
	if (nd->depth || res || nd->last_type!=LAST_NORM)
		return res;
	/*
	 * If it is an iterative symlinks resolution in open_namei() we
	 * have to copy the last component. And all that crap because of
	 * bloody create() on broken symlinks. Furrfu...
	 */
	name = __getname();
	if (unlikely(!name)) {
		path_put(&nd->path);
		return -ENOMEM;
	}
	strcpy(name, nd->last.name);
	nd->last.name = name;
	return 0;
	return link_path_walk(link, nd);
fail:
	path_put(&nd->path);
	return PTR_ERR(link);
@@ -547,10 +530,10 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
	nd->path.dentry = path->dentry;
}

static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
static __always_inline int
__do_follow_link(struct path *path, struct nameidata *nd, void **p)
{
	int error;
	void *cookie;
	struct dentry *dentry = path->dentry;

	touch_atime(path->mnt, dentry);
@@ -562,9 +545,9 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
	}
	mntget(path->mnt);
	nd->last_type = LAST_BIND;
	cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
	error = PTR_ERR(cookie);
	if (!IS_ERR(cookie)) {
	*p = dentry->d_inode->i_op->follow_link(dentry, nd);
	error = PTR_ERR(*p);
	if (!IS_ERR(*p)) {
		char *s = nd_get_link(nd);
		error = 0;
		if (s)
@@ -574,8 +557,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
			if (error)
				path_put(&nd->path);
		}
		if (dentry->d_inode->i_op->put_link)
			dentry->d_inode->i_op->put_link(dentry, nd, cookie);
	}
	return error;
}
@@ -589,6 +570,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
 */
static inline int do_follow_link(struct path *path, struct nameidata *nd)
{
	void *cookie;
	int err = -ELOOP;
	if (current->link_count >= MAX_NESTED_LINKS)
		goto loop;
@@ -602,7 +584,9 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
	current->link_count++;
	current->total_link_count++;
	nd->depth++;
	err = __do_follow_link(path, nd);
	err = __do_follow_link(path, nd, &cookie);
	if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
		path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
	path_put(path);
	current->link_count--;
	nd->depth--;
@@ -1375,22 +1359,6 @@ static inline int may_create(struct inode *dir, struct dentry *child)
	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}

/* 
 * O_DIRECTORY translates into forcing a directory lookup.
 */
static inline int lookup_flags(unsigned int f)
{
	unsigned long retval = LOOKUP_FOLLOW;

	if (f & O_NOFOLLOW)
		retval &= ~LOOKUP_FOLLOW;
	
	if (f & O_DIRECTORY)
		retval |= LOOKUP_DIRECTORY;

	return retval;
}

/*
 * p1 and p2 should be directories on the same fs.
 */
@@ -1590,129 +1558,135 @@ static int open_will_truncate(int flag, struct inode *inode)
	return (flag & O_TRUNC);
}

/*
 * Note that the low bits of the passed in "open_flag"
 * are not the same as in the local variable "flag". See
 * open_to_namei_flags() for more details.
 */
struct file *do_filp_open(int dfd, const char *pathname,
		int open_flag, int mode, int acc_mode)
static struct file *finish_open(struct nameidata *nd,
				int open_flag, int acc_mode)
{
	struct file *filp;
	struct nameidata nd;
	int error;
	struct path path;
	struct dentry *dir;
	int count = 0;
	int will_truncate;
	int flag = open_to_namei_flags(open_flag);
	int force_reval = 0;
	int error;

	will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
	if (will_truncate) {
		error = mnt_want_write(nd->path.mnt);
		if (error)
			goto exit;
	}
	error = may_open(&nd->path, acc_mode, open_flag);
	if (error) {
		if (will_truncate)
			mnt_drop_write(nd->path.mnt);
		goto exit;
	}
	filp = nameidata_to_filp(nd);
	if (!IS_ERR(filp)) {
		error = ima_file_check(filp, acc_mode);
		if (error) {
			fput(filp);
			filp = ERR_PTR(error);
		}
	}
	if (!IS_ERR(filp)) {
		if (acc_mode & MAY_WRITE)
			vfs_dq_init(nd->path.dentry->d_inode);

		if (will_truncate) {
			error = handle_truncate(&nd->path);
			if (error) {
				fput(filp);
				filp = ERR_PTR(error);
			}
		}
	}
	/*
	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
	 * check for O_DSYNC if the need any syncing at all we enforce it's
	 * always set instead of having to deal with possibly weird behaviour
	 * for malicious applications setting only __O_SYNC.
	 * It is now safe to drop the mnt write
	 * because the filp has had a write taken
	 * on its behalf.
	 */
	if (open_flag & __O_SYNC)
		open_flag |= O_DSYNC;

	if (!acc_mode)
		acc_mode = MAY_OPEN | ACC_MODE(open_flag);

	/* O_TRUNC implies we need access checks for write permissions */
	if (flag & O_TRUNC)
		acc_mode |= MAY_WRITE;
	if (will_truncate)
		mnt_drop_write(nd->path.mnt);
	return filp;

	/* Allow the LSM permission hook to distinguish append 
	   access from general write access. */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;
exit:
	if (!IS_ERR(nd->intent.open.file))
		release_open_intent(nd);
	path_put(&nd->path);
	return ERR_PTR(error);
}

	/*
	 * The simplest case - just a plain lookup.
	 */
	if (!(flag & O_CREAT)) {
		filp = get_empty_filp();
static struct file *do_last(struct nameidata *nd, struct path *path,
			    int open_flag, int acc_mode,
			    int mode, const char *pathname,
			    int *want_dir)
{
	struct dentry *dir = nd->path.dentry;
	struct file *filp;
	int error = -EISDIR;

		if (filp == NULL)
			return ERR_PTR(-ENFILE);
		nd.intent.open.file = filp;
		filp->f_flags = open_flag;
		nd.intent.open.flags = flag;
		nd.intent.open.create_mode = 0;
		error = do_path_lookup(dfd, pathname,
					lookup_flags(flag)|LOOKUP_OPEN, &nd);
		if (IS_ERR(nd.intent.open.file)) {
			if (error == 0) {
				error = PTR_ERR(nd.intent.open.file);
				path_put(&nd.path);
	switch (nd->last_type) {
	case LAST_DOTDOT:
		follow_dotdot(nd);
		dir = nd->path.dentry;
		if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
			if (!dir->d_op->d_revalidate(dir, nd)) {
				error = -ESTALE;
				goto exit;
			}
		} else if (error)
			release_open_intent(&nd);
		if (error)
			return ERR_PTR(error);
		}
		/* fallthrough */
	case LAST_DOT:
	case LAST_ROOT:
		if (open_flag & O_CREAT)
			goto exit;
		/* fallthrough */
	case LAST_BIND:
		audit_inode(pathname, dir);
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 */
reval:
	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
	if (error)
		return ERR_PTR(error);
	if (force_reval)
		nd.flags |= LOOKUP_REVAL;
	error = path_walk(pathname, &nd);
	if (error) {
		if (nd.root.mnt)
			path_put(&nd.root);
		return ERR_PTR(error);
	/* trailing slashes? */
	if (nd->last.name[nd->last.len]) {
		if (open_flag & O_CREAT)
			goto exit;
		*want_dir = 1;
	}
	if (unlikely(!audit_dummy_context()))
		audit_inode(pathname, nd.path.dentry);

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
		goto exit_parent;
	/* just plain open? */
	if (!(open_flag & O_CREAT)) {
		error = do_lookup(nd, &nd->last, path);
		if (error)
			goto exit;
		error = -ENOENT;
		if (!path->dentry->d_inode)
			goto exit_dput;
		if (path->dentry->d_inode->i_op->follow_link)
			return NULL;
		error = -ENOTDIR;
		if (*want_dir & !path->dentry->d_inode->i_op->lookup)
			goto exit_dput;
		path_to_nameidata(path, nd);
		audit_inode(pathname, nd->path.dentry);
		goto ok;
	}

	error = -ENFILE;
	filp = get_empty_filp();
	if (filp == NULL)
		goto exit_parent;
	nd.intent.open.file = filp;
	filp->f_flags = open_flag;
	nd.intent.open.flags = flag;
	nd.intent.open.create_mode = mode;
	dir = nd.path.dentry;
	nd.flags &= ~LOOKUP_PARENT;
	nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
	if (flag & O_EXCL)
		nd.flags |= LOOKUP_EXCL;
	/* OK, it's O_CREAT */
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(&nd);
	path.mnt = nd.path.mnt;

do_last:
	error = PTR_ERR(path.dentry);
	if (IS_ERR(path.dentry)) {
	path->dentry = lookup_hash(nd);
	path->mnt = nd->path.mnt;

	error = PTR_ERR(path->dentry);
	if (IS_ERR(path->dentry)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		goto exit;
	}

	if (IS_ERR(nd.intent.open.file)) {
		error = PTR_ERR(nd.intent.open.file);
	if (IS_ERR(nd->intent.open.file)) {
		error = PTR_ERR(nd->intent.open.file);
		goto exit_mutex_unlock;
	}

	/* Negative dentry, just create the file */
	if (!path.dentry->d_inode) {
	if (!path->dentry->d_inode) {
		/*
		 * This write is needed to ensure that a
		 * ro->rw transition does not occur between
@@ -1720,18 +1694,16 @@ do_last:
		 * a permanent write count is taken through
		 * the 'struct file' in nameidata_to_filp().
		 */
		error = mnt_want_write(nd.path.mnt);
		error = mnt_want_write(nd->path.mnt);
		if (error)
			goto exit_mutex_unlock;
		error = __open_namei_create(&nd, &path, open_flag, mode);
		error = __open_namei_create(nd, path, open_flag, mode);
		if (error) {
			mnt_drop_write(nd.path.mnt);
			mnt_drop_write(nd->path.mnt);
			goto exit;
		}
		filp = nameidata_to_filp(&nd);
		mnt_drop_write(nd.path.mnt);
		if (nd.root.mnt)
			path_put(&nd.root);
		filp = nameidata_to_filp(nd);
		mnt_drop_write(nd->path.mnt);
		if (!IS_ERR(filp)) {
			error = ima_file_check(filp, acc_mode);
			if (error) {
@@ -1746,150 +1718,181 @@ do_last:
	 * It already exists.
	 */
	mutex_unlock(&dir->d_inode->i_mutex);
	audit_inode(pathname, path.dentry);
	audit_inode(pathname, path->dentry);

	error = -EEXIST;
	if (flag & O_EXCL)
	if (open_flag & O_EXCL)
		goto exit_dput;

	if (__follow_mount(&path)) {
	if (__follow_mount(path)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
		if (open_flag & O_NOFOLLOW)
			goto exit_dput;
	}

	error = -ENOENT;
	if (!path.dentry->d_inode)
	if (!path->dentry->d_inode)
		goto exit_dput;
	if (path.dentry->d_inode->i_op->follow_link)
		goto do_link;

	path_to_nameidata(&path, &nd);
	if (path->dentry->d_inode->i_op->follow_link)
		return NULL;

	path_to_nameidata(path, nd);
	error = -EISDIR;
	if (S_ISDIR(path.dentry->d_inode->i_mode))
	if (S_ISDIR(path->dentry->d_inode->i_mode))
		goto exit;
ok:
	filp = finish_open(nd, open_flag, acc_mode);
	return filp;

exit_mutex_unlock:
	mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
	path_put_conditional(path, nd);
exit:
	if (!IS_ERR(nd->intent.open.file))
		release_open_intent(nd);
	path_put(&nd->path);
	return ERR_PTR(error);
}

/*
	 * Consider:
	 * 1. may_open() truncates a file
	 * 2. a rw->ro mount transition occurs
	 * 3. nameidata_to_filp() fails due to
	 *    the ro mount.
	 * That would be inconsistent, and should
	 * be avoided. Taking this mnt write here
	 * ensures that (2) can not occur.
	 */
	will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
	if (will_truncate) {
		error = mnt_want_write(nd.path.mnt);
 * Note that the low bits of the passed in "open_flag"
 * are not the same as in the local variable "flag". See
 * open_to_namei_flags() for more details.
 */
struct file *do_filp_open(int dfd, const char *pathname,
		int open_flag, int mode, int acc_mode)
{
	struct file *filp;
	struct nameidata nd;
	int error;
	struct path path;
	int count = 0;
	int flag = open_to_namei_flags(open_flag);
	int force_reval = 0;
	int want_dir = open_flag & O_DIRECTORY;

	if (!(open_flag & O_CREAT))
		mode = 0;

	/*
	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
	 * check for O_DSYNC if the need any syncing at all we enforce it's
	 * always set instead of having to deal with possibly weird behaviour
	 * for malicious applications setting only __O_SYNC.
	 */
	if (open_flag & __O_SYNC)
		open_flag |= O_DSYNC;

	if (!acc_mode)
		acc_mode = MAY_OPEN | ACC_MODE(open_flag);

	/* O_TRUNC implies we need access checks for write permissions */
	if (open_flag & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/* Allow the LSM permission hook to distinguish append 
	   access from general write access. */
	if (open_flag & O_APPEND)
		acc_mode |= MAY_APPEND;

	/* find the parent */
reval:
	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
	if (error)
			goto exit;
	}
	error = may_open(&nd.path, acc_mode, open_flag);
	if (error) {
		if (will_truncate)
			mnt_drop_write(nd.path.mnt);
		goto exit;
	}
	filp = nameidata_to_filp(&nd);
	if (!IS_ERR(filp)) {
		error = ima_file_check(filp, acc_mode);
		if (error) {
			fput(filp);
			filp = ERR_PTR(error);
		}
	}
	if (!IS_ERR(filp)) {
		if (acc_mode & MAY_WRITE)
			vfs_dq_init(nd.path.dentry->d_inode);
		return ERR_PTR(error);
	if (force_reval)
		nd.flags |= LOOKUP_REVAL;

		if (will_truncate) {
			error = handle_truncate(&nd.path);
	current->total_link_count = 0;
	error = link_path_walk(pathname, &nd);
	if (error) {
				fput(filp);
		filp = ERR_PTR(error);
		goto out;
	}
		}
	}
	if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
		audit_inode(pathname, nd.path.dentry);

	/*
	 * It is now safe to drop the mnt write
	 * because the filp has had a write taken
	 * on its behalf.
	 * We have the parent and last component.
	 */
	if (will_truncate)
		mnt_drop_write(nd.path.mnt);
	if (nd.root.mnt)
		path_put(&nd.root);
	return filp;

exit_mutex_unlock:
	mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
	path_put_conditional(&path, &nd);
exit:
	if (!IS_ERR(nd.intent.open.file))
		release_open_intent(&nd);
exit_parent:
	if (nd.root.mnt)
		path_put(&nd.root);
	path_put(&nd.path);
	return ERR_PTR(error);

do_link:
	error = -ENFILE;
	filp = get_empty_filp();
	if (filp == NULL)
		goto exit_parent;
	nd.intent.open.file = filp;
	filp->f_flags = open_flag;
	nd.intent.open.flags = flag;
	nd.intent.open.create_mode = mode;
	nd.flags &= ~LOOKUP_PARENT;
	nd.flags |= LOOKUP_OPEN;
	if (open_flag & O_CREAT) {
		nd.flags |= LOOKUP_CREATE;
		if (open_flag & O_EXCL)
			nd.flags |= LOOKUP_EXCL;
	}
	filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
	while (unlikely(!filp)) { /* trailing symlink */
		struct path holder;
		struct inode *inode = path.dentry->d_inode;
		void *cookie;
		error = -ELOOP;
	if (flag & O_NOFOLLOW)
		/* S_ISDIR part is a temporary automount kludge */
		if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
			goto exit_dput;
		if (count++ == 32)
			goto exit_dput;
		/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
		 * This is subtle. Instead of calling do_follow_link() we do
		 * the thing by hands. The reason is that this way we have zero
		 * link_count and path_walk() (called from ->follow_link)
		 * honoring LOOKUP_PARENT.  After that we have the parent and
		 * last component, i.e. we are in the same situation as after
		 * the first path_walk().  Well, almost - if the last component
		 * is normal we get its copy stored in nd->last.name and we will
		 * have to putname() it when we are done. Procfs-like symlinks
		 * just set LAST_BIND.
		 */
		nd.flags |= LOOKUP_PARENT;
		error = security_inode_follow_link(path.dentry, &nd);
		if (error)
			goto exit_dput;
	error = __do_follow_link(&path, &nd);
		error = __do_follow_link(&path, &nd, &cookie);
		if (unlikely(error)) {
			/* nd.path had been dropped */
			if (!IS_ERR(cookie) && inode->i_op->put_link)
				inode->i_op->put_link(path.dentry, &nd, cookie);
			path_put(&path);
	if (error) {
		/* Does someone understand code flow here? Or it is only
		 * me so stupid? Anathema to whoever designed this non-sense
		 * with "intent.open".
		 */
			release_open_intent(&nd);
			filp = ERR_PTR(error);
			goto out;
		}
		holder = path;
		nd.flags &= ~LOOKUP_PARENT;
		filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
		if (inode->i_op->put_link)
			inode->i_op->put_link(holder.dentry, &nd, cookie);
		path_put(&holder);
	}
out:
	if (nd.root.mnt)
		path_put(&nd.root);
		if (error == -ESTALE && !force_reval) {
	if (filp == ERR_PTR(-ESTALE) && !force_reval) {
		force_reval = 1;
		goto reval;
	}
		return ERR_PTR(error);
	}
	nd.flags &= ~LOOKUP_PARENT;
	if (nd.last_type == LAST_BIND)
		goto ok;
	error = -EISDIR;
	if (nd.last_type != LAST_NORM)
		goto exit;
	if (nd.last.name[nd.last.len]) {
		__putname(nd.last.name);
		goto exit;
	}
	error = -ELOOP;
	if (count++==32) {
		__putname(nd.last.name);
		goto exit;
	}
	dir = nd.path.dentry;
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(&nd);
	path.mnt = nd.path.mnt;
	__putname(nd.last.name);
	goto do_last;
	return filp;

exit_dput:
	path_put_conditional(&path, &nd);
	if (!IS_ERR(nd.intent.open.file))
		release_open_intent(&nd);
exit_parent:
	path_put(&nd.path);
	filp = ERR_PTR(error);
	goto out;
}

/**