Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 62b9510c authored by J. Bruce Fields
Browse files

nfsd: merge cookie collision fixes from ext4 tree

These changes fix readdir loops on ext4 filesystems with dir_index
turned on.  I'm pulling them from Ted's tree as I'd like to give them
some extra nfsd testing, and expect to be applying (potentially
conflicting) patches to the same code before the next merge window.

From the nfs-ext4-premerge branch of

	git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4



Signed-off-by: J. Bruce Fields <bfields@redhat.com>
parents 8546ee51 06effdbb
Loading
Loading
Loading
Loading
+169 −45
Original line number Original line Diff line number Diff line
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
};


static int ext4_readdir(struct file *, void *, filldir_t);
static int ext4_dx_readdir(struct file *filp,
static int ext4_dx_readdir(struct file *filp,
			   void *dirent, filldir_t filldir);
			   void *dirent, filldir_t filldir);
static int ext4_release_dir(struct inode *inode,
				struct file *filp);

const struct file_operations ext4_dir_operations = {
	.llseek		= ext4_llseek,
	.read		= generic_read_dir,
	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.fsync		= ext4_sync_file,
	.release	= ext4_release_dir,
};



static unsigned char get_dtype(struct super_block *sb, int filetype)
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
{
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
	return (ext4_filetype_table[filetype]);
	return (ext4_filetype_table[filetype]);
}
}


/**
 * Tell whether a directory inode is htree-indexed, or is a directory
 * that could potentially get converted to use htree indexing.
 *
 * The answer is "yes" only when the filesystem has the dir_index compat
 * feature and, in addition, either the inode already carries
 * EXT4_INODE_INDEX or the directory is exactly one block long.
 *
 * Return 1 if it is a dx dir, 0 if not.
 */
static int is_dx_dir(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	/* No dir_index feature on this filesystem: never a dx dir. */
	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
		return 0;

	/* Already flagged as indexed. */
	if (ext4_test_inode_flag(inode, EXT4_INODE_INDEX))
		return 1;

	/* Directory size is exactly one filesystem block. */
	if ((inode->i_size >> sb->s_blocksize_bits) == 1)
		return 1;

	return 0;
}

/*
/*
 * Return 0 if the directory entry is OK, and 1 if there is a problem
 * Return 0 if the directory entry is OK, and 1 if there is a problem
 *
 *
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
	unsigned int offset;
	unsigned int offset;
	int i, stored;
	int i, stored;
	struct ext4_dir_entry_2 *de;
	struct ext4_dir_entry_2 *de;
	struct super_block *sb;
	int err;
	int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	int ret = 0;
	int ret = 0;
	int dir_has_error = 0;
	int dir_has_error = 0;


	sb = inode->i_sb;
	if (is_dx_dir(inode)) {

	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
				    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
		err = ext4_dx_readdir(filp, dirent, filldir);
		err = ext4_dx_readdir(filp, dirent, filldir);
		if (err != ERR_BAD_DX_DIR) {
		if (err != ERR_BAD_DX_DIR) {
			ret = err;
			ret = err;
@@ -254,22 +253,134 @@ static int ext4_readdir(struct file *filp,
	return ret;
	return ret;
}
}


/*
 * Report whether the current caller should be treated as using a 32-bit
 * API.  With CONFIG_COMPAT this is whatever is_compat_task() says;
 * without it, the answer is simply whether BITS_PER_LONG is 32.
 */
static inline int is_32bit_api(void)
{
#ifndef CONFIG_COMPAT
	return (BITS_PER_LONG == 32);
#else
	return is_compat_task();
#endif
}

/*
/*
 * These functions convert from the major/minor hash to an f_pos
 * These functions convert from the major/minor hash to an f_pos
 * value.
 * value for dx directories
 *
 * Upper layers (for example NFS) should specify FMODE_32BITHASH or
 * FMODE_64BITHASH explicitly. On the other hand, ext4 may also be mounted
 * directly on both 32-bit and 64-bit nodes; in that case neither
 * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
 */
/*
 * Build an f_pos value from a major/minor hash pair.
 *
 * 64-bit form: (major >> 1) in the upper 32 bits, minor in the lower 32.
 * 32-bit form: (major >> 1) only; the minor hash is not encoded.
 *
 * The 64-bit form is used unless FMODE_32BITHASH is set, or neither mode
 * flag is set and is_32bit_api() reports a 32-bit caller.
 */
static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
{
	if (!(filp->f_mode & FMODE_32BITHASH) &&
	    ((filp->f_mode & FMODE_64BITHASH) || !is_32bit_api()))
		return ((__u64)(major >> 1) << 32) | (__u64)minor;

	return major >> 1;
}

/*
 * Recover the major hash from an f_pos value.
 *
 * 64-bit form: the upper 32 bits of pos, shifted left by one.
 * 32-bit form: pos itself, shifted left by one.
 * Either way the result is masked to 32 bits.
 *
 * The 64-bit form is used unless FMODE_32BITHASH is set, or neither mode
 * flag is set and is_32bit_api() reports a 32-bit caller.
 */
static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
{
	if (!(filp->f_mode & FMODE_32BITHASH) &&
	    ((filp->f_mode & FMODE_64BITHASH) || !is_32bit_api()))
		return ((pos >> 32) << 1) & 0xffffffff;

	return (pos << 1) & 0xffffffff;
}

/*
 * Recover the minor hash from an f_pos value.
 *
 * 64-bit form: the low 32 bits of pos.
 * 32-bit form: always 0, since the minor hash is not stored in pos.
 *
 * The 64-bit form is used unless FMODE_32BITHASH is set, or neither mode
 * flag is set and is_32bit_api() reports a 32-bit caller.
 */
static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
{
	if (!(filp->f_mode & FMODE_32BITHASH) &&
	    ((filp->f_mode & FMODE_64BITHASH) || !is_32bit_api()))
		return pos & 0xffffffff;

	return 0;
}

/*
 * Return the end-of-file marker for dx directories that matches the
 * offset width in use for this file: EXT4_HTREE_EOF_64BIT for the
 * 64-bit form, EXT4_HTREE_EOF_32BIT for the 32-bit form.
 *
 * The 64-bit form is used unless FMODE_32BITHASH is set, or neither mode
 * flag is set and is_32bit_api() reports a 32-bit caller.
 */
static inline loff_t ext4_get_htree_eof(struct file *filp)
{
	if (!(filp->f_mode & FMODE_32BITHASH) &&
	    ((filp->f_mode & FMODE_64BITHASH) || !is_32bit_api()))
		return EXT4_HTREE_EOF_64BIT;

	return EXT4_HTREE_EOF_32BIT;
}


/*
 * ext4_dir_llseek() based on generic_file_llseek() to handle both
 * non-htree and htree directories, where the "offset" is in terms
 * of the filename hash value instead of the byte offset.
 *
 *
 * Currently we only use major hash numer.  This is unfortunate, but
 * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
 * on 32-bit machines, the same VFS interface is used for lseek and
 *       will be invalid once the directory was converted into a dx directory
 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
 */
 * lseek/telldir/seekdir will blow out spectacularly, and from within
loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
 * the ext2 low-level routine, we don't know if we're being called by
{
 * a 64-bit version of the system call or the 32-bit version of the
	struct inode *inode = file->f_mapping->host;
 * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
	loff_t ret = -EINVAL;
 * cookie.  Sigh.
	int dx_dir = is_dx_dir(inode);

	mutex_lock(&inode->i_mutex);

	/* NOTE: relative offsets with dx directories might not work
	 *       as expected, as it is difficult to figure out the
	 *       correct offset between dx hashes */

	switch (origin) {
	case SEEK_END:
		if (unlikely(offset > 0))
			goto out_err; /* not supported for directories */

		/* so only negative offsets are left, does that have a
		 * meaning for directories at all? */
		if (dx_dir)
			offset += ext4_get_htree_eof(file);
		else
			offset += inode->i_size;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */
		 */
#define hash2pos(major, minor)	(major >> 1)
		if (offset == 0) {
#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
			offset = file->f_pos;
#define pos2min_hash(pos)	(0)
			goto out_ok;
		}

		offset += file->f_pos;
		break;
	}

	if (unlikely(offset < 0))
		goto out_err;

	if (!dx_dir) {
		if (offset > inode->i_sb->s_maxbytes)
			goto out_err;
	} else if (offset > ext4_get_htree_eof(file))
		goto out_err;

	/* Special lock needed here? */
	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}

out_ok:
	ret = offset;
out_err:
	mutex_unlock(&inode->i_mutex);

	return ret;
}


/*
/*
 * This structure holds the nodes of the red-black tree used to store
 * This structure holds the nodes of the red-black tree used to store
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
}
}




static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
							   loff_t pos)
{
{
	struct dir_private_info *p;
	struct dir_private_info *p;


	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
	if (!p)
	if (!p)
		return NULL;
		return NULL;
	p->curr_hash = pos2maj_hash(pos);
	p->curr_hash = pos2maj_hash(filp, pos);
	p->curr_minor_hash = pos2min_hash(pos);
	p->curr_minor_hash = pos2min_hash(filp, pos);
	return p;
	return p;
}
}


@@ -429,7 +541,7 @@ static int call_filldir(struct file *filp, void *dirent,
		       "null fname?!?\n");
		       "null fname?!?\n");
		return 0;
		return 0;
	}
	}
	curr_pos = hash2pos(fname->hash, fname->minor_hash);
	curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
	while (fname) {
	while (fname) {
		error = filldir(dirent, fname->name,
		error = filldir(dirent, fname->name,
				fname->name_len, curr_pos,
				fname->name_len, curr_pos,
@@ -454,13 +566,13 @@ static int ext4_dx_readdir(struct file *filp,
	int	ret;
	int	ret;


	if (!info) {
	if (!info) {
		info = ext4_htree_create_dir_info(filp->f_pos);
		info = ext4_htree_create_dir_info(filp, filp->f_pos);
		if (!info)
		if (!info)
			return -ENOMEM;
			return -ENOMEM;
		filp->private_data = info;
		filp->private_data = info;
	}
	}


	if (filp->f_pos == EXT4_HTREE_EOF)
	if (filp->f_pos == ext4_get_htree_eof(filp))
		return 0;	/* EOF */
		return 0;	/* EOF */


	/* Some one has messed with f_pos; reset the world */
	/* Some one has messed with f_pos; reset the world */
@@ -468,8 +580,8 @@ static int ext4_dx_readdir(struct file *filp,
		free_rb_tree_fname(&info->root);
		free_rb_tree_fname(&info->root);
		info->curr_node = NULL;
		info->curr_node = NULL;
		info->extra_fname = NULL;
		info->extra_fname = NULL;
		info->curr_hash = pos2maj_hash(filp->f_pos);
		info->curr_hash = pos2maj_hash(filp, filp->f_pos);
		info->curr_minor_hash = pos2min_hash(filp->f_pos);
		info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
	}
	}


	/*
	/*
@@ -501,7 +613,7 @@ static int ext4_dx_readdir(struct file *filp,
			if (ret < 0)
			if (ret < 0)
				return ret;
				return ret;
			if (ret == 0) {
			if (ret == 0) {
				filp->f_pos = EXT4_HTREE_EOF;
				filp->f_pos = ext4_get_htree_eof(filp);
				break;
				break;
			}
			}
			info->curr_node = rb_first(&info->root);
			info->curr_node = rb_first(&info->root);
@@ -521,7 +633,7 @@ static int ext4_dx_readdir(struct file *filp,
			info->curr_minor_hash = fname->minor_hash;
			info->curr_minor_hash = fname->minor_hash;
		} else {
		} else {
			if (info->next_hash == ~0) {
			if (info->next_hash == ~0) {
				filp->f_pos = EXT4_HTREE_EOF;
				filp->f_pos = ext4_get_htree_eof(filp);
				break;
				break;
			}
			}
			info->curr_hash = info->next_hash;
			info->curr_hash = info->next_hash;
@@ -540,3 +652,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)


	return 0;
	return 0;
}
}

/*
 * File operations for ext4 directories.  Seeking goes through
 * ext4_dir_llseek() rather than ext4_llseek().
 */
const struct file_operations ext4_dir_operations = {
	/* positioning and reading */
	.llseek		= ext4_dir_llseek,
	.readdir	= ext4_readdir,
	.read		= generic_read_dir,

	/* ioctl entry points */
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif

	/* sync and teardown */
	.fsync		= ext4_sync_file,
	.release	= ext4_release_dir,
};
+5 −1
Original line number Original line Diff line number Diff line
@@ -1612,7 +1612,11 @@ struct dx_hash_info
	u32		*seed;
	u32		*seed;
};
};


#define EXT4_HTREE_EOF	0x7fffffff

/*
 * End-of-file markers for dx directories: the largest positive value of
 * a signed 32-bit and a signed 64-bit offset, respectively.
 */
#define EXT4_HTREE_EOF_32BIT   (0x7fffffffUL)
#define EXT4_HTREE_EOF_64BIT   (0x7fffffffffffffffULL)



/*
/*
 * Control parameters used by ext4_htree_next_block
 * Control parameters used by ext4_htree_next_block
+2 −2
Original line number Original line Diff line number Diff line
@@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
		return -1;
		return -1;
	}
	}
	hash = hash & ~1;
	hash = hash & ~1;
	if (hash == (EXT4_HTREE_EOF << 1))
	if (hash == (EXT4_HTREE_EOF_32BIT << 1))
		hash = (EXT4_HTREE_EOF-1) << 1;
		hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
	hinfo->hash = hash;
	hinfo->hash = hash;
	hinfo->minor_hash = minor_hash;
	hinfo->minor_hash = minor_hash;
	return 0;
	return 0;
+23 −10
Original line number Original line Diff line number Diff line
@@ -737,12 +737,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access)


/*
/*
 * Open an existing file or directory.
 * Open an existing file or directory.
 * The access argument indicates the type of open (read/write/lock)
 * The may_flags argument indicates the type of open (read/write/lock)
 * and additional flags.
 * N.B. After this call fhp needs an fh_put
 * N.B. After this call fhp needs an fh_put
 */
 */
__be32
__be32
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
			int access, struct file **filp)
			int may_flags, struct file **filp)
{
{
	struct dentry	*dentry;
	struct dentry	*dentry;
	struct inode	*inode;
	struct inode	*inode;
@@ -757,7 +758,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
	 * in case a chmod has now revoked permission.
	 * in case a chmod has now revoked permission.
	 */
	 */
	err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
	err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
	if (err)
	if (err)
		goto out;
		goto out;


@@ -768,7 +769,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
	 * or any access when mandatory locking enabled
	 * or any access when mandatory locking enabled
	 */
	 */
	err = nfserr_perm;
	err = nfserr_perm;
	if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
	if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
		goto out;
		goto out;
	/*
	/*
	 * We must ignore files (but only files) which might have mandatory
	 * We must ignore files (but only files) which might have mandatory
@@ -781,12 +782,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
	if (!inode->i_fop)
	if (!inode->i_fop)
		goto out;
		goto out;


	host_err = nfsd_open_break_lease(inode, access);
	host_err = nfsd_open_break_lease(inode, may_flags);
	if (host_err) /* NOMEM or WOULDBLOCK */
	if (host_err) /* NOMEM or WOULDBLOCK */
		goto out_nfserr;
		goto out_nfserr;


	if (access & NFSD_MAY_WRITE) {
	if (may_flags & NFSD_MAY_WRITE) {
		if (access & NFSD_MAY_READ)
		if (may_flags & NFSD_MAY_READ)
			flags = O_RDWR|O_LARGEFILE;
			flags = O_RDWR|O_LARGEFILE;
		else
		else
			flags = O_WRONLY|O_LARGEFILE;
			flags = O_WRONLY|O_LARGEFILE;
@@ -795,8 +796,15 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
			    flags, current_cred());
			    flags, current_cred());
	if (IS_ERR(*filp))
	if (IS_ERR(*filp))
		host_err = PTR_ERR(*filp);
		host_err = PTR_ERR(*filp);
	else {
		host_err = ima_file_check(*filp, may_flags);

		if (may_flags & NFSD_MAY_64BIT_COOKIE)
			(*filp)->f_mode |= FMODE_64BITHASH;
		else
		else
		host_err = ima_file_check(*filp, access);
			(*filp)->f_mode |= FMODE_32BITHASH;
	}

out_nfserr:
out_nfserr:
	err = nfserrno(host_err);
	err = nfserrno(host_err);
out:
out:
@@ -2020,8 +2028,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
	__be32		err;
	__be32		err;
	struct file	*file;
	struct file	*file;
	loff_t		offset = *offsetp;
	loff_t		offset = *offsetp;
	int             may_flags = NFSD_MAY_READ;

	/* NFSv2 only supports 32 bit cookies */
	if (rqstp->rq_vers > 2)
		may_flags |= NFSD_MAY_64BIT_COOKIE;


	err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
	err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
	if (err)
	if (err)
		goto out;
		goto out;


+2 −0
Original line number Original line Diff line number Diff line
@@ -27,6 +27,8 @@
#define NFSD_MAY_BYPASS_GSS		0x400
#define NFSD_MAY_BYPASS_GSS		0x400
#define NFSD_MAY_READ_IF_EXEC		0x800
#define NFSD_MAY_READ_IF_EXEC		0x800


#define NFSD_MAY_64BIT_COOKIE		0x1000 /* 64 bit readdir cookies for >= NFSv3 */

#define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
#define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)


Loading