Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d7dab39b authored by Eric Sandeen's avatar Eric Sandeen Committed by Jan Kara
Browse files

ext3: return 32/64-bit dir name hash according to usage type



This is based on commit d1f5273e
ext4: return 32/64-bit dir name hash according to usage type
by Fan Yong <yong.fan@whamcloud.com>

Traditionally ext2/3/4 has returned a 32-bit hash value from llseek()
to appease NFSv2, which can only handle a 32-bit cookie for seekdir()
and telldir().  However, this causes problems if there are 32-bit hash
collisions, since the NFSv2 server can get stuck resending the same
entries from the directory repeatedly.

Allow ext3 to return a full 64-bit hash (both major and minor) for
telldir to decrease the chance of hash collisions.

This patch does implement a new ext3_dir_llseek op, because with 64-bit
hashes, nfs will attempt to seek to a hash "offset" which is much
larger than ext3's s_maxbytes.  So for dx dirs, we call
generic_file_llseek_size() with the appropriate max hash value as the
maximum seekable size.  Otherwise we just pass through to
generic_file_llseek().

Patch-updated-by: default avatarBernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Patch-updated-by: default avatarEric Sandeen <sandeen@redhat.com>
(blame us if something is not correct)

Signed-off-by: default avatarEric Sandeen <sandeen@redhat.com>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
parent a80b12c3
Loading
Loading
Loading
Loading
+122 −45
Original line number Diff line number Diff line
@@ -21,30 +21,15 @@
 *
 */

#include <linux/compat.h>
#include "ext3.h"

static unsigned char ext3_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};

static int ext3_readdir(struct file *, void *, filldir_t);
static int ext3_dx_readdir(struct file * filp,
			   void * dirent, filldir_t filldir);
static int ext3_release_dir (struct inode * inode,
				struct file * filp);

const struct file_operations ext3_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
	.unlocked_ioctl	= ext3_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext3_compat_ioctl,
#endif
	.fsync		= ext3_sync_file,	/* BKL held */
	.release	= ext3_release_dir,
};


static unsigned char get_dtype(struct super_block *sb, int filetype)
{
@@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
	return (ext3_filetype_table[filetype]);
}

/**
 * Check if the given dir-inode refers to an htree-indexed directory
 * (or a directory which chould potentially get coverted to use htree
 * indexing).
 *
 * Return 1 if it is a dx dir, 0 if not
 */
static int is_dx_dir(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
		     EXT3_FEATURE_COMPAT_DIR_INDEX) &&
	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
	     ((inode->i_size >> sb->s_blocksize_bits) == 1)))
		return 1;

	return 0;
}

int ext3_check_dir_entry (const char * function, struct inode * dir,
			  struct ext3_dir_entry_2 * de,
@@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp,
	unsigned long offset;
	int i, stored;
	struct ext3_dir_entry_2 *de;
	struct super_block *sb;
	int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	int ret = 0;
	int dir_has_error = 0;

	sb = inode->i_sb;

	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
				    EXT3_FEATURE_COMPAT_DIR_INDEX) &&
	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
	if (is_dx_dir(inode)) {
		err = ext3_dx_readdir(filp, dirent, filldir);
		if (err != ERR_BAD_DX_DIR) {
			ret = err;
@@ -227,22 +226,87 @@ out:
	return ret;
}

static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
	return is_compat_task();
#else
	return (BITS_PER_LONG == 32);
#endif
}

/*
 * These functions convert from the major/minor hash to an f_pos
 * value.
 * value for dx directories
 *
 * Currently we only use major hash numer.  This is unfortunate, but
 * on 32-bit machines, the same VFS interface is used for lseek and
 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
 * lseek/telldir/seekdir will blow out spectacularly, and from within
 * the ext2 low-level routine, we don't know if we're being called by
 * a 64-bit version of the system call or the 32-bit version of the
 * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
 * cookie.  Sigh.
 * Upper layer (for example NFS) should specify FMODE_32BITHASH or
 * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
 * directly on both 32-bit and 64-bit nodes, under such case, neither
 * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
 */
#define hash2pos(major, minor)	(major >> 1)
#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
#define pos2min_hash(pos)	(0)
static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return major >> 1;
	else
		return ((__u64)(major >> 1) << 32) | (__u64)minor;
}

static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return (pos << 1) & 0xffffffff;
	else
		return ((pos >> 32) << 1) & 0xffffffff;
}

static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return 0;
	else
		return pos & 0xffffffff;
}

/*
 * Return 32- or 64-bit end-of-file for dx directories
 */
static inline loff_t ext3_get_htree_eof(struct file *filp)
{
	if ((filp->f_mode & FMODE_32BITHASH) ||
	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
		return EXT3_HTREE_EOF_32BIT;
	else
		return EXT3_HTREE_EOF_64BIT;
}


/*
 * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
 * non-htree and htree directories, where the "offset" is in terms
 * of the filename hash value instead of the byte offset.
 *
 * Because we may return a 64-bit hash that is well beyond s_maxbytes,
 * we need to pass the max hash as the maximum allowable offset in
 * the htree directory case.
 *
 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
 *       will be invalid once the directory was converted into a dx directory
 */
loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file->f_mapping->host;
	int dx_dir = is_dx_dir(inode);

	if (likely(dx_dir))
		return generic_file_llseek_size(file, offset, origin,
					        ext3_get_htree_eof(file));
	else
		return generic_file_llseek(file, offset, origin);
}

/*
 * This structure holds the nodes of the red-black tree used to store
@@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root)
}


static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
							   loff_t pos)
{
	struct dir_private_info *p;

	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
	if (!p)
		return NULL;
	p->curr_hash = pos2maj_hash(pos);
	p->curr_minor_hash = pos2min_hash(pos);
	p->curr_hash = pos2maj_hash(filp, pos);
	p->curr_minor_hash = pos2min_hash(filp, pos);
	return p;
}

@@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent,
		printk("call_filldir: called with null fname?!?\n");
		return 0;
	}
	curr_pos = hash2pos(fname->hash, fname->minor_hash);
	curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
	while (fname) {
		error = filldir(dirent, fname->name,
				fname->name_len, curr_pos,
@@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp,
	int	ret;

	if (!info) {
		info = ext3_htree_create_dir_info(filp->f_pos);
		info = ext3_htree_create_dir_info(filp, filp->f_pos);
		if (!info)
			return -ENOMEM;
		filp->private_data = info;
	}

	if (filp->f_pos == EXT3_HTREE_EOF)
	if (filp->f_pos == ext3_get_htree_eof(filp))
		return 0;	/* EOF */

	/* Some one has messed with f_pos; reset the world */
@@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp,
		free_rb_tree_fname(&info->root);
		info->curr_node = NULL;
		info->extra_fname = NULL;
		info->curr_hash = pos2maj_hash(filp->f_pos);
		info->curr_minor_hash = pos2min_hash(filp->f_pos);
		info->curr_hash = pos2maj_hash(filp, filp->f_pos);
		info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
	}

	/*
@@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp,
			if (ret < 0)
				return ret;
			if (ret == 0) {
				filp->f_pos = EXT3_HTREE_EOF;
				filp->f_pos = ext3_get_htree_eof(filp);
				break;
			}
			info->curr_node = rb_first(&info->root);
@@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp,
			info->curr_minor_hash = fname->minor_hash;
		} else {
			if (info->next_hash == ~0) {
				filp->f_pos = EXT3_HTREE_EOF;
				filp->f_pos = ext3_get_htree_eof(filp);
				break;
			}
			info->curr_hash = info->next_hash;
@@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)

	return 0;
}

const struct file_operations ext3_dir_operations = {
	.llseek		= ext3_dir_llseek,
	.read		= generic_read_dir,
	.readdir	= ext3_readdir,
	.unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext3_compat_ioctl,
#endif
	.fsync		= ext3_sync_file,
	.release	= ext3_release_dir,
};
+5 −1
Original line number Diff line number Diff line
@@ -920,7 +920,11 @@ struct dx_hash_info
	u32		*seed;
};

#define EXT3_HTREE_EOF	0x7fffffff

/* 32 and 64 bit signed EOF for dx directories */
#define EXT3_HTREE_EOF_32BIT   ((1UL  << (32 - 1)) - 1)
#define EXT3_HTREE_EOF_64BIT   ((1ULL << (64 - 1)) - 1)


/*
 * Control parameters used by ext3_htree_next_block
+2 −2
Original line number Diff line number Diff line
@@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
		return -1;
	}
	hash = hash & ~1;
	if (hash == (EXT3_HTREE_EOF << 1))
		hash = (EXT3_HTREE_EOF-1) << 1;
	if (hash == (EXT3_HTREE_EOF_32BIT << 1))
		hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
	hinfo->hash = hash;
	hinfo->minor_hash = minor_hash;
	return 0;