Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e08ac99f authored by Artem Blagodarenko, committed by Theodore Ts'o
Browse files

ext4: add largedir feature



This INCOMPAT_LARGEDIR feature allows larger directories to be created
in ldiskfs, both with directory sizes over 2GB and a maximum htree
depth of 3 instead of the current limit of 2. These features are needed
in order to exceed the current limit of approximately 10M entries in a
single directory.

This patch was originally written by Yang Sheng to support the Lustre server.

[ Bumped the credits needed to update an indexed directory -- tytso ]

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Signed-off-by: Yang Sheng <yang.sheng@intel.com>
Signed-off-by: Artem Blagodarenko <artem.blagodarenko@seagate.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
parent 67a7d5f5
Loading
Loading
Loading
Loading
+18 −5
Original line number Original line Diff line number Diff line
@@ -1800,7 +1800,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
					 EXT4_FEATURE_INCOMPAT_MMP | \
					 EXT4_FEATURE_INCOMPAT_MMP | \
					 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
					 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
					 EXT4_FEATURE_INCOMPAT_ENCRYPT | \
					 EXT4_FEATURE_INCOMPAT_ENCRYPT | \
					 EXT4_FEATURE_INCOMPAT_CSUM_SEED)
					 EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
					 EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
#define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2126,6 +2127,16 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
 */
 */
#define ERR_BAD_DX_DIR	(-(MAX_ERRNO - 1))
#define ERR_BAD_DX_DIR	(-(MAX_ERRNO - 1))


/* htree levels for ext4 */
#define	EXT4_HTREE_LEVEL_COMPAT	2
#define	EXT4_HTREE_LEVEL	3

/*
 * Return the htree level limit that applies to directories on @sb:
 * EXT4_HTREE_LEVEL when the largedir feature is set on the filesystem,
 * EXT4_HTREE_LEVEL_COMPAT otherwise.
 */
static inline int ext4_dir_htree_level(struct super_block *sb)
{
	if (ext4_has_feature_largedir(sb))
		return EXT4_HTREE_LEVEL;

	return EXT4_HTREE_LEVEL_COMPAT;
}

/*
/*
 * Timeout and state flag for lazy initialization inode thread.
 * Timeout and state flag for lazy initialization inode thread.
 */
 */
@@ -2756,12 +2767,14 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
	es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
	es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
}
}


static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
static inline loff_t ext4_isize(struct super_block *sb,
				struct ext4_inode *raw_inode)
{
{
	if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
	if (ext4_has_feature_largedir(sb) ||
	    S_ISREG(le16_to_cpu(raw_inode->i_mode)))
		return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
		return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
			le32_to_cpu(raw_inode->i_size_lo);
			le32_to_cpu(raw_inode->i_size_lo);
	else

	return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
	return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
}
}


+8 −1
Original line number Original line Diff line number Diff line
@@ -77,7 +77,14 @@


#define EXT4_RESERVE_TRANS_BLOCKS	12U
#define EXT4_RESERVE_TRANS_BLOCKS	12U


#define EXT4_INDEX_EXTRA_TRANS_BLOCKS	8
/*
 * Number of credits needed if we need to insert an entry into a
 * directory.  For each new index block, we need 4 blocks (old index
 * block, new index block, bitmap block, bg summary).  For normal
 * htree directories there are 2 levels; if the largedir feature is
 * enabled there are 3 levels.
 */
#define EXT4_INDEX_EXTRA_TRANS_BLOCKS	12U


#ifdef CONFIG_QUOTA
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
/* Amount of blocks needed for quota update - we know that the structure was
+2 −2
Original line number Original line Diff line number Diff line
@@ -4712,7 +4712,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
	if (ext4_has_feature_64bit(sb))
	if (ext4_has_feature_64bit(sb))
		ei->i_file_acl |=
		ei->i_file_acl |=
			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
	inode->i_size = ext4_isize(raw_inode);
	inode->i_size = ext4_isize(sb, raw_inode);
	if ((size = i_size_read(inode)) < 0) {
	if ((size = i_size_read(inode)) < 0) {
		EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
		EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
		ret = -EFSCORRUPTED;
		ret = -EFSCORRUPTED;
@@ -5037,7 +5037,7 @@ static int ext4_do_update_inode(handle_t *handle,
		raw_inode->i_file_acl_high =
		raw_inode->i_file_acl_high =
			cpu_to_le16(ei->i_file_acl >> 32);
			cpu_to_le16(ei->i_file_acl >> 32);
	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
	if (ei->i_disksize != ext4_isize(raw_inode)) {
	if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
		ext4_isize_set(raw_inode, ei->i_disksize);
		ext4_isize_set(raw_inode, ei->i_disksize);
		need_datasync = 1;
		need_datasync = 1;
	}
	}
+85 −39
Original line number Original line Diff line number Diff line
@@ -513,7 +513,7 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)


static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
{
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
	return le32_to_cpu(entry->block) & 0x0fffffff;
}
}


static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
@@ -739,6 +739,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
	struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
	struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
	u32 hash;
	u32 hash;


	memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
	frame->bh = ext4_read_dirblock(dir, 0, INDEX);
	frame->bh = ext4_read_dirblock(dir, 0, INDEX);
	if (IS_ERR(frame->bh))
	if (IS_ERR(frame->bh))
		return (struct dx_frame *) frame->bh;
		return (struct dx_frame *) frame->bh;
@@ -768,9 +769,15 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
	}
	}


	indirect = root->info.indirect_levels;
	indirect = root->info.indirect_levels;
	if (indirect > 1) {
	if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
		ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
		ext4_warning(dir->i_sb,
				   root->info.indirect_levels);
			     "Directory (ino: %lu) htree depth %#06x exceed"
			     "supported value", dir->i_ino,
			     ext4_dir_htree_level(dir->i_sb));
		if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
			ext4_warning(dir->i_sb, "Enable large directory "
						"feature to access it");
		}
		goto fail;
		goto fail;
	}
	}


@@ -859,12 +866,19 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,


static void dx_release(struct dx_frame *frames)
static void dx_release(struct dx_frame *frames)
{
{
	struct dx_root_info *info;
	int i;

	if (frames[0].bh == NULL)
	if (frames[0].bh == NULL)
		return;
		return;


	if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
	info = &((struct dx_root *)frames[0].bh->b_data)->info;
		brelse(frames[1].bh);
	for (i = 0; i <= info->indirect_levels; i++) {
	brelse(frames[0].bh);
		if (frames[i].bh == NULL)
			break;
		brelse(frames[i].bh);
		frames[i].bh = NULL;
	}
}
}


/*
/*
@@ -1050,7 +1064,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
{
{
	struct dx_hash_info hinfo;
	struct dx_hash_info hinfo;
	struct ext4_dir_entry_2 *de;
	struct ext4_dir_entry_2 *de;
	struct dx_frame frames[2], *frame;
	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
	struct inode *dir;
	struct inode *dir;
	ext4_lblk_t block;
	ext4_lblk_t block;
	int count = 0;
	int count = 0;
@@ -1485,7 +1499,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
			struct ext4_dir_entry_2 **res_dir)
			struct ext4_dir_entry_2 **res_dir)
{
{
	struct super_block * sb = dir->i_sb;
	struct super_block * sb = dir->i_sb;
	struct dx_frame frames[2], *frame;
	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
	struct buffer_head *bh;
	struct buffer_head *bh;
	ext4_lblk_t block;
	ext4_lblk_t block;
	int retval;
	int retval;
@@ -1889,7 +1903,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
	 */
	 */
	dir->i_mtime = dir->i_ctime = current_time(dir);
	dir->i_mtime = dir->i_ctime = current_time(dir);
	ext4_update_dx_flag(dir);
	ext4_update_dx_flag(dir);
	dir->i_version++;
	inode_inc_iversion(dir);
	ext4_mark_inode_dirty(handle, dir);
	ext4_mark_inode_dirty(handle, dir);
	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
@@ -1908,7 +1922,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
{
{
	struct buffer_head *bh2;
	struct buffer_head *bh2;
	struct dx_root	*root;
	struct dx_root	*root;
	struct dx_frame	frames[2], *frame;
	struct dx_frame	frames[EXT4_HTREE_LEVEL], *frame;
	struct dx_entry *entries;
	struct dx_entry *entries;
	struct ext4_dir_entry_2	*de, *de2;
	struct ext4_dir_entry_2	*de, *de2;
	struct ext4_dir_entry_tail *t;
	struct ext4_dir_entry_tail *t;
@@ -2127,13 +2141,16 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
			     struct inode *dir, struct inode *inode)
			     struct inode *dir, struct inode *inode)
{
{
	struct dx_frame frames[2], *frame;
	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
	struct dx_entry *entries, *at;
	struct dx_entry *entries, *at;
	struct buffer_head *bh;
	struct buffer_head *bh;
	struct super_block *sb = dir->i_sb;
	struct super_block *sb = dir->i_sb;
	struct ext4_dir_entry_2 *de;
	struct ext4_dir_entry_2 *de;
	int restart;
	int err;
	int err;


again:
	restart = 0;
	frame = dx_probe(fname, dir, NULL, frames);
	frame = dx_probe(fname, dir, NULL, frames);
	if (IS_ERR(frame))
	if (IS_ERR(frame))
		return PTR_ERR(frame);
		return PTR_ERR(frame);
@@ -2155,24 +2172,44 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
	if (err != -ENOSPC)
	if (err != -ENOSPC)
		goto cleanup;
		goto cleanup;


	err = 0;
	/* Block full, should compress but for now just split */
	/* Block full, should compress but for now just split */
	dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
	dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
		       dx_get_count(entries), dx_get_limit(entries)));
		       dx_get_count(entries), dx_get_limit(entries)));
	/* Need to split index? */
	/* Need to split index? */
	if (dx_get_count(entries) == dx_get_limit(entries)) {
	if (dx_get_count(entries) == dx_get_limit(entries)) {
		ext4_lblk_t newblock;
		ext4_lblk_t newblock;
		unsigned icount = dx_get_count(entries);
		int levels = frame - frames + 1;
		int levels = frame - frames;
		unsigned int icount;
		int add_level = 1;
		struct dx_entry *entries2;
		struct dx_entry *entries2;
		struct dx_node *node2;
		struct dx_node *node2;
		struct buffer_head *bh2;
		struct buffer_head *bh2;


		if (levels && (dx_get_count(frames->entries) ==
		while (frame > frames) {
			       dx_get_limit(frames->entries))) {
			if (dx_get_count((frame - 1)->entries) <
			ext4_warning_inode(dir, "Directory index full!");
			    dx_get_limit((frame - 1)->entries)) {
				add_level = 0;
				break;
			}
			frame--; /* split higher index block */
			at = frame->at;
			entries = frame->entries;
			restart = 1;
		}
		if (add_level && levels == ext4_dir_htree_level(sb)) {
			ext4_warning(sb, "Directory (ino: %lu) index full, "
					 "reach max htree level :%d",
					 dir->i_ino, levels);
			if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
				ext4_warning(sb, "Large directory feature is "
						 "not enabled on this "
						 "filesystem");
			}
			err = -ENOSPC;
			err = -ENOSPC;
			goto cleanup;
			goto cleanup;
		}
		}
		icount = dx_get_count(entries);
		bh2 = ext4_append(handle, dir, &newblock);
		bh2 = ext4_append(handle, dir, &newblock);
		if (IS_ERR(bh2)) {
		if (IS_ERR(bh2)) {
			err = PTR_ERR(bh2);
			err = PTR_ERR(bh2);
@@ -2187,7 +2224,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
		err = ext4_journal_get_write_access(handle, frame->bh);
		err = ext4_journal_get_write_access(handle, frame->bh);
		if (err)
		if (err)
			goto journal_error;
			goto journal_error;
		if (levels) {
		if (!add_level) {
			unsigned icount1 = icount/2, icount2 = icount - icount1;
			unsigned icount1 = icount/2, icount2 = icount - icount1;
			unsigned hash2 = dx_get_hash(entries + icount1);
			unsigned hash2 = dx_get_hash(entries + icount1);
			dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
			dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
@@ -2195,7 +2232,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,


			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
			err = ext4_journal_get_write_access(handle,
			err = ext4_journal_get_write_access(handle,
							     frames[0].bh);
							     (frame - 1)->bh);
			if (err)
			if (err)
				goto journal_error;
				goto journal_error;


@@ -2211,17 +2248,25 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
				frame->entries = entries = entries2;
				frame->entries = entries = entries2;
				swap(frame->bh, bh2);
				swap(frame->bh, bh2);
			}
			}
			dx_insert_block(frames + 0, hash2, newblock);
			dx_insert_block((frame - 1), hash2, newblock);
			dxtrace(dx_show_index("node", frames[1].entries));
			dxtrace(dx_show_index("node", frame->entries));
			dxtrace(dx_show_index("node",
			dxtrace(dx_show_index("node",
			       ((struct dx_node *) bh2->b_data)->entries));
			       ((struct dx_node *) bh2->b_data)->entries));
			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
			if (err)
			if (err)
				goto journal_error;
				goto journal_error;
			brelse (bh2);
			brelse (bh2);
			err = ext4_handle_dirty_dx_node(handle, dir,
						   (frame - 1)->bh);
			if (err)
				goto journal_error;
			if (restart) {
				err = ext4_handle_dirty_dx_node(handle, dir,
							   frame->bh);
				goto journal_error;
			}
		} else {
		} else {
			dxtrace(printk(KERN_DEBUG
			struct dx_root *dxroot;
				       "Creating second level index...\n"));
			memcpy((char *) entries2, (char *) entries,
			memcpy((char *) entries2, (char *) entries,
			       icount * sizeof(struct dx_entry));
			       icount * sizeof(struct dx_entry));
			dx_set_limit(entries2, dx_node_limit(dir));
			dx_set_limit(entries2, dx_node_limit(dir));
@@ -2229,22 +2274,18 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
			/* Set up root */
			/* Set up root */
			dx_set_count(entries, 1);
			dx_set_count(entries, 1);
			dx_set_block(entries + 0, newblock);
			dx_set_block(entries + 0, newblock);
			((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
			dxroot = (struct dx_root *)frames[0].bh->b_data;

			dxroot->info.indirect_levels += 1;
			/* Add new access path frame */
			dxtrace(printk(KERN_DEBUG
			frame = frames + 1;
				       "Creating %d level index...\n",
			frame->at = at = at - entries + entries2;
				       info->indirect_levels));
			frame->entries = entries = entries2;
			err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
			frame->bh = bh2;
			err = ext4_journal_get_write_access(handle,
							     frame->bh);
			if (err)
			if (err)
				goto journal_error;
				goto journal_error;
		}
			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
		err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
			brelse(bh2);
		if (err) {
			restart = 1;
			ext4_std_error(inode->i_sb, err);
			goto journal_error;
			goto cleanup;
		}
		}
	}
	}
	de = do_split(handle, dir, &bh, frame, &fname->hinfo);
	de = do_split(handle, dir, &bh, frame, &fname->hinfo);
@@ -2256,10 +2297,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
	goto cleanup;
	goto cleanup;


journal_error:
journal_error:
	ext4_std_error(dir->i_sb, err);
	ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
cleanup:
cleanup:
	brelse(bh);
	brelse(bh);
	dx_release(frames);
	dx_release(frames);
	/* @restart is true means htree-path has been changed, we need to
	 * repeat dx_probe() to find out valid htree-path
	 */
	if (restart && err == 0)
		goto again;
	return err;
	return err;
}
}


@@ -2296,7 +2342,7 @@ int ext4_generic_delete_entry(handle_t *handle,
					blocksize);
					blocksize);
			else
			else
				de->inode = 0;
				de->inode = 0;
			dir->i_version++;
			inode_inc_iversion(dir);
			return 0;
			return 0;
		}
		}
		i += ext4_rec_len_from_disk(de->rec_len, blocksize);
		i += ext4_rec_len_from_disk(de->rec_len, blocksize);