Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4ed8a6bb authored by Mark Fasheh's avatar Mark Fasheh
Browse files

ocfs2: Store dir index records inline



Allow us to store a small number of directory index records in the
ocfs2_dx_root_block. This saves us a disk read on small to medium sized
directories (less than about 250 entries). The inline root is automatically
turned into a root block with extents if the directory size increases beyond
it's capacity.

Signed-off-by: default avatarMark Fasheh <mfasheh@suse.com>
Acked-by: default avatarJoel Becker <joel.becker@oracle.com>
parent 9b7895ef
Loading
Loading
Loading
Loading
+434 −138
Original line number Diff line number Diff line
@@ -151,6 +151,7 @@ static void ocfs2_init_dir_trailer(struct inode *inode,

void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
{
	brelse(res->dl_dx_root_bh);
	brelse(res->dl_leaf_bh);
	brelse(res->dl_dx_leaf_bh);
}
@@ -162,6 +163,11 @@ static int ocfs2_dir_indexed(struct inode *inode)
	return 0;
}

static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
{
	return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
}

/*
 * Hashing code adapted from ext3
 */
@@ -799,13 +805,18 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 * Returns the block index, from the start of the cluster which this
 * hash belongs too.
 */
static unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
					  struct ocfs2_dx_hinfo *hinfo)
static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
						   u32 minor_hash)
{
	u32 minor_hash = hinfo->minor_hash;
	return minor_hash & osb->osb_dx_mask;
}

static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
					  struct ocfs2_dx_hinfo *hinfo)
{
	return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
}

static int ocfs2_dx_dir_lookup(struct inode *inode,
			       struct ocfs2_extent_list *el,
			       struct ocfs2_dx_hinfo *hinfo,
@@ -855,7 +866,7 @@ static int ocfs2_dx_dir_lookup(struct inode *inode,

static int ocfs2_dx_dir_search(const char *name, int namelen,
			       struct inode *dir,
			       struct ocfs2_extent_list *dr_el,
			       struct ocfs2_dx_root_block *dx_root,
			       struct ocfs2_dir_lookup_result *res)
{
	int ret, i, found;
@@ -866,9 +877,18 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
	struct buffer_head *dir_ent_bh = NULL;
	struct ocfs2_dir_entry *dir_ent = NULL;
	struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
	struct ocfs2_extent_list *dr_el;
	struct ocfs2_dx_entry_list *entry_list;

	ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);

	if (ocfs2_dx_root_inline(dx_root)) {
		entry_list = &dx_root->dr_entries;
		goto search;
	}

	dr_el = &dx_root->dr_list;

	ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
	if (ret) {
		mlog_errno(ret);
@@ -893,12 +913,15 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
	     le16_to_cpu(dx_leaf->dl_list.de_num_used),
	     le16_to_cpu(dx_leaf->dl_list.de_count));

	entry_list = &dx_leaf->dl_list;

search:
	/*
	 * Empty leaf is legal, so no need to check for that.
	 */
	found = 0;
	for (i = 0; i < le16_to_cpu(dx_leaf->dl_list.de_num_used); i++) {
		dx_entry = &dx_leaf->dl_list.de_entries[i];
	for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
		dx_entry = &entry_list->de_entries[i];

		if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
		    || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
@@ -982,14 +1005,15 @@ static int ocfs2_find_entry_dx(const char *name, int namelen,
	}
	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;

	ret = ocfs2_dx_dir_search(name, namelen, dir, &dx_root->dr_list,
				  lookup);
	ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup);
	if (ret) {
		if (ret != -ENOENT)
			mlog_errno(ret);
		goto out;
	}

	lookup->dl_dx_root_bh = dx_root_bh;
	dx_root_bh = NULL;
out:
	brelse(di_bh);
	brelse(dx_root_bh);
@@ -1126,63 +1150,87 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
	return status;
}

static void ocfs2_dx_leaf_remove_entry(struct ocfs2_dx_leaf *dx_leaf, int index)
static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
				       int index)
{
	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
	int num_used = le16_to_cpu(dl_list->de_num_used);
	int num_used = le16_to_cpu(entry_list->de_num_used);

	if (num_used == 1 || index == (num_used - 1))
		goto clear;

	memmove(&dl_list->de_entries[index], &dl_list->de_entries[index + 1],
	memmove(&entry_list->de_entries[index],
		&entry_list->de_entries[index + 1],
		(num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
clear:
	num_used--;
	memset(&dl_list->de_entries[num_used], 0,
	memset(&entry_list->de_entries[num_used], 0,
	       sizeof(struct ocfs2_dx_entry));
	dl_list->de_num_used = cpu_to_le16(num_used);
	entry_list->de_num_used = cpu_to_le16(num_used);
}

static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
				 struct ocfs2_dir_lookup_result *lookup)
{
	int ret, index;
	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
	struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
	struct ocfs2_dx_leaf *dx_leaf;
	struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	if (ocfs2_dx_root_inline(dx_root)) {
		entry_list = &dx_root->dr_entries;
	} else {
		dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
		entry_list = &dx_leaf->dl_list;
	}

	/* Neither of these are a disk corruption - that should have
	 * been caught by lookup, before we got here. */
	BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_count) <= 0);
	BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_num_used) <= 0);
	BUG_ON(le16_to_cpu(entry_list->de_count) <= 0);
	BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0);

	index = (char *)dx_entry - (char *)dx_leaf->dl_list.de_entries;
	index = (char *)dx_entry - (char *)entry_list->de_entries;
	index /= sizeof(*dx_entry);

	if (index >= le16_to_cpu(dx_leaf->dl_list.de_num_used)) {
	if (index >= le16_to_cpu(entry_list->de_num_used)) {
		mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
		     (unsigned long long)OCFS2_I(dir)->ip_blkno, index, dx_leaf,
		     dx_entry);
		     (unsigned long long)OCFS2_I(dir)->ip_blkno, index,
		     entry_list, dx_entry);
		return -EIO;
	}

	mlog(0, "Dir %llu: delete entry at index: %d\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno, index);

	/*
	 * Add the index leaf into the journal before removing the
	 * unindexed entry. If we get an error return from
	 * __ocfs2_delete_entry(), then it hasn't removed the entry
	 * yet. Likewise, successful return means we *must* remove the
	 * indexed entry.
	 * Add the block holding our index into the journal before
	 * removing the unindexed entry. If we get an error return
	 * from __ocfs2_delete_entry(), then it hasn't removed the
	 * entry yet. Likewise, successful return means we *must*
	 * remove the indexed entry.
	 *
	 * We're also careful to journal the root tree block here if
	 * we're going to be adding to the start of the free list.
	 */
	ret = ocfs2_journal_access_dl(handle, dir, lookup->dl_dx_leaf_bh,
	if (ocfs2_dx_root_inline(dx_root)) {
		ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	} else {
		ret = ocfs2_journal_access_dl(handle, dir,
					      lookup->dl_dx_leaf_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	mlog(0, "Dir %llu: delete entry at index: %d\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno, index);

	ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
				   leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
@@ -1191,8 +1239,11 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
		goto out;
	}

	ocfs2_dx_leaf_remove_entry(dx_leaf, index);
	ocfs2_dx_list_remove_entry(entry_list, index);

	if (ocfs2_dx_root_inline(dx_root))
		ocfs2_journal_dirty(handle, dx_root_bh);
	else
		ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);

out:
@@ -1290,13 +1341,30 @@ static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
	le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
}

static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
				       struct ocfs2_dx_hinfo *hinfo,
				       u64 dirent_blk)
{
	int i;
	struct ocfs2_dx_entry *dx_entry;

	i = le16_to_cpu(entry_list->de_num_used);
	dx_entry = &entry_list->de_entries[i];

	memset(dx_entry, 0, sizeof(*dx_entry));
	dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
	dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
	dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);

	le16_add_cpu(&entry_list->de_num_used, 1);
}

static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
				      struct ocfs2_dx_hinfo *hinfo,
				      u64 dirent_blk,
				      struct buffer_head *dx_leaf_bh)
{
	int ret, i;
	struct ocfs2_dx_entry *dx_entry;
	int ret;
	struct ocfs2_dx_leaf *dx_leaf;

	ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
@@ -1307,25 +1375,48 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
	}

	dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
	i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
	dx_entry = &dx_leaf->dl_list.de_entries[i];
	ocfs2_dx_entry_list_insert(&dx_leaf->dl_list, hinfo, dirent_blk);
	ocfs2_journal_dirty(handle, dx_leaf_bh);

	memset(dx_entry, 0, sizeof(*dx_entry));
	dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
	dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
	dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
out:
	return ret;
}

	le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
static int ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
				       struct ocfs2_dx_hinfo *hinfo,
				       u64 dirent_blk,
				       struct buffer_head *dx_root_bh)
{
	int ret;
	struct ocfs2_dx_root_block *dx_root;

	ocfs2_journal_dirty(handle, dx_leaf_bh);
	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk);
	ocfs2_journal_dirty(handle, dx_root_bh);

out:
	return ret;
}

static int ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
			       struct ocfs2_dir_lookup_result *lookup)
{
	struct ocfs2_dx_root_block *dx_root;

	dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
	if (ocfs2_dx_root_inline(dx_root))
		return ocfs2_dx_inline_root_insert(dir, handle,
						   &lookup->dl_hinfo,
						   lookup->dl_leaf_bh->b_blocknr,
						   lookup->dl_dx_root_bh);

	return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
					  lookup->dl_leaf_bh->b_blocknr,
					  lookup->dl_dx_leaf_bh);
@@ -1410,8 +1501,9 @@ int __ocfs2_add_entry(handle_t *handle,
				status = ocfs2_journal_access_db(handle, dir,
								 insert_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);

				if (ocfs2_dir_indexed(dir)) {
					status = ocfs2_dx_dir_leaf_insert(dir,
					status = ocfs2_dx_dir_insert(dir,
								handle,
								lookup);
					if (status) {
@@ -2019,6 +2111,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
				     handle_t *handle, struct inode *dir,
				     struct buffer_head *di_bh,
				     struct ocfs2_alloc_context *meta_ac,
				     int dx_inline,
				     struct buffer_head **ret_dx_root_bh)
{
	int ret;
@@ -2062,8 +2155,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
	dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
	dx_root->dr_blkno = cpu_to_le64(dr_blkno);
	dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);

	if (dx_inline) {
		dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
		dx_root->dr_entries.de_count =
			cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
	} else {
		dx_root->dr_list.l_count =
			cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
	}

	ret = ocfs2_journal_dirty(handle, dx_root_bh);
	if (ret)
@@ -2236,20 +2336,12 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
				 struct ocfs2_alloc_context *data_ac,
				 struct ocfs2_alloc_context *meta_ac)
{
	int ret, num_dx_leaves, i;
	int ret;
	struct buffer_head *leaf_bh = NULL;
	struct buffer_head *dx_root_bh = NULL;
	struct buffer_head **dx_leaves = NULL;
	struct ocfs2_extent_tree et;
	struct ocfs2_dx_hinfo hinfo;
	u64 insert_blkno;

	dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
	if (!dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;

	/*
	 * Our strategy is to create the directory as though it were
@@ -2258,7 +2350,8 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
	 * very well known quantity.
	 *
	 * Essentially, we have two dirents ("." and ".."), in the 1st
	 * block which need indexing.
	 * block which need indexing. These are easily inserted into
	 * the index block.
	 */

	ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh,
@@ -2268,61 +2361,22 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
		goto out;
	}

	/*
	 * Allocate and format the index leaf first, before attaching
	 * the index root. That way we're sure that the main bitmap
	 * won't -enospc on us with a half-created dir index.
	 *
	 * The meta data allocation for our index block will not
	 * -enospc on us unless there is a disk corruption.
	 */

	ret = __ocfs2_dx_dir_new_cluster(inode, 0, handle, data_ac, dx_leaves,
					 num_dx_leaves, &insert_blkno);
	ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh,
					meta_ac, 1, &dx_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	entry_list = &dx_root->dr_entries;

	ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo);
	i = ocfs2_dx_dir_hash_idx(osb, &hinfo);
	ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo,
					 leaf_bh->b_blocknr, dx_leaves[i]);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	/* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */
	ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);

	ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo);
	i = ocfs2_dx_dir_hash_idx(osb, &hinfo);
	ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo,
					 leaf_bh->b_blocknr, dx_leaves[i]);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, meta_ac,
					&dx_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* This should never fail considering we start with an empty
	 * dx_root. */
	ocfs2_init_dx_root_extent_tree(&et, inode, dx_root_bh);
	ret = ocfs2_insert_extent(osb, handle, inode, &et, 0,
				  insert_blkno, 1, 0, NULL);
	if (ret)
		mlog_errno(ret);
	ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);

out:
	if (dx_leaves) {
		for (i = 0; i < num_dx_leaves; i++)
			brelse(dx_leaves[i]);
		kfree(dx_leaves);
	}
	brelse(dx_root_bh);
	brelse(leaf_bh);
	return ret;
@@ -2392,6 +2446,74 @@ static int ocfs2_dx_dir_index_block(struct inode *dir,
out:
	return ret;
}
 /*
 * XXX: This expects dx_root_bh to already be part of the transaction.
 */
static void ocfs2_dx_dir_index_root_block(struct inode *dir,
					 struct buffer_head *dx_root_bh,
					 struct buffer_head *dirent_bh)
{
	char *de_buf, *limit;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dir_entry *de;
	struct ocfs2_dx_hinfo hinfo;
	u64 dirent_blk = dirent_bh->b_blocknr;

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;

	de_buf = dirent_bh->b_data;
	limit = de_buf + dir->i_sb->s_blocksize;

	while (de_buf < limit) {
		de = (struct ocfs2_dir_entry *)de_buf;

		if (!de->name_len || !de->inode)
			goto inc;

		ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);

		mlog(0,
		     "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n",
		     (unsigned long long)dir->i_ino, hinfo.major_hash,
		     hinfo.minor_hash,
		     le16_to_cpu(dx_root->dr_entries.de_num_used),
		     de->name_len, de->name);

		ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
					   dirent_blk);
inc:
		de_buf += le16_to_cpu(de->rec_len);
	}
}

/*
 * Count the number of inline directory entries in di_bh and compare
 * them against the number of entries we can hold in an inline dx root
 * block.
 */
static int ocfs2_new_dx_should_be_inline(struct inode *dir,
					 struct buffer_head *di_bh)
{
	int dirent_count = 0;
	char *de_buf, *limit;
	struct ocfs2_dir_entry *de;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	de_buf = di->id2.i_data.id_data;
	limit = de_buf + i_size_read(dir);

	while (de_buf < limit) {
		de = (struct ocfs2_dir_entry *)de_buf;

		if (de->name_len && de->inode)
			dirent_count++;

		de_buf += le16_to_cpu(de->rec_len);
	}

	/* We are careful to leave room for one extra record. */
	return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb);
}

/*
 * Expand rec_len of the rightmost dirent in a directory block so that it
@@ -2442,7 +2564,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
{
	u32 alloc, dx_alloc, bit_off, len;
	struct super_block *sb = dir->i_sb;
	int ret, i, num_dx_leaves = 0,
	int ret, i, num_dx_leaves = 0, dx_inline = 0,
		credits = ocfs2_inline_to_extents_credits(sb);
	u64 dx_insert_blkno, blkno,
		bytes = blocks_wanted << sb->s_blocksize_bits;
@@ -2465,16 +2587,20 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
	dx_alloc = 0;

	if (ocfs2_supports_indexed_dirs(osb)) {
		/* Add one more cluster for an index leaf */
		dx_alloc++;
		credits += ocfs2_add_dir_index_credits(sb);

		dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb, &num_dx_leaves);
		dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
		if (!dx_inline) {
			/* Add one more cluster for an index leaf */
			dx_alloc++;
			dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
								&num_dx_leaves);
			if (!dx_leaves) {
				ret = -ENOMEM;
				mlog_errno(ret);
				goto out;
			}
		}

		/* This gets us the dx_root */
		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
@@ -2524,7 +2650,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
	}
	did_quota = 1;

	if (ocfs2_supports_indexed_dirs(osb)) {
	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
		/*
		 * Allocate our index cluster first, to maximize the
		 * possibility that unindexed leaves grow
@@ -2587,7 +2713,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
		goto out_commit;
	}

	if (ocfs2_supports_indexed_dirs(osb)) {
	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
		/*
		 * Dx dirs with an external cluster need to do this up
		 * front. Inline dx root's get handled later, after
		 * we've allocated our root block.
		 */
		ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
					       num_dx_leaves, dirdata_bh);
		if (ret) {
@@ -2650,18 +2781,24 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,

	if (ocfs2_supports_indexed_dirs(osb)) {
		ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
						meta_ac, &dx_root_bh);
						meta_ac, dx_inline,
						&dx_root_bh);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		if (dx_inline) {
			ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
						      dirdata_bh);
		} else {
			ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
			ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0,
						  dx_insert_blkno, 1, 0, NULL);
			if (ret)
				mlog_errno(ret);
		}
	}

	/*
	 * We asked for two clusters, but only got one in the 1st
@@ -2690,6 +2827,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
	if (ocfs2_supports_indexed_dirs(osb)) {
		unsigned int off;

		if (!dx_inline) {
			/*
			 * We need to return the correct block within the
			 * cluster which should hold our entry.
@@ -2699,6 +2837,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
			get_bh(dx_leaves[off]);
			lookup->dl_dx_leaf_bh = dx_leaves[off];
		}
		lookup->dl_dx_root_bh = dx_root_bh;
		dx_root_bh = NULL;
	}

out_commit:
	if (ret < 0 && did_quota)
@@ -3506,6 +3647,138 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
	return ret;
}

static int ocfs2_expand_inline_dx_root(struct inode *dir,
				       struct buffer_head *dx_root_bh)
{
	int ret, num_dx_leaves, i, j, did_quota = 0;
	struct buffer_head **dx_leaves = NULL;
	struct ocfs2_extent_tree et;
	u64 insert_blkno;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	handle_t *handle = NULL;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;
	struct ocfs2_dx_entry *dx_entry;
	struct ocfs2_dx_leaf *target_leaf;

	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
	if (!dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	if (vfs_dq_alloc_space_nodirty(dir,
				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
		ret = -EDQUOT;
		goto out_commit;
	}
	did_quota = 1;

	/*
	 * We do this up front, before the allocation, so that a
	 * failure to add the dx_root_bh to the journal won't result
	 * us losing clusters.
	 */
	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
					 num_dx_leaves, &insert_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * Transfer the entries from our dx_root into the appropriate
	 * block
	 */
	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
	entry_list = &dx_root->dr_entries;

	for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
		dx_entry = &entry_list->de_entries[i];

		j = __ocfs2_dx_dir_hash_idx(osb,
					    le32_to_cpu(dx_entry->dx_minor_hash));
		target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;

		ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);

		/* Each leaf has been passed to the journal already
		 * via __ocfs2_dx_dir_new_cluster() */
	}

	dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
	memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
	       offsetof(struct ocfs2_dx_root_block, dr_list));
	dx_root->dr_list.l_count =
		cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));

	/* This should never fail considering we start with an empty
	 * dx_root. */
	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0,
				  insert_blkno, 1, 0, NULL);
	if (ret)
		mlog_errno(ret);
	did_quota = 0;

	ocfs2_journal_dirty(handle, dx_root_bh);

out_commit:
	if (ret < 0 && did_quota)
		vfs_dq_free_space_nodirty(dir,
					  ocfs2_clusters_to_bytes(dir->i_sb, 1));

	ocfs2_commit_trans(osb, handle);

out:
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

	if (dx_leaves) {
		for (i = 0; i < num_dx_leaves; i++)
			brelse(dx_leaves[i]);
		kfree(dx_leaves);
	}
	return ret;
}

static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
{
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;

	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
	entry_list = &dx_root->dr_entries;

	if (le16_to_cpu(entry_list->de_num_used) >=
	    le16_to_cpu(entry_list->de_count))
		return -ENOSPC;

	return 0;
}

static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
				   struct buffer_head *di_bh, const char *name,
				   int namelen,
@@ -3527,6 +3800,23 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
	}

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	if (ocfs2_dx_root_inline(dx_root)) {
		ret = ocfs2_inline_dx_has_space(dx_root_bh);

		if (ret == 0)
			goto search_el;

		/*
		 * We ran out of room in the root block. Expand it to
		 * an extent, then allow ocfs2_find_dir_space_dx to do
		 * the rest.
		 */
		ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

restart_search:
	ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
@@ -3578,8 +3868,11 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
		goto restart_search;
	}

search_el:
	lookup->dl_dx_leaf_bh = dx_leaf_bh;
	dx_leaf_bh = NULL;
	lookup->dl_dx_root_bh = dx_root_bh;
	dx_root_bh = NULL;

out:
	brelse(dx_leaf_bh);
@@ -3774,10 +4067,12 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
		mlog_errno(ret);
		goto out;
	}
	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;

	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
	if (ocfs2_dx_root_inline(dx_root))
		goto remove_index;

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);

	/* XXX: What if dr_clusters is too large? */
	while (le32_to_cpu(dx_root->dr_clusters)) {
@@ -3803,6 +4098,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
		major_hash = cpos - 1;
	}

remove_index:
	ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
	if (ret) {
		mlog_errno(ret);
+2 −0
Original line number Diff line number Diff line
@@ -37,6 +37,8 @@ struct ocfs2_dir_lookup_result {
	struct ocfs2_dir_entry		*dl_entry;	/* Target dirent in
							 * unindexed leaf */

	struct buffer_head		*dl_dx_root_bh;	/* Root of indexed
							 * tree */
	struct buffer_head		*dl_dx_leaf_bh;	/* Indexed leaf block */
	struct ocfs2_dx_entry		*dl_dx_entry;	/* Target dx_entry in
							 * indexed leaf */
+10 −0
Original line number Diff line number Diff line
@@ -458,6 +458,16 @@ static inline int ocfs2_rename_credits(struct super_block *sb)
#define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS +	\
				      OCFS2_SUBALLOC_FREE)

static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
{
	int credits = 1 + OCFS2_SUBALLOC_ALLOC;

	credits += ocfs2_clusters_to_blocks(sb, 1);
	credits += ocfs2_quota_trans_credits(sb);

	return credits;
}

/*
 * Please note that the caller must make sure that root_el is the root
 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
+1 −3
Original line number Diff line number Diff line
@@ -321,11 +321,9 @@ static int ocfs2_mknod(struct inode *dir,
		want_clusters += 1;

		/* Dir indexing requires extra space as well */
		if (ocfs2_supports_indexed_dirs(osb)) {
			want_clusters++;
		if (ocfs2_supports_indexed_dirs(osb))
			want_meta++;
	}
	}

	status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac);
	if (status < 0) {
+24 −4
Original line number Diff line number Diff line
@@ -815,6 +815,8 @@ struct ocfs2_dx_entry_list {
							 * length de_num_used */
};

#define OCFS2_DX_FLAG_INLINE	0x01

/*
 * A directory indexing block. Each indexed directory has one of these,
 * pointed to by ocfs2_dinode.
@@ -835,13 +837,21 @@ struct ocfs2_dx_root_block {
						 * extent block */
	__le32		dr_clusters;		/* Clusters allocated
						 * to the indexed tree. */
	__le32		dr_reserved1;
	__u8		dr_flags;		/* OCFS2_DX_FLAG_* flags */
	__u8		dr_reserved0;
	__le16		dr_reserved1;
	__le64		dr_dir_blkno;		/* Pointer to parent inode */
	__le64		dr_reserved2;
	__le64		dr_reserved3[16];
	union {
		struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
						   * bits for maximum space
						   * efficiency. */
		struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
							* entries. We grow out
							* to extents if this
							* gets too big. */
	};
};

/*
@@ -1228,6 +1238,16 @@ static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
	return size / sizeof(struct ocfs2_dx_entry);
}

static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
{
	int size;

	size = sb->s_blocksize -
		offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);

	return size / sizeof(struct ocfs2_dx_entry);
}

static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
{
	u16 size;