Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9bacbb4c authored by Eric Whitney's avatar Eric Whitney Committed by Greg Kroah-Hartman
Browse files

ext4: add new pending reservation mechanism



[ Upstream commit 1dc0aa46e74a3366e12f426b7caaca477853e9c3 ]

Add new pending reservation mechanism to help manage reserved cluster
accounting.  Its primary function is to avoid the need to read extents
from the disk when invalidating pages as a result of a truncate, punch
hole, or collapse range operation.

Signed-off-by: default avatarEric Whitney <enwlinux@gmail.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Stable-dep-of: 131294c35ed6 ("ext4: fix delayed allocation bug in ext4_clu_mapped for bigalloc + inline")
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
parent cca8671f
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -1041,6 +1041,9 @@ struct ext4_inode_info {
	ext4_lblk_t i_da_metadata_calc_last_lblock;
	int i_da_metadata_calc_len;

	/* pending cluster reservations for bigalloc file systems */
	struct ext4_pending_tree i_pending_tree;

	/* on-disk additional length */
	__u16 i_extra_isize;

+187 −0
Original line number Diff line number Diff line
@@ -142,6 +142,7 @@
 */

static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;

static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
@@ -1363,3 +1364,189 @@ static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
	ei->i_es_tree.cache_es = NULL;
	return nr_shrunk;
}

#ifdef ES_DEBUG__
static void ext4_print_pending_tree(struct inode *inode)
{
	struct ext4_pending_tree *tree;
	struct rb_node *node;
	struct pending_reservation *pr;

	printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
	tree = &EXT4_I(inode)->i_pending_tree;
	node = rb_first(&tree->root);
	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		printk(KERN_DEBUG " %u", pr->lclu);
		node = rb_next(node);
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_print_pending_tree(inode)
#endif

int __init ext4_init_pending(void)
{
	ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
					   sizeof(struct pending_reservation),
					   0, (SLAB_RECLAIM_ACCOUNT), NULL);
	if (ext4_pending_cachep == NULL)
		return -ENOMEM;
	return 0;
}

void ext4_exit_pending(void)
{
	kmem_cache_destroy(ext4_pending_cachep);
}

void ext4_init_pending_tree(struct ext4_pending_tree *tree)
{
	tree->root = RB_ROOT;
}

/*
 * __get_pending - retrieve a pointer to a pending reservation
 *
 * @inode - file containing the pending cluster reservation
 * @lclu - logical cluster of interest
 *
 * Returns a pointer to a pending reservation if it's a member of
 * the set, and NULL if not.  Must be called holding i_es_lock.
 */
static struct pending_reservation *__get_pending(struct inode *inode,
						 ext4_lblk_t lclu)
{
	struct ext4_pending_tree *tree;
	struct rb_node *node;
	struct pending_reservation *pr = NULL;

	tree = &EXT4_I(inode)->i_pending_tree;
	node = (&tree->root)->rb_node;

	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		if (lclu < pr->lclu)
			node = node->rb_left;
		else if (lclu > pr->lclu)
			node = node->rb_right;
		else if (lclu == pr->lclu)
			return pr;
	}
	return NULL;
}

/*
 * __insert_pending - adds a pending cluster reservation to the set of
 *                    pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster to be added
 *
 * Returns 0 on successful insertion and -ENOMEM on failure.  If the
 * pending reservation is already in the set, returns successfully.
 */
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
	struct rb_node **p = &tree->root.rb_node;
	struct rb_node *parent = NULL;
	struct pending_reservation *pr;
	ext4_lblk_t lclu;
	int ret = 0;

	lclu = EXT4_B2C(sbi, lblk);
	/* search to find parent for insertion */
	while (*p) {
		parent = *p;
		pr = rb_entry(parent, struct pending_reservation, rb_node);

		if (lclu < pr->lclu) {
			p = &(*p)->rb_left;
		} else if (lclu > pr->lclu) {
			p = &(*p)->rb_right;
		} else {
			/* pending reservation already inserted */
			goto out;
		}
	}

	pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
	if (pr == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	pr->lclu = lclu;

	rb_link_node(&pr->rb_node, parent, p);
	rb_insert_color(&pr->rb_node, &tree->root);

out:
	return ret;
}

/*
 * __remove_pending - removes a pending cluster reservation from the set
 *                    of pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the pending cluster reservation to be removed
 *
 * Returns successfully if pending reservation is not a member of the set.
 */
static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct pending_reservation *pr;
	struct ext4_pending_tree *tree;

	pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
	if (pr != NULL) {
		tree = &EXT4_I(inode)->i_pending_tree;
		rb_erase(&pr->rb_node, &tree->root);
		kmem_cache_free(ext4_pending_cachep, pr);
	}
}

/*
 * ext4_remove_pending - removes a pending cluster reservation from the set
 *                       of pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the pending cluster reservation to be removed
 *
 * Locking for external use of __remove_pending.
 */
void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	write_lock(&ei->i_es_lock);
	__remove_pending(inode, lblk);
	write_unlock(&ei->i_es_lock);
}

/*
 * ext4_is_pending - determine whether a cluster has a pending reservation
 *                   on it
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster
 *
 * Returns true if there's a pending reservation for the cluster in the
 * set of pending reservations, and false if not.
 */
bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	bool ret;

	read_lock(&ei->i_es_lock);
	ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
	read_unlock(&ei->i_es_lock);

	return ret;
}
+51 −0
Original line number Diff line number Diff line
@@ -78,6 +78,51 @@ struct ext4_es_stats {
	struct percpu_counter es_stats_shk_cnt;
};

/*
 * Pending cluster reservations for bigalloc file systems
 *
 * A cluster with a pending reservation is a logical cluster shared by at
 * least one extent in the extents status tree with delayed and unwritten
 * status and at least one other written or unwritten extent.  The
 * reservation is said to be pending because a cluster reservation would
 * have to be taken in the event all blocks in the cluster shared with
 * written or unwritten extents were deleted while the delayed and
 * unwritten blocks remained.
 *
 * The set of pending cluster reservations is an auxiliary data structure
 * used with the extents status tree to implement reserved cluster/block
 * accounting for bigalloc file systems.  The set is kept in memory and
 * records all pending cluster reservations.
 *
 * Its primary function is to avoid the need to read extents from the
 * disk when invalidating pages as a result of a truncate, punch hole, or
 * collapse range operation.  Page invalidation requires a decrease in the
 * reserved cluster count if it results in the removal of all delayed
 * and unwritten extents (blocks) from a cluster that is not shared with a
 * written or unwritten extent, and no decrease otherwise.  Determining
 * whether the cluster is shared can be done by searching for a pending
 * reservation on it.
 *
 * Secondarily, it provides a potentially faster method for determining
 * whether the reserved cluster count should be increased when a physical
 * cluster is deallocated as a result of a truncate, punch hole, or
 * collapse range operation.  The necessary information is also present
 * in the extents status tree, but might be more rapidly accessed in
 * the pending reservation set in many cases due to smaller size.
 *
 * The pending cluster reservation set is implemented as a red-black tree
 * with the goal of minimizing per page search time overhead.
 */

struct pending_reservation {
	struct rb_node rb_node;
	ext4_lblk_t lclu;
};

struct ext4_pending_tree {
	struct rb_root root;
};

extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -182,4 +227,10 @@ extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);

extern int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v);

extern int __init ext4_init_pending(void);
extern void ext4_exit_pending(void);
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);

#endif /* _EXT4_EXTENTS_STATUS_H */
+8 −0
Original line number Diff line number Diff line
@@ -1095,6 +1095,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
	ei->i_da_metadata_calc_len = 0;
	ei->i_da_metadata_calc_last_lblock = 0;
	spin_lock_init(&(ei->i_block_reservation_lock));
	ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
@@ -6189,6 +6190,10 @@ static int __init ext4_init_fs(void)
	if (err)
		return err;

	err = ext4_init_pending();
	if (err)
		goto out6;

	err = ext4_init_pageio();
	if (err)
		goto out5;
@@ -6227,6 +6232,8 @@ static int __init ext4_init_fs(void)
out4:
	ext4_exit_pageio();
out5:
	ext4_exit_pending();
out6:
	ext4_exit_es();

	return err;
@@ -6244,6 +6251,7 @@ static void __exit ext4_exit_fs(void)
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_es();
	ext4_exit_pending();
}

MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");