Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f4b109c6 authored by Johannes Weiner's avatar Johannes Weiner Committed by Linus Torvalds
Browse files

lib: radix-tree: add entry deletion support to __radix_tree_replace()

Page cache shadow entry handling will be a lot simpler when it can use a
single generic replacement function for pages, shadow entries, and
emptying slots.

Make __radix_tree_replace() properly account insertions and deletions in
node->count and garbage collect nodes as they become empty.  Then
re-implement radix_tree_delete() on top of it.

Link: http://lkml.kernel.org/r/20161117193058.GC23430@cmpxchg.org


Signed-off-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 6d75f366
Loading
Loading
Loading
Loading
+116 −111
Original line number Diff line number Diff line
@@ -538,6 +538,107 @@ static int radix_tree_extend(struct radix_tree_root *root,
	return maxshift + RADIX_TREE_MAP_SHIFT;
}

/**
 *	radix_tree_shrink    -    shrink radix tree to minimum height
 *	@root		radix tree root
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = root->rnode;
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * The candidate node has more than one child, or its child
		 * is not at the leftmost slot, or the child is a multiorder
		 * entry, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = node->slots[0];
		if (!child)
			break;
		if (!radix_tree_is_internal_node(child) && node->shift)
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		root->rnode = child;

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		if (!radix_tree_is_internal_node(child))
			node->slots[0] = RADIX_TREE_RETRY;

		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}

static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node == entry_to_node(root->rnode))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			root_tag_clear_all(root);
			root->rnode = NULL;
		}

		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}

/**
 *	__radix_tree_create	-	create a slot in a radix tree
 *	@root:		radix tree root
@@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root,
			 bool warn_typeswitch)
{
	void *old = rcu_dereference_raw(*slot);
	int exceptional;
	int count, exceptional;

	WARN_ON_ONCE(radix_tree_is_internal_node(item));
	WARN_ON_ONCE(!!item - !!old);

	count = !!item - !!old;
	exceptional = !!radix_tree_exceptional_entry(item) -
		      !!radix_tree_exceptional_entry(old);

	WARN_ON_ONCE(warn_typeswitch && exceptional);
	WARN_ON_ONCE(warn_typeswitch && (count || exceptional));

	if (node)
	if (node) {
		node->count += count;
		node->exceptional += exceptional;
	}

	rcu_assign_pointer(*slot, item);
}
@@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root,
			  void **slot, void *item)
{
	/*
	 * This function supports replacing exceptional entries, but
	 * that needs accounting against the node unless the slot is
	 * root->rnode.
	 * This function supports replacing exceptional entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->rnode.
	 */
	replace_slot(root, node, slot, item,
		     !node && slot != (void **)&root->rnode);

	delete_node(root, node);
}

/**
@@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and exceptional entries, as that requires accounting
 * inside the radix tree node. When switching from one type of entry to
 * another, use __radix_tree_lookup() and __radix_tree_replace().
 * inside the radix tree node. When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void **slot, void *item)
@@ -1466,75 +1571,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
}
#endif /* CONFIG_SHMEM && CONFIG_SWAP */

/**
 *	radix_tree_shrink    -    shrink radix tree to minimum height
 *	@root		radix tree root
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = root->rnode;
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * The candidate node has more than one child, or its child
		 * is not at the leftmost slot, or the child is a multiorder
		 * entry, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = node->slots[0];
		if (!child)
			break;
		if (!radix_tree_is_internal_node(child) && node->shift)
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		root->rnode = child;

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		if (!radix_tree_is_internal_node(child))
			node->slots[0] = RADIX_TREE_RETRY;

		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}

/**
 *	__radix_tree_delete_node    -    try to free node after clearing a slot
 *	@root:		radix tree root
@@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
bool __radix_tree_delete_node(struct radix_tree_root *root,
			      struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node == entry_to_node(root->rnode))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			root_tag_clear_all(root);
			root->rnode = NULL;
		}

		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
	return delete_node(root, node);
}

static inline void delete_sibling_entries(struct radix_tree_node *node,
@@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
		node_tag_clear(root, node, tag, offset);

	delete_sibling_entries(node, node_to_entry(slot), offset);
	node->slots[offset] = NULL;
	node->count--;
	if (radix_tree_exceptional_entry(entry))
		node->exceptional--;

	__radix_tree_delete_node(root, node);
	__radix_tree_replace(root, node, slot, NULL);

	return entry;
}