Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e157b555, authored by Matthew Wilcox, committed by Linus Torvalds
Browse files

radix-tree: add radix_tree_split

This new function splits a larger multiorder entry into smaller entries
(potentially themselves multiorder entries).  These entries are initialised
to RADIX_TREE_RETRY to ensure that RCU walkers who see this state aren't
confused.  The caller should then call radix_tree_for_each_slot() and
radix_tree_iter_replace() in order to turn these retry entries into the
intended new entries.  Tags are replicated from the original multiorder
entry into each new entry.

Link: http://lkml.kernel.org/r/1480369871-5271-59-git-send-email-mawilcox@linuxonhyperv.com


Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 175542f5
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -80,6 +80,14 @@ static inline bool radix_tree_is_internal_node(void *ptr)
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))

/*
 * @count is the count of every non-NULL element in the ->slots array
 * whether that is an exceptional entry, a retry entry, a user pointer,
 * a sibling entry or a pointer to the next level of the tree.
 * @exceptional is the count of every element in ->slots which is
 * either radix_tree_exceptional_entry() or is a sibling entry for an
 * exceptional entry.
 */
struct radix_tree_node {
	unsigned char	shift;		/* Bits remaining in each slot */
	unsigned char	offset;		/* Slot offset in parent */
@@ -293,6 +301,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void **slot, void *item,
			  radix_tree_update_node_t update_node, void *private);
void radix_tree_iter_replace(struct radix_tree_root *,
		const struct radix_tree_iter *, void **slot, void *item);
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void **slot, void *item);
void __radix_tree_delete_node(struct radix_tree_root *root,
@@ -335,6 +345,8 @@ static inline void radix_tree_preload_end(void)
	preempt_enable();
}

/*
 * Multiorder entry manipulation.
 *
 * radix_tree_split() splits the entry containing @index into smaller
 * entries of order @new_order; radix_tree_join() replaces multiple
 * entries with one multiorder entry of order @new_order.  Both return
 * an int status code.
 */
int radix_tree_split(struct radix_tree_root *, unsigned long index,
			unsigned new_order);
int radix_tree_join(struct radix_tree_root *, unsigned long index,
			unsigned new_order, void *);

+138 −4
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -758,7 +759,10 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
	unsigned i, n, tag, offset, tags = 0;

	if (node) {
		if (order > node->shift)
			n = 1 << (order - node->shift);
		else
			n = 1;
		offset = get_slot_offset(node, slot);
	} else {
		n = 1;
@@ -797,7 +801,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
					tag_set(node, tag, offset);
		}
		if (radix_tree_is_internal_node(old) &&
					!is_sibling_entry(node, old))
					!is_sibling_entry(node, old) &&
					(old != RADIX_TREE_RETRY))
			radix_tree_free_nodes(old);
		if (radix_tree_exceptional_entry(old))
			node->exceptional--;
@@ -1021,7 +1026,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and exceptional entries, as that requires accounting
 * inside the radix tree node. When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace().
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void **slot, void *item)
@@ -1029,6 +1035,21 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
	replace_slot(root, NULL, slot, item, true);
}

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root:	radix tree root
 * @iter:	iterator state; its ->node is used as the node containing @slot
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_split() and radix_tree_for_each_slot().
 * Caller must hold tree write locked across split and replacement.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
		const struct radix_tree_iter *iter, void **slot, void *item)
{
	/* No update_node callback and no private data are supplied. */
	__radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/**
 * radix_tree_join - replace multiple entries with one multiorder entry
@@ -1061,6 +1082,117 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index,

	return error;
}

/**
 * radix_tree_split - Split an entry into smaller entries
 * @root: radix tree root
 * @index: An index within the large entry
 * @order: Order of new entries
 *
 * Call this function as the first step in replacing a multiorder entry
 * with several entries of lower order.  After this function returns,
 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
 * and call radix_tree_iter_replace() to set up each new entry.
 *
 * The tags from this entry are replicated to all the new entries.
 *
 * The radix tree should be locked against modification during the entire
 * replacement operation.  Lock-free lookups will see RADIX_TREE_RETRY which
 * should prompt RCU walkers to restart the lookup from the root.
 *
 * Return: 0 on success; -ENOENT if the lookup of @index finds nothing or
 * reports no containing node; -ENOMEM if a new node could not be allocated.
 * On -ENOMEM the original slots have already been overwritten with
 * RADIX_TREE_RETRY and nodes allocated so far are not freed (see the TODO
 * at the nomem label).
 */
int radix_tree_split(struct radix_tree_root *root, unsigned long index,
				unsigned order)
{
	struct radix_tree_node *parent, *node, *child;
	void **slot;
	unsigned int offset, end;
	unsigned n, tag, tags = 0;

	if (!__radix_tree_lookup(root, index, &parent, &slot))
		return -ENOENT;
	/* presumably the entry sits directly in the root; verify in lookup */
	if (!parent)
		return -ENOENT;

	offset = get_slot_offset(parent, slot);

	/* Collect the tags set on the entry's slot into a bitmask. */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
		if (tag_get(parent, tag, offset))
			tags |= 1 << tag;

	/*
	 * Walk the sibling entries that follow @offset: copy the tags onto
	 * each one and turn it into RETRY.  @end finishes one past the last
	 * slot converted.
	 */
	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
		if (!is_sibling_entry(parent, parent->slots[end]))
			break;
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(parent, tag, end);
		/* rcu_assign_pointer ensures tags are set before RETRY */
		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
	}
	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
	/*
	 * NOTE(review): this subtracts one per converted slot without
	 * checking whether the old entry was exceptional -- confirm the
	 * accounting is right when a non-exceptional entry is split.
	 */
	parent->exceptional -= (end - offset);

	/* Each new entry is exactly one slot of @parent: nothing more to do. */
	if (order == parent->shift)
		return 0;
	/*
	 * Each new entry covers several slots of @parent: re-insert RETRY
	 * through insert_entries(), advancing by the count it returns.
	 */
	if (order > parent->shift) {
		while (offset < end)
			offset += insert_entries(parent, &parent->slots[offset],
					RADIX_TREE_RETRY, order, true);
		return 0;
	}

	/*
	 * New entries are smaller than one slot of @parent: build subtrees
	 * of new nodes below slots [offset, end) of @parent.
	 */
	node = parent;

	for (;;) {
		if (node->shift > order) {
			/* Go one level down by allocating a child of @node. */
			child = radix_tree_node_alloc(root);
			if (!child)
				goto nomem;
			child->shift = node->shift - RADIX_TREE_MAP_SHIFT;
			child->offset = offset;
			child->count = 0;
			child->parent = node;
			/*
			 * A child of a freshly allocated node is linked right
			 * away; a child of @parent is only stored into
			 * @parent's slot by the climb loop below, once the
			 * child is full.
			 */
			if (node != parent) {
				node->count++;
				node->slots[offset] = node_to_entry(child);
				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
					if (tags & (1 << tag))
						tag_set(node, tag, offset);
			}

			node = child;
			offset = 0;
			continue;
		}

		/* @node is at the right level: place RETRY and copy the tags. */
		n = insert_entries(node, &node->slots[offset],
					RADIX_TREE_RETRY, order, false);
		BUG_ON(n > RADIX_TREE_MAP_SIZE);

		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(node, tag, offset);
		offset += n;

		/*
		 * @node is full: climb towards @parent, storing each completed
		 * child into its parent's slot with rcu_assign_pointer() and
		 * moving on to the next slot at that level.
		 */
		while (offset == RADIX_TREE_MAP_SIZE) {
			if (node == parent)
				break;
			offset = node->offset;
			child = node;
			node = node->parent;
			rcu_assign_pointer(node->slots[offset],
						node_to_entry(child));
			offset++;
		}
		/* Back at @parent with every slot of the old entry handled. */
		if ((node == parent) && (offset == end))
			return 0;
	}

 nomem:
	/* Shouldn't happen; did user forget to preload? */
	/* TODO: free all the allocated nodes */
	WARN_ON(1);
	return -ENOMEM;
}
#endif

/**
@@ -1441,8 +1573,10 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if ((child == NULL) || (child == RADIX_TREE_RETRY))
		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (radix_tree_is_internal_node(child));

	/* Update the iterator state */
+64 −0
Original line number Diff line number Diff line
@@ -389,6 +389,69 @@ static void multiorder_join(void)
	}
}

/*
 * Split the entry at index 0 into entries of order @order, then overwrite
 * every slot the iterator visits.  When @fixed is NULL each slot receives
 * a new item_create() entry for its index; otherwise each slot receives
 * @fixed itself.
 */
static void split_and_refill(struct radix_tree_root *root, int order,
				void *fixed)
{
	struct radix_tree_iter iter;
	void **slot;

	radix_tree_split(root, 0, order);
	radix_tree_for_each_slot(slot, root, &iter, 0) {
		void *entry = fixed;

		if (!entry)
			entry = item_create(iter.index, order);
		radix_tree_iter_replace(root, &iter, slot, entry);
	}
}

/*
 * Exercise radix_tree_split() from @old_order down to @new_order in three
 * rounds, checking the node's exceptional count where the entry value is
 * 0x12 or 0x16.
 */
static void __multiorder_split(int old_order, int new_order)
{
	struct radix_tree_node *node;
	void *item;
	RADIX_TREE(tree, GFP_KERNEL);

	/* Round 1: tagged item from item_insert_order(), refilled with items. */
	item_insert_order(&tree, 0, old_order);
	radix_tree_tag_set(&tree, 0, 2);
	split_and_refill(&tree, new_order, NULL);
	item_kill_tree(&tree);

	/* Round 2: entry 0x12 is counted as exceptional, the new items are not. */
	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
	item = __radix_tree_lookup(&tree, 0, &node, NULL);
	assert(item == (void *)0x12);
	assert(node->exceptional > 0);

	split_and_refill(&tree, new_order, NULL);
	item = __radix_tree_lookup(&tree, 0, &node, NULL);
	assert(item != (void *)0x12);
	assert(node->exceptional == 0);
	item_kill_tree(&tree);

	/* Round 3: entry 0x12 is replaced by 0x16, still counted as exceptional. */
	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
	item = __radix_tree_lookup(&tree, 0, &node, NULL);
	assert(item == (void *)0x12);
	assert(node->exceptional > 0);

	split_and_refill(&tree, new_order, (void *)0x16);
	item = __radix_tree_lookup(&tree, 0, &node, NULL);
	assert(item == (void *)0x16);
	assert(node->exceptional > 0);
	item_kill_tree(&tree);
}

/*
 * Run __multiorder_split() for every original order in [9, 18] paired
 * with every strictly smaller new order.
 */
static void multiorder_split(void)
{
	int old_order, new_order;

	for (old_order = 9; old_order < 19; old_order++) {
		for (new_order = 0; new_order < old_order; new_order++) {
			__multiorder_split(old_order, new_order);
		}
	}
}

void multiorder_checks(void)
{
	int i;
@@ -407,4 +470,5 @@ void multiorder_checks(void)
	multiorder_iteration();
	multiorder_tagged_iteration();
	multiorder_join();
	multiorder_split();
}