Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7d111c81 authored by Joe Thornber, committed by Mike Snitzer
Browse files

dm btree: introduce cursor api



This uses prefetching to speed up iteration through a btree.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 9d1b404c
Loading
Loading
Loading
Loading
+162 −0
Original line number Diff line number Diff line
@@ -994,3 +994,165 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
	return walk_node(info, root, fn, context);
}
EXPORT_SYMBOL_GPL(dm_btree_walk);

/*----------------------------------------------------------------*/

/*
 * Issue a prefetch for every value stored in the node on top of the cursor
 * stack, interpreting each value as a little-endian 64-bit block number.
 * The tree's value size must therefore be exactly sizeof(__le64).
 */
static void prefetch_values(struct dm_btree_cursor *c)
{
	struct cursor_node *top = &c->nodes[c->depth - 1];
	struct btree_node *node = dm_block_data(top->b);
	struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm);
	unsigned nr_entries, idx;
	__le64 value_le;

	BUG_ON(c->info->value_type.size != sizeof(value_le));

	nr_entries = le32_to_cpu(node->header.nr_entries);
	idx = 0;
	while (idx < nr_entries) {
		/* Values may be unaligned within the node, so copy them out. */
		memcpy(&value_le, value_ptr(node, idx), sizeof(value_le));
		dm_bm_prefetch(bm, le64_to_cpu(value_le));
		idx++;
	}
}

/*
 * True if the node currently on top of the cursor stack has the LEAF_NODE
 * flag set.  The stack must not be empty.
 */
static bool leaf_node(struct dm_btree_cursor *c)
{
	struct cursor_node *top = &c->nodes[c->depth - 1];
	struct btree_node *node = dm_block_data(top->b);
	uint32_t flags = le32_to_cpu(node->header.flags);

	return (flags & LEAF_NODE) != 0;
}

/*
 * Lock block @b with bn_read_lock() and push it onto the cursor stack with
 * its entry index reset to zero.
 *
 * After a successful push the node's values are prefetched, unless it is a
 * leaf and the cursor was created with prefetch_leaves unset.
 *
 * Returns 0 on success, -EINVAL if the stack is already at its depth limit,
 * or whatever error bn_read_lock() reported.
 */
static int push_node(struct dm_btree_cursor *c, dm_block_t b)
{
	struct cursor_node *slot = &c->nodes[c->depth];
	int r;

	if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) {
		DMERR("couldn't push cursor node, stack depth too high");
		return -EINVAL;
	}

	r = bn_read_lock(c->info, b, &slot->b);
	if (r)
		return r;

	slot->index = 0;
	c->depth++;

	/* Leaves are only worth prefetching when the caller asked for it. */
	if (!c->prefetch_leaves && leaf_node(c))
		return 0;

	prefetch_values(c);
	return 0;
}

static void pop_node(struct dm_btree_cursor *c)
{
	c->depth--;
	unlock_block(c->info, c->nodes[c->depth].b);
}

/*
 * Advance the index of the node on top of the stack.  If that runs off the
 * end of the node, pop it and retry with its parent, repeating until some
 * node has a further entry.
 *
 * Returns 0 once a node with a valid next index is on top of the stack, or
 * -ENODATA when the stack has been emptied (iteration is finished).
 */
static int inc_or_backtrack(struct dm_btree_cursor *c)
{
	while (c->depth) {
		struct cursor_node *top = &c->nodes[c->depth - 1];
		struct btree_node *node = dm_block_data(top->b);

		if (++top->index < le32_to_cpu(node->header.nr_entries))
			return 0;

		/* This node is exhausted; back up to its parent. */
		pop_node(c);
	}

	return -ENODATA;
}

/*
 * Starting from the node on top of the stack, keep following the child at
 * the current index and pushing it until a leaf is on top.
 *
 * Returns 0 with a non-empty leaf on top of the stack, -ENODATA if the leaf
 * reached has no entries, or the error from push_node() if a child could
 * not be pushed.  Nodes already on the stack are left in place on error.
 */
static int find_leaf(struct dm_btree_cursor *c)
{
	for (;;) {
		struct cursor_node *top = &c->nodes[c->depth - 1];
		struct btree_node *node = dm_block_data(top->b);
		__le64 child_le;
		int r;

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			return le32_to_cpu(node->header.nr_entries) ? 0 : -ENODATA;

		/* Internal node: its value at the current index is the child block. */
		memcpy(&child_le, value_ptr(node, top->index), sizeof(child_le));
		r = push_node(c, le64_to_cpu(child_le));
		if (r) {
			DMERR("push_node failed");
			return r;
		}
	}
}

/*
 * Initialise cursor @c for iterating the btree rooted at @root and position
 * it on the first leaf entry.
 *
 * @info:            description of the btree being walked
 * @root:            block number of the tree's root node
 * @prefetch_leaves: if set, leaf values are treated as block numbers and
 *                   prefetched as each leaf is visited
 * @c:               caller-provided cursor storage; any previous contents
 *                   are overwritten
 *
 * Returns 0 on success, -ENODATA if the first leaf reached is empty, or
 * another negative errno if a node could not be locked.
 *
 * On any failure the cursor holds no blocks when this returns, so the
 * caller does not need to call dm_btree_cursor_end() (though doing so is
 * harmless).
 */
int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
			  bool prefetch_leaves, struct dm_btree_cursor *c)
{
	int r;

	c->info = info;
	c->root = root;
	c->depth = 0;
	c->prefetch_leaves = prefetch_leaves;

	r = push_node(c, root);
	if (r)
		return r;

	r = find_leaf(c);
	if (r) {
		/*
		 * find_leaf() leaves the root and every node it managed to
		 * push locked.  Unwind them here, otherwise a caller that
		 * simply propagates the error would leak those locks.
		 */
		while (c->depth)
			pop_node(c);
	}

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_begin);

/*
 * Finish with a cursor: pop and unlock every node still on its stack.
 * Does nothing if the stack is already empty.
 */
void dm_btree_cursor_end(struct dm_btree_cursor *c)
{
	for (; c->depth; )
		pop_node(c);
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_end);

/*
 * Move the cursor to the next leaf entry.
 *
 * Returns 0 on success, -ENODATA when there are no further entries, or
 * another negative errno if descending to the next leaf failed.
 */
int dm_btree_cursor_next(struct dm_btree_cursor *c)
{
	int r;

	r = inc_or_backtrack(c);
	if (r)
		return r;

	/* We may now be sitting on an internal node; descend to its leaf. */
	r = find_leaf(c);
	if (r)
		DMERR("find_leaf failed");

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_next);

/*
 * Read the entry the cursor currently points at.
 *
 * @key receives the key converted to CPU byte order.  @value_le receives
 * value_type.size bytes copied verbatim from the node (still little-endian).
 *
 * Returns 0 on success, -ENODATA if the cursor holds no nodes, or -EINVAL
 * if the node on top of the stack is an internal node rather than a leaf.
 */
int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le)
{
	struct cursor_node *top;
	struct btree_node *node;

	if (!c->depth)
		return -ENODATA;

	top = &c->nodes[c->depth - 1];
	node = dm_block_data(top->b);

	if (le32_to_cpu(node->header.flags) & INTERNAL_NODE)
		return -EINVAL;

	*key = le64_to_cpu(*key_ptr(node, top->index));
	memcpy(value_le, value_ptr(node, top->index), c->info->value_type.size);

	return 0;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value);
+35 −0
Original line number Diff line number Diff line
@@ -176,4 +176,39 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
		  int (*fn)(void *context, uint64_t *keys, void *leaf),
		  void *context);


/*----------------------------------------------------------------*/

/*
 * Cursor API.  This does not follow the rolling lock convention.  Since we
 * know the order that values are required we can issue prefetches to speed
 * up iteration.  Use on a single level btree only.
 */
/* Number of slots in a cursor's node stack (see struct dm_btree_cursor). */
#define DM_BTREE_CURSOR_MAX_DEPTH 16

/* One entry on the cursor's stack: a held btree node and a position in it. */
struct cursor_node {
	struct dm_block *b;	/* block holding this btree node */
	unsigned index;		/* entry within the node the cursor is at */
};

/*
 * Iteration state.  All fields are set up by dm_btree_cursor_begin(); the
 * node stack runs from the root at nodes[0] to the current node at
 * nodes[depth - 1].
 */
struct dm_btree_cursor {
	struct dm_btree_info *info;	/* btree being iterated */
	dm_block_t root;		/* root block given to dm_btree_cursor_begin() */

	bool prefetch_leaves;		/* prefetch leaf values as block numbers? */
	unsigned depth;			/* number of nodes currently on the stack */
	struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH];
};

/*
 * Creates a fresh cursor.  If prefetch_leaves is set then it is assumed
 * the btree contains block indexes that will be prefetched.  The cursor is
 * quite large, so you probably don't want to put it on the stack.
 */
int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
			  bool prefetch_leaves, struct dm_btree_cursor *c);

/* Releases every node the cursor still holds. */
void dm_btree_cursor_end(struct dm_btree_cursor *c);

/*
 * Advances to the next entry.  Returns 0 on success, -ENODATA once the
 * entries are exhausted, or another negative errno on failure.
 */
int dm_btree_cursor_next(struct dm_btree_cursor *c);

/*
 * Fetches the current entry: *key is returned in CPU byte order, value_le
 * receives the raw on-disk value (value_type.size bytes).  Returns -ENODATA
 * if the cursor holds no nodes and -EINVAL if it is not positioned on a leaf.
 */
int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le);

#endif	/* _LINUX_DM_BTREE_H */