Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit debf12d5 authored by Artem Bityutskiy
Browse files

UBIFS: substitute the replay tree with a replay list



This patch simplifies replay even further - it removes the replay tree and
adds the replay list instead. Indeed, we just do not need to use a tree here -
all we need to do is to add all nodes to the list and then sort it. Using an
RB-tree is overkill - more code and slower. And since we replay buds in
order, we expect the nodes to follow in _mostly_ sorted order, so the merge
sort becomes much cheaper on average than an RB-tree.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
parent 074bcb9b
Loading
Loading
Loading
Loading
+74 −98
Original line number Diff line number Diff line
@@ -33,22 +33,24 @@
 */

#include "ubifs.h"
#include <linux/list_sort.h>

/**
 * struct replay_entry - replay tree entry.
 * struct replay_entry - replay list entry.
 * @lnum: logical eraseblock number of the node
 * @offs: node offset
 * @len: node length
 * @deletion: non-zero if this entry corresponds to a node deletion
 * @sqnum: node sequence number
 * @rb: links the replay tree
 * @list: links the replay list
 * @key: node key
 * @nm: directory entry name
 * @old_size: truncation old size
 * @new_size: truncation new size
 *
 * UBIFS journal replay must compare node sequence numbers, which means it must
 * build a tree of node information to insert into the TNC.
 * The replay process first scans all buds and builds the replay list, then
 * sorts the replay list in nodes sequence number order, and then inserts all
 * the replay entries to the TNC.
 */
struct replay_entry {
	int lnum;
@@ -56,7 +58,7 @@ struct replay_entry {
	int len;
	unsigned int deletion:1;
	unsigned long long sqnum;
	struct rb_node rb;
	struct list_head list;
	union ubifs_key key;
	union {
		struct qstr nm;
@@ -263,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
}

/**
 * destroy_replay_tree - destroy the replay.
 * @c: UBIFS file-system description object
 * replay_entries_cmp - compare 2 replay entries.
 * @priv: UBIFS file-system description object
 * @a: first replay entry
 * @b: second replay entry
 *
 * Destroy the replay tree.
 * This is a comparison function for 'list_sort()' which compares 2 replay
 * entries @a and @b by comparing their sequence numbers. Returns %0 if @a and
 * @b are the same entry, %1 if @a has the greater sequence number and %-1
 * otherwise.
 */
static void destroy_replay_tree(struct ubifs_info *c)
static int replay_entries_cmp(void *priv, struct list_head *a,
			      struct list_head *b)
{
	struct rb_node *this = c->replay_tree.rb_node;
	struct replay_entry *r;
	struct replay_entry *ra, *rb;

	while (this) {
		if (this->rb_left) {
			this = this->rb_left;
			continue;
		} else if (this->rb_right) {
			this = this->rb_right;
			continue;
		}
		r = rb_entry(this, struct replay_entry, rb);
		this = rb_parent(this);
		if (this) {
			if (this->rb_left == &r->rb)
				this->rb_left = NULL;
			else
				this->rb_right = NULL;
		}
		if (is_hash_key(c, &r->key))
			kfree(r->nm.name);
		kfree(r);
	}
	c->replay_tree = RB_ROOT;
	cond_resched();
	if (a == b)
		return 0;

	ra = list_entry(a, struct replay_entry, list);
	rb = list_entry(b, struct replay_entry, list);
	ubifs_assert(ra->sqnum != rb->sqnum);
	if (ra->sqnum > rb->sqnum)
		return 1;
	return -1;
}

/**
 * apply_replay_tree - apply the replay tree to the TNC.
 * apply_replay_list - apply the replay list to the TNC.
 * @c: UBIFS file-system description object
 *
 * Apply the replay tree.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 * Apply all entries in the replay list to the TNC. Returns zero in case of
 * success and a negative error code in case of failure.
 */
static int apply_replay_tree(struct ubifs_info *c)
static int apply_replay_list(struct ubifs_info *c)
{
	struct rb_node *this = rb_first(&c->replay_tree);

	while (this) {
	struct replay_entry *r;
	int err;

	list_sort(c, &c->replay_list, &replay_entries_cmp);

	list_for_each_entry(r, &c->replay_list, list) {
		cond_resched();

		r = rb_entry(this, struct replay_entry, rb);
		err = apply_replay_entry(c, r);
		if (err)
			return err;
		this = rb_next(this);
	}

	return 0;
}

/**
 * insert_node - insert a node to the replay tree.
 * destroy_replay_list - destroy the replay list.
 * @c: UBIFS file-system description object
 *
 * Destroy the replay list.
 */
static void destroy_replay_list(struct ubifs_info *c)
{
	struct replay_entry *r, *tmp;

	/*
	 * Entries are unlinked and freed while walking the list, so the "safe"
	 * iterator is used: @tmp holds the next entry before @r goes away.
	 */
	list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
		/*
		 * 'nm' is a union member, so the name pointer is only freed
		 * for hashed keys. NOTE(review): presumably these are exactly
		 * the entries created by 'insert_dent()', which allocates the
		 * name buffer separately - confirm.
		 */
		if (is_hash_key(c, &r->key))
			kfree(r->nm.name);
		list_del(&r->list);
		kfree(r);
	}
}

/**
 * insert_node - insert a node to the replay list
 * @c: UBIFS file-system description object
 * @lnum: node logical eraseblock number
 * @offs: node offset
@@ -336,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
 * @old_size: truncation old size
 * @new_size: truncation new size
 *
 * This function inserts a scanned non-direntry node to the replay tree. The
 * replay tree is an RB-tree containing @struct replay_entry elements which are
 * indexed by the sequence number. The replay tree is applied at the very end
 * of the replay process. Since the tree is sorted in sequence number order,
 * the older modifications are applied first. This function returns zero in
 * case of success and a negative error code in case of failure.
 * This function inserts a scanned non-direntry node to the replay list. The
 * replay list contains @struct replay_entry elements, and we sort this list in
 * sequence number order before applying it. The replay list is applied at the
 * very end of the replay process. Since the list is sorted in sequence number
 * order, the older modifications are applied first. This function returns zero
 * in case of success and a negative error code in case of failure.
 */
static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
		       union ubifs_key *key, unsigned long long sqnum,
		       int deletion, int *used, loff_t old_size,
		       loff_t new_size)
{
	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
	struct replay_entry *r;

	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));

	if (key_inum(c, key) >= c->highest_inum)
		c->highest_inum = key_inum(c, key);

	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
	while (*p) {
		parent = *p;
		r = rb_entry(parent, struct replay_entry, rb);
		if (sqnum < r->sqnum) {
			p = &(*p)->rb_left;
			continue;
		} else if (sqnum > r->sqnum) {
			p = &(*p)->rb_right;
			continue;
		}
		ubifs_err("duplicate sqnum in replay");
		return -EINVAL;
	}

	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
	if (!r)
		return -ENOMEM;
@@ -384,13 +381,12 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
	r->old_size = old_size;
	r->new_size = new_size;

	rb_link_node(&r->rb, parent, p);
	rb_insert_color(&r->rb, &c->replay_tree);
	list_add_tail(&r->list, &c->replay_list);
	return 0;
}

/**
 * insert_dent - insert a directory entry node into the replay tree.
 * insert_dent - insert a directory entry node into the replay list.
 * @c: UBIFS file-system description object
 * @lnum: node logical eraseblock number
 * @offs: node offset
@@ -402,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
 * @deletion: non-zero if this is a deletion
 * @used: number of bytes in use in a LEB
 *
 * This function inserts a scanned directory entry node to the replay tree.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 *
 * This function is also used for extended attribute entries because they are
 * implemented as directory entry nodes.
 * This function inserts a scanned directory entry node or an extended
 * attribute entry to the replay list. Returns zero in case of success and a
 * negative error code in case of failure.
 */
static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
		       union ubifs_key *key, const char *name, int nlen,
		       unsigned long long sqnum, int deletion, int *used)
{
	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
	struct replay_entry *r;
	char *nbuf;

	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
	if (key_inum(c, key) >= c->highest_inum)
		c->highest_inum = key_inum(c, key);

	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
	while (*p) {
		parent = *p;
		r = rb_entry(parent, struct replay_entry, rb);
		if (sqnum < r->sqnum) {
			p = &(*p)->rb_left;
			continue;
		}
		if (sqnum > r->sqnum) {
			p = &(*p)->rb_right;
			continue;
		}
		ubifs_err("duplicate sqnum in replay");
		return -EINVAL;
	}

	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
	if (!r)
		return -ENOMEM;

	nbuf = kmalloc(nlen + 1, GFP_KERNEL);
	if (!nbuf) {
		kfree(r);
@@ -458,9 +436,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
	nbuf[nlen] = '\0';
	r->nm.name = nbuf;

	ubifs_assert(!*p);
	rb_link_node(&r->rb, parent, p);
	rb_insert_color(&r->rb, &c->replay_tree);
	list_add_tail(&r->list, &c->replay_list);
	return 0;
}

@@ -1017,7 +993,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
	if (err)
		goto out;

	err = apply_replay_tree(c);
	err = apply_replay_list(c);
	if (err)
		goto out;

@@ -1039,7 +1015,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
		"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
		(unsigned long)c->highest_inum);
out:
	destroy_replay_tree(c);
	destroy_replay_list(c);
	destroy_bud_list(c);
	c->replaying = 0;
	return err;
+0 −2
Original line number Diff line number Diff line
@@ -1205,7 +1205,6 @@ struct ubifs_debug_info;
 * @replaying: %1 during journal replay
 * @mounting: %1 while mounting
 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
 * @replay_tree: temporary tree used during journal replay
 * @replay_list: temporary list used during journal replay
 * @replay_buds: list of buds to replay
 * @cs_sqnum: sequence number of first node in the log (commit start node)
@@ -1435,7 +1434,6 @@ struct ubifs_info {
	unsigned int replaying:1;
	unsigned int mounting:1;
	unsigned int remounting_rw:1;
	struct rb_root replay_tree;
	struct list_head replay_list;
	struct list_head replay_buds;
	unsigned long long cs_sqnum;