Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1cea312a authored by Boaz Harrosh
Browse files

exofs: Write sbi->s_nextid as part of the Create command



Before when creating a new inode, we'd set the sb->s_dirt flag,
and sometime later the system would write out s_nextid as part
of the sb_info. Also on inode sync we would force the sb sync
as well.

Define the s_nextid as a new partition attribute and set it
every time we create a new object.
At mount we read it from its new place.

We now never set sb->s_dirt anywhere in exofs. write_super
is actually never called. The call to exofs_write_super from
exofs_put_super is also removed because the VFS always calls
->sync_fs before calling ->put_super twice.

To stay backward-and-forward compatible we also write the old
s_nextid in the super_block object at unmount, and support zero
length attribute on mount.

This also fixes a BUG where in layouts when group_width was not
a divisor of EXOFS_SUPER_ID (0x10000) the s_nextid was not read
from the device it was written to. Because of the sliding window
layout trick, and because the read was always done from the 0
device but the write was done via the raid engine that might slide
the device view. Now we read and write through the raid engine.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
parent 9ed96484
Loading
Loading
Loading
Loading
+16 −2
Original line number Diff line number Diff line
@@ -53,10 +53,14 @@
#define EXOFS_ROOT_ID	0x10002	/* object ID for root directory */

/* exofs Application specific page/attribute */
/*
 * Inode attrs: the attribute page, and the attribute numbers inside that
 * page, that exofs uses for per-inode data.
 */
# define EXOFS_APAGE_FS_DATA	(OSD_APAGE_APP_DEFINED_FIRST + 3)
# define EXOFS_ATTR_INODE_DATA	1
# define EXOFS_ATTR_INODE_FILE_LAYOUT	2
# define EXOFS_ATTR_INODE_DIR_LAYOUT	3
/*
 * Partition attrs: the [page, attribute] pair under which
 * struct exofs_sb_stats is stored on the FS partition.
 * NOTE(review): the page number is built from a raw 0xF0000000U base rather
 * than a named OSD_APAGE_* constant like the inode page above - confirm the
 * base against the OSD attribute-page numbering.
 */
# define EXOFS_APAGE_SB_DATA	(0xF0000000U + 3)
# define EXOFS_ATTR_SB_STATS	1

/*
 * The maximum number of files we can have is limited by the size of the
@@ -86,8 +90,8 @@ enum {
 */
enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
struct exofs_fscb {
	__le64  s_nextid;	/* Highest object ID used */
	__le64  s_numfiles;	/* Number of files on fs */
	__le64  s_nextid;	/* Only used after mkfs */
	__le64  s_numfiles;	/* Only used after mkfs */
	__le32	s_version;	/* == EXOFS_FSCB_VER */
	__le16  s_magic;	/* Magic signature */
	__le16  s_newfs;	/* Non-zero if this is a new fs */
@@ -97,6 +101,16 @@ struct exofs_fscb {
	__le64	s_dev_table_count; /* == 0 means no dev_table */
} __packed;

/*
 * The writeable superblock information, kept as an attribute of the FS
 * partition under [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS]. It is written
 * together with the create command, to atomically persist the sb writeable
 * information.
 *
 * Both members are little-endian 64-bit values, and the struct is packed, so
 * its size is part of the stored format.
 */
struct exofs_sb_stats {
	__le64  s_nextid;	/* Highest object ID used */
	__le64  s_numfiles;	/* Number of files on fs */
} __packed;

/*
 * Describes the raid used in the FS. It is part of the device table.
 * This here is taken from the pNFS-objects definition. In exofs we
+2 −2
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@ struct exofs_layout {
 * our extension to the in-memory superblock
 */
struct exofs_sb_info {
	struct exofs_fscb s_fscb;		/* Written often, pre-allocate*/
	struct exofs_sb_stats s_ess;		/* Written often, pre-allocate*/
	int		s_timeout;		/* timeout for OSD operations */
	uint64_t	s_nextid;		/* highest object ID used     */
	uint32_t	s_numfiles;		/* number of files on fs      */
@@ -281,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
		    struct inode *);

/* super.c               */
int exofs_sync_fs(struct super_block *sb, int wait);
int exofs_sbi_write_stats(struct exofs_sb_info *sbi);

/*********************
 * operation vectors *
+1 −10
Original line number Diff line number Diff line
@@ -45,17 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
static int exofs_file_fsync(struct file *filp, int datasync)
{
	int ret;
	struct inode *inode = filp->f_mapping->host;
	struct super_block *sb;

	ret = sync_inode_metadata(inode, 1);

	/* This is a good place to write the sb */
	/* TODO: Sechedule an sb-sync on create */
	sb = inode->i_sb;
	if (sb->s_dirt)
		exofs_sync_fs(sb, 1);

	ret = sync_inode_metadata(filp->f_mapping->host, 1);
	return ret;
}

+3 −1
Original line number Diff line number Diff line
@@ -1102,6 +1102,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
	}
	return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
}

/*
 * Callback function from exofs_new_inode().  The important thing is that we
 * set the obj_created flag so that other methods know that the object exists on
@@ -1160,7 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
	sbi = sb->s_fs_info;

	inode->i_mapping->backing_dev_info = sb->s_bdi;
	sb->s_dirt = 1;
	inode_init_owner(inode, dir, mode);
	inode->i_ino = sbi->s_nextid++;
	inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1171,6 +1171,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
	spin_unlock(&sbi->s_next_gen_lock);
	insert_inode_hash(inode);

	exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */

	mark_inode_dirty(inode);

	ret = exofs_get_io_state(&sbi->layout, &ios);
+119 −16
Original line number Diff line number Diff line
@@ -213,6 +213,101 @@ static void destroy_inodecache(void)
static const struct super_operations exofs_sops;
static const struct export_operations exofs_export_ops;

/*
 * Template descriptor for the superblock-stats attribute, built from the
 * EXOFS_APAGE_SB_DATA page, the EXOFS_ATTR_SB_STATS attribute number and the
 * size of struct exofs_sb_stats. It is copied into a local attrs[] array by
 * both __sbi_read_stats() and exofs_sbi_write_stats().
 */
static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
	EXOFS_APAGE_SB_DATA,
	EXOFS_ATTR_SB_STATS,
	sizeof(struct exofs_sb_stats));

/*
 * __sbi_read_stats - load s_nextid/s_numfiles from the sb-stats attribute
 * @sbi: in-memory superblock info; ->layout and ->s_cred must already be set.
 *
 * Reads the [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] attribute through
 * exofs_sbi_read() and, when it is present, overrides sbi->s_nextid and
 * sbi->s_numfiles with the stored values.
 *
 * A zero-length attribute is not an error: sbi is then left untouched, so
 * whatever the caller filled in beforehand stays in effect.
 *
 * Returns 0 on success (including the zero-length case), the error from
 * exofs_get_io_state()/exofs_sbi_read()/extract_attr_from_ios() on failure,
 * or -EINVAL when the attribute exists but does not have the size of
 * struct exofs_sb_stats. sbi is not modified on any error path.
 */
static int __sbi_read_stats(struct exofs_sb_info *sbi)
{
	struct osd_attr attrs[] = {
		[0] = g_attr_sb_stats,
	};
	struct exofs_io_state *ios;
	int ret;

	ret = exofs_get_io_state(&sbi->layout, &ios);
	if (unlikely(ret)) {
		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
		return ret;
	}

	ios->cred = sbi->s_cred;

	ios->in_attr = attrs;
	ios->in_attr_len = ARRAY_SIZE(attrs);

	ret = exofs_sbi_read(ios);
	if (unlikely(ret)) {
		EXOFS_ERR("Error reading super_block stats => %d\n", ret);
		goto out;
	}

	ret = extract_attr_from_ios(ios, &attrs[0]);
	if (ret) {
		EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__);
		goto out;
	}
	if (attrs[0].len) {
		struct exofs_sb_stats *ess;

		if (unlikely(attrs[0].len != sizeof(*ess))) {
			EXOFS_ERR("%s: Wrong version of exofs_sb_stats "
				  "size(%d) != expected(%zd)\n",
				  __func__, attrs[0].len, sizeof(*ess));
			/* Do not report success after logging an error */
			ret = -EINVAL;
			goto out;
		}

		ess = attrs[0].val_ptr;
		sbi->s_nextid = le64_to_cpu(ess->s_nextid);
		/*
		 * s_numfiles is __le64 in the stored struct and is written with
		 * cpu_to_le64(), so it must be converted as 64-bit before it
		 * is narrowed into the in-memory counter.
		 */
		sbi->s_numfiles = le64_to_cpu(ess->s_numfiles);
	}

out:
	exofs_put_io_state(ios);
	return ret;
}

/*
 * Completion callback installed as ios->done by exofs_sbi_write_stats().
 * The outcome of the write is not inspected and @p is unused; the only job
 * here is to drop the io_state that was taken for the write.
 */
static void stats_done(struct exofs_io_state *ios, void *p)
{
	/* Nothing to check or report, just release the io_state */
	exofs_put_io_state(ios);
}

/*
 * exofs_sbi_write_stats - asynchronously write the sb-stats attribute
 * @sbi: in-memory superblock info whose s_nextid/s_numfiles are persisted.
 *
 * Snapshots sbi->s_nextid and sbi->s_numfiles, in little-endian form, into the
 * pre-allocated sbi->s_ess and submits them as the
 * [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] attribute via exofs_sbi_write().
 * The io_state is released by stats_done() once the write is submitted
 * successfully, or right here when submission fails.
 *
 * Returns 0 when the write was submitted, otherwise the error from
 * exofs_get_io_state() or exofs_sbi_write().
 */
int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
{
	struct osd_attr stats_attr[] = {
		[0] = g_attr_sb_stats,
	};
	struct exofs_io_state *ios;
	int err;

	err = exofs_get_io_state(&sbi->layout, &ios);
	if (unlikely(err)) {
		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
		return err;
	}

	/* Refresh the on-disk image of the counters and point the attr at it */
	sbi->s_ess.s_nextid   = cpu_to_le64(sbi->s_nextid);
	sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
	stats_attr[0].val_ptr = &sbi->s_ess;

	/*
	 * NOTE(review): stats_attr lives on this stack frame while the write
	 * completes asynchronously; presumably exofs_sbi_write() is done with
	 * the array before it returns - confirm.
	 */
	ios->out_attr = stats_attr;
	ios->out_attr_len = ARRAY_SIZE(stats_attr);
	ios->cred = sbi->s_cred;
	ios->private = sbi;
	ios->done = stats_done;

	err = exofs_sbi_write(ios);
	if (unlikely(err)) {
		EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
		/* stats_done() will not run for us; drop the io_state here */
		exofs_put_io_state(ios);
	}

	return err;
}

/*
 * Write the superblock to the OSD
 */
@@ -223,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait)
	struct exofs_io_state *ios;
	int ret = -ENOMEM;

	lock_super(sb);
	fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
	if (unlikely(!fscb))
		return -ENOMEM;

	sbi = sb->s_fs_info;
	fscb = &sbi->s_fscb;

	/* NOTE: We no longer dirty the super_block anywhere in exofs. The
	 * reason we write the fscb here on unmount is so we can stay backwards
	 * compatible with fscb->s_version == 1. (What we are not compatible
	 * with is if a new version FS crashed and then we try to mount an old
	 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
	 * the writeable info is set in exofs_sbi_write_stats() above.
	 */
	ret = exofs_get_io_state(&sbi->layout, &ios);
	if (ret)
	if (unlikely(ret))
		goto out;

	/* Note: We only write the changing part of the fscb. .i.e upto the
	 *       the fscb->s_dev_table_oid member. There is no read-modify-write
	 *       here.
	 */
	lock_super(sb);

	ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
	memset(fscb, 0, ios->length);
	fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -249,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait)
	ios->cred = sbi->s_cred;

	ret = exofs_sbi_write(ios);
	if (unlikely(ret)) {
	if (unlikely(ret))
		EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
		goto out;
	}
	else
		sb->s_dirt = 0;


	unlock_super(sb);
out:
	EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
	exofs_put_io_state(ios);
	unlock_super(sb);
	kfree(fscb);
	return ret;
}

@@ -302,9 +405,6 @@ static void exofs_put_super(struct super_block *sb)
	int num_pend;
	struct exofs_sb_info *sbi = sb->s_fs_info;

	if (sb->s_dirt)
		exofs_write_super(sb);

	/* make sure there are no pending commands */
	for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
	     num_pend = atomic_read(&sbi->s_curr_pending)) {
@@ -629,6 +729,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
		goto free_sbi;

	sb->s_magic = le16_to_cpu(fscb.s_magic);
	/* NOTE: we read below to be backward compatible with old versions */
	sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
	sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);

@@ -639,7 +740,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
		ret = -EINVAL;
		goto free_sbi;
	}
	if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
	if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {
		EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
			  EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
		ret = -EINVAL;
@@ -657,6 +758,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
			goto free_sbi;
	}

	__sbi_read_stats(sbi);

	/* set up operation vectors */
	sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
	sb->s_bdi = &sbi->bdi;