Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d9c740d2 authored by Boaz Harrosh
Browse files

exofs: Define on-disk per-inode optional layout attribute



* Layouts describe the way a file is spread on multiple devices.
  The layout information is stored in the objects attribute introduced
  in this patch.

* There can be multiple generating functions for the layout.
  Currently defined:
    - No attribute present - use below moving-window on global
      device table, all devices.
      (This is the only one currently used in exofs)
    - an obj_id generated moving window - the obj_id is a randomizing
      factor in the otherwise global map layout.
    - An explicit layout stored, including a data_map and a device
      index list.
    - More might be defined in future ...

* There are two attributes defined of the same structure:
  A-data-files-layout - This layout is used by data-files. If present
                        at a directory, all files of that directory will
                        be created with this layout.
  A-meta-data-layout - This layout is used by a directory and other
                       meta-data information. Also inherited at creation
                       of subdirectories.

* At creation time inodes are created with the layout specified above.
  A usermode utility may change the creation layout on a given directory
  or file. In the case of directories, the change will also apply to newly
  created files/subdirectories, children of that directory.
  In the simple unaltered case of a newly created exofs, no layout
  attributes are present, and all layouts adhere to the layout specified
  at the device-table.

* In case of a future file system loaded in an old exofs-driver:
  At iget(), the generating_function is inspected and, if not supported,
  an IO error is returned to the application and the inode is not
  loaded, so as not to damage any data.
  Note: After this patch we do not yet support any type of layout;
        only the RAID0 patch that enables striping at the super-block
        level will add support for RAID0 layouts above. This way we
        are past and future compatible and fully bisectable.

* Access to the device table is done by an accessor since
  it will change according to above information.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
parent 46f4d973
Loading
Loading
Loading
Loading
+39 −0
Original line number Diff line number Diff line
@@ -55,6 +55,8 @@
/* exofs Application specific page/attribute */
# define EXOFS_APAGE_FS_DATA	(OSD_APAGE_APP_DEFINED_FIRST + 3)
# define EXOFS_ATTR_INODE_DATA	1
# define EXOFS_ATTR_INODE_FILE_LAYOUT	2
# define EXOFS_ATTR_INODE_DIR_LAYOUT	3

/*
 * The maximum number of files we can have is limited by the size of the
@@ -206,4 +208,41 @@ enum {
	(((name_len) + offsetof(struct exofs_dir_entry, name)  + \
	  EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)

/*
 * The on-disk (optional) layout structure.
 * sits in an EXOFS_ATTR_INODE_FILE_LAYOUT or EXOFS_ATTR_INODE_DIR_LAYOUT
 * attribute, attached to any inode, usually to a directory.
 */

/*
 * Generating functions for an inode layout. The chosen value is what is
 * stored (as little-endian 16 bit) in exofs_on_disk_inode_layout::gen_func.
 * The numeric values are part of the on-disk format.
 */
enum exofs_inode_layout_gen_functions {
	/* Default: a window over the first n devices of the global table */
	LAYOUT_MOVING_WINDOW = 0,
	/*
	 * A stored data_map followed by a list of global-table device indexes.
	 * NOTE(review): "IMPLICT" looks like a misspelling; the identifier is
	 * left as is because renaming it would change the name other code uses.
	 */
	LAYOUT_IMPLICT = 1,
};

/*
 * On-disk value of a layout attribute. Fields are little-endian and the
 * structure is packed. gen_func selects which member of the anonymous
 * union below is meaningful.
 */
struct exofs_on_disk_inode_layout {
	__le16 gen_func; /* One of enum exofs_inode_layout_gen_functions */
	/* NOTE(review): presumably padding up to the 32-bit union - confirm */
	__le16 pad;
	union {
		/* gen_func == LAYOUT_MOVING_WINDOW (default) */
		struct exofs_layout_sliding_window {
			__le32 num_devices; /* first n devices in global-table*/
		} sliding_window __packed;

		/* gen_func == LAYOUT_IMPLICT */
		struct exofs_layout_implict_list {
			struct exofs_dt_data_map data_map;
			/* Variable array of size data_map.cb_num_comps. These
			 * are device indexes of the devices in the global table.
			 * Being a flexible array, it adds nothing to sizeof();
			 * callers must account for its entries separately.
			 */
			__le32 dev_indexes[];
		} implict __packed;
	};
} __packed;

/*
 * Number of bytes needed to hold an on-disk inode layout that carries up to
 * @max_devs device indexes: the fixed structure plus one __le32 per device.
 */
static inline size_t exofs_on_disk_inode_layout_size(unsigned max_devs)
{
	size_t dev_indexes_bytes = max_devs * sizeof(__le32);

	return sizeof(struct exofs_on_disk_inode_layout) + dev_indexes_bytes;
}

#endif /*ifndef __EXOFS_COM_H__*/
+6 −0
Original line number Diff line number Diff line
@@ -185,6 +185,12 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
	return container_of(inode, struct exofs_i_info, vfs_inode);
}

/*
 * Map an index within a layout to an index into the global device table.
 *
 * @layout:       the layout being addressed
 * @obj_no:       object number of the object being accessed
 * @layout_index: index within @layout (the stripe index)
 *
 * Returns the associated global dev_index.
 */
unsigned exofs_layout_od_id(struct exofs_layout *layout,
			    osd_id obj_no, unsigned layout_index);
/*
 * Maximum count of links to a file
 */
+51 −5
Original line number Diff line number Diff line
@@ -859,6 +859,15 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
	return error;
}

/*
 * Descriptors of the two optional per-inode layout attributes, both kept in
 * the EXOFS_APAGE_FS_DATA attribute page: one for the data-files layout and
 * one for the directory (meta-data) layout.
 * NOTE(review): the trailing 0 is presumably the attribute length; confirm
 * against ATTR_DEF. exofs_get_inode() sets .len on its own copies before use.
 */
static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
	EXOFS_APAGE_FS_DATA,
	EXOFS_ATTR_INODE_FILE_LAYOUT,
	0);
static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF(
	EXOFS_APAGE_FS_DATA,
	EXOFS_ATTR_INODE_DIR_LAYOUT,
	0);

/*
 * Read an inode from the OSD, and return it as is.  We also return the size
 * attribute in the 'obj_size' argument.
@@ -867,11 +876,16 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
		    struct exofs_fcb *inode, uint64_t *obj_size)
{
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct osd_attr attrs[2];
	struct osd_attr attrs[] = {
		[0] = g_attr_inode_data,
		[1] = g_attr_inode_file_layout,
		[2] = g_attr_inode_dir_layout,
		[3] = g_attr_logical_length,
	};
	struct exofs_io_state *ios;
	struct exofs_on_disk_inode_layout *layout;
	int ret;

	*obj_size = ~0;
	ret = exofs_get_io_state(&sbi->layout, &ios);
	if (unlikely(ret)) {
		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
@@ -882,8 +896,9 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
	exofs_make_credential(oi->i_cred, &ios->obj);
	ios->cred = oi->i_cred;

	attrs[0] = g_attr_inode_data;
	attrs[1] = g_attr_logical_length;
	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);

	ios->in_attr = attrs;
	ios->in_attr_len = ARRAY_SIZE(attrs);

@@ -900,12 +915,43 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
	memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);

	ret = extract_attr_from_ios(ios, &attrs[1]);
	if (ret) {
		EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
		goto out;
	}
	if (attrs[1].len) {
		layout = attrs[1].val_ptr;
		if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
			EXOFS_ERR("%s: unsupported files layout %d\n",
				__func__, layout->gen_func);
			ret = -ENOTSUPP;
			goto out;
		}
	}

	ret = extract_attr_from_ios(ios, &attrs[2]);
	if (ret) {
		EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
		goto out;
	}
	if (attrs[2].len) {
		layout = attrs[2].val_ptr;
		if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
			EXOFS_ERR("%s: unsupported meta-data layout %d\n",
				__func__, layout->gen_func);
			ret = -ENOTSUPP;
			goto out;
		}
	}

	*obj_size = ~0;
	ret = extract_attr_from_ios(ios, &attrs[3]);
	if (ret) {
		EXOFS_ERR("%s: extract_attr of logical_length failed\n",
			  __func__);
		goto out;
	}
	*obj_size = get_unaligned_be64(attrs[1].val_ptr);
	*obj_size = get_unaligned_be64(attrs[3].val_ptr);

out:
	exofs_put_io_state(ios);
+18 −5
Original line number Diff line number Diff line
@@ -107,6 +107,19 @@ void exofs_put_io_state(struct exofs_io_state *ios)
	}
}

/*
 * Translate an index within @layout into an index of the global device
 * table. For now the mapping is the identity: @layout and @obj_no are not
 * consulted and @layout_index is handed back unchanged.
 */
unsigned exofs_layout_od_id(struct exofs_layout *layout,
			    osd_id obj_no, unsigned layout_index)
{
	unsigned dev_index = layout_index;

	return dev_index;
}

/*
 * Return the osd device that serves position @layout_index of the layout
 * attached to @ios. The position is first translated to a device-table
 * index by exofs_layout_od_id(), using the id of the object in @ios.
 */
static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
					   unsigned layout_index)
{
	unsigned dev_index;

	dev_index = exofs_layout_od_id(ios->layout, ios->obj.id, layout_index);
	return ios->layout->s_ods[dev_index];
}

static void _sync_done(struct exofs_io_state *ios, void *p)
{
	struct completion *waiting = p;
@@ -242,7 +255,7 @@ int exofs_sbi_create(struct exofs_io_state *ios)
	for (i = 0; i < ios->layout->s_numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL);
		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
@@ -266,7 +279,7 @@ int exofs_sbi_remove(struct exofs_io_state *ios)
	for (i = 0; i < ios->layout->s_numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL);
		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
@@ -290,7 +303,7 @@ int exofs_sbi_write(struct exofs_io_state *ios)
	for (i = 0; i < ios->layout->s_numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL);
		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
@@ -361,7 +374,7 @@ int exofs_sbi_read(struct exofs_io_state *ios)
	unsigned first_dev = (unsigned)ios->obj.id;

	first_dev %= ios->layout->s_numdevs;
	or = osd_start_request(ios->layout->s_ods[first_dev], GFP_KERNEL);
	or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
	if (unlikely(!or)) {
		EXOFS_ERR("%s: osd_start_request failed\n", __func__);
		return -ENOMEM;
@@ -442,7 +455,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
	for (i = 0; i < sbi->layout.s_numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(sbi->layout.s_ods[i], GFP_KERNEL);
		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;