Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d866d875 authored by Boaz Harrosh
Browse files

ore/exofs: Change the type of the devices array (API change)



In the pNFS obj-LD the device table at the layout level needs
to point to a device_cache node, where it is possible and likely
that many layouts will point to the same device-nodes.

In Exofs we have a more orderly structure where we have a single
array of devices that repeats twice for a round-robin view of the
device table.

This patch moves to a model that can be used by the pNFS obj-LD
where struct ore_components holds an array of ore_dev-pointers.
(ore_dev is newly defined and contains a struct osd_dev *od
 member)

Each pointer in the array of pointers will point into a bigger
user-defined dev_struct, which can be accessed by use of the
container_of macro.

In Exofs, a new __alloc_dev_table() function allocates the
ore_dev-pointers array as well as an exofs_dev array in a single
allocation, and does the address arithmetic needed to set everything
pointing correctly. It still keeps the twice-bigger table trick for
the inodes' round-robin view of the table.

The device table is now always allocated dynamically, even in the
single-device case, so it is unconditionally freed at umount.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
parent eb507bc1
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -53,6 +53,10 @@
/* u64 has problems with printk this will cast it to unsigned long long */
#define _LLU(x) (unsigned long long)(x)

/*
 * Per-device record kept by exofs. It embeds the generic ore_dev so that an
 * ore_components device array can point at the @ored member while exofs
 * keeps its own data next to it.
 */
struct exofs_dev {
	/* Generic ORE device; this is the member the ore device table points to */
	struct ore_dev ored;
	/* exofs device id */
	unsigned did;
};
/*
 * our extension to the in-memory superblock
 */
@@ -69,7 +73,6 @@ struct exofs_sb_info {
	struct ore_layout	layout;		/* Default files layout       */
	struct ore_comp one_comp;		/* id & cred of partition id=0*/
	struct ore_components oc;		/* comps for the partition    */
	struct osd_dev	*_min_one_dev[1];	/* Place holder for one dev   */
};

/*
@@ -214,13 +217,14 @@ static inline void exofs_init_comps(struct ore_components *oc,
	one_comp->obj.id = oid;
	exofs_make_credential(one_comp->cred, &one_comp->obj);

	oc->numdevs = sbi->oc.numdevs;
	oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 *
							sbi->layout.group_count;
	oc->single_comp = EC_SINGLE_COMP;
	oc->comps = one_comp;

	/* Round robin device view of the table */
	first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs;
	oc->ods = sbi->oc.ods + first_dev;
	oc->ods = &sbi->oc.ods[first_dev];
}

#endif
+1 −1
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)

/*
 * Return the osd_dev found at position @index in the components (ios->oc)
 * of @ios.
 *
 * NOTE(review): this listing shows two return statements back to back, so
 * only the first one can ever execute. They look like the before/after lines
 * of the same change (direct array access vs. the ore_comp_dev() accessor);
 * confirm which one is meant to remain.
 */
static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
{
	return ios->oc->ods[index];
	return ore_comp_dev(ios->oc, index);
}

int  ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
+61 −38
Original line number Diff line number Diff line
@@ -431,16 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path,

/*
 * Tear down an exofs_sb_info: walk the device table of sbi->oc from the last
 * index down to 0, and for every slot that still holds a device clear the
 * slot and hand the device to osduld_put_device(). Afterwards the device
 * table (sbi->oc.ods) and @sbi itself are kfree()d.
 *
 * NOTE(review): as listed here the body contains two loop headers, two
 * declarations of `i` and `od`, two ways of clearing the slot, and more
 * opening than closing braces. This looks like the old and the new version
 * of the function interleaved; confirm which lines are meant to remain
 * before relying on this text as compilable code.
 */
static void exofs_free_sbi(struct exofs_sb_info *sbi)
{
	while (sbi->oc.numdevs) {
		int i = --sbi->oc.numdevs;
		struct osd_dev *od = sbi->oc.ods[i];
	unsigned numdevs = sbi->oc.numdevs;

	while (numdevs) {
		unsigned i = --numdevs;
		struct osd_dev *od = ore_comp_dev(&sbi->oc, i);

		if (od) {
			sbi->oc.ods[i] = NULL;
			ore_comp_set_dev(&sbi->oc, i, NULL);
			osduld_put_device(od);
		}
	}
	if (sbi->oc.ods != sbi->_min_one_dev)
	kfree(sbi->oc.ods);
	kfree(sbi);
}
@@ -468,7 +469,7 @@ static void exofs_put_super(struct super_block *sb)
				  msecs_to_jiffies(100));
	}

	_exofs_print_device("Unmounting", NULL, sbi->oc.ods[0],
	_exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0),
			    sbi->one_comp.obj.partition);

	bdi_destroy(&sbi->bdi);
@@ -592,12 +593,40 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
	return !(odi->systemid_len || odi->osdname_len);
}

int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
		      struct exofs_dev **peds)
{
	struct __alloc_ore_devs_and_exofs_devs {
		/* Twice bigger table: See exofs_init_comps() and comment at
		 * exofs_read_lookup_dev_table()
		 */
		struct ore_dev *oreds[numdevs * 2 - 1];
		struct exofs_dev eds[numdevs];
	} *aoded;
	struct exofs_dev *eds;
	unsigned i;

	aoded = kzalloc(sizeof(*aoded), GFP_KERNEL);
	if (unlikely(!aoded)) {
		EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
			  numdevs);
		return -ENOMEM;
	}

	sbi->oc.ods = aoded->oreds;
	*peds = eds = aoded->eds;
	for (i = 0; i < numdevs; ++i)
		aoded->oreds[i] = &eds[i].ored;
	return 0;
}

static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
				       struct osd_dev *fscb_od,
				       unsigned table_count)
{
	struct ore_comp comp;
	struct exofs_device_table *dt;
	struct exofs_dev *eds;
	unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
					     sizeof(*dt);
	unsigned numdevs, i;
@@ -634,20 +663,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
	if (unlikely(ret))
		goto out;

	if (likely(numdevs > 1)) {
		unsigned size = numdevs * sizeof(sbi->oc.ods[0]);

		/* Twice bigger table: See exofs_init_comps() and below
		 * comment
		 */
		sbi->oc.ods = kzalloc(size + size - 1, GFP_KERNEL);
		if (unlikely(!sbi->oc.ods)) {
			EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
				  numdevs);
			ret = -ENOMEM;
	ret = __alloc_dev_table(sbi, numdevs, &eds);
	if (unlikely(ret))
		goto out;
		}
	}
	/* exofs round-robins the device table view according to inode
	 * number. We hold a: twice bigger table hence inodes can point
	 * to any device and have a sequential view of the table
	 * starting at this device. See exofs_init_comps()
	 */
	memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0],
		(numdevs - 1) * sizeof(sbi->oc.ods[0]));

	for (i = 0; i < numdevs; i++) {
		struct exofs_fscb fscb;
@@ -663,12 +688,15 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
		printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
		       i, odi.osdname);

		/* the exofs id is currently the table index */
		eds[i].did = i;

		/* On all devices the device table is identical. The user can
		 * specify any one of the participating devices on the command
		 * line. We always keep them in device-table order.
		 */
		if (fscb_od && osduld_device_same(fscb_od, &odi)) {
			sbi->oc.ods[i] = fscb_od;
			eds[i].ored.od = fscb_od;
			++sbi->oc.numdevs;
			fscb_od = NULL;
			continue;
@@ -682,7 +710,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
			goto out;
		}

		sbi->oc.ods[i] = od;
		eds[i].ored.od = od;
		++sbi->oc.numdevs;

		/* Read the fscb of the other devices to make sure the FS
@@ -705,22 +733,11 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,

out:
	kfree(dt);
	if (likely(!ret)) {
		unsigned numdevs = sbi->oc.numdevs;

		if (unlikely(fscb_od)) {
	if (unlikely(fscb_od && !ret)) {
			EXOFS_ERR("ERROR: Bad device-table container device not present\n");
			osduld_put_device(fscb_od);
			return -EINVAL;
	}
		/* exofs round-robins the device table view according to inode
		 * number. We hold a: twice bigger table hence inodes can point
		 * to any device and have a sequential view of the table
		 * starting at this device. See exofs_init_comps()
		 */
		for (i = 0; i < numdevs - 1; ++i)
			sbi->oc.ods[i + numdevs] = sbi->oc.ods[i];
	}
	return ret;
}

@@ -773,7 +790,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
	sbi->oc.numdevs = 1;
	sbi->oc.single_comp = EC_SINGLE_COMP;
	sbi->oc.comps = &sbi->one_comp;
	sbi->oc.ods = sbi->_min_one_dev;

	/* fill in some other data by hand */
	memset(sb->s_id, 0, sizeof(sb->s_id));
@@ -822,7 +838,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
		if (unlikely(ret))
			goto free_sbi;
	} else {
		sbi->oc.ods[0] = od;
		struct exofs_dev *eds;

		ret = __alloc_dev_table(sbi, 1, &eds);
		if (unlikely(ret))
			goto free_sbi;

		ore_comp_set_dev(&sbi->oc, 0, od);
	}

	__sbi_read_stats(sbi);
@@ -862,7 +884,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
		goto free_sbi;
	}

	_exofs_print_device("Mounting", opts->dev_name, sbi->oc.ods[0],
	_exofs_print_device("Mounting", opts->dev_name,
			    ore_comp_dev(&sbi->oc, 0),
			    sbi->one_comp.obj.partition);
	return 0;

+25 −1
Original line number Diff line number Diff line
@@ -44,6 +44,10 @@ struct ore_layout {
	unsigned group_count;
};

/*
 * Generic ORE view of one device: a thin wrapper around the osd_dev pointer.
 * It is meant to be embedded as a member of a bigger, user-defined device
 * structure.
 */
struct ore_dev {
	/* The underlying OSD device; may be NULL while no device is set */
	struct osd_dev *od;
};

struct ore_components {
	unsigned	numdevs;		/* Num of devices in array    */
	/* If @single_comp == EC_SINGLE_COMP, @comps points to a single
@@ -53,9 +57,29 @@ struct ore_components {
		EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff
	}		single_comp;
	struct ore_comp	*comps;
	struct osd_dev	**ods;			/* osd_dev array              */

	/* Array of pointers to struct ore_dev. Users will usually have these
	 * point into a bigger struct which contains an "ore_dev ored" member,
	 * and use container_of(oc->ods[i], struct foo_dev, ored) to access the
	 * bigger structure.
	 */
	struct ore_dev	**ods;
};

/*
 * ore_comp_dev - fetch the osd_dev stored at logical device index @i of @oc.
 * Triggers BUG_ON() when @i is not below oc->numdevs.
 */
static inline struct osd_dev *ore_comp_dev(const struct ore_components *oc,
					   unsigned i)
{
	struct osd_dev *od;

	BUG_ON(i >= oc->numdevs);
	od = oc->ods[i]->od;
	return od;
}

/*
 * ore_comp_set_dev - store @od as the osd_dev of device slot @i of @oc.
 * @od may be NULL to clear the slot. No bounds check is done on @i, and
 * oc->ods[i] must already point at a valid ore_dev.
 */
static inline void ore_comp_set_dev(struct ore_components *oc, unsigned i,
				    struct osd_dev *od)
{
	struct osd_dev **slot = &oc->ods[i]->od;

	*slot = od;
}

struct ore_striping_info {
	u64 obj_offset;
	u64 group_length;