ore: Only IO one group at a time (API change) (b916c5cd) · Commits · e / devices / android_kernel_oneplus_sm8150

fs/exofs/inode.c

+85 −15

Original line number	Diff line number	Diff line
		@@ -259,6 +259,46 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
		}
		}

		static int _maybe_not_all_in_one_io(struct ore_io_state *ios,
		struct page_collect pcol_src, struct page_collect pcol)
		{
		/* length was wrong or offset was not page aligned */
		BUG_ON(pcol_src->nr_pages < ios->nr_pages);

		if (pcol_src->nr_pages > ios->nr_pages) {
		struct page **src_page;
		unsigned pages_less = pcol_src->nr_pages - ios->nr_pages;
		unsigned long len_less = pcol_src->length - ios->length;
		unsigned i;
		int ret;

		/* This IO was trimmed */
		pcol_src->nr_pages = ios->nr_pages;
		pcol_src->length = ios->length;

		/* Left over pages are passed to the next io */
		pcol->expected_pages += pages_less;
		pcol->nr_pages = pages_less;
		pcol->length = len_less;
		src_page = pcol_src->pages + pcol_src->nr_pages;
		pcol->pg_first = (*src_page)->index;

		ret = pcol_try_alloc(pcol);
		if (unlikely(ret))
		return ret;

		for (i = 0; i < pages_less; ++i)
		pcol->pages[i] = *src_page++;

		EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x "
		"pages_less=0x%x expected_pages=0x%x "
		"next_offset=0x%llx next_len=0x%lx\n",
		pcol_src->nr_pages, pages_less, pcol->expected_pages,
		pcol->pg_first * PAGE_SIZE, pcol->length);
		}
		return 0;
		}

		static int read_exec(struct page_collect *pcol)
		{
		struct exofs_i_info *oi = exofs_i(pcol->inode);
		@@ -280,7 +320,6 @@ static int read_exec(struct page_collect *pcol)

		ios = pcol->ios;
		ios->pages = pcol->pages;
		ios->nr_pages = pcol->nr_pages;

		if (pcol->read_4_write) {
		ore_read(pcol->ios);
		@@ -296,17 +335,23 @@ static int read_exec(struct page_collect *pcol)
		pcol_copy = pcol;
		ios->done = readpages_done;
		ios->private = pcol_copy;

		/* pages ownership was passed to pcol_copy */
		_pcol_reset(pcol);

		ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
		if (unlikely(ret))
		goto err;

		EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n",
		pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));

		ret = ore_read(ios);
		if (unlikely(ret))
		goto err;

		atomic_inc(&pcol->sbi->s_curr_pending);

		EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
		oi->one_comp.obj.id, _LLU(ios->offset), pcol->length);

		/* pages ownership was passed to pcol_copy */
		_pcol_reset(pcol);
		return 0;

		err:
		@@ -429,6 +474,10 @@ static int exofs_readpages(struct file file, struct address_space mapping,
		return ret;
		}

		ret = read_exec(&pcol);
		if (unlikely(ret))
		return ret;

		return read_exec(&pcol);
		}

		@@ -519,7 +568,6 @@ static int write_exec(struct page_collect *pcol)
		ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false,
		pcol->pg_first << PAGE_CACHE_SHIFT,
		pcol->length, &pcol->ios);

		if (unlikely(ret))
		goto err;

		@@ -534,10 +582,19 @@ static int write_exec(struct page_collect *pcol)

		ios = pcol->ios;
		ios->pages = pcol_copy->pages;
		ios->nr_pages = pcol_copy->nr_pages;
		ios->done = writepages_done;
		ios->private = pcol_copy;

		/* pages ownership was passed to pcol_copy */
		_pcol_reset(pcol);

		ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
		if (unlikely(ret))
		goto err;

		EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n",
		pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));

		ret = ore_write(ios);
		if (unlikely(ret)) {
		EXOFS_ERR("write_exec: ore_write() Failed\n");
		@@ -545,11 +602,6 @@ static int write_exec(struct page_collect *pcol)
		}

		atomic_inc(&pcol->sbi->s_curr_pending);
		EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
		pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
		pcol->length);
		/* pages ownership was passed to pcol_copy */
		_pcol_reset(pcol);
		return 0;

		err:
		@@ -689,12 +741,30 @@ static int exofs_writepages(struct address_space *mapping,
		_pcol_init(&pcol, expected_pages, mapping->host);

		ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
		if (ret) {
		if (unlikely(ret)) {
		EXOFS_ERR("write_cache_pages => %d\n", ret);
		return ret;
		}

		return write_exec(&pcol);
		ret = write_exec(&pcol);
		if (unlikely(ret))
		return ret;

		if (wbc->sync_mode == WB_SYNC_ALL) {
		return write_exec(&pcol); /* pump the last reminder */
		} else if (pcol.nr_pages) {
		/* not SYNC let the reminder join the next writeout */
		unsigned i;

		for (i = 0; i < pcol.nr_pages; i++) {
		struct page *page = pcol.pages[i];

		end_page_writeback(page);
		set_page_dirty(page);
		unlock_page(page);
		}
		}
		return 0;
		}

		static int exofs_writepage(struct page page, struct writeback_control wbc)

fs/exofs/ore.c

+69 −36

Original line number	Diff line number	Diff line
		@@ -47,6 +47,9 @@ MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
		MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
		MODULE_LICENSE("GPL");

		/* Forward declaration: ore_get_rw_state() below calls this to obtain the
		 * group_length it clamps the requested IO length to.
		 */
		static void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
		struct ore_striping_info *si);

		static u8 _ios_cred(struct ore_io_state ios, unsigned index)
		{
		return ios->oc->comps[index & ios->oc->single_comp].cred;
		@@ -62,8 +65,8 @@ static struct osd_dev _ios_od(struct ore_io_state ios, unsigned index)
		return ore_comp_dev(ios->oc, index);
		}

		int ore_get_rw_state(struct ore_layout layout, struct ore_components oc,
		bool is_reading, u64 offset, u64 length,
		static int _get_io_state(struct ore_layout *layout,
		struct ore_components *oc, unsigned numdevs,
		struct ore_io_state **pios)
		{
		struct ore_io_state *ios;
		@@ -71,29 +74,76 @@ int ore_get_rw_state(struct ore_layout layout, struct ore_components oc,
		/*TODO: Maybe use kmem_cach per sbi of size
		* exofs_io_state_size(layout->s_numdevs)
		*/
		ios = kzalloc(ore_io_state_size(oc->numdevs), GFP_KERNEL);
		ios = kzalloc(ore_io_state_size(numdevs), GFP_KERNEL);
		if (unlikely(!ios)) {
		ORE_DBGMSG("Failed kzalloc bytes=%d\n",
		ore_io_state_size(oc->numdevs));
		ore_io_state_size(numdevs));
		*pios = NULL;
		return -ENOMEM;
		}

		ios->layout = layout;
		ios->oc = oc;
		ios->offset = offset;
		ios->length = length;
		*pios = ios;
		return 0;
		}

		/* Allocate an io_state for only a single group of devices
		 *
		 * If a user needs to call ore_read/write() this version must be used because
		 * it allocates extra stuff for striping and raid.
		 * The ore might decide to only IO less than @length bytes due to alignments
		 * and constraints as follows:
		 * - The IO cannot cross group boundary.
		 * - In raid5/6 the end of the IO must align at end of a stripe, e.g.
		 *   (@offset + @length) % strip_size == 0. Or the complete range is within a
		 *   single stripe.
		 * - Memory condition only permitted a shorter IO. (A user can use @length=~0
		 *   and check the returned ios->length for max_io_size.)
		 *
		 * The caller must check returned ios->length (and/or ios->nr_pages) and
		 * re-issue those pages that fall outside of ios->length.
		 *
		 * When @length is 0 neither ios->length nor ios->nr_pages is set here.
		 *
		 * Returns 0 with the new state stored in *@pios, or the error returned by
		 * _get_io_state().
		 */
		int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
				     bool is_reading, u64 offset, u64 length,
				     struct ore_io_state **pios)
		{
			struct ore_io_state *ios;
			/* Only one group worth of per-device state is needed */
			unsigned numdevs = layout->group_width * layout->mirrors_p1;
			int ret;

			ret = _get_io_state(layout, oc, numdevs, pios);
			if (unlikely(ret))
				return ret;

			/* *pios already points at the new state; just fill it in */
			ios = *pios;
			ios->reading = is_reading;
			ios->offset = offset;

			if (length) {
				struct ore_striping_info si;

				ore_calc_stripe_info(layout, offset, &si);
				/* Clamp the IO to what is left in the current group */
				ios->length = (length <= si.group_length) ? length :
									si.group_length;
				ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
			}

			return 0;
		}
		EXPORT_SYMBOL(ore_get_rw_state);

		/* Allocate an io_state for all the devices in the comps array
		*
		* This version of io_state allocation is used mostly by create/remove
		* and trunc where we currently need all the devices. The only wastful
		* bit is the read/write_attributes with no IO. Those sites should
		* be converted to use ore_get_rw_state() with length=0
		*/
		int ore_get_io_state(struct ore_layout layout, struct ore_components oc,
		struct ore_io_state **ios)
		struct ore_io_state **pios)
		{
		return ore_get_rw_state(layout, oc, true, 0, 0, ios);
		return _get_io_state(layout, oc, oc->numdevs, pios);
		}
		EXPORT_SYMBOL(ore_get_io_state);

		@@ -374,12 +424,12 @@ static int _prepare_one_group(struct ore_io_state *ios, u64 length,
		unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
		unsigned dev = si->dev;
		unsigned first_dev = dev - (dev % devs_in_group);
		unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
		unsigned cur_pg = ios->pages_consumed;
		int ret = 0;

		while (length) {
		struct ore_per_dev_state *per_dev = &ios->per_dev[dev];
		unsigned comp = dev - first_dev;
		struct ore_per_dev_state *per_dev = &ios->per_dev[comp];
		unsigned cur_len, page_off = 0;

		if (!per_dev->length) {
		@@ -397,9 +447,6 @@ static int _prepare_one_group(struct ore_io_state *ios, u64 length,
		per_dev->offset = si->obj_offset - si->unit_off;
		cur_len = stripe_unit;
		}

		if (max_comp < dev)
		max_comp = dev;
		} else {
		cur_len = stripe_unit;
		}
		@@ -417,17 +464,15 @@ static int _prepare_one_group(struct ore_io_state *ios, u64 length,
		length -= cur_len;
		}
		out:
		ios->numdevs = max_comp + mirrors_p1;
		ios->numdevs = devs_in_group;
		ios->pages_consumed = cur_pg;
		return ret;
		}

		static int _prepare_for_striping(struct ore_io_state *ios)
		{
		u64 length = ios->length;
		u64 offset = ios->offset;
		struct ore_striping_info si;
		int ret = 0;
		int ret;

		if (!ios->pages) {
		if (ios->kern_buff) {
		@@ -446,21 +491,11 @@ static int _prepare_for_striping(struct ore_io_state *ios)
		return 0;
		}

		while (length) {
		ore_calc_stripe_info(ios->layout, offset, &si);

		if (length < si.group_length)
		si.group_length = length;

		ret = _prepare_one_group(ios, si.group_length, &si);
		if (unlikely(ret))
		goto out;
		ore_calc_stripe_info(ios->layout, ios->offset, &si);

		offset += si.group_length;
		length -= si.group_length;
		}
		BUG_ON(ios->length > si.group_length);
		ret = _prepare_one_group(ios, ios->length, &si);

		out:
		return ret;
		}

		@@ -742,7 +777,6 @@ struct _trunc_info {

		unsigned first_group_dev;
		unsigned nex_group_dev;
		unsigned max_devs;
		};

		static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
		@@ -757,7 +791,6 @@ static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,

		ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
		ti->nex_group_dev = ti->first_group_dev + layout->group_width;
		ti->max_devs = layout->group_width * layout->group_count;
		}

		int ore_truncate(struct ore_layout layout, struct ore_components oc,
		@@ -777,7 +810,7 @@ int ore_truncate(struct ore_layout layout, struct ore_components oc,

		_calc_trunk_info(ios->layout, size, &ti);

		size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs),
		size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs),
		GFP_KERNEL);
		if (unlikely(!size_attrs)) {
		ret = -ENOMEM;
		@@ -786,7 +819,7 @@ int ore_truncate(struct ore_layout layout, struct ore_components oc,

		ios->numdevs = ios->oc->numdevs;

		for (i = 0; i < ti.max_devs; ++i) {
		for (i = 0; i < ios->numdevs; ++i) {
		struct exofs_trunc_attr *size_attr = &size_attrs[i];
		u64 obj_size;