Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1e1db2a9 authored by Jinshan Xiong's avatar Jinshan Xiong Committed by Greg Kroah-Hartman
Browse files

staging: lustre: clio: Revise read ahead implementation



In this implementation, read ahead will hold the underlying DLM lock
to add read ahead pages. A new cl_io operation cio_read_ahead() is
added for this purpose. It takes a cl_read_ahead{} parameter so that
each layer can adjust it to its own requirements. For example, at
OSC layer, it will make sure the read ahead region is covered by a
LDLM lock; at the LOV layer, it will make sure that the region won't
cross stripe boundary.

Legacy callback cpo_is_under_lock() is removed.

Signed-off-by: default avatarJinshan Xiong <jinshan.xiong@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3259
Reviewed-on: http://review.whamcloud.com/10859


Reviewed-by: default avatarJohn L. Hammond <john.hammond@intel.com>
Reviewed-by: default avatarBobi Jam <bobijam@hotmail.com>
Reviewed-by: default avatarOleg Drokin <oleg.drokin@intel.com>
Signed-off-by: default avatarJames Simmons <jsimmons@infradead.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent f28f1a45
Loading
Loading
Loading
Loading
+31 −32
Original line number Diff line number Diff line
@@ -884,26 +884,6 @@ struct cl_page_operations {
	/** Destructor. Frees resources and slice itself. */
	void (*cpo_fini)(const struct lu_env *env,
			 struct cl_page_slice *slice);

	/**
	 * Checks whether the page is protected by a cl_lock. This is a
	 * per-layer method, because certain layers have ways to check for the
	 * lock much more efficiently than through the generic locks scan, or
	 * implement locking mechanisms separate from cl_lock, e.g.,
	 * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks
	 * being canceled, or scheduled for cancellation as soon as the last
	 * user goes away, too.
	 *
	 * \retval    -EBUSY: page is protected by a lock of a given mode;
	 * \retval  -ENODATA: page is not protected by a lock;
	 * \retval	 0: this layer cannot decide.
	 *
	 * \see cl_page_is_under_lock()
	 */
	int (*cpo_is_under_lock)(const struct lu_env *env,
				 const struct cl_page_slice *slice,
				 struct cl_io *io, pgoff_t *max);

	/**
	 * Optional debugging helper. Prints given page slice.
	 *
@@ -1365,7 +1345,6 @@ struct cl_2queue {
 *     (3) sort all locks to avoid dead-locks, and acquire them
 *
 *     (4) process the chunk: call per-page methods
 *	 (cl_io_operations::cio_read_page() for read,
 *	 cl_io_operations::cio_prepare_write(),
 *	 cl_io_operations::cio_commit_write() for write)
 *
@@ -1467,6 +1446,31 @@ struct cl_io_slice {

typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
			      struct cl_page *);

struct cl_read_ahead {
	/*
	 * Maximum page index at which the readahead window may end.
	 * This is determined by DLM lock coverage, RPC and stripe
	 * boundary. cra_end is inclusive (the last covered index).
	 */
	pgoff_t cra_end;
	/*
	 * Release routine. If readahead holds resources underneath
	 * (e.g. a reference on the covering DLM lock), this function
	 * should be called to release them.
	 */
	void (*cra_release)(const struct lu_env *env, void *cbdata);
	/* Opaque callback data passed verbatim to cra_release */
	void *cra_cbdata;
};

/*
 * Release any resources held by the readahead state \a ra and reset it
 * to a clean (all-zero) state so it can be safely reused.
 */
static inline void cl_read_ahead_release(const struct lu_env *env,
					 struct cl_read_ahead *ra)
{
	void (*release)(const struct lu_env *, void *) = ra->cra_release;

	if (release)
		release(env, ra->cra_cbdata);
	memset(ra, 0, sizeof(*ra));
}

/**
 * Per-layer io operations.
 * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -1573,16 +1577,13 @@ struct cl_io_operations {
				 struct cl_page_list *queue, int from, int to,
				 cl_commit_cbt cb);
	/**
	 * Read missing page.
	 *
	 * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start()
	 * method, when it hits not-up-to-date page in the range. Optional.
	 * Decide maximum read ahead extent
	 *
	 * \pre io->ci_type == CIT_READ
	 */
	int (*cio_read_page)(const struct lu_env *env,
	int (*cio_read_ahead)(const struct lu_env *env,
			      const struct cl_io_slice *slice,
			     const struct cl_page_slice *page);
			      pgoff_t start, struct cl_read_ahead *ra);
	/**
	 * Optional debugging helper. Print given io slice.
	 */
@@ -2302,8 +2303,6 @@ void cl_page_discard(const struct lu_env *env, struct cl_io *io,
void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
			  struct cl_page *page, pgoff_t *max_index);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
size_t cl_page_size(const struct cl_object *obj);
@@ -2414,8 +2413,6 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
		   struct cl_io_lock_link *link);
int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
			 struct cl_lock_descr *descr);
int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
		    struct cl_page *page);
int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
		    enum cl_req_type iot, struct cl_2queue *queue);
int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
@@ -2424,6 +2421,8 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
		       struct cl_page_list *queue, int from, int to,
		       cl_commit_cbt cb);
int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
		     pgoff_t start, struct cl_read_ahead *ra);
int cl_io_is_going(const struct lu_env *env);

/**
+1 −6
Original line number Diff line number Diff line
@@ -722,9 +722,7 @@ int ll_writepage(struct page *page, struct writeback_control *wbc);
int ll_writepages(struct address_space *, struct writeback_control *wbc);
int ll_readpage(struct file *file, struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
int ll_readahead(const struct lu_env *env, struct cl_io *io,
		 struct cl_page_list *queue, struct ll_readahead_state *ras,
		 bool hit);
int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
struct ll_cl_context *ll_cl_find(struct file *file);
void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io);
void ll_cl_remove(struct file *file, const struct lu_env *env);
@@ -1009,9 +1007,6 @@ int cl_sb_init(struct super_block *sb);
int cl_sb_fini(struct super_block *sb);
void ll_io_init(struct cl_io *io, const struct file *file, int write);

void ras_update(struct ll_sb_info *sbi, struct inode *inode,
		struct ll_readahead_state *ras, unsigned long index,
		unsigned hit);
void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);

+131 −87
Original line number Diff line number Diff line
@@ -180,90 +180,73 @@ void ll_ras_enter(struct file *f)
	spin_unlock(&ras->ras_lock);
}

/**
 * Try to add one candidate page to the read-ahead \a queue.
 *
 * The page is assumed (owned) for the duration of the check; ownership is
 * either transferred to \a queue or dropped again before returning.
 *
 * \retval       1  page was added to \a queue
 * \retval -ENOLCK  page is not protected by a lock; caller should stop
 *                  issuing read-ahead
 * \retval       0  page was already uptodate and is skipped
 */
static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
			      struct cl_page_list *queue, struct cl_page *page,
			      struct cl_object *clob, pgoff_t *max_index)
{
	struct page *vmpage = page->cp_vmpage;
	struct vvp_page *vpg;
	int	      rc;

	rc = 0;
	cl_page_assume(env, io, page);
	/* pin the page for the "ra" reference while we inspect it */
	lu_ref_add(&page->cp_reference, "ra", current);
	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
	if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
		CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
		       vvp_index(vpg), *max_index);
		/* only re-check lock coverage once the cached max_index
		 * (last known covered index) is exceeded
		 */
		if (*max_index == 0 || vvp_index(vpg) > *max_index)
			rc = cl_page_is_under_lock(env, io, page, max_index);
		if (rc == 0) {
			vpg->vpg_defer_uptodate = 1;
			vpg->vpg_ra_used = 0;
			cl_page_list_add(queue, page);
			rc = 1;
		} else {
			/* no lock coverage: drop the page and tell the
			 * caller to stop read-ahead
			 */
			cl_page_discard(env, io, page);
			rc = -ENOLCK;
		}
	} else {
		/* skip completed pages */
		cl_page_unassume(env, io, page);
	}
	lu_ref_del(&page->cp_reference, "ra", current);
	cl_page_put(env, page);
	return rc;
}

/**
 * Initiates read-ahead of a page with given index.
 *
 * \retval     +ve: page was added to \a queue.
 *
 * \retval -ENOLCK: there is no extent lock for this part of a file, stop
 *		  read-ahead.
 *
 * \retval  -ve, 0: page wasn't added to \a queue for other reason.
 * \retval +ve:	page was already uptodate so it will be skipped
 *		from being added;
 * \retval -ve:	page wasn't added to \a queue for error;
 * \retval   0:	page was added into \a queue for read ahead.
 */
static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
			      struct cl_page_list *queue,
			      pgoff_t index, pgoff_t *max_index)
			      struct cl_page_list *queue, pgoff_t index)
{
	enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
	struct cl_object *clob = io->ci_obj;
	struct inode *inode = vvp_object_inode(clob);
	struct page      *vmpage;
	const char *msg = NULL;
	struct cl_page *page;
	enum ra_stat      which = _NR_RA_STAT; /* keep gcc happy */
	struct vvp_page *vpg;
	struct page *vmpage;
	int rc = 0;
	const char       *msg   = NULL;

	vmpage = grab_cache_page_nowait(inode->i_mapping, index);
	if (vmpage) {
	if (!vmpage) {
		which = RA_STAT_FAILED_GRAB_PAGE;
		msg = "g_c_p_n failed";
		rc = -EBUSY;
		goto out;
	}

	/* Check if vmpage was truncated or reclaimed */
		if (vmpage->mapping == inode->i_mapping) {
			page = cl_page_find(env, clob, vmpage->index,
					    vmpage, CPT_CACHEABLE);
			if (!IS_ERR(page)) {
				rc = cl_read_ahead_page(env, io, queue,
							page, clob, max_index);
				if (rc == -ENOLCK) {
					which = RA_STAT_FAILED_MATCH;
					msg   = "lock match failed";
	if (vmpage->mapping != inode->i_mapping) {
		which = RA_STAT_WRONG_GRAB_PAGE;
		msg = "g_c_p_n returned invalid page";
		rc = -EBUSY;
		goto out;
	}
			} else {

	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
	if (IS_ERR(page)) {
		which = RA_STAT_FAILED_GRAB_PAGE;
		msg = "cl_page_find failed";
		rc = PTR_ERR(page);
		goto out;
	}

	lu_ref_add(&page->cp_reference, "ra", current);
	cl_page_assume(env, io, page);
	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
	if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
		vpg->vpg_defer_uptodate = 1;
		vpg->vpg_ra_used = 0;
		cl_page_list_add(queue, page);
	} else {
			which = RA_STAT_WRONG_GRAB_PAGE;
			msg   = "g_c_p_n returned invalid page";
		/* skip completed pages */
		cl_page_unassume(env, io, page);
		/* This page is already uptodate, returning a positive number
		 * to tell the callers about this
		 */
		rc = 1;
	}
		if (rc != 1)

	lu_ref_del(&page->cp_reference, "ra", current);
	cl_page_put(env, page);
out:
	if (vmpage) {
		if (rc)
			unlock_page(vmpage);
		put_page(vmpage);
	} else {
		which = RA_STAT_FAILED_GRAB_PAGE;
		msg   = "g_c_p_n failed";
	}
	if (msg) {
		ll_ra_stats_inc(inode, which);
@@ -378,12 +361,12 @@ static int ll_read_ahead_pages(const struct lu_env *env,
			       struct cl_io *io, struct cl_page_list *queue,
			       struct ra_io_arg *ria,
			       unsigned long *reserved_pages,
			       unsigned long *ra_end)
			       pgoff_t *ra_end)
{
	struct cl_read_ahead ra = { 0 };
	int rc, count = 0;
	bool stride_ria;
	pgoff_t page_idx;
	pgoff_t max_index = 0;

	LASSERT(ria);
	RIA_DEBUG(ria);
@@ -392,14 +375,23 @@ static int ll_read_ahead_pages(const struct lu_env *env,
	for (page_idx = ria->ria_start;
	     page_idx <= ria->ria_end && *reserved_pages > 0; page_idx++) {
		if (ras_inside_ra_window(page_idx, ria)) {
			if (!ra.cra_end || ra.cra_end < page_idx) {
				cl_read_ahead_release(env, &ra);

				rc = cl_io_read_ahead(env, io, page_idx, &ra);
				if (rc < 0)
					break;

				LASSERTF(ra.cra_end >= page_idx,
					 "object: %p, indices %lu / %lu\n",
					 io->ci_obj, ra.cra_end, page_idx);
			}

			/* If the page is inside the read-ahead window*/
			rc = ll_read_ahead_page(env, io, queue,
						page_idx, &max_index);
			if (rc == 1) {
			rc = ll_read_ahead_page(env, io, queue, page_idx);
			if (!rc) {
				(*reserved_pages)--;
				count++;
			} else if (rc == -ENOLCK) {
				break;
			}
		} else if (stride_ria) {
			/* If it is not in the read-ahead window, and it is
@@ -425,19 +417,21 @@ static int ll_read_ahead_pages(const struct lu_env *env,
			}
		}
	}
	cl_read_ahead_release(env, &ra);

	*ra_end = page_idx;
	return count;
}

int ll_readahead(const struct lu_env *env, struct cl_io *io,
		 struct cl_page_list *queue, struct ll_readahead_state *ras,
		 bool hit)
static int ll_readahead(const struct lu_env *env, struct cl_io *io,
			struct cl_page_list *queue,
			struct ll_readahead_state *ras, bool hit)
{
	struct vvp_io *vio = vvp_env_io(env);
	struct ll_thread_info *lti = ll_env_info(env);
	struct cl_attr *attr = vvp_env_thread_attr(env);
	unsigned long start = 0, end = 0, reserved;
	unsigned long ra_end, len, mlen = 0;
	unsigned long len, mlen = 0, reserved;
	pgoff_t ra_end, start = 0, end = 0;
	struct inode *inode;
	struct ra_io_arg *ria = &lti->lti_ria;
	struct cl_object *clob;
@@ -575,8 +569,8 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
	 * if the region we failed to issue read-ahead on is still ahead
	 * of the app and behind the next index to start read-ahead from
	 */
	CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu\n",
	       ra_end, end, ria->ria_end);
	CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
	       ra_end, end, ria->ria_end, ret);

	if (ra_end != end + 1) {
		ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
@@ -737,9 +731,9 @@ static void ras_increase_window(struct inode *inode,
					  ra->ra_max_pages_per_file);
}

void ras_update(struct ll_sb_info *sbi, struct inode *inode,
static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
		       struct ll_readahead_state *ras, unsigned long index,
		unsigned hit)
		       unsigned int hit)
{
	struct ll_ra_info *ra = &sbi->ll_ra_info;
	int zero = 0, stride_detect = 0, ra_miss = 0;
@@ -1087,6 +1081,56 @@ void ll_cl_remove(struct file *file, const struct lu_env *env)
	write_unlock(&fd->fd_lock);
}

/**
 * Read in the page \a page (already assumed by the caller) and issue
 * read-ahead around it.
 *
 * Updates the per-file read-ahead state, queues the page together with
 * any read-ahead pages, and submits the whole batch as one read RPC.
 *
 * \retval 0      on success (or when nothing needed to be submitted)
 * \retval -ve    error from cl_io_submit_rw()
 */
static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
			   struct cl_page *page)
{
	struct inode *inode = vvp_object_inode(page->cp_obj);
	struct ll_file_data *fd = vvp_env_io(env)->vui_fd;
	struct ll_readahead_state *ras = &fd->fd_ras;
	struct cl_2queue *queue  = &io->ci_queue;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct vvp_page *vpg;
	int rc = 0;

	vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
	/* feed this access into the read-ahead state machine, but only
	 * when read-ahead is enabled at all (both limits non-zero)
	 */
	if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
	    sbi->ll_ra_info.ra_max_pages > 0)
		ras_update(sbi, inode, ras, vvp_index(vpg),
			   vpg->vpg_defer_uptodate);

	/* page was brought in by an earlier read-ahead; mark it used and
	 * export it as uptodate to the VM
	 */
	if (vpg->vpg_defer_uptodate) {
		vpg->vpg_ra_used = 1;
		cl_page_export(env, page, 1);
	}

	cl_2queue_init(queue);
	/*
	 * Add page into the queue even when it is marked uptodate above.
	 * this will unlock it automatically as part of cl_page_list_disown().
	 */
	cl_page_list_add(&queue->c2_qin, page);
	if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
	    sbi->ll_ra_info.ra_max_pages > 0) {
		int rc2;

		/* read-ahead failures are non-fatal; only log the count */
		rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
				   vpg->vpg_defer_uptodate);
		CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n",
		       PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
	}

	if (queue->c2_qin.pl_nr > 0)
		rc = cl_io_submit_rw(env, io, CRT_READ, queue);

	/*
	 * Unlock unsent pages in case of error.
	 */
	cl_page_list_disown(env, io, &queue->c2_qin);
	cl_2queue_fini(env, queue);

	return rc;
}

int ll_readpage(struct file *file, struct page *vmpage)
{
	struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob;
@@ -1110,7 +1154,7 @@ int ll_readpage(struct file *file, struct page *vmpage)
		LASSERT(page->cp_type == CPT_CACHEABLE);
		if (likely(!PageUptodate(vmpage))) {
			cl_page_assume(env, io, page);
			result = cl_io_read_page(env, io, page);
			result = ll_io_read_page(env, io, page);
		} else {
			/* Page from a non-object file. */
			unlock_page(vmpage);
+15 −32
Original line number Diff line number Diff line
@@ -1191,40 +1191,23 @@ static int vvp_io_fsync_start(const struct lu_env *env,
	return 0;
}

static int vvp_io_read_page(const struct lu_env *env,
static int vvp_io_read_ahead(const struct lu_env *env,
			     const struct cl_io_slice *ios,
			    const struct cl_page_slice *slice)
			     pgoff_t start, struct cl_read_ahead *ra)
{
	struct cl_io	      *io     = ios->cis_io;
	struct vvp_page           *vpg    = cl2vvp_page(slice);
	struct cl_page	    *page   = slice->cpl_page;
	struct inode              *inode  = vvp_object_inode(slice->cpl_obj);
	struct ll_sb_info	 *sbi    = ll_i2sbi(inode);
	struct ll_file_data       *fd     = cl2vvp_io(env, ios)->vui_fd;
	struct ll_readahead_state *ras    = &fd->fd_ras;
	struct cl_2queue	  *queue  = &io->ci_queue;
	int result = 0;

	if (sbi->ll_ra_info.ra_max_pages_per_file &&
	    sbi->ll_ra_info.ra_max_pages)
		ras_update(sbi, inode, ras, vvp_index(vpg),
			   vpg->vpg_defer_uptodate);
	if (ios->cis_io->ci_type == CIT_READ ||
	    ios->cis_io->ci_type == CIT_FAULT) {
		struct vvp_io *vio = cl2vvp_io(env, ios);

	if (vpg->vpg_defer_uptodate) {
		vpg->vpg_ra_used = 1;
		cl_page_export(env, page, 1);
		if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
			ra->cra_end = CL_PAGE_EOF;
			result = 1; /* no need to call down */
		}
	}
	/*
	 * Add page into the queue even when it is marked uptodate above.
	 * this will unlock it automatically as part of cl_page_list_disown().
	 */

	cl_page_list_add(&queue->c2_qin, page);
	if (sbi->ll_ra_info.ra_max_pages_per_file &&
	    sbi->ll_ra_info.ra_max_pages)
		ll_readahead(env, io, &queue->c2_qin, ras,
			     vpg->vpg_defer_uptodate);

	return 0;
	return result;
}

static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
@@ -1271,7 +1254,7 @@ static const struct cl_io_operations vvp_io_ops = {
			.cio_fini   = vvp_io_fini
		}
	},
	.cio_read_page     = vvp_io_read_page,
	.cio_read_ahead	= vvp_io_read_ahead,
};

int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
+0 −16
Original line number Diff line number Diff line
@@ -339,20 +339,6 @@ static int vvp_page_make_ready(const struct lu_env *env,
	return result;
}

/*
 * Per-layer cpo_is_under_lock() for vvp: for read/write/fault IO under a
 * group lock, report that the whole file is covered by extending
 * *max_index to CL_PAGE_EOF. Always returns 0 ("this layer cannot
 * decide") so lower layers still get consulted.
 */
static int vvp_page_is_under_lock(const struct lu_env *env,
				  const struct cl_page_slice *slice,
				  struct cl_io *io, pgoff_t *max_index)
{
	struct vvp_io *vio;

	switch (io->ci_type) {
	case CIT_READ:
	case CIT_WRITE:
	case CIT_FAULT:
		vio = vvp_env_io(env);
		if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED))
			*max_index = CL_PAGE_EOF;
		break;
	default:
		break;
	}
	return 0;
}

static int vvp_page_print(const struct lu_env *env,
			  const struct cl_page_slice *slice,
			  void *cookie, lu_printer_t printer)
@@ -397,7 +383,6 @@ static const struct cl_page_operations vvp_page_ops = {
	.cpo_is_vmlocked   = vvp_page_is_vmlocked,
	.cpo_fini	  = vvp_page_fini,
	.cpo_print	 = vvp_page_print,
	.cpo_is_under_lock = vvp_page_is_under_lock,
	.io = {
		[CRT_READ] = {
			.cpo_prep	= vvp_page_prep_read,
@@ -496,7 +481,6 @@ static const struct cl_page_operations vvp_transient_page_ops = {
	.cpo_fini	  = vvp_transient_page_fini,
	.cpo_is_vmlocked   = vvp_transient_page_is_vmlocked,
	.cpo_print	 = vvp_page_print,
	.cpo_is_under_lock	= vvp_page_is_under_lock,
	.io = {
		[CRT_READ] = {
			.cpo_prep	= vvp_transient_page_prep,
Loading