
Commit 4f76f0ec authored by wang di, committed by Greg Kroah-Hartman

staging: lustre: llite: move dir cache to MDC layer



Move the directory entry cache from llite to the MDC layer, so that
the client-side directory stripe can use an independent hash function
(in LMV) which does not need to be tightly coupled with the backend
storage's dir-entry hash function. With a striped directory the hash
becomes two-tier: LMV calculates the hash value from the name and the
hash type in the layout, then each MDT stores the entries on disk
using its own hash.
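
(Illustration only, not part of the patch.) A minimal user-space sketch of the
two-tier scheme the message describes. The helper names are hypothetical:
lmv_name_hash() stands in for the LMV-side hash that picks a stripe/MDT from
the entry name and the hash type in the layout, and mdt_disk_hash() stands in
for whatever hash an individual MDT uses to place the entry on disk.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the LMV-side name hash (tier 1). */
static uint64_t lmv_name_hash(const char *name, unsigned int hash_type)
{
	uint64_t h = hash_type;	/* seed with the hash type from the layout */

	while (*name)
		h = h * 31 + (unsigned char)*name++;
	return h;
}

/* Hypothetical stand-in for an MDT's private on-disk hash (tier 2). */
static uint64_t mdt_disk_hash(const char *name)
{
	uint64_t h = 5381;

	while (*name)
		h = (h << 5) + h + (unsigned char)*name++;
	return h;
}

int main(void)
{
	const char *name = "file-0001";
	unsigned int stripe_count = 4;	/* number of MDTs backing the dir */

	/* Tier 1: LMV picks the stripe (MDT) from the name hash. */
	unsigned int stripe = lmv_name_hash(name, 1) % stripe_count;

	/* Tier 2: that MDT stores the entry under its own on-disk hash. */
	printf("entry %s -> stripe %u, on-disk hash %#llx\n",
	       name, stripe, (unsigned long long)mdt_disk_hash(name));
	return 0;
}

The only point of the sketch is that the two hashes are independent: how an
MDT indexes entries on disk no longer constrains how LMV routes a name to a
stripe.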

Signed-off-by: wang di <di.wang@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3531
Reviewed-on: http://review.whamcloud.com/7043
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 7ccb7c8f
+0 −11
@@ -80,17 +80,6 @@ static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
 {
 }
 
-static inline unsigned long hash_x_index(__u64 hash, int hash64)
-{
-	if (BITS_PER_LONG == 32 && hash64)
-		hash >>= 32;
-	/* save hash 0 as index 0 because otherwise we'll save it at
-	 * page index end (~0UL) and it causes truncate_inode_pages_range()
-	 * to loop forever.
-	 */
-	return ~0UL - (hash + !hash);
-}
-
 /** @} lite */
 
 #endif
+19 −315
@@ -134,111 +134,35 @@
  * for this integrated page will be adjusted. See lmv_adjust_dirpages().
  *
  */
-
-/* returns the page unlocked, but with a reference */
-static int ll_dir_filler(void *_hash, struct page *page0)
+struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
+			     __u64 offset, struct ll_dir_chain *chain)
 {
-	struct inode *inode = page0->mapping->host;
-	int hash64 = ll_i2sbi(inode)->ll_flags & LL_SBI_64BIT_HASH;
-	struct obd_export *exp = ll_i2sbi(inode)->ll_md_exp;
-	struct ptlrpc_request *request;
-	struct mdt_body *body;
-	struct md_op_data *op_data;
-	__u64 hash = *((__u64 *)_hash);
-	struct page **page_pool;
+	struct md_callback cb_op;
 	struct page *page;
-	struct lu_dirpage *dp;
-	int max_pages = ll_i2sbi(inode)->ll_md_brw_pages;
-	int nrdpgs = 0; /* number of pages read actually */
-	int npages;
-	int i;
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n",
-	       PFID(ll_inode2fid(inode)), inode, hash);
-
-	LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
-
-	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
-				     LUSTRE_OPC_ANY, NULL);
-	if (IS_ERR(op_data))
-		return PTR_ERR(op_data);
-
-	page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
-	if (page_pool) {
-		page_pool[0] = page0;
-	} else {
-		page_pool = &page0;
-		max_pages = 1;
-	}
-	for (npages = 1; npages < max_pages; npages++) {
-		page = page_cache_alloc_cold(inode->i_mapping);
-		if (!page)
-			break;
-		page_pool[npages] = page;
-	}
-
-	op_data->op_npages = npages;
-	op_data->op_offset = hash;
-	rc = md_readpage(exp, op_data, page_pool, &request);
-	ll_finish_md_op_data(op_data);
-	if (rc < 0) {
-		/* page0 is special, which was added into page cache early */
-		delete_from_page_cache(page0);
-	} else if (rc == 0) {
-		body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
-		/* Checked by mdc_readpage() */
-		if (body->mbo_valid & OBD_MD_FLSIZE)
-			i_size_write(inode, body->mbo_size);
-
-		nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
-			 >> PAGE_SHIFT;
-		SetPageUptodate(page0);
-	}
-	unlock_page(page0);
-	ptlrpc_req_finished(request);
-
-	CDEBUG(D_VFSTRACE, "read %d/%d pages\n", nrdpgs, npages);
-
-	for (i = 1; i < npages; i++) {
-		unsigned long offset;
-		int ret;
-
-		page = page_pool[i];
-
-		if (rc < 0 || i >= nrdpgs) {
-			put_page(page);
-			continue;
-		}
-
-		SetPageUptodate(page);
-
-		dp = kmap(page);
-		hash = le64_to_cpu(dp->ldp_hash_start);
-		kunmap(page);
-
-		offset = hash_x_index(hash, hash64);
-
-		prefetchw(&page->flags);
-		ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
-					    GFP_NOFS);
-		if (ret == 0) {
-			unlock_page(page);
-		} else {
-			CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: %d\n",
-			       offset, ret);
-		}
-		put_page(page);
-	}
+	cb_op.md_blocking_ast = ll_md_blocking_ast;
+	rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page);
+	if (rc)
+		return ERR_PTR(rc);
 
-	if (page_pool != &page0)
-		kfree(page_pool);
-	return rc;
+	return page;
 }
 
 void ll_release_page(struct inode *inode, struct page *page, bool remove)
 {
 	kunmap(page);
+
+	/*
+	 * Always remove the page for striped dir, because the page is
+	 * built from temporarily in LMV layer
+	 */
+	if (inode && S_ISDIR(inode->i_mode) &&
+	    ll_i2info(inode)->lli_lsm_md) {
+		__free_page(page);
+		return;
+	}
+
 	if (remove) {
 		lock_page(page);
 		if (likely(page->mapping))
@@ -248,226 +172,6 @@ void ll_release_page(struct inode *inode, struct page *page, bool remove)
 	put_page(page);
 }
 
-/*
- * Find, kmap and return page that contains given hash.
- */
-static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
-				       __u64 *start, __u64 *end)
-{
-	int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
-	struct address_space *mapping = dir->i_mapping;
-	/*
-	 * Complement of hash is used as an index so that
-	 * radix_tree_gang_lookup() can be used to find a page with starting
-	 * hash _smaller_ than one we are looking for.
-	 */
-	unsigned long offset = hash_x_index(*hash, hash64);
-	struct page *page;
-	int found;
-
-	spin_lock_irq(&mapping->tree_lock);
-	found = radix_tree_gang_lookup(&mapping->page_tree,
-				       (void **)&page, offset, 1);
-	if (found > 0 && !radix_tree_exceptional_entry(page)) {
-		struct lu_dirpage *dp;
-
-		get_page(page);
-		spin_unlock_irq(&mapping->tree_lock);
-		/*
-		 * In contrast to find_lock_page() we are sure that directory
-		 * page cannot be truncated (while DLM lock is held) and,
-		 * hence, can avoid restart.
-		 *
-		 * In fact, page cannot be locked here at all, because
-		 * ll_dir_filler() does synchronous io.
-		 */
-		wait_on_page_locked(page);
-		if (PageUptodate(page)) {
-			dp = kmap(page);
-			if (BITS_PER_LONG == 32 && hash64) {
-				*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-				*end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-				*hash  = *hash >> 32;
-			} else {
-				*start = le64_to_cpu(dp->ldp_hash_start);
-				*end   = le64_to_cpu(dp->ldp_hash_end);
-			}
-			LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n",
-				 *start, *end, *hash);
-			CDEBUG(D_VFSTRACE, "page %lu [%llu %llu], hash %llu\n",
-			       offset, *start, *end, *hash);
-			if (*hash > *end) {
-				ll_release_page(dir, page, false);
-				page = NULL;
-			} else if (*end != *start && *hash == *end) {
-				/*
-				 * upon hash collision, remove this page,
-				 * otherwise put page reference, and
-				 * ll_get_dir_page() will issue RPC to fetch
-				 * the page we want.
-				 */
-				ll_release_page(dir, page,
-						le32_to_cpu(dp->ldp_flags) &
-						LDF_COLLIDE);
-				page = NULL;
-			}
-		} else {
-			put_page(page);
-			page = ERR_PTR(-EIO);
-		}
-
-	} else {
-		spin_unlock_irq(&mapping->tree_lock);
-		page = NULL;
-	}
-	return page;
-}
-
-struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
-			     __u64 hash, struct ll_dir_chain *chain)
-{
-	ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
-	struct address_space *mapping = dir->i_mapping;
-	struct lustre_handle lockh;
-	struct lu_dirpage *dp;
-	struct page *page;
-	enum ldlm_mode mode;
-	int rc;
-	__u64 start = 0;
-	__u64 end = 0;
-	__u64 lhash = hash;
-	struct ll_inode_info *lli = ll_i2info(dir);
-	int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
-
-	mode = LCK_PR;
-	rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
-			   ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh);
-	if (!rc) {
-		struct ldlm_enqueue_info einfo = {
-			.ei_type = LDLM_IBITS,
-			.ei_mode = mode,
-			.ei_cb_bl = ll_md_blocking_ast,
-			.ei_cb_cp = ldlm_completion_ast,
-		};
-		struct lookup_intent it = { .it_op = IT_READDIR };
-		struct ptlrpc_request *request;
-		struct md_op_data *op_data;
-
-		op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
-					     LUSTRE_OPC_ANY, NULL);
-		if (IS_ERR(op_data))
-			return (void *)op_data;
-
-		rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
-				op_data, &lockh, NULL, 0, NULL, 0);
-
-		ll_finish_md_op_data(op_data);
-
-		request = (struct ptlrpc_request *)it.it_request;
-		if (request)
-			ptlrpc_req_finished(request);
-		if (rc < 0) {
-			CERROR("lock enqueue: " DFID " at %llu: rc %d\n",
-			       PFID(ll_inode2fid(dir)), hash, rc);
-			return ERR_PTR(rc);
-		}
-
-		CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n",
-		       PFID(ll_inode2fid(dir)), dir);
-		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
-				 &it.it_lock_handle, dir, NULL);
-	} else {
-		/* for cross-ref object, l_ast_data of the lock may not be set,
-		 * we reset it here
-		 */
-		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie,
-				 dir, NULL);
-	}
-	ldlm_lock_dump_handle(D_OTHER, &lockh);
-
-	mutex_lock(&lli->lli_readdir_mutex);
-	page = ll_dir_page_locate(dir, &lhash, &start, &end);
-	if (IS_ERR(page)) {
-		CERROR("dir page locate: "DFID" at %llu: rc %ld\n",
-		       PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page));
-		goto out_unlock;
-	} else if (page) {
-		/*
-		 * XXX nikita: not entirely correct handling of a corner case:
-		 * suppose hash chain of entries with hash value HASH crosses
-		 * border between pages P0 and P1. First both P0 and P1 are
-		 * cached, seekdir() is called for some entry from the P0 part
-		 * of the chain. Later P0 goes out of cache. telldir(HASH)
-		 * happens and finds P1, as it starts with matching hash
-		 * value. Remaining entries from P0 part of the chain are
-		 * skipped. (Is that really a bug?)
-		 *
-		 * Possible solutions: 0. don't cache P1 is such case, handle
-		 * it as an "overflow" page. 1. invalidate all pages at
-		 * once. 2. use HASH|1 as an index for P1.
-		 */
-		goto hash_collision;
-	}
-
-	page = read_cache_page(mapping, hash_x_index(hash, hash64),
-			       ll_dir_filler, &lhash);
-	if (IS_ERR(page)) {
-		CERROR("read cache page: "DFID" at %llu: rc %ld\n",
-		       PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
-		goto out_unlock;
-	}
-
-	wait_on_page_locked(page);
-	(void)kmap(page);
-	if (!PageUptodate(page)) {
-		CERROR("page not updated: "DFID" at %llu: rc %d\n",
-		       PFID(ll_inode2fid(dir)), hash, -5);
-		goto fail;
-	}
-	if (!PageChecked(page))
-		/* XXX: check page format later */
-		SetPageChecked(page);
-	if (PageError(page)) {
-		CERROR("page error: "DFID" at %llu: rc %d\n",
-		       PFID(ll_inode2fid(dir)), hash, -5);
-		goto fail;
-	}
-hash_collision:
-	dp = page_address(page);
-	if (BITS_PER_LONG == 32 && hash64) {
-		start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-		end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-		lhash = hash >> 32;
-	} else {
-		start = le64_to_cpu(dp->ldp_hash_start);
-		end   = le64_to_cpu(dp->ldp_hash_end);
-		lhash = hash;
-	}
-	if (end == start) {
-		LASSERT(start == lhash);
-		CWARN("Page-wide hash collision: %llu\n", end);
-		if (BITS_PER_LONG == 32 && hash64)
-			CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n",
-			      le64_to_cpu(dp->ldp_hash_start),
-			      le64_to_cpu(dp->ldp_hash_end), hash);
-		/*
-		 * Fetch whole overflow chain...
-		 *
-		 * XXX not yet.
-		 */
-		goto fail;
-	}
-out_unlock:
-	mutex_unlock(&lli->lli_readdir_mutex);
-	ldlm_lock_decref(&lockh, mode);
-	return page;
-
-fail:
-	ll_release_page(dir, page, true);
-	page = ERR_PTR(-EIO);
-	goto out_unlock;
-}
-
 /**
  * return IF_* type for given lu_dirent entry.
  * IF_* flag shld be converted to particular OS file type in
+1 −1
@@ -665,7 +665,7 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
 int ll_get_mdt_idx(struct inode *inode);
 int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid);
 struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
-			     __u64 hash, struct ll_dir_chain *chain);
+			     __u64 offset, struct ll_dir_chain *chain);
 void ll_release_page(struct inode *inode, struct page *page, bool remove);
 
 /* llite/namei.c */
+4 −8
@@ -1035,7 +1035,7 @@ static int ll_statahead_thread(void *arg)
 	struct ll_statahead_info *sai    = ll_sai_get(plli->lli_sai);
 	struct ptlrpc_thread     *thread = &sai->sai_thread;
 	struct ptlrpc_thread *agl_thread = &sai->sai_agl_thread;
-	struct page	      *page;
+	struct page	      *page = NULL;
 	__u64		     pos    = 0;
 	int		       first  = 0;
 	int		       rc     = 0;
@@ -1166,8 +1166,7 @@
 					if (!list_empty(&sai->sai_entries_received))
 						goto interpret_it;
 
-					if (unlikely(
-						!thread_is_running(thread))) {
+					if (unlikely(!thread_is_running(thread))) {
 						ll_release_page(dir, page, false);
 						rc = 0;
 						goto out;
@@ -1182,10 +1181,10 @@
 
 				goto keep_it;
 			}
-
 do_it:
 			ll_statahead_one(parent, name, namelen);
 		}
 
 		pos = le64_to_cpu(dp->ldp_hash_end);
 		if (pos == MDS_DIR_END_OFF) {
 			/*
@@ -1232,14 +1231,12 @@
 			 * Normal case: continue to the next page.
 			 */
 			ll_release_page(dir, page,
-					le32_to_cpu(dp->ldp_flags) &
-					LDF_COLLIDE);
+					le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
 			sai->sai_in_readpage = 1;
 			page = ll_get_dir_page(dir, op_data, pos, &chain);
 			sai->sai_in_readpage = 0;
 		}
 	}
-
 out:
 	ll_finish_md_op_data(op_data);
 	if (sai->sai_agl_valid) {
@@ -1455,7 +1452,6 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 			page = ll_get_dir_page(dir, op_data, pos, &chain);
 		}
 	}
-
 out:
 	ll_dir_chain_fini(&chain);
 	ll_finish_md_op_data(op_data);
+8 −0
@@ -135,4 +135,12 @@ static inline int mdc_prep_elc_req(struct obd_export *exp,
 				 count);
 }
 
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
+{
+	if (BITS_PER_LONG == 32 && hash64)
+		hash >>= 32;
+	/* save hash 0 with hash 1 */
+	return ~0UL - (hash + !hash);
+}
+
 #endif
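
(Aside, not part of the patch.) The hash_x_index() helper added to the MDC
header above maps a directory hash onto a page-cache index by taking its
complement; per the comment in the removed ll_dir_page_locate(), the
complement lets a radix-tree gang lookup find the cached page whose starting
hash is smaller than the one being looked up. Hash 0 is folded onto the same
index as hash 1, so the result never lands on ~0UL, which the comment deleted
from the llite header notes would make truncate_inode_pages_range() loop
forever. A small user-space check of that mapping (64-bit case only):

#include <stdint.h>
#include <stdio.h>

/* User-space copy of hash_x_index() for illustration; this covers the
 * BITS_PER_LONG == 64 case, so the hash64 flag is ignored.
 */
static unsigned long hash_x_index(uint64_t hash, int hash64)
{
	(void)hash64;
	return ~0UL - (hash + !hash);
}

int main(void)
{
	/* hash 0 shares an index with hash 1, so ~0UL itself is never used */
	printf("hash 0 -> index %#lx\n", hash_x_index(0, 0));
	printf("hash 1 -> index %#lx\n", hash_x_index(1, 0));
	printf("hash 2 -> index %#lx\n", hash_x_index(2, 0));
	return 0;
}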