Commit d806f30e authored by Jinshan Xiong, committed by Greg Kroah-Hartman

staging: lustre: osc: revise unstable pages accounting

This patch makes a few changes to unstable page tracking:

1. Stop accounting Lustre's unstable pages in the kernel's
   NR_UNSTABLE_NFS counter, since doing so killed performance
2. Track unstable pages as part of the LRU cache; otherwise Lustre
   can use much more memory than max_cached_mb
3. Remove obd_unstable_pages tracking to avoid a global
   atomic counter
4. Make unstable page tracking optional. Tracking is turned off by
   default and can be controlled by llite.*.unstable_stats
   (see the usage sketch below)
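
The llite.*.unstable_stats knob from item 4 surfaces as a read/write
sysfs attribute (see unstable_stats_store in the lproc_llite.c hunk
below). As a rough sketch only, the following userspace snippet turns
the check on by writing to that attribute. The mount instance
directory name is hypothetical (it depends on the fsname and mount),
and "lctl set_param llite.*.unstable_stats=1" would be the more usual
interface.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical instance name; the real directory under
	 * /sys/fs/lustre/llite/ depends on the fsname and mount. */
	const char *path =
		"/sys/fs/lustre/llite/lustre-ffff88000a004000/unstable_stats";
	/* unstable_stats_store() looks for "unstable_check:" in the
	 * input and treats any non-zero value as "enable". */
	const char *msg = "unstable_check: 1";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write");
	close(fd);
	return 0;
}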

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-4841
Reviewed-on: http://review.whamcloud.com/10003
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 96c53363
drivers/staging/lustre/lustre/include/cl_object.h (+24 −11)
@@ -1039,23 +1039,32 @@ do { \
 	}								     \
 } while (0)
 
-static inline int __page_in_use(const struct cl_page *page, int refc)
-{
-	if (page->cp_type == CPT_CACHEABLE)
-		++refc;
-	LASSERT(atomic_read(&page->cp_ref) > 0);
-	return (atomic_read(&page->cp_ref) > refc);
-}
-
-#define cl_page_in_use(pg)       __page_in_use(pg, 1)
-#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
-
 static inline struct page *cl_page_vmpage(struct cl_page *page)
 {
 	LASSERT(page->cp_vmpage);
 	return page->cp_vmpage;
 }
 
+/**
+ * Check if a cl_page is in use.
+ *
+ * Client cache holds a refcount, this refcount will be dropped when
+ * the page is taken out of cache, see vvp_page_delete().
+ */
+static inline bool __page_in_use(const struct cl_page *page, int refc)
+{
+	return (atomic_read(&page->cp_ref) > refc + 1);
+}
+
+/**
+ * Caller itself holds a refcount of cl_page.
+ */
+#define cl_page_in_use(pg)	 __page_in_use(pg, 1)
+/**
+ * Caller doesn't hold a refcount.
+ */
+#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+
 /** @} cl_page */
 
 /** \addtogroup cl_lock cl_lock
@@ -2330,6 +2339,10 @@ struct cl_client_cache {
 	 * Lock to protect ccc_lru list
 	 */
 	spinlock_t		ccc_lru_lock;
+	/**
+	 * Set if unstable check is enabled
+	 */
+	unsigned int		ccc_unstable_check:1;
 	/**
 	 * # of unstable pages for this mount point
 	 */
drivers/staging/lustre/lustre/include/obd_support.h (+0 −1)
@@ -54,7 +54,6 @@ extern int at_early_margin;
 extern int at_extra;
 extern unsigned int obd_sync_filter;
 extern unsigned int obd_max_dirty_pages;
-extern atomic_t obd_unstable_pages;
 extern atomic_t obd_dirty_pages;
 extern atomic_t obd_dirty_transit_pages;
 extern char obd_jobid_var[];
drivers/staging/lustre/lustre/llite/lproc_llite.c (+38 −3)
@@ -828,10 +828,45 @@ static ssize_t unstable_stats_show(struct kobject *kobj,
 	pages = atomic_read(&cache->ccc_unstable_nr);
 	mb = (pages * PAGE_SIZE) >> 20;
 
-	return sprintf(buf, "unstable_pages: %8d\n"
-			    "unstable_mb:    %8d\n", pages, mb);
+	return sprintf(buf, "unstable_check: %8d\n"
+			    "unstable_pages: %8d\n"
+			    "unstable_mb:    %8d\n",
+			    cache->ccc_unstable_check, pages, mb);
 }
-LUSTRE_RO_ATTR(unstable_stats);
+
+static ssize_t unstable_stats_store(struct kobject *kobj,
+				    struct attribute *attr,
+				    const char *buffer,
+				    size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kobj);
+	char kernbuf[128];
+	int val, rc;
+
+	if (!count)
+		return 0;
+	if (count < 0 || count >= sizeof(kernbuf))
+		return -EINVAL;
+
+	if (copy_from_user(kernbuf, buffer, count))
+		return -EFAULT;
+	kernbuf[count] = 0;
+
+	buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) -
+		  kernbuf;
+	rc = lprocfs_write_helper(buffer, count, &val);
+	if (rc < 0)
+		return rc;
+
+	/* borrow lru lock to set the value */
+	spin_lock(&sbi->ll_cache->ccc_lru_lock);
+	sbi->ll_cache->ccc_unstable_check = !!val;
+	spin_unlock(&sbi->ll_cache->ccc_lru_lock);
+
+	return count;
+}
+LUSTRE_RW_ATTR(unstable_stats);
 
 static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr,
 				char *buf)
drivers/staging/lustre/lustre/obdclass/class_obd.c (+0 −2)
@@ -57,8 +57,6 @@ unsigned int obd_dump_on_eviction;
 EXPORT_SYMBOL(obd_dump_on_eviction);
 unsigned int obd_max_dirty_pages = 256;
 EXPORT_SYMBOL(obd_max_dirty_pages);
-atomic_t obd_unstable_pages;
-EXPORT_SYMBOL(obd_unstable_pages);
 atomic_t obd_dirty_pages;
 EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
drivers/staging/lustre/lustre/osc/osc_cache.c (+4 −92)
@@ -1384,13 +1384,11 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 #define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do {			      \
 	struct client_obd *__tmp = (cli);				      \
 	CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d "	      \
-	       "unstable_pages: %d/%d dropped: %ld avail: %ld, "	      \
-	       "reserved: %ld, flight: %d } lru {in list: %d, "		      \
-	       "left: %d, waiters: %d }" fmt,                                 \
+	       "dropped: %ld avail: %ld, reserved: %ld, flight: %d }"	      \
+	       "lru {in list: %d, left: %d, waiters: %d }" fmt,		      \
 	       __tmp->cl_import->imp_obd->obd_name,			      \
 	       __tmp->cl_dirty, __tmp->cl_dirty_max,			      \
 	       atomic_read(&obd_dirty_pages), obd_max_dirty_pages,	      \
-	       atomic_read(&obd_unstable_pages), obd_max_dirty_pages,	      \
 	       __tmp->cl_lost_grant, __tmp->cl_avail_grant,		      \
 	       __tmp->cl_reserved_grant, __tmp->cl_w_in_flight,		      \
 	       atomic_read(&__tmp->cl_lru_in_list),			      \
@@ -1542,8 +1540,7 @@ static int osc_enter_cache_try(struct client_obd *cli,
 		return 0;
 
 	if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
-	    atomic_read(&obd_unstable_pages) + 1 +
-	    atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
+	    atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
 		osc_consume_write_grant(cli, &oap->oap_brw_page);
 		if (transient) {
 			cli->cl_dirty_transit += PAGE_SIZE;
@@ -1671,8 +1668,7 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 		ocw->ocw_rc = -EDQUOT;
 		/* we can't dirty more */
 		if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
-		    (atomic_read(&obd_unstable_pages) + 1 +
-		     atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
+		    (atomic_read(&obd_dirty_pages) + 1 > obd_max_dirty_pages)) {
 			CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",
 			       cli->cl_dirty,
 			       cli->cl_dirty_max, obd_max_dirty_pages);
@@ -1843,84 +1839,6 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
 		ar->ar_force_sync = 0;
 }
 
-/**
- * Performs "unstable" page accounting. This function balances the
- * increment operations performed in osc_inc_unstable_pages. It is
- * registered as the RPC request callback, and is executed when the
- * bulk RPC is committed on the server. Thus at this point, the pages
- * involved in the bulk transfer are no longer considered unstable.
- */
-void osc_dec_unstable_pages(struct ptlrpc_request *req)
-{
-	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
-	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-	int page_count = desc->bd_iov_count;
-	int i;
-
-	/* No unstable page tracking */
-	if (!cli->cl_cache)
-		return;
-
-	LASSERT(page_count >= 0);
-
-	for (i = 0; i < page_count; i++)
-		dec_node_page_state(desc->bd_iov[i].bv_page, NR_UNSTABLE_NFS);
-
-	atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
-	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-
-	atomic_sub(page_count, &cli->cl_unstable_count);
-	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
-
-	atomic_sub(page_count, &obd_unstable_pages);
-	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-
-	wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
-}
-
-/* "unstable" page accounting. See: osc_dec_unstable_pages. */
-void osc_inc_unstable_pages(struct ptlrpc_request *req)
-{
-	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
-	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-	long page_count = desc->bd_iov_count;
-	int i;
-
-	/* No unstable page tracking */
-	if (!cli->cl_cache)
-		return;
-
-	LASSERT(page_count >= 0);
-
-	for (i = 0; i < page_count; i++)
-		inc_node_page_state(desc->bd_iov[i].bv_page, NR_UNSTABLE_NFS);
-
-	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-	atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
-
-	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
-	atomic_add(page_count, &cli->cl_unstable_count);
-
-	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-	atomic_add(page_count, &obd_unstable_pages);
-
-	/*
-	 * If the request has already been committed (i.e. brw_commit
-	 * called via rq_commit_cb), we need to undo the unstable page
-	 * increments we just performed because rq_commit_cb wont be
-	 * called again.
-	 */
-	spin_lock(&req->rq_lock);
-	if (unlikely(req->rq_committed)) {
-		/* Drop lock before calling osc_dec_unstable_pages */
-		spin_unlock(&req->rq_lock);
-		osc_dec_unstable_pages(req);
-	} else {
-		req->rq_unstable = 1;
-		spin_unlock(&req->rq_lock);
-	}
-}
-
 /* this must be called holding the loi list lock to give coverage to exit_cache,
  * async_flag maintenance, and oap_request
  */
@@ -1932,9 +1850,6 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 	__u64 xid = 0;
 
 	if (oap->oap_request) {
-		if (!rc)
-			osc_inc_unstable_pages(oap->oap_request);
-
 		xid = ptlrpc_req_xid(oap->oap_request);
 		ptlrpc_req_finished(oap->oap_request);
 		oap->oap_request = NULL;
@@ -2421,9 +2336,6 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 			return rc;
 	}
 
-	if (osc_over_unstable_soft_limit(cli))
-		brw_flags |= OBD_BRW_SOFT_SYNC;
-
 	oap->oap_cmd = cmd;
 	oap->oap_page_off = ops->ops_from;
 	oap->oap_count = ops->ops_to - ops->ops_from;