Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 44779340 authored by Brian Behlendorf's avatar Brian Behlendorf Committed by Greg Kroah-Hartman
Browse files

staging/lustre: Limit reply buffer size



When allocating a reply buffer for the striping information don't
assume the unlikely worst case.  Instead, assume the common case
and size the buffer based on the observed default ea/cookie size.

The default size is initialized to a single stripe and allowed to
grow up to an entire page if needed.  This means that for smallish
filesystems (less than ~21 OSTs) where the worst case striping
information can fit in a single page there is effectively no
change.  Only for larger filesystems will the default be less than
the maximum.  This has a number of advantages.

* By limiting the default reply buffer size we avoid always
  vmalloc()'ing the buffer because it exceeds four pages in size
  and instead kmalloc() it.  This prevents the client from
  thrashing on the global vmalloc() spin lock.

* A reply buffer of exactly the right size (no larger) is allocated
  in the overflow case.  These larger reply buffers are still
  unlikely to exceed the 16k limit where a vmalloc() will occur.

* Saves memory in the common case.  Wide-striped files exceeding
  the default are expected to be the exception.

The reason this patch works is because the ptlrpc layer is smart
enough to reallocate the reply buffer when an overflow occurs.
Therefore the client doesn't have to drop the incoming reply and
send a new request with a larger reply buffer.

It's also worth mentioning that reply buffers always contain
a significant amount of extra padding because they are rounded up
to the nearest power of two.  This means that even files striped
wider than the default have a good chance of fitting in the
allocated reply buffer.

Also remove the client eadatasize check in mdt xattr packing because,
as noted above, the client can handle -EOVERFLOW.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-on: http://review.whamcloud.com/6339
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3338


Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Bob Glossman <bob.glossman@intel.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent e69cd00c
Loading
Loading
Loading
Loading
+16 −7
Original line number Diff line number Diff line
@@ -140,17 +140,26 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
	mutex_unlock(&lck->rpcl_mutex);
}

/* Update the maximum observed easize and cookiesize.  The default easize
 * and cookiesize are initialized to the minimum value but allowed to grow
 * up to a single page in size if required to handle the common case.
 *
 * Nothing is done unless the reply body has OBD_MD_FLMODEASIZE set in
 * ->valid.  When a larger max_mdsize or max_cookiesize is reported, the
 * matching cl_max_mds_* field is raised to it and the cl_default_mds_*
 * field is set to the same value capped at PAGE_CACHE_SIZE.
 *
 * The default must be updated in the same branch that raises the maximum:
 * raising the maximum beforehand would make the "max < reported" test
 * false and the default would never grow.
 */
static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
					       struct mdt_body *body)
{
	if (body->valid & OBD_MD_FLMODEASIZE) {
		struct client_obd *cli = &exp->exp_obd->u.cli;

		if (cli->cl_max_mds_easize < body->max_mdsize) {
			cli->cl_max_mds_easize = body->max_mdsize;
			/* Cap the default at one page. */
			cli->cl_default_mds_easize =
			    min_t(__u32, body->max_mdsize, PAGE_CACHE_SIZE);
		}
		if (cli->cl_max_mds_cookiesize < body->max_cookiesize) {
			cli->cl_max_mds_cookiesize = body->max_cookiesize;
			/* Cap the default at one page. */
			cli->cl_default_mds_cookiesize =
			    min_t(__u32, body->max_cookiesize, PAGE_CACHE_SIZE);
		}
	}
}

+10 −5
Original line number Diff line number Diff line
@@ -318,6 +318,7 @@ struct client_obd {
	 * call obd_size_diskmd() all the time. */
	int			 cl_default_mds_easize;
	int			 cl_max_mds_easize;
	int			 cl_default_mds_cookiesize;
	int			 cl_max_mds_cookiesize;

	enum lustre_sec_part     cl_sp_me;
@@ -605,6 +606,7 @@ struct lmv_obd {
	int			max_easize;
	int			max_def_easize;
	int			max_cookiesize;
	int			max_def_cookiesize;
	int			server_timeout;

	int			tgts_size; /* size of tgts array */
@@ -997,6 +999,9 @@ enum obd_cleanup_stage {
#define KEY_LOVDESC	     "lovdesc"
#define KEY_LOV_IDX	     "lov_idx"
#define KEY_MAX_EASIZE		"max_easize"
#define KEY_DEFAULT_EASIZE	"default_easize"
#define KEY_MAX_COOKIESIZE	"max_cookiesize"
#define KEY_DEFAULT_COOKIESIZE	"default_cookiesize"
#define KEY_MDS_CONN	    "mds_conn"
#define KEY_MGSSEC	      "mgssec"
#define KEY_NEXT_ID	     "next_id"
@@ -1390,7 +1395,7 @@ struct md_ops {
			  const char *, int, int, int,
			  struct ptlrpc_request **);

	int (*m_init_ea_size)(struct obd_export *, int, int, int);
	int (*m_init_ea_size)(struct obd_export *, int, int, int, int);

	int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
			       struct obd_export *, struct obd_export *,
+3 −2
Original line number Diff line number Diff line
@@ -2046,12 +2046,13 @@ static inline ldlm_mode_t md_lock_match(struct obd_export *exp, __u64 flags,
}

static inline int md_init_ea_size(struct obd_export *exp, int easize,
				  int def_asize, int cookiesize)
				  int def_asize, int cookiesize,
				  int def_cookiesize)
{
	EXP_CHECK_MD_OP(exp, init_ea_size);
	EXP_MD_COUNTER_INCREMENT(exp, init_ea_size);
	return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
					       cookiesize);
					       cookiesize, def_cookiesize);
}

static inline int md_get_remote_perm(struct obd_export *exp,
+10 −5
Original line number Diff line number Diff line
@@ -56,7 +56,7 @@ int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
	__u32 valsize = sizeof(struct lov_desc);
	int rc, easize, def_easize, cookiesize;
	struct lov_desc desc;
	__u16 stripes;
	__u16 stripes, def_stripes;

	rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC,
			  &valsize, &desc, NULL);
@@ -67,15 +67,20 @@ int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
	lsm.lsm_stripe_count = stripes;
	easize = obd_size_diskmd(dt_exp, &lsm);

	lsm.lsm_stripe_count = desc.ld_default_stripe_count;
	def_stripes = min_t(__u32, desc.ld_default_stripe_count,
			    LOV_MAX_STRIPE_COUNT);
	lsm.lsm_stripe_count = def_stripes;
	def_easize = obd_size_diskmd(dt_exp, &lsm);

	cookiesize = stripes * sizeof(struct llog_cookie);

	CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n",
	       easize, cookiesize);
	/* default cookiesize is 0 because from 2.4 server doesn't send
	 * llog cookies to client. */
	CDEBUG(D_HA,
	       "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n",
	       def_easize, easize, cookiesize);

	rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize);
	rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0);
	return rc;
}

+1 −1
Original line number Diff line number Diff line
@@ -795,7 +795,7 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
	int rc, lmmsize;
	struct md_op_data *op_data;

	rc = ll_get_max_mdsize(sbi, &lmmsize);
	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc)
		return rc;

Loading