Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 122d328d authored by Trond Myklebust's avatar Trond Myklebust
Browse files

Merge branch 'layoutstats'

* layoutstats:
  pnfs/flexfiles: protect ktime manipulation with mirror lock
  nfs: provide pnfs_report_layoutstat when NFS42 is disabled
  pnfs/flexfiles: report layoutstat regularly
  nfs42: serialize LAYOUTSTATS calls of the same file
  pnfs/flexfiles: encode LAYOUTSTATS flexfiles specific data
  pnfs/flexfiles: add ff_layout_prepare_layoutstats
  pNFS/flexfiles: track when layout is first used
  pNFS/flexfiles: add layoutstats tracking
  pNFS/flexfiles: Remove unused struct members user_name, group_name
  pnfs: add pnfs_report_layoutstat helper function
  pNFS: fill in nfs42_layoutstat_ops
  NFSv.2/pnfs Add a LAYOUTSTATS rpc function
parents 18a60089 9bbd9bb4
Loading
Loading
Loading
Loading
+442 −11
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"
#include "../nfs42.h"

#define NFSDBG_FACILITY         NFSDBG_PNFS_LD

@@ -271,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,

		spin_lock_init(&fls->mirror_array[i]->lock);
		fls->mirror_array[i]->ds_count = ds_count;
		fls->mirror_array[i]->lseg = &fls->generic_hdr;

		/* deviceid */
		rc = decode_deviceid(&stream, &devid);
@@ -412,6 +414,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
	return 1;
}

/*
 * Account for one more in-flight operation on @timer.  When the counter
 * moves from zero to one a new busy period begins, so its start is stamped.
 */
static void
nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer)
{
	int in_flight = atomic_inc_return(&timer->n_ops);

	/* only the first outstanding request (re)starts the clock */
	if (in_flight != 1)
		return;
	timer->start_time = ktime_get();
}

/*
 * Drop one in-flight operation from @timer and return the time elapsed
 * since the timer was last stamped.  The stamp is moved forward to "now"
 * so that the next caller only gets the interval not yet handed out.
 * Warns (once) if the operation counter goes negative.
 */
static ktime_t
nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer)
{
	ktime_t began, ended;

	WARN_ON_ONCE(atomic_dec_return(&timer->n_ops) < 0);

	ended = ktime_get();
	began = timer->start_time;
	timer->start_time = ended;
	return ktime_sub(ended, began);
}

/* Time elapsed between @task's recorded tk_start and the current time. */
static ktime_t
nfs4_ff_layout_calc_completion_time(struct rpc_task *task)
{
	ktime_t finished = ktime_get();

	return ktime_sub(finished, task->tk_start);
}

/*
 * Common bookkeeping for the start of an IO on @mirror.
 *
 * Starts the busy timer of @layoutstat, initialises the mirror's
 * start_time and last_report_time on first use, and checks whether
 * FF_LAYOUTSTATS_REPORT_INTERVAL milliseconds have passed since the last
 * report.
 *
 * Returns true when a report is due (last_report_time is then advanced
 * to now), false otherwise.
 */
static bool
nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
			    struct nfs4_ff_layoutstat *layoutstat)
{
	static const ktime_t notime = {0};
	ktime_t now = ktime_get();
	bool report_due;

	nfs4_ff_start_busy_timer(&layoutstat->busy_timer);

	/* an all-zero timestamp means "never set" */
	if (ktime_equal(mirror->start_time, notime))
		mirror->start_time = now;
	if (ktime_equal(mirror->last_report_time, notime))
		mirror->last_report_time = now;

	report_due = ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
			FF_LAYOUTSTATS_REPORT_INTERVAL;
	if (report_due)
		mirror->last_report_time = now;

	return report_due;
}

/* Record one more requested operation of @requested bytes in @layoutstat. */
static void
nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
		__u64 requested)
{
	layoutstat->io_stat.ops_requested += 1;
	layoutstat->io_stat.bytes_requested += requested;
}

/*
 * Record the completion of one operation in @layoutstat.
 *
 * @requested:      bytes that were asked for
 * @completed:      bytes actually transferred
 * @time_completed: how long the operation took
 *
 * Updates the completion counters, ends one busy-timer operation and
 * folds both the busy interval and @time_completed into the totals.
 */
static void
nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
		__u64 requested,
		__u64 completed,
		ktime_t time_completed)
{
	struct nfs4_ff_io_stat *stats = &layoutstat->io_stat;
	ktime_t busy;

	stats->ops_completed += 1;
	stats->bytes_completed += completed;
	stats->bytes_not_delivered += requested - completed;

	busy = nfs4_ff_end_busy_timer(&layoutstat->busy_timer);
	stats->total_busy_time = ktime_add(stats->total_busy_time, busy);
	stats->aggregate_completion_time =
		ktime_add(stats->aggregate_completion_time, time_completed);
}

/*
 * Account for a READ of @requested bytes being started on @mirror and,
 * if the reporting interval has expired, trigger a LAYOUTSTATS report
 * for the inode owning the mirror's layout segment.
 */
static void
nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
		__u64 requested)
{
	struct nfs4_ff_layoutstat *stat = &mirror->read_stat;
	bool due;

	spin_lock(&mirror->lock);
	due = nfs4_ff_layoutstat_start_io(mirror, stat);
	nfs4_ff_layout_stat_io_update_requested(stat, requested);
	spin_unlock(&mirror->lock);

	/* the report itself is issued after the mirror lock is dropped */
	if (!due)
		return;
	pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
}

/*
 * Account for the completion of a READ on @mirror: @requested bytes were
 * asked for, @completed bytes were returned, and the elapsed time is
 * derived from @task.
 */
static void
nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested,
		__u64 completed)
{
	ktime_t elapsed;

	spin_lock(&mirror->lock);
	elapsed = nfs4_ff_layout_calc_completion_time(task);
	nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
			requested, completed, elapsed);
	spin_unlock(&mirror->lock);
}

/*
 * Account for a WRITE (or COMMIT) of @requested bytes being started on
 * @mirror and, if the reporting interval has expired, trigger a
 * LAYOUTSTATS report for the inode owning the mirror's layout segment.
 */
static void
nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
		__u64 requested)
{
	struct nfs4_ff_layoutstat *stat = &mirror->write_stat;
	bool due;

	spin_lock(&mirror->lock);
	due = nfs4_ff_layoutstat_start_io(mirror, stat);
	nfs4_ff_layout_stat_io_update_requested(stat, requested);
	spin_unlock(&mirror->lock);

	/* the report itself is issued after the mirror lock is dropped */
	if (!due)
		return;
	pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
}

/*
 * Account for the completion of a write-side operation on @mirror.
 *
 * When @committed is NFS_UNSTABLE both byte counts are recorded as zero;
 * the operation count and timing are still updated.
 */
static void
nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested,
		__u64 completed,
		enum nfs3_stable_how committed)
{
	ktime_t elapsed;

	if (committed == NFS_UNSTABLE) {
		requested = 0;
		completed = 0;
	}

	spin_lock(&mirror->lock);
	elapsed = nfs4_ff_layout_calc_completion_time(task);
	nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
			requested, completed, elapsed);
	spin_unlock(&mirror->lock);
}

static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
			    struct nfs_commit_info *cinfo,
@@ -906,6 +1048,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
static int ff_layout_read_prepare_common(struct rpc_task *task,
					 struct nfs_pgio_header *hdr)
{
	nfs4_ff_layout_stat_io_start_read(
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count);

	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
@@ -959,15 +1105,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
			hdr->args.lock_context, FMODE_READ) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -979,6 +1125,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

	nfs4_ff_layout_stat_io_end_read(task,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count, hdr->res.count);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1080,6 +1230,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
static int ff_layout_write_prepare_common(struct rpc_task *task,
					  struct nfs_pgio_header *hdr)
{
	nfs4_ff_layout_stat_io_start_write(
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count);

	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
@@ -1113,15 +1267,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
			hdr->args.lock_context, FMODE_WRITE) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -1131,6 +1285,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	nfs4_ff_layout_stat_io_end_write(task,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count, hdr->res.count,
			hdr->res.verf->committed);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1149,8 +1308,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}

/*
 * Shared COMMIT prepare step: count the COMMIT as a zero-byte write
 * start on the mirror selected by the commit index.
 */
static void ff_layout_commit_prepare_common(struct rpc_task *task,
		struct nfs_commit_data *cdata)
{
	struct nfs4_ff_layout_mirror *mirror;

	mirror = FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index);
	nfs4_ff_layout_stat_io_start_write(mirror, 0);
}

/* COMMIT prepare callback (v3 ops table): do the accounting, then start the call. */
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;

	ff_layout_commit_prepare_common(task, cdata);
	rpc_call_start(task);
}

@@ -1158,10 +1326,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *wdata = data;

	ff_layout_setup_sequence(wdata->ds_clp,
	if (ff_layout_setup_sequence(wdata->ds_clp,
				 &wdata->args.seq_args,
				 &wdata->res.seq_res,
				 task);
				 task))
		return;
	ff_layout_commit_prepare_common(task, data);
}

static void ff_layout_commit_done(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;
	struct nfs_page *req;
	__u64 count = 0;

	if (task->tk_status == 0) {
		list_for_each_entry(req, &cdata->pages, wb_list)
			count += req->wb_bytes;
	}

	nfs4_ff_layout_stat_io_end_write(task,
			FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
			count, count, NFS_FILE_SYNC);

	pnfs_generic_write_commit_done(task, data);
}

static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
@@ -1202,14 +1390,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {

/*
 * RPC callbacks used for COMMIT when the data server speaks NFSv3.
 * Completion goes through ff_layout_commit_done so that layoutstats are
 * updated before the generic commit handling runs.
 */
static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v3,
	.rpc_call_done = ff_layout_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};

/*
 * RPC callbacks used for COMMIT when the data server does not speak
 * NFSv3.  Completion goes through ff_layout_commit_done so that
 * layoutstats are updated before the generic commit handling runs.
 */
static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v4,
	.rpc_call_done = ff_layout_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};
@@ -1253,7 +1441,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
	if (fh)
		hdr->args.fh = fh;

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
@@ -1382,6 +1569,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
	if (fh)
		data->args.fh = fh;

	return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
				   vers == 3 ? &ff_layout_commit_call_ops_v3 :
					       &ff_layout_commit_call_ops_v4,
@@ -1485,6 +1673,247 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
	dprintk("%s: Return\n", __func__);
}

/*
 * Format the IPv4 address held in @sap into @buf (at most @buflen bytes).
 * Returns the snprintf() result.
 */
static int
ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
{
	const struct sockaddr_in *v4 = (struct sockaddr_in *)sap;
	int written;

	written = snprintf(buf, buflen, "%pI4", &v4->sin_addr);
	return written;
}

/*
 * Format the IPv6 address held in @sap into @buf (at most @buflen bytes),
 * without a scope id.  The presentation forms follow RFC 4291:
 *
 *   - Section 2.2.2: the ANY address is written as "::"
 *   - Section 2.2.2: the loopback address is written as "::1"
 *   - Section 2.2.3: v4-mapped addresses use the "::ffff:a.b.c.d" form
 *   - Section 2.2.1: everything else uses the compressed form
 *
 * Returns the snprintf() result.
 */
static size_t
ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
			  const int buflen)
{
	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
	const struct in6_addr *addr = &sin6->sin6_addr;
	int written;

	if (ipv6_addr_any(addr))
		written = snprintf(buf, buflen, "::");
	else if (ipv6_addr_loopback(addr))
		written = snprintf(buf, buflen, "::1");
	else if (ipv6_addr_v4mapped(addr))
		written = snprintf(buf, buflen, "::ffff:%pI4",
					&addr->s6_addr32[3]);
	else
		written = snprintf(buf, buflen, "%pI6c", addr);

	return written;
}

/*
 * Encode the address @da as two XDR opaques: the netid ("tcp" or "tcp6")
 * followed by the universal address string "<addr>.<port-hi>.<port-lo>".
 *
 * Nothing is encoded when the address cannot be formatted or when the
 * address family is neither AF_INET nor AF_INET6 (the latter also warns).
 *
 * Derived from rpc_sockaddr2uaddr.
 */
static void
ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
{
	struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
	char portbuf[RPCBIND_MAXUADDRPLEN];
	char addrbuf[RPCBIND_MAXUADDRLEN];
	char *netid;
	unsigned short port;
	int len, netid_len;
	__be32 *p;

	if (sap->sa_family == AF_INET) {
		if (!ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)))
			return;
		port = ntohs(((struct sockaddr_in *)sap)->sin_port);
		netid = "tcp";
		netid_len = 3;
	} else if (sap->sa_family == AF_INET6) {
		if (!ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)))
			return;
		port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
		netid = "tcp6";
		netid_len = 4;
	} else {
		/* we only support tcp and tcp6 */
		WARN_ON_ONCE(1);
		return;
	}

	/* the port is appended as two decimal octets, high byte first */
	snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
	len = strlcat(addrbuf, portbuf, sizeof(addrbuf));

	/* netid */
	p = xdr_reserve_space(xdr, 4 + netid_len);
	xdr_encode_opaque(p, netid, netid_len);

	/* universal address */
	p = xdr_reserve_space(xdr, 4 + len);
	xdr_encode_opaque(p, addrbuf, len);
}

/*
 * Encode @t as 12 bytes of XDR: a 64-bit seconds value followed by a
 * 32-bit nanoseconds value.
 */
static void
ff_layout_encode_nfstime(struct xdr_stream *xdr,
			 ktime_t t)
{
	struct timespec64 ts = ktime_to_timespec64(t);
	__be32 *p = xdr_reserve_space(xdr, 12);

	p = xdr_encode_hyper(p, ts.tv_sec);
	*p = cpu_to_be32(ts.tv_nsec);
}

/*
 * Encode one set of IO statistics: the five 64-bit counters of @stat in
 * declaration order, then total_busy_time and aggregate_completion_time
 * as time values.
 */
static void
ff_layout_encode_io_latency(struct xdr_stream *xdr,
			    struct nfs4_ff_io_stat *stat)
{
	__be32 *cursor = xdr_reserve_space(xdr, 5 * 8);

	cursor = xdr_encode_hyper(cursor, stat->ops_requested);
	cursor = xdr_encode_hyper(cursor, stat->bytes_requested);
	cursor = xdr_encode_hyper(cursor, stat->ops_completed);
	cursor = xdr_encode_hyper(cursor, stat->bytes_completed);
	cursor = xdr_encode_hyper(cursor, stat->bytes_not_delivered);

	ff_layout_encode_nfstime(xdr, stat->total_busy_time);
	ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
}

static void
ff_layout_encode_layoutstats(struct xdr_stream *xdr,
			     struct nfs42_layoutstat_args *args,
			     struct nfs42_layoutstat_devinfo *devinfo)
{
	struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;
	struct nfs4_pnfs_ds_addr *da;
	struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
	struct nfs_fh *fh = &mirror->fh_versions[0];
	__be32 *p, *start;

	da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
	dprintk("%s: DS %s: encoding address %s\n",
		__func__, ds->ds_remotestr, da->da_remotestr);
	/* layoutupdate length */
	start = xdr_reserve_space(xdr, 4);
	/* netaddr4 */
	ff_layout_encode_netaddr(xdr, da);
	/* nfs_fh4 */
	p = xdr_reserve_space(xdr, 4 + fh->size);
	xdr_encode_opaque(p, fh->data, fh->size);
	/* ff_io_latency4 read */
	spin_lock(&mirror->lock);
	ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
	/* ff_io_latency4 write */
	ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
	spin_unlock(&mirror->lock);
	/* nfstime4 */
	ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
	/* bool */
	p = xdr_reserve_space(xdr, 4);
	*p = cpu_to_be32(false);

	*start = cpu_to_be32((xdr->p - start - 1) * 4);
}

/*
 * Fill args->devinfo[] with one LAYOUTSTATS entry per usable mirror of
 * @pls.
 *
 * @args:      LAYOUTSTATS arguments whose devinfo array is populated
 * @pls:       layout segment whose mirrors are reported
 * @dev_count: in/out index of the next free devinfo slot
 * @dev_limit: number of devinfo slots that may be used
 *
 * Mirrors without a data server are skipped.  Every entry that is filled
 * takes a reference on @pls (dropped again in cleanup_layoutstats).
 *
 * Returns true while there is still room for further entries, false
 * once @dev_limit has been reached.
 */
static bool
ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
			       struct pnfs_layout_segment *pls,
			       int *dev_count, int dev_limit)
{
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_deviceid_node *dev;
	struct nfs42_layoutstat_devinfo *devinfo;
	int i;

	/* the mirror count is one past the last valid mirror index */
	for (i = 0; i < FF_LAYOUT_MIRROR_COUNT(pls); i++) {
		if (*dev_count >= dev_limit)
			break;
		mirror = FF_LAYOUT_COMP(pls, i);
		if (!mirror || !mirror->mirror_ds)
			continue;
		dev = FF_LAYOUT_DEVID_NODE(pls, i);
		devinfo = &args->devinfo[*dev_count];
		memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
		devinfo->offset = pls->pls_range.offset;
		devinfo->length = pls->pls_range.length;
		/* well, we don't really know if IO is continuous or not! */
		/*
		 * The counters are updated under mirror->lock by the IO
		 * accounting helpers, so take the same lock for a
		 * consistent snapshot.  The *_count fields report
		 * operations, the *_bytes fields report bytes.
		 */
		spin_lock(&mirror->lock);
		devinfo->read_count = mirror->read_stat.io_stat.ops_completed;
		devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
		devinfo->write_count = mirror->write_stat.io_stat.ops_completed;
		devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
		spin_unlock(&mirror->lock);
		devinfo->layout_type = LAYOUT_FLEX_FILES;
		devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
		devinfo->layout_private = mirror;
		/* lseg refcount put in cleanup_layoutstats */
		pnfs_get_lseg(pls);

		++(*dev_count);
	}

	return *dev_count < dev_limit;
}

/*
 * Build the per-device statistics array for a LAYOUTSTATS call.
 *
 * Pass one counts the mirrors of all layout segments of args->inode
 * (capped at PNFS_LAYOUTSTATS_MAXDEV) to size args->devinfo.  Pass two
 * fills the array.  The inode lock is dropped for the allocation, so the
 * segment list may have changed by the time pass two runs; pass two is
 * therefore limited to the number of slots actually allocated.
 *
 * On success args->devinfo and args->num_dev are set and 0 is returned.
 * Returns -ENOMEM if the array cannot be allocated.
 */
static int
ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
{
	struct pnfs_layout_segment *pls;
	int dev_limit = 0;
	int dev_count = 0;

	spin_lock(&args->inode->i_lock);
	list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
		dev_limit += FF_LAYOUT_MIRROR_COUNT(pls);
	}
	spin_unlock(&args->inode->i_lock);
	/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
	if (dev_limit > PNFS_LAYOUTSTATS_MAXDEV) {
		dprintk("%s: truncating devinfo to limit (%d:%d)\n",
			__func__, dev_limit, PNFS_LAYOUTSTATS_MAXDEV);
		dev_limit = PNFS_LAYOUTSTATS_MAXDEV;
	}
	args->devinfo = kmalloc_array(dev_limit, sizeof(*args->devinfo),
				      GFP_KERNEL);
	if (!args->devinfo)
		return -ENOMEM;

	spin_lock(&args->inode->i_lock);
	list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
		/* never fill more slots than were allocated above */
		if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count,
						    dev_limit)) {
			break;
		}
	}
	spin_unlock(&args->inode->i_lock);
	args->num_dev = dev_count;

	return 0;
}

static void
ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
{
	struct nfs4_ff_layout_mirror *mirror;
	int i;

	for (i = 0; i < data->args.num_dev; i++) {
		mirror = data->args.devinfo[i].layout_private;
		data->args.devinfo[i].layout_private = NULL;
		pnfs_put_lseg(mirror->lseg);
	}
}

static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.id			= LAYOUT_FLEX_FILES,
	.name			= "LAYOUT_FLEX_FILES",
@@ -1507,6 +1936,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.alloc_deviceid_node    = ff_layout_alloc_deviceid_node,
	.encode_layoutreturn    = ff_layout_encode_layoutreturn,
	.sync			= pnfs_nfs_generic_sync,
	.prepare_layoutstats	= ff_layout_prepare_layoutstats,
	.cleanup_layoutstats	= ff_layout_cleanup_layoutstats,
};

static int __init nfs4flexfilelayout_init(void)
+28 −2
Original line number Diff line number Diff line
@@ -15,6 +15,9 @@
 * due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096

/* LAYOUTSTATS report interval in ms */
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)

struct nfs4_ff_ds_version {
	u32				version;
	u32				minor_version;
@@ -41,19 +44,42 @@ struct nfs4_ff_layout_ds_err {
	struct nfs4_deviceid		deviceid;
};

/* Accumulated IO counters for one direction (read or write) of a mirror. */
struct nfs4_ff_io_stat {
	__u64				ops_requested;	/* operations started */
	__u64				bytes_requested; /* bytes asked for by started operations */
	__u64				ops_completed;	/* operations finished */
	__u64				bytes_completed; /* bytes transferred by finished operations */
	__u64				bytes_not_delivered; /* sum of (requested - completed) */
	ktime_t				total_busy_time; /* sum of busy-timer intervals */
	ktime_t				aggregate_completion_time; /* sum of per-operation durations */
};

/* Tracks how long at least one operation has been outstanding. */
struct nfs4_ff_busy_timer {
	ktime_t start_time;	/* stamped on first start and on every end */
	atomic_t n_ops;		/* operations currently in flight */
};

/* Per-direction layoutstats state: the counters plus their busy timer. */
struct nfs4_ff_layoutstat {
	struct nfs4_ff_io_stat io_stat;
	struct nfs4_ff_busy_timer busy_timer;
};

/* One mirror of a flexfiles layout segment, including its IO statistics. */
struct nfs4_ff_layout_mirror {
	struct pnfs_layout_segment	*lseg; /* back pointer */
	u32				ds_count;
	u32				efficiency;
	struct nfs4_ff_layout_ds	*mirror_ds;
	u32				fh_versions_cnt;
	struct nfs_fh			*fh_versions;
	nfs4_stateid			stateid;
	struct nfs4_string		user_name;
	struct nfs4_string		group_name;
	u32				uid;
	u32				gid;
	struct rpc_cred			*cred;
	spinlock_t			lock;	/* held while the stats below are updated */
	struct nfs4_ff_layoutstat	read_stat;
	struct nfs4_ff_layoutstat	write_stat;
	ktime_t				start_time;	/* time of the first IO on this mirror */
	ktime_t				last_report_time; /* when a stats report was last due */
};

struct nfs4_ff_layout_segment {
+8 −1
Original line number Diff line number Diff line
@@ -5,11 +5,18 @@
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H

/*
 * FIXME:  four LAYOUTSTATS calls per compound at most! Do we need to support
 * more? Need to consider not to pre-alloc too much for a compound.
 */
#define PNFS_LAYOUTSTATS_MAXDEV (4)

/* nfs4.2proc.c */
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);

int nfs42_proc_layoutstats_generic(struct nfs_server *,
				   struct nfs42_layoutstat_data *);
/* nfs4.2xdr.h */
extern struct rpc_procinfo nfs4_2_procedures[];

+81 −0
Original line number Diff line number Diff line
@@ -10,6 +10,11 @@
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "nfs42.h"
#include "iostat.h"
#include "pnfs.h"
#include "internal.h"

#define NFSDBG_FACILITY NFSDBG_PNFS

static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
				fmode_t fmode)
@@ -165,3 +170,79 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)

	return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}

/* rpc_call_prepare callback: set up the session sequence for LAYOUTSTATS. */
static void
nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs42_layoutstat_data *data = calldata;
	struct inode *inode = data->args.inode;

	nfs41_setup_sequence(nfs4_get_session(NFS_SERVER(inode)),
			     &data->args.seq_args,
			     &data->res.seq_res, task);
}

/*
 * rpc_call_done callback for LAYOUTSTATS.  Once the sequence handling is
 * finished, a failing status is only logged; no error handling is done.
 */
static void
nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
{
	struct nfs42_layoutstat_data *data = calldata;

	if (!nfs4_sequence_done(task, &data->res.seq_res))
		return;

	/* well, we don't care about errors at all! */
	if (task->tk_status == 0)
		return;
	dprintk("%s server returns %d\n", __func__, task->tk_status);
}

static void
nfs42_layoutstat_release(void *calldata)
{
	struct nfs42_layoutstat_data *data = calldata;
	struct nfs_server *nfss = NFS_SERVER(data->args.inode);

	if (nfss->pnfs_curr_ld->cleanup_layoutstats)
		nfss->pnfs_curr_ld->cleanup_layoutstats(data);

	pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout);
	smp_mb__before_atomic();
	clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags);
	smp_mb__after_atomic();
	nfs_iput_and_deactive(data->inode);
	kfree(data->args.devinfo);
	kfree(data);
}

/* RPC callbacks for the asynchronous LAYOUTSTATS call. */
static const struct rpc_call_ops nfs42_layoutstat_ops = {
	.rpc_call_prepare = nfs42_layoutstat_prepare,
	.rpc_call_done = nfs42_layoutstat_done,
	.rpc_release = nfs42_layoutstat_release,
};

/*
 * Start an asynchronous LAYOUTSTATS call.
 *
 * @server: server whose RPC client is used for the call
 * @data:   call data; data->args must already be filled in
 *
 * The inode in data->args.inode is grabbed for the duration of the call.
 * If that fails, @data is released here and -EAGAIN is returned.
 *
 * Returns 0 once the task has been started, or the error reported by
 * rpc_run_task().
 */
int nfs42_proc_layoutstats_generic(struct nfs_server *server,
				   struct nfs42_layoutstat_data *data)
{
	struct rpc_message msg = {
		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS],
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
	};
	struct rpc_task_setup task_setup = {
		.rpc_client = server->client,
		.rpc_message = &msg,
		.callback_ops = &nfs42_layoutstat_ops,
		.callback_data = data,
		.flags = RPC_TASK_ASYNC,
	};
	struct rpc_task *task;

	data->inode = nfs_igrab_and_active(data->args.inode);
	if (!data->inode) {
		nfs42_layoutstat_release(data);
		return -EAGAIN;
	}
	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
	task = rpc_run_task(&task_setup);
	if (IS_ERR(task))
		return PTR_ERR(task);
	/*
	 * rpc_run_task() hands back a reference to the task.  We do not
	 * wait for the result, so drop it right away instead of leaking it.
	 */
	rpc_put_task(task);
	return 0;
}
+122 −0
Original line number Diff line number Diff line
@@ -4,6 +4,8 @@
#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
#define __LINUX_FS_NFS_NFS4_2XDR_H

#include "nfs42.h"

#define encode_fallocate_maxsz		(encode_stateid_maxsz + \
					 2 /* offset */ + \
					 2 /* length */)
@@ -22,6 +24,16 @@
					 1 /* whence */ + \
					 2 /* offset */ + \
					 2 /* length */)
#define encode_io_info_maxsz		4
#define encode_layoutstats_maxsz	(op_decode_hdr_maxsz + \
					2 /* offset */ + \
					2 /* length */ + \
					encode_stateid_maxsz + \
					encode_io_info_maxsz + \
					encode_io_info_maxsz + \
					1 /* opaque devaddr4 length */ + \
					XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
#define decode_layoutstats_maxsz	(op_decode_hdr_maxsz)

#define NFS4_enc_allocate_sz		(compound_encode_hdr_maxsz + \
					 encode_putfh_maxsz + \
@@ -45,6 +57,14 @@
#define NFS4_dec_seek_sz		(compound_decode_hdr_maxsz + \
					 decode_putfh_maxsz + \
					 decode_seek_maxsz)
#define NFS4_enc_layoutstats_sz		(compound_encode_hdr_maxsz + \
					 encode_sequence_maxsz + \
					 encode_putfh_maxsz + \
					 PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz)
#define NFS4_dec_layoutstats_sz		(compound_decode_hdr_maxsz + \
					 decode_sequence_maxsz + \
					 decode_putfh_maxsz + \
					 PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)


static void encode_fallocate(struct xdr_stream *xdr,
@@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr,
	encode_uint32(xdr, args->sa_what);
}

/*
 * Encode one LAYOUTSTATS operation for @devinfo: offset, length, stateid,
 * the read and write counters, the device id and the layout type.  The
 * layout-type specific body is produced by devinfo->layoutstats_encode
 * when set; otherwise a zero word is encoded in its place.
 */
static void encode_layoutstats(struct xdr_stream *xdr,
			       struct nfs42_layoutstat_args *args,
			       struct nfs42_layoutstat_devinfo *devinfo,
			       struct compound_hdr *hdr)
{
	__be32 *p;

	encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr);

	/* byte range covered */
	p = reserve_space(xdr, 8 + 8);
	p = xdr_encode_hyper(p, devinfo->offset);
	xdr_encode_hyper(p, devinfo->length);

	encode_nfs4_stateid(xdr, &args->stateid);

	/* four counters, the device id and the layout type */
	p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4);
	p = xdr_encode_hyper(p, devinfo->read_count);
	p = xdr_encode_hyper(p, devinfo->read_bytes);
	p = xdr_encode_hyper(p, devinfo->write_count);
	p = xdr_encode_hyper(p, devinfo->write_bytes);
	p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data,
			NFS4_DEVICEID4_SIZE);
	/* Encode layoutupdate4 */
	*p = cpu_to_be32(devinfo->layout_type);

	if (devinfo->layoutstats_encode == NULL) {
		encode_uint32(xdr, 0);
		return;
	}
	devinfo->layoutstats_encode(xdr, args, devinfo);
}

/*
 * Encode ALLOCATE request
 */
@@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
	encode_nops(&hdr);
}

/*
 * Encode LAYOUTSTATS request: SEQUENCE, PUTFH, then one LAYOUTSTATS
 * operation per entry in args->devinfo.
 */
static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
				     struct xdr_stream *xdr,
				     struct nfs42_layoutstat_args *args)
{
	struct compound_hdr hdr = {
		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
	};
	int idx = 0;

	encode_compound_hdr(xdr, req, &hdr);
	encode_sequence(xdr, &args->seq_args, &hdr);
	encode_putfh(xdr, args->fh, &hdr);

	WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
	while (idx < args->num_dev) {
		encode_layoutstats(xdr, args, &args->devinfo[idx], &hdr);
		idx++;
	}

	encode_nops(&hdr);
}

static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
	return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -169,6 +238,28 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
	return -EIO;
}

/*
 * Decode one LAYOUTSTATS result.
 *
 * The LAYOUTSTATS reply carries nothing beyond the operation status
 * (LAYOUTSTATS4res is void for every status), so decoding the operation
 * header is all there is to do.  Reading a further word here would
 * swallow the start of the next operation in the compound, or run past
 * the end of the reply for the last one.
 *
 * @res is unused; it is kept so the signature matches the caller.
 *
 * Returns the result of decode_op_hdr().
 */
static int decode_layoutstats(struct xdr_stream *xdr,
			      struct nfs42_layoutstat_res *res)
{
	return decode_op_hdr(xdr, OP_LAYOUTSTATS);
}

/*
 * Decode ALLOCATE request
 */
@@ -246,4 +337,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
out:
	return status;
}

/*
 * Decode LAYOUTSTATS reply: SEQUENCE, PUTFH, then one LAYOUTSTATS result
 * per device in res->num_dev.  Decoding stops at the first failure; the
 * final status is stored in res->rpc_status and returned.
 */
static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
				    struct xdr_stream *xdr,
				    struct nfs42_layoutstat_res *res)
{
	struct compound_hdr hdr;
	int status, i;

	status = decode_compound_hdr(xdr, &hdr);
	if (!status)
		status = decode_sequence(xdr, &res->seq_res, rqstp);
	if (!status)
		status = decode_putfh(xdr);
	if (!status) {
		WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
		for (i = 0; i < res->num_dev && !status; i++)
			status = decode_layoutstats(xdr, res);
	}

	res->rpc_status = status;
	return status;
}

#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
Loading