Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 61845143 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-3.20' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "The main change is the pNFS block server support from Christoph, which
  allows an NFS client connected to shared disk to do block IO to the
  shared disk in place of NFS reads and writes.  This also requires xfs
  patches, which should arrive soon through the xfs tree, barring
  unexpected problems.  Support for other filesystems is also possible
  if there's interest.

  Thanks also to Chuck Lever for continuing work to get NFS/RDMA into
  shape"

* 'for-3.20' of git://linux-nfs.org/~bfields/linux: (32 commits)
  nfsd: default NFSv4.2 to on
  nfsd: pNFS block layout driver
  exportfs: add methods for block layout exports
  nfsd: add trace events
  nfsd: update documentation for pNFS support
  nfsd: implement pNFS layout recalls
  nfsd: implement pNFS operations
  nfsd: make find_any_file available outside nfs4state.c
  nfsd: make find/get/put file available outside nfs4state.c
  nfsd: make lookup/alloc/unhash_stid available outside nfs4state.c
  nfsd: add fh_fsid_match helper
  nfsd: move nfsd_fh_match to nfsfh.h
  fs: add FL_LAYOUT lease type
  fs: track fl_owner for leases
  nfs: add LAYOUT_TYPE_MAX enum value
  nfsd: factor out a helper to decode nfstime4 values
  sunrpc/lockd: fix references to the BKL
  nfsd: fix year-2038 nfs4 state problem
  svcrdma: Handle additional inline content
  svcrdma: Move read list XDR round-up logic
  ...
parents a26be149 c23ae601
Loading
Loading
Loading
Loading
+8 −15
Original line number Original line Diff line number Diff line
@@ -24,11 +24,6 @@ focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
"exactly once" semantics and better control and throttling of the
"exactly once" semantics and better control and throttling of the
resources allocated for each client.
resources allocated for each client.


Other NFSv4.1 features, Parallel NFS operations in particular,
are still under development out of tree.
See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
for more information.

The table below, taken from the NFSv4.1 document, lists
The table below, taken from the NFSv4.1 document, lists
the operations that are mandatory to implement (REQ), optional
the operations that are mandatory to implement (REQ), optional
(OPT), and NFSv4.0 operations that are required not to implement (MNI)
(OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -43,9 +38,7 @@ The OPTIONAL features identified and their abbreviations are as follows:
The following abbreviations indicate the linux server implementation status.
The following abbreviations indicate the linux server implementation status.
	I	Implemented NFSv4.1 operations.
	I	Implemented NFSv4.1 operations.
	NS	Not Supported.
	NS	Not Supported.
	NS*	unimplemented optional feature.
	NS*	Unimplemented optional feature.
	P	pNFS features implemented out of tree.
	PNS	pNFS features that are not supported yet (out of tree).


Operations
Operations


@@ -70,13 +63,13 @@ I | DESTROY_SESSION | REQ | | Section 18.37 |
I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
I  | FREE_STATEID         | REQ        |              | Section 18.38  |
I  | FREE_STATEID         | REQ        |              | Section 18.38  |
   | GETATTR              | REQ        |              | Section 18.7   |
   | GETATTR              | REQ        |              | Section 18.7   |
P  | GETDEVICEINFO        | OPT        | pNFS (REQ)   | Section 18.40  |
I  | GETDEVICEINFO        | OPT        | pNFS (REQ)   | Section 18.40  |
P  | GETDEVICELIST        | OPT        | pNFS (OPT)   | Section 18.41  |
NS*| GETDEVICELIST        | OPT        | pNFS (OPT)   | Section 18.41  |
   | GETFH                | REQ        |              | Section 18.8   |
   | GETFH                | REQ        |              | Section 18.8   |
NS*| GET_DIR_DELEGATION   | OPT        | DDELG (REQ)  | Section 18.39  |
NS*| GET_DIR_DELEGATION   | OPT        | DDELG (REQ)  | Section 18.39  |
P  | LAYOUTCOMMIT         | OPT        | pNFS (REQ)   | Section 18.42  |
I  | LAYOUTCOMMIT         | OPT        | pNFS (REQ)   | Section 18.42  |
P  | LAYOUTGET            | OPT        | pNFS (REQ)   | Section 18.43  |
I  | LAYOUTGET            | OPT        | pNFS (REQ)   | Section 18.43  |
P  | LAYOUTRETURN         | OPT        | pNFS (REQ)   | Section 18.44  |
I  | LAYOUTRETURN         | OPT        | pNFS (REQ)   | Section 18.44  |
   | LINK                 | OPT        |              | Section 18.9   |
   | LINK                 | OPT        |              | Section 18.9   |
   | LOCK                 | REQ        |              | Section 18.10  |
   | LOCK                 | REQ        |              | Section 18.10  |
   | LOCKT                | REQ        |              | Section 18.11  |
   | LOCKT                | REQ        |              | Section 18.11  |
@@ -122,9 +115,9 @@ Callback Operations
   |                         | MNI       | or OPT)     |               |
   |                         | MNI       | or OPT)     |               |
   +-------------------------+-----------+-------------+---------------+
   +-------------------------+-----------+-------------+---------------+
   | CB_GETATTR              | OPT       | FDELG (REQ) | Section 20.1  |
   | CB_GETATTR              | OPT       | FDELG (REQ) | Section 20.1  |
P  | CB_LAYOUTRECALL         | OPT       | pNFS (REQ)  | Section 20.3  |
I  | CB_LAYOUTRECALL         | OPT       | pNFS (REQ)  | Section 20.3  |
NS*| CB_NOTIFY               | OPT       | DDELG (REQ) | Section 20.4  |
NS*| CB_NOTIFY               | OPT       | DDELG (REQ) | Section 20.4  |
P  | CB_NOTIFY_DEVICEID      | OPT       | pNFS (OPT)  | Section 20.12 |
NS*| CB_NOTIFY_DEVICEID      | OPT       | pNFS (OPT)  | Section 20.12 |
NS*| CB_NOTIFY_LOCK          | OPT       |             | Section 20.11 |
NS*| CB_NOTIFY_LOCK          | OPT       |             | Section 20.11 |
NS*| CB_PUSH_DELEG           | OPT       | FDELG (OPT) | Section 20.5  |
NS*| CB_PUSH_DELEG           | OPT       | FDELG (OPT) | Section 20.5  |
   | CB_RECALL               | OPT       | FDELG,      | Section 20.2  |
   | CB_RECALL               | OPT       | FDELG,      | Section 20.2  |
+37 −0
Original line number Original line Diff line number Diff line
pNFS block layout server user guide

The Linux NFS server now supports the pNFS block layout extension.  In this
case the NFS server acts as Metadata Server (MDS) for pNFS, which in addition
to handling all the metadata access to the NFS export also hands out layouts
to the clients to directly access the underlying block devices that are
shared with the client.

To use pNFS block layouts with the Linux NFS server the exported file
system needs to support the pNFS block layouts (currently just XFS), and the
file system must sit on shared storage (typically iSCSI) that is accessible
to the clients in addition to the MDS.  As of now the file system needs to
sit directly on the exported volume; striping or concatenation of
volumes on the MDS and clients is not supported yet.

On the server, pNFS block volume support is automatically enabled if the file
system supports it.  On the client make sure the kernel has the CONFIG_PNFS_BLOCK
option enabled, the blkmapd daemon from nfs-utils is running, and the
file system is mounted using the NFSv4.1 protocol version (mount -o vers=4.1).

If the nfsd server needs to fence a non-responding client it calls
/sbin/nfsd-recall-failed with the first argument set to the IP address of
the client, and the second argument set to the device node without the /dev
prefix for the file system to be fenced. Below is an example file that shows
how to translate the device into a serial number from SCSI EVPD 0x80:

# The quoted 'EOF' delimiter keeps the invoking shell from expanding $1, $2,
# the ${...} references and the backticks while the file is being written;
# they must end up verbatim in the generated script.
cat > /sbin/nfsd-recall-failed << 'EOF'
#!/bin/sh
# Fencing hook called by nfsd: $1 is the IP address of the client, $2 is
# the device node (without the /dev prefix) of the file system to fence.

CLIENT="$1"
DEV="/dev/$2"
# Translate the device into its unit serial number from SCSI EVPD 0x80.
EVPD=`sg_inq --page=0x80 "${DEV}" | \
	grep "Unit serial number:" | \
	awk -F ': ' '{print $2}'`

echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
EOF
+2 −2
Original line number Original line Diff line number Diff line
@@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock);
static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
{
{
	/*
	/*
	 * We can get away with a static buffer because we're only
	 * We can get away with a static buffer because this is only called
	 * called with BKL held.
	 * from lockd, which is single-threaded.
	 */
	 */
	static char buf[2*NLM_MAXCOOKIELEN+1];
	static char buf[2*NLM_MAXCOOKIELEN+1];
	unsigned int i, len = sizeof(buf);
	unsigned int i, len = sizeof(buf);
+0 −8
Original line number Original line Diff line number Diff line
@@ -95,14 +95,6 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f)
	return p + XDR_QUADLEN(NFS2_FHSIZE);
	return p + XDR_QUADLEN(NFS2_FHSIZE);
}
}


static inline __be32 *
nlm_encode_fh(__be32 *p, struct nfs_fh *f)
{
	*p++ = htonl(NFS2_FHSIZE);
	memcpy(p, f->data, NFS2_FHSIZE);
	return p + XDR_QUADLEN(NFS2_FHSIZE);
}

/*
/*
 * Encode and decode owner handle
 * Encode and decode owner handle
 */
 */
+17 −9
Original line number Original line Diff line number Diff line
@@ -137,7 +137,7 @@


#define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
#define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
#define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
#define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
#define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG))
#define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
#define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)
#define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)


static bool lease_breaking(struct file_lock *fl)
static bool lease_breaking(struct file_lock *fl)
@@ -1371,6 +1371,8 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)


static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
{
{
	if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
		return false;
	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
		return false;
		return false;
	return locks_conflict(breaker, lease);
	return locks_conflict(breaker, lease);
@@ -1594,11 +1596,14 @@ int fcntl_getlease(struct file *filp)
 * conflict with the lease we're trying to set.
 * conflict with the lease we're trying to set.
 */
 */
static int
static int
check_conflicting_open(const struct dentry *dentry, const long arg)
check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
{
{
	int ret = 0;
	int ret = 0;
	struct inode *inode = dentry->d_inode;
	struct inode *inode = dentry->d_inode;


	if (flags & FL_LAYOUT)
		return 0;

	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
		return -EAGAIN;
		return -EAGAIN;


@@ -1647,7 +1652,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr


	spin_lock(&ctx->flc_lock);
	spin_lock(&ctx->flc_lock);
	time_out_leases(inode, &dispose);
	time_out_leases(inode, &dispose);
	error = check_conflicting_open(dentry, arg);
	error = check_conflicting_open(dentry, arg, lease->fl_flags);
	if (error)
	if (error)
		goto out;
		goto out;


@@ -1661,7 +1666,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
	 */
	 */
	error = -EAGAIN;
	error = -EAGAIN;
	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
		if (fl->fl_file == filp) {
		if (fl->fl_file == filp &&
		    fl->fl_owner == lease->fl_owner) {
			my_fl = fl;
			my_fl = fl;
			continue;
			continue;
		}
		}
@@ -1702,7 +1708,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
	 * precedes these checks.
	 * precedes these checks.
	 */
	 */
	smp_mb();
	smp_mb();
	error = check_conflicting_open(dentry, arg);
	error = check_conflicting_open(dentry, arg, lease->fl_flags);
	if (error) {
	if (error) {
		locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt);
		locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt);
		goto out;
		goto out;
@@ -1721,7 +1727,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
	return error;
	return error;
}
}


static int generic_delete_lease(struct file *filp)
static int generic_delete_lease(struct file *filp, void *owner)
{
{
	int error = -EAGAIN;
	int error = -EAGAIN;
	struct file_lock *fl, *victim = NULL;
	struct file_lock *fl, *victim = NULL;
@@ -1737,7 +1743,8 @@ static int generic_delete_lease(struct file *filp)


	spin_lock(&ctx->flc_lock);
	spin_lock(&ctx->flc_lock);
	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
		if (fl->fl_file == filp) {
		if (fl->fl_file == filp &&
		    fl->fl_owner == owner) {
			victim = fl;
			victim = fl;
			break;
			break;
		}
		}
@@ -1778,13 +1785,14 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,


	switch (arg) {
	switch (arg) {
	case F_UNLCK:
	case F_UNLCK:
		return generic_delete_lease(filp);
		return generic_delete_lease(filp, *priv);
	case F_RDLCK:
	case F_RDLCK:
	case F_WRLCK:
	case F_WRLCK:
		if (!(*flp)->fl_lmops->lm_break) {
		if (!(*flp)->fl_lmops->lm_break) {
			WARN_ON_ONCE(1);
			WARN_ON_ONCE(1);
			return -ENOLCK;
			return -ENOLCK;
		}
		}

		return generic_add_lease(filp, arg, flp, priv);
		return generic_add_lease(filp, arg, flp, priv);
	default:
	default:
		return -EINVAL;
		return -EINVAL;
@@ -1857,7 +1865,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{
{
	if (arg == F_UNLCK)
	if (arg == F_UNLCK)
		return vfs_setlease(filp, F_UNLCK, NULL, NULL);
		return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
	return do_fcntl_add_lease(fd, filp, arg);
	return do_fcntl_add_lease(fd, filp, arg);
}
}


Loading