Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4e4adb2f authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable patches:
   - Fix atomicity of pNFS commit list updates
   - Fix NFSv4 handling of open(O_CREAT|O_EXCL|O_RDONLY)
   - nfs_set_pgio_error sometimes misses errors
   - Fix a thinko in xs_connect()
   - Fix borkage in _same_data_server_addrs_locked()
   - Fix a NULL pointer dereference of migration recovery ops for v4.2
     client
   - Don't let the ctime override attribute barriers.
   - Revert "NFSv4: Remove incorrect check in can_open_delegated()"
   - Ensure flexfiles pNFS driver updates the inode after write finishes
   - flexfiles must not pollute the attribute cache with attributes from
     the DS
   - Fix a protocol error in layoutreturn
   - Fix a protocol issue with NFSv4.1 CLOSE stateids

  Bugfixes + cleanups
   - pNFS blocks bugfixes from Christoph
   - Various cleanups from Anna
   - More fixes for delegation corner cases
   - Don't fsync twice for O_SYNC/IS_SYNC files
   - Fix pNFS and flexfiles layoutstats bugs
   - pnfs/flexfiles: avoid duplicate tracking of mirror data
   - pnfs: Fix layoutget/layoutreturn/return-on-close serialisation
     issues
   - pnfs/flexfiles: error handling retries a layoutget before fallback
     to MDS

  Features:
   - Full support for the OPEN NFS4_CREATE_EXCLUSIVE4_1 mode from
     Kinglong
   - More RDMA client transport improvements from Chuck
   - Removal of the deprecated ib_reg_phys_mr() and ib_rereg_phys_mr()
     verbs from the SUNRPC, Lustre and core infiniband tree.
   - Optimise away the close-to-open getattr if there is no cached data"

* tag 'nfs-for-4.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (108 commits)
  NFSv4: Respect the server imposed limit on how many changes we may cache
  NFSv4: Express delegation limit in units of pages
  Revert "NFS: Make close(2) asynchronous when closing NFS O_DIRECT files"
  NFS: Optimise away the close-to-open getattr if there is no cached data
  NFSv4.1/flexfiles: Clean up ff_layout_write_done_cb/ff_layout_commit_done_cb
  NFSv4.1/flexfiles: Mark the layout for return in ff_layout_io_track_ds_error()
  nfs: Remove unneeded checking of the return value from scnprintf
  nfs: Fix truncated client owner id without proto type
  NFSv4.1/flexfiles: Mark layout for return if the mirrors are invalid
  NFSv4.1/flexfiles: RW layouts are valid only if all mirrors are valid
  NFSv4.1/flexfiles: Fix incorrect usage of pnfs_generic_mark_devid_invalid()
  NFSv4.1/flexfiles: Fix freeing of mirrors
  NFSv4.1/pNFS: Don't request a minimal read layout beyond the end of file
  NFSv4.1/pnfs: Handle LAYOUTGET return values correctly
  NFSv4.1/pnfs: Don't ask for a read layout for an empty file.
  NFSv4.1: Fix a protocol issue with CLOSE stateids
  NFSv4.1/flexfiles: Don't mark the entire deviceid as bad for file errors
  SUNRPC: Prevent SYN+SYNACK+RST storms
  SUNRPC: xs_reset_transport must mark the connection as disconnected
  NFSv4.1/pnfs: Ensure layoutreturn reserves space for the opaque payload
  ...
parents 77a78806 5445b1fb
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -2285,6 +2285,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
			The default parameter value of '0' causes the kernel
			not to attempt recovery of lost locks.

	nfs4.layoutstats_timer =
			[NFSv4.2] Change the rate at which the kernel sends
			layoutstats to the pNFS metadata server.

			Setting this value to 0 causes the kernel to use
			whatever value is the default set by the layout
			driver. A non-zero value sets the minimum interval
			in seconds between layoutstats transmissions.

	nfsd.nfs4_disable_idmapping=
			[NFSv4] When set to the default of '1', the NFSv4
			server will return only numeric uids and gids to
+0 −67
Original line number Diff line number Diff line
@@ -1144,73 +1144,6 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
}
EXPORT_SYMBOL(ib_get_dma_mr);

/*
 * ib_reg_phys_mr - register a memory region through the device's
 * reg_phys_mr hook.
 *
 * The access flags are validated first with ib_check_mr_access(); a
 * failure there is returned as an ERR_PTR.  If the device does not
 * provide a reg_phys_mr hook, ERR_PTR(-ENOSYS) is returned.  Otherwise
 * the hook's result is returned; when that result is not an error
 * pointer, the new region is bound to @pd (device, pd, uobject fields),
 * @pd's use count is incremented and the region's own use count is
 * reset to zero.
 */
struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
			     struct ib_phys_buf *phys_buf_array,
			     int num_phys_buf,
			     int mr_access_flags,
			     u64 *iova_start)
{
	struct ib_mr *region;
	int rc = ib_check_mr_access(mr_access_flags);

	if (rc)
		return ERR_PTR(rc);

	if (!pd->device->reg_phys_mr)
		return ERR_PTR(-ENOSYS);

	region = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
					 mr_access_flags, iova_start);
	if (IS_ERR(region))
		return region;

	/* Driver succeeded: finish the generic part of the setup. */
	region->device  = pd->device;
	region->pd      = pd;
	region->uobject = NULL;
	atomic_inc(&pd->usecnt);
	atomic_set(&region->usecnt, 0);

	return region;
}
EXPORT_SYMBOL(ib_reg_phys_mr);

/*
 * ib_rereg_phys_mr - re-register an existing memory region through the
 * device's rereg_phys_mr hook.
 *
 * Returns the error from ib_check_mr_access() if the access flags are
 * rejected, -ENOSYS if the device has no rereg_phys_mr hook, -EBUSY if
 * the region's use count is non-zero, and otherwise whatever the hook
 * returns.  When the hook succeeds and IB_MR_REREG_PD is set in
 * @mr_rereg_mask, the use count of the region's previous PD is
 * decremented and that of @pd is incremented.
 */
int ib_rereg_phys_mr(struct ib_mr *mr,
		     int mr_rereg_mask,
		     struct ib_pd *pd,
		     struct ib_phys_buf *phys_buf_array,
		     int num_phys_buf,
		     int mr_access_flags,
		     u64 *iova_start)
{
	struct ib_pd *prev_pd;
	int rc;

	rc = ib_check_mr_access(mr_access_flags);
	if (rc)
		return rc;

	if (!mr->device->rereg_phys_mr)
		return -ENOSYS;

	if (atomic_read(&mr->usecnt))
		return -EBUSY;

	/* Sample the current PD before the driver call is made. */
	prev_pd = mr->pd;

	rc = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
				       phys_buf_array, num_phys_buf,
				       mr_access_flags, iova_start);
	if (rc)
		return rc;

	if (mr_rereg_mask & IB_MR_REREG_PD) {
		atomic_dec(&prev_pd->usecnt);
		atomic_inc(&pd->usecnt);
	}

	return 0;
}
EXPORT_SYMBOL(ib_rereg_phys_mr);

int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
{
	return mr->device->query_mr ?
+1 −18
Original line number Diff line number Diff line
@@ -46,13 +46,6 @@

struct pnfs_block_dev;

/*
 * Kinds of pNFS block volume.  Each enumerator is given an explicit
 * numeric value (0-3).
 * NOTE(review): the explicit values presumably mirror an on-the-wire
 * encoding -- confirm against the protocol definition before renumbering.
 */
enum pnfs_block_volume_type {
	PNFS_BLOCK_VOLUME_SIMPLE	= 0,
	PNFS_BLOCK_VOLUME_SLICE		= 1,
	PNFS_BLOCK_VOLUME_CONCAT	= 2,
	PNFS_BLOCK_VOLUME_STRIPE	= 3,
};

#define PNFS_BLOCK_MAX_UUIDS	4
#define PNFS_BLOCK_MAX_DEVICES	64

@@ -117,13 +110,6 @@ struct pnfs_block_dev {
			struct pnfs_block_dev_map *map);
};

/*
 * State of a block-layout extent; used as the type of
 * pnfs_block_extent.be_state.  Each enumerator is given an explicit
 * numeric value (0-3).
 */
enum exstate4 {
	PNFS_BLOCK_READWRITE_DATA	= 0,
	PNFS_BLOCK_READ_DATA		= 1,
	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
};

/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
	union {
@@ -134,15 +120,12 @@ struct pnfs_block_extent {
	sector_t	be_f_offset;	/* the starting offset in the file */
	sector_t	be_length;	/* the size of the extent */
	sector_t	be_v_offset;	/* the starting offset in the volume */
	enum exstate4	be_state;	/* the state of this extent */
	enum pnfs_block_extent_state be_state;	/* the state of this extent */
#define EXTENT_WRITTEN		1
#define EXTENT_COMMITTING	2
	unsigned int	be_tag;
};

/* on the wire size of the extent */
#define BL_EXTENT_SIZE	(7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE)

struct pnfs_block_layout {
	struct pnfs_layout_hdr	bl_layout;
	struct rb_root		bl_ext_rw;
+7 −2
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ bl_free_device(struct pnfs_block_dev *dev)
		kfree(dev->children);
	} else {
		if (dev->bdev)
			blkdev_put(dev->bdev, FMODE_READ);
			blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
	}
}

@@ -65,6 +65,11 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
				return -EIO;
			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
			if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
				pr_info("signature too long: %d\n",
					b->simple.sigs[i].sig_len);
				return -EIO;
			}

			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
			if (!p)
@@ -195,7 +200,7 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
	if (!dev)
		return -EIO;

	d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
	d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(d->bdev)) {
		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
			MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
+11 −8
Original line number Diff line number Diff line
@@ -462,6 +462,12 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
	return err;
}

/*
 * Size in bytes of a layoutupdate buffer holding @count extents: one
 * __be32 for the number of entries, followed by @count entries of
 * PNFS_BLOCK_EXTENT_SIZE bytes each.
 */
static size_t ext_tree_layoutupdate_size(size_t count)
{
	size_t extents_len = PNFS_BLOCK_EXTENT_SIZE * count;

	/* leading __be32 carries the number of entries */
	return sizeof(__be32) + extents_len;
}

static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
		size_t buffer_size)
{
@@ -489,7 +495,7 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
			continue;

		(*count)++;
		if (*count * BL_EXTENT_SIZE > buffer_size) {
		if (ext_tree_layoutupdate_size(*count) > buffer_size) {
			/* keep counting.. */
			ret = -ENOSPC;
			continue;
@@ -530,7 +536,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
	if (unlikely(ret)) {
		ext_tree_free_commitdata(arg, buffer_size);

		buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count;
		buffer_size = ext_tree_layoutupdate_size(count);
		count = 0;

		arg->layoutupdate_pages =
@@ -549,18 +555,15 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
	}

	*start_p = cpu_to_be32(count);
	arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count;
	arg->layoutupdate_len = ext_tree_layoutupdate_size(count);

	if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
		__be32 *p = start_p;
		void *p = start_p, *end = p + arg->layoutupdate_len;
		int i = 0;

		for (p = start_p;
		     p < start_p + arg->layoutupdate_len;
		     p += PAGE_SIZE) {
		for ( ; p < end; p += PAGE_SIZE)
			arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
	}
	}

	dprintk("%s found %zu ranges\n", __func__, count);
	return 0;
Loading