Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 31990f0f authored by Linus Torvalds
Browse files

Merge tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The highlights are:

   - a series that fixes some old memory allocation issues in libceph
     (myself). We no longer allocate memory in places where allocation
     failures cannot be handled, and we no longer BUG when an
     allocation fails.

   - support for copy_file_range() syscall (Luis Henriques). If size and
     alignment conditions are met, it leverages RADOS copy-from
     operation. Otherwise, a local copy is performed.

   - a patch that reduces the memory requirement of ceph_sync_read()
     from the size of the entire read to the size of one object
     (Zheng Yan).

   - fallocate() syscall is now restricted to FALLOC_FL_PUNCH_HOLE (Luis
     Henriques)"

* tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client: (25 commits)
  ceph: new mount option to disable usage of copy-from op
  ceph: support copy_file_range file operation
  libceph: support the RADOS copy-from operation
  ceph: add non-blocking parameter to ceph_try_get_caps()
  libceph: check reply num_data_items in setup_request_data()
  libceph: preallocate message data items
  libceph, rbd, ceph: move ceph_osdc_alloc_messages() calls
  libceph: introduce alloc_watch_request()
  libceph: assign cookies in linger_submit()
  libceph: enable fallback to ceph_msg_new() in ceph_msgpool_get()
  ceph: num_ops is off by one in ceph_aio_retry_work()
  libceph: no need to call osd_req_opcode_valid() in osd_req_encode_op()
  ceph: set timeout conditionally in __cap_delay_requeue
  libceph: don't consume a ref on pagelist in ceph_msg_data_add_pagelist()
  libceph: introduce ceph_pagelist_alloc()
  libceph: osd_req_op_cls_init() doesn't need to take opcode
  libceph: bump CEPH_MSG_MAX_DATA_LEN
  ceph: only allow punch hole mode in fallocate
  ceph: refactor ceph_sync_read()
  ceph: check if LOOKUPNAME request was aborted when filling trace
  ...
parents a9ac6cc4 ea4cdc54
Loading
Loading
Loading
Loading
+5 −0
Original line number Original line Diff line number Diff line
@@ -151,6 +151,11 @@ Mount Options
        Report overall filesystem usage in statfs instead of using the root
        Report overall filesystem usage in statfs instead of using the root
        directory quota.
        directory quota.


  nocopyfrom
        Don't use the RADOS 'copy-from' operation to perform remote object
        copies.  Currently, it's only used in copy_file_range, which will revert
        to the default VFS implementation if this option is used.

More Information
More Information
================
================


+16 −12
Original line number Original line Diff line number Diff line
@@ -1500,9 +1500,6 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
			rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
			rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
		goto err_req;
		goto err_req;


	if (ceph_osdc_alloc_messages(req, GFP_NOIO))
		goto err_req;

	return req;
	return req;


err_req:
err_req:
@@ -1945,6 +1942,10 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
		}
		}
		if (ret)
		if (ret)
			return ret;
			return ret;

		ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
		if (ret)
			return ret;
	}
	}


	return 0;
	return 0;
@@ -2374,8 +2375,7 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
	if (!obj_req->osd_req)
	if (!obj_req->osd_req)
		return -ENOMEM;
		return -ENOMEM;


	ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
	ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup");
				  "copyup");
	if (ret)
	if (ret)
		return ret;
		return ret;


@@ -2405,6 +2405,10 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
		rbd_assert(0);
		rbd_assert(0);
	}
	}


	ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
	if (ret)
		return ret;

	rbd_obj_request_submit(obj_req);
	rbd_obj_request_submit(obj_req);
	return 0;
	return 0;
}
}
@@ -3784,10 +3788,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
	ceph_oloc_copy(&req->r_base_oloc, oloc);
	ceph_oloc_copy(&req->r_base_oloc, oloc);
	req->r_flags = CEPH_OSD_FLAG_READ;
	req->r_flags = CEPH_OSD_FLAG_READ;


	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
	if (ret)
		goto out_req;

	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
	if (IS_ERR(pages)) {
	if (IS_ERR(pages)) {
		ret = PTR_ERR(pages);
		ret = PTR_ERR(pages);
@@ -3798,6 +3798,10 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
	osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false,
	osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false,
					 true);
					 true);


	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
	if (ret)
		goto out_req;

	ceph_osdc_start_request(osdc, req, false);
	ceph_osdc_start_request(osdc, req, false);
	ret = ceph_osdc_wait_request(osdc, req);
	ret = ceph_osdc_wait_request(osdc, req);
	if (ret >= 0)
	if (ret >= 0)
@@ -6067,7 +6071,7 @@ static ssize_t rbd_remove_single_major(struct bus_type *bus,
 * create control files in sysfs
 * create control files in sysfs
 * /sys/bus/rbd/...
 * /sys/bus/rbd/...
 */
 */
static int rbd_sysfs_init(void)
static int __init rbd_sysfs_init(void)
{
{
	int ret;
	int ret;


@@ -6082,13 +6086,13 @@ static int rbd_sysfs_init(void)
	return ret;
	return ret;
}
}


static void rbd_sysfs_cleanup(void)
static void __exit rbd_sysfs_cleanup(void)
{
{
	bus_unregister(&rbd_bus_type);
	bus_unregister(&rbd_bus_type);
	device_unregister(&rbd_root_dev);
	device_unregister(&rbd_root_dev);
}
}


static int rbd_slab_init(void)
static int __init rbd_slab_init(void)
{
{
	rbd_assert(!rbd_img_request_cache);
	rbd_assert(!rbd_img_request_cache);
	rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
	rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
+6 −7
Original line number Original line Diff line number Diff line
@@ -104,6 +104,11 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
	struct timespec64 old_ctime = inode->i_ctime;
	struct timespec64 old_ctime = inode->i_ctime;
	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;


	if (ceph_snap(inode) != CEPH_NOSNAP) {
		ret = -EROFS;
		goto out;
	}

	switch (type) {
	switch (type) {
	case ACL_TYPE_ACCESS:
	case ACL_TYPE_ACCESS:
		name = XATTR_NAME_POSIX_ACL_ACCESS;
		name = XATTR_NAME_POSIX_ACL_ACCESS;
@@ -138,11 +143,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
			goto out_free;
			goto out_free;
	}
	}


	if (ceph_snap(inode) != CEPH_NOSNAP) {
		ret = -EROFS;
		goto out_free;
	}

	if (new_mode != old_mode) {
	if (new_mode != old_mode) {
		newattrs.ia_ctime = current_time(inode);
		newattrs.ia_ctime = current_time(inode);
		newattrs.ia_mode = new_mode;
		newattrs.ia_mode = new_mode;
@@ -206,10 +206,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
	tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL);
	tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL);
	if (!tmp_buf)
	if (!tmp_buf)
		goto out_err;
		goto out_err;
	pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL);
	pagelist = ceph_pagelist_alloc(GFP_KERNEL);
	if (!pagelist)
	if (!pagelist)
		goto out_err;
		goto out_err;
	ceph_pagelist_init(pagelist);


	err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
	err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
	if (err)
	if (err)
+1 −1
Original line number Original line Diff line number Diff line
@@ -322,7 +322,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
		/* caller of readpages does not hold buffer and read caps
		/* caller of readpages does not hold buffer and read caps
		 * (fadvise, madvise and readahead cases) */
		 * (fadvise, madvise and readahead cases) */
		int want = CEPH_CAP_FILE_CACHE;
		int want = CEPH_CAP_FILE_CACHE;
		ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
		ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, true, &got);
		if (ret < 0) {
		if (ret < 0) {
			dout("start_read %p, error getting cap\n", inode);
			dout("start_read %p, error getting cap\n", inode);
		} else if (!(got & want)) {
		} else if (!(got & want)) {
+12 −9
Original line number Original line Diff line number Diff line
@@ -519,9 +519,9 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 *    -> we take mdsc->cap_delay_lock
 *    -> we take mdsc->cap_delay_lock
 */
 */
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				struct ceph_inode_info *ci)
				struct ceph_inode_info *ci,
				bool set_timeout)
{
{
	__cap_set_timeouts(mdsc, ci);
	dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
	dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
	     ci->i_ceph_flags, ci->i_hold_caps_max);
	     ci->i_ceph_flags, ci->i_hold_caps_max);
	if (!mdsc->stopping) {
	if (!mdsc->stopping) {
@@ -531,6 +531,8 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				goto no_change;
				goto no_change;
			list_del_init(&ci->i_cap_delay_list);
			list_del_init(&ci->i_cap_delay_list);
		}
		}
		if (set_timeout)
			__cap_set_timeouts(mdsc, ci);
		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
no_change:
no_change:
		spin_unlock(&mdsc->cap_delay_lock);
		spin_unlock(&mdsc->cap_delay_lock);
@@ -720,7 +722,7 @@ void ceph_add_cap(struct inode *inode,
		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
		     ceph_cap_string(issued), ceph_cap_string(wanted),
		     ceph_cap_string(issued), ceph_cap_string(wanted),
		     ceph_cap_string(actual_wanted));
		     ceph_cap_string(actual_wanted));
		__cap_delay_requeue(mdsc, ci);
		__cap_delay_requeue(mdsc, ci, true);
	}
	}


	if (flags & CEPH_CAP_FLAG_AUTH) {
	if (flags & CEPH_CAP_FLAG_AUTH) {
@@ -1647,7 +1649,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
	    (mask & CEPH_CAP_FILE_BUFFER))
	    (mask & CEPH_CAP_FILE_BUFFER))
		dirty |= I_DIRTY_DATASYNC;
		dirty |= I_DIRTY_DATASYNC;
	__cap_delay_requeue(mdsc, ci);
	__cap_delay_requeue(mdsc, ci, true);
	return dirty;
	return dirty;
}
}


@@ -2065,7 +2067,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,


	/* Reschedule delayed caps release if we delayed anything */
	/* Reschedule delayed caps release if we delayed anything */
	if (delayed)
	if (delayed)
		__cap_delay_requeue(mdsc, ci);
		__cap_delay_requeue(mdsc, ci, false);


	spin_unlock(&ci->i_ceph_lock);
	spin_unlock(&ci->i_ceph_lock);


@@ -2125,7 +2127,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)


		if (delayed) {
		if (delayed) {
			spin_lock(&ci->i_ceph_lock);
			spin_lock(&ci->i_ceph_lock);
			__cap_delay_requeue(mdsc, ci);
			__cap_delay_requeue(mdsc, ci, true);
			spin_unlock(&ci->i_ceph_lock);
			spin_unlock(&ci->i_ceph_lock);
		}
		}
	} else {
	} else {
@@ -2671,17 +2673,18 @@ static void check_max_size(struct inode *inode, loff_t endoff)
		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
}
}


int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int *got)
int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
		      bool nonblock, int *got)
{
{
	int ret, err = 0;
	int ret, err = 0;


	BUG_ON(need & ~CEPH_CAP_FILE_RD);
	BUG_ON(need & ~CEPH_CAP_FILE_RD);
	BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
	BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
	ret = ceph_pool_perm_check(ci, need);
	ret = ceph_pool_perm_check(ci, need);
	if (ret < 0)
	if (ret < 0)
		return ret;
		return ret;


	ret = try_get_cap_refs(ci, need, want, 0, true, got, &err);
	ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err);
	if (ret) {
	if (ret) {
		if (err == -EAGAIN) {
		if (err == -EAGAIN) {
			ret = 0;
			ret = 0;
Loading