Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0a78ac4b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The main things are support for cephx v2 authentication protocol and
  basic support for rbd images within namespaces (myself).

  Also included are y2038 conversion patches from Arnd, a pile of
  miscellaneous fixes from Chengguang and Zheng's feature bit
  infrastructure for the filesystem"

* tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client: (40 commits)
  ceph: don't drop message if it contains more data than expected
  ceph: support cephfs' own feature bits
  crush: fix using plain integer as NULL warning
  libceph: remove unnecessary non NULL check for request_key
  ceph: refactor error handling code in ceph_reserve_caps()
  ceph: refactor ceph_unreserve_caps()
  ceph: change to void return type for __do_request()
  ceph: compare fsc->max_file_size and inode->i_size for max file size limit
  ceph: add additional size check in ceph_setattr()
  ceph: add additional offset check in ceph_write_iter()
  ceph: add additional range check in ceph_fallocate()
  ceph: add new field max_file_size in ceph_fs_client
  libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
  libceph: check authorizer reply/challenge length before reading
  libceph: implement CEPHX_V2 calculation mode
  libceph: add authorizer challenge
  libceph: factor out encrypt_authorizer()
  libceph: factor out __ceph_x_decrypt()
  libceph: factor out __prepare_write_connect()
  libceph: store ceph_auth_handshake pointer in ceph_connection
  ...
parents bfebeb16 0fcf6c02
Loading
Loading
Loading
Loading
+88 −37
Original line number Diff line number Diff line
@@ -181,6 +181,7 @@ struct rbd_image_header {
struct rbd_spec {
	u64		pool_id;
	const char	*pool_name;
	const char	*pool_ns;	/* NULL if default, never "" */

	const char	*image_id;
	const char	*image_name;
@@ -735,6 +736,7 @@ enum {
	Opt_lock_timeout,
	Opt_last_int,
	/* int args above */
	Opt_pool_ns,
	Opt_last_string,
	/* string args above */
	Opt_read_only,
@@ -749,6 +751,7 @@ static match_table_t rbd_opts_tokens = {
	{Opt_queue_depth, "queue_depth=%d"},
	{Opt_lock_timeout, "lock_timeout=%d"},
	/* int args above */
	{Opt_pool_ns, "_pool_ns=%s"},
	/* string args above */
	{Opt_read_only, "read_only"},
	{Opt_read_only, "ro"},		/* Alternate spelling */
@@ -776,9 +779,14 @@ struct rbd_options {
#define RBD_EXCLUSIVE_DEFAULT	false
#define RBD_TRIM_DEFAULT	true

struct parse_rbd_opts_ctx {
	struct rbd_spec		*spec;
	struct rbd_options	*opts;
};

static int parse_rbd_opts_token(char *c, void *private)
{
	struct rbd_options *rbd_opts = private;
	struct parse_rbd_opts_ctx *pctx = private;
	substring_t argstr[MAX_OPT_ARGS];
	int token, intval, ret;

@@ -786,7 +794,7 @@ static int parse_rbd_opts_token(char *c, void *private)
	if (token < Opt_last_int) {
		ret = match_int(&argstr[0], &intval);
		if (ret < 0) {
			pr_err("bad mount option arg (not int) at '%s'\n", c);
			pr_err("bad option arg (not int) at '%s'\n", c);
			return ret;
		}
		dout("got int token %d val %d\n", token, intval);
@@ -802,7 +810,7 @@ static int parse_rbd_opts_token(char *c, void *private)
			pr_err("queue_depth out of range\n");
			return -EINVAL;
		}
		rbd_opts->queue_depth = intval;
		pctx->opts->queue_depth = intval;
		break;
	case Opt_lock_timeout:
		/* 0 is "wait forever" (i.e. infinite timeout) */
@@ -810,22 +818,28 @@ static int parse_rbd_opts_token(char *c, void *private)
			pr_err("lock_timeout out of range\n");
			return -EINVAL;
		}
		rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000);
		pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000);
		break;
	case Opt_pool_ns:
		kfree(pctx->spec->pool_ns);
		pctx->spec->pool_ns = match_strdup(argstr);
		if (!pctx->spec->pool_ns)
			return -ENOMEM;
		break;
	case Opt_read_only:
		rbd_opts->read_only = true;
		pctx->opts->read_only = true;
		break;
	case Opt_read_write:
		rbd_opts->read_only = false;
		pctx->opts->read_only = false;
		break;
	case Opt_lock_on_read:
		rbd_opts->lock_on_read = true;
		pctx->opts->lock_on_read = true;
		break;
	case Opt_exclusive:
		rbd_opts->exclusive = true;
		pctx->opts->exclusive = true;
		break;
	case Opt_notrim:
		rbd_opts->trim = false;
		pctx->opts->trim = false;
		break;
	default:
		/* libceph prints "bad option" msg */
@@ -1452,7 +1466,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
	struct ceph_osd_request *osd_req = obj_request->osd_req;

	osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
	ktime_get_real_ts(&osd_req->r_mtime);
	ktime_get_real_ts64(&osd_req->r_mtime);
	osd_req->r_data_offset = obj_request->ex.oe_off;
}

@@ -1475,7 +1489,13 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
	req->r_callback = rbd_osd_req_callback;
	req->r_priv = obj_req;

	/*
	 * Data objects may be stored in a separate pool, but always in
	 * the same namespace in that pool as the header in its pool.
	 */
	ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
	req->r_base_oloc.pool = rbd_dev->layout.pool_id;

	if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
			rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
		goto err_req;
@@ -4119,6 +4139,14 @@ static ssize_t rbd_pool_id_show(struct device *dev,
			(unsigned long long) rbd_dev->spec->pool_id);
}

static ssize_t rbd_pool_ns_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);

	return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: "");
}

static ssize_t rbd_name_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
@@ -4217,6 +4245,7 @@ static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
static DEVICE_ATTR(pool_ns, 0444, rbd_pool_ns_show, NULL);
static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
@@ -4235,6 +4264,7 @@ static struct attribute *rbd_attrs[] = {
	&dev_attr_config_info.attr,
	&dev_attr_pool.attr,
	&dev_attr_pool_id.attr,
	&dev_attr_pool_ns.attr,
	&dev_attr_name.attr,
	&dev_attr_image_id.attr,
	&dev_attr_current_snap.attr,
@@ -4295,6 +4325,7 @@ static void rbd_spec_free(struct kref *kref)
	struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);

	kfree(spec->pool_name);
	kfree(spec->pool_ns);
	kfree(spec->image_id);
	kfree(spec->image_name);
	kfree(spec->snap_name);
@@ -4353,6 +4384,12 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
	rbd_dev->header.data_pool_id = CEPH_NOPOOL;
	ceph_oid_init(&rbd_dev->header_oid);
	rbd_dev->header_oloc.pool = spec->pool_id;
	if (spec->pool_ns) {
		WARN_ON(!*spec->pool_ns);
		rbd_dev->header_oloc.pool_ns =
		    ceph_find_or_create_string(spec->pool_ns,
					       strlen(spec->pool_ns));
	}

	mutex_init(&rbd_dev->watch_mutex);
	rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
@@ -4633,6 +4670,17 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
		parent_spec->pool_id = pool_id;
		parent_spec->image_id = image_id;
		parent_spec->snap_id = snap_id;

		/* TODO: support cloning across namespaces */
		if (rbd_dev->spec->pool_ns) {
			parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns,
						       GFP_KERNEL);
			if (!parent_spec->pool_ns) {
				ret = -ENOMEM;
				goto out_err;
			}
		}

		rbd_dev->parent_spec = parent_spec;
		parent_spec = NULL;	/* rbd_dev now owns this */
	} else {
@@ -5146,8 +5194,7 @@ static int rbd_add_parse_args(const char *buf,
	const char *mon_addrs;
	char *snap_name;
	size_t mon_addrs_size;
	struct rbd_spec *spec = NULL;
	struct rbd_options *rbd_opts = NULL;
	struct parse_rbd_opts_ctx pctx = { 0 };
	struct ceph_options *copts;
	int ret;

@@ -5171,22 +5218,22 @@ static int rbd_add_parse_args(const char *buf,
		goto out_err;
	}

	spec = rbd_spec_alloc();
	if (!spec)
	pctx.spec = rbd_spec_alloc();
	if (!pctx.spec)
		goto out_mem;

	spec->pool_name = dup_token(&buf, NULL);
	if (!spec->pool_name)
	pctx.spec->pool_name = dup_token(&buf, NULL);
	if (!pctx.spec->pool_name)
		goto out_mem;
	if (!*spec->pool_name) {
	if (!*pctx.spec->pool_name) {
		rbd_warn(NULL, "no pool name provided");
		goto out_err;
	}

	spec->image_name = dup_token(&buf, NULL);
	if (!spec->image_name)
	pctx.spec->image_name = dup_token(&buf, NULL);
	if (!pctx.spec->image_name)
		goto out_mem;
	if (!*spec->image_name) {
	if (!*pctx.spec->image_name) {
		rbd_warn(NULL, "no image name provided");
		goto out_err;
	}
@@ -5207,24 +5254,24 @@ static int rbd_add_parse_args(const char *buf,
	if (!snap_name)
		goto out_mem;
	*(snap_name + len) = '\0';
	spec->snap_name = snap_name;
	pctx.spec->snap_name = snap_name;

	/* Initialize all rbd options to the defaults */

	rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
	if (!rbd_opts)
	pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
	if (!pctx.opts)
		goto out_mem;

	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
	rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
	rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
	rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
	rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
	rbd_opts->trim = RBD_TRIM_DEFAULT;
	pctx.opts->read_only = RBD_READ_ONLY_DEFAULT;
	pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
	pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
	pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
	pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
	pctx.opts->trim = RBD_TRIM_DEFAULT;

	copts = ceph_parse_options(options, mon_addrs,
				   mon_addrs + mon_addrs_size - 1,
					parse_rbd_opts_token, rbd_opts);
				   parse_rbd_opts_token, &pctx);
	if (IS_ERR(copts)) {
		ret = PTR_ERR(copts);
		goto out_err;
@@ -5232,15 +5279,15 @@ static int rbd_add_parse_args(const char *buf,
	kfree(options);

	*ceph_opts = copts;
	*opts = rbd_opts;
	*rbd_spec = spec;
	*opts = pctx.opts;
	*rbd_spec = pctx.spec;

	return 0;
out_mem:
	ret = -ENOMEM;
out_err:
	kfree(rbd_opts);
	rbd_spec_put(spec);
	kfree(pctx.opts);
	rbd_spec_put(pctx.spec);
	kfree(options);

	return ret;
@@ -5586,8 +5633,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
		ret = rbd_register_watch(rbd_dev);
		if (ret) {
			if (ret == -ENOENT)
				pr_info("image %s/%s does not exist\n",
				pr_info("image %s/%s%s%s does not exist\n",
					rbd_dev->spec->pool_name,
					rbd_dev->spec->pool_ns ?: "",
					rbd_dev->spec->pool_ns ? "/" : "",
					rbd_dev->spec->image_name);
			goto err_out_format;
		}
@@ -5609,8 +5658,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
		ret = rbd_spec_fill_names(rbd_dev);
	if (ret) {
		if (ret == -ENOENT)
			pr_info("snap %s/%s@%s does not exist\n",
			pr_info("snap %s/%s%s%s@%s does not exist\n",
				rbd_dev->spec->pool_name,
				rbd_dev->spec->pool_ns ?: "",
				rbd_dev->spec->pool_ns ? "/" : "",
				rbd_dev->spec->image_name,
				rbd_dev->spec->snap_name);
		goto err_out_probe;
+22 −8
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
struct posix_acl *ceph_get_acl(struct inode *inode, int type)
{
	int size;
	unsigned int retry_cnt = 0;
	const char *name;
	char *value = NULL;
	struct posix_acl *acl;
@@ -60,6 +61,7 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
		BUG();
	}

retry:
	size = __ceph_getxattr(inode, name, "", 0);
	if (size > 0) {
		value = kzalloc(size, GFP_NOFS);
@@ -68,12 +70,22 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
		size = __ceph_getxattr(inode, name, value, size);
	}

	if (size > 0)
	if (size == -ERANGE && retry_cnt < 10) {
		retry_cnt++;
		kfree(value);
		value = NULL;
		goto retry;
	}

	if (size > 0) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
	else if (size == -ERANGE || size == -ENODATA || size == 0)
	} else if (size == -ENODATA || size == 0) {
		acl = NULL;
	else
	} else {
		pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n",
				   ceph_vinop(inode), size);
		acl = ERR_PTR(-EIO);
	}

	kfree(value);

@@ -89,6 +101,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
	const char *name = NULL;
	char *value = NULL;
	struct iattr newattrs;
	struct timespec64 old_ctime = inode->i_ctime;
	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;

	switch (type) {
@@ -133,7 +146,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
	if (new_mode != old_mode) {
		newattrs.ia_ctime = current_time(inode);
		newattrs.ia_mode = new_mode;
		newattrs.ia_valid = ATTR_MODE;
		newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
		ret = __ceph_setattr(inode, &newattrs);
		if (ret)
			goto out_free;
@@ -142,8 +155,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
	ret = __ceph_setxattr(inode, name, value, size, 0);
	if (ret) {
		if (new_mode != old_mode) {
			newattrs.ia_ctime = old_ctime;
			newattrs.ia_mode = old_mode;
			newattrs.ia_valid = ATTR_MODE;
			newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
			__ceph_setattr(inode, &newattrs);
		}
		goto out_free;
@@ -171,10 +185,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
		return err;

	if (acl) {
		int ret = posix_acl_equiv_mode(acl, mode);
		if (ret < 0)
		err = posix_acl_equiv_mode(acl, mode);
		if (err < 0)
			goto out_err;
		if (ret == 0) {
		if (err == 0) {
			posix_acl_release(acl);
			acl = NULL;
		}
+37 −37
Original line number Diff line number Diff line
@@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode,
 */
static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
{
	struct timespec ts;
	struct inode *inode;
	struct ceph_inode_info *ci;
	struct ceph_fs_client *fsc;
@@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);

	set_page_writeback(page);
	ts = timespec64_to_timespec(inode->i_mtime);
	err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
				   &ci->i_layout, snapc, page_off, len,
				   ceph_wbc.truncate_seq,
				   ceph_wbc.truncate_size,
				   &ts, &page, 1);
				   &inode->i_mtime, &page, 1);
	if (err < 0) {
		struct writeback_control tmp_wbc;
		if (!wbc)
@@ -1134,7 +1132,7 @@ static int ceph_writepages_start(struct address_space *mapping,
			pages = NULL;
		}

		req->r_mtime = timespec64_to_timespec(inode->i_mtime);
		req->r_mtime = inode->i_mtime;
		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
		BUG_ON(rc);
		req = NULL;
@@ -1431,7 +1429,7 @@ static void ceph_restore_sigs(sigset_t *oldset)
/*
 * vm ops
 */
static int ceph_filemap_fault(struct vm_fault *vmf)
static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = file_inode(vma->vm_file);
@@ -1439,8 +1437,9 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
	struct ceph_file_info *fi = vma->vm_file->private_data;
	struct page *pinned_page = NULL;
	loff_t off = vmf->pgoff << PAGE_SHIFT;
	int want, got, ret;
	int want, got, err;
	sigset_t oldset;
	vm_fault_t ret = VM_FAULT_SIGBUS;

	ceph_block_sigs(&oldset);

@@ -1452,8 +1451,8 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
		want = CEPH_CAP_FILE_CACHE;

	got = 0;
	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
	if (ret < 0)
	err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
	if (err < 0)
		goto out_restore;

	dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
@@ -1465,16 +1464,17 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
		ceph_add_rw_context(fi, &rw_ctx);
		ret = filemap_fault(vmf);
		ceph_del_rw_context(fi, &rw_ctx);
		dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
			inode, off, (size_t)PAGE_SIZE,
				ceph_cap_string(got), ret);
	} else
		ret = -EAGAIN;
		err = -EAGAIN;

	dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
	     inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret);
	if (pinned_page)
		put_page(pinned_page);
	ceph_put_cap_refs(ci, got);

	if (ret != -EAGAIN)
	if (err != -EAGAIN)
		goto out_restore;

	/* read inline data */
@@ -1482,7 +1482,6 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
		/* does not support inline data > PAGE_SIZE */
		ret = VM_FAULT_SIGBUS;
	} else {
		int ret1;
		struct address_space *mapping = inode->i_mapping;
		struct page *page = find_or_create_page(mapping, 0,
						mapping_gfp_constraint(mapping,
@@ -1491,32 +1490,32 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
			ret = VM_FAULT_OOM;
			goto out_inline;
		}
		ret1 = __ceph_do_getattr(inode, page,
		err = __ceph_do_getattr(inode, page,
					 CEPH_STAT_CAP_INLINE_DATA, true);
		if (ret1 < 0 || off >= i_size_read(inode)) {
		if (err < 0 || off >= i_size_read(inode)) {
			unlock_page(page);
			put_page(page);
			if (ret1 < 0)
				ret = ret1;
			if (err == -ENOMEM)
				ret = VM_FAULT_OOM;
			else
				ret = VM_FAULT_SIGBUS;
			goto out_inline;
		}
		if (ret1 < PAGE_SIZE)
			zero_user_segment(page, ret1, PAGE_SIZE);
		if (err < PAGE_SIZE)
			zero_user_segment(page, err, PAGE_SIZE);
		else
			flush_dcache_page(page);
		SetPageUptodate(page);
		vmf->page = page;
		ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
		dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
		dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
		     inode, off, (size_t)PAGE_SIZE, ret);
	}
out_restore:
	ceph_restore_sigs(&oldset);
	if (ret < 0)
		ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
	if (err < 0)
		ret = vmf_error(err);

	return ret;
}
@@ -1524,7 +1523,7 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
/*
 * Reuse write_begin here for simplicity.
 */
static int ceph_page_mkwrite(struct vm_fault *vmf)
static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = file_inode(vma->vm_file);
@@ -1535,8 +1534,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
	loff_t off = page_offset(page);
	loff_t size = i_size_read(inode);
	size_t len;
	int want, got, ret;
	int want, got, err;
	sigset_t oldset;
	vm_fault_t ret = VM_FAULT_SIGBUS;

	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
@@ -1550,10 +1550,10 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
			lock_page(page);
			locked_page = page;
		}
		ret = ceph_uninline_data(vma->vm_file, locked_page);
		err = ceph_uninline_data(vma->vm_file, locked_page);
		if (locked_page)
			unlock_page(locked_page);
		if (ret < 0)
		if (err < 0)
			goto out_free;
	}

@@ -1570,9 +1570,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
		want = CEPH_CAP_FILE_BUFFER;

	got = 0;
	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
	err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
			    &got, NULL);
	if (ret < 0)
	if (err < 0)
		goto out_free;

	dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
@@ -1590,13 +1590,13 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
			break;
		}

		ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
		if (ret >= 0) {
		err = ceph_update_writeable_page(vma->vm_file, off, len, page);
		if (err >= 0) {
			/* success.  we'll keep the page locked. */
			set_page_dirty(page);
			ret = VM_FAULT_LOCKED;
		}
	} while (ret == -EAGAIN);
	} while (err == -EAGAIN);

	if (ret == VM_FAULT_LOCKED ||
	    ci->i_inline_version != CEPH_INLINE_NONE) {
@@ -1610,14 +1610,14 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
			__mark_inode_dirty(inode, dirty);
	}

	dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
	dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n",
	     inode, off, len, ceph_cap_string(got), ret);
	ceph_put_cap_refs(ci, got);
out_free:
	ceph_restore_sigs(&oldset);
	ceph_free_cap_flush(prealloc_cf);
	if (ret < 0)
		ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
	if (err < 0)
		ret = vmf_error(err);
	return ret;
}

@@ -1734,7 +1734,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
		goto out;
	}

	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
	req->r_mtime = inode->i_mtime;
	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
	if (!err)
		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1776,7 +1776,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
			goto out_put;
	}

	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
	req->r_mtime = inode->i_mtime;
	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
	if (!err)
		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1937,7 +1937,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
				     0, false, true);
	err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);

	wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime);
	wr_req->r_mtime = ci->vfs_inode.i_mtime;
	err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);

	if (!err)
+7 −4
Original line number Diff line number Diff line
@@ -26,7 +26,8 @@

struct ceph_aux_inode {
	u64 	version;
	struct timespec	mtime;
	u64	mtime_sec;
	u64	mtime_nsec;
};

struct fscache_netfs ceph_cache_netfs = {
@@ -130,7 +131,8 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(

	memset(&aux, 0, sizeof(aux));
	aux.version = ci->i_version;
	aux.mtime = timespec64_to_timespec(inode->i_mtime);
	aux.mtime_sec = inode->i_mtime.tv_sec;
	aux.mtime_nsec = inode->i_mtime.tv_nsec;

	if (memcmp(data, &aux, sizeof(aux)) != 0)
		return FSCACHE_CHECKAUX_OBSOLETE;
@@ -163,7 +165,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
	if (!ci->fscache) {
		memset(&aux, 0, sizeof(aux));
		aux.version = ci->i_version;
		aux.mtime = timespec64_to_timespec(inode->i_mtime);
		aux.mtime_sec = inode->i_mtime.tv_sec;
		aux.mtime_nsec = inode->i_mtime.tv_nsec;
		ci->fscache = fscache_acquire_cookie(fsc->fscache,
						     &ceph_fscache_inode_object_def,
						     &ci->i_vino, sizeof(ci->i_vino),
+62 −76
Original line number Diff line number Diff line
@@ -156,6 +156,37 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
	spin_unlock(&mdsc->caps_list_lock);
}

static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps)
{
	struct ceph_cap *cap;
	int i;

	if (nr_caps) {
		BUG_ON(mdsc->caps_reserve_count < nr_caps);
		mdsc->caps_reserve_count -= nr_caps;
		if (mdsc->caps_avail_count >=
		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
			mdsc->caps_total_count -= nr_caps;
			for (i = 0; i < nr_caps; i++) {
				cap = list_first_entry(&mdsc->caps_list,
					struct ceph_cap, caps_item);
				list_del(&cap->caps_item);
				kmem_cache_free(ceph_cap_cachep, cap);
			}
		} else {
			mdsc->caps_avail_count += nr_caps;
		}

		dout("%s: caps %d = %d used + %d resv + %d avail\n",
		     __func__,
		     mdsc->caps_total_count, mdsc->caps_use_count,
		     mdsc->caps_reserve_count, mdsc->caps_avail_count);
		BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
						 mdsc->caps_reserve_count +
						 mdsc->caps_avail_count);
	}
}

/*
 * Called under mdsc->mutex.
 */
@@ -167,6 +198,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
	int have;
	int alloc = 0;
	int max_caps;
	int err = 0;
	bool trimmed = false;
	struct ceph_mds_session *s;
	LIST_HEAD(newcaps);
@@ -233,9 +265,14 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,

		pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
			ctx, need, have + alloc);
		goto out_nomem;
		err = -ENOMEM;
		break;
	}

	if (!err) {
		BUG_ON(have + alloc != need);
		ctx->count = need;
	}

	spin_lock(&mdsc->caps_list_lock);
	mdsc->caps_total_count += alloc;
@@ -245,78 +282,27 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
					 mdsc->caps_reserve_count +
					 mdsc->caps_avail_count);

	if (err)
		__ceph_unreserve_caps(mdsc, have + alloc);

	spin_unlock(&mdsc->caps_list_lock);

	ctx->count = need;
	dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
	     ctx, mdsc->caps_total_count, mdsc->caps_use_count,
	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
	return 0;

out_nomem:

	spin_lock(&mdsc->caps_list_lock);
	mdsc->caps_avail_count += have;
	mdsc->caps_reserve_count -= have;

	while (!list_empty(&newcaps)) {
		cap = list_first_entry(&newcaps,
				struct ceph_cap, caps_item);
		list_del(&cap->caps_item);

		/* Keep some preallocated caps around (ceph_min_count), to
		 * avoid lots of free/alloc churn. */
		if (mdsc->caps_avail_count >=
		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
			kmem_cache_free(ceph_cap_cachep, cap);
		} else {
			mdsc->caps_avail_count++;
			mdsc->caps_total_count++;
			list_add(&cap->caps_item, &mdsc->caps_list);
		}
	}

	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
					 mdsc->caps_reserve_count +
					 mdsc->caps_avail_count);
	spin_unlock(&mdsc->caps_list_lock);
	return -ENOMEM;
	return err;
}

int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
			struct ceph_cap_reservation *ctx)
{
	int i;
	struct ceph_cap *cap;

	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
	if (ctx->count) {
	spin_lock(&mdsc->caps_list_lock);
		BUG_ON(mdsc->caps_reserve_count < ctx->count);
		mdsc->caps_reserve_count -= ctx->count;
		if (mdsc->caps_avail_count >=
		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
			mdsc->caps_total_count -= ctx->count;
			for (i = 0; i < ctx->count; i++) {
				cap = list_first_entry(&mdsc->caps_list,
					struct ceph_cap, caps_item);
				list_del(&cap->caps_item);
				kmem_cache_free(ceph_cap_cachep, cap);
			}
		} else {
			mdsc->caps_avail_count += ctx->count;
		}
	__ceph_unreserve_caps(mdsc, ctx->count);
	ctx->count = 0;
		dout("unreserve caps %d = %d used + %d resv + %d avail\n",
		     mdsc->caps_total_count, mdsc->caps_use_count,
		     mdsc->caps_reserve_count, mdsc->caps_avail_count);
		BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
						 mdsc->caps_reserve_count +
						 mdsc->caps_avail_count);
	spin_unlock(&mdsc->caps_list_lock);
}
	return 0;
}

struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
			      struct ceph_cap_reservation *ctx)
@@ -1125,7 +1111,7 @@ struct cap_msg_args {
	u64			flush_tid, oldest_flush_tid, size, max_size;
	u64			xattr_version;
	struct ceph_buffer	*xattr_buf;
	struct timespec		atime, mtime, ctime;
	struct timespec64	atime, mtime, ctime;
	int			op, caps, wanted, dirty;
	u32			seq, issue_seq, mseq, time_warp_seq;
	u32			flags;
@@ -1146,7 +1132,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
	struct ceph_msg *msg;
	void *p;
	size_t extra_len;
	struct timespec zerotime = {0};
	struct timespec64 zerotime = {0};
	struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;

	dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
@@ -1186,9 +1172,9 @@ static int send_cap_msg(struct cap_msg_args *arg)

	fc->size = cpu_to_le64(arg->size);
	fc->max_size = cpu_to_le64(arg->max_size);
	ceph_encode_timespec(&fc->mtime, &arg->mtime);
	ceph_encode_timespec(&fc->atime, &arg->atime);
	ceph_encode_timespec(&fc->ctime, &arg->ctime);
	ceph_encode_timespec64(&fc->mtime, &arg->mtime);
	ceph_encode_timespec64(&fc->atime, &arg->atime);
	ceph_encode_timespec64(&fc->ctime, &arg->ctime);
	fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq);

	fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid));
@@ -1237,7 +1223,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
	 * We just zero these out for now, as the MDS ignores them unless
	 * the requisite feature flags are set (which we don't do yet).
	 */
	ceph_encode_timespec(p, &zerotime);
	ceph_encode_timespec64(p, &zerotime);
	p += sizeof(struct ceph_timespec);
	ceph_encode_64(&p, 0);

@@ -1360,9 +1346,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
		arg.xattr_buf = NULL;
	}

	arg.mtime = timespec64_to_timespec(inode->i_mtime);
	arg.atime = timespec64_to_timespec(inode->i_atime);
	arg.ctime = timespec64_to_timespec(inode->i_ctime);
	arg.mtime = inode->i_mtime;
	arg.atime = inode->i_atime;
	arg.ctime = inode->i_ctime;

	arg.op = op;
	arg.caps = cap->implemented;
@@ -3148,11 +3134,11 @@ static void handle_cap_grant(struct inode *inode,
	}

	if (newcaps & CEPH_CAP_ANY_RD) {
		struct timespec mtime, atime, ctime;
		struct timespec64 mtime, atime, ctime;
		/* ctime/mtime/atime? */
		ceph_decode_timespec(&mtime, &grant->mtime);
		ceph_decode_timespec(&atime, &grant->atime);
		ceph_decode_timespec(&ctime, &grant->ctime);
		ceph_decode_timespec64(&mtime, &grant->mtime);
		ceph_decode_timespec64(&atime, &grant->atime);
		ceph_decode_timespec64(&ctime, &grant->ctime);
		ceph_fill_file_time(inode, extra_info->issued,
				    le32_to_cpu(grant->time_warp_seq),
				    &ctime, &mtime, &atime);
Loading