Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 72b5ac54 authored by Linus Torvalds
Browse files

Merge tag 'ceph-for-4.8-rc1' of git://github.com/ceph/ceph-client

Pull Ceph updates from Ilya Dryomov:
 "The highlights are:

   - RADOS namespace support in libceph and CephFS (Zheng Yan and
     myself).  The stopgaps added in 4.5 to deny access to inodes in
     namespaces are removed and CEPH_FEATURE_FS_FILE_LAYOUT_V2 feature
     bit is now fully supported

   - A large rework of the MDS cap flushing code (Zheng Yan)

   - Handle some of ->d_revalidate() in RCU mode (Jeff Layton).  We were
     overly pessimistic before, bailing at the first sight of LOOKUP_RCU

  On top of that we've got a few CephFS bug fixes, a couple of cleanups
  and Arnd's workaround for a weird genksyms issue"

* tag 'ceph-for-4.8-rc1' of git://github.com/ceph/ceph-client: (34 commits)
  ceph: fix symbol versioning for ceph_monc_do_statfs
  ceph: Correctly return NXIO errors from ceph_llseek
  ceph: Mark the file cache as unreclaimable
  ceph: optimize cap flush waiting
  ceph: cleanup ceph_flush_snaps()
  ceph: kick cap flushes before sending other cap message
  ceph: introduce an inode flag to indicates if snapflush is needed
  ceph: avoid sending duplicated cap flush message
  ceph: unify cap flush and snapcap flush
  ceph: use list instead of rbtree to track cap flushes
  ceph: update types of some local varibles
  ceph: include 'follows' of pending snapflush in cap reconnect message
  ceph: update cap reconnect message to version 3
  ceph: mount non-default filesystem by name
  libceph: fsmap.user subscription support
  ceph: handle LOOKUP_RCU in ceph_d_revalidate
  ceph: allow dentry_lease_is_valid to work under RCU walk
  ceph: clear d_fsinfo pointer under d_lock
  ceph: remove ceph_mdsc_lease_release
  ceph: don't use ->d_time
  ...
parents c7fac299 a0f2b652
Loading
Loading
Loading
Loading
+8 −7
Original line number Diff line number Diff line
@@ -1937,7 +1937,7 @@ static struct ceph_osd_request *rbd_osd_req_create(
	osd_req->r_callback = rbd_osd_req_callback;
	osd_req->r_priv = obj_request;

	osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
	osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id;
	if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s",
			     obj_request->object_name))
		goto fail;
@@ -1991,7 +1991,7 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
	osd_req->r_callback = rbd_osd_req_callback;
	osd_req->r_priv = obj_request;

	osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
	osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id;
	if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s",
			     obj_request->object_name))
		goto fail;
@@ -3995,10 +3995,11 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,

	/* Initialize the layout used for all rbd requests */

	rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
	rbd_dev->layout.fl_stripe_count = cpu_to_le32(1);
	rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
	rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
	rbd_dev->layout.stripe_unit = 1 << RBD_MAX_OBJ_ORDER;
	rbd_dev->layout.stripe_count = 1;
	rbd_dev->layout.object_size = 1 << RBD_MAX_OBJ_ORDER;
	rbd_dev->layout.pool_id = spec->pool_id;
	RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);

	/*
	 * If this is a mapping rbd_dev (as opposed to a parent one),
@@ -5187,7 +5188,7 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)

	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));

	rbd_dev->header_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
	rbd_dev->header_oloc.pool = rbd_dev->layout.pool_id;
	if (rbd_dev->image_format == 1)
		ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
				       spec->image_name, RBD_SUFFIX);
+57 −20
Original line number Diff line number Diff line
@@ -1730,7 +1730,8 @@ enum {
	POOL_WRITE	= 2,
};

static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
				s64 pool, struct ceph_string *pool_ns)
{
	struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1738,6 +1739,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
	struct rb_node **p, *parent;
	struct ceph_pool_perm *perm;
	struct page **pages;
	size_t pool_ns_len;
	int err = 0, err2 = 0, have = 0;

	down_read(&mdsc->pool_perm_rwsem);
@@ -1748,18 +1750,32 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
			p = &(*p)->rb_left;
		else if (pool > perm->pool)
			p = &(*p)->rb_right;
		else {
			int ret = ceph_compare_string(pool_ns,
						perm->pool_ns,
						perm->pool_ns_len);
			if (ret < 0)
				p = &(*p)->rb_left;
			else if (ret > 0)
				p = &(*p)->rb_right;
			else {
				have = perm->perm;
				break;
			}
		}
	}
	up_read(&mdsc->pool_perm_rwsem);
	if (*p)
		goto out;

	dout("__ceph_pool_perm_get pool %u no perm cached\n", pool);
	if (pool_ns)
		dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n",
		     pool, (int)pool_ns->len, pool_ns->str);
	else
		dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool);

	down_write(&mdsc->pool_perm_rwsem);
	p = &mdsc->pool_perm_tree.rb_node;
	parent = NULL;
	while (*p) {
		parent = *p;
@@ -1768,11 +1784,20 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
			p = &(*p)->rb_left;
		else if (pool > perm->pool)
			p = &(*p)->rb_right;
		else {
			int ret = ceph_compare_string(pool_ns,
						perm->pool_ns,
						perm->pool_ns_len);
			if (ret < 0)
				p = &(*p)->rb_left;
			else if (ret > 0)
				p = &(*p)->rb_right;
			else {
				have = perm->perm;
				break;
			}
		}
	}
	if (*p) {
		up_write(&mdsc->pool_perm_rwsem);
		goto out;
@@ -1788,6 +1813,8 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
	rd_req->r_flags = CEPH_OSD_FLAG_READ;
	osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0);
	rd_req->r_base_oloc.pool = pool;
	if (pool_ns)
		rd_req->r_base_oloc.pool_ns = ceph_get_string(pool_ns);
	ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino);

	err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS);
@@ -1841,7 +1868,8 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
		goto out_unlock;
	}

	perm = kmalloc(sizeof(*perm), GFP_NOFS);
	pool_ns_len = pool_ns ? pool_ns->len : 0;
	perm = kmalloc(sizeof(*perm) + pool_ns_len + 1, GFP_NOFS);
	if (!perm) {
		err = -ENOMEM;
		goto out_unlock;
@@ -1849,6 +1877,11 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)

	perm->pool = pool;
	perm->perm = have;
	perm->pool_ns_len = pool_ns_len;
	if (pool_ns_len > 0)
		memcpy(perm->pool_ns, pool_ns->str, pool_ns_len);
	perm->pool_ns[pool_ns_len] = 0;

	rb_link_node(&perm->node, parent, p);
	rb_insert_color(&perm->node, &mdsc->pool_perm_tree);
	err = 0;
@@ -1860,43 +1893,46 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
out:
	if (!err)
		err = have;
	dout("__ceph_pool_perm_get pool %u result = %d\n", pool, err);
	if (pool_ns)
		dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n",
		     pool, (int)pool_ns->len, pool_ns->str, err);
	else
		dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err);
	return err;
}

int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
{
	u32 pool;
	s64 pool;
	struct ceph_string *pool_ns;
	int ret, flags;

	/* does not support pool namespace yet */
	if (ci->i_pool_ns_len)
		return -EIO;

	if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
				NOPOOLPERM))
		return 0;

	spin_lock(&ci->i_ceph_lock);
	flags = ci->i_ceph_flags;
	pool = ceph_file_layout_pg_pool(ci->i_layout);
	pool = ci->i_layout.pool_id;
	spin_unlock(&ci->i_ceph_lock);
check:
	if (flags & CEPH_I_POOL_PERM) {
		if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) {
			dout("ceph_pool_perm_check pool %u no read perm\n",
			dout("ceph_pool_perm_check pool %lld no read perm\n",
			     pool);
			return -EPERM;
		}
		if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) {
			dout("ceph_pool_perm_check pool %u no write perm\n",
			dout("ceph_pool_perm_check pool %lld no write perm\n",
			     pool);
			return -EPERM;
		}
		return 0;
	}

	ret = __ceph_pool_perm_get(ci, pool);
	pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
	ret = __ceph_pool_perm_get(ci, pool, pool_ns);
	ceph_put_string(pool_ns);
	if (ret < 0)
		return ret;

@@ -1907,10 +1943,11 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
		flags |= CEPH_I_POOL_WR;

	spin_lock(&ci->i_ceph_lock);
	if (pool == ceph_file_layout_pg_pool(ci->i_layout)) {
		ci->i_ceph_flags = flags;
	if (pool == ci->i_layout.pool_id &&
	    pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) {
		ci->i_ceph_flags |= flags;
        } else {
		pool = ceph_file_layout_pg_pool(ci->i_layout);
		pool = ci->i_layout.pool_id;
		flags = ci->i_ceph_flags;
	}
	spin_unlock(&ci->i_ceph_lock);
+1 −1
Original line number Diff line number Diff line
@@ -71,7 +71,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
					      &ceph_fscache_fsid_object_def,
					      fsc, true);
	if (!fsc->fscache)
		pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);

	return 0;
}
+466 −407

File changed.

Preview size limit exceeded, changes collapsed.

+48 −25
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ int ceph_init_dentry(struct dentry *dentry)

	di->dentry = dentry;
	di->lease_session = NULL;
	dentry->d_time = jiffies;
	di->time = jiffies;
	/* avoid reordering d_fsdata setup so that the check above is safe */
	smp_mb();
	dentry->d_fsdata = di;
@@ -1124,7 +1124,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
void ceph_invalidate_dentry_lease(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	dentry->d_time = jiffies;
	ceph_dentry(dentry)->time = jiffies;
	ceph_dentry(dentry)->lease_shared_gen = 0;
	spin_unlock(&dentry->d_lock);
}
@@ -1133,7 +1133,8 @@ void ceph_invalidate_dentry_lease(struct dentry *dentry)
 * Check if dentry lease is valid.  If not, delete the lease.  Try to
 * renew if the lease is more than half up.
 */
static int dentry_lease_is_valid(struct dentry *dentry)
static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags,
				 struct inode *dir)
{
	struct ceph_dentry_info *di;
	struct ceph_mds_session *s;
@@ -1141,12 +1142,11 @@ static int dentry_lease_is_valid(struct dentry *dentry)
	u32 gen;
	unsigned long ttl;
	struct ceph_mds_session *session = NULL;
	struct inode *dir = NULL;
	u32 seq = 0;

	spin_lock(&dentry->d_lock);
	di = ceph_dentry(dentry);
	if (di->lease_session) {
	if (di && di->lease_session) {
		s = di->lease_session;
		spin_lock(&s->s_gen_ttl_lock);
		gen = s->s_cap_gen;
@@ -1154,13 +1154,19 @@ static int dentry_lease_is_valid(struct dentry *dentry)
		spin_unlock(&s->s_gen_ttl_lock);

		if (di->lease_gen == gen &&
		    time_before(jiffies, dentry->d_time) &&
		    time_before(jiffies, di->time) &&
		    time_before(jiffies, ttl)) {
			valid = 1;
			if (di->lease_renew_after &&
			    time_after(jiffies, di->lease_renew_after)) {
				/* we should renew */
				dir = d_inode(dentry->d_parent);
				/*
				 * We should renew. If we're in RCU walk mode
				 * though, we can't do that so just return
				 * -ECHILD.
				 */
				if (flags & LOOKUP_RCU) {
					valid = -ECHILD;
				} else {
					session = ceph_get_mds_session(s);
					seq = di->lease_seq;
					di->lease_renew_after = 0;
@@ -1168,6 +1174,7 @@ static int dentry_lease_is_valid(struct dentry *dentry)
				}
			}
		}
	}
	spin_unlock(&dentry->d_lock);

	if (session) {
@@ -1207,15 +1214,19 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
	struct dentry *parent;
	struct inode *dir;

	if (flags & LOOKUP_RCU)
	if (flags & LOOKUP_RCU) {
		parent = ACCESS_ONCE(dentry->d_parent);
		dir = d_inode_rcu(parent);
		if (!dir)
			return -ECHILD;
	} else {
		parent = dget_parent(dentry);
		dir = d_inode(parent);
	}

	dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
	     dentry, d_inode(dentry), ceph_dentry(dentry)->offset);

	parent = dget_parent(dentry);
	dir = d_inode(parent);

	/* always trust cached snapped dentries, snapdir dentry */
	if (ceph_snap(dir) != CEPH_NOSNAP) {
		dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
@@ -1224,13 +1235,17 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
	} else if (d_really_is_positive(dentry) &&
		   ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) {
		valid = 1;
	} else if (dentry_lease_is_valid(dentry) ||
		   dir_lease_is_valid(dir, dentry)) {
	} else {
		valid = dentry_lease_is_valid(dentry, flags, dir);
		if (valid == -ECHILD)
			return valid;
		if (valid || dir_lease_is_valid(dir, dentry)) {
			if (d_really_is_positive(dentry))
				valid = ceph_is_any_caps(d_inode(dentry));
			else
				valid = 1;
		}
	}

	if (!valid) {
		struct ceph_mds_client *mdsc =
@@ -1238,6 +1253,9 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
		struct ceph_mds_request *req;
		int op, mask, err;

		if (flags & LOOKUP_RCU)
			return -ECHILD;

		op = ceph_snap(dir) == CEPH_SNAPDIR ?
			CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
		req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
@@ -1273,6 +1291,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
		ceph_dir_clear_complete(dir);
	}

	if (!(flags & LOOKUP_RCU))
		dput(parent);
	return valid;
}
@@ -1286,10 +1305,14 @@ static void ceph_d_release(struct dentry *dentry)

	dout("d_release %p\n", dentry);
	ceph_dentry_lru_del(dentry);

	spin_lock(&dentry->d_lock);
	dentry->d_fsdata = NULL;
	spin_unlock(&dentry->d_lock);

	if (di->lease_session)
		ceph_put_mds_session(di->lease_session);
	kmem_cache_free(ceph_dentry_cachep, di);
	dentry->d_fsdata = NULL;
}

static int ceph_snapdir_d_revalidate(struct dentry *dentry,
Loading