Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ae00d4f3 authored by Sage Weil
Browse files

ceph: fix cap_snap and realm split



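The cap_snap creation/queueing relies on both the current i_head_snapc
_and_ the i_snap_realm pointers being correct, so that the new cap_snap
can properly reference the old context and the new i_head_snapc can be
updated to reference the new snaprealm's context.  Previously, cap_snaps
were queued before the realm info was updated and, on a realm split,
before the inode had been added to the new realm, so neither pointer
could be relied upon at queueing time.  To fix this, we: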

 - move inodes completely to the new (split) realm so that i_snap_realm
   is correct, and
 - generate the new snapc's _before_ queueing the cap_snaps in
   ceph_update_snap_trace().

Signed-off-by: Sage Weil <sage@newdream.net>
parent cfc0bf66
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
	if (i_size < page_off + len)
		len = i_size - page_off;

	dout("writepage %p page %p index %lu on %llu~%u\n",
	     inode, page, page->index, page_off, len);
	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
	     inode, page, page->index, page_off, len, snapc);

	writeback_stat = atomic_long_inc_return(&client->writeback_count);
	if (writeback_stat >
+29 −59
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
	INIT_LIST_HEAD(&realm->children);
	INIT_LIST_HEAD(&realm->child_item);
	INIT_LIST_HEAD(&realm->empty_item);
	INIT_LIST_HEAD(&realm->dirty_item);
	INIT_LIST_HEAD(&realm->inodes_with_caps);
	spin_lock_init(&realm->inodes_with_caps_lock);
	__insert_snap_realm(&mdsc->snap_realms, realm);
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
	struct ceph_snap_realm *realm;
	int invalidate = 0;
	int err = -ENOMEM;
	LIST_HEAD(dirty_realms);

	dout("update_snap_trace deletion=%d\n", deletion);
more:
@@ -626,24 +628,6 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
		}
	}

	if (le64_to_cpu(ri->seq) > realm->seq) {
		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
		/*
		 * if the realm seq has changed, queue a cap_snap for every
		 * inode with open caps.  we do this _before_ we update
		 * the realm info so that we prepare for writeback under the
		 * _previous_ snap context.
		 *
		 * ...unless it's a snap deletion!
		 */
		if (!deletion)
			queue_realm_cap_snaps(realm);
	} else {
		dout("update_snap_trace %llx %p seq %lld unchanged\n",
		     realm->ino, realm, realm->seq);
	}

	/* ensure the parent is correct */
	err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
	if (err < 0)
@@ -651,6 +635,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
	invalidate += err;

	if (le64_to_cpu(ri->seq) > realm->seq) {
		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
		/* update realm parameters, snap lists */
		realm->seq = le64_to_cpu(ri->seq);
		realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
		if (err < 0)
			goto fail;

		/* queue realm for cap_snap creation */
		list_add(&realm->dirty_item, &dirty_realms);

		invalidate = 1;
	} else if (!realm->cached_context) {
		dout("update_snap_trace %llx %p seq %lld new\n",
		     realm->ino, realm, realm->seq);
		invalidate = 1;
	} else {
		dout("update_snap_trace %llx %p seq %lld unchanged\n",
		     realm->ino, realm, realm->seq);
	}

	dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
	if (invalidate)
		rebuild_snap_realms(realm);

	/*
	 * queue cap snaps _after_ we've built the new snap contexts,
	 * so that i_head_snapc can be set appropriately.
	 */
	list_for_each_entry(realm, &dirty_realms, dirty_item) {
		queue_realm_cap_snaps(realm);
	}

	__cleanup_empty_realms(mdsc);
	return 0;

@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
			};
			struct inode *inode = ceph_find_inode(sb, vino);
			struct ceph_inode_info *ci;
			struct ceph_snap_realm *oldrealm;

			if (!inode)
				continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
			dout(" will move %p to split realm %llx %p\n",
			     inode, realm->ino, realm);
			/*
			 * Remove the inode from the realm's inode
			 * list, but don't add it to the new realm
			 * yet.  We don't want the cap_snap to be
			 * queued (again) by ceph_update_snap_trace()
			 * below.  Queue it _now_, under the old context.
			 * Move the inode to the new realm
			 */
			spin_lock(&realm->inodes_with_caps_lock);
			list_del_init(&ci->i_snap_realm_item);
			list_add(&ci->i_snap_realm_item,
				 &realm->inodes_with_caps);
			oldrealm = ci->i_snap_realm;
			ci->i_snap_realm = realm;
			spin_unlock(&realm->inodes_with_caps_lock);
			spin_unlock(&inode->i_lock);

			ceph_queue_cap_snap(ci);
			ceph_get_snap_realm(mdsc, realm);
			ceph_put_snap_realm(mdsc, oldrealm);

			iput(inode);
			continue;
@@ -880,43 +884,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
	ceph_update_snap_trace(mdsc, p, e,
			       op == CEPH_SNAP_OP_DESTROY);

	if (op == CEPH_SNAP_OP_SPLIT) {
		/*
		 * ok, _now_ add the inodes into the new realm.
		 */
		for (i = 0; i < num_split_inos; i++) {
			struct ceph_vino vino = {
				.ino = le64_to_cpu(split_inos[i]),
				.snap = CEPH_NOSNAP,
			};
			struct inode *inode = ceph_find_inode(sb, vino);
			struct ceph_inode_info *ci;

			if (!inode)
				continue;
			ci = ceph_inode(inode);
			spin_lock(&inode->i_lock);
			if (list_empty(&ci->i_snap_realm_item)) {
				struct ceph_snap_realm *oldrealm =
					ci->i_snap_realm;

				dout(" moving %p to split realm %llx %p\n",
				     inode, realm->ino, realm);
				spin_lock(&realm->inodes_with_caps_lock);
				list_add(&ci->i_snap_realm_item,
					 &realm->inodes_with_caps);
				ci->i_snap_realm = realm;
				spin_unlock(&realm->inodes_with_caps_lock);
				ceph_get_snap_realm(mdsc, realm);
				ceph_put_snap_realm(mdsc, oldrealm);
			}
			spin_unlock(&inode->i_lock);
			iput(inode);
		}

	if (op == CEPH_SNAP_OP_SPLIT)
		/* we took a reference when we created the realm, above */
		ceph_put_snap_realm(mdsc, realm);
	}

	__cleanup_empty_realms(mdsc);

+2 −0
Original line number Diff line number Diff line
@@ -690,6 +690,8 @@ struct ceph_snap_realm {

	struct list_head empty_item;     /* if i have ref==0 */

	struct list_head dirty_item;     /* if realm needs new context */

	/* the current set of snaps for this realm */
	struct ceph_snap_context *cached_context;