Merge tag 'ceph-for-4.12-rc1' of git://github.com/ceph/ceph-client (26c5eaa1) · Commits · e / devices / android_kernel_teracube_emerald

drivers/block/rbd.c

+215 −144

Original line number	Diff line number	Diff line
		@@ -387,6 +387,7 @@ struct rbd_device {

		struct rw_semaphore lock_rwsem;
		enum rbd_lock_state lock_state;
		char lock_cookie[32];
		struct rbd_client_id owner_cid;
		struct work_struct acquired_lock_work;
		struct work_struct released_lock_work;
		@@ -477,13 +478,6 @@ static int minor_to_rbd_dev_id(int minor)
		return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
		}

		/*
		 * Report whether exclusive locking applies to this mapping.
		 *
		 * All three conditions must hold: the image header advertises
		 * RBD_FEATURE_EXCLUSIVE_LOCK, the mapped snap_id is CEPH_NOSNAP,
		 * and the mapping is not read-only.
		 */
		static bool rbd_is_lock_supported(struct rbd_device *rbd_dev)
		{
			if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
				return false;

			if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
				return false;

			return !rbd_dev->mapping.read_only;
		}

		static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
		{
		return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED \|\|
		@@ -731,7 +725,7 @@ static struct rbd_client rbd_client_create(struct ceph_options ceph_opts)
		kref_init(&rbdc->kref);
		INIT_LIST_HEAD(&rbdc->node);

		rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
		rbdc->client = ceph_create_client(ceph_opts, rbdc);
		if (IS_ERR(rbdc->client))
		goto out_rbdc;
		ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
		@@ -804,6 +798,7 @@ enum {
		Opt_read_only,
		Opt_read_write,
		Opt_lock_on_read,
		Opt_exclusive,
		Opt_err
		};

		@@ -816,6 +811,7 @@ static match_table_t rbd_opts_tokens = {
		{Opt_read_write, "read_write"},
		{Opt_read_write, "rw"}, /* Alternate spelling */
		{Opt_lock_on_read, "lock_on_read"},
		{Opt_exclusive, "exclusive"},
		{Opt_err, NULL}
		};

		@@ -823,11 +819,13 @@ struct rbd_options {
		int queue_depth;
		bool read_only;
		bool lock_on_read;
		bool exclusive;
		};

		#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
		#define RBD_READ_ONLY_DEFAULT false
		#define RBD_LOCK_ON_READ_DEFAULT false
		#define RBD_EXCLUSIVE_DEFAULT false

		static int parse_rbd_opts_token(char c, void private)
		{
		@@ -866,6 +864,9 @@ static int parse_rbd_opts_token(char c, void private)
		case Opt_lock_on_read:
		rbd_opts->lock_on_read = true;
		break;
		case Opt_exclusive:
		rbd_opts->exclusive = true;
		break;
		default:
		/* libceph prints "bad option" msg */
		return -EINVAL;
		@@ -3079,7 +3080,8 @@ static int rbd_lock(struct rbd_device *rbd_dev)
		char cookie[32];
		int ret;

		WARN_ON(__rbd_is_lock_owner(rbd_dev));
		WARN_ON(__rbd_is_lock_owner(rbd_dev) \|\|
		rbd_dev->lock_cookie[0] != '\0');

		format_lock_cookie(rbd_dev, cookie);
		ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
		@@ -3089,6 +3091,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
		return ret;

		rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
		strcpy(rbd_dev->lock_cookie, cookie);
		rbd_set_owner_cid(rbd_dev, &cid);
		queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
		return 0;
		@@ -3097,27 +3100,24 @@ static int rbd_lock(struct rbd_device *rbd_dev)
		/*
		* lock_rwsem must be held for write
		*/
		static int rbd_unlock(struct rbd_device *rbd_dev)
		static void rbd_unlock(struct rbd_device *rbd_dev)
		{
		struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
		char cookie[32];
		int ret;

		WARN_ON(!__rbd_is_lock_owner(rbd_dev));

		rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
		WARN_ON(!__rbd_is_lock_owner(rbd_dev) \|\|
		rbd_dev->lock_cookie[0] == '\0');

		format_lock_cookie(rbd_dev, cookie);
		ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
		RBD_LOCK_NAME, cookie);
		if (ret && ret != -ENOENT) {
		rbd_warn(rbd_dev, "cls_unlock failed: %d", ret);
		return ret;
		}
		RBD_LOCK_NAME, rbd_dev->lock_cookie);
		if (ret && ret != -ENOENT)
		rbd_warn(rbd_dev, "failed to unlock: %d", ret);

		/* treat errors as the image is unlocked */
		rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
		rbd_dev->lock_cookie[0] = '\0';
		rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
		queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
		return 0;
		}

		static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
		@@ -3447,6 +3447,18 @@ static void rbd_acquire_lock(struct work_struct *work)
		ret = rbd_request_lock(rbd_dev);
		if (ret == -ETIMEDOUT) {
		goto again; /* treat this as a dead client */
		} else if (ret == -EROFS) {
		rbd_warn(rbd_dev, "peer will not release lock");
		/*
		* If this is rbd_add_acquire_lock(), we want to fail
		* immediately -- reuse BLACKLISTED flag. Otherwise we
		* want to block.
		*/
		if (!(rbd_dev->disk->flags & GENHD_FL_UP)) {
		set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
		/* wake "rbd map --exclusive" process */
		wake_requests(rbd_dev, false);
		}
		} else if (ret < 0) {
		rbd_warn(rbd_dev, "error requesting lock: %d", ret);
		mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
		@@ -3490,7 +3502,7 @@ static bool rbd_release_lock(struct rbd_device *rbd_dev)
		if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
		return false;

		if (!rbd_unlock(rbd_dev))
		rbd_unlock(rbd_dev);
		/*
		* Give others a chance to grab the lock - we would re-acquire
		* almost immediately if we got new IO during ceph_osdc_sync()
		@@ -3499,7 +3511,6 @@ static bool rbd_release_lock(struct rbd_device *rbd_dev)
		* after wake_requests() in rbd_handle_released_lock().
		*/
		cancel_delayed_work(&rbd_dev->lock_dwork);

		return true;
		}

		@@ -3580,12 +3591,16 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
		up_read(&rbd_dev->lock_rwsem);
		}

		static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
		/*
		* Returns result for ResponseMessage to be encoded (<= 0), or 1 if no
		* ResponseMessage is needed.
		*/
		static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
		void **p)
		{
		struct rbd_client_id my_cid = rbd_get_cid(rbd_dev);
		struct rbd_client_id cid = { 0 };
		bool need_to_send;
		int result = 1;

		if (struct_v >= 2) {
		cid.gid = ceph_decode_64(p);
		@@ -3595,19 +3610,36 @@ static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
		dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
		cid.handle);
		if (rbd_cid_equal(&cid, &my_cid))
		return false;
		return result;

		down_read(&rbd_dev->lock_rwsem);
		need_to_send = __rbd_is_lock_owner(rbd_dev);
		if (__rbd_is_lock_owner(rbd_dev)) {
		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED &&
		rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid))
		goto out_unlock;

		/*
		* encode ResponseMessage(0) so the peer can detect
		* a missing owner
		*/
		result = 0;

		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
		if (!rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) {
		dout("%s rbd_dev %p queueing unlock_work\n", __func__,
		rbd_dev);
		queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work);
		if (!rbd_dev->opts->exclusive) {
		dout("%s rbd_dev %p queueing unlock_work\n",
		__func__, rbd_dev);
		queue_work(rbd_dev->task_wq,
		&rbd_dev->unlock_work);
		} else {
		/* refuse to release the lock */
		result = -EROFS;
		}
		}
		}

		out_unlock:
		up_read(&rbd_dev->lock_rwsem);
		return need_to_send;
		return result;
		}

		static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
		@@ -3690,13 +3722,10 @@ static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
		rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
		break;
		case RBD_NOTIFY_OP_REQUEST_LOCK:
		if (rbd_handle_request_lock(rbd_dev, struct_v, &p))
		/*
		* send ResponseMessage(0) back so the client
		* can detect a missing owner
		*/
		ret = rbd_handle_request_lock(rbd_dev, struct_v, &p);
		if (ret <= 0)
		rbd_acknowledge_notify_result(rbd_dev, notify_id,
		cookie, 0);
		cookie, ret);
		else
		rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
		break;
		@@ -3821,24 +3850,51 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
		ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
		}

		/*
		 * Swap the cookie of the exclusive lock we hold for a freshly
		 * formatted one via ceph_cls_set_cookie().  On success the new cookie
		 * is recorded in rbd_dev->lock_cookie.  On any failure the lock is
		 * released manually and, if the release went through, an acquire is
		 * queued on lock_dwork.
		 *
		 * lock_rwsem must be held for write
		 */
		static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
		{
			struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
			char new_cookie[32];
			int ret;

			WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);

			format_lock_cookie(rbd_dev, new_cookie);
			ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
						  &rbd_dev->header_oloc, RBD_LOCK_NAME,
						  CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie,
						  RBD_LOCK_TAG, new_cookie);
			if (!ret) {
				/* remember the cookie the lock is now held under */
				strcpy(rbd_dev->lock_cookie, new_cookie);
				return;
			}

			/* -EOPNOTSUPP is handled quietly; anything else is logged */
			if (ret != -EOPNOTSUPP)
				rbd_warn(rbd_dev, "failed to update lock cookie: %d", ret);

			/*
			 * Lock cookie cannot be updated on older OSDs, so do
			 * a manual release and queue an acquire.
			 */
			if (rbd_release_lock(rbd_dev))
				queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
		}

		static void rbd_reregister_watch(struct work_struct *work)
		{
		struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
		struct rbd_device, watch_dwork);
		bool was_lock_owner = false;
		bool need_to_wake = false;
		int ret;

		dout("%s rbd_dev %p\n", __func__, rbd_dev);

		down_write(&rbd_dev->lock_rwsem);
		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
		was_lock_owner = rbd_release_lock(rbd_dev);

		mutex_lock(&rbd_dev->watch_mutex);
		if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
		mutex_unlock(&rbd_dev->watch_mutex);
		goto out;
		return;
		}

		ret = __rbd_register_watch(rbd_dev);
		@@ -3846,36 +3902,28 @@ static void rbd_reregister_watch(struct work_struct *work)
		rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
		if (ret == -EBLACKLISTED \|\| ret == -ENOENT) {
		set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
		need_to_wake = true;
		wake_requests(rbd_dev, true);
		} else {
		queue_delayed_work(rbd_dev->task_wq,
		&rbd_dev->watch_dwork,
		RBD_RETRY_DELAY);
		}
		mutex_unlock(&rbd_dev->watch_mutex);
		goto out;
		return;
		}

		need_to_wake = true;
		rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
		rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
		mutex_unlock(&rbd_dev->watch_mutex);

		down_write(&rbd_dev->lock_rwsem);
		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
		rbd_reacquire_lock(rbd_dev);
		up_write(&rbd_dev->lock_rwsem);

		ret = rbd_dev_refresh(rbd_dev);
		if (ret)
		rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);

		if (was_lock_owner) {
		ret = rbd_try_lock(rbd_dev);
		if (ret)
		rbd_warn(rbd_dev, "reregisteration lock failed: %d",
		ret);
		}

		out:
		up_write(&rbd_dev->lock_rwsem);
		if (need_to_wake)
		wake_requests(rbd_dev, true);
		}

		/*
		@@ -4034,10 +4082,6 @@ static void rbd_queue_workfn(struct work_struct *work)
		if (op_type != OBJ_OP_READ) {
		snapc = rbd_dev->header.snapc;
		ceph_get_snap_context(snapc);
		must_be_locked = rbd_is_lock_supported(rbd_dev);
		} else {
		must_be_locked = rbd_dev->opts->lock_on_read &&
		rbd_is_lock_supported(rbd_dev);
		}
		up_read(&rbd_dev->header_rwsem);

		@@ -4048,14 +4092,20 @@ static void rbd_queue_workfn(struct work_struct *work)
		goto err_rq;
		}

		must_be_locked =
		(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
		(op_type != OBJ_OP_READ \|\| rbd_dev->opts->lock_on_read);
		if (must_be_locked) {
		down_read(&rbd_dev->lock_rwsem);
		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
		!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
		!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
		if (rbd_dev->opts->exclusive) {
		rbd_warn(rbd_dev, "exclusive lock required");
		result = -EROFS;
		goto err_unlock;
		}
		rbd_wait_state_locked(rbd_dev);

		WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^
		!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
		}
		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
		result = -EBLACKLISTED;
		goto err_unlock;
		@@ -4114,19 +4164,10 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,

		static void rbd_free_disk(struct rbd_device *rbd_dev)
		{
		struct gendisk *disk = rbd_dev->disk;

		if (!disk)
		return;

		rbd_dev->disk = NULL;
		if (disk->flags & GENHD_FL_UP) {
		del_gendisk(disk);
		if (disk->queue)
		blk_cleanup_queue(disk->queue);
		blk_cleanup_queue(rbd_dev->disk->queue);
		blk_mq_free_tag_set(&rbd_dev->tag_set);
		}
		put_disk(disk);
		put_disk(rbd_dev->disk);
		rbd_dev->disk = NULL;
		}

		static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
		@@ -4383,8 +4424,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
		if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
		q->backing_dev_info->capabilities \|= BDI_CAP_STABLE_WRITES;

		/*
		* disk_release() expects a queue ref from add_disk() and will
		* put it. Hold an extra ref until add_disk() is called.
		*/
		WARN_ON(!blk_get_queue(q));
		disk->queue = q;

		q->queuedata = rbd_dev;

		rbd_dev->disk = disk;
		@@ -5624,6 +5669,7 @@ static int rbd_add_parse_args(const char *buf,
		rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
		rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
		rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
		rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;

		copts = ceph_parse_options(options, mon_addrs,
		mon_addrs + mon_addrs_size - 1,
		@@ -5682,6 +5728,33 @@ static int rbd_add_get_pool_id(struct rbd_client rbdc, const char pool_name)
		return ret;
		}

		/*
		 * Release the exclusive lock if this device is currently its owner.
		 * lock_rwsem is taken for write around the ownership check and the
		 * rbd_unlock() call.
		 */
		static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
		{
			struct rw_semaphore *sem = &rbd_dev->lock_rwsem;

			down_write(sem);
			if (__rbd_is_lock_owner(rbd_dev))
				rbd_unlock(rbd_dev);
			up_write(sem);
		}

		/*
		 * Wait for the exclusive lock on behalf of a mapping request.
		 *
		 * Returns 0 once rbd_wait_state_locked() has returned without
		 * RBD_DEV_FLAG_BLACKLISTED being set, -EINVAL if the image does not
		 * have RBD_FEATURE_EXCLUSIVE_LOCK, or -EROFS if the flag is set after
		 * the wait.
		 */
		static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
		{
			if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
				rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
				return -EINVAL;
			}

			/* FIXME: "rbd map --exclusive" should be interruptible */
			down_read(&rbd_dev->lock_rwsem);
			rbd_wait_state_locked(rbd_dev);
			up_read(&rbd_dev->lock_rwsem);

			if (!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
				return 0;

			rbd_warn(rbd_dev, "failed to acquire exclusive lock");
			return -EROFS;
		}

		/*
		* An rbd format 2 image has a unique identifier, distinct from the
		* name given to it by the user. Internally, that identifier is
		@@ -5873,6 +5946,15 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
		return ret;
		}

		/*
		 * Undo device setup: clear RBD_DEV_FLAG_EXISTS, clear the mapping,
		 * free the disk, and unregister the block major unless the driver
		 * runs in single_major mode.
		 */
		static void rbd_dev_device_release(struct rbd_device *rbd_dev)
		{
			clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
			rbd_dev_mapping_clear(rbd_dev);
			rbd_free_disk(rbd_dev);

			if (single_major)
				return;

			unregister_blkdev(rbd_dev->major, rbd_dev->name);
		}

		/*
		* rbd_dev->header_rwsem must be locked for write and will be unlocked
		* upon return.
		@@ -5908,26 +5990,13 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
		set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
		set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);

		dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
		ret = device_add(&rbd_dev->dev);
		ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
		if (ret)
		goto err_out_mapping;

		/* Everything's ready. Announce the disk to the world. */

		set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
		up_write(&rbd_dev->header_rwsem);

		spin_lock(&rbd_dev_list_lock);
		list_add_tail(&rbd_dev->node, &rbd_dev_list);
		spin_unlock(&rbd_dev_list_lock);

		add_disk(rbd_dev->disk);
		pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
		(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
		rbd_dev->header.features);

		return ret;
		return 0;

		err_out_mapping:
		rbd_dev_mapping_clear(rbd_dev);
		@@ -5962,11 +6031,11 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
		static void rbd_dev_image_release(struct rbd_device *rbd_dev)
		{
		rbd_dev_unprobe(rbd_dev);
		if (rbd_dev->opts)
		rbd_unregister_watch(rbd_dev);
		rbd_dev->image_format = 0;
		kfree(rbd_dev->spec->image_id);
		rbd_dev->spec->image_id = NULL;

		rbd_dev_destroy(rbd_dev);
		}

		/*
		@@ -6126,22 +6195,43 @@ static ssize_t do_rbd_add(struct bus_type *bus,
		rbd_dev->mapping.read_only = read_only;

		rc = rbd_dev_device_setup(rbd_dev);
		if (rc) {
		/*
		* rbd_unregister_watch() can't be moved into
		* rbd_dev_image_release() without refactoring, see
		* commit 1f3ef78861ac.
		*/
		rbd_unregister_watch(rbd_dev);
		rbd_dev_image_release(rbd_dev);
		goto out;
		if (rc)
		goto err_out_image_probe;

		if (rbd_dev->opts->exclusive) {
		rc = rbd_add_acquire_lock(rbd_dev);
		if (rc)
		goto err_out_device_setup;
		}

		/* Everything's ready. Announce the disk to the world. */

		rc = device_add(&rbd_dev->dev);
		if (rc)
		goto err_out_image_lock;

		add_disk(rbd_dev->disk);
		/* see rbd_init_disk() */
		blk_put_queue(rbd_dev->disk->queue);

		spin_lock(&rbd_dev_list_lock);
		list_add_tail(&rbd_dev->node, &rbd_dev_list);
		spin_unlock(&rbd_dev_list_lock);

		pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
		(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
		rbd_dev->header.features);
		rc = count;
		out:
		module_put(THIS_MODULE);
		return rc;

		err_out_image_lock:
		rbd_dev_image_unlock(rbd_dev);
		err_out_device_setup:
		rbd_dev_device_release(rbd_dev);
		err_out_image_probe:
		rbd_dev_image_release(rbd_dev);
		err_out_rbd_dev:
		rbd_dev_destroy(rbd_dev);
		err_out_client:
		@@ -6169,21 +6259,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus,
		return do_rbd_add(bus, buf, count);
		}

		/*
		 * Tear down the device-level state of an rbd device: free the disk,
		 * unlink the device from rbd_dev_list, clear RBD_DEV_FLAG_EXISTS,
		 * delete the struct device, clear the mapping and, unless running in
		 * single_major mode, unregister the block major.
		 */
		static void rbd_dev_device_release(struct rbd_device *rbd_dev)
		{
		rbd_free_disk(rbd_dev);

		/* rbd_dev_list is modified only under rbd_dev_list_lock */
		spin_lock(&rbd_dev_list_lock);
		list_del_init(&rbd_dev->node);
		spin_unlock(&rbd_dev_list_lock);

		clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
		device_del(&rbd_dev->dev);
		rbd_dev_mapping_clear(rbd_dev);
		/* NOTE(review): presumably the major is shared in single_major mode, hence not unregistered per device -- confirm */
		if (!single_major)
		unregister_blkdev(rbd_dev->major, rbd_dev->name);
		}

		static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
		{
		while (rbd_dev->parent) {
		@@ -6201,6 +6276,7 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
		}
		rbd_assert(second);
		rbd_dev_image_release(second);
		rbd_dev_destroy(second);
		first->parent = NULL;
		first->parent_overlap = 0;

		@@ -6269,21 +6345,16 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
		blk_set_queue_dying(rbd_dev->disk->queue);
		}

		down_write(&rbd_dev->lock_rwsem);
		if (__rbd_is_lock_owner(rbd_dev))
		rbd_unlock(rbd_dev);
		up_write(&rbd_dev->lock_rwsem);
		rbd_unregister_watch(rbd_dev);
		del_gendisk(rbd_dev->disk);
		spin_lock(&rbd_dev_list_lock);
		list_del_init(&rbd_dev->node);
		spin_unlock(&rbd_dev_list_lock);
		device_del(&rbd_dev->dev);

		/*
		* Don't free anything from rbd_dev->disk until after all
		* notifies are completely processed. Otherwise
		* rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
		* in a potential use after free of rbd_dev->disk or rbd_dev.
		*/
		rbd_dev_image_unlock(rbd_dev);
		rbd_dev_device_release(rbd_dev);
		rbd_dev_image_release(rbd_dev);

		rbd_dev_destroy(rbd_dev);
		return count;
		}

fs/ceph/addr.c

+6 −4

Original line number	Diff line number	Diff line
		@@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req)
		bool remove_page;

		dout("writepages_finish %p rc %d\n", inode, rc);
		if (rc < 0)
		if (rc < 0) {
		mapping_set_error(mapping, rc);
		ceph_set_error_write(ci);
		} else {
		ceph_clear_error_write(ci);
		}

		/*
		* We lost the cache cap, need to truncate the page before
		@@ -703,9 +707,6 @@ static void writepages_finish(struct ceph_osd_request *req)
		clear_bdi_congested(inode_to_bdi(inode),
		BLK_RW_ASYNC);

		if (rc < 0)
		SetPageError(page);

		ceph_put_snap_context(page_snap_context(page));
		page->private = 0;
		ClearPagePrivate(page);
		@@ -1892,6 +1893,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
		err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);

		wr_req->r_mtime = ci->vfs_inode.i_mtime;
		wr_req->r_abort_on_full = true;
		err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);

		if (!err)

fs/ceph/caps.c

+18 −7

Original line number	Diff line number	Diff line
		@@ -1015,6 +1015,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
		void *p;
		size_t extra_len;
		struct timespec zerotime = {0};
		struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;

		dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
		" seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu"
		@@ -1076,8 +1077,12 @@ static int send_cap_msg(struct cap_msg_args *arg)
		ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE);
		/* inline data size */
		ceph_encode_32(&p, 0);
		/* osd_epoch_barrier (version 5) */
		ceph_encode_32(&p, 0);
		/*
		* osd_epoch_barrier (version 5)
		* The epoch_barrier is protected by osdc->lock, so READ_ONCE here in
		* case it was recently changed
		*/
		ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier));
		/* oldest_flush_tid (version 6) */
		ceph_encode_64(&p, arg->oldest_flush_tid);

		@@ -1389,7 +1394,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
		first_tid = cf->tid + 1;

		capsnap = container_of(cf, struct ceph_cap_snap, cap_flush);
		atomic_inc(&capsnap->nref);
		refcount_inc(&capsnap->nref);
		spin_unlock(&ci->i_ceph_lock);

		dout("__flush_snaps %p capsnap %p tid %llu %s\n",
		@@ -2202,7 +2207,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
		inode, capsnap, cf->tid,
		ceph_cap_string(capsnap->dirty));

		atomic_inc(&capsnap->nref);
		refcount_inc(&capsnap->nref);
		spin_unlock(&ci->i_ceph_lock);

		ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
		@@ -3633,13 +3638,19 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		p += inline_len;
		}

		if (le16_to_cpu(msg->hdr.version) >= 5) {
		struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc;
		u32 epoch_barrier;

		ceph_decode_32_safe(&p, end, epoch_barrier, bad);
		ceph_osdc_update_epoch_barrier(osdc, epoch_barrier);
		}

		if (le16_to_cpu(msg->hdr.version) >= 8) {
		u64 flush_tid;
		u32 caller_uid, caller_gid;
		u32 osd_epoch_barrier;
		u32 pool_ns_len;
		/* version >= 5 */
		ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);

		/* version >= 6 */
		ceph_decode_64_safe(&p, end, flush_tid, bad);
		/* version >= 7 */

fs/ceph/debugfs.c

+10 −11

Original line number	Diff line number	Diff line
		@@ -22,20 +22,19 @@ static int mdsmap_show(struct seq_file s, void p)
		{
		int i;
		struct ceph_fs_client *fsc = s->private;
		struct ceph_mdsmap *mdsmap;

		if (fsc->mdsc == NULL \|\| fsc->mdsc->mdsmap == NULL)
		return 0;
		seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
		seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
		seq_printf(s, "session_timeout %d\n",
		fsc->mdsc->mdsmap->m_session_timeout);
		seq_printf(s, "session_autoclose %d\n",
		fsc->mdsc->mdsmap->m_session_autoclose);
		for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
		struct ceph_entity_addr *addr =
		&fsc->mdsc->mdsmap->m_info[i].addr;
		int state = fsc->mdsc->mdsmap->m_info[i].state;

		mdsmap = fsc->mdsc->mdsmap;
		seq_printf(s, "epoch %d\n", mdsmap->m_epoch);
		seq_printf(s, "root %d\n", mdsmap->m_root);
		seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
		seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
		seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
		for (i = 0; i < mdsmap->m_num_mds; i++) {
		struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
		int state = mdsmap->m_info[i].state;
		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
		ceph_pr_addr(&addr->in_addr),
		ceph_mds_state_name(state));

fs/ceph/dir.c

+16 −7

Original line number	Diff line number	Diff line
		@@ -294,7 +294,7 @@ static int ceph_readdir(struct file file, struct dir_context ctx)
		struct ceph_mds_client *mdsc = fsc->mdsc;
		int i;
		int err;
		u32 ftype;
		unsigned frag = -1;
		struct ceph_mds_reply_info_parsed *rinfo;

		dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
		@@ -341,7 +341,6 @@ static int ceph_readdir(struct file file, struct dir_context ctx)
		/* do we have the correct frag content buffered? */
		if (need_send_readdir(fi, ctx->pos)) {
		struct ceph_mds_request *req;
		unsigned frag;
		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
		CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;

		@@ -352,6 +351,9 @@ static int ceph_readdir(struct file file, struct dir_context ctx)
		}

		if (is_hash_order(ctx->pos)) {
		/* fragtree isn't always accurate. choose frag
		* based on previous reply when possible. */
		if (frag == (unsigned)-1)
		frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
		NULL, NULL);
		} else {
		@@ -378,7 +380,11 @@ static int ceph_readdir(struct file file, struct dir_context ctx)
		ceph_mdsc_put_request(req);
		return -ENOMEM;
		}
		} else if (is_hash_order(ctx->pos)) {
		req->r_args.readdir.offset_hash =
		cpu_to_le32(fpos_hash(ctx->pos));
		}

		req->r_dir_release_cnt = fi->dir_release_count;
		req->r_dir_ordered_cnt = fi->dir_ordered_count;
		req->r_readdir_cache_idx = fi->readdir_cache_idx;
		@@ -476,6 +482,7 @@ static int ceph_readdir(struct file file, struct dir_context ctx)
		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
		struct ceph_vino vino;
		ino_t ino;
		u32 ftype;

		BUG_ON(rde->offset < ctx->pos);

		@@ -498,15 +505,17 @@ static int ceph_readdir(struct file file, struct dir_context ctx)
		ctx->pos++;
		}

		if (fi->next_offset > 2) {
		ceph_mdsc_put_request(fi->last_readdir);
		fi->last_readdir = NULL;

		if (fi->next_offset > 2) {
		frag = fi->frag;
		goto more;
		}

		/* more frags? */
		if (!ceph_frag_is_rightmost(fi->frag)) {
		unsigned frag = ceph_frag_next(fi->frag);
		frag = ceph_frag_next(fi->frag);
		if (is_hash_order(ctx->pos)) {
		loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
		fi->next_offset, true);