Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client (1204c464) · Commits · e / devices / android_kernel_fairphone_FP4

drivers/block/rbd.c

+20 −6

Original line number	Diff line number	Diff line
		@@ -3762,8 +3762,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
		goto out_tag_set;
		}

		/* We use the default size, but let's be explicit about it. */
		blk_queue_physical_block_size(q, SECTOR_SIZE);
		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
		/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */

		/* set io sizes to object size */
		segment_size = rbd_obj_bytes(&rbd_dev->header);
		@@ -5301,9 +5301,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)

		if (mapping) {
		ret = rbd_dev_header_watch_sync(rbd_dev);
		if (ret)
		if (ret) {
		if (ret == -ENOENT)
		pr_info("image %s/%s does not exist\n",
		rbd_dev->spec->pool_name,
		rbd_dev->spec->image_name);
		goto out_header_name;
		}
		}

		ret = rbd_dev_header_info(rbd_dev);
		if (ret)
		@@ -5319,8 +5324,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
		ret = rbd_spec_fill_snap_id(rbd_dev);
		else
		ret = rbd_spec_fill_names(rbd_dev);
		if (ret)
		if (ret) {
		if (ret == -ENOENT)
		pr_info("snap %s/%s@%s does not exist\n",
		rbd_dev->spec->pool_name,
		rbd_dev->spec->image_name,
		rbd_dev->spec->snap_name);
		goto err_out_probe;
		}

		if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
		ret = rbd_dev_v2_parent_info(rbd_dev);
		@@ -5390,8 +5401,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,

		/* pick the pool */
		rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
		if (rc < 0)
		if (rc < 0) {
		if (rc == -ENOENT)
		pr_info("pool %s does not exist\n", spec->pool_name);
		goto err_out_client;
		}
		spec->pool_id = (u64)rc;

		/* The ceph file layout needs to fit pool id in 32 bits */
		@@ -5673,7 +5687,7 @@ static int __init rbd_init(void)

		/*
		* The number of active work items is limited by the number of
		* rbd devices, so leave @max_active at default.
		* rbd devices * queue depth, so leave @max_active at default.
		*/
		rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
		if (!rbd_wq) {

fs/ceph/addr.c

+25 −13

Original line number	Diff line number	Diff line
		@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
		inode, page, (int)pos, (int)len);

		r = ceph_update_writeable_page(file, pos, len, page);
		if (r < 0)
		page_cache_release(page);
		else
		*pagep = page;
		} while (r == -EAGAIN);

		return r;
		@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)

		osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);

		{
		__le64 xattr_buf = cpu_to_le64(inline_version);
		err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
		"inline_version", &inline_version,
		sizeof(inline_version),
		"inline_version", &xattr_buf,
		sizeof(xattr_buf),
		CEPH_OSD_CMPXATTR_OP_GT,
		CEPH_OSD_CMPXATTR_MODE_U64);
		if (err)
		goto out_put;
		}

		{
		char xattr_buf[32];
		int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
		"%llu", inline_version);
		err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
		"inline_version", &inline_version,
		sizeof(inline_version), 0, 0);
		"inline_version",
		xattr_buf, xattr_len, 0, 0);
		if (err)
		goto out_put;
		}

		ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
		err = ceph_osdc_start_request(&fsc->client->osdc, req, false);

fs/ceph/caps.c

+41 −10

Original line number	Diff line number	Diff line
		@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
		return ret;
		}

		/*
		 * Detach an inode from its snap realm and hand the realm to
		 * ceph_put_snap_realm().
		 *
		 * ci->i_snap_realm is dereferenced without a NULL check, so the caller
		 * must have verified that it is set. The list removal, the
		 * i_snap_realm_counter bump and the clearing of ci->i_snap_realm all
		 * happen under the realm's inodes_with_caps_lock; ceph_put_snap_realm()
		 * is called only after that lock has been released.
		 *
		 * NOTE(review): the visible call sites appear to run with
		 * ci->i_ceph_lock held -- confirm before adding new callers.
		 */
		static void drop_inode_snap_realm(struct ceph_inode_info *ci)
		{
		/* keep a local copy: ci->i_snap_realm is cleared below */
		struct ceph_snap_realm *realm = ci->i_snap_realm;
		spin_lock(&realm->inodes_with_caps_lock);
		list_del_init(&ci->i_snap_realm_item);
		ci->i_snap_realm_counter++;
		ci->i_snap_realm = NULL;
		spin_unlock(&realm->inodes_with_caps_lock);
		/* the mds client is looked up through the inode's superblock */
		ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
		realm);
		}

		/*
		* Remove a cap. Take steps to deal with a racing iterate_session_caps.
		*
		@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
		if (removed)
		ceph_put_cap(mdsc, cap);

		if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
		struct ceph_snap_realm *realm = ci->i_snap_realm;
		spin_lock(&realm->inodes_with_caps_lock);
		list_del_init(&ci->i_snap_realm_item);
		ci->i_snap_realm_counter++;
		ci->i_snap_realm = NULL;
		spin_unlock(&realm->inodes_with_caps_lock);
		ceph_put_snap_realm(mdsc, realm);
		}
		/* when reconnect denied, we remove session caps forcibly,
		* i_wr_ref can be non-zero. If there are ongoing write,
		* keep i_snap_realm.
		*/
		if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
		drop_inode_snap_realm(ci);

		if (!__ceph_is_any_real_caps(ci))
		__cap_delay_cancel(mdsc, ci);
		}
		@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
		int was = ci->i_dirty_caps;
		int dirty = 0;

		if (!ci->i_auth_cap) {
		pr_warn("__mark_dirty_caps %p %llx mask %s, "
		"but no auth cap (session was closed?)\n",
		inode, ceph_ino(inode), ceph_cap_string(mask));
		return 0;
		}

		dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
		ceph_cap_string(mask), ceph_cap_string(was),
		ceph_cap_string(was \| mask));
		@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
		ci->i_snap_realm->cached_context);
		dout(" inode %p now dirty snapc %p auth cap %p\n",
		&ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
		WARN_ON(!ci->i_auth_cap);
		BUG_ON(!list_empty(&ci->i_dirty_item));
		spin_lock(&mdsc->cap_dirty_lock);
		list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
		@@ -1545,7 +1561,19 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
		if (!mdsc->stopping && inode->i_nlink > 0) {
		if (want) {
		retain \|= CEPH_CAP_ANY; /* be greedy */
		} else if (S_ISDIR(inode->i_mode) &&
		(issued & CEPH_CAP_FILE_SHARED) &&
		__ceph_dir_is_complete(ci)) {
		/*
		* If a directory is complete, we want to keep
		* the exclusive cap. So that MDS does not end up
		* revoking the shared cap on every create/unlink
		* operation.
		*/
		want = CEPH_CAP_ANY_SHARED \| CEPH_CAP_FILE_EXCL;
		retain \|= want;
		} else {

		retain \|= CEPH_CAP_ANY_SHARED;
		/*
		* keep RD only if we didn't have the file open RW,
		@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
		wake = 1;
		}
		}
		/* see comment in __ceph_remove_cap() */
		if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
		drop_inode_snap_realm(ci);
		}
		spin_unlock(&ci->i_ceph_lock);

fs/ceph/dir.c

+34 −14

Original line number	Diff line number	Diff line
		@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
		/* can we use the dcache? */
		spin_lock(&ci->i_ceph_lock);
		if ((ctx->pos == 2 \|\| fi->dentry) &&
		ceph_test_mount_opt(fsc, DCACHE) &&
		!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
		ceph_snap(inode) != CEPH_SNAPDIR &&
		__ceph_dir_is_complete_ordered(ci) &&
		@@ -336,16 +337,23 @@ static int ceph_readdir(struct file file, struct dir_context ctx)
		ceph_mdsc_put_request(req);
		return err;
		}
		req->r_inode = inode;
		ihold(inode);
		req->r_dentry = dget(file->f_path.dentry);
		/* hints to request -> mds selection code */
		req->r_direct_mode = USE_AUTH_MDS;
		req->r_direct_hash = ceph_frag_value(frag);
		req->r_direct_is_hash = true;
		if (fi->last_name) {
		req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
		if (!req->r_path2) {
		ceph_mdsc_put_request(req);
		return -ENOMEM;
		}
		}
		req->r_readdir_offset = fi->next_offset;
		req->r_args.readdir.frag = cpu_to_le32(frag);

		req->r_inode = inode;
		ihold(inode);
		req->r_dentry = dget(file->f_path.dentry);
		err = ceph_mdsc_do_request(mdsc, NULL, req);
		if (err < 0) {
		ceph_mdsc_put_request(req);
		@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
		fsc->mount_options->snapdir_name,
		dentry->d_name.len) &&
		!is_root_ceph_dentry(dir, dentry) &&
		ceph_test_mount_opt(fsc, DCACHE) &&
		__ceph_dir_is_complete(ci) &&
		(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
		spin_unlock(&ci->i_ceph_lock);
		@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
		err = PTR_ERR(req);
		goto out;
		}
		req->r_dentry = dget(dentry);
		req->r_num_caps = 2;
		req->r_path2 = kstrdup(dest, GFP_NOFS);
		if (!req->r_path2) {
		err = -ENOMEM;
		ceph_mdsc_put_request(req);
		goto out;
		}
		req->r_locked_dir = dir;
		req->r_dentry = dget(dentry);
		req->r_num_caps = 2;
		req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
		req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
		err = ceph_mdsc_do_request(mdsc, dir, req);
		@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
		struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
		struct ceph_mds_client *mdsc = fsc->mdsc;
		struct ceph_mds_request *req;
		int op = CEPH_MDS_OP_RENAME;
		int err;

		if (ceph_snap(old_dir) != ceph_snap(new_dir))
		return -EXDEV;
		if (ceph_snap(old_dir) != CEPH_NOSNAP \|\|
		ceph_snap(new_dir) != CEPH_NOSNAP)
		if (ceph_snap(old_dir) != CEPH_NOSNAP) {
		if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
		op = CEPH_MDS_OP_RENAMESNAP;
		else
		return -EROFS;
		}
		dout("rename dir %p dentry %p to dir %p dentry %p\n",
		old_dir, old_dentry, new_dir, new_dentry);
		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
		if (IS_ERR(req))
		return PTR_ERR(req);
		ihold(old_dir);
		@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
		dout("dir_fsync %p wait on tid %llu (until %llu)\n",
		inode, req->r_tid, last_tid);
		if (req->r_timeout) {
		ret = wait_for_completion_timeout(
		&req->r_safe_completion, req->r_timeout);
		if (ret > 0)
		unsigned long time_left = wait_for_completion_timeout(
		&req->r_safe_completion,
		req->r_timeout);
		if (time_left > 0)
		ret = 0;
		else if (ret == 0)
		else
		ret = -EIO; /* timed out */
		} else {
		wait_for_completion(&req->r_safe_completion);
		@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
		.getattr = ceph_getattr,
		.mkdir = ceph_mkdir,
		.rmdir = ceph_unlink,
		.rename = ceph_rename,
		};

		const struct dentry_operations ceph_dentry_ops = {

fs/ceph/mds_client.c

+45 −16

Original line number	Diff line number	Diff line
		@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
		spin_unlock(&session->s_cap_lock);
		}

		/*
		 * Clean up the requests tied to an MDS session. All of the work is
		 * done with mdsc->mutex held.
		 *
		 * First, every request on session->s_unsafe is unlinked from that
		 * list, logged, and passed to __unregister_request(). Then every
		 * request in mdsc->request_tree whose r_session has the same s_mds as
		 * @session gets its r_attempts reset to zero.
		 */
		static void cleanup_session_requests(struct ceph_mds_client *mdsc,
		struct ceph_mds_session *session)
		{
		struct ceph_mds_request *req;
		struct rb_node *p;

		dout("cleanup_session_requests mds%d\n", session->s_mds);
		mutex_lock(&mdsc->mutex);
		/* drain the unsafe list one entry at a time from the head */
		while (!list_empty(&session->s_unsafe)) {
		req = list_first_entry(&session->s_unsafe,
		struct ceph_mds_request, r_unsafe_item);
		list_del_init(&req->r_unsafe_item);
		pr_info(" dropping unsafe request %llu\n", req->r_tid);
		__unregister_request(mdsc, req);
		}
		/* zero r_attempts, so kick_requests() will re-send requests */
		p = rb_first(&mdsc->request_tree);
		while (p) {
		req = rb_entry(p, struct ceph_mds_request, r_node);
		/* step to the next node before looking at this request */
		p = rb_next(p);
		/* requests with no session, or on another mds, are left alone */
		if (req->r_session &&
		req->r_session->s_mds == session->s_mds)
		req->r_attempts = 0;
		}
		mutex_unlock(&mdsc->mutex);
		}

		/*
		* Helper to safely iterate over all caps associated with a session, with
		* special care taken to handle a racing __ceph_remove_cap().
		@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
		cap, ci, &ci->vfs_inode);
		spin_lock(&ci->i_ceph_lock);
		__ceph_remove_cap(cap, false);
		if (!__ceph_is_any_real_caps(ci)) {
		if (!ci->i_auth_cap) {
		struct ceph_mds_client *mdsc =
		ceph_sb_to_client(inode->i_sb)->mdsc;

		@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode inode, struct ceph_cap cap,
		mdsc->num_cap_flushing--;
		drop = 1;
		}
		if (drop && ci->i_wrbuffer_ref) {
		pr_info(" dropping dirty data for %p %lld\n",
		inode, ceph_ino(inode));
		ci->i_wrbuffer_ref = 0;
		ci->i_wrbuffer_ref_head = 0;
		drop++;
		}
		spin_unlock(&mdsc->cap_dirty_lock);
		}
		spin_unlock(&ci->i_ceph_lock);
		@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
		*/
		static struct ceph_msg create_request_message(struct ceph_mds_client mdsc,
		struct ceph_mds_request *req,
		int mds)
		int mds, bool drop_cap_releases)
		{
		struct ceph_msg *msg;
		struct ceph_mds_request_head *head;
		@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
		releases += ceph_encode_inode_release(&p,
		req->r_old_dentry->d_inode,
		mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);

		if (drop_cap_releases) {
		releases = 0;
		p = msg->front.iov_base + req->r_request_release_offset;
		}

		head->num_releases = cpu_to_le16(releases);

		/* time stamp */
		@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
		*/
		static int __prepare_send_request(struct ceph_mds_client *mdsc,
		struct ceph_mds_request *req,
		int mds)
		int mds, bool drop_cap_releases)
		{
		struct ceph_mds_request_head *rhead;
		struct ceph_msg *msg;
		@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
		ceph_msg_put(req->r_request);
		req->r_request = NULL;
		}
		msg = create_request_message(mdsc, req, mds);
		msg = create_request_message(mdsc, req, mds, drop_cap_releases);
		if (IS_ERR(msg)) {
		req->r_err = PTR_ERR(msg);
		complete_request(mdsc, req);
		@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
		if (req->r_request_started == 0) /* note request start time */
		req->r_request_started = jiffies;

		err = __prepare_send_request(mdsc, req, mds);
		err = __prepare_send_request(mdsc, req, mds, false);
		if (!err) {
		ceph_msg_get(req->r_request);
		ceph_con_send(&session->s_con, req->r_request);
		@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
		case CEPH_SESSION_CLOSE:
		if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
		pr_info("mds%d reconnect denied\n", session->s_mds);
		cleanup_session_requests(mdsc, session);
		remove_session_caps(session);
		wake = 2; /* for good measure */
		wake_up_all(&mdsc->session_close_wq);
		@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,

		mutex_lock(&mdsc->mutex);
		list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
		err = __prepare_send_request(mdsc, req, session->s_mds);
		err = __prepare_send_request(mdsc, req, session->s_mds, true);
		if (!err) {
		ceph_msg_get(req->r_request);
		ceph_con_send(&session->s_con, req->r_request);
		@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
		continue; /* only old requests */
		if (req->r_session &&
		req->r_session->s_mds == session->s_mds) {
		err = __prepare_send_request(mdsc, req, session->s_mds);
		err = __prepare_send_request(mdsc, req,
		session->s_mds, true);
		if (!err) {
		ceph_msg_get(req->r_request);
		ceph_con_send(&session->s_con, req->r_request);
		@@ -2864,6 +2892,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
		spin_unlock(&session->s_cap_lock);

		/* trim unused caps to reduce MDS's cache rejoin time */
		if (mdsc->fsc->sb->s_root)
		shrink_dcache_parent(mdsc->fsc->sb->s_root);

		ceph_con_close(&session->s_con);
		@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
		di->lease_renew_from &&
		di->lease_renew_after == 0) {
		unsigned long duration =
		le32_to_cpu(h->duration_ms) * HZ / 1000;
		msecs_to_jiffies(le32_to_cpu(h->duration_ms));

		di->lease_seq = seq;
		dentry->d_time = di->lease_renew_from + duration;