Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5aea3dcd authored by Ilya Dryomov's avatar Ilya Dryomov
Browse files

libceph: a major OSD client update



This is a major sync up, up to ~Jewel.  The highlights are:

- per-session request trees (vs a global per-client tree)
- per-session locking (vs a global per-client rwlock)
- homeless OSD session
- no ad-hoc global per-client lists
- support for pool quotas
- foundation for watch/notify v2 support
- foundation for map check (pool deletion detection) support

The switchover is incomplete: lingering requests can be setup and
teared down but aren't ever reestablished.  This functionality is
restored with the introduction of the new lingering infrastructure
(ceph_osd_linger_request, linger_work, etc) in a later commit.

Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
parent 9dd2845c
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -193,12 +193,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
	if (copy_from_user(&dl, arg, sizeof(dl)))
		return -EFAULT;

	down_read(&osdc->map_sem);
	down_read(&osdc->lock);
	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
					  &dl.object_no, &dl.object_offset,
					  &olen);
	if (r < 0) {
		up_read(&osdc->map_sem);
		up_read(&osdc->lock);
		return -EIO;
	}
	dl.file_offset -= dl.object_offset;
@@ -217,7 +217,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)

	r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid);
	if (r < 0) {
		up_read(&osdc->map_sem);
		up_read(&osdc->lock);
		return r;
	}

@@ -230,7 +230,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
	} else {
		memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
	}
	up_read(&osdc->map_sem);
	up_read(&osdc->lock);

	/* send result back to user */
	if (copy_to_user(arg, &dl, sizeof(dl)))
+4 −4
Original line number Diff line number Diff line
@@ -75,7 +75,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
	char buf[128];

	dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
	down_read(&osdc->map_sem);
	down_read(&osdc->lock);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
	if (pool_name) {
		size_t len = strlen(pool_name);
@@ -107,7 +107,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
				ret = -ERANGE;
		}
	}
	up_read(&osdc->map_sem);
	up_read(&osdc->lock);
	return ret;
}

@@ -141,13 +141,13 @@ static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
	const char *pool_name;

	down_read(&osdc->map_sem);
	down_read(&osdc->lock);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
	if (pool_name)
		ret = snprintf(val, size, "%s", pool_name);
	else
		ret = snprintf(val, size, "%lld", (unsigned long long)pool);
	up_read(&osdc->map_sem);
	up_read(&osdc->lock);
	return ret;
}

+7 −11
Original line number Diff line number Diff line
@@ -33,12 +33,13 @@ struct ceph_osd {
	int o_incarnation;
	struct rb_node o_node;
	struct ceph_connection o_con;
	struct list_head o_requests;
	struct rb_root o_requests;
	struct list_head o_linger_requests;
	struct list_head o_osd_lru;
	struct ceph_auth_handshake o_auth;
	unsigned long lru_ttl;
	struct list_head o_keepalive_item;
	struct mutex lock;
};

#define CEPH_OSD_SLAB_OPS	2
@@ -144,8 +145,6 @@ struct ceph_osd_request_target {
struct ceph_osd_request {
	u64             r_tid;              /* unique for this client */
	struct rb_node  r_node;
	struct list_head r_req_lru_item;
	struct list_head r_osd_item;
	struct list_head r_linger_item;
	struct list_head r_linger_osd_item;
	struct ceph_osd *r_osd;
@@ -219,19 +218,16 @@ struct ceph_osd_client {
	struct ceph_client     *client;

	struct ceph_osdmap     *osdmap;       /* current map */
	struct rw_semaphore    map_sem;
	struct rw_semaphore    lock;

	struct mutex           request_mutex;
	struct rb_root         osds;          /* osds */
	struct list_head       osd_lru;       /* idle osds */
	spinlock_t             osd_lru_lock;
	u64                    last_tid;      /* tid of last request */
	struct rb_root         requests;      /* pending requests */
	struct list_head       req_lru;	      /* in-flight lru */
	struct list_head       req_unsent;    /* unsent/need-resend queue */
	struct list_head       req_notarget;  /* map to no osd */
	struct list_head       req_linger;    /* lingering requests */
	int                    num_requests;
	struct ceph_osd        homeless_osd;
	atomic64_t             last_tid;      /* tid of last request */
	atomic_t               num_requests;
	atomic_t               num_homeless;
	struct delayed_work    timeout_work;
	struct delayed_work    osds_timeout_work;
#ifdef CONFIG_DEBUG_FS
+26 −8
Original line number Diff line number Diff line
@@ -182,21 +182,39 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
	seq_putc(s, '\n');
}

static void dump_requests(struct seq_file *s, struct ceph_osd *osd)
{
	struct rb_node *n;

	mutex_lock(&osd->lock);
	for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
		struct ceph_osd_request *req =
		    rb_entry(n, struct ceph_osd_request, r_node);

		dump_request(s, req);
	}

	mutex_unlock(&osd->lock);
}

static int osdc_show(struct seq_file *s, void *pp)
{
	struct ceph_client *client = s->private;
	struct ceph_osd_client *osdc = &client->osdc;
	struct rb_node *p;

	mutex_lock(&osdc->request_mutex);
	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
		struct ceph_osd_request *req;
	struct rb_node *n;

		req = rb_entry(p, struct ceph_osd_request, r_node);
	down_read(&osdc->lock);
	seq_printf(s, "REQUESTS %d homeless %d\n",
		   atomic_read(&osdc->num_requests),
		   atomic_read(&osdc->num_homeless));
	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);

		dump_request(s, req);
		dump_requests(s, osd);
	}
	mutex_unlock(&osdc->request_mutex);
	dump_requests(s, &osdc->homeless_osd);

	up_read(&osdc->lock);
	return 0;
}

+561 −603

File changed.

Preview size limit exceeded, changes collapsed.