Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit af56e0aa authored by Linus Torvalds
Browse files
Pull ceph updates from Sage Weil:
 "There are some updates and cleanups to the CRUSH placement code, a bug
  fix with incremental maps, several cleanups and fixes from Josh Durgin
  in the RBD block device code, a series of cleanups and bug fixes from
  Alex Elder in the messenger code, and some miscellaneous bounds
  checking and gfp cleanups/fixes."

Fix up trivial conflicts in net/ceph/{messenger.c,osdmap.c} due to the
networking people preferring "unsigned int" over just "unsigned".

* git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (45 commits)
  libceph: fix pg_temp updates
  libceph: avoid unregistering osd request when not registered
  ceph: add auth buf in prepare_write_connect()
  ceph: rename prepare_connect_authorizer()
  ceph: return pointer from prepare_connect_authorizer()
  ceph: use info returned by get_authorizer
  ceph: have get_authorizer methods return pointers
  ceph: ensure auth ops are defined before use
  ceph: messenger: reduce args to create_authorizer
  ceph: define ceph_auth_handshake type
  ceph: messenger: check return from get_authorizer
  ceph: messenger: rework prepare_connect_authorizer()
  ceph: messenger: check prepare_write_connect() result
  ceph: don't set WRITE_PENDING too early
  ceph: drop msgr argument from prepare_write_connect()
  ceph: messenger: send banner in process_connect()
  ceph: messenger: reset connection kvec caller
  libceph: don't reset kvec in prepare_write_banner()
  ceph: ignore preferred_osd field
  ceph: fully initialize new layout
  ...
parents 65a50c95 6bd9adbd
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -65,11 +65,11 @@ snap_*
Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
-------------------------------------------------------------

id
snap_id

	The rados internal snapshot id assigned for this snapshot

size
snap_size

	The size of the image when this snapshot was taken.

+29 −43
Original line number Diff line number Diff line
@@ -141,7 +141,7 @@ struct rbd_request {
struct rbd_snap {
	struct	device		dev;
	const char		*name;
	size_t			size;
	u64			size;
	struct list_head	node;
	u64			id;
};
@@ -175,8 +175,7 @@ struct rbd_device {
	/* protects updating the header */
	struct rw_semaphore     header_rwsem;
	char                    snap_name[RBD_MAX_SNAP_NAME_LEN];
	u32 cur_snap;	/* index+1 of current snapshot within snap context
			   0 - for the head */
	u64                     snap_id;	/* current snapshot id */
	int read_only;

	struct list_head	node;
@@ -241,7 +240,7 @@ static void rbd_put_dev(struct rbd_device *rbd_dev)
	put_device(&rbd_dev->dev);
}

static int __rbd_update_snaps(struct rbd_device *rbd_dev);
static int __rbd_refresh_header(struct rbd_device *rbd_dev);

static int rbd_open(struct block_device *bdev, fmode_t mode)
{
@@ -450,7 +449,9 @@ static void rbd_client_release(struct kref *kref)
	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);

	dout("rbd_release_client %p\n", rbdc);
	spin_lock(&rbd_client_list_lock);
	list_del(&rbdc->node);
	spin_unlock(&rbd_client_list_lock);

	ceph_destroy_client(rbdc->client);
	kfree(rbdc->rbd_opts);
@@ -463,9 +464,7 @@ static void rbd_client_release(struct kref *kref)
 */
static void rbd_put_client(struct rbd_device *rbd_dev)
{
	spin_lock(&rbd_client_list_lock);
	kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
	spin_unlock(&rbd_client_list_lock);
	rbd_dev->rbd_client = NULL;
}

@@ -487,16 +486,18 @@ static void rbd_coll_release(struct kref *kref)
 */
static int rbd_header_from_disk(struct rbd_image_header *header,
				 struct rbd_image_header_ondisk *ondisk,
				 int allocated_snaps,
				 u32 allocated_snaps,
				 gfp_t gfp_flags)
{
	int i;
	u32 snap_count;
	u32 i, snap_count;

	if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
		return -ENXIO;

	snap_count = le32_to_cpu(ondisk->snap_count);
	if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context))
			 / sizeof (*ondisk))
		return -EINVAL;
	header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
				snap_count * sizeof (*ondisk),
				gfp_flags);
@@ -506,11 +507,11 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
	header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
	if (snap_count) {
		header->snap_names = kmalloc(header->snap_names_len,
					     GFP_KERNEL);
					     gfp_flags);
		if (!header->snap_names)
			goto err_snapc;
		header->snap_sizes = kmalloc(snap_count * sizeof(u64),
					     GFP_KERNEL);
					     gfp_flags);
		if (!header->snap_sizes)
			goto err_names;
	} else {
@@ -552,21 +553,6 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
	return -ENOMEM;
}

/*
 * Convert a snapshot number into the value obtained by subtracting it
 * from the header's total snapshot count.
 */
static int snap_index(struct rbd_image_header *header, int snap_num)
{
	int index = header->total_snaps - snap_num;

	return index;
}

/*
 * Return the snapshot id the device is currently mapped to.
 *
 * A cur_snap of 0 yields 0; otherwise the id is fetched from the
 * header's snapshot context at the position computed by snap_index().
 */
static u64 cur_snap_id(struct rbd_device *rbd_dev)
{
	struct rbd_image_header *header;
	int idx;

	if (rbd_dev->cur_snap == 0)
		return 0;

	header = &rbd_dev->header;
	idx = snap_index(header, rbd_dev->cur_snap);

	return header->snapc->snaps[idx];
}

static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
			u64 *seq, u64 *size)
{
@@ -605,7 +591,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
			snapc->seq = header->snap_seq;
		else
			snapc->seq = 0;
		dev->cur_snap = 0;
		dev->snap_id = CEPH_NOSNAP;
		dev->read_only = 0;
		if (size)
			*size = header->image_size;
@@ -613,8 +599,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
		ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
		if (ret < 0)
			goto done;

		dev->cur_snap = header->total_snaps - ret;
		dev->snap_id = snapc->seq;
		dev->read_only = 1;
	}

@@ -935,7 +920,6 @@ static int rbd_do_request(struct request *rq,
	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
	layout->fl_stripe_count = cpu_to_le32(1);
	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
	layout->fl_pg_preferred = cpu_to_le32(-1);
	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
	ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
				req, ops);
@@ -1168,7 +1152,7 @@ static int rbd_req_read(struct request *rq,
			 int coll_index)
{
	return rbd_do_op(rq, rbd_dev, NULL,
			 (snapid ? snapid : CEPH_NOSNAP),
			 snapid,
			 CEPH_OSD_OP_READ,
			 CEPH_OSD_FLAG_READ,
			 2,
@@ -1187,7 +1171,7 @@ static int rbd_req_sync_read(struct rbd_device *dev,
			  u64 *ver)
{
	return rbd_req_sync_op(dev, NULL,
			       (snapid ? snapid : CEPH_NOSNAP),
			       snapid,
			       CEPH_OSD_OP_READ,
			       CEPH_OSD_FLAG_READ,
			       NULL,
@@ -1238,7 +1222,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
	dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
		notify_id, (int)opcode);
	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
	rc = __rbd_update_snaps(dev);
	rc = __rbd_refresh_header(dev);
	mutex_unlock(&ctl_mutex);
	if (rc)
		pr_warning(RBD_DRV_NAME "%d got notification but failed to "
@@ -1521,7 +1505,7 @@ static void rbd_rq_fn(struct request_queue *q)
					      coll, cur_seg);
			else
				rbd_req_read(rq, rbd_dev,
					     cur_snap_id(rbd_dev),
					     rbd_dev->snap_id,
					     ofs,
					     op_size, bio,
					     coll, cur_seg);
@@ -1592,7 +1576,7 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
{
	ssize_t rc;
	struct rbd_image_header_ondisk *dh;
	int snap_count = 0;
	u32 snap_count = 0;
	u64 ver;
	size_t len;

@@ -1656,7 +1640,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
	struct ceph_mon_client *monc;

	/* we should create a snapshot only if we're pointing at the head */
	if (dev->cur_snap)
	if (dev->snap_id != CEPH_NOSNAP)
		return -EINVAL;

	monc = &dev->rbd_client->client->monc;
@@ -1683,7 +1667,9 @@ static int rbd_header_add_snap(struct rbd_device *dev,
	if (ret < 0)
		return ret;

	down_write(&dev->header_rwsem);
	dev->header.snapc->seq = new_snapid;
	up_write(&dev->header_rwsem);

	return 0;
bad:
@@ -1703,7 +1689,7 @@ static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
/*
 * only read the first part of the ondisk header, without the snaps info
 */
static int __rbd_update_snaps(struct rbd_device *rbd_dev)
static int __rbd_refresh_header(struct rbd_device *rbd_dev)
{
	int ret;
	struct rbd_image_header h;
@@ -1890,7 +1876,7 @@ static ssize_t rbd_image_refresh(struct device *dev,

	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);

	rc = __rbd_update_snaps(rbd_dev);
	rc = __rbd_refresh_header(rbd_dev);
	if (rc < 0)
		ret = rc;

@@ -1949,7 +1935,7 @@ static ssize_t rbd_snap_size_show(struct device *dev,
{
	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);

	return sprintf(buf, "%zd\n", snap->size);
	return sprintf(buf, "%llu\n", (unsigned long long)snap->size);
}

static ssize_t rbd_snap_id_show(struct device *dev,
@@ -2173,7 +2159,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
					 rbd_dev->header.obj_version);
		if (ret == -ERANGE) {
			mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
			rc = __rbd_update_snaps(rbd_dev);
			rc = __rbd_refresh_header(rbd_dev);
			mutex_unlock(&ctl_mutex);
			if (rc < 0)
				return rc;
@@ -2558,7 +2544,7 @@ static ssize_t rbd_snap_add(struct device *dev,
	if (ret < 0)
		goto err_unlock;

	ret = __rbd_update_snaps(rbd_dev);
	ret = __rbd_refresh_header(rbd_dev);
	if (ret < 0)
		goto err_unlock;

+0 −1
Original line number Diff line number Diff line
@@ -54,7 +54,6 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode)
	req->r_fmode = ceph_flags_to_mode(flags);
	req->r_args.open.flags = cpu_to_le32(flags);
	req->r_args.open.mode = cpu_to_le32(create_mode);
	req->r_args.open.preferred = cpu_to_le32(-1);
out:
	return req;
}
+47 −55
Original line number Diff line number Diff line
@@ -26,8 +26,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
		l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
		l.object_size = ceph_file_layout_object_size(ci->i_layout);
		l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
		l.preferred_osd =
			(s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
		l.preferred_osd = (s32)-1;
		if (copy_to_user(arg, &l, sizeof(l)))
			return -EFAULT;
	}
@@ -35,6 +34,32 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
	return err;
}

/*
 * Sanity-check a layout before it is sent to the MDS.
 *
 * @mdsc: mds client whose mdsmap supplies the list of data pools
 * @l:    layout to check (may come straight from userspace)
 *
 * Returns 0 when object_size and stripe_unit are page aligned, the
 * stripe unit is non-zero, object_size is a multiple of stripe_unit,
 * and l->data_pool is one of the mdsmap's data pools.  Returns -EINVAL
 * otherwise.
 */
static long __validate_layout(struct ceph_mds_client *mdsc,
			      struct ceph_ioctl_layout *l)
{
	/* the modulo below operates on the truncated values */
	unsigned stripe_unit = (unsigned)l->stripe_unit;
	int i, err;

	/*
	 * validate striping parameters
	 *
	 * The zero check must look at the truncated stripe unit: a
	 * value of 0, or any page-aligned 64-bit value whose low bits
	 * truncate to 0, would otherwise divide by zero.
	 */
	if ((l->object_size & ~PAGE_MASK) ||
	    (l->stripe_unit & ~PAGE_MASK) ||
	    !stripe_unit ||
	    ((unsigned)l->object_size % stripe_unit))
		return -EINVAL;

	/* make sure it's a valid data pool */
	mutex_lock(&mdsc->mutex);
	err = -EINVAL;
	for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
		if (mdsc->mdsmap->m_data_pg_pools[i] == l->data_pool) {
			err = 0;
			break;
		}
	mutex_unlock(&mdsc->mutex);

	return err;
}

static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
{
	struct inode *inode = file->f_dentry->d_inode;
@@ -44,52 +69,40 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
	struct ceph_ioctl_layout l;
	struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
	struct ceph_ioctl_layout nl;
	int err, i;
	int err;

	if (copy_from_user(&l, arg, sizeof(l)))
		return -EFAULT;

	/* validate changed params against current layout */
	err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
	if (!err) {
		nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
		nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
		nl.object_size = ceph_file_layout_object_size(ci->i_layout);
		nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
		nl.preferred_osd =
				(s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
	} else
	if (err)
		return err;

	memset(&nl, 0, sizeof(nl));
	if (l.stripe_count)
		nl.stripe_count = l.stripe_count;
	else
		nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
	if (l.stripe_unit)
		nl.stripe_unit = l.stripe_unit;
	else
		nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
	if (l.object_size)
		nl.object_size = l.object_size;
	else
		nl.object_size = ceph_file_layout_object_size(ci->i_layout);
	if (l.data_pool)
		nl.data_pool = l.data_pool;
	if (l.preferred_osd)
		nl.preferred_osd = l.preferred_osd;
	else
		nl.data_pool = ceph_file_layout_pg_pool(ci->i_layout);

	if ((nl.object_size & ~PAGE_MASK) ||
	    (nl.stripe_unit & ~PAGE_MASK) ||
	    ((unsigned)nl.object_size % (unsigned)nl.stripe_unit))
		return -EINVAL;
	/* this is obsolete, and always -1 */
	nl.preferred_osd = le64_to_cpu(-1);

	/* make sure it's a valid data pool */
	if (l.data_pool > 0) {
		mutex_lock(&mdsc->mutex);
		err = -EINVAL;
		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
				err = 0;
				break;
			}
		mutex_unlock(&mdsc->mutex);
	err = __validate_layout(mdsc, &nl);
	if (err)
		return err;
	}

	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETLAYOUT,
				       USE_AUTH_MDS);
@@ -106,8 +119,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
	req->r_args.setlayout.layout.fl_object_size =
		cpu_to_le32(l.object_size);
	req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
	req->r_args.setlayout.layout.fl_pg_preferred =
		cpu_to_le32(l.preferred_osd);

	parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
@@ -127,33 +138,16 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
	struct inode *inode = file->f_dentry->d_inode;
	struct ceph_mds_request *req;
	struct ceph_ioctl_layout l;
	int err, i;
	int err;
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;

	/* copy and validate */
	if (copy_from_user(&l, arg, sizeof(l)))
		return -EFAULT;

	if ((l.object_size & ~PAGE_MASK) ||
	    (l.stripe_unit & ~PAGE_MASK) ||
	    !l.stripe_unit ||
	    (l.object_size &&
	        (unsigned)l.object_size % (unsigned)l.stripe_unit))
		return -EINVAL;

	/* make sure it's a valid data pool */
	if (l.data_pool > 0) {
		mutex_lock(&mdsc->mutex);
		err = -EINVAL;
		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
				err = 0;
				break;
			}
		mutex_unlock(&mdsc->mutex);
	err = __validate_layout(mdsc, &l);
	if (err)
		return err;
	}

	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
				       USE_AUTH_MDS);
@@ -171,8 +165,6 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
			cpu_to_le32(l.object_size);
	req->r_args.setlayout.layout.fl_pg_pool =
			cpu_to_le32(l.data_pool);
	req->r_args.setlayout.layout.fl_pg_preferred =
			cpu_to_le32(l.preferred_osd);

	err = ceph_mdsc_do_request(mdsc, inode, req);
	ceph_mdsc_put_request(req);
+2 −0
Original line number Diff line number Diff line
@@ -34,6 +34,8 @@
/*
 * File/directory layout description: the striping parameters
 * (stripe_unit, stripe_count, object_size) plus the data pool id.
 *
 * NOTE(review): presumably this is the argument structure of the ceph
 * layout ioctls and therefore userspace ABI -- confirm before changing
 * field order or sizes.
 */
struct ceph_ioctl_layout {
	__u64 stripe_unit, stripe_count, object_size;
	__u64 data_pool;

	/* obsolete.  new values ignored, always return -1 */
	__s64 preferred_osd;
};

Loading