Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6f428df4 authored by Ilya Dryomov's avatar Ilya Dryomov
Browse files

libceph: pg_upmap[_items] infrastructure



pg_temp and pg_upmap encodings are the same (PG -> array of osds),
except for the incremental remove: it's an empty mapping in new_pg_temp
for pg_temp and a separate old_pg_upmap set for pg_upmap.  (This isn't
to allow for empty pg_upmap mappings -- apparently, pg_temp just wasn't
looked at as an example for pg_upmap encoding.)

Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap.
__decode_pg_temp() stores into pg_temp union member, but since pg_upmap
union member is identical, reading through pg_upmap later is OK.

Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
parent 278b1d70
Loading
Loading
Loading
Loading
+9 −1
Original line number Diff line number Diff line
@@ -143,10 +143,14 @@ struct ceph_pg_mapping {
		struct {
			int len;
			int osds[];
		} pg_temp;
		} pg_temp, pg_upmap;
		struct {
			int osd;
		} primary_temp;
		struct {
			int len;
			int from_to[][2];
		} pg_upmap_items;
	};
};

@@ -165,6 +169,10 @@ struct ceph_osdmap {
	struct rb_root pg_temp;
	struct rb_root primary_temp;

	/* remap (post-CRUSH, pre-up) */
	struct rb_root pg_upmap;	/* PG := raw set */
	struct rb_root pg_upmap_items;	/* from -> to within raw set */

	u32 *osd_primary_affinity;

	struct rb_root pg_pools;
+23 −0
Original line number Diff line number Diff line
@@ -104,6 +104,29 @@ static int osdmap_show(struct seq_file *s, void *p)
		seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
			   pg->pgid.seed, pg->primary_temp.osd);
	}
	for (n = rb_first(&map->pg_upmap); n; n = rb_next(n)) {
		struct ceph_pg_mapping *pg =
			rb_entry(n, struct ceph_pg_mapping, node);

		seq_printf(s, "pg_upmap %llu.%x [", pg->pgid.pool,
			   pg->pgid.seed);
		for (i = 0; i < pg->pg_upmap.len; i++)
			seq_printf(s, "%s%d", (i == 0 ? "" : ","),
				   pg->pg_upmap.osds[i]);
		seq_printf(s, "]\n");
	}
	for (n = rb_first(&map->pg_upmap_items); n; n = rb_next(n)) {
		struct ceph_pg_mapping *pg =
			rb_entry(n, struct ceph_pg_mapping, node);

		seq_printf(s, "pg_upmap_items %llu.%x [", pg->pgid.pool,
			   pg->pgid.seed);
		for (i = 0; i < pg->pg_upmap_items.len; i++)
			seq_printf(s, "%s%d->%d", (i == 0 ? "" : ","),
				   pg->pg_upmap_items.from_to[i][0],
				   pg->pg_upmap_items.from_to[i][1]);
		seq_printf(s, "]\n");
	}

	up_read(&osdc->lock);
	return 0;
+132 −3
Original line number Diff line number Diff line
@@ -735,6 +735,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
	map->pool_max = -1;
	map->pg_temp = RB_ROOT;
	map->primary_temp = RB_ROOT;
	map->pg_upmap = RB_ROOT;
	map->pg_upmap_items = RB_ROOT;
	mutex_init(&map->crush_workspace_mutex);

	return map;
@@ -759,6 +761,20 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
		erase_pg_mapping(&map->primary_temp, pg);
		free_pg_mapping(pg);
	}
	while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
		struct ceph_pg_mapping *pg =
			rb_entry(rb_first(&map->pg_upmap),
				 struct ceph_pg_mapping, node);
		rb_erase(&pg->node, &map->pg_upmap);
		kfree(pg);
	}
	while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
		struct ceph_pg_mapping *pg =
			rb_entry(rb_first(&map->pg_upmap_items),
				 struct ceph_pg_mapping, node);
		rb_erase(&pg->node, &map->pg_upmap_items);
		kfree(pg);
	}
	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
		struct ceph_pg_pool_info *pi =
			rb_entry(rb_first(&map->pg_pools),
@@ -1161,6 +1177,75 @@ static int decode_new_primary_affinity(void **p, void *end,
	return -EINVAL;
}

static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
						 bool __unused)
{
	return __decode_pg_temp(p, end, false);
}

static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
{
	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
				 false);
}

static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
{
	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
				 true);
}

static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
{
	return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
}

static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
						       bool __unused)
{
	struct ceph_pg_mapping *pg;
	u32 len, i;

	ceph_decode_32_safe(p, end, len, e_inval);
	if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
		return ERR_PTR(-EINVAL);

	ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
	pg = kzalloc(sizeof(*pg) + 2 * len * sizeof(u32), GFP_NOIO);
	if (!pg)
		return ERR_PTR(-ENOMEM);

	pg->pg_upmap_items.len = len;
	for (i = 0; i < len; i++) {
		pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
		pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
	}

	return pg;

e_inval:
	return ERR_PTR(-EINVAL);
}

static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
{
	return decode_pg_mapping(p, end, &map->pg_upmap_items,
				 __decode_pg_upmap_items, false);
}

static int decode_new_pg_upmap_items(void **p, void *end,
				     struct ceph_osdmap *map)
{
	return decode_pg_mapping(p, end, &map->pg_upmap_items,
				 __decode_pg_upmap_items, true);
}

static int decode_old_pg_upmap_items(void **p, void *end,
				     struct ceph_osdmap *map)
{
	return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
}

/*
 * decode a full map.
 */
@@ -1250,9 +1335,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
		if (err)
			goto bad;
	} else {
		/* XXX can this happen? */
		kfree(map->osd_primary_affinity);
		map->osd_primary_affinity = NULL;
		WARN_ON(map->osd_primary_affinity);
	}

	/* crush */
@@ -1261,6 +1344,26 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
	if (err)
		goto bad;

	*p += len;
	if (struct_v >= 3) {
		/* erasure_code_profiles */
		ceph_decode_skip_map_of_map(p, end, string, string, string,
					    bad);
	}

	if (struct_v >= 4) {
		err = decode_pg_upmap(p, end, map);
		if (err)
			goto bad;

		err = decode_pg_upmap_items(p, end, map);
		if (err)
			goto bad;
	} else {
		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
	}

	/* ignore the rest */
	*p = end;

@@ -1520,6 +1623,32 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
			goto bad;
	}

	if (struct_v >= 3) {
		/* new_erasure_code_profiles */
		ceph_decode_skip_map_of_map(p, end, string, string, string,
					    bad);
		/* old_erasure_code_profiles */
		ceph_decode_skip_set(p, end, string, bad);
	}

	if (struct_v >= 4) {
		err = decode_new_pg_upmap(p, end, map);
		if (err)
			goto bad;

		err = decode_old_pg_upmap(p, end, map);
		if (err)
			goto bad;

		err = decode_new_pg_upmap_items(p, end, map);
		if (err)
			goto bad;

		err = decode_old_pg_upmap_items(p, end, map);
		if (err)
			goto bad;
	}

	/* ignore the rest */
	*p = end;