Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dc6ae6d8 authored by Ilya Dryomov
Browse files

crush: add chooseleaf_stable tunable



Add a tunable to fix the bug that chooseleaf may cause unnecessary pg
migrations when some device fails.

Reflects ceph.git commit fdb3f664448e80d984470f32f04e2e6f03ab52ec.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
parent 56a4f309
Loading
Loading
Loading
Loading
+7 −1
Original line number Original line Diff line number Diff line
@@ -59,7 +59,8 @@ enum {
	CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
	CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
	CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
	CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
	CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
	CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
	CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
};
};


/*
/*
@@ -205,6 +206,11 @@ struct crush_map {
	 * mappings line up a bit better with previous mappings. */
	 * mappings line up a bit better with previous mappings. */
	__u8 chooseleaf_vary_r;
	__u8 chooseleaf_vary_r;


	/* if true, chooseleaf firstn returns stable results (if there is
	 * no local retry) so that data migration is optimal when some
	 * device fails. */
	__u8 chooseleaf_stable;

#ifndef __KERNEL__
#ifndef __KERNEL__
	/*
	/*
	 * version 0 (original) of straw_calc has various flaws.  version 1
	 * version 0 (original) of straw_calc has various flaws.  version 1
+14 −4
Original line number Original line Diff line number Diff line
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
 * @local_retries: localized retries
 * @local_retries: localized retries
 * @local_fallback_retries: localized fallback retries
 * @local_fallback_retries: localized fallback retries
 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
 * @stable: stable mode starts rep=0 in the recursive call for all replicas
 * @vary_r: pass r to recursive calls
 * @vary_r: pass r to recursive calls
 * @out2: second output vector for leaf items (if @recurse_to_leaf)
 * @out2: second output vector for leaf items (if @recurse_to_leaf)
 * @parent_r: r value passed from the parent
 * @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
			       unsigned int local_fallback_retries,
			       unsigned int local_fallback_retries,
			       int recurse_to_leaf,
			       int recurse_to_leaf,
			       unsigned int vary_r,
			       unsigned int vary_r,
			       unsigned int stable,
			       int *out2,
			       int *out2,
			       int parent_r)
			       int parent_r)
{
{
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
	int collide, reject;
	int collide, reject;
	int count = out_size;
	int count = out_size;


	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
		recurse_to_leaf ? "_LEAF" : "",
		recurse_to_leaf ? "_LEAF" : "",
		bucket->id, x, outpos, numrep,
		bucket->id, x, outpos, numrep,
		tries, recurse_tries, local_retries, local_fallback_retries,
		tries, recurse_tries, local_retries, local_fallback_retries,
		parent_r);
		parent_r, stable);


	for (rep = outpos; rep < numrep && count > 0 ; rep++) {
	for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
		/* keep trying until we get a non-out, non-colliding item */
		/* keep trying until we get a non-out, non-colliding item */
		ftotal = 0;
		ftotal = 0;
		skip_rep = 0;
		skip_rep = 0;
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
						if (crush_choose_firstn(map,
						if (crush_choose_firstn(map,
							 map->buckets[-1-item],
							 map->buckets[-1-item],
							 weight, weight_max,
							 weight, weight_max,
							 x, outpos+1, 0,
							 x, stable ? 1 : outpos+1, 0,
							 out2, outpos, count,
							 out2, outpos, count,
							 recurse_tries, 0,
							 recurse_tries, 0,
							 local_retries,
							 local_retries,
							 local_fallback_retries,
							 local_fallback_retries,
							 0,
							 0,
							 vary_r,
							 vary_r,
							 stable,
							 NULL,
							 NULL,
							 sub_r) <= outpos)
							 sub_r) <= outpos)
							/* didn't get leaf */
							/* didn't get leaf */
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
	int choose_local_fallback_retries = map->choose_local_fallback_tries;
	int choose_local_fallback_retries = map->choose_local_fallback_tries;


	int vary_r = map->chooseleaf_vary_r;
	int vary_r = map->chooseleaf_vary_r;
	int stable = map->chooseleaf_stable;


	if ((__u32)ruleno >= map->max_rules) {
	if ((__u32)ruleno >= map->max_rules) {
		dprintk(" bad ruleno %d\n", ruleno);
		dprintk(" bad ruleno %d\n", ruleno);
@@ -870,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
				vary_r = curstep->arg1;
				vary_r = curstep->arg1;
			break;
			break;


		case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
			if (curstep->arg1 >= 0)
				stable = curstep->arg1;
			break;

		case CRUSH_RULE_CHOOSELEAF_FIRSTN:
		case CRUSH_RULE_CHOOSELEAF_FIRSTN:
		case CRUSH_RULE_CHOOSE_FIRSTN:
		case CRUSH_RULE_CHOOSE_FIRSTN:
			firstn = 1;
			firstn = 1;
@@ -932,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
						choose_local_fallback_retries,
						choose_local_fallback_retries,
						recurse_to_leaf,
						recurse_to_leaf,
						vary_r,
						vary_r,
						stable,
						c+osize,
						c+osize,
						0);
						0);
				} else {
				} else {