Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 778f271d authored by Philipp Reisner's avatar Philipp Reisner
Browse files

drbd: The new, smarter resync speed controller

parent 8e26f9cc
Loading
Loading
Loading
Loading
+11 −0
Original line number Original line Diff line number Diff line
@@ -928,6 +928,12 @@ enum write_ordering_e {
	WO_bio_barrier
	WO_bio_barrier
};
};


/*
 * Fixed-size ring of int values used as the resync planner's queue of
 * per-step corrections (see drbd_conf.rs_plan_s).
 */
struct fifo_buffer {
	int *values;             /* array of @size ints; the config paths kzalloc() it */
	unsigned int head_index; /* slot that the next fifo_push() reads and overwrites */
	unsigned int size;       /* number of slots in @values; 0 means no plan buffer */
};

struct drbd_conf {
struct drbd_conf {
	/* things that are stored as / read from meta data on disk */
	/* things that are stored as / read from meta data on disk */
	unsigned long flags;
	unsigned long flags;
@@ -1068,6 +1074,11 @@ struct drbd_conf {
	u64 ed_uuid; /* UUID of the exposed data */
	u64 ed_uuid; /* UUID of the exposed data */
	struct mutex state_mutex;
	struct mutex state_mutex;
	char congestion_reason;  /* Why we where congested... */
	char congestion_reason;  /* Why we where congested... */
	atomic_t rs_sect_in; /* counter to measure the incoming resync data rate */
	int c_sync_rate; /* current resync rate after delay_probe magic */
	struct fifo_buffer rs_plan_s; /* correction values of resync planner */
	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
	int rs_planed;    /* resync sectors already planned */
};
};


static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
+1 −0
Original line number Original line Diff line number Diff line
@@ -2734,6 +2734,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
	atomic_set(&mdev->net_cnt, 0);
	atomic_set(&mdev->net_cnt, 0);
	atomic_set(&mdev->packet_seq, 0);
	atomic_set(&mdev->packet_seq, 0);
	atomic_set(&mdev->pp_in_use, 0);
	atomic_set(&mdev->pp_in_use, 0);
	atomic_set(&mdev->rs_sect_in, 0);


	mutex_init(&mdev->md_io_mutex);
	mutex_init(&mdev->md_io_mutex);
	mutex_init(&mdev->data.mutex);
	mutex_init(&mdev->data.mutex);
+22 −0
Original line number Original line Diff line number Diff line
@@ -1587,6 +1587,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
	struct crypto_hash *csums_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct syncer_conf sc;
	struct syncer_conf sc;
	cpumask_var_t new_cpu_mask;
	cpumask_var_t new_cpu_mask;
	int *rs_plan_s = NULL;
	int fifo_size;


	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
		retcode = ERR_NOMEM;
		retcode = ERR_NOMEM;
@@ -1687,6 +1689,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
	if (retcode != NO_ERROR)
	if (retcode != NO_ERROR)
		goto fail;
		goto fail;


	fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
		rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
		if (!rs_plan_s) {
			dev_err(DEV, "kmalloc of fifo_buffer failed");
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	/* ok, assign the rest of it as well.
	/* ok, assign the rest of it as well.
	 * lock against receive_SyncParam() */
	 * lock against receive_SyncParam() */
	spin_lock(&mdev->peer_seq_lock);
	spin_lock(&mdev->peer_seq_lock);
@@ -1703,6 +1715,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
		mdev->verify_tfm = verify_tfm;
		mdev->verify_tfm = verify_tfm;
		verify_tfm = NULL;
		verify_tfm = NULL;
	}
	}

	if (fifo_size != mdev->rs_plan_s.size) {
		kfree(mdev->rs_plan_s.values);
		mdev->rs_plan_s.values = rs_plan_s;
		mdev->rs_plan_s.size   = fifo_size;
		mdev->rs_planed = 0;
		rs_plan_s = NULL;
	}

	spin_unlock(&mdev->peer_seq_lock);
	spin_unlock(&mdev->peer_seq_lock);


	if (get_ldev(mdev)) {
	if (get_ldev(mdev)) {
@@ -1734,6 +1755,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n


	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
fail:
fail:
	kfree(rs_plan_s);
	free_cpumask_var(new_cpu_mask);
	free_cpumask_var(new_cpu_mask);
	crypto_free_hash(csums_tfm);
	crypto_free_hash(csums_tfm);
	crypto_free_hash(verify_tfm);
	crypto_free_hash(verify_tfm);
+20 −0
Original line number Original line Diff line number Diff line
@@ -1640,6 +1640,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
		drbd_send_ack_dp(mdev, P_NEG_ACK, p);
		drbd_send_ack_dp(mdev, P_NEG_ACK, p);
	}
	}


	atomic_add(data_size >> 9, &mdev->rs_sect_in);

	return ok;
	return ok;
}
}


@@ -2810,6 +2812,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->agreed_pro_version;
	const int apv = mdev->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;


	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
@@ -2904,6 +2908,15 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					goto disconnect;
				}
			}
		}
		}


		spin_lock(&mdev->peer_seq_lock);
		spin_lock(&mdev->peer_seq_lock);
@@ -2922,6 +2935,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
			mdev->csums_tfm = csums_tfm;
			mdev->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		}
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size   = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
		spin_unlock(&mdev->peer_seq_lock);
	}
	}


@@ -4202,6 +4221,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
	/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
	/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
	mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
	mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
	dec_rs_pending(mdev);
	dec_rs_pending(mdev);
	atomic_add(blksize >> 9, &mdev->rs_sect_in);


	return TRUE;
	return TRUE;
}
}
+97 −1
Original line number Original line Diff line number Diff line
@@ -422,6 +422,89 @@ void resync_timer_fn(unsigned long data)
		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
}
}


/*
 * fifo_set() - overwrite every slot of the fifo with the same value.
 * @fb:    fifo to reset
 * @value: value stored into each of the fb->size slots
 *
 * Unlike fifo_add_val(), this assigns rather than accumulates, so
 * fifo_set(fb, 0) really clears the plan.  A fifo with size 0 is left
 * untouched (fb->values is not dereferenced).
 */
static void fifo_set(struct fifo_buffer *fb, int value)
{
	unsigned int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

/*
 * fifo_push() - replace the value at the head slot and advance the head.
 * @fb:    fifo to operate on; fb->values must hold at least one slot
 * @value: value written into the slot that is being vacated
 *
 * The head index wraps back to 0 once it reaches fb->size.
 *
 * Returns the value that was stored in the head slot before the call.
 */
static int fifo_push(struct fifo_buffer *fb, int value)
{
	unsigned int slot = fb->head_index;
	int evicted = fb->values[slot];

	fb->values[slot] = value;

	/* advance, wrapping around at the end of the ring */
	fb->head_index = slot + 1;
	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return evicted;
}

/*
 * fifo_add_val() - add @value to every slot of the fifo.
 * @fb:    fifo whose fb->size slots are adjusted in place
 * @value: amount added to each slot (may be negative)
 */
static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int *slot = fb->values;
	unsigned int remaining;

	for (remaining = fb->size; remaining; remaining--)
		*slot++ += value;
}

/*
 * drbd_rs_controller() - decide how many sectors to request in this turn.
 * @mdev: device whose resync is being paced
 *
 * Consumes the rs_sect_in counter (sectors that arrived since the last
 * turn), derives how many sectors we want outstanding, spreads the
 * difference ("correction") evenly over all slots of the rs_plan_s fifo,
 * and then pops the correction planned for the current step.
 *
 * The plan fifo and rs_planed are accessed under peer_seq_lock, because
 * the configuration paths replace the fifo and reset rs_planed under that
 * lock.  If the fifo turns out to be empty once the lock is held (the
 * caller checks its size without the lock), no planning is possible and
 * the fixed sync_conf.rate budget for one step is returned instead.
 *
 * Returns the number of sectors to request, clamped to
 * [0, c_max_rate budget for one step] on the planning path.
 */
int drbd_rs_controller(struct drbd_conf *mdev)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	unsigned int want;     /* The number of sectors we want in the proxy */
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in the proxy*/
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;

	sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
	mdev->rs_in_flight -= sect_in;

	spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */

	steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (steps <= 0) {
		/*
		 * The plan buffer was removed after the caller looked at its
		 * size.  Dividing by steps or touching rs_plan_s.values would
		 * be fatal, so fall back to the fixed-rate budget of one step
		 * (same expression as the start-of-resync case below).
		 */
		spin_unlock(&mdev->peer_seq_lock);
		return (mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ;
	}

	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
			sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - mdev->rs_in_flight - mdev->rs_planed;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(&mdev->rs_plan_s, cps);
	mdev->rs_planed += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(&mdev->rs_plan_s, 0);
	/* still under the lock: rs_planed must stay consistent with the fifo */
	mdev->rs_planed -= curr_corr;
	spin_unlock(&mdev->peer_seq_lock);

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, mdev->rs_in_flight, want, correction,
		 steps, cps, mdev->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

int w_make_resync_request(struct drbd_conf *mdev,
int w_make_resync_request(struct drbd_conf *mdev,
		struct drbd_work *w, int cancel)
		struct drbd_work *w, int cancel)
{
{
@@ -459,7 +542,13 @@ int w_make_resync_request(struct drbd_conf *mdev,
	max_segment_size = mdev->agreed_pro_version < 94 ?
	max_segment_size = mdev->agreed_pro_version < 94 ?
		queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
		queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;


	number = SLEEP_TIME * mdev->sync_conf.rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		mdev->c_sync_rate = mdev->sync_conf.rate;
		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	pe = atomic_read(&mdev->rs_pending_cnt);
	pe = atomic_read(&mdev->rs_pending_cnt);


	mutex_lock(&mdev->data.mutex);
	mutex_lock(&mdev->data.mutex);
@@ -593,6 +682,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
	}
	}


 requeue:
 requeue:
	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(mdev);
	put_ldev(mdev);
	return 1;
	return 1;
@@ -1419,6 +1509,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
			drbd_resync_finished(mdev);
			drbd_resync_finished(mdev);
		}
		}


		atomic_set(&mdev->rs_sect_in, 0);
		mdev->rs_in_flight = 0;
		mdev->rs_planed = 0;
		spin_lock(&mdev->peer_seq_lock);
		fifo_set(&mdev->rs_plan_s, 0);
		spin_unlock(&mdev->peer_seq_lock);
		/* ns.conn may already be != mdev->state.conn,
		/* ns.conn may already be != mdev->state.conn,
		 * we may have been paused in between, or become paused until
		 * we may have been paused in between, or become paused until
		 * the timer triggers.
		 * the timer triggers.