Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ba11ad9a authored by Lars Ellenberg, committed by Jens Axboe
Browse files

drbd: improve usage of MSG_MORE



It seems to improve performance if we allow the "p_data" header in its
own frame (no MSG_MORE), but sendpage all but the last page with MSG_MORE.
This is also in preparation for a later zero-copy receive implementation.

Suggested by Eduard.Guzovsky@stratus.com on drbd-dev.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
parent 5dbf1673
Loading
Loading
Loading
Loading
+19 −13
Original line number Original line Diff line number Diff line
@@ -2272,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
 * with page_count == 0 or PageSlab.
 * with page_count == 0 or PageSlab.
 */
 */
static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
		   int offset, size_t size)
		   int offset, size_t size, unsigned msg_flags)
{
{
	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
	kunmap(page);
	kunmap(page);
	if (sent == size)
	if (sent == size)
		mdev->send_cnt += size>>9;
		mdev->send_cnt += size>>9;
@@ -2282,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
}
}


static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
		    int offset, size_t size)
		    int offset, size_t size, unsigned msg_flags)
{
{
	mm_segment_t oldfs = get_fs();
	mm_segment_t oldfs = get_fs();
	int sent, ok;
	int sent, ok;
@@ -2295,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
	 * __page_cache_release a page that would actually still be referenced
	 * __page_cache_release a page that would actually still be referenced
	 * by someone, leading to some obscure delayed Oops somewhere else. */
	 * by someone, leading to some obscure delayed Oops somewhere else. */
	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
		return _drbd_no_send_page(mdev, page, offset, size);
		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);


	msg_flags |= MSG_NOSIGNAL;
	drbd_update_congested(mdev);
	drbd_update_congested(mdev);
	set_fs(KERNEL_DS);
	set_fs(KERNEL_DS);
	do {
	do {
		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
							offset, len,
							offset, len,
							MSG_NOSIGNAL);
							msg_flags);
		if (sent == -EAGAIN) {
		if (sent == -EAGAIN) {
			if (we_should_drop_the_connection(mdev,
			if (we_should_drop_the_connection(mdev,
							  mdev->data.socket))
							  mdev->data.socket))
@@ -2331,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
{
{
	struct bio_vec *bvec;
	struct bio_vec *bvec;
	int i;
	int i;
	/* hint all but last page with MSG_MORE */
	__bio_for_each_segment(bvec, bio, i, 0) {
	__bio_for_each_segment(bvec, bio, i, 0) {
		if (!_drbd_no_send_page(mdev, bvec->bv_page,
		if (!_drbd_no_send_page(mdev, bvec->bv_page,
				     bvec->bv_offset, bvec->bv_len))
				     bvec->bv_offset, bvec->bv_len,
				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
			return 0;
			return 0;
	}
	}
	return 1;
	return 1;
@@ -2343,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
{
{
	struct bio_vec *bvec;
	struct bio_vec *bvec;
	int i;
	int i;
	/* hint all but last page with MSG_MORE */
	__bio_for_each_segment(bvec, bio, i, 0) {
	__bio_for_each_segment(bvec, bio, i, 0) {
		if (!_drbd_send_page(mdev, bvec->bv_page,
		if (!_drbd_send_page(mdev, bvec->bv_page,
				     bvec->bv_offset, bvec->bv_len))
				     bvec->bv_offset, bvec->bv_len,
				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
			return 0;
			return 0;
	}
	}

	return 1;
	return 1;
}
}


@@ -2356,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
{
{
	struct page *page = e->pages;
	struct page *page = e->pages;
	unsigned len = e->size;
	unsigned len = e->size;
	/* hint all but last page with MSG_MORE */
	page_chain_for_each(page) {
	page_chain_for_each(page) {
		unsigned l = min_t(unsigned, len, PAGE_SIZE);
		unsigned l = min_t(unsigned, len, PAGE_SIZE);
		if (!_drbd_send_page(mdev, page, 0, l))
		if (!_drbd_send_page(mdev, page, 0, l,
				page_chain_next(page) ? MSG_MORE : 0))
			return 0;
			return 0;
		len -= l;
		len -= l;
	}
	}
@@ -2438,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
	p.dp_flags = cpu_to_be32(dp_flags);
	p.dp_flags = cpu_to_be32(dp_flags);
	set_bit(UNPLUG_REMOTE, &mdev->flags);
	set_bit(UNPLUG_REMOTE, &mdev->flags);
	ok = (sizeof(p) ==
	ok = (sizeof(p) ==
		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
	if (ok && dgs) {
	if (ok && dgs) {
		dgb = mdev->int_dig_out;
		dgb = mdev->int_dig_out;
		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
	}
	}
	if (ok) {
	if (ok) {
		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
@@ -2491,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
		return 0;
		return 0;


	ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
	ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
					sizeof(p), MSG_MORE);
					sizeof(p), dgs ? MSG_MORE : 0);
	if (ok && dgs) {
	if (ok && dgs) {
		dgb = mdev->int_dig_out;
		dgb = mdev->int_dig_out;
		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
	}
	}
	if (ok)
	if (ok)
		ok = _drbd_send_zc_ee(mdev, e);
		ok = _drbd_send_zc_ee(mdev, e);