Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 875f1d07 authored by Jens Axboe
Browse files

iov_iter: add ITER_BVEC_FLAG_NO_REF flag



For ITER_BVEC, if we're holding on to kernel pages, the caller
doesn't need to grab a reference to the bvec pages, and drop that
same reference on IO completion. This is essentially safe for any
ITER_BVEC, but some use cases end up reusing pages and unconditionally
dropping a page reference on completion. An example of that is
sendfile(2), which ends up being a splice_in + splice_out on the
pipe pages.

Add a flag that tells us it's fine to not grab a page reference
to the bvec pages, since the caller knows not to drop a reference
when it's done with the pages.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent bf33a769
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -855,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
	if (offset)
		iov_iter_advance(iter, offset);

	/* don't drop a reference to these pages */
	iter->type |= ITER_BVEC_FLAG_NO_REF;
	return 0;
}

+19 −5
Original line number Diff line number Diff line
@@ -23,14 +23,23 @@ struct kvec {
};

/*
 * Values OR-ed into iov_iter.type.
 *
 * Bit 0 of the type word is the read/write bit and bit 1 is the
 * ITER_BVEC_FLAG_NO_REF modifier, so the iterator kinds start at bit 2.
 * Each kind is a distinct single bit and none of them overlaps the
 * modifier flag.
 */
enum iter_type {
	/* set if ITER_BVEC doesn't hold a bv_page ref */
	ITER_BVEC_FLAG_NO_REF = 2,

	/* iter types */
	ITER_IOVEC = 4,
	ITER_KVEC = 8,
	ITER_BVEC = 16,
	ITER_PIPE = 32,
	ITER_DISCARD = 64,
};

struct iov_iter {
	/*
	 * Bit 0 is the read/write bit, set if we're writing.
	 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
	 * the caller isn't expecting to drop a page reference when done.
	 */
	unsigned int type;
	size_t iov_offset;
	size_t count;
@@ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
	return i->type & (READ | WRITE);
}

/*
 * Test the ITER_BVEC_FLAG_NO_REF bit in the iterator's type word.
 *
 * Returns true when the bit is set, false otherwise.  The iterator kind
 * is not examined here; only the flag bit is checked.
 */
static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
{
	const unsigned int no_ref_bit = i->type & ITER_BVEC_FLAG_NO_REF;

	return no_ref_bit ? true : false;
}

/*
 * Total number of bytes covered by an iovec.
 *