
Commit 9104d31a authored by Lars Ellenberg, committed by Jens Axboe

drbd: introduce WRITE_SAME support



We will support WRITE_SAME if
 * all peers support WRITE_SAME (both in kernel and DRBD version),
 * all peer devices support WRITE_SAME, and
 * logical_block_size is identical on all peers.

We may at some point introduce a fallback on the receiving side
for devices/kernels that do not support WRITE_SAME,
by open-coding a submit loop. But not yet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 60bac040
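
For illustration only (not part of this commit): once the handshake enables WRITE_SAME, an in-kernel caller can pattern-fill a long range of the replicated device through the generic block-layer helper, and drbd_send_dblock() below ships it as a single P_WSAME packet whose payload is one logical block rather than the whole range. A minimal sketch against a kernel of this era; the helper fill_range_with_pattern() is hypothetical, while blkdev_issue_write_same() is the stock block-layer API:

	#include <linux/blkdev.h>
	#include <linux/gfp.h>

	/* Hypothetical example: replicate one page-sized pattern across
	 * nr_sects sectors of e.g. /dev/drbd0.  The block layer turns this
	 * into a REQ_OP_WRITE_SAME bio that carries a single page. */
	static int fill_range_with_pattern(struct block_device *bdev,
					   sector_t sector, sector_t nr_sects,
					   struct page *pattern)
	{
		/* Returns -EOPNOTSUPP when max_write_same_sectors is 0,
		 * i.e. when the conditions listed above were not met and
		 * DRBD left WRITE_SAME disabled on its request queue. */
		return blkdev_issue_write_same(bdev, sector, nr_sects,
					       GFP_NOIO, pattern);
	}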
drivers/block/drbd/drbd_actlog.c  +8 −1
@@ -840,6 +840,13 @@ static int update_sync_bits(struct drbd_device *device,
	return count;
}

+static bool plausible_request_size(int size)
+{
+	return size > 0
+		&& size <= DRBD_MAX_BATCH_BIO_SIZE
+		&& IS_ALIGNED(size, 512);
+}
+
/* clear the bit corresponding to the piece of storage in question:
 * size bytes of data starting from sector.  Only clear the bits of the affected
 * one or more _aligned_ BM_BLOCK_SIZE blocks.
@@ -859,7 +866,7 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
	if ((mode == SET_OUT_OF_SYNC) && size == 0)
		return 0;

-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
+	if (!plausible_request_size(size)) {
		drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
				drbd_change_sync_fname[mode],
				(unsigned long long)sector, size);
drivers/block/drbd/drbd_debugfs.c  +3 −8
@@ -237,14 +237,9 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
	seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
	seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");

-	if (f & EE_IS_TRIM) {
-		seq_putc(m, sep);
-		sep = '|';
-		if (f & EE_IS_TRIM_USE_ZEROOUT)
-			seq_puts(m, "zero-out");
-		else
-			seq_puts(m, "trim");
-	}
+	if (f & EE_IS_TRIM)
+		__seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim");
+	seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
	seq_putc(m, '\n');
}

drivers/block/drbd/drbd_int.h  +9 −4
@@ -468,6 +468,9 @@ enum {
	/* this is/was a write request */
	__EE_WRITE,

+	/* this is/was a write same request */
+	__EE_WRITE_SAME,
+
	/* this originates from application on peer
	 * (not some resync or verify or other DRBD internal request) */
	__EE_APPLICATION,
@@ -487,6 +490,7 @@ enum {
#define EE_IN_INTERVAL_TREE	(1<<__EE_IN_INTERVAL_TREE)
#define EE_SUBMITTED		(1<<__EE_SUBMITTED)
#define EE_WRITE		(1<<__EE_WRITE)
+#define EE_WRITE_SAME		(1<<__EE_WRITE_SAME)
#define EE_APPLICATION		(1<<__EE_APPLICATION)
#define EE_RS_THIN_REQ		(1<<__EE_RS_THIN_REQ)

@@ -1350,8 +1354,8 @@ struct bm_extent {
/* For now, don't allow more than half of what we can "activate" in one
 * activity log transaction to be discarded in one go. We may need to rework
 * drbd_al_begin_io() to allow for even larger discard ranges */
-#define DRBD_MAX_DISCARD_SIZE	(AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE)
-#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
+#define DRBD_MAX_BATCH_BIO_SIZE	 (AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE)
+#define DRBD_MAX_BBIO_SECTORS    (DRBD_MAX_BATCH_BIO_SIZE >> 9)

extern int  drbd_bm_init(struct drbd_device *device);
extern int  drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
@@ -1488,7 +1492,8 @@ enum determine_dev_size {
extern enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
extern void resync_after_online_grow(struct drbd_device *);
-extern void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev);
+extern void drbd_reconsider_queue_parameters(struct drbd_device *device,
+			struct drbd_backing_dev *bdev, struct o_qlim *o);
extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
					enum drbd_role new_role,
					int force);
@@ -1569,7 +1574,7 @@ extern int drbd_submit_peer_request(struct drbd_device *,
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
						     sector_t, unsigned int,
-						     bool,
+						     unsigned int,
						     gfp_t) __must_hold(local);
extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
				 int);
drivers/block/drbd/drbd_main.c  +72 −10
@@ -920,6 +920,31 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
	}
}

+/* communicated if (agreed_features & DRBD_FF_WSAME) */
+void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q)
+{
+	if (q) {
+		p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
+		p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
+		p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
+		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+		p->qlim->discard_enabled = blk_queue_discard(q);
+		p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q);
+		p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
+	} else {
+		q = device->rq_queue;
+		p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
+		p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
+		p->qlim->alignment_offset = 0;
+		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+		p->qlim->discard_enabled = 0;
+		p->qlim->discard_zeroes_data = 0;
+		p->qlim->write_same_capable = 0;
+	}
+}
+
int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
{
	struct drbd_device *device = peer_device->device;
@@ -928,29 +953,37 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
	sector_t d_size, u_size;
	int q_order_type;
	unsigned int max_bio_size;
+	unsigned int packet_size;
+
+	sock = &peer_device->connection->data;
+	p = drbd_prepare_command(peer_device, sock);
+	if (!p)
+		return -EIO;
+
+	packet_size = sizeof(*p);
+	if (peer_device->connection->agreed_features & DRBD_FF_WSAME)
+		packet_size += sizeof(p->qlim[0]);
+
+	memset(p, 0, packet_size);
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
+		struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
		D_ASSERT(device, device->ldev->backing_bdev);
		d_size = drbd_get_max_capacity(device->ldev);
		rcu_read_lock();
		u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();
		q_order_type = drbd_queue_order_type(device);
-		max_bio_size = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
+		max_bio_size = queue_max_hw_sectors(q) << 9;
		max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
+		assign_p_sizes_qlim(device, p, q);
		put_ldev(device);
	} else {
		d_size = 0;
		u_size = 0;
		q_order_type = QUEUE_ORDERED_NONE;
		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
+		assign_p_sizes_qlim(device, p, NULL);
	}

-	sock = &peer_device->connection->data;
-	p = drbd_prepare_command(peer_device, sock);
-	if (!p)
-		return -EIO;

	if (peer_device->connection->agreed_pro_version <= 94)
		max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
	else if (peer_device->connection->agreed_pro_version < 100)
@@ -962,7 +995,8 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
	p->max_bio_size = cpu_to_be32(max_bio_size);
	p->queue_order_type = cpu_to_be16(q_order_type);
	p->dds_flags = cpu_to_be16(flags);
-	return drbd_send_command(peer_device, sock, P_SIZES, sizeof(*p), NULL, 0);
+
+	return drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0);
}

/**
@@ -1577,6 +1611,9 @@ static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio)
					 ? 0 : MSG_MORE);
		if (err)
			return err;
+		/* REQ_OP_WRITE_SAME has only one segment */
+		if (bio_op(bio) == REQ_OP_WRITE_SAME)
+			break;
	}
	return 0;
}
@@ -1595,6 +1632,9 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b
				      bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
		if (err)
			return err;
+		/* REQ_OP_WRITE_SAME has only one segment */
+		if (bio_op(bio) == REQ_OP_WRITE_SAME)
+			break;
	}
	return 0;
}
@@ -1626,6 +1666,7 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
		return  (bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
			(bio->bi_rw & REQ_FUA ? DP_FUA : 0) |
			(bio->bi_rw & REQ_PREFLUSH ? DP_FLUSH : 0) |
+			(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
			(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
	else
		return bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
@@ -1639,6 +1680,8 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
	struct drbd_device *device = peer_device->device;
	struct drbd_socket *sock;
	struct p_data *p;
+	struct p_wsame *wsame = NULL;
+	void *digest_out;
	unsigned int dp_flags = 0;
	int digest_size;
	int err;
@@ -1674,12 +1717,29 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
		err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
		goto out;
	}
+	if (dp_flags & DP_WSAME) {
+		/* this will only work if DRBD_FF_WSAME is set AND the
+		 * handshake agreed that all nodes and backend devices are
+		 * WRITE_SAME capable and agree on logical_block_size */
+		wsame = (struct p_wsame*)p;
+		digest_out = wsame + 1;
+		wsame->size = cpu_to_be32(req->i.size);
+	} else
+		digest_out = p + 1;

	/* our digest is still only over the payload.
	 * TRIM does not carry any payload. */
	if (digest_size)
-		drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
-	err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + digest_size, NULL, req->i.size);
+		drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out);
+	if (wsame) {
+		err =
+		    __send_command(peer_device->connection, device->vnr, sock, P_WSAME,
+				   sizeof(*wsame) + digest_size, NULL,
+				   bio_iovec(req->master_bio).bv_len);
+	} else
+		err =
+		    __send_command(peer_device->connection, device->vnr, sock, P_DATA,
+				   sizeof(*p) + digest_size, NULL, req->i.size);
	if (!err) {
		/* For protocol A, we have to memcpy the payload into
		 * socket buffers, as we may complete right away
@@ -3660,6 +3720,8 @@ const char *cmdname(enum drbd_packet cmd)
	 * one PRO_VERSION */
	static const char *cmdnames[] = {
		[P_DATA]	        = "Data",
+		[P_WSAME]	        = "WriteSame",
		[P_TRIM]	        = "Trim",
		[P_DATA_REPLY]	        = "DataReply",
		[P_RS_DATA_REPLY]	= "RSDataReply",
		[P_BARRIER]	        = "Barrier",
drivers/block/drbd/drbd_nl.c  +79 −9
@@ -1174,6 +1174,17 @@ static void blk_queue_discard_granularity(struct request_queue *q, unsigned int
{
	q->limits.discard_granularity = granularity;
}

+static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
+{
+	/* when we introduced REQ_WRITE_SAME support, we also bumped
+	 * our maximum supported batch bio size used for discards. */
+	if (connection->agreed_features & DRBD_FF_WSAME)
+		return DRBD_MAX_BBIO_SECTORS;
+	/* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
+	return AL_EXTENT_SIZE >> 9;
+}
+
static void decide_on_discard_support(struct drbd_device *device,
			struct request_queue *q,
			struct request_queue *b,
@@ -1190,7 +1201,7 @@ static void decide_on_discard_support(struct drbd_device *device,
		can_do = false;
		drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n");
	}
-	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & FF_TRIM)) {
+	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
		can_do = false;
		drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
	}
@@ -1202,7 +1213,7 @@ static void decide_on_discard_support(struct drbd_device *device,
		 * you care, you need to use devices with similar
		 * topology on all peers. */
		blk_queue_discard_granularity(q, 512);
-		q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
+		q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
	} else {
		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
@@ -1223,8 +1234,67 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
	}
}

+static void decide_on_write_same_support(struct drbd_device *device,
+			struct request_queue *q,
+			struct request_queue *b, struct o_qlim *o)
+{
+	struct drbd_peer_device *peer_device = first_peer_device(device);
+	struct drbd_connection *connection = peer_device->connection;
+	bool can_do = b ? b->limits.max_write_same_sectors : true;
+
+	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
+		can_do = false;
+		drbd_info(peer_device, "peer does not support WRITE_SAME\n");
+	}
+
+	if (o) {
+		/* logical block size; queue_logical_block_size(NULL) is 512 */
+		unsigned int peer_lbs = be32_to_cpu(o->logical_block_size);
+		unsigned int me_lbs_b = queue_logical_block_size(b);
+		unsigned int me_lbs = queue_logical_block_size(q);
+
+		if (me_lbs_b != me_lbs) {
+			drbd_warn(device,
+				"logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n",
+				me_lbs, me_lbs_b);
+			/* rather disable write same than trigger some BUG_ON later in the scsi layer. */
+			can_do = false;
+		}
+		if (me_lbs_b != peer_lbs) {
+			drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n",
+				me_lbs, peer_lbs);
+			if (can_do) {
+				drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n");
+				can_do = false;
+			}
+			me_lbs = max(me_lbs, me_lbs_b);
+			/* We cannot change the logical block size of an in-use queue.
+			 * We can only hope that access happens to be properly aligned.
+			 * If not, the peer will likely produce an IO error, and detach. */
+			if (peer_lbs > me_lbs) {
+				if (device->state.role != R_PRIMARY) {
+					blk_queue_logical_block_size(q, peer_lbs);
+					drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs);
+				} else {
+					drbd_warn(peer_device,
+						"current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n",
+						me_lbs, peer_lbs);
+				}
+			}
+		}
+		if (can_do && !o->write_same_capable) {
+			/* If we introduce an open-coded write-same loop on the receiving side,
+			 * the peer would present itself as "capable". */
+			drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n");
+			can_do = false;
+		}
+	}
+
+	blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0);
+}
+
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
-				   unsigned int max_bio_size)
+				   unsigned int max_bio_size, struct o_qlim *o)
{
	struct request_queue * const q = device->rq_queue;
	unsigned int max_hw_sectors = max_bio_size >> 9;
@@ -1244,15 +1314,15 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
		rcu_read_unlock();

		blk_set_stacking_limits(&q->limits);
-		blk_queue_max_write_same_sectors(q, 0);
	}

	blk_queue_logical_block_size(q, 512);
	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* This is the workaround for "bio would need to, but cannot, be split" */
	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
	blk_queue_segment_boundary(q, PAGE_SIZE-1);
	decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
+	decide_on_write_same_support(device, q, b, o);

	if (b) {
		blk_queue_stack_limits(q, b);

@@ -1266,7 +1336,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
	fixup_discard_if_not_supported(q);
}

-void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev)
+void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
{
	unsigned int now, new, local, peer;

@@ -1309,7 +1379,7 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_ba
	if (new != now)
		drbd_info(device, "max BIO size = %u\n", new);

-	drbd_setup_queue_param(device, bdev, new);
+	drbd_setup_queue_param(device, bdev, new, o);
}

/* Starts the worker thread */
@@ -1542,7 +1612,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
		drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);

	if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned)
-		drbd_reconsider_queue_parameters(device, device->ldev);
+		drbd_reconsider_queue_parameters(device, device->ldev, NULL);

	drbd_md_sync(device);

@@ -1922,7 +1992,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
	device->read_cnt = 0;
	device->writ_cnt = 0;

-	drbd_reconsider_queue_parameters(device, device->ldev);
+	drbd_reconsider_queue_parameters(device, device->ldev, NULL);

	/* If I am currently not R_PRIMARY,
	 * but meta data primary indicator is set,
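
Observability note (not part of this commit): whether WRITE_SAME survived the negotiation can be read back from the DRBD device's own queue limits; assuming the standard block sysfs layout, /sys/block/drbd0/queue/write_same_max_bytes reads 0 when decide_on_write_same_support() disabled it, and DRBD_MAX_BATCH_BIO_SIZE when all checks passed.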