Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3a4d4eb3 authored by Lars Ellenberg, committed by Jens Axboe
Browse files

drbd: prepare for new striped layout of activity log



Introduce two new on-disk meta data fields: al_stripes and al_stripe_size_4k
The intended use case is activity log on RAID 0 or similar.
Logically consecutive transactions will advance their on-disk position
by al_stripe_size_4k 4kB (transaction sized) blocks.

Right now, these are still asserted to be the backward compatible
values al_stripes = 1, al_stripe_size_4k = 8 (which amounts to 32kB).

Also introduce a caching member for meta_dev_idx in the in-core
structure: even though it is initially passed in via the RCU-protected
disk_conf structure, it cannot change without a detach/attach cycle.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ae8bf312
Loading
Loading
Loading
Loading
+3 −3
Original line number Original line Diff line number Diff line
@@ -353,11 +353,11 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)


static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev)
static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev)
{
{
	const unsigned int stripes = 1;
	const unsigned int stripes = mdev->ldev->md.al_stripes;
	const unsigned int stripe_size_4kB = MD_32kB_SECT/MD_4kB_SECT;
	const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k;


	/* transaction number, modulo on-disk ring buffer wrap around */
	/* transaction number, modulo on-disk ring buffer wrap around */
	unsigned int t = mdev->al_tr_number % (stripe_size_4kB * stripes);
	unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k);


	/* ... to aligned 4k on disk block */
	/* ... to aligned 4k on disk block */
	t = ((t % stripes) * stripe_size_4kB) + t/stripes;
	t = ((t % stripes) * stripe_size_4kB) + t/stripes;
+20 −26
Original line number Original line Diff line number Diff line
@@ -755,6 +755,14 @@ struct drbd_md {


	s32 al_offset;	/* signed relative sector offset to activity log */
	s32 al_offset;	/* signed relative sector offset to activity log */
	s32 bm_offset;	/* signed relative sector offset to bitmap */
	s32 bm_offset;	/* signed relative sector offset to bitmap */

	/* cached value of bdev->disk_conf->meta_dev_idx (see below) */
	s32 meta_dev_idx;

	/* see al_tr_number_to_on_disk_sector() */
	u32 al_stripes;
	u32 al_stripe_size_4k;
	u32 al_size_4k; /* cached product of the above */
};
};


struct drbd_backing_dev {
struct drbd_backing_dev {
@@ -1862,38 +1870,24 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
}
}


/**
/**
 * drbd_md_ss__() - Return the sector number of our meta data super block
 * drbd_md_ss() - Return the sector number of our meta data super block
 * @mdev:	DRBD device.
 * @bdev:	Meta data block device.
 * @bdev:	Meta data block device.
 */
 */
static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
				    struct drbd_backing_dev *bdev)
{
{
	int meta_dev_idx;
	const int meta_dev_idx = bdev->md.meta_dev_idx;


	rcu_read_lock();
	if (meta_dev_idx == DRBD_MD_INDEX_FLEX_EXT)
	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
	rcu_read_unlock();

	switch (meta_dev_idx) {
	default: /* external, some index; this is the old fixed size layout */
		return MD_128MB_SECT * meta_dev_idx;
	case DRBD_MD_INDEX_INTERNAL:
		/* with drbd08, internal meta data is always "flexible" */
	case DRBD_MD_INDEX_FLEX_INT:
		if (!bdev->backing_bdev) {
			if (__ratelimit(&drbd_ratelimit_state)) {
				dev_err(DEV, "bdev->backing_bdev==NULL\n");
				dump_stack();
			}
		return 0;
		return 0;
		}

		/* sizeof(struct md_on_disk_07) == 4k
	/* Since drbd08, internal meta data is always "flexible".
	 * position: last 4k aligned block of 4k size */
	 * position: last 4k aligned block of 4k size */
	if (meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
	    meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)
		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8;
		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8;
	case DRBD_MD_INDEX_FLEX_EXT:

		return 0;
	/* external, some index; this is the old fixed size layout */
	}
	return MD_128MB_SECT * bdev->md.meta_dev_idx;
}
}


static inline void
static inline void
+69 −8
Original line number Original line Diff line number Diff line
@@ -2850,7 +2850,11 @@ struct meta_data_on_disk {
	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
	u32 la_peer_max_bio_size;   /* last peer max_bio_size */


	u8 reserved_u8[4096 - (7*8 + 8*4)];
	/* see al_tr_number_to_on_disk_sector() */
	u32 al_stripes;
	u32 al_stripe_size_4k;

	u8 reserved_u8[4096 - (7*8 + 10*4)];
} __packed;
} __packed;


/**
/**
@@ -2898,7 +2902,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
	buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
	buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
	buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
	buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);


	D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
	buffer->al_stripes = cpu_to_be32(mdev->ldev->md.al_stripes);
	buffer->al_stripe_size_4k = cpu_to_be32(mdev->ldev->md.al_stripe_size_4k);

	D_ASSERT(drbd_md_ss(mdev->ldev) == mdev->ldev->md.md_offset);
	sector = mdev->ldev->md.md_offset;
	sector = mdev->ldev->md.md_offset;


	if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
	if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
@@ -2916,13 +2923,60 @@ void drbd_md_sync(struct drbd_conf *mdev)
	put_ldev(mdev);
	put_ldev(mdev);
}
}


static int check_activity_log_stripe_size(struct drbd_conf *mdev,
		struct meta_data_on_disk *on_disk,
		struct drbd_md *in_core)
{
	u32 al_stripes = be32_to_cpu(on_disk->al_stripes);
	u32 al_stripe_size_4k = be32_to_cpu(on_disk->al_stripe_size_4k);
	u64 al_size_4k;

	/* both not set: default to old fixed size activity log */
	if (al_stripes == 0 && al_stripe_size_4k == 0) {
		al_stripes = 1;
		al_stripe_size_4k = MD_32kB_SECT/8;
	}

	/* some paranoia plausibility checks */

	/* we need both values to be set */
	if (al_stripes == 0 || al_stripe_size_4k == 0)
		goto err;

	al_size_4k = (u64)al_stripes * al_stripe_size_4k;

	/* Upper limit of activity log area, to avoid potential overflow
	 * problems in al_tr_number_to_on_disk_sector(). As right now, more
	 * than 72 * 4k blocks total only increases the amount of history,
	 * limiting this arbitrarily to 16 GB is not a real limitation ;-)  */
	if (al_size_4k > (16 * 1024 * 1024/4))
		goto err;

	/* Lower limit: we need at least 8 transaction slots (32kB)
	 * to not break existing setups */
	if (al_size_4k < MD_32kB_SECT/8)
		goto err;

	in_core->al_stripe_size_4k = al_stripe_size_4k;
	in_core->al_stripes = al_stripes;
	in_core->al_size_4k = al_size_4k;

	return 0;
err:
	dev_err(DEV, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n",
			al_stripes, al_stripe_size_4k);
	return -EINVAL;
}

/**
/**
 * drbd_md_read() - Reads in the meta data super block
 * drbd_md_read() - Reads in the meta data super block
 * @mdev:	DRBD device.
 * @mdev:	DRBD device.
 * @bdev:	Device from which the meta data should be read in.
 * @bdev:	Device from which the meta data should be read in.
 *
 *
 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
 * Return NO_ERROR on success, and an enum drbd_ret_code in case
 * something goes wrong.
 * something goes wrong.
 *
 * Called exactly once during drbd_adm_attach()
 */
 */
int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
{
@@ -2937,6 +2991,10 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
	if (!buffer)
	if (!buffer)
		goto out;
		goto out;


	/* First, figure out where our meta data superblock is located. */
	bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
	bdev->md.md_offset = drbd_md_ss(bdev);

	if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
	if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
		/* NOTE: can't do normal error processing here as this is
		/* NOTE: can't do normal error processing here as this is
		   called BEFORE disk is attached */
		   called BEFORE disk is attached */
@@ -2954,40 +3012,43 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
		rv = ERR_MD_UNCLEAN;
		rv = ERR_MD_UNCLEAN;
		goto err;
		goto err;
	}
	}

	rv = ERR_MD_INVALID;
	if (magic != DRBD_MD_MAGIC_08) {
	if (magic != DRBD_MD_MAGIC_08) {
		if (magic == DRBD_MD_MAGIC_07)
		if (magic == DRBD_MD_MAGIC_07)
			dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
			dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
		else
		else
			dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
			dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
		rv = ERR_MD_INVALID;
		goto err;
		goto err;
	}
	}

	if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
		goto err;

	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
		    be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
		    be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
		rv = ERR_MD_INVALID;
		goto err;
		goto err;
	}
	}
	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
		    be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
		    be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
		rv = ERR_MD_INVALID;
		goto err;
		goto err;
	}
	}
	if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
	if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
		dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
		dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
		    be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
		    be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
		rv = ERR_MD_INVALID;
		goto err;
		goto err;
	}
	}


	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
		rv = ERR_MD_INVALID;
		goto err;
		goto err;
	}
	}


	rv = NO_ERROR;

	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
	for (i = UI_CURRENT; i < UI_SIZE; i++)
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
+2 −3
Original line number Original line Diff line number Diff line
@@ -727,24 +727,23 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
	rcu_read_lock();
	rcu_read_lock();
	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;


	bdev->md.md_offset = drbd_md_ss(bdev);

	switch (meta_dev_idx) {
	switch (meta_dev_idx) {
	default:
	default:
		/* v07 style fixed size indexed meta data */
		/* v07 style fixed size indexed meta data */
		bdev->md.md_size_sect = MD_128MB_SECT;
		bdev->md.md_size_sect = MD_128MB_SECT;
		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
		break;
	case DRBD_MD_INDEX_FLEX_EXT:
	case DRBD_MD_INDEX_FLEX_EXT:
		/* just occupy the full device; unit: sectors */
		/* just occupy the full device; unit: sectors */
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.md_offset = 0;
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
		break;
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
	case DRBD_MD_INDEX_FLEX_INT:
		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
		/* al size is still fixed */
		/* al size is still fixed */
		bdev->md.al_offset = -al_size_sect;
		bdev->md.al_offset = -al_size_sect;
		/* we need (slightly less than) ~ this much bitmap sectors: */
		/* we need (slightly less than) ~ this much bitmap sectors: */