Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 19f843aa, authored by Lars Ellenberg, committed by Philipp Reisner
Browse files

drbd: bitmap keep track of changes vs on-disk bitmap



When we set or clear bits in a bitmap page,
also set a flag in the page->private pointer.

This allows us to skip writes of unchanged pages.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 95a0f10c
Loading
Loading
Loading
Loading
+29 −103
Original line number Diff line number Diff line
@@ -262,6 +262,33 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
	spin_unlock_irqrestore(&mdev->al_lock, flags);
}

/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 *
 * al_extent_to_bm_page() and rs_extent_to_bm_page() both shift right by
 *   (PAGE_SHIFT + 3) - (<extent shift> - BM_BLOCK_SHIFT).
 * If either extent shift grows so large that this difference becomes
 * negative, the shift count is invalid and the code below will not work.
 * Refuse to build in that case, for both extent types.
 * Will be cleaned up with some followup patch.
 */
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
# error "AL_EXTENT_SHIFT - BM_BLOCK_SHIFT must not exceed PAGE_SHIFT + 3"
#endif
#if (PAGE_SHIFT + 3) < (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
# error "BM_EXT_SHIFT - BM_BLOCK_SHIFT must not exceed PAGE_SHIFT + 3"
#endif

/* Translate an activity log extent number into a bitmap page index. */
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	/* al extent number to bit: log2 of the bits per AL extent */
	const unsigned int al_ext_to_bit_shift = AL_EXTENT_SHIFT - BM_BLOCK_SHIFT;
	/* bit to page: log2 of the bits per page (8 bits per byte) */
	const unsigned int bit_to_page_shift = PAGE_SHIFT + 3;

	return al_enr >> (bit_to_page_shift - al_ext_to_bit_shift);
}

/* Translate a resync extent number into a bitmap page index. */
static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	/* resync extent number to bit: log2 of the bits per resync extent */
	const unsigned int rs_ext_to_bit_shift = BM_EXT_SHIFT - BM_BLOCK_SHIFT;
	/* bit to page: log2 of the bits per page (8 bits per byte) */
	const unsigned int bit_to_page_shift = PAGE_SHIFT + 3;

	return rs_enr >> (bit_to_page_shift - rs_ext_to_bit_shift);
}

int
w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
@@ -289,7 +316,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
	 * For now, we must not write the transaction,
	 * if we cannot write out the bitmap of the evicted extent. */
	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
		drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
		drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));

	/* The bitmap write may have failed, causing a state change. */
	if (mdev->state.disk < D_INCONSISTENT) {
@@ -635,105 +662,6 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
	return err;
}

/**
 * drbd_al_to_on_disk_bm() - Writes bitmap parts covered by active AL extents
 * @mdev:	DRBD device.
 *
 * Called when we detach (unconfigure) local storage,
 * or when we go from R_PRIMARY to R_SECONDARY role.
 *
 * Fast path: prepare bios for all in-use activity log elements first,
 * then submit them in one go and wait for their completion.
 * Slow path: if the bio pointer array cannot be allocated, or preparing
 * a bio fails, fall back to one synchronous drbd_bm_write_sect() call
 * per in-use element.
 */
void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
{
	int i, nr_elements;
	unsigned int enr;
	struct bio **bios;
	struct drbd_atodb_wait wc;

	ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
		return; /* sorry, I don't have any act_log etc... */

	/* sleep until we got the activity log lock; it is released
	 * again via lc_unlock() on both the fast and the slow path */
	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));

	nr_elements = mdev->act_log->nr_elements;

	/* GFP_KERNEL, we are not in anyone's write-out path */
	bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
	if (!bios)
		goto submit_one_by_one;

	/* wc is shared with the bios prepared below */
	atomic_set(&wc.count, 0);
	init_completion(&wc.io_done);
	wc.mdev = mdev;
	wc.error = 0;

	for (i = 0; i < nr_elements; i++) {
		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
		if (enr == LC_FREE)
			continue;
		/* next statement also does atomic_inc wc.count and local_cnt */
		if (atodb_prepare_unless_covered(mdev, bios,
						enr/AL_EXT_PER_BM_SECT,
						&wc))
			goto free_bios_submit_one_by_one;
	}

	/* unnecessary optimization? */
	lc_unlock(mdev->act_log);
	wake_up(&mdev->al_wait);

	/* all prepared, submit them */
	for (i = 0; i < nr_elements; i++) {
		/* bios[] was zero-allocated; the first NULL slot ends the loop
		 * (assumes atodb_prepare_unless_covered() fills the array
		 * from the front without gaps -- TODO confirm) */
		if (bios[i] == NULL)
			break;
		/* fault injection: complete the bio with -EIO
		 * instead of submitting it */
		if (drbd_insert_fault(mdev, DRBD_FAULT_MD_WR)) {
			bios[i]->bi_rw = WRITE;
			bio_endio(bios[i], -EIO);
		} else {
			submit_bio(WRITE, bios[i]);
		}
	}

	/* always (try to) flush bitmap to stable storage */
	drbd_md_flush(mdev);

	/* In case we did not submit a single IO do not wait for
	 * them to complete. ( Because we would wait forever here. )
	 *
	 * In case we had IOs and they are already complete, there
	 * is no point in waiting anyway.
	 * Therefore this if () ... */
	if (atomic_read(&wc.count))
		wait_for_completion(&wc.io_done);

	/* drop the reference taken by get_ldev_if_state() above */
	put_ldev(mdev);

	kfree(bios);
	return;

 free_bios_submit_one_by_one:
	/* free everything by calling the endio callback directly. */
	for (i = 0; i < nr_elements && bios[i]; i++)
		bio_endio(bios[i], 0);

	kfree(bios);

 submit_one_by_one:
	/* fallback; the activity log lock is still held at this point */
	dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
		if (enr == LC_FREE)
			continue;
		/* Really slow: if we have al-extents 16..19 active,
		 * sector 4 will be written four times! Synchronous! */
		drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
	}

	lc_unlock(mdev->act_log);
	wake_up(&mdev->al_wait);
	put_ldev(mdev);
}

/**
 * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents
 * @mdev:	DRBD device.
@@ -813,7 +741,7 @@ static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
		return 1;
	}

	drbd_bm_write_sect(mdev, udw->enr);
	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
	put_ldev(mdev);

	kfree(udw);
@@ -893,7 +821,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
				dev_warn(DEV, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
@@ -912,7 +839,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
				drbd_queue_work_front(&mdev->data.work, &udw->w);
			} else {
				dev_warn(DEV, "Could not kmalloc an udw\n");
				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
			}
		}
	} else {
+307 −102

File changed.

Preview size limit exceeded, changes collapsed.

+3 −4
Original line number Diff line number Diff line
@@ -833,7 +833,7 @@ enum {
	CRASHED_PRIMARY,	/* This node was a crashed primary.
				 * Gets cleared when the state.conn
				 * goes into C_CONNECTED state. */
	WRITE_BM_AFTER_RESYNC,	/* A kmalloc() during resync failed */
	NO_BARRIER_SUPP,	/* underlying block device doesn't implement barriers */
	CONSIDER_RESYNC,

	MD_NO_FUA,		/* Users wants us to not use FUA/FLUSH on meta data dev */
@@ -1428,7 +1428,7 @@ extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
		const unsigned long s, const unsigned long e);
extern int  drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
extern int  drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local);
extern int  drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
@@ -1446,7 +1446,7 @@ extern int drbd_bm_rs_done(struct drbd_conf *mdev);
/* for receive_bitmap */
extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
		size_t number, unsigned long *buffer);
/* for _drbd_send_bitmap and drbd_bm_write_sect */
/* for _drbd_send_bitmap */
extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
		size_t number, unsigned long *buffer);

@@ -1641,7 +1641,6 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
#define drbd_set_out_of_sync(mdev, sector, size) \
	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev);
extern void drbd_al_shrink(struct drbd_conf *mdev);


+26 −1
Original line number Diff line number Diff line
@@ -1289,6 +1289,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
	}
}

/**
 * drbd_bitmap_io_from_worker() - Run a bitmap IO function from the worker
 * @mdev:	DRBD device.
 * @io_fn:	IO callback; invoked between drbd_bm_lock() and drbd_bm_unlock().
 * @why:	Descriptive text handed on to drbd_bm_lock().
 *
 * Expected to be called by the worker thread itself (asserted below).
 * Sets SUSPEND_IO before the bitmap IO and calls drbd_resume_io() afterwards.
 *
 * Returns the return value of @io_fn.
 */
int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
{
	int io_result;

	D_ASSERT(current == mdev->worker.task);

	/* open coded non-blocking drbd_suspend_io(mdev):
	 * only the flag is set here */
	set_bit(SUSPEND_IO, &mdev->flags);
	if (!is_susp(mdev->state)) {
		D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
	}

	drbd_bm_lock(mdev, why);
	io_result = io_fn(mdev);
	drbd_bm_unlock(mdev);

	drbd_resume_io(mdev);

	return io_result;
}

/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @mdev:	DRBD device.
@@ -1404,7 +1424,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,

		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			drbd_al_to_on_disk_bm(mdev);
			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote diskless peer");
		put_ldev(mdev);
	}

	if (os.role == R_PRIMARY && ns.role == R_SECONDARY && get_ldev(mdev)) {
		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote");
		put_ldev(mdev);
	}

+6 −5
Original line number Diff line number Diff line
@@ -407,10 +407,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
		}
	}

	if ((new_role == R_SECONDARY) && get_ldev(mdev)) {
		drbd_al_to_on_disk_bm(mdev);
		put_ldev(mdev);
	}
	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
@@ -1174,7 +1172,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp

	if (cp_discovered) {
		drbd_al_apply_to_bm(mdev);
		drbd_al_to_on_disk_bm(mdev);
		if (drbd_bitmap_io(mdev, &drbd_bm_write, "crashed primary apply AL")) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
Loading