
Commit 98683650 authored by Philipp Reisner

Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6

parents ccae7868 328e0f12
+2 −0
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
drbd-y += drbd_interval.o drbd_state.o
drbd-y += drbd_nla.o

obj-$(CONFIG_BLK_DEV_DRBD)     += drbd.o
+280 −409

File changed; diff collapsed because the preview size limit was exceeded.

+120 −99
@@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
	if (!__ratelimit(&drbd_ratelimit_state))
		return;
	dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
	    current == mdev->receiver.task ? "receiver" :
	    current == mdev->asender.task  ? "asender"  :
	    current == mdev->worker.task   ? "worker"   : current->comm,
		drbd_task_to_thread_name(mdev->tconn, current),
		func, b->bm_why ?: "?",
	    b->bm_task == mdev->receiver.task ? "receiver" :
	    b->bm_task == mdev->asender.task  ? "asender"  :
	    b->bm_task == mdev->worker.task   ? "worker"   : "?");
		drbd_task_to_thread_name(mdev->tconn, b->bm_task));
}

void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
@@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)

	if (trylock_failed) {
		dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
		    current == mdev->receiver.task ? "receiver" :
		    current == mdev->asender.task  ? "asender"  :
		    current == mdev->worker.task   ? "worker"   : current->comm,
			 drbd_task_to_thread_name(mdev->tconn, current),
			 why, b->bm_why ?: "?",
		    b->bm_task == mdev->receiver.task ? "receiver" :
		    b->bm_task == mdev->asender.task  ? "asender"  :
		    b->bm_task == mdev->worker.task   ? "worker"   : "?");
			 drbd_task_to_thread_name(mdev->tconn, b->bm_task));
		mutex_lock(&b->bm_change);
	}
	if (BM_LOCKED_MASK & b->bm_flags)
@@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
/* to mark for lazy writeout once syncer cleared all clearable bits,
 * we check if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT	28
/* pages marked with this "HINT" will be considered for writeout
 * on activity log transactions */
#define BM_PAGE_HINT_WRITEOUT	27

/* store_page_idx uses non-atomic assignment. It is only used directly after
 * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
@@ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
{
	struct drbd_bitmap *b = mdev->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	clear_bit(BM_PAGE_IO_LOCK, addr);
	smp_mb__after_clear_bit();
	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
	wake_up(&mdev->bitmap->bm_io_wait);
}

@@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}

/**
 * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
 * @mdev:	DRBD device.
 * @page_nr:	the bitmap page to mark with the "hint" flag
 *
 * From within an activity log transaction, we mark a few pages with these
 * hints, then call drbd_bm_write_hinted(), which will only write out changed
 * pages which are flagged with this mark.
 */
void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
{
	struct page *page;
	if (page_nr >= mdev->bitmap->bm_number_of_pages) {
		dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
			 page_nr, (int)mdev->bitmap->bm_number_of_pages);
		return;
	}
	page = mdev->bitmap->bm_pages[page_nr];
	set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
}
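
For orientation, a minimal sketch of how a caller is expected to use the hint mechanism introduced here (illustrative only, not part of this commit; the surrounding transaction code and page numbers are placeholders):

	/* within an activity log transaction, mark each bitmap page
	 * touched by the transaction ... */
	drbd_bm_mark_for_writeout(mdev, page_nr);
	/* ... then flush only the changed pages that carry the hint */
	drbd_bm_write_hinted(mdev);

drbd_bm_write_hinted() is added further down in this diff; it calls bm_rw() with BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, so pages that are unhinted or unchanged are skipped.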

static int bm_test_page_unchanged(struct page *page)
{
	volatile const unsigned long *addr = &page_private(page);
@@ -376,7 +391,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
	 * GFP_NOIO, as this is called while drbd IO is "suspended",
	 * and during resize or attach on diskless Primary,
	 * we must not block on IO to ourselves.
	 * Context is receiver thread or cqueue thread/dmsetup.  */
	 * Context is receiver thread or dmsetup. */
	bytes = sizeof(struct page *)*want;
	new_pages = kzalloc(bytes, GFP_NOIO);
	if (!new_pages) {
@@ -441,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev)

sector_t drbd_bm_capacity(struct drbd_conf *mdev)
{
	ERR_IF(!mdev->bitmap) return 0;
	if (!expect(mdev->bitmap))
		return 0;
	return mdev->bitmap->bm_dev_capacity;
}
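
Throughout this commit the old ERR_IF(cond) statement macro is replaced by the expect() helper used inside an ordinary if (!expect(...)). For readers without the rest of the tree at hand: expect() evaluates its argument, complains to the kernel log when the expression is false, and yields the boolean result. A rough sketch of the idea (assumed shape only, not the exact definition from drbd_int.h):

	#define expect(exp) ({						\
			bool _b = (exp);				\
			if (!_b)					\
				dev_err(DEV, "ASSERTION %s FAILED in %s\n", \
					#exp, __func__);		\
			_b;						\
		})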

@@ -449,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev)
 */
void drbd_bm_cleanup(struct drbd_conf *mdev)
{
	ERR_IF (!mdev->bitmap) return;
	if (!expect(mdev->bitmap))
		return;
	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
	bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
	kfree(mdev->bitmap);
@@ -612,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
	int err = 0, growing;
	int opages_vmalloced;

	ERR_IF(!b) return -ENOMEM;
	if (!expect(b))
		return -ENOMEM;

	drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);

@@ -734,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
	unsigned long s;
	unsigned long flags;

	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;
	if (!expect(b))
		return 0;
	if (!expect(b->bm_pages))
		return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	s = b->bm_set;
@@ -758,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
size_t drbd_bm_words(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;
	if (!expect(b))
		return 0;
	if (!expect(b->bm_pages))
		return 0;

	return b->bm_words;
}
@@ -767,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
unsigned long drbd_bm_bits(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return 0;
	if (!expect(b))
		return 0;

	return b->bm_bits;
}
@@ -788,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,

	end = offset + number;

	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;
	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;
	if (number == 0)
		return;
	WARN_ON(offset >= b->bm_words);
@@ -833,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,

	end = offset + number;

	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;
	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	if ((offset >= b->bm_words) ||
@@ -862,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
void drbd_bm_set_all(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;
	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0xff, b->bm_words);
@@ -876,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
void drbd_bm_clear_all(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;
	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0, b->bm_words);
@@ -891,7 +922,8 @@ struct bm_aio_ctx {
	unsigned int done;
	unsigned flags;
#define BM_AIO_COPY_PAGES	1
#define BM_WRITE_ALL_PAGES	2
#define BM_AIO_WRITE_HINTED	2
#define BM_WRITE_ALL_PAGES	4
	int error;
	struct kref kref;
};
@@ -1062,6 +1094,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
			break;
		if (rw & WRITE) {
			if ((flags & BM_AIO_WRITE_HINTED) &&
			    !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
				    &page_private(b->bm_pages[i])))
				continue;

			if (!(flags & BM_WRITE_ALL_PAGES) &&
			    bm_test_page_unchanged(b->bm_pages[i])) {
				dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
@@ -1094,6 +1131,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
	else
		kref_put(&ctx->kref, &bm_aio_ctx_destroy);

	/* summary for global bitmap IO */
	if (flags == 0)
		dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
			 rw == WRITE ? "WRITE" : "READ",
			 count, jiffies - now);
@@ -1117,6 +1156,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
	}
	now = b->bm_set;

	if (flags == 0)
		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);

@@ -1181,9 +1221,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
}

/**
 * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
 * @mdev:	DRBD device.
 */
int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
{
	return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
}

/**
 * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
 * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
 * @mdev:	DRBD device.
 * @idx:	bitmap page index
 *
@@ -1291,8 +1339,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long i = DRBD_END_OF_BITMAP;

	ERR_IF(!b) return i;
	ERR_IF(!b->bm_pages) return i;
	if (!expect(b))
		return i;
	if (!expect(b->bm_pages))
		return i;

	spin_lock_irq(&b->bm_lock);
	if (BM_DONT_TEST & b->bm_flags)
@@ -1393,8 +1443,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
	struct drbd_bitmap *b = mdev->bitmap;
	int c = 0;

	ERR_IF(!b) return 1;
	ERR_IF(!b->bm_pages) return 0;
	if (!expect(b))
		return 1;
	if (!expect(b->bm_pages))
		return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
@@ -1425,13 +1477,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
{
	int i;
	int bits;
	int changed = 0;
	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
	for (i = first_word; i < last_word; i++) {
		bits = hweight_long(paddr[i]);
		paddr[i] = ~0UL;
		b->bm_set += BITS_PER_LONG - bits;
		changed += BITS_PER_LONG - bits;
	}
	kunmap_atomic(paddr);
	if (changed) {
		/* We only need lazy writeout, the information is still in the
		 * remote bitmap as well, and is reconstructed during the next
		 * bitmap exchange, if lost locally due to a crash. */
		bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
		b->bm_set += changed;
	}
}

/* Same thing as drbd_bm_set_bits,
@@ -1526,8 +1586,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
	unsigned long *p_addr;
	int i;

	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;
	if (!expect(b))
		return 0;
	if (!expect(b->bm_pages))
		return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	if (BM_DONT_TEST & b->bm_flags)
@@ -1561,8 +1623,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
	 * robust in case we screwed up elsewhere, in that case pretend there
	 * was one dirty bit in the requested area, so we won't try to do a
	 * local read there (no bitmap probably implies no disk) */
	ERR_IF(!b) return 1;
	ERR_IF(!b->bm_pages) return 1;
	if (!expect(b))
		return 1;
	if (!expect(b->bm_pages))
		return 1;

	spin_lock_irqsave(&b->bm_lock, flags);
	if (BM_DONT_TEST & b->bm_flags)
@@ -1575,11 +1639,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
				bm_unmap(p_addr);
			p_addr = bm_map_pidx(b, idx);
		}
		ERR_IF (bitnr >= b->bm_bits) {
			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
		} else {
		if (expect(bitnr < b->bm_bits))
			c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
		}
		else
			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
	}
	if (p_addr)
		bm_unmap(p_addr);
@@ -1609,8 +1672,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
	unsigned long flags;
	unsigned long *p_addr, *bm;

	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;
	if (!expect(b))
		return 0;
	if (!expect(b->bm_pages))
		return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	if (BM_DONT_TEST & b->bm_flags)
@@ -1632,47 +1697,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
	spin_unlock_irqrestore(&b->bm_lock, flags);
	return count;
}

/* Set all bits covered by the AL-extent al_enr.
 * Returns number of bits changed. */
unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr, *bm;
	unsigned long weight;
	unsigned long s, e;
	int count, i, do_now;
	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;

	spin_lock_irq(&b->bm_lock);
	if (BM_DONT_SET & b->bm_flags)
		bm_print_lock_info(mdev);
	weight = b->bm_set;

	s = al_enr * BM_WORDS_PER_AL_EXT;
	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
	/* assert that s and e are on the same page */
	D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
	      ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
	count = 0;
	if (s < b->bm_words) {
		i = do_now = e-s;
		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
		bm = p_addr + MLPP(s);
		while (i--) {
			count += hweight_long(*bm);
			*bm = -1UL;
			bm++;
		}
		bm_unmap(p_addr);
		b->bm_set += do_now*BITS_PER_LONG - count;
		if (e == b->bm_words)
			b->bm_set -= bm_clear_surplus(b);
	} else {
		dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
	}
	weight = b->bm_set - weight;
	spin_unlock_irq(&b->bm_lock);
	return weight;
}
+601 −797

File changed; diff collapsed because the preview size limit was exceeded.

+207 −0
#include <asm/bug.h>
#include <linux/rbtree_augmented.h>
#include "drbd_interval.h"

/**
 * interval_end  -  return end of @node
 */
static inline
sector_t interval_end(struct rb_node *node)
{
	struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb);
	return this->end;
}

/**
 * compute_subtree_last  -  compute end of @node
 *
 * The end of an interval is the highest (start + (size >> 9)) value of this
 * node and of its children.  Called for @node and its parents whenever the end
 * may have changed.
 */
static inline sector_t
compute_subtree_last(struct drbd_interval *node)
{
	sector_t max = node->sector + (node->size >> 9);

	if (node->rb.rb_left) {
		sector_t left = interval_end(node->rb.rb_left);
		if (left > max)
			max = left;
	}
	if (node->rb.rb_right) {
		sector_t right = interval_end(node->rb.rb_right);
		if (right > max)
			max = right;
	}
	return max;
}
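
A small worked example of the cached ->end value (illustrative, not part of this commit):

	/*
	 * Three requests: A covers sectors [0, 8), B covers [8, 24),
	 * C covers [16, 24).  With B at the root and A, C as its children:
	 *
	 *   interval_end(A) = 8, interval_end(C) = 24
	 *   compute_subtree_last(B) = max(8 + 16, 8, 24) = 24
	 *
	 * so B->end caches the highest end sector anywhere in its subtree,
	 * which lets drbd_find_overlap() below prune any subtree whose
	 * cached end lies at or before the search start sector.
	 */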

static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
{
	while (rb != stop) {
		struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb);
		sector_t subtree_last = compute_subtree_last(node);
		if (node->end == subtree_last)
			break;
		node->end = subtree_last;
		rb = rb_parent(&node->rb);
	}
}

static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
{
	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);

	new->end = old->end;
}

static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
{
	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);

	new->end = old->end;
	old->end = compute_subtree_last(old);
}

static const struct rb_augment_callbacks augment_callbacks = {
	augment_propagate,
	augment_copy,
	augment_rotate,
};

/**
 * drbd_insert_interval  -  insert a new interval into a tree
 */
bool
drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
{
	struct rb_node **new = &root->rb_node, *parent = NULL;

	BUG_ON(!IS_ALIGNED(this->size, 512));

	while (*new) {
		struct drbd_interval *here =
			rb_entry(*new, struct drbd_interval, rb);

		parent = *new;
		if (this->sector < here->sector)
			new = &(*new)->rb_left;
		else if (this->sector > here->sector)
			new = &(*new)->rb_right;
		else if (this < here)
			new = &(*new)->rb_left;
		else if (this > here)
			new = &(*new)->rb_right;
		else
			return false;
	}

	rb_link_node(&this->rb, parent, new);
	rb_insert_augmented(&this->rb, root, &augment_callbacks);
	return true;
}

/**
 * drbd_contains_interval  -  check if a tree contains a given interval
 * @sector:	start sector of @interval
 * @interval:	may not be a valid pointer
 *
 * Returns if the tree contains the node @interval with start sector @sector.
 * Does not dereference @interval until @interval is known to be a valid object
 * in @tree.  Returns %false if @interval is in the tree but with a different
 * sector number.
 */
bool
drbd_contains_interval(struct rb_root *root, sector_t sector,
		       struct drbd_interval *interval)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct drbd_interval *here =
			rb_entry(node, struct drbd_interval, rb);

		if (sector < here->sector)
			node = node->rb_left;
		else if (sector > here->sector)
			node = node->rb_right;
		else if (interval < here)
			node = node->rb_left;
		else if (interval > here)
			node = node->rb_right;
		else
			return true;
	}
	return false;
}

/**
 * drbd_remove_interval  -  remove an interval from a tree
 */
void
drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
{
	rb_erase_augmented(&this->rb, root, &augment_callbacks);
}

/**
 * drbd_find_overlap  - search for an interval overlapping with [sector, sector + size)
 * @sector:	start sector
 * @size:	size, aligned to 512 bytes
 *
 * Returns an interval overlapping with [sector, sector + size), or NULL if
 * there is none.  When there is more than one overlapping interval in the
 * tree, the interval with the lowest start sector is returned, and all other
 * overlapping intervals will be on the right side of the tree, reachable with
 * rb_next().
 */
struct drbd_interval *
drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size)
{
	struct rb_node *node = root->rb_node;
	struct drbd_interval *overlap = NULL;
	sector_t end = sector + (size >> 9);

	BUG_ON(!IS_ALIGNED(size, 512));

	while (node) {
		struct drbd_interval *here =
			rb_entry(node, struct drbd_interval, rb);

		if (node->rb_left &&
		    sector < interval_end(node->rb_left)) {
			/* Overlap if any must be on left side */
			node = node->rb_left;
		} else if (here->sector < end &&
			   sector < here->sector + (here->size >> 9)) {
			overlap = here;
			break;
		} else if (sector >= here->sector) {
			/* Overlap if any must be on right side */
			node = node->rb_right;
		} else
			break;
	}
	return overlap;
}

struct drbd_interval *
drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size)
{
	sector_t end = sector + (size >> 9);
	struct rb_node *node;

	for (;;) {
		node = rb_next(&i->rb);
		if (!node)
			return NULL;
		i = rb_entry(node, struct drbd_interval, rb);
		if (i->sector >= end)
			return NULL;
		if (sector < i->sector + (i->size >> 9))
			return i;
	}
}
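
Putting the pieces together, a minimal usage sketch of this new interface (illustrative only, not part of this commit; the function and variable names are made up, and the fields touched are those declared for struct drbd_interval in drbd_interval.h):

	static void example_conflict_scan(struct rb_root *root,
					  struct drbd_interval *i,
					  sector_t sector, unsigned int size)
	{
		struct drbd_interval *overlap;

		/* check for conflicts with intervals already in the tree */
		for (overlap = drbd_find_overlap(root, sector, size);
		     overlap;
		     overlap = drbd_next_overlap(overlap, sector, size))
			;	/* resolve the conflict with 'overlap' here */

		/* register the new interval; this fails only if the same
		 * interval object is already in the tree */
		i->sector = sector;
		i->size = size;		/* in bytes, a multiple of 512 */
		if (!drbd_insert_interval(root, i))
			return;

		/* ... later, when the request completes ... */
		drbd_remove_interval(root, i);
	}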