Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 03891f9c authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull device mapper updates from Mike Snitzer:

 - The most significant set of changes this cycle is the Forward Error
   Correction (FEC) support that has been added to the DM verity target.

   Google uses DM verity on all Android devices and it is believed that
   this FEC support will enable DM verity to recover from storage
   failures seen since DM verity was first deployed as part of Android.

 - A stable fix for a race in the destruction of DM thin pool's
   workqueue

 - A stable fix for hung IO if a DM snapshot copy hit an error

 - A few small cleanups in DM core and DM persistent data.

 - A couple DM thinp range discard improvements (address atomicity of
   finding a range and the efficiency of discarding a partially mapped
   thin device)

 - Add ability to debug DM bufio leaks by recording stack trace when a
   buffer is allocated.  Upon detected leak the recorded stack is
   dumped.

* tag 'dm-4.5-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm snapshot: fix hung bios when copy error occurs
  dm thin: bump thin and thin-pool target versions
  dm thin: fix race condition when destroying thin pool workqueue
  dm space map metadata: remove unused variable in brb_pop()
  dm verity: add ignore_zero_blocks feature
  dm verity: add support for forward error correction
  dm verity: factor out verity_for_bv_block()
  dm verity: factor out structures and functions useful to separate object
  dm verity: move dm-verity.c to dm-verity-target.c
  dm verity: separate function for parsing opt args
  dm verity: clean up duplicate hashing code
  dm btree: factor out need_insert() helper
  dm bufio: use BUG_ON instead of conditional call to BUG
  dm bufio: store stacktrace in buffers to help find buffer leaks
  dm bufio: return NULL to improve code clarity
  dm block manager: cleanup code that prints stacktrace
  dm: don't save and restore bi_private
  dm thin metadata: make dm_thin_find_mapped_range() atomic
  dm thin metadata: speed up discard of partially mapped volumes
parents 47c62e4b 385277bf
Loading
Loading
Loading
Loading
+38 −2
Original line number Diff line number Diff line
@@ -18,11 +18,11 @@ Construction Parameters

    0 is the original format used in the Chromium OS.
      The salt is appended when hashing, digests are stored continuously and
      the rest of the block is padded with zeros.
      the rest of the block is padded with zeroes.

    1 is the current format that should be used for new devices.
      The salt is prepended when hashing and each digest is
      padded with zeros to the power of two.
      padded with zeroes to the power of two.

<dev>
    This is the device containing data, the integrity of which needs to be
@@ -79,6 +79,37 @@ restart_on_corruption
    not compatible with ignore_corruption and requires user space support to
    avoid restart loops.

ignore_zero_blocks
    Do not verify blocks that are expected to contain zeroes and always return
    zeroes instead. This may be useful if the partition contains unused blocks
    that are not guaranteed to contain zeroes.

use_fec_from_device <fec_dev>
    Use forward error correction (FEC) to recover from corruption if hash
    verification fails. Use encoding data from the specified device. This
    may be the same device where data and hash blocks reside, in which case
    fec_start must be outside data and hash areas.

    If the encoding data covers additional metadata, it must be accessible
    on the hash device after the hash blocks.

    Note: block sizes for data and hash devices must match. Also, if the
    verity <dev> is encrypted the <fec_dev> should be too.

fec_roots <num>
    Number of generator roots. This equals to the number of parity bytes in
    the encoding data. For example, in RS(M, N) encoding, the number of roots
    is M-N.

fec_blocks <num>
    The number of encoding data blocks on the FEC device. The block size for
    the FEC device is <data_block_size>.

fec_start <offset>
    This is the offset, in <data_block_size> blocks, from the start of the
    FEC device to the beginning of the encoding data.


Theory of operation
===================

@@ -98,6 +129,11 @@ per-block basis. This allows for a lightweight hash computation on first read
into the page cache. Block hashes are stored linearly, aligned to the nearest
block size.

If forward error correction (FEC) support is enabled any recovery of
corrupted data will be verified using the cryptographic hash of the
corresponding data. This is why combining error correction with
integrity checking is essential.

Hash Tree
---------

+21 −0
Original line number Diff line number Diff line
@@ -240,6 +240,15 @@ config DM_BUFIO
	 as a cache, holding recently-read blocks in memory and performing
	 delayed writes.

config DM_DEBUG_BLOCK_STACK_TRACING
       bool "Keep stack trace of persistent data block lock holders"
       depends on STACKTRACE_SUPPORT && DM_BUFIO
       select STACKTRACE
       ---help---
	 Enable this for messages that may help debug problems with the
	 block manager locking used by thin provisioning and caching.

	 If unsure, say N.
config DM_BIO_PRISON
       tristate
       depends on BLK_DEV_DM
@@ -458,6 +467,18 @@ config DM_VERITY

	  If unsure, say N.

config DM_VERITY_FEC
	bool "Verity forward error correction support"
	depends on DM_VERITY
	select REED_SOLOMON
	select REED_SOLOMON_DEC8
	---help---
	  Add forward error correction support to dm-verity. This option
	  makes it possible to use pre-generated error correction data to
	  recover from corrupted blocks.

	  If unsure, say N.

config DM_SWITCH
	tristate "Switch target support (EXPERIMENTAL)"
	depends on BLK_DEV_DM
+5 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ dm-cache-mq-y += dm-cache-policy-mq.o
dm-cache-smq-y   += dm-cache-policy-smq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y	+= dm-era-target.o
dm-verity-y	+= dm-verity-target.o
md-mod-y	+= md.o bitmap.o
raid456-y	+= raid5.o raid5-cache.o

@@ -63,3 +64,7 @@ obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs			+= dm-uevent.o
endif

ifeq ($(CONFIG_DM_VERITY_FEC),y)
dm-verity-objs			+= dm-verity-fec.o
endif
+40 −4
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include <linux/shrinker.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/stacktrace.h>

#define DM_MSG_PREFIX "bufio"

@@ -149,6 +150,11 @@ struct dm_buffer {
	struct list_head write_list;
	struct bio bio;
	struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
	struct stack_trace stack_trace;
	unsigned long stack_entries[MAX_STACK];
#endif
};

/*----------------------------------------------------------------*/
@@ -253,6 +259,17 @@ static LIST_HEAD(dm_bufio_all_clients);
 */
static DEFINE_MUTEX(dm_bufio_clients_lock);

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
static void buffer_record_stack(struct dm_buffer *b)
{
	b->stack_trace.nr_entries = 0;
	b->stack_trace.max_entries = MAX_STACK;
	b->stack_trace.entries = b->stack_entries;
	b->stack_trace.skip = 2;
	save_stack_trace(&b->stack_trace);
}
#endif

/*----------------------------------------------------------------
 * A red/black tree acts as an index for all the buffers.
 *--------------------------------------------------------------*/
@@ -454,6 +471,9 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)

	adjust_total_allocated(b->data_mode, (long)c->block_size);

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	memset(&b->stack_trace, 0, sizeof(b->stack_trace));
#endif
	return b;
}

@@ -1063,12 +1083,16 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,

	dm_bufio_lock(c);
	b = __bufio_new(c, block, nf, &need_submit, &write_list);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	if (b && b->hold_count == 1)
		buffer_record_stack(b);
#endif
	dm_bufio_unlock(c);

	__flush_write_list(&write_list);

	if (!b)
		return b;
		return NULL;

	if (need_submit)
		submit_io(b, READ, b->block, read_endio);
@@ -1462,6 +1486,7 @@ static void drop_buffers(struct dm_bufio_client *c)
{
	struct dm_buffer *b;
	int i;
	bool warned = false;

	BUG_ON(dm_bufio_in_request());

@@ -1476,9 +1501,21 @@ static void drop_buffers(struct dm_bufio_client *c)
		__free_buffer_wake(b);

	for (i = 0; i < LIST_SIZE; i++)
		list_for_each_entry(b, &c->lru[i], lru_list)
		list_for_each_entry(b, &c->lru[i], lru_list) {
			WARN_ON(!warned);
			warned = true;
			DMERR("leaked buffer %llx, hold count %u, list %d",
			      (unsigned long long)b->block, b->hold_count, i);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			print_stack_trace(&b->stack_trace, 1);
			b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */
#endif
		}

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	while ((b = __get_unclaimed_buffer(c)))
		__free_buffer_wake(b);
#endif

	for (i = 0; i < LIST_SIZE; i++)
		BUG_ON(!list_empty(&c->lru[i]));
@@ -1891,8 +1928,7 @@ static void __exit dm_bufio_exit(void)
		bug = 1;
	}

	if (bug)
		BUG();
	BUG_ON(bug);
}

module_init(dm_bufio_init)
+0 −3
Original line number Diff line number Diff line
@@ -118,14 +118,12 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
@@ -134,7 +132,6 @@ static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;
}

/*----------------------------------------------------------------*/
Loading