Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2995fa78 authored by Mikulas Patocka, committed by Mike Snitzer
Browse files

dm sysfs: fix a module unload race



This reverts commit be35f486 ("dm: wait until embedded kobject is
released before destroying a device") and provides an improved fix.

The kobject release code that calls the completion must be placed in a
non-module file, otherwise there is a module unload race (if the process
calling dm_kobject_release is preempted and the DM module unloaded after
the completion is triggered, but before dm_kobject_release returns).

To fix this race, this patch moves the completion code to dm-builtin.c
which is always compiled directly into the kernel if BLK_DEV_DM is
selected.

The patch introduces a new dm_kobject_holder structure. Its purpose is
to keep the completion and kobject in one place, so that they can be
accessed from non-module code without the need to export the layout of
struct mapped_device to that code.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
parent 55b082e6
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -176,8 +176,12 @@ config MD_FAULTY

source "drivers/md/bcache/Kconfig"

# Hidden helper symbol (no prompt). It is selected by BLK_DEV_DM so that
# dm-builtin.o is always compiled directly into the kernel, even when
# device-mapper itself is built as a module.
config BLK_DEV_DM_BUILTIN
	bool

config BLK_DEV_DM
	tristate "Device mapper support"
	select BLK_DEV_DM_BUILTIN
	---help---
	  Device-mapper is a low level volume manager.  It works by allowing
	  people to specify mappings for ranges of logical sectors.  Various
+1 −0
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o
obj-$(CONFIG_BCACHE)		+= bcache/
obj-$(CONFIG_BLK_DEV_MD)	+= md-mod.o
obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
obj-$(CONFIG_DM_BUFIO)		+= dm-bufio.o
obj-$(CONFIG_DM_BIO_PRISON)	+= dm-bio-prison.o
obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
+48 −0
Original line number Diff line number Diff line
#include "dm.h"

/*
 * The kobject release method must not be placed in the module itself,
 * otherwise we are subject to module unload races.
 *
 * The release method is called when the last reference to the kobject is
 * dropped. It may be called by any other kernel code that drops the last
 * reference.
 *
 * The release method suffers from a module unload race. We may prevent the
 * module from being unloaded at the start of the release method (by
 * increasing the module reference count or synchronizing against the
 * release method); however, there is no way to prevent the module from
 * being unloaded at the end of the release method.
 *
 * If this code were placed in the dm module, the following race may
 * happen:
 *  1. Some other process takes a reference to dm kobject
 *  2. The user issues ioctl function to unload the dm device
 *  3. dm_sysfs_exit calls kobject_put, however the object is not released
 *     because of the other reference taken at step 1
 *  4. dm_sysfs_exit waits on the completion
 *  5. The other process that took the reference in step 1 drops it,
 *     dm_kobject_release is called from this process
 *  6. dm_kobject_release calls complete()
 *  7. a reschedule happens before dm_kobject_release returns
 *  8. dm_sysfs_exit continues, the dm device is unloaded, module reference
 *     count is decremented
 *  9. The user unloads the dm module
 * 10. The other process that was rescheduled in step 7 continues to run,
 *     it is now executing code in unloaded module, so it crashes
 *
 * Note that if the process that takes the foreign reference to dm kobject
 * has a low priority and the system is sufficiently loaded with
 * higher-priority processes that prevent the low-priority process from
 * being scheduled long enough, this bug may really happen.
 *
 * In order to fix this module unload race, we place the release method
 * into helper code that is compiled directly into the kernel.
 */

/*
 * Release method for the dm kobject.
 *
 * @kobj: the kobject being released.
 *
 * Signals the completion that dm_get_completion_from_kobject() returns
 * for @kobj, waking whoever is waiting for the kobject to be released.
 */
void dm_kobject_release(struct kobject *kobj)
{
	complete(dm_get_completion_from_kobject(kobj));
}

/* Exported so that code built outside this file can use the release method. */
EXPORT_SYMBOL(dm_kobject_release);
+0 −5
Original line number Diff line number Diff line
@@ -79,11 +79,6 @@ static const struct sysfs_ops dm_sysfs_ops = {
	.show	= dm_attr_show,
};

static void dm_kobject_release(struct kobject *kobj)
{
	complete(dm_get_completion_from_kobject(kobj));
}

/*
 * dm kobject is embedded in mapped_device structure
 * no need to define release function here
+5 −15
Original line number Diff line number Diff line
@@ -200,11 +200,8 @@ struct mapped_device {
	/* forced geometry settings */
	struct hd_geometry geometry;

	/* sysfs handle */
	struct kobject kobj;

	/* wait until the kobject is released */
	struct completion kobj_completion;
	/* kobject and completion */
	struct dm_kobject_holder kobj_holder;

	/* zero-length flush that will be cloned and submitted to targets */
	struct bio flush_bio;
@@ -2044,7 +2041,7 @@ static struct mapped_device *alloc_dev(int minor)
	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	init_waitqueue_head(&md->eventq);
	init_completion(&md->kobj_completion);
	init_completion(&md->kobj_holder.completion);

	md->disk->major = _major;
	md->disk->first_minor = minor;
@@ -2906,14 +2903,14 @@ struct gendisk *dm_disk(struct mapped_device *md)

struct kobject *dm_kobject(struct mapped_device *md)
{
	return &md->kobj;
	return &md->kobj_holder.kobj;
}

struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{
	struct mapped_device *md;

	md = container_of(kobj, struct mapped_device, kobj);
	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md))
@@ -2923,13 +2920,6 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
	return md;
}

struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
	struct mapped_device *md = container_of(kobj, struct mapped_device, kobj);

	return &md->kobj_completion;
}

int dm_suspended_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
Loading