Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e804ac78 authored by Tejun Heo, committed by NeilBrown
Browse files

md: fix and update workqueue usage



Workqueue usage in md has two problems.

* Flush can be used during or depended upon by memory reclaim, but md
  uses the system workqueue for flush_work which may lead to deadlock.

* md depends on flush_scheduled_work() to achieve exclusion against
  completion of removal of previous instances.  flush_scheduled_work()
  may incur unexpected amount of delay and is scheduled to be removed.

This patch adds two workqueues to md - md_wq and md_misc_wq.  The
former is guaranteed to make forward progress under memory pressure
and serves flush_work.  The latter serves as the flush domain for
other works.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
parent 57dab0bd
Loading
Loading
Loading
Loading
+43 −21
Original line number Diff line number Diff line
@@ -67,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock);
static void md_print_devices(void);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }

@@ -298,7 +300,7 @@ static void md_end_flush(struct bio *bio, int err)

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		schedule_work(&mddev->flush_work);
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}
@@ -367,7 +369,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio)
	submit_flushes(mddev);

	if (atomic_dec_and_test(&mddev->flush_pending))
		schedule_work(&mddev->flush_work);
		queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);

@@ -434,14 +436,13 @@ static void mddev_put(mddev_t *mddev)
		 * so destroy it */
		list_del(&mddev->all_mddevs);
		if (mddev->gendisk) {
			/* we did a probe so need to clean up.
			 * Call schedule_work inside the spinlock
			 * so that flush_scheduled_work() after
			 * mddev_find will succeed in waiting for the
			 * work to be done.
			/* We did a probe so need to clean up.  Call
			 * queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
			schedule_work(&mddev->del_work);
			queue_work(md_misc_wq, &mddev->del_work);
		} else
			kfree(mddev);
	}
@@ -1848,7 +1849,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, md_delayed_delete);
	kobject_get(&rdev->kobj);
	schedule_work(&rdev->del_work);
	queue_work(md_misc_wq, &rdev->del_work);
}

/*
@@ -4192,10 +4193,10 @@ static int md_alloc(dev_t dev, char *name)
	shift = partitioned ? MdpMinorShift : 0;
	unit = MINOR(mddev->unit) >> shift;

	/* wait for any previous instance if this device
	 * to be completed removed (mddev_delayed_delete).
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
	flush_scheduled_work();
	flush_workqueue(md_misc_wq);

	mutex_lock(&disks_mutex);
	error = -EEXIST;
@@ -5891,7 +5892,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
		 */
		mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
		flush_scheduled_work();
		flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
		return -ERESTARTSYS;
	}
@@ -6047,7 +6048,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	if (mddev->event_work.func)
		schedule_work(&mddev->event_work);
		queue_work(md_misc_wq, &mddev->event_work);
	md_new_event_inintr(mddev);
}

@@ -7207,12 +7208,23 @@ static void md_geninit(void)

static int __init md_init(void)
{
	if (register_blkdev(MD_MAJOR, "md"))
		return -1;
	if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
		unregister_blkdev(MD_MAJOR, "md");
		return -1;
	}
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
		goto err_md;

	if ((ret = register_blkdev(0, "mdp")) < 0)
		goto err_mdp;
	mdp_major = ret;

	blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
			    md_probe, NULL, NULL);
	blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
@@ -7223,8 +7235,16 @@ static int __init md_init(void)

	md_geninit();
	return 0;
}

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}

#ifndef MODULE

@@ -7311,6 +7331,8 @@ static __exit void md_exit(void)
		export_array(mddev);
		mddev->hold_active = 0;
	}
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);