Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7c452e5f authored by Mukesh Ojha's avatar Mukesh Ojha Committed by Greg Kroah-Hartman
Browse files

devcoredump : Serialize devcd_del work

[ Upstream commit 01daccf748323dfc61112f474cf2ba81015446b0 ]

In following scenario(diagram), when one thread X running dev_coredumpm()
adds devcd device to the framework which sends uevent notification to
userspace and another thread Y reads this uevent and call to
devcd_data_write() which eventually try to delete the queued timer that
is not initialized/queued yet.

So, debug object reports some warning and in the meantime, timer is
initialized and queued from X path. and from Y path, it gets reinitialized
again and timer->entry.pprev=NULL and try_to_grab_pending() stucks.

To fix this, introduce mutex and a boolean flag to serialize the behaviour.

 	cpu0(X)			                cpu1(Y)

    dev_coredump() uevent sent to user space
    device_add()  ======================> user space process Y reads the
                                          uevents writes to devcd fd
                                          which results into writes to

                                         devcd_data_write()
                                           mod_delayed_work()
                                             try_to_grab_pending()
                                               del_timer()
                                                 debug_assert_init()
   INIT_DELAYED_WORK()
   schedule_delayed_work()
                                                   debug_object_fixup()
                                                     timer_fixup_assert_init()
                                                       timer_setup()
                                                         do_init_timer()
                                                       /*
                                                        Above call reinitializes
                                                        the timer to
                                                        timer->entry.pprev=NULL
                                                        and this will be checked
                                                        later in timer_pending() call.
                                                       */
                                                 timer_pending()
                                                  !hlist_unhashed_lockless(&timer->entry)
                                                    !h->pprev
                                                /*
                                                  del_timer() checks h->pprev and finds
                                                  it to be NULL due to which
                                                  try_to_grab_pending() stucks.
                                                */

Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/


Signed-off-by: default avatarMukesh Ojha <quic_mojha@quicinc.com>
Link: https://lore.kernel.org/r/1663073424-13663-1-git-send-email-quic_mojha@quicinc.com


Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Stable-dep-of: af54d778a038 ("devcoredump: Send uevent once devcd is ready")
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
parent 2e0dfb56
Loading
Loading
Loading
Loading
+81 −2
Original line number Diff line number Diff line
@@ -29,6 +29,47 @@ struct devcd_entry {
	struct device devcd_dev;
	void *data;
	size_t datalen;
	/*
	 * Here, mutex is required to serialize the calls to del_wk work between
	 * user/kernel space which happens when devcd is added with device_add()
	 * and that sends uevent to user space. User space reads the uevents,
	 * and calls to devcd_data_write() which try to modify the work which is
	 * not even initialized/queued from devcoredump.
	 *
	 *
	 *
	 *        cpu0(X)                                 cpu1(Y)
	 *
	 *        dev_coredump() uevent sent to user space
	 *        device_add()  ======================> user space process Y reads the
	 *                                              uevents writes to devcd fd
	 *                                              which results into writes to
	 *
	 *                                             devcd_data_write()
	 *                                               mod_delayed_work()
	 *                                                 try_to_grab_pending()
	 *                                                   del_timer()
	 *                                                     debug_assert_init()
	 *       INIT_DELAYED_WORK()
	 *       schedule_delayed_work()
	 *
	 *
	 * Also, mutex alone would not be enough to avoid scheduling of
	 * del_wk work after it get flush from a call to devcd_free()
	 * mentioned as below.
	 *
	 *	disabled_store()
	 *        devcd_free()
	 *          mutex_lock()             devcd_data_write()
	 *          flush_delayed_work()
	 *          mutex_unlock()
	 *                                   mutex_lock()
	 *                                   mod_delayed_work()
	 *                                   mutex_unlock()
	 * So, delete_work flag is required.
	 */
	struct mutex mutex;
	bool delete_work;
	struct module *owner;
	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
			void *data, size_t datalen);
@@ -88,7 +129,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
	struct device *dev = kobj_to_dev(kobj);
	struct devcd_entry *devcd = dev_to_devcd(dev);

	mutex_lock(&devcd->mutex);
	if (!devcd->delete_work) {
		devcd->delete_work = true;
		mod_delayed_work(system_wq, &devcd->del_wk, 0);
	}
	mutex_unlock(&devcd->mutex);

	return count;
}
@@ -116,7 +162,12 @@ static int devcd_free(struct device *dev, void *data)
{
	struct devcd_entry *devcd = dev_to_devcd(dev);

	mutex_lock(&devcd->mutex);
	if (!devcd->delete_work)
		devcd->delete_work = true;

	flush_delayed_work(&devcd->del_wk);
	mutex_unlock(&devcd->mutex);
	return 0;
}

@@ -126,6 +177,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
	return sprintf(buf, "%d\n", devcd_disabled);
}

/*
 *
 *	disabled_store()                                	worker()
 *	 class_for_each_device(&devcd_class,
 *		NULL, NULL, devcd_free)
 *         ...
 *         ...
 *	   while ((dev = class_dev_iter_next(&iter))
 *                                                             devcd_del()
 *                                                               device_del()
 *                                                                 put_device() <- last reference
 *             error = fn(dev, data)                           devcd_dev_release()
 *             devcd_free(dev, data)                           kfree(devcd)
 *             mutex_lock(&devcd->mutex);
 *
 *
 * In the above diagram, It looks like disabled_store() would be racing with parallely
 * running devcd_del() and result in memory abort while acquiring devcd->mutex which
 * is called after kfree of devcd memory  after dropping its last reference with
 * put_device(). However, this will not happens as fn(dev, data) runs
 * with its own reference to device via klist_node so it is not its last reference.
 * so, above situation would not occur.
 */

static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
			      const char *buf, size_t count)
{
@@ -291,13 +366,16 @@ void dev_coredumpm(struct device *dev, struct module *owner,
	devcd->read = read;
	devcd->free = free;
	devcd->failing_dev = get_device(dev);
	devcd->delete_work = false;

	mutex_init(&devcd->mutex);
	device_initialize(&devcd->devcd_dev);

	dev_set_name(&devcd->devcd_dev, "devcd%d",
		     atomic_inc_return(&devcd_count));
	devcd->devcd_dev.class = &devcd_class;

	mutex_lock(&devcd->mutex);
	if (device_add(&devcd->devcd_dev))
		goto put_device;

@@ -311,10 +389,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,

	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);

	mutex_unlock(&devcd->mutex);
	return;
 put_device:
	put_device(&devcd->devcd_dev);
	mutex_unlock(&devcd->mutex);
 put_module:
	module_put(owner);
 free: