Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 021ad274 authored by Dexuan Cui, committed by Lorenzo Pieralisi
Browse files

PCI: hv: Serialize the present and eject work items

When we hot-remove the device, we first receive a PCI_EJECT message and
then receive a PCI_BUS_RELATIONS message with bus_rel->device_count == 0.

The first message is offloaded to hv_eject_device_work(), and the second
is offloaded to pci_devices_present_work(). Both the paths can be running
list_del(&hpdev->list_entry), causing general protection fault, because
system_wq can run them concurrently.

The patch eliminates the race condition.

Since access to present/eject work items is serialized, we do not need the
hbus->enum_sem anymore, so remove it.

Fixes: 4daace0d ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
Link: https://lkml.kernel.org/r/KL1P15301MB00064DA6B4D221123B5241CFBFD70@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM


Tested-by: Adrian Suhov <v-adsuho@microsoft.com>
Tested-by: Chris Valean <v-chvale@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
[lorenzo.pieralisi@arm.com: squashed semaphore removal patch]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
Cc: <stable@vger.kernel.org> # v4.6+
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Jack Morgenstein <jackm@mellanox.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
parent 7928b2cb
Loading
Loading
Loading
Loading
+16 −18
Original line number Diff line number Diff line
@@ -447,7 +447,6 @@ struct hv_pcibus_device {
	spinlock_t device_list_lock;	/* Protect lists below */
	void __iomem *cfg_addr;

	struct semaphore enum_sem;
	struct list_head resources_for_children;

	struct list_head children;
@@ -461,6 +460,8 @@ struct hv_pcibus_device {
	struct retarget_msi_interrupt retarget_msi_interrupt_params;

	spinlock_t retarget_msi_interrupt_lock;

	struct workqueue_struct *wq;
};

/*
@@ -1590,12 +1591,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
 * It must also treat the omission of a previously observed device as
 * notification that the device no longer exists.
 *
 * Note that this function is a work item, and it may not be
 * invoked in the order that it was queued.  Back to back
 * updates of the list of present devices may involve queuing
 * multiple work items, and this one may run before ones that
 * were sent later. As such, this function only does something
 * if is the last one in the queue.
 * Note that this function is serialized with hv_eject_device_work(),
 * because both are pushed to the ordered workqueue hbus->wq.
 */
static void pci_devices_present_work(struct work_struct *work)
{
@@ -1616,11 +1613,6 @@ static void pci_devices_present_work(struct work_struct *work)

	INIT_LIST_HEAD(&removed);

	if (down_interruptible(&hbus->enum_sem)) {
		put_hvpcibus(hbus);
		return;
	}

	/* Pull this off the queue and process it if it was the last one. */
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	while (!list_empty(&hbus->dr_list)) {
@@ -1637,7 +1629,6 @@ static void pci_devices_present_work(struct work_struct *work)
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	if (!dr) {
		up(&hbus->enum_sem);
		put_hvpcibus(hbus);
		return;
	}
@@ -1724,7 +1715,6 @@ static void pci_devices_present_work(struct work_struct *work)
		break;
	}

	up(&hbus->enum_sem);
	put_hvpcibus(hbus);
	kfree(dr);
}
@@ -1770,7 +1760,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	get_hvpcibus(hbus);
	schedule_work(&dr_wrk->wrk);
	queue_work(hbus->wq, &dr_wrk->wrk);
}

/**
@@ -1848,7 +1838,7 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
	get_pcichild(hpdev, hv_pcidev_ref_pnp);
	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
	get_hvpcibus(hpdev->hbus);
	schedule_work(&hpdev->wrk);
	queue_work(hpdev->hbus->wq, &hpdev->wrk);
}

/**
@@ -2461,13 +2451,18 @@ static int hv_pci_probe(struct hv_device *hdev,
	spin_lock_init(&hbus->config_lock);
	spin_lock_init(&hbus->device_list_lock);
	spin_lock_init(&hbus->retarget_msi_interrupt_lock);
	sema_init(&hbus->enum_sem, 1);
	init_completion(&hbus->remove_event);
	hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
					   hbus->sysdata.domain);
	if (!hbus->wq) {
		ret = -ENOMEM;
		goto free_bus;
	}

	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
			 hv_pci_onchannelcallback, hbus);
	if (ret)
		goto free_bus;
		goto destroy_wq;

	hv_set_drvdata(hdev, hbus);

@@ -2536,6 +2531,8 @@ static int hv_pci_probe(struct hv_device *hdev,
	hv_free_config_window(hbus);
close:
	vmbus_close(hdev->channel);
destroy_wq:
	destroy_workqueue(hbus->wq);
free_bus:
	free_page((unsigned long)hbus);
	return ret;
@@ -2615,6 +2612,7 @@ static int hv_pci_remove(struct hv_device *hdev)
	irq_domain_free_fwnode(hbus->sysdata.fwnode);
	put_hvpcibus(hbus);
	wait_for_completion(&hbus->remove_event);
	destroy_workqueue(hbus->wq);
	free_page((unsigned long)hbus);
	return 0;
}