Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ab1228e4 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull intel iommu updates from David Woodhouse:
 "This adds "Shared Virtual Memory" (aka PASID support) for the Intel
  IOMMU.  This allows devices to do DMA using process address space,
  translated through the normal CPU page tables for the relevant mm.

  With corresponding support added to the i915 driver, this has been
  tested with the graphics device on Skylake.  We don't have the
  required TLP support in our PCIe root ports for supporting discrete
  devices yet, so it's only integrated devices that can do it so far"

* git://git.infradead.org/intel-iommu: (23 commits)
  iommu/vt-d: Fix rwxp flags in SVM device fault callback
  iommu/vt-d: Expose struct svm_dev_ops without CONFIG_INTEL_IOMMU_SVM
  iommu/vt-d: Clean up pasid_enabled() and ecs_enabled() dependencies
  iommu/vt-d: Handle Caching Mode implementations of SVM
  iommu/vt-d: Fix SVM IOTLB flush handling
  iommu/vt-d: Use dev_err(..) in intel_svm_device_to_iommu(..)
  iommu/vt-d: fix a loop in prq_event_thread()
  iommu/vt-d: Fix IOTLB flushing for global pages
  iommu/vt-d: Fix address shifting in page request handler
  iommu/vt-d: shift wrapping bug in prq_event_thread()
  iommu/vt-d: Fix NULL pointer dereference in page request error case
  iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access
  iommu/vt-d: Implement SVM_FLAG_PRIVATE_PASID to allocate unique PASIDs
  iommu/vt-d: Add callback to device driver on page faults
  iommu/vt-d: Implement page request handling
  iommu/vt-d: Generalise DMAR MSI setup to allow for page request events
  iommu/vt-d: Implement deferred invalidate for SVM
  iommu/vt-d: Add basic SVM PASID support
  iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS
  iommu/vt-d: Add initial support for PASID tables
  ...
parents 5ebe0ee8 0bdec95c
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -134,6 +134,16 @@ config INTEL_IOMMU
	  and include PCI device scope covered by these DMA
	  remapping devices.

config INTEL_IOMMU_SVM
	bool "Support for Shared Virtual Memory with Intel IOMMU"
	depends on INTEL_IOMMU && X86
	select PCI_PASID
	select MMU_NOTIFIER
	help
	  Shared Virtual Memory (SVM) provides a facility for devices
	  to access DMA resources through process address space by
	  means of a Process Address Space ID (PASID).

config INTEL_IOMMU_DEFAULT_ON
	def_bool y
	prompt "Enable Intel DMA Remapping Devices by default"
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
+31 −11
Original line number Diff line number Diff line
@@ -1086,6 +1086,11 @@ static void free_iommu(struct intel_iommu *iommu)
	iommu_device_destroy(iommu->iommu_dev);

	if (iommu->irq) {
		if (iommu->pr_irq) {
			free_irq(iommu->pr_irq, iommu);
			dmar_free_hwirq(iommu->pr_irq);
			iommu->pr_irq = 0;
		}
		free_irq(iommu->irq, iommu);
		dmar_free_hwirq(iommu->irq);
		iommu->irq = 0;
@@ -1493,53 +1498,68 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
	}
}


static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
{
	if (iommu->irq == irq)
		return DMAR_FECTL_REG;
	else if (iommu->pr_irq == irq)
		return DMAR_PECTL_REG;
	else
		BUG();
}

void dmar_msi_unmask(struct irq_data *data)
{
	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
	int reg = dmar_msi_reg(iommu, data->irq);
	unsigned long flag;

	/* unmask it */
	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	writel(0, iommu->reg + reg);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	readl(iommu->reg + reg);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(struct irq_data *data)
{
	unsigned long flag;
	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
	int reg = dmar_msi_reg(iommu, data->irq);
	unsigned long flag;

	/* mask it */
	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	writel(DMA_FECTL_IM, iommu->reg + reg);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	readl(iommu->reg + reg);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = irq_get_handler_data(irq);
	int reg = dmar_msi_reg(iommu, irq);
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	writel(msg->data, iommu->reg + reg + 4);
	writel(msg->address_lo, iommu->reg + reg + 8);
	writel(msg->address_hi, iommu->reg + reg + 12);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = irq_get_handler_data(irq);
	int reg = dmar_msi_reg(iommu, irq);
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	msg->data = readl(iommu->reg + reg + 4);
	msg->address_lo = readl(iommu->reg + reg + 8);
	msg->address_hi = readl(iommu->reg + reg + 12);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

+255 −48
Original line number Diff line number Diff line
@@ -418,10 +418,13 @@ struct device_domain_info {
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct {
		u8 enabled:1;
		u8 qdep;
	} ats;			/* ATS state */
	u8 pasid_supported:3;
	u8 pasid_enabled:1;
	u8 pri_supported:1;
	u8 pri_enabled:1;
	u8 ats_supported:1;
	u8 ats_enabled:1;
	u8 ats_qdep;
	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
@@ -497,13 +500,37 @@ static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
static int intel_iommu_pasid28;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

/* We only actually use ECS when PASID support (on the new bit 40)
 * is also advertised. Some early implementations — the ones with
 * PASID support on bit 28 — have issues even when we *only* use
 * extended root/context tables. */
/* Broadwell and Skylake have broken ECS support — normal so-called "second
 * level" translation of DMA requests-without-PASID doesn't actually happen
 * unless you also set the NESTE bit in an extended context-entry. Which of
 * course means that SVM doesn't work because it's trying to do nested
 * translation of the physical addresses it finds in the process page tables,
 * through the IOVA->phys mapping found in the "second level" page tables.
 *
 * The VT-d specification was retroactively changed to change the definition
 * of the capability bits and pretend that Broadwell/Skylake never happened...
 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
 * for some reason it was the PASID capability bit which was redefined (from
 * bit 28 on BDW/SKL to bit 40 in future).
 *
 * So our test for ECS needs to eschew those implementations which set the old
 * PASID capabiity bit 28, since those are the ones on which ECS is broken.
 * Unless we are working around the 'pasid28' limitations, that is, by putting
 * the device into passthrough mode for normal DMA and thus masking the bug.
 */
#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
			    ecap_pasid(iommu->ecap))
			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
/* PASID support is thus enabled if ECS is enabled and *either* of the old
 * or new capability bits are set. */
#define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -566,6 +593,11 @@ static int __init intel_iommu_setup(char *str)
			printk(KERN_INFO
				"Intel-IOMMU: disable extended context table support\n");
			intel_iommu_ecs = 0;
		} else if (!strncmp(str, "pasid28", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: enable pre-production PASID support\n");
			intel_iommu_pasid28 = 1;
			iommu_identity_mapping |= IDENTMAP_GFX;
		}

		str += strcspn(str, ",");
@@ -1407,37 +1439,22 @@ static struct device_domain_info *
iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
			 u8 bus, u8 devfn)
{
	bool found = false;
	struct device_domain_info *info;
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!ecap_dev_iotlb_support(iommu->ecap))
		return NULL;

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			found = true;
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	if (!found || !info->dev || !dev_is_pci(info->dev))
		return NULL;

	pdev = to_pci_dev(info->dev);

	if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
		return NULL;

	if (!dmar_find_matched_atsr_unit(pdev))
	return NULL;

	return info;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
@@ -1448,20 +1465,48 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
		return;

	pdev = to_pci_dev(info->dev);
	if (pci_enable_ats(pdev, VTD_PAGE_SHIFT))
		return;

	info->ats.enabled = 1;
	info->ats.qdep = pci_ats_queue_depth(pdev);
#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	if (!info->ats.enabled)
	struct pci_dev *pdev;

	if (dev_is_pci(info->dev))
		return;

	pci_disable_ats(to_pci_dev(info->dev));
	info->ats.enabled = 0;
	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -1473,11 +1518,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats.enabled)
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats.qdep;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -1667,6 +1712,14 @@ static void free_dmar_iommu(struct intel_iommu *iommu)

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_enabled(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
		intel_svm_free_pasid_tables(iommu);
	}
#endif
}

static struct dmar_domain *alloc_domain(int flags)
@@ -1934,8 +1987,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
		}

		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
		translation = info ? CONTEXT_TT_DEV_IOTLB :
				     CONTEXT_TT_MULTI_LEVEL;
		if (info && info->ats_supported)
			translation = CONTEXT_TT_DEV_IOTLB;
		else
			translation = CONTEXT_TT_MULTI_LEVEL;

		context_set_address_root(context, virt_to_phys(pgd));
		context_set_address_width(context, iommu->agaw);
@@ -2273,12 +2328,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,

	info->bus = bus;
	info->devfn = devfn;
	info->ats.enabled = 0;
	info->ats.qdep = 0;
	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
	info->ats_qdep = 0;
	info->dev = dev;
	info->domain = domain;
	info->iommu = iommu;

	if (dev && dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(info->dev);

		if (ecap_dev_iotlb_support(iommu->ecap) &&
		    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
		    dmar_find_matched_atsr_unit(pdev))
			info->ats_supported = 1;

		if (ecs_enabled(iommu)) {
			if (pasid_enabled(iommu)) {
				int features = pci_pasid_features(pdev);
				if (features >= 0)
					info->pasid_supported = features | 1;
			}

			if (info->ats_supported && ecap_prs(iommu->ecap) &&
			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
				info->pri_supported = 1;
		}
	}

	spin_lock_irqsave(&device_domain_lock, flags);
	if (dev)
		found = find_domain(dev);
@@ -2404,11 +2481,6 @@ found_domain:
	return domain;
}

static int iommu_identity_mapping;
#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
@@ -3100,6 +3172,10 @@ static int __init init_dmars(void)

		if (!ecap_pass_through(iommu->ecap))
			hw_pass_through = 0;
#ifdef CONFIG_INTEL_IOMMU_SVM
		if (pasid_enabled(iommu))
			intel_svm_alloc_pasid_tables(iommu);
#endif
	}

	if (iommu_pass_through)
@@ -3187,6 +3263,13 @@ domains_done:

		iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
		if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
			ret = intel_svm_enable_prq(iommu);
			if (ret)
				goto free_iommu;
		}
#endif
		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto free_iommu;
@@ -4115,6 +4198,11 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
	if (ret)
		goto out;

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_enabled(iommu))
		intel_svm_alloc_pasid_tables(iommu);
#endif

	if (dmaru->ignored) {
		/*
		 * we always have to disable PMRs or DMA may fail on this device
@@ -4126,6 +4214,14 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)

	intel_iommu_init_qi(iommu);
	iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
		ret = intel_svm_enable_prq(iommu);
		if (ret)
			goto disable_iommu;
	}
#endif
	ret = dmar_set_interrupt(iommu);
	if (ret)
		goto disable_iommu;
@@ -4194,14 +4290,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev)
	dev = pci_physfn(dev);
	for (bus = dev->bus; bus; bus = bus->parent) {
		bridge = bus->self;
		if (!bridge || !pci_is_pcie(bridge) ||
		/* If it's an integrated device, allow ATS */
		if (!bridge)
			return 1;
		/* Connected via non-PCIe: no ATS */
		if (!pci_is_pcie(bridge) ||
		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
		/* If we found the root port, look it up in the ATSR */
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
			break;
	}
	if (!bridge)
		return 0;

	rcu_read_lock();
	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
@@ -4865,6 +4964,114 @@ static void intel_iommu_remove_device(struct device *dev)
	iommu_device_unlink(iommu->iommu_dev, dev);
}

#ifdef CONFIG_INTEL_IOMMU_SVM
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
{
	struct device_domain_info *info;
	struct context_entry *context;
	struct dmar_domain *domain;
	unsigned long flags;
	u64 ctx_lo;
	int ret;

	domain = get_valid_domain_for_dev(sdev->dev);
	if (!domain)
		return -EINVAL;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -EINVAL;
	info = sdev->dev->archdata.iommu;
	if (!info || !info->pasid_supported)
		goto out;

	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
	if (WARN_ON(!context))
		goto out;

	ctx_lo = context[0].lo;

	sdev->did = domain->iommu_did[iommu->seq_id];
	sdev->sid = PCI_DEVID(info->bus, info->devfn);

	if (!(ctx_lo & CONTEXT_PASIDE)) {
		context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
		context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
		wmb();
		/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
		 * extended to permit requests-with-PASID if the PASIDE bit
		 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
		 * however, the PASIDE bit is ignored and requests-with-PASID
		 * are unconditionally blocked. Which makes less sense.
		 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
		 * "guest mode" translation types depending on whether ATS
		 * is available or not. Annoyingly, we can't use the new
		 * modes *unless* PASIDE is set. */
		if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
			ctx_lo &= ~CONTEXT_TT_MASK;
			if (info->ats_supported)
				ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
			else
				ctx_lo |= CONTEXT_TT_PT_PASID << 2;
		}
		ctx_lo |= CONTEXT_PASIDE;
		if (iommu->pasid_state_table)
			ctx_lo |= CONTEXT_DINVE;
		if (info->pri_supported)
			ctx_lo |= CONTEXT_PRS;
		context[0].lo = ctx_lo;
		wmb();
		iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
	}

	/* Enable PASID support in the device, if it wasn't already */
	if (!info->pasid_enabled)
		iommu_enable_dev_iotlb(info);

	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}
	ret = 0;

 out:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}

struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	if (iommu_dummy(dev)) {
		dev_warn(dev,
			 "No IOMMU translation for device; cannot enable SVM\n");
		return NULL;
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if ((!iommu)) {
		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
		return NULL;
	}

	if (!iommu->pasid_table) {
		dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
		return NULL;
	}

	return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */

static const struct iommu_ops intel_iommu_ops = {
	.capable	= intel_iommu_capable,
	.domain_alloc	= intel_iommu_domain_alloc,
+602 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading