Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2f26e0a9 authored by David Woodhouse's avatar David Woodhouse
Browse files

iommu/vt-d: Add basic SVM PASID support



This provides basic PASID support for endpoint devices, tested with a
version of the i915 driver.

Signed-off-by: default avatarDavid Woodhouse <David.Woodhouse@intel.com>
parent b16d0cb9
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -139,6 +139,7 @@ config INTEL_IOMMU_SVM
	bool "Support for Shared Virtual Memory with Intel IOMMU"
	depends on INTEL_IOMMU && X86
	select PCI_PASID
	select MMU_NOTIFIER
	help
	  Shared Virtual Memory (SVM) provides a facility for devices
	  to access DMA resources through process address space by
+104 −0
Original line number Diff line number Diff line
@@ -4929,6 +4929,110 @@ static void intel_iommu_remove_device(struct device *dev)
	iommu_device_unlink(iommu->iommu_dev, dev);
}

#ifdef CONFIG_INTEL_IOMMU_SVM
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
{
	struct device_domain_info *info;
	struct context_entry *context;
	struct dmar_domain *domain;
	unsigned long flags;
	u64 ctx_lo;
	int ret;

	domain = get_valid_domain_for_dev(sdev->dev);
	if (!domain)
		return -EINVAL;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -EINVAL;
	info = sdev->dev->archdata.iommu;
	if (!info || !info->pasid_supported)
		goto out;

	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
	if (WARN_ON(!context))
		goto out;

	ctx_lo = context[0].lo;

	sdev->did = domain->iommu_did[iommu->seq_id];
	sdev->sid = PCI_DEVID(info->bus, info->devfn);

	if (!(ctx_lo & CONTEXT_PASIDE)) {
		context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
		context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
		wmb();
		/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
		 * extended to permit requests-with-PASID if the PASIDE bit
		 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
		 * however, the PASIDE bit is ignored and requests-with-PASID
		 * are unconditionally blocked. Which makes less sense.
		 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
		 * "guest mode" translation types depending on whether ATS
		 * is available or not. Annoyingly, we can't use the new
		 * modes *unless* PASIDE is set. */
		if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
			ctx_lo &= ~CONTEXT_TT_MASK;
			if (info->ats_supported)
				ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
			else
				ctx_lo |= CONTEXT_TT_PT_PASID << 2;
		}
		ctx_lo |= CONTEXT_PASIDE;
		context[0].lo = ctx_lo;
		wmb();
		iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
	}

	/* Enable PASID support in the device, if it wasn't already */
	if (!info->pasid_enabled)
		iommu_enable_dev_iotlb(info);

	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}
	ret = 0;

 out:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}

struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	if (iommu_dummy(dev)) {
		dev_warn(dev,
			 "No IOMMU translation for device; cannot enable SVM\n");
		return NULL;
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if ((!iommu)) {
		dev_dbg(dev, "No IOMMU for device; cannot enable SVM\n");
		return NULL;
	}

	if (!iommu->pasid_table) {
		dev_dbg(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
		return NULL;
	}

	return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */

static const struct iommu_ops intel_iommu_ops = {
	.capable	= intel_iommu_capable,
	.domain_alloc	= intel_iommu_domain_alloc,
+291 −0
Original line number Diff line number Diff line
@@ -14,6 +14,17 @@
 */

#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>

struct pasid_entry {
	u64 val;
};

int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
{
@@ -42,6 +53,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
				iommu->name);
	}

	idr_init(&iommu->pasid_idr);

	return 0;
}

@@ -61,5 +74,283 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
		free_pages((unsigned long)iommu->pasid_state_table, order);
		iommu->pasid_state_table = NULL;
	}
	idr_destroy(&iommu->pasid_idr);
	return 0;
}

static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
				       unsigned long address, int pages, int ih)
{
	struct qi_desc desc;
	int mask = ilog2(__roundup_pow_of_two(pages));

	if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) ||
	    mask > cap_max_amask_val(svm->iommu->cap)) {
		desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
		desc.high = 0;
	} else {
		desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
			QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
		desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) |
			QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
	}

	qi_submit_sync(&desc, svm->iommu);

	if (sdev->dev_iotlb) {
		desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
			QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
		if (mask) {
			unsigned long adr, delta;

			/* Least significant zero bits in the address indicate the
			 * range of the request. So mask them out according to the
			 * size. */
			adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1);

			/* Now ensure that we round down further if the original
			 * request was not aligned w.r.t. its size */
			delta = address - adr;
			if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask)))
				adr &= ~(1 << (VTD_PAGE_SHIFT + mask));
			desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE;
		} else {
			desc.high = QI_DEV_EIOTLB_ADDR(address);
		}
		qi_submit_sync(&desc, svm->iommu);
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  int pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
			     unsigned long address, pte_t pte)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, address, 1, 1);
}

static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
				  unsigned long address)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, address, 1, 1);
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0);
}


static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev)
{
	struct qi_desc desc;

	desc.high = 0;
	desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid);

	qi_submit_sync(&desc, svm->iommu);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	svm->iommu->pasid_table[svm->pasid].val = 0;

	/* There's no need to do any flush because we can't get here if there
	 * are any devices left anyway. */
	WARN_ON(!list_empty(&svm->devs));
}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.change_pte = intel_change_pte,
	.invalidate_page = intel_invalidate_page,
	.invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

int intel_svm_bind_mm(struct device *dev, int *pasid)
{
	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
	struct intel_svm_dev *sdev;
	struct intel_svm *svm = NULL;
	int pasid_max;
	int ret;

	BUG_ON(pasid && !current->mm);

	if (WARN_ON(!iommu))
		return -EINVAL;

	if (dev_is_pci(dev)) {
		pasid_max = pci_max_pasids(to_pci_dev(dev));
		if (pasid_max < 0)
			return -EINVAL;
	} else
		pasid_max = 1 << 20;

	mutex_lock(&pasid_mutex);
	if (pasid) {
		int i;

		idr_for_each_entry(&iommu->pasid_idr, svm, i) {
			if (svm->mm != current->mm)
				continue;

			if (svm->pasid >= pasid_max) {
				dev_warn(dev,
					 "Limited PASID width. Cannot use existing PASID %d\n",
					 svm->pasid);
				ret = -ENOSPC;
				goto out;
			}

			list_for_each_entry(sdev, &svm->devs, list) {
				if (dev == sdev->dev) {
					sdev->users++;
					goto success;
				}
			}

			break;
		}
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto out;
	}
	sdev->dev = dev;

	ret = intel_iommu_enable_pasid(iommu, sdev);
	if (ret || !pasid) {
		/* If they don't actually want to assign a PASID, this is
		 * just an enabling check/preparation. */
		kfree(sdev);
		goto out;
	}
	/* Finish the setup now we know we're keeping it */
	sdev->users = 1;
	init_rcu_head(&sdev->rcu);

	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm) {
			ret = -ENOMEM;
			kfree(sdev);
			goto out;
		}
		svm->iommu = iommu;

		if (pasid_max > 2 << ecap_pss(iommu->ecap))
			pasid_max = 2 << ecap_pss(iommu->ecap);

		ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1,
				GFP_KERNEL);
		if (ret < 0) {
			kfree(svm);
			goto out;
		}
		svm->pasid = ret;
		svm->notifier.ops = &intel_mmuops;
		svm->mm = get_task_mm(current);
		INIT_LIST_HEAD_RCU(&svm->devs);
		ret = -ENOMEM;
		if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) {
			idr_remove(&svm->iommu->pasid_idr, svm->pasid);
			kfree(svm);
			kfree(sdev);
			goto out;
		}
		iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1;
		wmb();
	}
	list_add_rcu(&sdev->list, &svm->devs);

 success:
	*pasid = svm->pasid;
	ret = 0;
 out:
	mutex_unlock(&pasid_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_bind_mm);

int intel_svm_unbind_mm(struct device *dev, int pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	int ret = -EINVAL;

	mutex_lock(&pasid_mutex);
	iommu = intel_svm_device_to_iommu(dev);
	if (!iommu || !iommu->pasid_table)
		goto out;

	svm = idr_find(&iommu->pasid_idr, pasid);
	if (!svm)
		goto out;

	list_for_each_entry(sdev, &svm->devs, list) {
		if (dev == sdev->dev) {
			ret = 0;
			sdev->users--;
			if (!sdev->users) {
				list_del_rcu(&sdev->list);
				/* Flush the PASID cache and IOTLB for this device.
				 * Note that we do depend on the hardware *not* using
				 * the PASID any more. Just as we depend on other
				 * devices never using PASIDs that they have no right
				 * to use. We have a *shared* PASID table, because it's
				 * large and has to be physically contiguous. So it's
				 * hard to be as defensive as we might like. */
				intel_flush_pasid_dev(svm, sdev);
				intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
				kfree_rcu(sdev, rcu);

				if (list_empty(&svm->devs)) {
					mmu_notifier_unregister(&svm->notifier, svm->mm);

					idr_remove(&svm->iommu->pasid_idr, svm->pasid);
					mmput(svm->mm);
					/* We mandate that no page faults may be outstanding
					 * for the PASID when intel_svm_unbind_mm() is called.
					 * If that is not obeyed, subtle errors will happen.
					 * Let's make them less subtle... */
					memset(svm, 0x6b, sizeof(*svm));
					kfree(svm);
				}
			}
			break;
		}
	}
 out:
	mutex_unlock(&pasid_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
+7 −0
Original line number Diff line number Diff line
@@ -20,6 +20,13 @@
#define CONTEXT_TT_MULTI_LEVEL	0
#define CONTEXT_TT_DEV_IOTLB	1
#define CONTEXT_TT_PASS_THROUGH 2
/* Extended context entry types */
#define CONTEXT_TT_PT_PASID	4
#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5
#define CONTEXT_TT_MASK (7ULL << 2)

#define CONTEXT_PRS		(1ULL << 9)
#define CONTEXT_PASIDE		(1ULL << 11)

struct intel_iommu;
struct dmar_domain;
+63 −5
Original line number Diff line number Diff line
/*
 * Copyright (c) 2006, Intel Corporation.
 * Copyright © 2006-2015, Intel Corporation.
 *
 * Authors: Ashok Raj <ashok.raj@intel.com>
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *          David Woodhouse <David.Woodhouse@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -13,10 +17,6 @@
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#ifndef _INTEL_IOMMU_H_
@@ -25,7 +25,10 @@
#include <linux/types.h>
#include <linux/iova.h>
#include <linux/io.h>
#include <linux/idr.h>
#include <linux/dma_remapping.h>
#include <linux/mmu_notifier.h>
#include <linux/list.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

@@ -251,6 +254,9 @@ enum {
#define QI_DIOTLB_TYPE		0x3
#define QI_IEC_TYPE		0x4
#define QI_IWD_TYPE		0x5
#define QI_EIOTLB_TYPE		0x6
#define QI_PC_TYPE		0x7
#define QI_DEIOTLB_TYPE		0x8

#define QI_IEC_SELECTIVE	(((u64)1) << 4)
#define QI_IEC_IIDEX(idx)	(((u64)(idx & 0xffff) << 32))
@@ -278,6 +284,34 @@ enum {
#define QI_DEV_IOTLB_SIZE	1
#define QI_DEV_IOTLB_MAX_INVS	32

#define QI_PC_PASID(pasid)	(((u64)pasid) << 32)
#define QI_PC_DID(did)		(((u64)did) << 16)
#define QI_PC_GRAN(gran)	(((u64)gran) << 4)

#define QI_PC_ALL_PASIDS	(QI_PC_TYPE | QI_PC_GRAN(0))
#define QI_PC_PASID_SEL		(QI_PC_TYPE | QI_PC_GRAN(1))

#define QI_EIOTLB_ADDR(addr)	((u64)(addr) & VTD_PAGE_MASK)
#define QI_EIOTLB_GL(gl)	(((u64)gl) << 7)
#define QI_EIOTLB_IH(ih)	(((u64)ih) << 6)
#define QI_EIOTLB_AM(am)	(((u64)am))
#define QI_EIOTLB_PASID(pasid) 	(((u64)pasid) << 32)
#define QI_EIOTLB_DID(did)	(((u64)did) << 16)
#define QI_EIOTLB_GRAN(gran) 	(((u64)gran) << 4)

#define QI_DEV_EIOTLB_ADDR(a)	((u64)(a) & VTD_PAGE_MASK)
#define QI_DEV_EIOTLB_SIZE	(((u64)1) << 11)
#define QI_DEV_EIOTLB_GLOB(g)	((u64)g)
#define QI_DEV_EIOTLB_PASID(p)	(((u64)p) << 32)
#define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
#define QI_DEV_EIOTLB_QDEP(qd)	(((qd) & 0x1f) << 16)
#define QI_DEV_EIOTLB_MAX_INVS	32

#define QI_GRAN_ALL_ALL			0
#define QI_GRAN_NONG_ALL		1
#define QI_GRAN_NONG_PASID		2
#define QI_GRAN_PSI_PASID		3

struct qi_desc {
	u64 low, high;
};
@@ -359,6 +393,7 @@ struct intel_iommu {
	 * told to. But while it's all driver-arbitrated, we're fine. */
	struct pasid_entry *pasid_table;
	struct pasid_state_entry *pasid_state_table;
	struct idr pasid_idr;
#endif
	struct q_inval  *qi;            /* Queued invalidation info */
	u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -399,9 +434,32 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);

extern int dmar_ir_support(void);

#ifdef CONFIG_INTEL_IOMMU_SVM
extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);

struct intel_svm_dev {
	struct list_head list;
	struct rcu_head rcu;
	struct device *dev;
	int users;
	u16 did;
	u16 dev_iotlb:1;
	u16 sid, qdep;
};

struct intel_svm {
	struct mmu_notifier notifier;
	struct mm_struct *mm;
	struct intel_iommu *iommu;
	int pasid;
	struct list_head devs;
};

extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
#endif

extern const struct attribute_group *intel_iommu_groups[];

#endif
Loading