
Commit 2b681faf authored by Joerg Roedel

Merge branch 'amd-iommu/pagetable' into amd-iommu/2.6.32

Conflicts:
	arch/x86/kernel/amd_iommu.c
parents 03362a05 abdc5eb3
arch/x86/include/asm/amd_iommu_types.h  +20 −18
@@ -147,19 +147,25 @@
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
 #define PAGE_MODE_3_LEVEL 0x03
-
-#define IOMMU_PDE_NL_0   0x000ULL
-#define IOMMU_PDE_NL_1   0x200ULL
-#define IOMMU_PDE_NL_2   0x400ULL
-#define IOMMU_PDE_NL_3   0x600ULL
-
-#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL)
-#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL)
-#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL)
-
-#define IOMMU_MAP_SIZE_L1 (1ULL << 21)
-#define IOMMU_MAP_SIZE_L2 (1ULL << 30)
-#define IOMMU_MAP_SIZE_L3 (1ULL << 39)
+#define PAGE_MODE_4_LEVEL 0x04
+#define PAGE_MODE_5_LEVEL 0x05
+#define PAGE_MODE_6_LEVEL 0x06
+
+#define PM_LEVEL_SHIFT(x)	(12 + ((x) * 9))
+#define PM_LEVEL_SIZE(x)	(((x) < 6) ? \
+				  ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
+				   (0xffffffffffffffffULL))
+#define PM_LEVEL_INDEX(x, a)	(((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
+#define PM_LEVEL_ENC(x)		(((x) << 9) & 0xe00ULL)
+#define PM_LEVEL_PDE(x, a)	((a) | PM_LEVEL_ENC((x)) | \
+				 IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+#define PM_PTE_LEVEL(pte)	(((pte) >> 9) & 0x7ULL)
+
+#define PM_MAP_4k		0
+#define PM_ADDR_MASK		0x000ffffffffff000ULL
+#define PM_MAP_MASK(lvl)	(PM_ADDR_MASK & \
+				(~((1ULL << (12 + ((lvl) * 9))) - 1)))
+#define PM_ALIGNED(lvl, addr)	((PM_MAP_MASK(lvl) & (addr)) == (addr))
 
 #define IOMMU_PTE_P  (1ULL << 0)
 #define IOMMU_PTE_TV (1ULL << 1)
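
Note: the PM_LEVEL_* macros generalize the old fixed-level helpers. Level x covers the 9 address bits starting at bit 12 + 9*x, so a level-x table index is just a shift and a 9-bit mask. A minimal standalone sketch (user space, macros copied from the hunk above, the sample address is made up):

#include <stdio.h>

#define PM_LEVEL_SHIFT(x)	(12 + ((x) * 9))
#define PM_LEVEL_INDEX(x, a)	(((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)

int main(void)
{
	unsigned long long a = 0x7fffff5000ULL;	/* sample IOVA, < 512GB */
	int level;

	/* A 3-level table is walked from level 2 down to level 0. */
	for (level = 2; level >= 0; level--)
		printf("level %d index: %llu\n",
		       level, PM_LEVEL_INDEX(level, a));
	return 0;
}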
@@ -168,11 +174,6 @@
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
-#define IOMMU_L1_PDE(address) \
-	((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
-#define IOMMU_L2_PDE(address) \
-	((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
-
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
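
Note: where IOMMU_L1_PDE/IOMMU_L2_PDE hard-coded the next-level field, PM_LEVEL_PDE builds a directory entry for any level, and PM_PTE_LEVEL reads the level back out of bits 9-11 during a walk. A small standalone sketch (the page-aligned address is made up):

#include <assert.h>

#define IOMMU_PTE_P  (1ULL << 0)
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)

#define PM_LEVEL_ENC(x)		(((x) << 9) & 0xe00ULL)
#define PM_LEVEL_PDE(x, a)	((a) | PM_LEVEL_ENC((x)) | \
				 IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
#define PM_PTE_LEVEL(pte)	(((pte) >> 9) & 0x7ULL)

int main(void)
{
	unsigned long long pde = PM_LEVEL_PDE(2ULL, 0x12345000ULL);

	/* The level round-trips through bits 9-11 of the entry. */
	assert(PM_PTE_LEVEL(pde) == 2);
	return 0;
}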
@@ -230,6 +231,7 @@ struct protection_domain {
 	int mode;		/* paging mode (0-6 levels) */
 	u64 *pt_root;		/* page table root pointer */
 	unsigned long flags;	/* flags to find out type of domain */
+	bool updated;		/* complete domain flush required */
 	unsigned dev_cnt;	/* devices assigned to this domain */
 	void *priv;		/* private data */
 };
arch/x86/kernel/amd_iommu.c  +158 −95
@@ -47,7 +47,6 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock);
  */
 static struct protection_domain *pt_domain;
 
-#ifdef CONFIG_IOMMU_API
 static struct iommu_ops amd_iommu_ops;
 
 /*
@@ -60,13 +59,16 @@ struct iommu_cmd {
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
 static struct dma_ops_domain *find_protection_domain(u16 devid);
-static u64* alloc_pte(struct protection_domain *dom,
-		      unsigned long address, u64
-		      **pte_page, gfp_t gfp);
+static u64 *alloc_pte(struct protection_domain *domain,
+		      unsigned long address, int end_lvl,
+		      u64 **pte_page, gfp_t gfp);
 static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 				      unsigned long start_page,
 				      unsigned int pages);
 static void reset_iommu_command_buffer(struct amd_iommu *iommu);
+static u64 *fetch_pte(struct protection_domain *domain,
+		      unsigned long address, int map_size);
+static void update_domain(struct protection_domain *domain);
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -535,12 +537,15 @@ static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
 	}
 }
 
-void amd_iommu_flush_all_devices(void)
+static void flush_devices_by_domain(struct protection_domain *domain)
 {
 	struct amd_iommu *iommu;
 	int i;
 
 	for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+		if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
+		    (amd_iommu_pd_table[i] != domain))
+			continue;
+
 		iommu = amd_iommu_rlookup_table[i];
 		if (!iommu)
@@ -567,6 +572,11 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu)
 	iommu->reset_in_progress = false;
 }
 
+void amd_iommu_flush_all_devices(void)
+{
+	flush_devices_by_domain(NULL);
+}
+
 /****************************************************************************
  *
  * The functions below are used to create the page table mappings for
@@ -584,18 +594,21 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu)
 static int iommu_map_page(struct protection_domain *dom,
 			  unsigned long bus_addr,
 			  unsigned long phys_addr,
-			  int prot)
+			  int prot,
+			  int map_size)
 {
 	u64 __pte, *pte;
 
 	bus_addr  = PAGE_ALIGN(bus_addr);
 	phys_addr = PAGE_ALIGN(phys_addr);
 
-	/* only support 512GB address spaces for now */
-	if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
+	BUG_ON(!PM_ALIGNED(map_size, bus_addr));
+	BUG_ON(!PM_ALIGNED(map_size, phys_addr));
+
+	if (!(prot & IOMMU_PROT_MASK))
 		return -EINVAL;
 
-	pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
+	pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL);
 
 	if (IOMMU_PTE_PRESENT(*pte))
 		return -EBUSY;
@@ -608,28 +621,17 @@ static int iommu_map_page(struct protection_domain *dom,
 
 	*pte = __pte;
 
+	update_domain(dom);
+
 	return 0;
 }
 
 static void iommu_unmap_page(struct protection_domain *dom,
-			     unsigned long bus_addr)
+			     unsigned long bus_addr, int map_size)
 {
-	u64 *pte;
-
-	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte))
-		return;
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte))
-		return;
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
-	*pte = 0;
+	u64 *pte = fetch_pte(dom, bus_addr, map_size);
 
+	if (pte)
+		*pte = 0;
 }
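
Note: map_size names the level at which the leaf PTE is expected, and the BUG_ONs require both addresses to be aligned to that level's page size. This merge only ever passes PM_MAP_4k (level 0); the level-1 (2MB) case below is shown purely for illustration:

#include <stdio.h>

#define PM_ADDR_MASK		0x000ffffffffff000ULL
#define PM_MAP_MASK(lvl)	(PM_ADDR_MASK & \
				(~((1ULL << (12 + ((lvl) * 9))) - 1)))
#define PM_ALIGNED(lvl, addr)	((PM_MAP_MASK(lvl) & (addr)) == (addr))

int main(void)
{
	printf("%d\n", PM_ALIGNED(0, 0x100000ULL)); /* 1: 4k aligned     */
	printf("%d\n", PM_ALIGNED(1, 0x100000ULL)); /* 0: not 2M aligned */
	printf("%d\n", PM_ALIGNED(1, 0x200000ULL)); /* 1: 2M aligned     */
	return 0;
}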

@@ -685,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 
 	for (addr = e->address_start; addr < e->address_end;
 	     addr += PAGE_SIZE) {
-		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
+		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
+				     PM_MAP_4k);
 		if (ret)
 			return ret;
 		/*
@@ -741,23 +744,28 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  * there is one, it returns the pointer to it.
  */
 static u64 *fetch_pte(struct protection_domain *domain,
-		      unsigned long address)
+		      unsigned long address, int map_size)
 {
+	int level;
 	u64 *pte;
 
-	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+	level =  domain->mode - 1;
+	pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
 
-	if (!IOMMU_PTE_PRESENT(*pte))
-		return NULL;
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(address)];
-
-	if (!IOMMU_PTE_PRESENT(*pte))
-		return NULL;
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+	while (level > map_size) {
+		if (!IOMMU_PTE_PRESENT(*pte))
+			return NULL;
+
+		level -= 1;
+
+		pte = IOMMU_PTE_PAGE(*pte);
+		pte = &pte[PM_LEVEL_INDEX(level, address)];
+
+		if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
+			pte = NULL;
+			break;
+		}
+	}
 
 	return pte;
 }
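
Note: fetch_pte now starts at whatever level the domain currently uses (mode - 1) and descends until it reaches map_size, bailing out when an intermediate entry is missing. A toy user-space rendition of the same top-down walk, with plain pointers standing in for physical addresses and the present bit:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PM_LEVEL_SHIFT(x)	(12 + ((x) * 9))
#define PM_LEVEL_INDEX(x, a)	(((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)

/* Two-level toy walk; NULL plays the role of a non-present entry. */
static uint64_t *walk(uint64_t **root, unsigned long long addr)
{
	uint64_t *leaf = root[PM_LEVEL_INDEX(1, addr)];

	if (!leaf)
		return NULL;
	return &leaf[PM_LEVEL_INDEX(0, addr)];
}

int main(void)
{
	uint64_t **root = calloc(512, sizeof(*root));
	uint64_t *leaf  = calloc(512, sizeof(*leaf));
	unsigned long long addr = (3ULL << PM_LEVEL_SHIFT(1)) | (5ULL << 12);

	root[3] = leaf;			/* level-1 index of addr is 3 */
	leaf[5] = 0xabc;		/* level-0 index of addr is 5 */
	printf("0x%llx\n", (unsigned long long)*walk(root, addr)); /* 0xabc */

	free(leaf);
	free(root);
	return 0;
}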
@@ -797,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu,
 		u64 *pte, *pte_page;
 
 		for (i = 0; i < num_ptes; ++i) {
-			pte = alloc_pte(&dma_dom->domain, address,
+			pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k,
 					&pte_page, gfp);
 			if (!pte)
 				goto out_free;
@@ -830,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu,
 	for (i = dma_dom->aperture[index]->offset;
 	     i < dma_dom->aperture_size;
 	     i += PAGE_SIZE) {
-		u64 *pte = fetch_pte(&dma_dom->domain, i);
+		u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k);
 		if (!pte || !IOMMU_PTE_PRESENT(*pte))
 			continue;
 
 		dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
 	}
 
+	update_domain(&dma_dom->domain);
+
 	return 0;
 
 out_free:
+	update_domain(&dma_dom->domain);
+
 	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
 
 	kfree(dma_dom->aperture[index]);
@@ -1079,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
 	dma_dom->domain.id = domain_id_alloc();
 	if (dma_dom->domain.id == 0)
 		goto free_dma_dom;
-	dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
+	dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
 	dma_dom->domain.flags = PD_DMA_OPS_MASK;
 	dma_dom->domain.priv = dma_dom;
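
Note: dma_ops domains can start with a smaller 2-level table because alloc_pte (further down) now grows the table on demand; PM_LEVEL_SIZE gives the growth thresholds. A quick standalone check of the per-mode limits, macros copied from the header hunk:

#include <stdio.h>

#define PM_LEVEL_SHIFT(x)	(12 + ((x) * 9))
#define PM_LEVEL_SIZE(x)	(((x) < 6) ? \
				  ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
				   (0xffffffffffffffffULL))

int main(void)
{
	int mode;

	/* Highest address each paging mode can translate:
	 * mode 2 -> 1GB - 1, mode 3 -> 512GB - 1, mode 6 -> full 64 bit. */
	for (mode = 1; mode <= 6; mode++)
		printf("mode %d: up to 0x%llx\n", mode, PM_LEVEL_SIZE(mode));
	return 0;
}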
@@ -1133,20 +1145,9 @@ static struct protection_domain *domain_for_device(u16 devid)
 	return dom;
 }
 
-/*
- * If a device is not yet associated with a domain, this function does
- * assigns it visible for the hardware
- */
-static void __attach_device(struct amd_iommu *iommu,
-			    struct protection_domain *domain,
-			    u16 devid)
+static void set_dte_entry(u16 devid, struct protection_domain *domain)
 {
-	u64 pte_root;
-
-	/* lock domain */
-	spin_lock(&domain->lock);
-
-	pte_root = virt_to_phys(domain->pt_root);
+	u64 pte_root = virt_to_phys(domain->pt_root);
 
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
@@ -1157,6 +1158,21 @@ static void __attach_device(struct amd_iommu *iommu,
 	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
 
 	amd_iommu_pd_table[devid] = domain;
+}
+
+/*
+ * If a device is not yet associated with a domain, this function
+ * attaches it and makes it visible to the hardware
+ */
+static void __attach_device(struct amd_iommu *iommu,
+			    struct protection_domain *domain,
+			    u16 devid)
+{
+	/* lock domain */
+	spin_lock(&domain->lock);
+
+	/* update DTE entry */
+	set_dte_entry(devid, domain);
+
 	domain->dev_cnt += 1;
 
@@ -1164,6 +1180,10 @@ static void __attach_device(struct amd_iommu *iommu,
 	spin_unlock(&domain->lock);
 }
 
+/*
+ * If a device is not yet associated with a domain, this function
+ * attaches it and makes it visible to the hardware
+ */
 static void attach_device(struct amd_iommu *iommu,
 			  struct protection_domain *domain,
 			  u16 devid)
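
Note: set_dte_entry (above) factors the DTE update out of __attach_device so that update_device_table (below) can rewrite entries when the page-table root or mode changes. The mode lands next to the root pointer in the entry; a sketch assuming DEV_ENTRY_MODE_SHIFT is 9 and the mask 0x7, values from the header that this diff does not show, so treat them as illustrative:

#include <stdio.h>

/* Assumed values, mirroring amd_iommu_types.h (not part of this diff). */
#define DEV_ENTRY_MODE_MASK	0x07ULL
#define DEV_ENTRY_MODE_SHIFT	0x09

int main(void)
{
	unsigned long long pt_root = 0x12345000ULL;	/* made-up root  */
	unsigned long long mode    = 3;			/* 3-level table */
	unsigned long long pte_root;

	pte_root  = pt_root;
	pte_root |= (mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT;

	printf("DTE[0] low word: 0x%llx\n", pte_root);	/* 0x12345600 */
	return 0;
}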
@@ -1389,39 +1409,91 @@ static int get_device_resources(struct device *dev,
 	return 1;
 }
 
+static void update_device_table(struct protection_domain *domain)
+{
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+		if (amd_iommu_pd_table[i] != domain)
+			continue;
+		write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+		set_dte_entry(i, domain);
+		write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	}
+}
+
+static void update_domain(struct protection_domain *domain)
+{
+	if (!domain->updated)
+		return;
+
+	update_device_table(domain);
+	flush_devices_by_domain(domain);
+	iommu_flush_domain(domain->id);
+
+	domain->updated = false;
+}
+
 /*
- * If the pte_page is not yet allocated this function is called
+ * This function is used to add another level to an IO page table. Adding
+ * another level increases the size of the address space by 9 bits to a size up
+ * to 64 bits.
  */
-static u64* alloc_pte(struct protection_domain *dom,
-		      unsigned long address, u64 **pte_page, gfp_t gfp)
+static bool increase_address_space(struct protection_domain *domain,
+				   gfp_t gfp)
 {
-	u64 *pte, *page;
+	u64 *pte;
 
-	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
+	if (domain->mode == PAGE_MODE_6_LEVEL)
+		/* address space already 64 bit large */
+		return false;
 
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(gfp);
-		if (!page)
-			return NULL;
-		*pte = IOMMU_L2_PDE(virt_to_phys(page));
-	}
+	pte = (void *)get_zeroed_page(gfp);
+	if (!pte)
+		return false;
+
+	*pte             = PM_LEVEL_PDE(domain->mode,
+					virt_to_phys(domain->pt_root));
+	domain->pt_root  = pte;
+	domain->mode    += 1;
+	domain->updated  = true;
+
+	return true;
+}
+
+static u64 *alloc_pte(struct protection_domain *domain,
+		      unsigned long address,
+		      int end_lvl,
+		      u64 **pte_page,
+		      gfp_t gfp)
+{
+	u64 *pte, *page;
+	int level;
+
+	while (address > PM_LEVEL_SIZE(domain->mode))
+		increase_address_space(domain, gfp);
 
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+	level =  domain->mode - 1;
+	pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+
+	while (level > end_lvl) {
+		if (!IOMMU_PTE_PRESENT(*pte)) {
+			page = (u64 *)get_zeroed_page(gfp);
+			if (!page)
+				return NULL;
+			*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
+		}
 
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(gfp);
-		if (!page)
-			return NULL;
-		*pte = IOMMU_L1_PDE(virt_to_phys(page));
-	}
+		level -= 1;
 
-	pte = IOMMU_PTE_PAGE(*pte);
+		pte = IOMMU_PTE_PAGE(*pte);
 
-	if (pte_page)
-		*pte_page = pte;
+		if (pte_page && level == end_lvl)
+			*pte_page = pte;
 
-	pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+		pte = &pte[PM_LEVEL_INDEX(level, address)];
+	}
 
 	return pte;
 }
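
Note: increase_address_space stacks a new root on top of the old one (the old root becomes entry 0 of the new level), bumps mode, and only marks domain->updated; update_domain later rewrites the DTEs and flushes once. A compilable stub of that lazy-update pattern (bodies reduced to prints, names from the patch):

#include <stdbool.h>
#include <stdio.h>

struct protection_domain { int mode; bool updated; };

static void update_domain(struct protection_domain *d)
{
	if (!d->updated)
		return;
	/* kernel: update_device_table() plus device and domain flushes */
	printf("republish root, mode=%d\n", d->mode);
	d->updated = false;
}

static void increase_address_space(struct protection_domain *d)
{
	d->mode    += 1;	/* new top-level table         */
	d->updated  = true;	/* defer DTE rewrite and flush */
}

int main(void)
{
	struct protection_domain d = { .mode = 2, .updated = false };

	increase_address_space(&d);	/* e.g. from inside alloc_pte()   */
	update_domain(&d);		/* map paths call this at the end */
	update_domain(&d);		/* second call is a cheap no-op   */
	return 0;
}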
@@ -1441,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
 
 	pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
 	if (!pte) {
-		pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
+		pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page,
+				GFP_ATOMIC);
 		aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
 	} else
-		pte += IOMMU_PTE_L0_INDEX(address);
+		pte += PM_LEVEL_INDEX(0, address);
 
+	update_domain(&dom->domain);
+
 	return pte;
 }
@@ -1506,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 	if (!pte)
 		return;
 
-	pte += IOMMU_PTE_L0_INDEX(address);
+	pte += PM_LEVEL_INDEX(0, address);
 
 	WARN_ON(!*pte);
 
@@ -2240,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
 	paddr &= PAGE_MASK;
 
 	for (i = 0; i < npages; ++i) {
-		ret = iommu_map_page(domain, iova, paddr, prot);
+		ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
 		if (ret)
 			return ret;
 
@@ -2261,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
 	iova  &= PAGE_MASK;
 
 	for (i = 0; i < npages; ++i) {
-		iommu_unmap_page(domain, iova);
+		iommu_unmap_page(domain, iova, PM_MAP_4k);
 		iova  += PAGE_SIZE;
 	}
 
@@ -2276,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 	phys_addr_t paddr;
 	u64 *pte;
 
-	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
-
-	if (!IOMMU_PTE_PRESENT(*pte))
-		return 0;
+	pte = fetch_pte(domain, iova, PM_MAP_4k);
 
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
-
-	if (!IOMMU_PTE_PRESENT(*pte))
-		return 0;
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
-
-	if (!IOMMU_PTE_PRESENT(*pte))
+	if (!pte || !IOMMU_PTE_PRESENT(*pte))
 		return 0;
 
 	paddr  = *pte & IOMMU_PAGE_MASK;
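
Note: after the fetch_pte lookup the function finishes the translation by combining the page frame from the PTE with the page offset from the iova (the remainder of this hunk is truncated above). A standalone sketch with made-up values:

#include <stdio.h>

#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)

int main(void)
{
	unsigned long long pte  = 0x12345003ULL; /* frame | low flag bits */
	unsigned long long iova = 0xdeadb123ULL;
	unsigned long long paddr;

	paddr  = pte & IOMMU_PAGE_MASK;	/* physical page frame    */
	paddr |= iova & 0xfffULL;	/* offset within the page */

	printf("paddr = 0x%llx\n", paddr);	/* 0x12345123 */
	return 0;
}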