Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3269ee0b authored by Alex Williamson, committed by Joerg Roedel
Browse files

intel-iommu: Fix leaks in pagetable freeing



At best the current code only seems to free the leaf pagetables and
the root.  If you're unlucky enough to have a large gap (like any
QEMU guest with more than 3G of memory), only the first chunk of leaf
pagetables are freed (plus the root).  This is a massive memory leak.
This patch re-writes the pagetable freeing function to use a
recursive algorithm and manages to not only free all the pagetables,
but does it without any apparent performance loss versus the current
broken version.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
parent d4e4ab86
Loading
Loading
Loading
Loading
+35 −37
Original line number Diff line number Diff line
@@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain,
	return order;
}

/*
 * Recursively free the lower-level page-table pages referenced from @pte
 * that are entirely covered by the inclusive pfn range
 * [@start_pfn, @last_pfn].
 *
 * @domain:    domain passed to domain_flush_cache() after an entry is cleared
 * @level:     level of the table @pte points into; recursion stops at level 2
 * @pte:       base of the table to walk; indexed with pfn_level_offset()
 * @pfn:       pfn at which to start walking this table (the caller in this
 *             file passes 0 for the root; recursion passes level_pfn)
 * @start_pfn: first pfn of the range being freed (inclusive)
 * @last_pfn:  last pfn of the range being freed (inclusive)
 *
 * Entries that are not present, or for which dma_pte_superpage() is true,
 * are skipped.  For every other entry the child table is visited first
 * (when @level > 2) and then, if the range covers the whole span of the
 * entry, the entry is cleared, flushed and the child table page is freed.
 */
static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       struct dma_pte *pte, unsigned long pfn,
			       unsigned long start_pfn, unsigned long last_pfn)
{
	/* Never start walking below the beginning of the requested range */
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		/*
		 * NOTE(review): level_mask(level - 1) may align level_pfn more
		 * finely than the span of one entry at this level when pfn is
		 * not entry-aligned; confirm against the level_mask()
		 * definition whether level_mask(level) is intended.
		 */
		level_pfn = pfn & level_mask(level - 1);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		/* Descend first so children are released before their parent */
		if (level > 2)
			dma_pte_free_level(domain, level - 1, level_pte,
					   level_pfn, start_pfn, last_pfn);

		/*
		 * If range covers entire pagetable, free it.  last_pfn is
		 * inclusive, so the last pfn spanned by this entry is
		 * level_pfn + level_size(level) - 1; comparing against
		 * level_pfn + level_size(level) would skip (and leak) a table
		 * whose span ends exactly at last_pfn.
		 */
		if (!(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *first_pte, *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	unsigned long tmp;
	int large_page = 2;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start_pfn, level);

		/* If we can't even clear one PTE at this level, we're done */
		if (tmp + level_size(level) - 1 > last_pfn)
			return;

		do {
			large_page = level;
			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
			if (large_page > level)
				level = large_page + 1;
			if (!pte) {
				tmp = align_to_level(tmp + 1, level + 1);
				continue;
			}
			do {
				if (dma_pte_present(pte)) {
					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
					dma_clear_pte(pte);
				}
				pte++;
				tmp += level_size(level);
			} while (!first_pte_in_page(pte) &&
				 tmp + level_size(level) - 1 <= last_pfn);

			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
			   domain->pgd, 0, start_pfn, last_pfn);

		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
		level++;
	}
	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);