Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 03bb2d65 authored by Christophe Leroy's avatar Christophe Leroy Committed by Scott Wood
Browse files

powerpc: get hugetlbpage handling more generic



Today there are two implementations of hugetlbpages which are managed
by exclusive #ifdefs:
* FSL_BOOKE: several directory entries point to the same single hugepage
* BOOK3S: one upper level directory entry points to a table of hugepages

In preparation of implementation of hugepage support on the 8xx, we
need a mix of the two above solutions, because the 8xx needs both cases
depending on the size of pages:
* In 4k page size mode, each PGD entry covers a 4M bytes area. It means
that 2 PGD entries will be necessary to cover an 8M hugepage while a
single PGD entry will cover 8x 512k hugepages.
* In 16k page size mode, each PGD entry covers a 64M bytes area. It means
that 8x 8M hugepages will be covered by one PGD entry and 64x 512k
hugepages will be covered by one PGD entry.

This patch:
* removes #ifdefs in favor of if/else based on the range sizes
* merges the two huge_pte_alloc() functions as they are pretty similar
* merges the two hugetlbpage_init() functions as they are pretty similar

Signed-off-by: default avatarChristophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> (v3)
Signed-off-by: default avatarScott Wood <oss@buserror.net>
parent 9b081e10
Loading
Loading
Loading
Loading
+81 −114
Original line number Original line Diff line number Diff line
@@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
{
{
	struct kmem_cache *cachep;
	struct kmem_cache *cachep;
	pte_t *new;
	pte_t *new;

#ifdef CONFIG_PPC_FSL_BOOK3E
	int i;
	int i;
	int num_hugepd = 1 << (pshift - pdshift);
	int num_hugepd;

	if (pshift >= pdshift) {
		cachep = hugepte_cache;
		cachep = hugepte_cache;
#else
		num_hugepd = 1 << (pshift - pdshift);
	} else {
		cachep = PGT_CACHE(pdshift - pshift);
		cachep = PGT_CACHE(pdshift - pshift);
#endif
		num_hugepd = 1;
	}


	new = kmem_cache_zalloc(cachep, GFP_KERNEL);
	new = kmem_cache_zalloc(cachep, GFP_KERNEL);


@@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
	smp_wmb();
	smp_wmb();


	spin_lock(&mm->page_table_lock);
	spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E

	/*
	/*
	 * We have multiple higher-level entries that point to the same
	 * We have multiple higher-level entries that point to the same
	 * actual pte location.  Fill in each as we go and backtrack on error.
	 * actual pte location.  Fill in each as we go and backtrack on error.
@@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
		if (unlikely(!hugepd_none(*hpdp)))
		if (unlikely(!hugepd_none(*hpdp)))
			break;
			break;
		else
		else
#ifdef CONFIG_PPC_BOOK3S_64
			hpdp->pd = __pa(new) |
				   (shift_to_mmu_psize(pshift) << 2);
#else
			/* We use the old format for PPC_FSL_BOOK3E */
			/* We use the old format for PPC_FSL_BOOK3E */
			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
	}
	}
	/* If we bailed from the for loop early, an error occurred, clean up */
	/* If we bailed from the for loop early, an error occurred, clean up */
	if (i < num_hugepd) {
	if (i < num_hugepd) {
@@ -109,17 +116,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			hpdp->pd = 0;
			hpdp->pd = 0;
		kmem_cache_free(cachep, new);
		kmem_cache_free(cachep, new);
	}
	}
#else
	if (!hugepd_none(*hpdp))
		kmem_cache_free(cachep, new);
	else {
#ifdef CONFIG_PPC_BOOK3S_64
		hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
#else
		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
	}
#endif
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&mm->page_table_lock);
	return 0;
	return 0;
}
}
@@ -136,7 +132,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
#endif


#ifdef CONFIG_PPC_BOOK3S_64
/*
/*
 * At this point we do the placement change only for BOOK3S 64. This would
 * At this point we do the placement change only for BOOK3S 64. This would
 * possibly work on other subarchs.
 * possibly work on other subarchs.
@@ -153,6 +148,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
	addr &= ~(sz-1);
	addr &= ~(sz-1);
	pg = pgd_offset(mm, addr);
	pg = pgd_offset(mm, addr);


#ifdef CONFIG_PPC_BOOK3S_64
	if (pshift == PGDIR_SHIFT)
	if (pshift == PGDIR_SHIFT)
		/* 16GB huge page */
		/* 16GB huge page */
		return (pte_t *) pg;
		return (pte_t *) pg;
@@ -178,32 +174,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
				hpdp = (hugepd_t *)pm;
				hpdp = (hugepd_t *)pm;
		}
		}
	}
	}
	if (!hpdp)
		return NULL;

	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
		return NULL;

	return hugepte_offset(*hpdp, addr, pdshift);
}

#else
#else

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);
	unsigned pdshift = PGDIR_SHIFT;

	addr &= ~(sz-1);

	pg = pgd_offset(mm, addr);

	if (pshift >= HUGEPD_PGD_SHIFT) {
	if (pshift >= HUGEPD_PGD_SHIFT) {
		hpdp = (hugepd_t *)pg;
		hpdp = (hugepd_t *)pg;
	} else {
	} else {
@@ -217,7 +188,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
			hpdp = (hugepd_t *)pm;
			hpdp = (hugepd_t *)pm;
		}
		}
	}
	}

#endif
	if (!hpdp)
	if (!hpdp)
		return NULL;
		return NULL;


@@ -228,7 +199,6 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz


	return hugepte_offset(*hpdp, addr, pdshift);
	return hugepte_offset(*hpdp, addr, pdshift);
}
}
#endif


#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages.  This function is used in early
/* Build list of addresses of gigantic pages.  This function is used in early
@@ -310,7 +280,11 @@ static int __init do_gpage_early_setup(char *param, char *val,
				npages = 0;
				npages = 0;
			if (npages > MAX_NUMBER_GPAGES) {
			if (npages > MAX_NUMBER_GPAGES) {
				pr_warn("MMU: %lu pages requested for page "
				pr_warn("MMU: %lu pages requested for page "
#ifdef CONFIG_PHYS_ADDR_T_64BIT
					"size %llu KB, limiting to "
					"size %llu KB, limiting to "
#else
					"size %u KB, limiting to "
#endif
					__stringify(MAX_NUMBER_GPAGES) "\n",
					__stringify(MAX_NUMBER_GPAGES) "\n",
					npages, size / 1024);
					npages, size / 1024);
				npages = MAX_NUMBER_GPAGES;
				npages = MAX_NUMBER_GPAGES;
@@ -442,6 +416,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
	}
	}
	put_cpu_var(hugepd_freelist_cur);
	put_cpu_var(hugepd_freelist_cur);
}
}
#else
static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
#endif
#endif


static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
@@ -453,13 +429,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif


	unsigned long pdmask = ~((1UL << pdshift) - 1);
	unsigned long pdmask = ~((1UL << pdshift) - 1);
	unsigned int num_hugepd = 1;
	unsigned int num_hugepd = 1;
	unsigned int shift = hugepd_shift(*hpdp);


#ifdef CONFIG_PPC_FSL_BOOK3E
	/* Note: On fsl the hpdp may be the first of several */
	/* Note: On fsl the hpdp may be the first of several */
	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
	if (shift > pdshift)
#else
		num_hugepd = 1 << (shift - pdshift);
	unsigned int shift = hugepd_shift(*hpdp);
#endif


	start &= pdmask;
	start &= pdmask;
	if (start < floor)
	if (start < floor)
@@ -475,11 +449,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
	for (i = 0; i < num_hugepd; i++, hpdp++)
	for (i = 0; i < num_hugepd; i++, hpdp++)
		hpdp->pd = 0;
		hpdp->pd = 0;


#ifdef CONFIG_PPC_FSL_BOOK3E
	if (shift >= pdshift)
		hugepd_free(tlb, hugepte);
		hugepd_free(tlb, hugepte);
#else
	else
		pgtable_free_tlb(tlb, hugepte, pdshift - shift);
		pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#endif
}
}


static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -492,6 +465,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,


	start = addr;
	start = addr;
	do {
	do {
		unsigned long more;

		pmd = pmd_offset(pud, addr);
		pmd = pmd_offset(pud, addr);
		next = pmd_addr_end(addr, end);
		next = pmd_addr_end(addr, end);
		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
@@ -502,15 +477,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
			WARN_ON(!pmd_none_or_clear_bad(pmd));
			WARN_ON(!pmd_none_or_clear_bad(pmd));
			continue;
			continue;
		}
		}
#ifdef CONFIG_PPC_FSL_BOOK3E
		/*
		/*
		 * Increment next by the size of the huge mapping since
		 * Increment next by the size of the huge mapping since
		 * there may be more than one entry at this level for a
		 * there may be more than one entry at this level for a
		 * single hugepage, but all of them point to
		 * single hugepage, but all of them point to
		 * the same kmem cache that holds the hugepte.
		 * the same kmem cache that holds the hugepte.
		 */
		 */
		next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
		more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
#endif
		if (more > next)
			next = more;

		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
				  addr, next, floor, ceiling);
	} while (addr = next, addr != end);
	} while (addr = next, addr != end);
@@ -550,15 +526,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
					       ceiling);
		} else {
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			unsigned long more;
			/*
			/*
			 * Increment next by the size of the huge mapping since
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at this level for a
			 * there may be more than one entry at this level for a
			 * single hugepage, but all of them point to
			 * single hugepage, but all of them point to
			 * the same kmem cache that holds the hugepte.
			 * the same kmem cache that holds the hugepte.
			 */
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
			more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif
			if (more > next)
				next = more;

			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
					  addr, next, floor, ceiling);
		}
		}
@@ -615,15 +593,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
				continue;
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			unsigned long more;
			/*
			/*
			 * Increment next by the size of the huge mapping since
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at the pgd level
			 * there may be more than one entry at the pgd level
			 * for a single hugepage, but all of them point to the
			 * for a single hugepage, but all of them point to the
			 * same kmem cache that holds the hugepte.
			 * same kmem cache that holds the hugepte.
			 */
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
			more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
			if (more > next)
				next = more;

			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
					  addr, next, floor, ceiling);
		}
		}
@@ -753,12 +733,13 @@ static int __init add_huge_page_size(unsigned long long size)


	/* Check that it is a page size supported by the hardware and
	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable and slice limits. */
	 * that it fits within pagetable and slice limits. */
	if (size <= PAGE_SIZE)
		return -EINVAL;
#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC_FSL_BOOK3E
	if ((size < PAGE_SIZE) || !is_power_of_4(size))
	if (!is_power_of_4(size))
		return -EINVAL;
		return -EINVAL;
#else
#else
	if (!is_power_of_2(size)
	if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
		return -EINVAL;
		return -EINVAL;
#endif
#endif


@@ -791,53 +772,15 @@ static int __init hugepage_setup_sz(char *str)
}
}
__setup("hugepagesz=", hugepage_setup_sz);
__setup("hugepagesz=", hugepage_setup_sz);


#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
static int __init hugetlbpage_init(void)
{
{
	int psize;
	int psize;


	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
#if !defined(CONFIG_PPC_FSL_BOOK3E)
		unsigned shift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		/* Don't treat normal page sizes as huge... */
		if (shift != PAGE_SHIFT)
			if (add_huge_page_size(1ULL << shift) < 0)
				continue;
	}

	/*
	 * Create a kmem cache for hugeptes.  The bottom bits in the pte have
	 * size information encoded in them, so align them to allow this
	 */
	hugepte_cache =  kmem_cache_create("hugepte-cache", sizeof(pte_t),
					   HUGEPD_SHIFT_MASK + 1, 0, NULL);
	if (hugepte_cache == NULL)
		panic("%s: Unable to create kmem cache for hugeptes\n",
		      __func__);

	/* Default hpage size = 4M */
	if (mmu_psize_defs[MMU_PAGE_4M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
	else
		panic("%s: Unable to set default huge page size\n", __func__);


	return 0;
}
#else
static int __init hugetlbpage_init(void)
{
	int psize;

	if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
	if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
		return -ENODEV;
		return -ENODEV;

#endif
	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;
		unsigned shift;
		unsigned pdshift;
		unsigned pdshift;
@@ -850,9 +793,9 @@ static int __init hugetlbpage_init(void)
		if (add_huge_page_size(1ULL << shift) < 0)
		if (add_huge_page_size(1ULL << shift) < 0)
			continue;
			continue;


		if (shift < PMD_SHIFT)
		if (shift < HUGEPD_PUD_SHIFT)
			pdshift = PMD_SHIFT;
			pdshift = PMD_SHIFT;
		else if (shift < PUD_SHIFT)
		else if (shift < HUGEPD_PGD_SHIFT)
			pdshift = PUD_SHIFT;
			pdshift = PUD_SHIFT;
		else
		else
			pdshift = PGDIR_SHIFT;
			pdshift = PGDIR_SHIFT;
@@ -860,14 +803,36 @@ static int __init hugetlbpage_init(void)
		 * if we have pdshift and shift value same, we don't
		 * if we have pdshift and shift value same, we don't
		 * use pgt cache for hugepd.
		 * use pgt cache for hugepd.
		 */
		 */
		if (pdshift != shift) {
		if (pdshift > shift) {
			pgtable_cache_add(pdshift - shift, NULL);
			pgtable_cache_add(pdshift - shift, NULL);
			if (!PGT_CACHE(pdshift - shift))
			if (!PGT_CACHE(pdshift - shift))
				panic("hugetlbpage_init(): could not create "
				panic("hugetlbpage_init(): could not create "
				      "pgtable cache for %d bit pagesize\n", shift);
				      "pgtable cache for %d bit pagesize\n", shift);
		}
		}
#ifdef CONFIG_PPC_FSL_BOOK3E
		else if (!hugepte_cache) {
			/*
			 * Create a kmem cache for hugeptes.  The bottom bits in
			 * the pte have size information encoded in them, so
			 * align them to allow this
			 */
			hugepte_cache = kmem_cache_create("hugepte-cache",
							  sizeof(pte_t),
							  HUGEPD_SHIFT_MASK + 1,
							  0, NULL);
			if (hugepte_cache == NULL)
				panic("%s: Unable to create kmem cache "
				      "for hugeptes\n", __func__);

		}
#endif
	}
	}


#ifdef CONFIG_PPC_FSL_BOOK3E
	/* Default hpage size = 4M */
	if (mmu_psize_defs[MMU_PAGE_4M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
#else
	/* Set default large page size. Currently, we pick 16M or 1M
	/* Set default large page size. Currently, we pick 16M or 1M
	 * depending on what is available
	 * depending on what is available
	 */
	 */
@@ -877,11 +842,13 @@ static int __init hugetlbpage_init(void)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;

#endif
	else
		panic("%s: Unable to set default huge page size\n", __func__);


	return 0;
	return 0;
}
}
#endif

arch_initcall(hugetlbpage_init);
arch_initcall(hugetlbpage_init);


void flush_dcache_icache_hugepage(struct page *page)
void flush_dcache_icache_hugepage(struct page *page)