Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dbcf929c authored by David Gibson, committed by Michael Ellerman
Browse files

powerpc/pseries: Add support for hash table resizing



This adds support for using two hypercalls to change the size of the
main hash page table while running as a PAPR guest. For now these
hypercalls are only in experimental qemu versions.

The interface is two part: first H_RESIZE_HPT_PREPARE is used to
allocate and prepare the new hash table. This may be slow, but can be
done asynchronously. Then, H_RESIZE_HPT_COMMIT is used to switch to the
new hash table. This requires that no CPUs be concurrently updating the
HPT, and so must be run under stop_machine().

This also adds a debugfs file which can be used to manually control
HPT resizing for testing purposes.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Paul Mackerras <paulus@samba.org>
[mpe: Rename the debugfs file to "hpt_order"]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 64b40ffb
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -157,6 +157,7 @@ struct mmu_hash_ops {
					       unsigned long addr,
					       unsigned long addr,
					       unsigned char *hpte_slot_array,
					       unsigned char *hpte_slot_array,
					       int psize, int ssize, int local);
					       int psize, int ssize, int local);
	int		(*resize_hpt)(unsigned long shift);
	/*
	/*
	 * Special for kexec.
	 * Special for kexec.
	 * To be called in real mode with interrupts disabled. No locks are
	 * To be called in real mode with interrupts disabled. No locks are
+33 −0
Original line number Original line Diff line number Diff line
@@ -35,7 +35,9 @@
#include <linux/memblock.h>
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
#include <linux/libfdt.h>
#include <linux/debugfs.h>


#include <asm/debug.h>
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu.h>
@@ -1795,3 +1797,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
	/* Finally limit subsequent allocations */
	/* Finally limit subsequent allocations */
	memblock_set_current_limit(ppc64_rma_size);
	memblock_set_current_limit(ppc64_rma_size);
}
}

#ifdef CONFIG_DEBUG_FS

/*
 * debugfs read handler: report the current value of ppc64_pft_size.
 * @data is unused. Always returns 0.
 */
static int hpt_order_get(void *data, u64 *val)
{
	u64 order = ppc64_pft_size;

	*val = order;

	return 0;
}

/*
 * debugfs write handler: forward the written value to the platform's
 * resize_hpt hook and return whatever the hook returns.
 * @data is unused. Returns -ENODEV when no hook is installed.
 */
static int hpt_order_set(void *data, u64 val)
{
	if (mmu_hash_ops.resize_hpt)
		return mmu_hash_ops.resize_hpt(val);

	return -ENODEV;
}

/*
 * File operations for the hpt_order attribute: reads are served by
 * hpt_order_get(), writes by hpt_order_set(), using the "%llu\n" format.
 */
DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");

/*
 * Create the "hpt_order" debugfs file (mode 0600) under
 * powerpc_debugfs_root, backed by fops_hpt_order.
 *
 * A failure to create the file is only logged; it is deliberately not
 * propagated, so this initcall always returns 0.
 */
static int __init hash64_debugfs(void)
{
	if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
				 NULL, &fops_hpt_order)) {
		pr_err("lpar: unable to create hpt_order debugfs file\n");
	}

	return 0;
}
machine_device_initcall(pseries, hash64_debugfs);

#endif /* CONFIG_DEBUG_FS */
+109 −0
Original line number Original line Diff line number Diff line
@@ -27,6 +27,8 @@
#include <linux/console.h>
#include <linux/console.h>
#include <linux/export.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/jump_label.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/page.h>
@@ -609,6 +611,112 @@ static int __init disable_bulk_remove(char *str)


__setup("bulk_remove=", disable_bulk_remove);
__setup("bulk_remove=", disable_bulk_remove);


/*
 * Upper bound on the accumulated long-busy delay that
 * pseries_lpar_resize_hpt() tolerates before cancelling the prepare.
 */
#define HPT_RESIZE_TIMEOUT	10000 /* ms */

/*
 * State handed to pseries_lpar_resize_hpt_commit() through stop_machine().
 */
struct hpt_resize_state {
	/* Requested HPT shift, passed to the commit hypercall */
	unsigned long shift;
	/* Return code of plpar_resize_hpt_commit(), filled in by the callback */
	int commit_rc;
};

/*
 * stop_machine() callback: issue the HPT resize commit hypercall.
 *
 * @data points to a struct hpt_resize_state; the hypercall result is
 * stored in its commit_rc field so the caller can inspect it.
 *
 * Returns 0 once the commit succeeded and the hash table globals have
 * been updated, -EIO otherwise (globals are left untouched).
 */
static int pseries_lpar_resize_hpt_commit(void *data)
{
	struct hpt_resize_state *resize = data;

	resize->commit_rc = plpar_resize_hpt_commit(0, resize->shift);

	if (resize->commit_rc == H_SUCCESS) {
		/* Hypervisor has transitioned the HTAB, update our globals */
		ppc64_pft_size = resize->shift;
		htab_size_bytes = 1UL << ppc64_pft_size;
		/* NOTE(review): >> 7 presumably reflects 128-byte hash groups - confirm */
		htab_hash_mask = (htab_size_bytes >> 7) - 1;

		return 0;
	}

	return -EIO;
}

/* Must be called in user context */
static int pseries_lpar_resize_hpt(unsigned long shift)
{
	struct hpt_resize_state state = {
		.shift = shift,
		.commit_rc = H_FUNCTION,
	};
	unsigned int delay, total_delay = 0;
	int rc;
	ktime_t t0, t1, t2;

	might_sleep();

	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
		return -ENODEV;

	printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
	       shift);

	t0 = ktime_get();

	rc = plpar_resize_hpt_prepare(0, shift);
	while (H_IS_LONG_BUSY(rc)) {
		delay = get_longbusy_msecs(rc);
		total_delay += delay;
		if (total_delay > HPT_RESIZE_TIMEOUT) {
			/* prepare with shift==0 cancels an in-progress resize */
			rc = plpar_resize_hpt_prepare(0, 0);
			if (rc != H_SUCCESS)
				printk(KERN_WARNING
				       "lpar: Unexpected error %d cancelling timed out HPT resize\n",
				       rc);
			return -ETIMEDOUT;
		}
		msleep(delay);
		rc = plpar_resize_hpt_prepare(0, shift);
	};

	switch (rc) {
	case H_SUCCESS:
		/* Continue on */
		break;

	case H_PARAMETER:
		return -EINVAL;
	case H_RESOURCE:
		return -EPERM;
	default:
		printk(KERN_WARNING
		       "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
		       rc);
		return -EIO;
	}

	t1 = ktime_get();

	rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);

	t2 = ktime_get();

	if (rc != 0) {
		switch (state.commit_rc) {
		case H_PTEG_FULL:
			printk(KERN_WARNING
			       "lpar: Hash collision while resizing HPT\n");
			return -ENOSPC;

		default:
			printk(KERN_WARNING
			       "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
			       state.commit_rc);
			return -EIO;
		};
	}

	printk(KERN_INFO
	       "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
	       shift, (long long) ktime_ms_delta(t1, t0),
	       (long long) ktime_ms_delta(t2, t1));

	return 0;
}

void __init hpte_init_pseries(void)
void __init hpte_init_pseries(void)
{
{
	mmu_hash_ops.hpte_invalidate	 = pSeries_lpar_hpte_invalidate;
	mmu_hash_ops.hpte_invalidate	 = pSeries_lpar_hpte_invalidate;
@@ -620,6 +728,7 @@ void __init hpte_init_pseries(void)
	mmu_hash_ops.flush_hash_range	 = pSeries_lpar_flush_hash_range;
	mmu_hash_ops.flush_hash_range	 = pSeries_lpar_flush_hash_range;
	mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
	mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
	mmu_hash_ops.resize_hpt		 = pseries_lpar_resize_hpt;
}
}


#ifdef CONFIG_PPC_SMLPAR
#ifdef CONFIG_PPC_SMLPAR