Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 44b111b5 authored by Steffen Persvold's avatar Steffen Persvold Committed by Ingo Molnar
Browse files

x86: Add NumaChip support



Adds support for Numascale NumaChip large-SMP systems. It is
needed to enable the booting of more than ~168 cores.

v2:
 - [Steffen] enumerate only accessible northbridges
 - [Daniel] rediffed and validated against 3.1-rc10

v3:
 - [Daniel] use x86_init core numbering override
 - [Daniel] cleanups as per feedback

v4:
 - [Daniel] use updated x86_cpuinit override

v5:
 - drop disabling interrupts locally, as ISR write is atomic; drop delay
 - added read-mostly annotations where appropriate
 - require CONFIG_SMP, so drop conditional path

Workload tested on 96 cores/16 sockets.

Signed-off-by: default avatarSteffen Persvold <sp@numascale.com>
Signed-off-by: default avatarDaniel J Blueman <daniel@numascale-asia.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Link: http://lkml.kernel.org/r/1323101246-2400-1-git-send-email-daniel@numascale-asia.com


Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 64be4c1c
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -343,6 +343,7 @@ config X86_EXTENDED_PLATFORM

	  If you enable this option then you'll be able to select support
	  for the following (non-PC) 64 bit x86 platforms:
		Numascale NumaChip
		ScaleMP vSMP
		SGI Ultraviolet

@@ -351,6 +352,18 @@ config X86_EXTENDED_PLATFORM
endif
# This is an alphabetically sorted list of 64 bit extended platforms
# Please maintain the alphabetic order if and when there are additions
config X86_NUMACHIP
	bool "Numascale NumaChip"
	depends on X86_64
	depends on X86_EXTENDED_PLATFORM
	depends on NUMA
	depends on SMP
	depends on X86_X2APIC
	depends on !EDAC_AMD64
	---help---
	  Adds support for Numascale NumaChip large-SMP systems. Needed to
	  enable more than ~168 cores.
	  If you don't have one of these, you should say N here.

config X86_VSMP
	bool "ScaleMP vSMP"
+167 −0
Original line number Diff line number Diff line
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Numascale NumaConnect-Specific Header file
 *
 * Copyright (C) 2011 Numascale AS. All rights reserved.
 *
 * Send feedback to <support@numascale.com>
 *
 */

#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H

#include <linux/numa.h>
#include <linux/percpu.h>
#include <linux/io.h>
#include <linux/swab.h>
#include <asm/types.h>
#include <asm/processor.h>

#define CSR_NODE_SHIFT		16
#define CSR_NODE_BITS(p)	(((unsigned long)(p)) << CSR_NODE_SHIFT)
#define CSR_NODE_MASK		0x0fff		/* 4K nodes */

/* 32K CSR space, b15 indicates geo/non-geo */
#define CSR_OFFSET_MASK	0x7fffUL

/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
#define NUMACHIP_GCSR_BASE	0x3fff00000000ULL
#define NUMACHIP_GCSR_LIM	0x3fff0fffffffULL
#define NUMACHIP_GCSR_SIZE	(NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)

/*
 * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
 * when using the direct mapping on x86_64, both start and size needs to be
 * aligned with PMD_SIZE which is 2M
 */
#define NUMACHIP_LCSR_BASE	0x3ffffe000000ULL
#define NUMACHIP_LCSR_LIM	0x3fffffffffffULL
#define NUMACHIP_LCSR_SIZE	(NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)

static inline void *gcsr_address(int node, unsigned long offset)
{
	return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
		CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
}

static inline void *lcsr_address(unsigned long offset)
{
	return __va(NUMACHIP_LCSR_BASE | (1UL << 15) |
		CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
}

static inline unsigned int read_gcsr(int node, unsigned long offset)
{
	return swab32(readl(gcsr_address(node, offset)));
}

static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
{
	writel(swab32(val), gcsr_address(node, offset));
}

static inline unsigned int read_lcsr(unsigned long offset)
{
	return swab32(readl(lcsr_address(offset)));
}

static inline void write_lcsr(unsigned long offset, unsigned int val)
{
	writel(swab32(val), lcsr_address(offset));
}

/* ========================================================================= */
/*                   CSR_G0_STATE_CLEAR                                      */
/* ========================================================================= */

#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
union numachip_csr_g0_state_clear {
	unsigned int v;
	struct numachip_csr_g0_state_clear_s {
		unsigned int _state:2;
		unsigned int _rsvd_2_6:5;
		unsigned int _lost:1;
		unsigned int _rsvd_8_31:24;
	} s;
};

/* ========================================================================= */
/*                   CSR_G0_NODE_IDS                                         */
/* ========================================================================= */

#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
union numachip_csr_g0_node_ids {
	unsigned int v;
	struct numachip_csr_g0_node_ids_s {
		unsigned int _initialid:16;
		unsigned int _nodeid:12;
		unsigned int _rsvd_28_31:4;
	} s;
};

/* ========================================================================= */
/*                   CSR_G3_EXT_IRQ_GEN                                      */
/* ========================================================================= */

#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
union numachip_csr_g3_ext_irq_gen {
	unsigned int v;
	struct numachip_csr_g3_ext_irq_gen_s {
		unsigned int _vector:8;
		unsigned int _msgtype:3;
		unsigned int _index:5;
		unsigned int _destination_apic_id:16;
	} s;
};

/* ========================================================================= */
/*                   CSR_G3_EXT_IRQ_STATUS                                   */
/* ========================================================================= */

#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
union numachip_csr_g3_ext_irq_status {
	unsigned int v;
	struct numachip_csr_g3_ext_irq_status_s {
		unsigned int _result:32;
	} s;
};

/* ========================================================================= */
/*                   CSR_G3_EXT_IRQ_DEST                                     */
/* ========================================================================= */

#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
union numachip_csr_g3_ext_irq_dest {
	unsigned int v;
	struct numachip_csr_g3_ext_irq_dest_s {
		unsigned int _irq:8;
		unsigned int _rsvd_8_31:24;
	} s;
};

/* ========================================================================= */
/*                   CSR_G3_NC_ATT_MAP_SELECT                                */
/* ========================================================================= */

#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
union numachip_csr_g3_nc_att_map_select {
	unsigned int v;
	struct numachip_csr_g3_nc_att_map_select_s {
		unsigned int _upper_address_bits:4;
		unsigned int _select_ram:4;
		unsigned int _rsvd_8_31:24;
	} s;
};

/* ========================================================================= */
/*                   CSR_G3_NC_ATT_MAP_SELECT_0-255                          */
/* ========================================================================= */

#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))

#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o

ifeq ($(CONFIG_X86_64),y)
# APIC probe will depend on the listing order here
obj-$(CONFIG_X86_NUMACHIP)	+= apic_numachip.o
obj-$(CONFIG_X86_UV)		+= x2apic_uv_x.o
obj-$(CONFIG_X86_X2APIC)	+= x2apic_phys.o
obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o
+294 −0
Original line number Diff line number Diff line
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Numascale NumaConnect-Specific APIC Code
 *
 * Copyright (C) 2011 Numascale AS. All rights reserved.
 *
 * Send feedback to <support@numascale.com>
 *
 */

#include <linux/errno.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/delay.h>

#include <asm/numachip/numachip_csr.h>
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/apic_flat_64.h>

static int numachip_system __read_mostly;

static struct apic apic_numachip __read_mostly;

static unsigned int get_apic_id(unsigned long x)
{
	unsigned long value;
	unsigned int id;

	rdmsrl(MSR_FAM10H_NODE_ID, value);
	id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);

	return id;
}

static unsigned long set_apic_id(unsigned int id)
{
	unsigned long x;

	x = ((id & 0xffU) << 24);
	return x;
}

static unsigned int read_xapic_id(void)
{
	return get_apic_id(apic_read(APIC_ID));
}

static int numachip_apic_id_registered(void)
{
	return physid_isset(read_xapic_id(), phys_cpu_present_map);
}

static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
{
	return initial_apic_id >> index_msb;
}

static const struct cpumask *numachip_target_cpus(void)
{
	return cpu_online_mask;
}

static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	cpumask_clear(retmask);
	cpumask_set_cpu(cpu, retmask);
}

static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
	union numachip_csr_g3_ext_irq_gen int_gen;

	int_gen.s._destination_apic_id = phys_apicid;
	int_gen.s._vector = 0;
	int_gen.s._msgtype = APIC_DM_INIT >> 8;
	int_gen.s._index = 0;

	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);

	int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
	int_gen.s._vector = start_rip >> 12;

	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);

	atomic_set(&init_deasserted, 1);
	return 0;
}

static void numachip_send_IPI_one(int cpu, int vector)
{
	union numachip_csr_g3_ext_irq_gen int_gen;
	int apicid = per_cpu(x86_cpu_to_apicid, cpu);

	int_gen.s._destination_apic_id = apicid;
	int_gen.s._vector = vector;
	int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
	int_gen.s._index = 0;

	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
}

static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		numachip_send_IPI_one(cpu, vector);
}

static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
						int vector)
{
	unsigned int this_cpu = smp_processor_id();
	unsigned int cpu;

	for_each_cpu(cpu, mask) {
		if (cpu != this_cpu)
			numachip_send_IPI_one(cpu, vector);
	}
}

static void numachip_send_IPI_allbutself(int vector)
{
	unsigned int this_cpu = smp_processor_id();
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		if (cpu != this_cpu)
			numachip_send_IPI_one(cpu, vector);
	}
}

static void numachip_send_IPI_all(int vector)
{
	numachip_send_IPI_mask(cpu_online_mask, vector);
}

static void numachip_send_IPI_self(int vector)
{
	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}

static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
	int cpu;

	/*
	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
	 * May as well be the first.
	 */
	cpu = cpumask_first(cpumask);
	if (likely((unsigned)cpu < nr_cpu_ids))
		return per_cpu(x86_cpu_to_apicid, cpu);

	return BAD_APICID;
}

static unsigned int
numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
				const struct cpumask *andmask)
{
	int cpu;

	/*
	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
	 * May as well be the first.
	 */
	for_each_cpu_and(cpu, cpumask, andmask) {
		if (cpumask_test_cpu(cpu, cpu_online_mask))
			break;
	}
	return per_cpu(x86_cpu_to_apicid, cpu);
}

static int __init numachip_probe(void)
{
	return apic == &apic_numachip;
}

static void __init map_csrs(void)
{
	printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
		NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
	init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);

	printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
		NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
	init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
}

static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
	c->phys_proc_id = node;
	per_cpu(cpu_llc_id, smp_processor_id()) = node;
}

static int __init numachip_system_init(void)
{
	unsigned int val;

	if (!numachip_system)
		return 0;

	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;

	map_csrs();

	val = read_lcsr(CSR_G0_NODE_IDS);
	printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);

	return 0;
}
early_initcall(numachip_system_init);

static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	if (!strncmp(oem_id, "NUMASC", 6)) {
		numachip_system = 1;
		return 1;
	}

	return 0;
}

static struct apic apic_numachip __refconst = {

	.name				= "NumaConnect system",
	.probe				= numachip_probe,
	.acpi_madt_oem_check		= numachip_acpi_madt_oem_check,
	.apic_id_registered		= numachip_apic_id_registered,

	.irq_delivery_mode		= dest_Fixed,
	.irq_dest_mode			= 0, /* physical */

	.target_cpus			= numachip_target_cpus,
	.disable_esr			= 0,
	.dest_logical			= 0,
	.check_apicid_used		= NULL,
	.check_apicid_present		= NULL,

	.vector_allocation_domain	= numachip_vector_allocation_domain,
	.init_apic_ldr			= flat_init_apic_ldr,

	.ioapic_phys_id_map		= NULL,
	.setup_apic_routing		= NULL,
	.multi_timer_check		= NULL,
	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
	.apicid_to_cpu_present		= NULL,
	.setup_portio_remap		= NULL,
	.check_phys_apicid_present	= default_check_phys_apicid_present,
	.enable_apic_mode		= NULL,
	.phys_pkg_id			= numachip_phys_pkg_id,
	.mps_oem_check			= NULL,

	.get_apic_id			= get_apic_id,
	.set_apic_id			= set_apic_id,
	.apic_id_mask			= 0xffU << 24,

	.cpu_mask_to_apicid		= numachip_cpu_mask_to_apicid,
	.cpu_mask_to_apicid_and		= numachip_cpu_mask_to_apicid_and,

	.send_IPI_mask			= numachip_send_IPI_mask,
	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
	.send_IPI_allbutself		= numachip_send_IPI_allbutself,
	.send_IPI_all			= numachip_send_IPI_all,
	.send_IPI_self			= numachip_send_IPI_self,

	.wakeup_secondary_cpu		= numachip_wakeup_secondary,
	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
	.wait_for_init_deassert		= NULL,
	.smp_callin_clear_local_apic	= NULL,
	.inquire_remote_apic		= NULL, /* REMRD not supported */

	.read				= native_apic_mem_read,
	.write				= native_apic_mem_write,
	.icr_read			= native_apic_icr_read,
	.icr_write			= native_apic_icr_write,
	.wait_icr_idle			= native_apic_wait_icr_idle,
	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
};
apic_driver(apic_numachip);