Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 64ff3128 authored by Anton Blanchard's avatar Anton Blanchard Committed by Benjamin Herrenschmidt
Browse files

powerpc: Add support for popcnt instructions



POWER5 added popcntb, and POWER7 added popcntw and popcntd. As a first step
this patch does all the work out of line, but it would be nice to implement
them as inlines with an out of line fallback.

The performance issue with hweight was noticed when disabling SMT on a large
(192 thread) POWER7 box. The patch improves that testcase by about 8%.

Signed-off-by: default avatarAnton Blanchard <anton@samba.org>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent 72083646
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -267,7 +267,16 @@ static __inline__ int fls64(__u64 x)
#include <asm-generic/bitops/fls64.h>
#endif /* __powerpc64__ */

#ifdef CONFIG_PPC64
unsigned int __arch_hweight8(unsigned int w);
unsigned int __arch_hweight16(unsigned int w);
unsigned int __arch_hweight32(unsigned int w);
unsigned long __arch_hweight64(__u64 w);
#include <asm-generic/bitops/const_hweight.h>
#else
#include <asm-generic/bitops/hweight.h>
#endif

#include <asm-generic/bitops/find.h>

/* Little-endian versions */
+6 −3
Original line number Diff line number Diff line
@@ -199,6 +199,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0100000000000000)
#define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0200000000000000)
#define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0400000000000000)
#define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0800000000000000)

#ifndef __ASSEMBLY__

@@ -403,21 +405,22 @@ extern const char *powerpc_base_platform;
	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
	    CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS)
	    CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
	    CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
	    CPU_FTR_STCX_CHECKS_ADDRESS)
	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
	    CPU_FTR_STCX_CHECKS_ADDRESS)
	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
#define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
+7 −0
Original line number Diff line number Diff line
@@ -186,3 +186,10 @@ EXPORT_SYMBOL(__mtdcr);
EXPORT_SYMBOL(__mfdcr);
#endif
EXPORT_SYMBOL(empty_zero_page);

#ifdef CONFIG_PPC64
EXPORT_SYMBOL(__arch_hweight8);
EXPORT_SYMBOL(__arch_hweight16);
EXPORT_SYMBOL(__arch_hweight32);
EXPORT_SYMBOL(__arch_hweight64);
#endif
+1 −1
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ obj-$(CONFIG_HAS_IOMEM) += devres.o

obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
			   memcpy_64.o usercopy_64.o mem_64.o string.o \
			   checksum_wrappers_64.o
			   checksum_wrappers_64.o hweight_64.o
obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
+110 −0
Original line number Diff line number Diff line
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2010
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

/* Note: This code relies on -mminimal-toc */

_GLOBAL(__arch_hweight8)
BEGIN_FTR_SECTION
	b .__sw_hweight8
	nop
	nop
FTR_SECTION_ELSE
	popcntb	r3,r3
	clrldi	r3,r3,64-8
	blr
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)

_GLOBAL(__arch_hweight16)
BEGIN_FTR_SECTION
	b .__sw_hweight16
	nop
	nop
	nop
	nop
FTR_SECTION_ELSE
  BEGIN_FTR_SECTION_NESTED(50)
	popcntb r3,r3
	srdi	r4,r3,8
	add	r3,r4,r3
	clrldi	r3,r3,64-8
	blr
  FTR_SECTION_ELSE_NESTED(50)
	clrlwi  r3,r3,16
	popcntw	r3,r3
	clrldi	r3,r3,64-8
	blr
  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)

_GLOBAL(__arch_hweight32)
BEGIN_FTR_SECTION
	b .__sw_hweight32
	nop
	nop
	nop
	nop
	nop
	nop
FTR_SECTION_ELSE
  BEGIN_FTR_SECTION_NESTED(51)
	popcntb r3,r3
	srdi	r4,r3,16
	add	r3,r4,r3
	srdi	r4,r3,8
	add	r3,r4,r3
	clrldi	r3,r3,64-8
	blr
  FTR_SECTION_ELSE_NESTED(51)
	popcntw	r3,r3
	clrldi	r3,r3,64-8
	blr
  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)

_GLOBAL(__arch_hweight64)
BEGIN_FTR_SECTION
	b .__sw_hweight64
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
FTR_SECTION_ELSE
  BEGIN_FTR_SECTION_NESTED(52)
	popcntb r3,r3
	srdi	r4,r3,32
	add	r3,r4,r3
	srdi	r4,r3,16
	add	r3,r4,r3
	srdi	r4,r3,8
	add	r3,r4,r3
	clrldi	r3,r3,64-8
	blr
  FTR_SECTION_ELSE_NESTED(52)
	popcntd	r3,r3
	clrldi	r3,r3,64-8
	blr
  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)