Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 751ba79c authored by Matt Brown, committed by Michael Ellerman
Browse files

lib/raid6/altivec: Add vpermxor implementation for raid6 Q syndrome



This patch uses the vpermxor instruction to optimise the raid6 Q
syndrome. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a
single instruction. This has been tested for correctness on a ppc64le
vm with a basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the
/lib/raid6/test directory. These results are from an IBM Firestone
machine with ppc64le architecture. The benchmark results show a 35%
speed increase over the best existing algorithm for powerpc (altivec).
The raid6test has also been run on a big-endian ppc64 vm to ensure it
also works for big-endian architectures.

Performance benchmarks:
  raid6: altivecx4 gen() 18773 MB/s
  raid6: altivecx8 gen() 19438 MB/s

  raid6: vpermxor4 gen() 25112 MB/s
  raid6: vpermxor8 gen() 26279 MB/s

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
Reviewed-by: Daniel Axtens <dja@axtens.net>
[mpe: Add VPERMXOR macro so we can build with old binutils]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 7004263b
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -271,6 +271,7 @@
#define PPC_INST_TLBSRX_DOT		0x7c0006a5
#define PPC_INST_VPMSUMW		0x10000488
#define PPC_INST_VPMSUMD		0x100004c8
#define PPC_INST_VPERMXOR		0x1000002d
#define PPC_INST_XXLOR			0xf0000490
#define PPC_INST_XXSWAPD		0xf0000250
#define PPC_INST_XVCPSGNDP		0xf0000780
@@ -517,6 +518,11 @@
#define XVCPSGNDP(t, a, b)	stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
					       VSX_XX3((t), (a), (b))))

/*
 * VPERMXOR - emit a vpermxor instruction as a raw ".long" word.
 *
 * The word is PPC_INST_VPERMXOR OR-ed with the vrt, vra and vrb operand
 * fields (positioned by ___PPC_RT/___PPC_RA/___PPC_RB) and with vrc,
 * which is masked to 5 bits and shifted left by 6.
 *
 * Encoding the instruction by hand means the assembler does not need to
 * know the vpermxor mnemonic, so the code still builds with old binutils.
 */
#define VPERMXOR(vrt, vra, vrb, vrc)				\
	stringify_in_c(.long (PPC_INST_VPERMXOR |		\
			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
			      ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))

#define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
#define PPC_WINKLE		stringify_in_c(.long PPC_INST_WINKLE)
+4 −0
Original line number Diff line number Diff line
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
extern const struct raid6_calls raid6_avx512x4;
extern const struct raid6_calls raid6_tilegx8;
extern const struct raid6_calls raid6_s390vx8;
/*
 * vpermxor-based implementations.
 * NOTE(review): the numeric suffix presumably is the unroll factor the
 * variant was generated with (1, 2, 4, 8) - confirm against the Makefile.
 */
extern const struct raid6_calls raid6_vpermxor1;
extern const struct raid6_calls raid6_vpermxor2;
extern const struct raid6_calls raid6_vpermxor4;
extern const struct raid6_calls raid6_vpermxor8;

struct raid6_recov_calls {
	void (*data2)(int, size_t, int, int, void **);
+1 −0
Original line number Diff line number Diff line
@@ -4,3 +4,4 @@ int*.c
tables.c
neon?.c
s390vx?.c
vpermxor*.c
+26 −1
Original line number Diff line number Diff line
@@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
		   int8.o int16.o int32.o

raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
                              vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -91,6 +92,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

# vpermxor{1,2,4,8}.c are generated sources: each is produced from the
# shared vpermxor.uc template by the "unroll" command, with UNROLL set
# per target to 1, 2, 4 or 8. The matching objects are compiled with
# $(altivec_flags). Listing each .c file in "targets" is what lets
# if_changed track the command line used to generate it.
CFLAGS_vpermxor1.o += $(altivec_flags)
targets += vpermxor1.c
$(obj)/vpermxor1.c: UNROLL := 1
$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_vpermxor2.o += $(altivec_flags)
targets += vpermxor2.c
$(obj)/vpermxor2.c: UNROLL := 2
$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_vpermxor4.o += $(altivec_flags)
targets += vpermxor4.c
$(obj)/vpermxor4.c: UNROLL := 4
$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_vpermxor8.o += $(altivec_flags)
targets += vpermxor8.c
$(obj)/vpermxor8.c: UNROLL := 8
$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c:   UNROLL := 1
+4 −0
Original line number Diff line number Diff line
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
	&raid6_altivec2,
	&raid6_altivec4,
	&raid6_altivec8,
	&raid6_vpermxor1,
	&raid6_vpermxor2,
	&raid6_vpermxor4,
	&raid6_vpermxor8,
#endif
#if defined(CONFIG_TILEGX)
	&raid6_tilegx8,
Loading