Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7d11965d authored by Ard Biesheuvel
Browse files

lib/raid6: add ARM-NEON accelerated syndrome calculation



Rebased/reworked a patch contributed by Rob Herring that uses
NEON intrinsics to perform the RAID-6 syndrome calculations.
It uses the existing unroll.awk code to generate several
unrolled versions of which the best performing one is selected
at boot time.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Cc: hpa@linux.intel.com
parent 01956597
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -114,6 +114,11 @@ extern const struct raid6_recov_calls raid6_recov_intx1;
extern const struct raid6_recov_calls raid6_recov_ssse3;
extern const struct raid6_recov_calls raid6_recov_avx2;

/* ARM NEON syndrome implementations, one descriptor per variant (x1/x2/x4/x8) */
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
extern const struct raid6_calls raid6_neonx4;
extern const struct raid6_calls raid6_neonx8;

/* Algorithm list */
extern const struct raid6_calls * const raid6_algos[];
extern const struct raid6_recov_calls *const raid6_recov_algos[];
+1 −0
Original line number Diff line number Diff line
@@ -2,3 +2,4 @@ mktables
altivec*.c
int*.c
tables.c
neon?.c
+40 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \

raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o

hostprogs-y	+= mktables

@@ -16,6 +17,21 @@ ifeq ($(CONFIG_ALTIVEC),y)
altivec_flags := -maltivec -mabi=altivec
endif

# The GCC option -ffreestanding is required in order to compile code containing
# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
NEON_FLAGS := -ffreestanding
# 32-bit ARM: additionally select the NEON FPU with the softfp float ABI
ifeq ($(ARCH),arm)
NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon
endif
# arm64: strip -mgeneral-regs-only from the generated neonN.o objects only.
# NOTE(review): presumably because that option forbids the SIMD registers the
# intrinsics need -- confirm against the GCC AArch64 option documentation.
ifeq ($(ARCH),arm64)
CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only
CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only
CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only
CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only
endif
endif

targets += int1.c
$(obj)/int1.c:   UNROLL := 1
$(obj)/int1.c:   $(src)/int.uc $(src)/unroll.awk FORCE
@@ -70,6 +86,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

# Generate neon1.c, neon2.c, neon4.c and neon8.c from the single neon.uc
# template, one file per UNROLL factor, using the 'unroll' command (defined
# elsewhere in this Makefile). Each resulting object is compiled with
# $(NEON_FLAGS) added to its per-object CFLAGS.
CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c:   UNROLL := 1
$(obj)/neon1.c:   $(src)/neon.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_neon2.o += $(NEON_FLAGS)
targets += neon2.c
$(obj)/neon2.c:   UNROLL := 2
$(obj)/neon2.c:   $(src)/neon.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_neon4.o += $(NEON_FLAGS)
targets += neon4.c
$(obj)/neon4.c:   UNROLL := 4
$(obj)/neon4.c:   $(src)/neon.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_neon8.o += $(NEON_FLAGS)
targets += neon8.c
$(obj)/neon8.c:   UNROLL := 8
$(obj)/neon8.c:   $(src)/neon.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

quiet_cmd_mktable = TABLE   $@
      cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )

+6 −0
Original line number Diff line number Diff line
@@ -70,6 +70,12 @@ const struct raid6_calls * const raid6_algos[] = {
	&raid6_intx2,
	&raid6_intx4,
	&raid6_intx8,
#ifdef CONFIG_KERNEL_MODE_NEON
	&raid6_neonx1,
	&raid6_neonx2,
	&raid6_neonx4,
	&raid6_neonx8,
#endif
	NULL
};

lib/raid6/neon.c

0 → 100644
+58 −0
Original line number Diff line number Diff line
/*
 * linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/raid/pq.h>

#ifdef __KERNEL__
#include <asm/neon.h>
#else
/*
 * Non-kernel build: the NEON begin/end calls expand to nothing and NEON is
 * reported as always available.
 */
#define kernel_neon_begin()
#define kernel_neon_end()
#define cpu_has_neon()		(1)
#endif

/*
 * There are 2 reasons these wrappers are kept in a separate compilation unit
 * from the actual implementations in neonN.c (generated from neon.uc by
 * unroll.awk):
 * - the actual implementations use NEON intrinsics, and the GCC support header
 *   (arm_neon.h) is not fully compatible (type wise) with the kernel;
 * - the neonN.c files are compiled with -mfpu=neon and optimization enabled,
 *   and we have to make sure that we never use *any* NEON/VFP instructions
 *   outside a kernel_neon_begin()/kernel_neon_end() pair.
 */

/*
 * RAID6_NEON_WRAPPER(_n) - emit the wrapper function and the algorithm
 * descriptor for the NEON variant with suffix _n.
 *
 * The expansion consists of:
 * - a static function raid6_neon<_n>_gen_syndrome() that calls
 *   raid6_neon<_n>_gen_syndrome_real() between kernel_neon_begin() and
 *   kernel_neon_end(), passing 'bytes' cast to unsigned long. The _real
 *   function is declared locally inside the wrapper instead of through a
 *   header;
 * - a definition of 'struct raid6_calls const raid6_neonx<_n>', initialised
 *   positionally with the wrapper, raid6_have_neon, the string "neonx<_n>"
 *   and 0.
 *
 * NOTE(review): the initialiser is positional, so it relies on the field
 * order of struct raid6_calls (declared in <linux/raid/pq.h>, not visible
 * here) -- confirm before reordering or extending that struct.
 *
 * The expansion has no trailing semicolon; the invocation supplies it.
 */
#define RAID6_NEON_WRAPPER(_n)						\
	static void raid6_neon ## _n ## _gen_syndrome(int disks,	\
					size_t bytes, void **ptrs)	\
	{								\
		void raid6_neon ## _n  ## _gen_syndrome_real(int,	\
						unsigned long, void**);	\
		kernel_neon_begin();					\
		raid6_neon ## _n ## _gen_syndrome_real(disks,		\
					(unsigned long)bytes, ptrs);	\
		kernel_neon_end();					\
	}								\
	struct raid6_calls const raid6_neonx ## _n = {			\
		raid6_neon ## _n ## _gen_syndrome,			\
		raid6_have_neon,					\
		"neonx" #_n,						\
		0							\
	}

/*
 * Availability check referenced by every descriptor that
 * RAID6_NEON_WRAPPER() emits: hands back whatever cpu_has_neon() reports.
 */
static int raid6_have_neon(void)
{
	int neon_present = cpu_has_neon();

	return neon_present;
}

/* Emit the wrapper and descriptor for each of the x1, x2, x4 and x8 variants */
RAID6_NEON_WRAPPER(1);
RAID6_NEON_WRAPPER(2);
RAID6_NEON_WRAPPER(4);
RAID6_NEON_WRAPPER(8);
Loading