
Commit 5e1a6462 authored by Mikulas Patocka, committed by Herbert Xu

crypto: crc32-pclmul - remove useless relative addressing



In 32-bit mode, the x86 architecture can encode full 32-bit absolute addresses
in instructions. Therefore, the code that copies the current address into the
%ecx register and then uses %ecx-relative addressing is useless; we can just
use absolute addressing.

The processor keeps a stack of return addresses for branch prediction. If we
use a call instruction and then pop the return address instead of returning,
the pushed entry is never consumed by a matching ret; this desynchronizes the
return stack and causes branch mispredictions.

This patch also moves the constants to the .rodata section.
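
To make the change concrete, here is a minimal stand-alone 32-bit sketch (not
taken from the kernel file; the function and label names and the constant
value are invented for illustration) contrasting the call/pop, %ecx-relative
form that the patch removes with the absolute form it keeps, including the
move of the constant into .rodata. It would be built as 32-bit code, e.g.
gcc -m32 -c example.S:

/* Before (simplified): the constant lives in .text and is reached
 * relative to %ecx, which first has to be loaded with the current
 * address via a call/pop pair. */
	.text
	.align	16
.Lold_const:				/* illustrative 16-byte constant */
	.octa	0x0f0e0d0c0b0a09080706050403020100

	.globl	crc_old_style
crc_old_style:
	call	.Ldelta			/* pushes a return address ...      */
.Ldelta:
	pop	%ecx			/* ... that no ret ever consumes,   */
					/* desynchronizing the return stack */
	movdqa	.Lold_const - .Ldelta(%ecx), %xmm0
	ret

/* After (simplified): the constant lives in .rodata and is reached
 * through a plain 32-bit absolute address, which non-PIC 32-bit code
 * can encode directly in the instruction. */
	.section .rodata
	.align	16
.Lnew_const:
	.octa	0x0f0e0d0c0b0a09080706050403020100

	.text
	.globl	crc_new_style
crc_new_style:
	movdqa	.Lnew_const, %xmm0
	ret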

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent c07f7c29
+6 −11
@@ -41,6 +41,7 @@
 #include <asm/inst.h>
 
 
+.section .rodata
 .align 16
 /*
  * [x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
@@ -111,19 +112,13 @@ ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
 	pxor    CONSTANT, %xmm1
 	sub     $0x40, LEN
 	add     $0x40, BUF
-#ifndef __x86_64__
-	/* This is for position independent code(-fPIC) support for 32bit */
-	call    delta
-delta:
-	pop     %ecx
-#endif
 	cmp     $0x40, LEN
 	jb      less_64
 
 #ifdef __x86_64__
 	movdqa .Lconstant_R2R1(%rip), CONSTANT
 #else
-	movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
+	movdqa .Lconstant_R2R1, CONSTANT
 #endif
 
 loop_64:/*  64 bytes Full cache line folding */
@@ -172,7 +167,7 @@ less_64:/* Folding cache line into 128bit */
 #ifdef __x86_64__
 	movdqa  .Lconstant_R4R3(%rip), CONSTANT
 #else
-	movdqa  .Lconstant_R4R3 - delta(%ecx), CONSTANT
+	movdqa  .Lconstant_R4R3, CONSTANT
 #endif
 	prefetchnta     (BUF)
 
@@ -220,8 +215,8 @@ fold_64:
 	movdqa  .Lconstant_R5(%rip), CONSTANT
 	movdqa  .Lconstant_mask32(%rip), %xmm3
 #else
-	movdqa  .Lconstant_R5 - delta(%ecx), CONSTANT
-	movdqa  .Lconstant_mask32 - delta(%ecx), %xmm3
+	movdqa  .Lconstant_R5, CONSTANT
+	movdqa  .Lconstant_mask32, %xmm3
 #endif
 	psrldq  $0x04, %xmm2
 	pand    %xmm3, %xmm1
@@ -232,7 +227,7 @@ fold_64:
 #ifdef __x86_64__
 	movdqa  .Lconstant_RUpoly(%rip), CONSTANT
 #else
-	movdqa  .Lconstant_RUpoly - delta(%ecx), CONSTANT
+	movdqa  .Lconstant_RUpoly, CONSTANT
 #endif
 	movdqa  %xmm1, %xmm2
 	pand    %xmm3, %xmm1