microblaze: Fix __muldi3 function for little-endian. (3370d82f) · Commits · e / devices / android_kernel_xiaomi_markw

arch/microblaze/lib/muldi3.S

deleted100644 → 0

+0 −121

Original line number	Diff line number	Diff line
		#include <linux/linkage.h>

		/*
		* Multiply operation for 64 bit integers, for devices with hard multiply
		* Input : Operand1[H] in Reg r5
		* Operand1[L] in Reg r6
		* Operand2[H] in Reg r7
		* Operand2[L] in Reg r8
		* Output: Result[H] in Reg r3
		* Result[L] in Reg r4
		*
		* Explanation:
		*
		* Both input numbers are split into 16-bit pieces as follows
		* op1 = A B C D
		* op2 = E F G H
		* result = D * H
		* + (C * H + D * G) << 16
		* + (B * H + C * G + D * F) << 32
		* + (A * H + B * G + C * F + D * E) << 48
		*
		* Only 64 bits of the output are considered
		*/

		.text
		.globl __muldi3
		.type __muldi3, @function
		.ent __muldi3

		/*
		* __muldi3: 64-bit multiply assembled from 16 x 16 bit partial products.
		*
		* In : r5 = Operand1[H], r6 = Operand1[L], r7 = Operand2[H], r8 = Operand2[L]
		* Out: r3 = Result[H], r4 = Result[L]
		*
		* Registers written besides r3/r4: r7-r12 (r7 and r8 are reused as scratch
		* once the operands have been copied to memory). r20-r27 are saved on entry
		* and reloaded before returning.
		*
		* Stack use, relative to r1 after the 40-byte frame has been allocated:
		*  0..28 saved r20-r27
		* 32..39 result under construction: Pos0 @32, Pos1 @34, Pos2 @36, Pos3 @38
		* 44..56 copies of r5-r8, stored above this frame in the caller's area
		*
		* NOTE(review): the A..H and Pos0..Pos3 labels assume that the half-word at
		* the lower address of a stored word is its more significant half, i.e. a
		* big-endian layout. On a little-endian core the halves would presumably be
		* swapped - confirm before reusing this routine.
		*/
		__muldi3:
		/* Allocate 40 bytes: 32 for r20-r27 plus 8 for the result being built */
		addi r1, r1, -40

		/* Save the input operands on the caller's stack */
		swi r5, r1, 44
		swi r6, r1, 48
		swi r7, r1, 52
		swi r8, r1, 56

		/* Store all the callee saved registers */
		/* r20 uses the register-indexed form; r1 + r0 is frame offset 0 (r0 reads as zero) */
		sw r20, r1, r0
		swi r21, r1, 4
		swi r22, r1, 8
		swi r23, r1, 12
		swi r24, r1, 16
		swi r25, r1, 20
		swi r26, r1, 24
		swi r27, r1, 28

		/* Load all the 16 bit values for A thru H */
		/* Each operand word saved above is read back as two unsigned half-words */
		lhui r20, r1, 44 /* A */
		lhui r21, r1, 46 /* B */
		lhui r22, r1, 48 /* C */
		lhui r23, r1, 50 /* D */
		lhui r24, r1, 52 /* E */
		lhui r25, r1, 54 /* F */
		lhui r26, r1, 56 /* G */
		lhui r27, r1, 58 /* H */

		/* D * H ==> LSB of the result on stack ==> Store1 */
		mul r9, r23, r27
		swi r9, r1, 36 /* Pos2 and Pos3 */

		/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
		/* Store the carry generated in position 2 for Pos 3 */
		lhui r11, r1, 36 /* Pos2 */
		mul r9, r22, r27 /* C * H */
		mul r10, r23, r26 /* D * G */
		add r9, r9, r10
		/* r12 = carry out of the add above (0 + 0 + carry) */
		addc r12, r0, r0
		add r9, r9, r11
		addc r12, r12, r0 /* Store the Carry */
		shi r9, r1, 36 /* Store Pos2 */
		/* Spill the whole sum so that its upper half-word can be read back as Pos1 */
		swi r9, r1, 32
		lhui r11, r1, 32
		shi r11, r1, 34 /* Store Pos1 */

		/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
		/* r11 still holds Hi (Store2) from the block above */
		mul r9, r21, r27 /* B * H */
		mul r10, r22, r26 /* C * G */
		/* r7 can be reused here: Operand2[H] was already copied to the stack */
		mul r7, r23, r25 /* D * F */
		/*
		* Carries out of these adds are not recorded. This term sits at << 32,
		* so they would fall at bit 64 and above, outside the 64-bit result.
		*/
		add r9, r9, r11
		add r9, r9, r10
		add r9, r9, r7
		swi r9, r1, 32 /* Pos0 and Pos1 */

		/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
		lhui r11, r1, 32 /* Pos0 */
		mul r9, r20, r27 /* A * H */
		mul r10, r21, r26 /* B * G */
		mul r7, r22, r25 /* C * F */
		mul r8, r23, r24 /* D * E */
		add r9, r9, r11
		add r9, r9, r10
		add r9, r9, r7
		add r9, r9, r8
		/*
		* NOTE(review): only a half-word of r9 is written by the shi that follows,
		* so this sign extension appears to have no effect on the result - confirm.
		*/
		sext16 r9, r9 /* Sign extend the MSB */
		shi r9, r1, 32

		/* Move results to r3 and r4 */
		/* First fold the carry kept in r12 into Pos0, then reload both finished words */
		lhui r3, r1, 32
		add r3, r3, r12
		shi r3, r1, 32
		lwi r3, r1, 32 /* Hi Part */
		lwi r4, r1, 36 /* Lo Part */

		/* Restore Callee saved registers */
		lw r20, r1, r0
		lwi r21, r1, 4
		lwi r22, r1, 8
		lwi r23, r1, 12
		lwi r24, r1, 16
		lwi r25, r1, 20
		lwi r26, r1, 24
		lwi r27, r1, 28

		/* Restore Frame and return */
		/* The addi releasing the 40-byte frame sits in the rtsd delay slot */
		rtsd r15, 8
		addi r1, r1, 40

		.size __muldi3, . - __muldi3
		.end __muldi3

arch/microblaze/lib/muldi3.c

0 → 100644

+60 −0

Original line number	Diff line number	Diff line
		#include <linux/module.h>

		#include "libgcc.h"

		/* Type vocabulary used by the multiply helpers in this file. */
		#define DWtype long long
		#define UWtype unsigned long
		#define UHWtype unsigned short

		/* Number of bits the helpers below treat as one word. */
		#define W_TYPE_SIZE 32

		/*
		 * __ll_B is 2^(W_TYPE_SIZE / 2). __ll_lowpart() keeps the bits below it,
		 * __ll_highpart() shifts the bits above it down to bit 0.
		 */
		#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
		#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
		#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

		/*
		 * umul_ppmm(w1, w0, u, v) - plain C fallback, used only when no other
		 * definition of umul_ppmm is already visible.
		 *
		 * Multiplies u by v half-word by half-word and stores the upper word of
		 * the product in w1 and the lower word in w0 (w1 is assigned first).
		 * u and v are each expanded twice, so they must be free of side effects.
		 */
		#ifndef umul_ppmm
		#define umul_ppmm(w1, w0, u, v) \
			do { \
				/* Split both factors into half-words. */ \
				const UHWtype __u_lo = __ll_lowpart(u); \
				const UHWtype __u_hi = __ll_highpart(u); \
				const UHWtype __v_lo = __ll_lowpart(v); \
				const UHWtype __v_hi = __ll_highpart(v); \
				\
				/* The four partial products. */ \
				const UWtype __lo_lo = (UWtype) __u_lo * __v_lo; \
				const UWtype __lo_hi = (UWtype) __u_lo * __v_hi; \
				const UWtype __hi_lo = (UWtype) __u_hi * __v_lo; \
				UWtype __hi_hi = (UWtype) __u_hi * __v_hi; \
				\
				/* Middle column; this first sum cannot carry. */ \
				UWtype __mid = __lo_hi + __ll_highpart(__lo_lo); \
				\
				/* The second sum can wrap; if so, credit the high product. */ \
				__mid += __hi_lo; \
				__hi_hi += (__mid < __hi_lo) ? __ll_B : 0; \
				\
				(w1) = __hi_hi + __ll_highpart(__mid); \
				(w0) = (__ll_lowpart(__mid) << (W_TYPE_SIZE / 2)) \
					| __ll_lowpart(__lo_lo); \
			} while (0)
		#endif

		/*
		 * __umulsidi3(u, v) - fallback used only when no other definition is
		 * visible. Expands to a statement expression that hands the high and low
		 * members of a temporary DWunion to umul_ppmm() and evaluates to that
		 * union's .ll member.
		 */
		#ifndef __umulsidi3
		#define __umulsidi3(u, v) ({ \
			DWunion __prod; \
			\
			umul_ppmm(__prod.s.high, __prod.s.low, u, v); \
			__prod.ll; \
		})
		#endif

		/*
		 * __muldi3 - multiply two DWtype values.
		 *
		 * The product of the two low words is formed by __umulsidi3(). The two
		 * cross terms (low * high and high * low) are then added, in UWtype
		 * arithmetic, to the high word of that product. The high * high term is
		 * never computed.
		 *
		 * Returns the .ll view of the assembled DWunion.
		 */
		DWtype __muldi3(DWtype u, DWtype v)
		{
			const DWunion lhs = {.ll = u};
			const DWunion rhs = {.ll = v};
			DWunion product = {.ll = __umulsidi3(lhs.s.low, rhs.s.low)};
			const UWtype cross = (UWtype) lhs.s.low * (UWtype) rhs.s.high
					   + (UWtype) lhs.s.high * (UWtype) rhs.s.low;

			product.s.high += cross;

			return product.ll;
		}