Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3370d82f authored by Michal Simek's avatar Michal Simek
Browse files

microblaze: Fix __muldi3 function for little-endian.



__muldi3 was written for big-endian platforms.
The code contained half-word read/write instructions, which
are not compatible with little-endian CPUs.
The asm __muldi3 implementation is replaced by a C version.

Signed-off-by: Michal Simek <monstr@monstr.eu>
parent 17b93146
Loading
Loading
Loading
Loading

arch/microblaze/lib/muldi3.S

deleted100644 → 0
+0 −121
Original line number Diff line number Diff line
#include <linux/linkage.h>

/*
 * Multiply operation for 64 bit integers, for devices with hard multiply
 *	Input :	Operand1[H] in Reg r5
 *		Operand1[L] in Reg r6
 *		Operand2[H] in Reg r7
 *		Operand2[L] in Reg r8
 *	Output: Result[H] in Reg r3
 *		Result[L] in Reg r4
 *
 * Explanation:
 *
 *	Both input numbers are split into four 16-bit digits as follows
 *		op1 = A B C D
 *		op2 = E F G H
 *	result = D * H
 *		 + (C * H + D * G) << 16
 *		 + (B * H + C * G + D * F) << 32
 *		 + (A * H + B * G + C * F + D * E) << 48
 *
 *	Only 64 bits of the output are considered
 */

	.text
	.globl	__muldi3
	.type __muldi3, @function
	.ent __muldi3

/*
 * __muldi3 - multiply two 64-bit integers, keeping the low 64 bits.
 *
 *	In :	r5:r6 = operand 1 (high:low), r7:r8 = operand 2 (high:low)
 *	Out:	r3:r4 = result (high:low)
 *	Clobbers: r7-r12 (r7 and r8 are reused as temporaries after the
 *		  operands have been spilled); r20-r27 are saved and restored.
 *
 * The operands are spilled to memory and read back as eight 16-bit digits
 * (A..D for operand 1, E..H for operand 2, D and H least significant).
 * The result is assembled 16 bits at a time in an 8-byte scratch area:
 *
 *	r1 + 32: Pos0 (result bits 63..48)
 *	r1 + 34: Pos1 (result bits 47..32)
 *	r1 + 36: Pos2 (result bits 31..16)
 *	r1 + 38: Pos3 (result bits 15..0)
 *
 * NOTE: every half-word offset used below assumes that the upper half of a
 * stored word sits at the lower address (big-endian layout).  With a
 * little-endian layout the half-word loads pick up the wrong digits and the
 * half-word stores land in the wrong half of the scratch words.
 */
__muldi3:
	/* Open a 40-byte frame: offsets 0..28 hold r20-r27, offsets 32..39
	 * are the result scratch area described above. */
	addi	r1, r1, -40

/*
 * Save the input operands on the caller's stack: offsets 44..56 lie beyond
 * the 40-byte frame opened above, i.e. at 4..16 above the incoming r1.
 * They are spilled only so they can be re-read as half-words below.
 */
	swi	r5, r1, 44
	swi	r6, r1, 48
	swi	r7, r1, 52
	swi	r8, r1, 56

/* Store all the callee saved registers (r20-r27 carry the digits A..H) */
	sw	r20, r1, r0	/* register-offset form; r0 reads as zero on MicroBlaze */
	swi	r21, r1, 4
	swi	r22, r1, 8
	swi	r23, r1, 12
	swi	r24, r1, 16
	swi	r25, r1, 20
	swi	r26, r1, 24
	swi	r27, r1, 28

/*
 * Load all the 16 bit values for A thru H (zero-extended half-word loads).
 * The digit-to-offset mapping below is the big-endian one.
 */
	lhui	r20, r1, 44 /* A */
	lhui	r21, r1, 46 /* B */
	lhui	r22, r1, 48 /* C */
	lhui	r23, r1, 50 /* D */
	lhui	r24, r1, 52 /* E */
	lhui	r25, r1, 54 /* F */
	lhui	r26, r1, 56 /* G */
	lhui	r27, r1, 58 /* H */

/* D * H ==> LSB of the result on stack ==> Store1 */
	mul	r9, r23, r27
	swi	r9, r1, 36 /* Pos2 and Pos3 */

/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
/* Store the carry generated in position 2 for Pos 3 */
/*
 * r12 collects the carries out of the two 32-bit additions below.  The sum
 * has weight 2^16, so each carry has weight 2^48; r12 is added to Pos0 at
 * the very end.
 */
	lhui	r11, r1, 36 /* Pos2 */
	mul	r9, r22, r27 /* C * H */
	mul	r10, r23, r26 /* D * G */
	add	r9, r9, r10
	addc	r12, r0, r0	/* r12 = carry out of the add above */
	add	r9, r9, r11
	addc	r12, r12, r0 /* Store the Carry */
	shi	r9, r1, 36 /* Store Pos2 */
	/* Spill the sum so that its upper half-word can be read back into
	 * r11; r11 is the carry-in for the next column. */
	swi	r9, r1, 32
	lhui	r11, r1, 32
	/* NOTE(review): this half-word is overwritten by the word store to
	 * offset 32 in the next step, so the store looks redundant. */
	shi	r11, r1, 34 /* Store Pos1 */

/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
/*
 * Carries out of this 32-bit sum are not recorded; the sum has weight 2^32,
 * so such a carry would land beyond bit 63 of the result.
 */
	mul	r9, r21, r27 /* B * H */
	mul	r10, r22, r26 /* C * G */
	mul	r7, r23, r25 /* D * F */
	add	r9, r9, r11
	add	r9, r9, r10
	add	r9, r9, r7
	swi	r9, r1, 32 /* Pos0 and Pos1 */

/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
/* Only the low 16 bits of this sum are kept (half-word store to Pos0). */
	lhui	r11, r1, 32 /* Pos0 */
	mul	r9, r20, r27 /* A * H */
	mul	r10, r21, r26 /* B * G */
	mul	r7, r22, r25 /* C * F */
	mul	r8, r23, r24 /* D * E */
	add	r9, r9, r11
	add 	r9, r9, r10
	add	r9, r9, r7
	add	r9, r9, r8
	/* NOTE(review): the half-word store below writes only the low 16 bits
	 * of r9, so this sign extension appears to have no effect - confirm. */
	sext16	r9, r9 /* Sign extend the MSB */
	shi	r9, r1, 32

/* Move results to r3 and r4 */
/* First fold the carries saved in r12 into Pos0, then load both words. */
	lhui	r3, r1, 32
	add	r3, r3, r12
	shi	r3, r1, 32
	lwi	r3, r1, 32 /* Hi Part */
	lwi	r4, r1, 36 /* Lo Part */

/* Restore Callee saved registers */
	lw	r20, r1, r0
	lwi	r21, r1, 4
	lwi	r22, r1, 8
	lwi	r23, r1, 12
	lwi	r24, r1, 16
	lwi	r25, r1, 20
	lwi	r26, r1, 24
	lwi	r27, r1, 28

/* Restore Frame and return */
/* rtsd is a delayed return: the addi after it runs in the delay slot and
 * releases the 40-byte frame. */
	rtsd	r15, 8
	addi	r1, r1, 40

.size __muldi3, . - __muldi3
.end __muldi3
+60 −0
Original line number Diff line number Diff line
#include <linux/module.h>

#include "libgcc.h"

/* Double-word, word and half-word types used by the multiply helpers. */
#define DWtype long long
#define UWtype unsigned long
#define UHWtype unsigned short

/* Number of bits the helpers treat as one word. */
#define W_TYPE_SIZE 32

/*
 * Half-word helpers: __ll_B is 2^(W_TYPE_SIZE / 2); __ll_lowpart() and
 * __ll_highpart() return the lower and upper part of a word around that
 * boundary.
 */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

/*
 * umul_ppmm(w1, w0, u, v) - plain C fallback, used only when no other
 * definition of umul_ppmm is visible.
 *
 * Multiplies the words u and v as unsigned values and stores the upper
 * word of the product in w1 and the lower word in w0.  The product is
 * built from the four half-word by half-word partial products.
 * u and v are each evaluated twice.
 */
#if !defined(umul_ppmm)
#define umul_ppmm(w1, w0, u, v)						\
	do {								\
		UHWtype __u_lo = __ll_lowpart(u);			\
		UHWtype __u_hi = __ll_highpart(u);			\
		UHWtype __v_lo = __ll_lowpart(v);			\
		UHWtype __v_hi = __ll_highpart(v);			\
		UWtype __p_ll = (UWtype) __u_lo * __v_lo;		\
		UWtype __p_lh = (UWtype) __u_lo * __v_hi;		\
		UWtype __p_hl = (UWtype) __u_hi * __v_lo;		\
		UWtype __p_hh = (UWtype) __u_hi * __v_hi;		\
									\
		/* upper half of the low product: cannot carry */	\
		__p_lh += __ll_highpart(__p_ll);			\
		/* second cross product: this addition can wrap */	\
		__p_lh += __p_hl;					\
		if (__p_lh < __p_hl) {					\
			/* wrapped: carry into the high product */	\
			__p_hh += __ll_B;				\
		}							\
									\
		(w1) = __p_hh + __ll_highpart(__p_lh);			\
		(w0) = (__ll_lowpart(__p_lh) << (W_TYPE_SIZE / 2))	\
			+ __ll_lowpart(__p_ll);				\
	} while (0)
#endif

/*
 * __umulsidi3(u, v) - multiply two words and yield the double-word product.
 *
 * Statement expression: umul_ppmm() fills the high and low members of a
 * DWunion, whose .ll member is the value of the expression.  Defined only
 * when no other __umulsidi3 is visible.
 */
#if !defined(__umulsidi3)
#define __umulsidi3(u, v) ({					\
	DWunion __prod;						\
								\
	umul_ppmm(__prod.s.high, __prod.s.low, u, v);		\
	__prod.ll;						\
	})
#endif

/*
 * __muldi3 - multiply two double-word integers.
 *
 * The low words are multiplied into a full double-word product; the two
 * cross products (low * high and high * low) are then added to the high
 * word only.  The high * high product is not computed, so the returned
 * value is the product truncated to the width of DWtype.
 *
 * Word access goes through the .s.low / .s.high members of DWunion, so the
 * function itself does not depend on byte order (the member layout is
 * presumably provided by libgcc.h).
 */
DWtype __muldi3(DWtype u, DWtype v)
{
	const DWunion lhs = {.ll = u};
	const DWunion rhs = {.ll = v};
	DWunion prod = {.ll = __umulsidi3(lhs.s.low, rhs.s.low)};
	UWtype cross;

	cross = (UWtype) lhs.s.low * (UWtype) rhs.s.high;
	cross += (UWtype) lhs.s.high * (UWtype) rhs.s.low;
	prod.s.high += cross;

	return prod.ll;
}