Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 49a88c3c authored by Elliott Hughes's avatar Elliott Hughes Committed by Android Git Automerger
Browse files

am 410a1966: am fd382f2e: am fd7eabe4: Merge "Pixelflinger: Add AArch64 support to pixelflinger JIT."

am 410a1966: am fd382f2e: am fd7eabe4: Merge "Pixelflinger: Add AArch64 support to pixelflinger JIT."

* commit '410a1966':
  Pixelflinger: Add AArch64 support to pixelflinger JIT.
parents cec9ac9b 410a1966
Loading
Loading
Loading
Loading
+66 −1
Original line number Diff line number Diff line
@@ -457,6 +457,69 @@ inline int64_t gglMulii(int32_t x, int32_t y) {
    return u.res;
}

#elif defined(__aarch64__)

// inline AArch64 implementations

// Fixed-point multiply with rounding: returns (x * y + 2^(shift-1)) >> shift,
// truncated to 32 bits.
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
{
    GGLfixed result;
    GGLfixed round;

    // round = (1 << shift) >> 1, i.e. one half in the target fixed-point
    // format.  Done as lsl-then-lsr so that shift == 0 yields round == 0
    // (the C expression 1 << (shift - 1) would be undefined for shift == 0).
    //
    // smaddl computes the full 64-bit product of the 32-bit inputs plus the
    // 64-bit rounding term, so the intermediate cannot overflow; the final
    // lsr scales it back down by 'shift'.
    // None of mov/lsl/lsr/smaddl set condition flags, so no "cc" clobber is
    // needed.  Both outputs use "=&r" (early-clobber) because they are
    // written before the last input read.
    asm("mov    %x[round], #1                        \n"
        "lsl    %x[round], %x[round], %x[shift]      \n"
        "lsr    %x[round], %x[round], #1             \n"
        "smaddl %x[result], %w[x], %w[y],%x[round]   \n"
        "lsr    %x[result], %x[result], %x[shift]    \n"
        : [round]"=&r"(round), [result]"=&r"(result) \
        : [x]"r"(x), [y]"r"(y), [shift] "r"(shift)   \
        :
       );
    return result;
}
// Fixed-point multiply-accumulate: returns ((x * y) >> shift) + a.
// Note: unlike gglMulx there is no rounding term added before the shift.
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
{
    GGLfixed result;
    // smull produces the full 64-bit product so the multiply cannot
    // overflow; the product is shifted down in 64 bits, then 'a' is added
    // in 32 bits.  No flags are set, so no "cc" clobber is required.
    asm("smull  %x[result], %w[x], %w[y]                     \n"
        "lsr    %x[result], %x[result], %x[shift]            \n"
        "add    %w[result], %w[result], %w[a]                \n"
        : [result]"=&r"(result)                               \
        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
        :
        );
    return result;
}

// Fixed-point multiply-subtract: returns ((x * y) >> shift) - a.
// Note: as with gglMulAddx there is no rounding term before the shift.
inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
{
    GGLfixed result;

    // smull produces the full 64-bit product so the multiply cannot
    // overflow; the product is shifted down in 64 bits, then 'a' is
    // subtracted in 32 bits.  No flags are set, so no "cc" clobber is
    // required.
    asm("smull  %x[result], %w[x], %w[y]                     \n"
        "lsr    %x[result], %x[result], %x[shift]            \n"
        "sub    %w[result], %w[result], %w[a]                \n"
        : [result]"=&r"(result)                               \
        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
        :
        );
    return result;
}
// Signed 32x32 -> 64-bit multiply: returns the full 64-bit product of x
// and y with no shifting or truncation.
inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y)
{
    int64_t res;
    // smull writes the entire 64-bit product into one X register.
    // The "%" modifier on operand 1 tells the compiler x and y are
    // commutative, giving it more freedom in register allocation.
    asm("smull  %x0, %w1, %w2 \n"
        : "=r"(res)
        : "%r"(x), "r"(y)
        :
        );
    return res;
}

#else // ----------------------------------------------------------------------

inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
@@ -498,7 +561,7 @@ inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) {
inline int32_t gglClz(int32_t x) CONST;
inline int32_t gglClz(int32_t x)
{
#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__)
#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__)
    return __builtin_clz(x);
#else
    if (!x) return 32;
@@ -554,6 +617,8 @@ inline GGLfixed gglClampx(GGLfixed c)
    // clamps to zero in one instruction, but gcc won't generate it and
    // replace it by a cmp + movlt (it's quite amazing actually).
    asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
#elif defined(__aarch64__)
    asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));
#else
    c &= ~(c>>31);
#endif
+10 −2
Original line number Diff line number Diff line
@@ -9,13 +9,11 @@ include $(CLEAR_VARS)
PIXELFLINGER_SRC_FILES:= \
    codeflinger/ARMAssemblerInterface.cpp \
    codeflinger/ARMAssemblerProxy.cpp \
    codeflinger/ARMAssembler.cpp \
    codeflinger/CodeCache.cpp \
    codeflinger/GGLAssembler.cpp \
    codeflinger/load_store.cpp \
    codeflinger/blending.cpp \
    codeflinger/texturing.cpp \
    codeflinger/disassem.c \
	codeflinger/tinyutils/SharedBuffer.cpp \
	codeflinger/tinyutils/VectorImpl.cpp \
	fixed.cpp.arm \
@@ -39,6 +37,8 @@ endif
endif

ifeq ($(TARGET_ARCH),arm)
PIXELFLINGER_SRC_FILES += codeflinger/ARMAssembler.cpp
PIXELFLINGER_SRC_FILES += codeflinger/disassem.c
# special optimization flags for pixelflinger
PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
endif
@@ -52,6 +52,14 @@ endif

LOCAL_SHARED_LIBRARIES := libcutils liblog

ifeq ($(TARGET_ARCH),aarch64)
PIXELFLINGER_SRC_FILES += arch-aarch64/t32cb16blend.S
PIXELFLINGER_SRC_FILES += arch-aarch64/col32cb16blend.S
PIXELFLINGER_SRC_FILES += codeflinger/Aarch64Assembler.cpp
PIXELFLINGER_SRC_FILES += codeflinger/Aarch64Disassembler.cpp
PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
endif

#
# Shared library
#
+87 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
    .text
    .align

    .global scanline_col32cb16blend_aarch64

//
// This function alpha blends a fixed color into a destination scanline, using
// the formula:
//
//     d = s + (((a + (a >> 7)) * d) >> 8)
//
// where d is the destination pixel,
//       s is the source color,
//       a is the alpha channel of the source color.
//

// x0 = destination buffer pointer
// w1 = color value
// w2 = count


scanline_col32cb16blend_aarch64:

    // ---- loop-invariant setup: decompose the 0xAABBGGRR source color ----
    // After this prologue:
    //   w5  = inverted alpha = 0x100 - (a + (a >> 7)), so a == 0xFF -> 0
    //   w10 = src red   << 5   (8-bit channel prescaled)
    //   w12 = src green << 6
    //   w4  = src blue  << 5
    //   w9  = 0x3F, the 6-bit destination-green mask
    lsr         w5, w1, #24                     // shift down alpha
    mov         w9, #0xff                       // create mask
    add         w5, w5, w5, lsr #7              // add in top bit
    mov         w4, #256                        // create #0x100
    sub         w5, w4, w5                      // invert alpha
    and         w10, w1, #0xff                  // extract red
    and         w12, w9, w1, lsr #8             // extract green
    and         w4,  w9, w1, lsr #16            // extract blue
    lsl         w10, w10, #5                    // prescale red
    lsl         w12, w12, #6                    // prescale green
    lsl         w4,  w4,  #5                    // prescale blue
    lsr         w9,  w9,  #2                    // create dest green mask

    // ---- per-pixel loop: blend one RGB565 destination pixel per pass ----
    // Implements d = s + (((0x100 - a') * d) >> 8) per channel, where the
    // prescales above line each channel up so a single >> 8 renormalizes.
1:
    ldrh        w8, [x0]                        // load dest pixel
    subs        w2, w2, #1                      // decrement loop counter
    lsr         w6, w8, #11                     // extract dest red
    and         w7, w9, w8, lsr #5              // extract dest green
    and         w8, w8, #0x1f                   // extract dest blue

    madd        w6, w6, w5, w10                 // dest red * alpha + src red
    madd        w7, w7, w5, w12                 // dest green * alpha + src green
    madd        w8, w8, w5, w4                  // dest blue * alpha + src blue

    lsr         w6, w6, #8                      // shift down red
    lsr         w7, w7, #8                      // shift down green
    lsl         w6, w6, #11                     // shift red into 565
    orr         w6, w6, w7, lsl #5              // shift green into 565
    orr         w6, w6, w8, lsr #8              // shift blue into 565

    strh        w6, [x0], #2                    // store pixel to dest, update ptr
    // b.ne consumes the flags set by the 'subs' at the top of the loop;
    // none of the intervening madd/lsr/lsl/orr/strh instructions write
    // the condition flags, so this is safe.
    b.ne        1b                              // if count != 0, loop

    // NOTE(review): w2 == 0 on entry would blend 2^32 pixels (subs wraps
    // before the first test) — presumably callers guarantee count > 0;
    // confirm at the call sites.
    ret


+213 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
    .text
    .align

    .global scanline_t32cb16blend_aarch64

/*
 * .macro pixel
 *
 *  This macro alpha blends RGB565 original pixel located in either
 *  top or bottom 16 bits of DREG register with SRC 32 bit pixel value
 *  and writes the result to FB register
 *
 * \DREG is a 32-bit register containing *two* original destination RGB565
 *       pixels, with the even one in the low-16 bits, and the odd one in the
 *       high 16 bits.
 *
 * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
 *
 * \FB is a target register that will contain the blended pixel values.
 *
 * \ODD is either 0 or 1 and indicates if we're blending the lower or
 *      upper 16-bit pixels in DREG into FB
 *
 *
 * clobbered: w6, w7, w16, w17, w18
 *
 */

// Alpha-blend one 0xAABBGGRR SRC pixel into one of the two RGB565 pixels
// packed in DREG, accumulating the blended 565 result into FB.  Each
// channel computes dst*invA (>> 8) + src and saturates to the channel's
// maximum via cmp + csel.  See the block comment above for the operand
// contract; clobbers w6, w7, w16, w17, w18.
// NOTE(review): w18 is the platform-reserved register in AAPCS64 on some
// OSes (Windows/Apple); usable on Linux/Android but worth confirming if
// this file is ever reused elsewhere.
.macro pixel,   DREG, SRC, FB, ODD

    // invA = 0x100 - (sA + (sA >> 7)), so sA == 0xFF maps to 0 and the
    // destination contribution vanishes for fully opaque sources.
    // SRC = 0xAABBGGRR
    lsr     w7, \SRC, #24               // sA
    add     w7, w7, w7, lsr #7          // sA + (sA >> 7)
    mov     w6, #0x100
    sub     w7, w6, w7                  // sA = 0x100 - (sA+(sA>>7))

// NOTE(review): this numeric label appears unused — no 1b/1f branch
// references it inside the macro; presumably left over from the ARM32
// original.  Confirm before removing.
1:

.if \ODD //Blending odd pixel present in top 16 bits of DREG register

    // Per channel below: extract dest channel from the high half of DREG,
    // multiply by invA, add the (shifted-down) src channel, then clamp to
    // the channel max by selecting between the saturated OR (w17) and the
    // computed OR (w18) on the 'hi' condition from the cmp.
    // red
    lsr     w16, \DREG, #(16 + 11)
    mul     w16, w7, w16
    lsr     w6, \SRC, #3
    and     w6, w6, #0x1F
    add     w16, w6, w16, lsr #8
    cmp     w16, #0x1F
    orr     w17, \FB, #(0x1F<<(16 + 11))
    orr     w18, \FB, w16, lsl #(16 + 11)
    csel    \FB, w17, w18, hi
        // green
        and     w6, \DREG, #(0x3F<<(16 + 5))
        lsr     w17,w6,#(16+5)
        mul     w6, w7, w17
        lsr     w16, \SRC, #(8+2)
        and     w16, w16, #0x3F
        add     w6, w16, w6, lsr #8
        cmp     w6, #0x3F
        orr     w17, \FB, #(0x3F<<(16 + 5))
        orr     w18, \FB, w6, lsl #(16 + 5)
        csel    \FB, w17, w18, hi
            // blue
            and     w16, \DREG, #(0x1F << 16)
            lsr     w17,w16,#16
            mul     w16, w7, w17
            lsr     w6, \SRC, #(8+8+3)
            and     w6, w6, #0x1F
            add     w16, w6, w16, lsr #8
            cmp     w16, #0x1F
            orr     w17, \FB, #(0x1F << 16)
            orr     w18, \FB, w16, lsl #16
            csel    \FB, w17, w18, hi

.else //Blending even pixel present in bottom 16 bits of DREG register

    // Same per-channel scheme as the odd case, but reading the low half of
    // DREG.  Note the red channel uses mov/lsl rather than orr into \FB:
    // it (re)initializes FB for this pixel pair.
    // red
    lsr     w16, \DREG, #11
    and     w16, w16, #0x1F
    mul     w16, w7, w16
    lsr     w6, \SRC, #3
    and     w6, w6, #0x1F
    add     w16, w6, w16, lsr #8
    cmp     w16, #0x1F
    mov     w17, #(0x1F<<11)
    lsl     w18, w16, #11
    csel    \FB, w17, w18, hi


        // green: the dest channel is multiplied while still shifted up by
        // 5, so the renormalizing shift is (5+8) instead of 8.
        // green
        and     w6, \DREG, #(0x3F<<5)
        mul     w6, w7, w6
        lsr     w16, \SRC, #(8+2)
        and     w16, w16, #0x3F
        add     w6, w16, w6, lsr #(5+8)
        cmp     w6, #0x3F
        orr     w17, \FB, #(0x3F<<5)
        orr     w18, \FB, w6, lsl #5
        csel    \FB, w17, w18, hi

            // blue
            and     w16, \DREG, #0x1F
            mul     w16, w7, w16
            lsr     w6, \SRC, #(8+8+3)
            and     w6, w6, #0x1F
            add     w16, w6, w16, lsr #8
            cmp     w16, #0x1F
            orr     w17, \FB, #0x1F
            orr     w18, \FB, w16
            csel    \FB, w17, w18, hi

.endif // End of blending even pixel

.endm // End of pixel macro


// x0:  dst ptr
// x1:  src ptr
// w2:  count
// w3:  d
// w4:  s0
// w5:  s1
// w6:  pixel
// w7:  pixel
// w8:  free
// w9:  free
// w10: free
// w11: free
// w12: scratch
// w14: pixel

scanline_t32cb16blend_aarch64:

    // If dst is not 32-bit aligned, blend a single leading 16-bit pixel
    // first so the main loop can use 32-bit loads/stores on dst.
    // align DST to 32 bits
    tst     x0, #0x3
    b.eq    aligned
    subs    w2, w2, #1
    b.lo    return

// Blend exactly one pixel: 32-bit src from x1, 16-bit dst from x0.
// Also reused as the tail for an odd trailing pixel; in that case w4 must
// already hold the pending src word (set up by the paths that jump to 9f).
last:
    ldr     w4, [x1], #4
    ldrh    w3, [x0]
    pixel   w3, w4, w12, 0
    strh    w12, [x0], #2

aligned:
    subs    w2, w2, #2
    b.lo    9f

    // The main loop is unrolled twice and processes 4 pixels
8:
    ldp   w4,w5, [x1], #8
    add     x0, x0, #4
    // Fast path: if both src pixels are fully transparent black
    // (0x00000000), the blend is a no-op, so skip the dst read/write.
    // it's all zero, skip this pixel
    orr     w3, w4, w5
    cbz     w3, 7f

    // load the destination
    ldr     w3, [x0, #-4]
    // stream the destination
    pixel   w3, w4, w12, 0
    pixel   w3, w5, w12, 1
    str     w12, [x0, #-4]

    // 2nd iteration of the loop, don't stream anything
    subs    w2, w2, #2
    // If fewer than 2 pixels remain, fall through to the tail with the
    // leftover src word staged in w4 for 'last'.
    // NOTE(review): 'blt' here and 'bhs' below use the undotted ARM32-style
    // spelling while the rest of the file uses 'b.cond'; presumably both
    // are accepted by the toolchain — confirm, and unify the style.
    csel    w4, w5, w4, lt
    blt     9f
    ldp     w4,w5, [x1], #8
    add     x0, x0, #4
    orr     w3, w4, w5
    cbz     w3, 7f
    ldr     w3, [x0, #-4]
    pixel   w3, w4, w12, 0
    pixel   w3, w5, w12, 1
    str     w12, [x0, #-4]

7:  subs    w2, w2, #2
    bhs     8b
    mov     w4, w5

    // Tail: w2 is now negative; +1 distinguishes one leftover pixel
    // (w2 == -1 -> blend it via 'last') from none (w2 == -2 -> return).
9:  adds    w2, w2, #1
    b.lo    return
    b       last

return:
    ret
+1 −1
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ public:
    };

    enum {
        CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS
        CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS, CODEGEN_ARCH_AARCH64
    };

    // -----------------------------------------------------------------------
Loading