Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 37e96a15 authored by Jason Sams's avatar Jason Sams Committed by Android (Google) Code Review
Browse files

Merge "Migrate thread launch to driver."

parents 1c14bcf7 55d2a254
Loading
Loading
Loading
Loading
+217 −4
Original line number Diff line number Diff line
@@ -55,6 +55,15 @@ struct DrvScript {

};

static Script * setTLS(Script *sc) {
    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey);
    rsAssert(tls);
    Script *old = tls->mScript;
    tls->mScript = sc;
    return old;
}


// Input: cacheDir
// Input: resName
// Input: extName
@@ -234,13 +243,215 @@ error:

}

typedef struct {
    Context *rsc;
    Script *script;
    const Allocation * ain;
    Allocation * aout;
    const void * usr;

    uint32_t mSliceSize;
    volatile int mSliceNum;

    const uint8_t *ptrIn;
    uint32_t eStrideIn;
    uint8_t *ptrOut;
    uint32_t eStrideOut;

    uint32_t xStart;
    uint32_t xEnd;
    uint32_t yStart;
    uint32_t yEnd;
    uint32_t zStart;
    uint32_t zEnd;
    uint32_t arrayStart;
    uint32_t arrayEnd;

    uint32_t dimX;
    uint32_t dimY;
    uint32_t dimZ;
    uint32_t dimArray;
} MTLaunchStruct;
typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);

static void wc_xy(void *usr, uint32_t idx) {
    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;

    while (1) {
        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
        uint32_t yEnd = yStart + mtls->mSliceSize;
        yEnd = rsMin(yEnd, mtls->yEnd);
        if (yEnd <= yStart) {
            return;
        }

        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
        for (uint32_t y = yStart; y < yEnd; y++) {
            uint32_t offset = mtls->dimX * y;
            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);

            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
                ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
                xPtrIn += mtls->eStrideIn;
                xPtrOut += mtls->eStrideOut;
            }
        }
    }
}

static void wc_x(void *usr, uint32_t idx) {
    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;

    while (1) {
        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
        uint32_t xEnd = xStart + mtls->mSliceSize;
        xEnd = rsMin(xEnd, mtls->xEnd);
        if (xEnd <= xStart) {
            return;
        }

        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
        for (uint32_t x = xStart; x < xEnd; x++) {
            ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
            xPtrIn += mtls->eStrideIn;
            xPtrOut += mtls->eStrideOut;
        }
    }
}

void rsdScriptInvokeForEach(const Context *rsc,
                            Script *s,
                            const Allocation * ain,
                            Allocation * aout,
                            const void * usr,
                            uint32_t usrLen,
                            const RsScriptCall *sc) {

    RsHal * dc = (RsHal *)rsc->mHal.drv;

    MTLaunchStruct mtls;
    memset(&mtls, 0, sizeof(mtls));

    if (ain) {
        mtls.dimX = ain->getType()->getDimX();
        mtls.dimY = ain->getType()->getDimY();
        mtls.dimZ = ain->getType()->getDimZ();
        //mtls.dimArray = ain->getType()->getDimArray();
    } else if (aout) {
        mtls.dimX = aout->getType()->getDimX();
        mtls.dimY = aout->getType()->getDimY();
        mtls.dimZ = aout->getType()->getDimZ();
        //mtls.dimArray = aout->getType()->getDimArray();
    } else {
        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
        return;
    }

    if (!sc || (sc->xEnd == 0)) {
        mtls.xEnd = mtls.dimX;
    } else {
        rsAssert(sc->xStart < mtls.dimX);
        rsAssert(sc->xEnd <= mtls.dimX);
        rsAssert(sc->xStart < sc->xEnd);
        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
        if (mtls.xStart >= mtls.xEnd) return;
    }

    if (!sc || (sc->yEnd == 0)) {
        mtls.yEnd = mtls.dimY;
    } else {
        rsAssert(sc->yStart < mtls.dimY);
        rsAssert(sc->yEnd <= mtls.dimY);
        rsAssert(sc->yStart < sc->yEnd);
        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
        if (mtls.yStart >= mtls.yEnd) return;
    }

    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);

    rsAssert(ain->getType()->getDimZ() == 0);

    Context *mrsc = (Context *)rsc;
    Script * oldTLS = setTLS(s);

    mtls.rsc = mrsc;
    mtls.ain = ain;
    mtls.aout = aout;
    mtls.script = s;
    mtls.usr = usr;
    mtls.mSliceSize = 10;
    mtls.mSliceNum = 0;

    mtls.ptrIn = NULL;
    mtls.eStrideIn = 0;
    if (ain) {
        mtls.ptrIn = (const uint8_t *)ain->getPtr();
        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
    }

    mtls.ptrOut = NULL;
    mtls.eStrideOut = 0;
    if (aout) {
        mtls.ptrOut = (uint8_t *)aout->getPtr();
        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
    }

    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
        if (mtls.dimY > 1) {
            rsdLaunchThreads(mrsc, wc_xy, &mtls);
        } else {
            rsdLaunchThreads(mrsc, wc_x, &mtls);
        }

        //LOGE("launch 1");
    } else {
        //LOGE("launch 3");
        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
                                      mtls.dimX * mtls.dimY * z +
                                      mtls.dimX * y;
                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);

                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
                        ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
                        xPtrIn += mtls.eStrideIn;
                        xPtrOut += mtls.eStrideOut;
                    }
                }
            }
        }
    }

    setTLS(oldTLS);
}


int rsdScriptInvokeRoot(const Context *dc, const Script *script) {
int rsdScriptInvokeRoot(const Context *dc, Script *script) {
    DrvScript *drv = (DrvScript *)script->mHal.drv;
    return drv->mRoot();

    Script * oldTLS = setTLS(script);
    int ret = drv->mRoot();
    setTLS(oldTLS);

    return ret;
}

void rsdScriptInvokeInit(const Context *dc, const Script *script) {
void rsdScriptInvokeInit(const Context *dc, Script *script) {
    DrvScript *drv = (DrvScript *)script->mHal.drv;

    if (drv->mInit) {
@@ -249,15 +460,17 @@ void rsdScriptInvokeInit(const Context *dc, const Script *script) {
}


void rsdScriptInvokeFunction(const Context *dc, const Script *script,
void rsdScriptInvokeFunction(const Context *dc, Script *script,
                            uint32_t slot,
                            const void *params,
                            size_t paramLength) {
    DrvScript *drv = (DrvScript *)script->mHal.drv;
    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);

    Script * oldTLS = setTLS(script);
    ((void (*)(const void *, uint32_t))
        drv->mInvokeFunctions[slot])(params, paramLength);
    setTLS(oldTLS);
}

void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
+12 −3
Original line number Diff line number Diff line
@@ -25,14 +25,23 @@ bool rsdScriptInit(const android::renderscript::Context *, android::renderscript
                   uint8_t const *bitcode, size_t bitcodeSize,
                   uint32_t flags, android::renderscript::RsHalSymbolLookupFunc lookupFunc);
void rsdScriptInvokeFunction(const android::renderscript::Context *dc,
                             const android::renderscript::Script *script,
                             android::renderscript::Script *script,
                             uint32_t slot,
                             const void *params,
                             size_t paramLength);

void rsdScriptInvokeForEach(const android::renderscript::Context *rsc,
                            android::renderscript::Script *s,
                            const android::renderscript::Allocation * ain,
                            android::renderscript::Allocation * aout,
                            const void * usr,
                            uint32_t usrLen,
                            const RsScriptCall *sc);

int rsdScriptInvokeRoot(const android::renderscript::Context *dc,
                        const android::renderscript::Script *script);
                        android::renderscript::Script *script);
void rsdScriptInvokeInit(const android::renderscript::Context *dc,
                         const android::renderscript::Script *script);
                         android::renderscript::Script *script);

void rsdScriptSetGlobalVar(const android::renderscript::Context *,
                           const android::renderscript::Script *,
+135 −4
Original line number Diff line number Diff line
@@ -20,16 +20,29 @@
#include <malloc.h>
#include "rsContext.h"

#include <sys/types.h>
#include <sys/resource.h>
#include <sched.h>
#include <cutils/properties.h>
#include <cutils/sched_policy.h>
#include <sys/syscall.h>
#include <string.h>

using namespace android;
using namespace android::renderscript;

static void Shutdown(Context *rsc);
static void SetPriority(const Context *rsc, int32_t priority);

static RsdHalFunctions FunctionTable = {
    Shutdown,
    NULL,
    NULL,
    SetPriority,
    {
        rsdScriptInit,
        rsdScriptInvokeFunction,
        rsdScriptInvokeRoot,
        rsdScriptInvokeForEach,
        rsdScriptInvokeInit,
        rsdScriptSetGlobalVar,
        rsdScriptSetGlobalBind,
@@ -39,16 +52,134 @@ static RsdHalFunctions FunctionTable = {
};



static void * HelperThreadProc(void *vrsc) {
    Context *rsc = static_cast<Context *>(vrsc);
    RsHal *dc = (RsHal *)rsc->mHal.drv;


    uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount);

    //LOGV("RS helperThread starting %p idx=%i", rsc, idx);

    dc->mWorkers.mLaunchSignals[idx].init();
    dc->mWorkers.mNativeThreadId[idx] = gettid();

#if 0
    typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
    cpu_set_t cpuset;
    memset(&cpuset, 0, sizeof(cpuset));
    cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
    int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
              sizeof(cpuset), &cpuset);
    LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
#endif

    int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct);
    if (status) {
        LOGE("pthread_setspecific %i", status);
    }

    while (!dc->mExit) {
        dc->mWorkers.mLaunchSignals[idx].wait();
        if (dc->mWorkers.mLaunchCallback) {
           dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx);
        }
        android_atomic_dec(&dc->mWorkers.mRunningCount);
        dc->mWorkers.mCompleteSignal.set();
    }

    //LOGV("RS helperThread exited %p idx=%i", rsc, idx);
    return NULL;
}

void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) {
    RsHal *dc = (RsHal *)rsc->mHal.drv;

    dc->mWorkers.mLaunchData = data;
    dc->mWorkers.mLaunchCallback = cbk;
    android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
        dc->mWorkers.mLaunchSignals[ct].set();
    }
    while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) {
        dc->mWorkers.mCompleteSignal.wait();
    }
}

bool rsdHalInit(Context *rsc, uint32_t version_major, uint32_t version_minor) {
    rsc->mHal.funcs = FunctionTable;

    /*
    rsc->mHal.drv = (RsHal *)calloc(1, sizeof(RsHal));
    RsHal *dc = (RsHal *)calloc(1, sizeof(RsHal));
    if (!rsc->mHal.drv) {
        return false;
    }
    */
    rsc->mHal.drv = dc;


    int cpu = sysconf(_SC_NPROCESSORS_ONLN);
    LOGV("RS Launching thread(s), reported CPU count %i", cpu);
    if (cpu < 2) cpu = 0;

    dc->mWorkers.mCount = (uint32_t)cpu;
    dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t));
    dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t));
    dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount];
    dc->mWorkers.mLaunchCallback = NULL;

    dc->mWorkers.mCompleteSignal.init();

    android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
    android_atomic_release_store(0, &dc->mWorkers.mLaunchCount);

    int status;
    pthread_attr_t threadAttr;
    status = pthread_attr_init(&threadAttr);
    if (status) {
        LOGE("Failed to init thread attribute.");
        return false;
    }

    for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
        status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc);
        if (status) {
            dc->mWorkers.mCount = ct;
            LOGE("Created fewer than expected number of RS threads.");
            break;
        }
    }
    while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) {
        usleep(100);
    }

    pthread_attr_destroy(&threadAttr);
    return true;
}


void SetPriority(const Context *rsc, int32_t priority) {
    RsHal *dc = (RsHal *)rsc->mHal.drv;
    for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
        setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority);
    }
}

void Shutdown(Context *rsc) {
    RsHal *dc = (RsHal *)rsc->mHal.drv;

    dc->mExit = true;
    dc->mWorkers.mLaunchData = NULL;
    dc->mWorkers.mLaunchCallback = NULL;
    android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
        dc->mWorkers.mLaunchSignals[ct].set();
    }
    int status;
    void *res;
    for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
        status = pthread_join(dc->mWorkers.mThreadId[ct], &res);
    }
    rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0);
}

+21 −13
Original line number Diff line number Diff line
@@ -20,28 +20,36 @@
#include <rs_hal.h>
#include <bcc/bcc.h>

#include "rsMutex.h"
#include "rsSignal.h"


typedef void (* InvokeFunc_t)(void);
typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);

struct RsHalRec {
typedef struct RsHalRec {
    uint32_t version_major;
    uint32_t version_minor;
};

struct RsHalProgramStoreRec {
};

struct RsHalProgramRasterRec {
};

struct RsHalProgramVertexRec {
};

struct RsHalProgramFragmentRec {

    struct Workers {
        volatile int mRunningCount;
        volatile int mLaunchCount;
        uint32_t mCount;
        pthread_t *mThreadId;
        pid_t *mNativeThreadId;
        android::renderscript::Signal mCompleteSignal;

        android::renderscript::Signal *mLaunchSignals;
        WorkerCallback_t mLaunchCallback;
        void *mLaunchData;
    };
    Workers mWorkers;
    bool mExit;
} RsHal;



void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data);

#endif
+4 −86
Original line number Diff line number Diff line
@@ -554,56 +554,6 @@ void Context::destroyWorkerThreadResources() {
    mExit = true;
}

void * Context::helperThreadProc(void *vrsc) {
     Context *rsc = static_cast<Context *>(vrsc);
     uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount);

     //LOGV("RS helperThread starting %p idx=%i", rsc, idx);

     rsc->mWorkers.mLaunchSignals[idx].init();
     rsc->mWorkers.mNativeThreadId[idx] = gettid();

#if 0
     typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
     cpu_set_t cpuset;
     memset(&cpuset, 0, sizeof(cpuset));
     cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
     int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
               sizeof(cpuset), &cpuset);
     LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
#endif

     setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority);
     int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct);
     if (status) {
         LOGE("pthread_setspecific %i", status);
     }

     while (!rsc->mExit) {
         rsc->mWorkers.mLaunchSignals[idx].wait();
         if (rsc->mWorkers.mLaunchCallback) {
            rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx);
         }
         android_atomic_dec(&rsc->mWorkers.mRunningCount);
         rsc->mWorkers.mCompleteSignal.set();
     }

     //LOGV("RS helperThread exited %p idx=%i", rsc, idx);
     return NULL;
}

void Context::launchThreads(WorkerCallback_t cbk, void *data) {
    mWorkers.mLaunchData = data;
    mWorkers.mLaunchCallback = cbk;
    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
        mWorkers.mLaunchSignals[ct].set();
    }
    while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
        mWorkers.mCompleteSignal.wait();
    }
}

void Context::setPriority(int32_t p) {
    // Note: If we put this in the proper "background" policy
    // the wallpapers can become completly unresponsive at times.
@@ -620,9 +570,6 @@ void Context::setPriority(int32_t p) {
    }
#else
    setpriority(PRIO_PROCESS, mNativeThreadId, p);
    for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
        setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p);
    }
#endif
}

@@ -691,16 +638,8 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) {
    if (!rsdHalInit(this, 0, 0)) {
        return false;
    }
    mHal.funcs.setPriority(this, mThreadPriority);

    int cpu = sysconf(_SC_NPROCESSORS_ONLN);
    LOGV("RS Launching thread(s), reported CPU count %i", cpu);
    if (cpu < 2) cpu = 0;

    mWorkers.mCount = (uint32_t)cpu;
    mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
    mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
    mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
    mWorkers.mLaunchCallback = NULL;
    status = pthread_create(&mThreadId, &threadAttr, threadProc, this);
    if (status) {
        LOGE("Failed to start rs context thread.");
@@ -714,20 +653,6 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) {
        return false;
    }

    mWorkers.mCompleteSignal.init();
    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
    android_atomic_release_store(0, &mWorkers.mLaunchCount);
    for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
        status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
        if (status) {
            mWorkers.mCount = ct;
            LOGE("Created fewer than expected number of RS threads.");
            break;
        }
    }
    while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
        usleep(100);
    }
    pthread_attr_destroy(&threadAttr);
    return true;
}
@@ -744,17 +669,10 @@ Context::~Context() {
    mIO.shutdown();
    int status = pthread_join(mThreadId, &res);

    // Cleanup compute threads.
    mWorkers.mLaunchData = NULL;
    mWorkers.mLaunchCallback = NULL;
    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
        mWorkers.mLaunchSignals[ct].set();
    }
    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
        status = pthread_join(mWorkers.mThreadId[ct], &res);

    if (mHal.funcs.shutdownDriver) {
        mHal.funcs.shutdownDriver(this);
    }
    rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0);

    // Global structure cleanup.
    pthread_mutex_lock(&gInitMutex);
Loading