Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 55d2a254 authored by Jason Sams's avatar Jason Sams
Browse files

Migrate thread launch to driver.

Change-Id: If182c524cceb327547640f22f956856d291d1787
parent e4a06c5f
Loading
Loading
Loading
Loading
+217 −4
Original line number Diff line number Diff line
@@ -55,6 +55,15 @@ struct DrvScript {

};

static Script * setTLS(Script *sc) {
    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey);
    rsAssert(tls);
    Script *old = tls->mScript;
    tls->mScript = sc;
    return old;
}


// Input: cacheDir
// Input: resName
// Input: extName
@@ -234,13 +243,215 @@ error:

}

typedef struct {
    Context *rsc;
    Script *script;
    const Allocation * ain;
    Allocation * aout;
    const void * usr;

    uint32_t mSliceSize;
    volatile int mSliceNum;

    const uint8_t *ptrIn;
    uint32_t eStrideIn;
    uint8_t *ptrOut;
    uint32_t eStrideOut;

    uint32_t xStart;
    uint32_t xEnd;
    uint32_t yStart;
    uint32_t yEnd;
    uint32_t zStart;
    uint32_t zEnd;
    uint32_t arrayStart;
    uint32_t arrayEnd;

    uint32_t dimX;
    uint32_t dimY;
    uint32_t dimZ;
    uint32_t dimArray;
} MTLaunchStruct;
typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);

static void wc_xy(void *usr, uint32_t idx) {
    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;

    while (1) {
        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
        uint32_t yEnd = yStart + mtls->mSliceSize;
        yEnd = rsMin(yEnd, mtls->yEnd);
        if (yEnd <= yStart) {
            return;
        }

        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
        for (uint32_t y = yStart; y < yEnd; y++) {
            uint32_t offset = mtls->dimX * y;
            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);

            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
                ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
                xPtrIn += mtls->eStrideIn;
                xPtrOut += mtls->eStrideOut;
            }
        }
    }
}

static void wc_x(void *usr, uint32_t idx) {
    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;

    while (1) {
        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
        uint32_t xEnd = xStart + mtls->mSliceSize;
        xEnd = rsMin(xEnd, mtls->xEnd);
        if (xEnd <= xStart) {
            return;
        }

        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
        for (uint32_t x = xStart; x < xEnd; x++) {
            ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
            xPtrIn += mtls->eStrideIn;
            xPtrOut += mtls->eStrideOut;
        }
    }
}

void rsdScriptInvokeForEach(const Context *rsc,
                            Script *s,
                            const Allocation * ain,
                            Allocation * aout,
                            const void * usr,
                            uint32_t usrLen,
                            const RsScriptCall *sc) {

    RsHal * dc = (RsHal *)rsc->mHal.drv;

    MTLaunchStruct mtls;
    memset(&mtls, 0, sizeof(mtls));

    if (ain) {
        mtls.dimX = ain->getType()->getDimX();
        mtls.dimY = ain->getType()->getDimY();
        mtls.dimZ = ain->getType()->getDimZ();
        //mtls.dimArray = ain->getType()->getDimArray();
    } else if (aout) {
        mtls.dimX = aout->getType()->getDimX();
        mtls.dimY = aout->getType()->getDimY();
        mtls.dimZ = aout->getType()->getDimZ();
        //mtls.dimArray = aout->getType()->getDimArray();
    } else {
        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
        return;
    }

    if (!sc || (sc->xEnd == 0)) {
        mtls.xEnd = mtls.dimX;
    } else {
        rsAssert(sc->xStart < mtls.dimX);
        rsAssert(sc->xEnd <= mtls.dimX);
        rsAssert(sc->xStart < sc->xEnd);
        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
        if (mtls.xStart >= mtls.xEnd) return;
    }

    if (!sc || (sc->yEnd == 0)) {
        mtls.yEnd = mtls.dimY;
    } else {
        rsAssert(sc->yStart < mtls.dimY);
        rsAssert(sc->yEnd <= mtls.dimY);
        rsAssert(sc->yStart < sc->yEnd);
        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
        if (mtls.yStart >= mtls.yEnd) return;
    }

    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);

    rsAssert(ain->getType()->getDimZ() == 0);

    Context *mrsc = (Context *)rsc;
    Script * oldTLS = setTLS(s);

    mtls.rsc = mrsc;
    mtls.ain = ain;
    mtls.aout = aout;
    mtls.script = s;
    mtls.usr = usr;
    mtls.mSliceSize = 10;
    mtls.mSliceNum = 0;

    mtls.ptrIn = NULL;
    mtls.eStrideIn = 0;
    if (ain) {
        mtls.ptrIn = (const uint8_t *)ain->getPtr();
        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
    }

    mtls.ptrOut = NULL;
    mtls.eStrideOut = 0;
    if (aout) {
        mtls.ptrOut = (uint8_t *)aout->getPtr();
        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
    }

    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
        if (mtls.dimY > 1) {
            rsdLaunchThreads(mrsc, wc_xy, &mtls);
        } else {
            rsdLaunchThreads(mrsc, wc_x, &mtls);
        }

        //LOGE("launch 1");
    } else {
        //LOGE("launch 3");
        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
                                      mtls.dimX * mtls.dimY * z +
                                      mtls.dimX * y;
                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);

                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
                        ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
                        xPtrIn += mtls.eStrideIn;
                        xPtrOut += mtls.eStrideOut;
                    }
                }
            }
        }
    }

    setTLS(oldTLS);
}


int rsdScriptInvokeRoot(const Context *dc, const Script *script) {
int rsdScriptInvokeRoot(const Context *dc, Script *script) {
    DrvScript *drv = (DrvScript *)script->mHal.drv;
    return drv->mRoot();

    Script * oldTLS = setTLS(script);
    int ret = drv->mRoot();
    setTLS(oldTLS);

    return ret;
}

void rsdScriptInvokeInit(const Context *dc, const Script *script) {
void rsdScriptInvokeInit(const Context *dc, Script *script) {
    DrvScript *drv = (DrvScript *)script->mHal.drv;

    if (drv->mInit) {
@@ -249,15 +460,17 @@ void rsdScriptInvokeInit(const Context *dc, const Script *script) {
}


void rsdScriptInvokeFunction(const Context *dc, const Script *script,
void rsdScriptInvokeFunction(const Context *dc, Script *script,
                            uint32_t slot,
                            const void *params,
                            size_t paramLength) {
    DrvScript *drv = (DrvScript *)script->mHal.drv;
    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);

    Script * oldTLS = setTLS(script);
    ((void (*)(const void *, uint32_t))
        drv->mInvokeFunctions[slot])(params, paramLength);
    setTLS(oldTLS);
}

void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
+12 −3
Original line number Diff line number Diff line
@@ -25,14 +25,23 @@ bool rsdScriptInit(const android::renderscript::Context *, android::renderscript
                   uint8_t const *bitcode, size_t bitcodeSize,
                   uint32_t flags, android::renderscript::RsHalSymbolLookupFunc lookupFunc);
void rsdScriptInvokeFunction(const android::renderscript::Context *dc,
                             const android::renderscript::Script *script,
                             android::renderscript::Script *script,
                             uint32_t slot,
                             const void *params,
                             size_t paramLength);

void rsdScriptInvokeForEach(const android::renderscript::Context *rsc,
                            android::renderscript::Script *s,
                            const android::renderscript::Allocation * ain,
                            android::renderscript::Allocation * aout,
                            const void * usr,
                            uint32_t usrLen,
                            const RsScriptCall *sc);

int rsdScriptInvokeRoot(const android::renderscript::Context *dc,
                        const android::renderscript::Script *script);
                        android::renderscript::Script *script);
void rsdScriptInvokeInit(const android::renderscript::Context *dc,
                         const android::renderscript::Script *script);
                         android::renderscript::Script *script);

void rsdScriptSetGlobalVar(const android::renderscript::Context *,
                           const android::renderscript::Script *,
+135 −4
Original line number Diff line number Diff line
@@ -20,16 +20,29 @@
#include <malloc.h>
#include "rsContext.h"

#include <sys/types.h>
#include <sys/resource.h>
#include <sched.h>
#include <cutils/properties.h>
#include <cutils/sched_policy.h>
#include <sys/syscall.h>
#include <string.h>

using namespace android;
using namespace android::renderscript;

static void Shutdown(Context *rsc);
static void SetPriority(const Context *rsc, int32_t priority);

static RsdHalFunctions FunctionTable = {
    Shutdown,
    NULL,
    NULL,
    SetPriority,
    {
        rsdScriptInit,
        rsdScriptInvokeFunction,
        rsdScriptInvokeRoot,
        rsdScriptInvokeForEach,
        rsdScriptInvokeInit,
        rsdScriptSetGlobalVar,
        rsdScriptSetGlobalBind,
@@ -39,16 +52,134 @@ static RsdHalFunctions FunctionTable = {
};



static void * HelperThreadProc(void *vrsc) {
    Context *rsc = static_cast<Context *>(vrsc);
    RsHal *dc = (RsHal *)rsc->mHal.drv;


    uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount);

    //LOGV("RS helperThread starting %p idx=%i", rsc, idx);

    dc->mWorkers.mLaunchSignals[idx].init();
    dc->mWorkers.mNativeThreadId[idx] = gettid();

#if 0
    typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
    cpu_set_t cpuset;
    memset(&cpuset, 0, sizeof(cpuset));
    cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
    int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
              sizeof(cpuset), &cpuset);
    LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
#endif

    int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct);
    if (status) {
        LOGE("pthread_setspecific %i", status);
    }

    while (!dc->mExit) {
        dc->mWorkers.mLaunchSignals[idx].wait();
        if (dc->mWorkers.mLaunchCallback) {
           dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx);
        }
        android_atomic_dec(&dc->mWorkers.mRunningCount);
        dc->mWorkers.mCompleteSignal.set();
    }

    //LOGV("RS helperThread exited %p idx=%i", rsc, idx);
    return NULL;
}

void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) {
    RsHal *dc = (RsHal *)rsc->mHal.drv;

    dc->mWorkers.mLaunchData = data;
    dc->mWorkers.mLaunchCallback = cbk;
    android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
        dc->mWorkers.mLaunchSignals[ct].set();
    }
    while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) {
        dc->mWorkers.mCompleteSignal.wait();
    }
}

bool rsdHalInit(Context *rsc, uint32_t version_major, uint32_t version_minor) {
    rsc->mHal.funcs = FunctionTable;

    /*
    rsc->mHal.drv = (RsHal *)calloc(1, sizeof(RsHal));
    RsHal *dc = (RsHal *)calloc(1, sizeof(RsHal));
    if (!rsc->mHal.drv) {
        return false;
    }
    */
    rsc->mHal.drv = dc;


    int cpu = sysconf(_SC_NPROCESSORS_ONLN);
    LOGV("RS Launching thread(s), reported CPU count %i", cpu);
    if (cpu < 2) cpu = 0;

    dc->mWorkers.mCount = (uint32_t)cpu;
    dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t));
    dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t));
    dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount];
    dc->mWorkers.mLaunchCallback = NULL;

    dc->mWorkers.mCompleteSignal.init();

    android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
    android_atomic_release_store(0, &dc->mWorkers.mLaunchCount);

    int status;
    pthread_attr_t threadAttr;
    status = pthread_attr_init(&threadAttr);
    if (status) {
        LOGE("Failed to init thread attribute.");
        return false;
    }

    for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
        status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc);
        if (status) {
            dc->mWorkers.mCount = ct;
            LOGE("Created fewer than expected number of RS threads.");
            break;
        }
    }
    while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) {
        usleep(100);
    }

    pthread_attr_destroy(&threadAttr);
    return true;
}


void SetPriority(const Context *rsc, int32_t priority) {
    RsHal *dc = (RsHal *)rsc->mHal.drv;
    for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
        setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority);
    }
}

void Shutdown(Context *rsc) {
    RsHal *dc = (RsHal *)rsc->mHal.drv;

    dc->mExit = true;
    dc->mWorkers.mLaunchData = NULL;
    dc->mWorkers.mLaunchCallback = NULL;
    android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
        dc->mWorkers.mLaunchSignals[ct].set();
    }
    int status;
    void *res;
    for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
        status = pthread_join(dc->mWorkers.mThreadId[ct], &res);
    }
    rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0);
}

+21 −13
Original line number Diff line number Diff line
@@ -20,28 +20,36 @@
#include <rs_hal.h>
#include <bcc/bcc.h>

#include "rsMutex.h"
#include "rsSignal.h"


typedef void (* InvokeFunc_t)(void);
typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);

struct RsHalRec {
typedef struct RsHalRec {
    uint32_t version_major;
    uint32_t version_minor;
};

struct RsHalProgramStoreRec {
};

struct RsHalProgramRasterRec {
};

struct RsHalProgramVertexRec {
};

struct RsHalProgramFragmentRec {

    struct Workers {
        volatile int mRunningCount;
        volatile int mLaunchCount;
        uint32_t mCount;
        pthread_t *mThreadId;
        pid_t *mNativeThreadId;
        android::renderscript::Signal mCompleteSignal;

        android::renderscript::Signal *mLaunchSignals;
        WorkerCallback_t mLaunchCallback;
        void *mLaunchData;
    };
    Workers mWorkers;
    bool mExit;
} RsHal;



void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data);

#endif
+4 −86
Original line number Diff line number Diff line
@@ -554,56 +554,6 @@ void Context::destroyWorkerThreadResources() {
    mExit = true;
}

void * Context::helperThreadProc(void *vrsc) {
     Context *rsc = static_cast<Context *>(vrsc);
     uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount);

     //LOGV("RS helperThread starting %p idx=%i", rsc, idx);

     rsc->mWorkers.mLaunchSignals[idx].init();
     rsc->mWorkers.mNativeThreadId[idx] = gettid();

#if 0
     typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
     cpu_set_t cpuset;
     memset(&cpuset, 0, sizeof(cpuset));
     cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
     int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
               sizeof(cpuset), &cpuset);
     LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
#endif

     setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority);
     int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct);
     if (status) {
         LOGE("pthread_setspecific %i", status);
     }

     while (!rsc->mExit) {
         rsc->mWorkers.mLaunchSignals[idx].wait();
         if (rsc->mWorkers.mLaunchCallback) {
            rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx);
         }
         android_atomic_dec(&rsc->mWorkers.mRunningCount);
         rsc->mWorkers.mCompleteSignal.set();
     }

     //LOGV("RS helperThread exited %p idx=%i", rsc, idx);
     return NULL;
}

void Context::launchThreads(WorkerCallback_t cbk, void *data) {
    mWorkers.mLaunchData = data;
    mWorkers.mLaunchCallback = cbk;
    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
        mWorkers.mLaunchSignals[ct].set();
    }
    while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
        mWorkers.mCompleteSignal.wait();
    }
}

void Context::setPriority(int32_t p) {
    // Note: If we put this in the proper "background" policy
    // the wallpapers can become completly unresponsive at times.
@@ -620,9 +570,6 @@ void Context::setPriority(int32_t p) {
    }
#else
    setpriority(PRIO_PROCESS, mNativeThreadId, p);
    for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
        setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p);
    }
#endif
}

@@ -691,16 +638,8 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) {
    if (!rsdHalInit(this, 0, 0)) {
        return false;
    }
    mHal.funcs.setPriority(this, mThreadPriority);

    int cpu = sysconf(_SC_NPROCESSORS_ONLN);
    LOGV("RS Launching thread(s), reported CPU count %i", cpu);
    if (cpu < 2) cpu = 0;

    mWorkers.mCount = (uint32_t)cpu;
    mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
    mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
    mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
    mWorkers.mLaunchCallback = NULL;
    status = pthread_create(&mThreadId, &threadAttr, threadProc, this);
    if (status) {
        LOGE("Failed to start rs context thread.");
@@ -714,20 +653,6 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) {
        return false;
    }

    mWorkers.mCompleteSignal.init();
    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
    android_atomic_release_store(0, &mWorkers.mLaunchCount);
    for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
        status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
        if (status) {
            mWorkers.mCount = ct;
            LOGE("Created fewer than expected number of RS threads.");
            break;
        }
    }
    while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
        usleep(100);
    }
    pthread_attr_destroy(&threadAttr);
    return true;
}
@@ -744,17 +669,10 @@ Context::~Context() {
    mIO.shutdown();
    int status = pthread_join(mThreadId, &res);

    // Cleanup compute threads.
    mWorkers.mLaunchData = NULL;
    mWorkers.mLaunchCallback = NULL;
    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
        mWorkers.mLaunchSignals[ct].set();
    }
    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
        status = pthread_join(mWorkers.mThreadId[ct], &res);

    if (mHal.funcs.shutdownDriver) {
        mHal.funcs.shutdownDriver(this);
    }
    rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0);

    // Global structure cleanup.
    pthread_mutex_lock(&gInitMutex);
Loading