Loading libs/rs/driver/rsdBcc.cpp +217 −4 Original line number Diff line number Diff line Loading @@ -55,6 +55,15 @@ struct DrvScript { }; static Script * setTLS(Script *sc) { ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); rsAssert(tls); Script *old = tls->mScript; tls->mScript = sc; return old; } // Input: cacheDir // Input: resName // Input: extName Loading Loading @@ -234,13 +243,215 @@ error: } typedef struct { Context *rsc; Script *script; const Allocation * ain; Allocation * aout; const void * usr; uint32_t mSliceSize; volatile int mSliceNum; const uint8_t *ptrIn; uint32_t eStrideIn; uint8_t *ptrOut; uint32_t eStrideOut; uint32_t xStart; uint32_t xEnd; uint32_t yStart; uint32_t yEnd; uint32_t zStart; uint32_t zEnd; uint32_t arrayStart; uint32_t arrayEnd; uint32_t dimX; uint32_t dimY; uint32_t dimZ; uint32_t dimArray; } MTLaunchStruct; typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); static void wc_xy(void *usr, uint32_t idx) { MTLaunchStruct *mtls = (MTLaunchStruct *)usr; while (1) { uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; uint32_t yEnd = yStart + mtls->mSliceSize; yEnd = rsMin(yEnd, mtls->yEnd); if (yEnd <= yStart) { return; } //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); for (uint32_t y = yStart; y < yEnd; y++) { uint32_t offset = mtls->dimX * y; uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset); const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset); for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) { ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0); xPtrIn += mtls->eStrideIn; xPtrOut += mtls->eStrideOut; } } } } static void wc_x(void *usr, uint32_t idx) { MTLaunchStruct *mtls = (MTLaunchStruct *)usr; while (1) { uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; uint32_t xEnd = xStart + mtls->mSliceSize; xEnd = rsMin(xEnd, mtls->xEnd); if (xEnd <= xStart) { return; } //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart); const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart); for (uint32_t x = xStart; x < xEnd; x++) { ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0); xPtrIn += mtls->eStrideIn; xPtrOut += mtls->eStrideOut; } } } void rsdScriptInvokeForEach(const Context *rsc, Script *s, const Allocation * ain, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc) { RsHal * dc = (RsHal *)rsc->mHal.drv; MTLaunchStruct mtls; memset(&mtls, 0, sizeof(mtls)); if (ain) { mtls.dimX = ain->getType()->getDimX(); mtls.dimY = ain->getType()->getDimY(); mtls.dimZ = ain->getType()->getDimZ(); //mtls.dimArray = ain->getType()->getDimArray(); } else if (aout) { mtls.dimX = aout->getType()->getDimX(); mtls.dimY = aout->getType()->getDimY(); mtls.dimZ = aout->getType()->getDimZ(); //mtls.dimArray = aout->getType()->getDimArray(); } else { rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); return; } if (!sc || (sc->xEnd == 0)) { mtls.xEnd = mtls.dimX; } else { rsAssert(sc->xStart < mtls.dimX); rsAssert(sc->xEnd <= mtls.dimX); rsAssert(sc->xStart < sc->xEnd); mtls.xStart = rsMin(mtls.dimX, sc->xStart); mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); if (mtls.xStart >= mtls.xEnd) return; } if (!sc || (sc->yEnd == 0)) { mtls.yEnd = mtls.dimY; } else { rsAssert(sc->yStart < mtls.dimY); rsAssert(sc->yEnd <= mtls.dimY); rsAssert(sc->yStart < sc->yEnd); mtls.yStart = rsMin(mtls.dimY, sc->yStart); mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); if (mtls.yStart >= mtls.yEnd) return; } mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); rsAssert(ain->getType()->getDimZ() == 0); Context *mrsc = (Context *)rsc; Script * oldTLS = setTLS(s); mtls.rsc = mrsc; mtls.ain = ain; mtls.aout = aout; mtls.script = s; mtls.usr = usr; mtls.mSliceSize = 10; mtls.mSliceNum = 0; mtls.ptrIn = NULL; mtls.eStrideIn = 0; if (ain) { mtls.ptrIn = (const uint8_t *)ain->getPtr(); mtls.eStrideIn = ain->getType()->getElementSizeBytes(); } mtls.ptrOut = NULL; mtls.eStrideOut = 0; if (aout) { mtls.ptrOut = (uint8_t *)aout->getPtr(); mtls.eStrideOut = aout->getType()->getElementSizeBytes(); } if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { if (mtls.dimY > 1) { rsdLaunchThreads(mrsc, wc_xy, &mtls); } else { rsdLaunchThreads(mrsc, wc_x, &mtls); } //LOGE("launch 1"); } else { //LOGE("launch 3"); for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) { for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) { for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) { uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar + mtls.dimX * mtls.dimY * z + mtls.dimX * y; uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset); const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset); for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) { ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar); xPtrIn += mtls.eStrideIn; xPtrOut += mtls.eStrideOut; } } } } } setTLS(oldTLS); } int rsdScriptInvokeRoot(const Context *dc, const Script *script) { int rsdScriptInvokeRoot(const Context *dc, Script *script) { DrvScript *drv = (DrvScript *)script->mHal.drv; return drv->mRoot(); Script * oldTLS = setTLS(script); int ret = drv->mRoot(); setTLS(oldTLS); return ret; } void rsdScriptInvokeInit(const Context *dc, const Script *script) { void rsdScriptInvokeInit(const Context *dc, Script *script) { DrvScript *drv = (DrvScript *)script->mHal.drv; if (drv->mInit) { Loading @@ -249,15 +460,17 @@ void rsdScriptInvokeInit(const Context *dc, const Script *script) { } void rsdScriptInvokeFunction(const Context *dc, const Script *script, void rsdScriptInvokeFunction(const Context *dc, Script *script, uint32_t slot, const void *params, size_t paramLength) { DrvScript *drv = (DrvScript *)script->mHal.drv; //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); Script * oldTLS = setTLS(script); ((void (*)(const void *, uint32_t)) drv->mInvokeFunctions[slot])(params, paramLength); setTLS(oldTLS); } void rsdScriptSetGlobalVar(const Context *dc, const Script *script, Loading libs/rs/driver/rsdBcc.h +12 −3 Original line number Diff line number Diff line Loading @@ -25,14 +25,23 @@ bool rsdScriptInit(const android::renderscript::Context *, android::renderscript uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags, android::renderscript::RsHalSymbolLookupFunc lookupFunc); void rsdScriptInvokeFunction(const android::renderscript::Context *dc, const android::renderscript::Script *script, android::renderscript::Script *script, uint32_t slot, const void *params, size_t paramLength); void rsdScriptInvokeForEach(const android::renderscript::Context *rsc, android::renderscript::Script *s, const android::renderscript::Allocation * ain, android::renderscript::Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc); int rsdScriptInvokeRoot(const android::renderscript::Context *dc, const android::renderscript::Script *script); android::renderscript::Script *script); void rsdScriptInvokeInit(const android::renderscript::Context *dc, const android::renderscript::Script *script); android::renderscript::Script *script); void rsdScriptSetGlobalVar(const android::renderscript::Context *, const android::renderscript::Script *, Loading libs/rs/driver/rsdCore.cpp +135 −4 Original line number Diff line number Diff line Loading @@ -20,16 +20,29 @@ #include <malloc.h> #include "rsContext.h" #include <sys/types.h> #include <sys/resource.h> #include <sched.h> #include <cutils/properties.h> #include <cutils/sched_policy.h> #include <sys/syscall.h> #include <string.h> using namespace android; using namespace android::renderscript; static void Shutdown(Context *rsc); static void SetPriority(const Context *rsc, int32_t priority); static RsdHalFunctions FunctionTable = { Shutdown, NULL, NULL, SetPriority, { rsdScriptInit, rsdScriptInvokeFunction, rsdScriptInvokeRoot, rsdScriptInvokeForEach, rsdScriptInvokeInit, rsdScriptSetGlobalVar, rsdScriptSetGlobalBind, Loading @@ -39,16 +52,134 @@ static RsdHalFunctions FunctionTable = { }; static void * HelperThreadProc(void *vrsc) { Context *rsc = static_cast<Context *>(vrsc); RsHal *dc = (RsHal *)rsc->mHal.drv; uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount); //LOGV("RS helperThread starting %p idx=%i", rsc, idx); dc->mWorkers.mLaunchSignals[idx].init(); dc->mWorkers.mNativeThreadId[idx] = gettid(); #if 0 typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; cpu_set_t cpuset; memset(&cpuset, 0, sizeof(cpuset)); cpuset.bits[idx / 64] |= 1ULL << (idx % 64); int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], sizeof(cpuset), &cpuset); LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); #endif int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct); if (status) { LOGE("pthread_setspecific %i", status); } while (!dc->mExit) { dc->mWorkers.mLaunchSignals[idx].wait(); if (dc->mWorkers.mLaunchCallback) { dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx); } android_atomic_dec(&dc->mWorkers.mRunningCount); dc->mWorkers.mCompleteSignal.set(); } //LOGV("RS helperThread exited %p idx=%i", rsc, idx); return NULL; } void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) { RsHal *dc = (RsHal *)rsc->mHal.drv; dc->mWorkers.mLaunchData = data; dc->mWorkers.mLaunchCallback = cbk; android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { dc->mWorkers.mLaunchSignals[ct].set(); } while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { dc->mWorkers.mCompleteSignal.wait(); } } bool rsdHalInit(Context *rsc, uint32_t version_major, uint32_t version_minor) { rsc->mHal.funcs = FunctionTable; /* rsc->mHal.drv = (RsHal *)calloc(1, sizeof(RsHal)); RsHal *dc = (RsHal *)calloc(1, sizeof(RsHal)); if (!rsc->mHal.drv) { return false; } */ rsc->mHal.drv = dc; int cpu = sysconf(_SC_NPROCESSORS_ONLN); LOGV("RS Launching thread(s), reported CPU count %i", cpu); if (cpu < 2) cpu = 0; dc->mWorkers.mCount = (uint32_t)cpu; dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t)); dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t)); dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount]; dc->mWorkers.mLaunchCallback = NULL; dc->mWorkers.mCompleteSignal.init(); android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); android_atomic_release_store(0, &dc->mWorkers.mLaunchCount); int status; pthread_attr_t threadAttr; status = pthread_attr_init(&threadAttr); if (status) { LOGE("Failed to init thread attribute."); return false; } for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc); if (status) { dc->mWorkers.mCount = ct; LOGE("Created fewer than expected number of RS threads."); break; } } while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { usleep(100); } pthread_attr_destroy(&threadAttr); return true; } void SetPriority(const Context *rsc, int32_t priority) { RsHal *dc = (RsHal *)rsc->mHal.drv; for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority); } } void Shutdown(Context *rsc) { RsHal *dc = (RsHal *)rsc->mHal.drv; dc->mExit = true; dc->mWorkers.mLaunchData = NULL; dc->mWorkers.mLaunchCallback = NULL; android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { dc->mWorkers.mLaunchSignals[ct].set(); } int status; void *res; for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { status = pthread_join(dc->mWorkers.mThreadId[ct], &res); } rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0); } libs/rs/driver/rsdCore.h +21 −13 Original line number Diff line number Diff line Loading @@ -20,28 +20,36 @@ #include <rs_hal.h> #include <bcc/bcc.h> #include "rsMutex.h" #include "rsSignal.h" typedef void (* InvokeFunc_t)(void); typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); struct RsHalRec { typedef struct RsHalRec { uint32_t version_major; uint32_t version_minor; }; struct RsHalProgramStoreRec { }; struct RsHalProgramRasterRec { }; struct RsHalProgramVertexRec { }; struct RsHalProgramFragmentRec { struct Workers { volatile int mRunningCount; volatile int mLaunchCount; uint32_t mCount; pthread_t *mThreadId; pid_t *mNativeThreadId; android::renderscript::Signal mCompleteSignal; android::renderscript::Signal *mLaunchSignals; WorkerCallback_t mLaunchCallback; void *mLaunchData; }; Workers mWorkers; bool mExit; } RsHal; void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data); #endif libs/rs/rsContext.cpp +4 −86 Original line number Diff line number Diff line Loading @@ -554,56 +554,6 @@ void Context::destroyWorkerThreadResources() { mExit = true; } void * Context::helperThreadProc(void *vrsc) { Context *rsc = static_cast<Context *>(vrsc); uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount); //LOGV("RS helperThread starting %p idx=%i", rsc, idx); rsc->mWorkers.mLaunchSignals[idx].init(); rsc->mWorkers.mNativeThreadId[idx] = gettid(); #if 0 typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; cpu_set_t cpuset; memset(&cpuset, 0, sizeof(cpuset)); cpuset.bits[idx / 64] |= 1ULL << (idx % 64); int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], sizeof(cpuset), &cpuset); LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); #endif setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority); int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct); if (status) { LOGE("pthread_setspecific %i", status); } while (!rsc->mExit) { rsc->mWorkers.mLaunchSignals[idx].wait(); if (rsc->mWorkers.mLaunchCallback) { rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx); } android_atomic_dec(&rsc->mWorkers.mRunningCount); rsc->mWorkers.mCompleteSignal.set(); } //LOGV("RS helperThread exited %p idx=%i", rsc, idx); return NULL; } void Context::launchThreads(WorkerCallback_t cbk, void *data) { mWorkers.mLaunchData = data; mWorkers.mLaunchCallback = cbk; android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { mWorkers.mLaunchSignals[ct].set(); } while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { mWorkers.mCompleteSignal.wait(); } } void Context::setPriority(int32_t p) { // Note: If we put this in the proper "background" policy // the wallpapers can become completly unresponsive at times. Loading @@ -620,9 +570,6 @@ void Context::setPriority(int32_t p) { } #else setpriority(PRIO_PROCESS, mNativeThreadId, p); for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p); } #endif } Loading Loading @@ -691,16 +638,8 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) { if (!rsdHalInit(this, 0, 0)) { return false; } mHal.funcs.setPriority(this, mThreadPriority); int cpu = sysconf(_SC_NPROCESSORS_ONLN); LOGV("RS Launching thread(s), reported CPU count %i", cpu); if (cpu < 2) cpu = 0; mWorkers.mCount = (uint32_t)cpu; mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t)); mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t)); mWorkers.mLaunchSignals = new Signal[mWorkers.mCount]; mWorkers.mLaunchCallback = NULL; status = pthread_create(&mThreadId, &threadAttr, threadProc, this); if (status) { LOGE("Failed to start rs context thread."); Loading @@ -714,20 +653,6 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) { return false; } mWorkers.mCompleteSignal.init(); android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); android_atomic_release_store(0, &mWorkers.mLaunchCount); for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this); if (status) { mWorkers.mCount = ct; LOGE("Created fewer than expected number of RS threads."); break; } } while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { usleep(100); } pthread_attr_destroy(&threadAttr); return true; } Loading @@ -744,17 +669,10 @@ Context::~Context() { mIO.shutdown(); int status = pthread_join(mThreadId, &res); // Cleanup compute threads. mWorkers.mLaunchData = NULL; mWorkers.mLaunchCallback = NULL; android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { mWorkers.mLaunchSignals[ct].set(); } for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { status = pthread_join(mWorkers.mThreadId[ct], &res); if (mHal.funcs.shutdownDriver) { mHal.funcs.shutdownDriver(this); } rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0); // Global structure cleanup. pthread_mutex_lock(&gInitMutex); Loading Loading
libs/rs/driver/rsdBcc.cpp +217 −4 Original line number Diff line number Diff line Loading @@ -55,6 +55,15 @@ struct DrvScript { }; static Script * setTLS(Script *sc) { ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); rsAssert(tls); Script *old = tls->mScript; tls->mScript = sc; return old; } // Input: cacheDir // Input: resName // Input: extName Loading Loading @@ -234,13 +243,215 @@ error: } typedef struct { Context *rsc; Script *script; const Allocation * ain; Allocation * aout; const void * usr; uint32_t mSliceSize; volatile int mSliceNum; const uint8_t *ptrIn; uint32_t eStrideIn; uint8_t *ptrOut; uint32_t eStrideOut; uint32_t xStart; uint32_t xEnd; uint32_t yStart; uint32_t yEnd; uint32_t zStart; uint32_t zEnd; uint32_t arrayStart; uint32_t arrayEnd; uint32_t dimX; uint32_t dimY; uint32_t dimZ; uint32_t dimArray; } MTLaunchStruct; typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); static void wc_xy(void *usr, uint32_t idx) { MTLaunchStruct *mtls = (MTLaunchStruct *)usr; while (1) { uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; uint32_t yEnd = yStart + mtls->mSliceSize; yEnd = rsMin(yEnd, mtls->yEnd); if (yEnd <= yStart) { return; } //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); for (uint32_t y = yStart; y < yEnd; y++) { uint32_t offset = mtls->dimX * y; uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset); const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset); for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) { ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0); xPtrIn += mtls->eStrideIn; xPtrOut += mtls->eStrideOut; } } } } static void wc_x(void *usr, uint32_t idx) { MTLaunchStruct *mtls = (MTLaunchStruct *)usr; while (1) { uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; uint32_t xEnd = xStart + mtls->mSliceSize; xEnd = rsMin(xEnd, mtls->xEnd); if (xEnd <= xStart) { return; } //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart); const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart); for (uint32_t x = xStart; x < xEnd; x++) { ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0); xPtrIn += mtls->eStrideIn; xPtrOut += mtls->eStrideOut; } } } void rsdScriptInvokeForEach(const Context *rsc, Script *s, const Allocation * ain, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc) { RsHal * dc = (RsHal *)rsc->mHal.drv; MTLaunchStruct mtls; memset(&mtls, 0, sizeof(mtls)); if (ain) { mtls.dimX = ain->getType()->getDimX(); mtls.dimY = ain->getType()->getDimY(); mtls.dimZ = ain->getType()->getDimZ(); //mtls.dimArray = ain->getType()->getDimArray(); } else if (aout) { mtls.dimX = aout->getType()->getDimX(); mtls.dimY = aout->getType()->getDimY(); mtls.dimZ = aout->getType()->getDimZ(); //mtls.dimArray = aout->getType()->getDimArray(); } else { rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); return; } if (!sc || (sc->xEnd == 0)) { mtls.xEnd = mtls.dimX; } else { rsAssert(sc->xStart < mtls.dimX); rsAssert(sc->xEnd <= mtls.dimX); rsAssert(sc->xStart < sc->xEnd); mtls.xStart = rsMin(mtls.dimX, sc->xStart); mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); if (mtls.xStart >= mtls.xEnd) return; } if (!sc || (sc->yEnd == 0)) { mtls.yEnd = mtls.dimY; } else { rsAssert(sc->yStart < mtls.dimY); rsAssert(sc->yEnd <= mtls.dimY); rsAssert(sc->yStart < sc->yEnd); mtls.yStart = rsMin(mtls.dimY, sc->yStart); mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); if (mtls.yStart >= mtls.yEnd) return; } mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); rsAssert(ain->getType()->getDimZ() == 0); Context *mrsc = (Context *)rsc; Script * oldTLS = setTLS(s); mtls.rsc = mrsc; mtls.ain = ain; mtls.aout = aout; mtls.script = s; mtls.usr = usr; mtls.mSliceSize = 10; mtls.mSliceNum = 0; mtls.ptrIn = NULL; mtls.eStrideIn = 0; if (ain) { mtls.ptrIn = (const uint8_t *)ain->getPtr(); mtls.eStrideIn = ain->getType()->getElementSizeBytes(); } mtls.ptrOut = NULL; mtls.eStrideOut = 0; if (aout) { mtls.ptrOut = (uint8_t *)aout->getPtr(); mtls.eStrideOut = aout->getType()->getElementSizeBytes(); } if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { if (mtls.dimY > 1) { rsdLaunchThreads(mrsc, wc_xy, &mtls); } else { rsdLaunchThreads(mrsc, wc_x, &mtls); } //LOGE("launch 1"); } else { //LOGE("launch 3"); for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) { for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) { for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) { uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar + mtls.dimX * mtls.dimY * z + mtls.dimX * y; uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset); const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset); for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) { ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar); xPtrIn += mtls.eStrideIn; xPtrOut += mtls.eStrideOut; } } } } } setTLS(oldTLS); } int rsdScriptInvokeRoot(const Context *dc, const Script *script) { int rsdScriptInvokeRoot(const Context *dc, Script *script) { DrvScript *drv = (DrvScript *)script->mHal.drv; return drv->mRoot(); Script * oldTLS = setTLS(script); int ret = drv->mRoot(); setTLS(oldTLS); return ret; } void rsdScriptInvokeInit(const Context *dc, const Script *script) { void rsdScriptInvokeInit(const Context *dc, Script *script) { DrvScript *drv = (DrvScript *)script->mHal.drv; if (drv->mInit) { Loading @@ -249,15 +460,17 @@ void rsdScriptInvokeInit(const Context *dc, const Script *script) { } void rsdScriptInvokeFunction(const Context *dc, const Script *script, void rsdScriptInvokeFunction(const Context *dc, Script *script, uint32_t slot, const void *params, size_t paramLength) { DrvScript *drv = (DrvScript *)script->mHal.drv; //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); Script * oldTLS = setTLS(script); ((void (*)(const void *, uint32_t)) drv->mInvokeFunctions[slot])(params, paramLength); setTLS(oldTLS); } void rsdScriptSetGlobalVar(const Context *dc, const Script *script, Loading
libs/rs/driver/rsdBcc.h +12 −3 Original line number Diff line number Diff line Loading @@ -25,14 +25,23 @@ bool rsdScriptInit(const android::renderscript::Context *, android::renderscript uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags, android::renderscript::RsHalSymbolLookupFunc lookupFunc); void rsdScriptInvokeFunction(const android::renderscript::Context *dc, const android::renderscript::Script *script, android::renderscript::Script *script, uint32_t slot, const void *params, size_t paramLength); void rsdScriptInvokeForEach(const android::renderscript::Context *rsc, android::renderscript::Script *s, const android::renderscript::Allocation * ain, android::renderscript::Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc); int rsdScriptInvokeRoot(const android::renderscript::Context *dc, const android::renderscript::Script *script); android::renderscript::Script *script); void rsdScriptInvokeInit(const android::renderscript::Context *dc, const android::renderscript::Script *script); android::renderscript::Script *script); void rsdScriptSetGlobalVar(const android::renderscript::Context *, const android::renderscript::Script *, Loading
libs/rs/driver/rsdCore.cpp +135 −4 Original line number Diff line number Diff line Loading @@ -20,16 +20,29 @@ #include <malloc.h> #include "rsContext.h" #include <sys/types.h> #include <sys/resource.h> #include <sched.h> #include <cutils/properties.h> #include <cutils/sched_policy.h> #include <sys/syscall.h> #include <string.h> using namespace android; using namespace android::renderscript; static void Shutdown(Context *rsc); static void SetPriority(const Context *rsc, int32_t priority); static RsdHalFunctions FunctionTable = { Shutdown, NULL, NULL, SetPriority, { rsdScriptInit, rsdScriptInvokeFunction, rsdScriptInvokeRoot, rsdScriptInvokeForEach, rsdScriptInvokeInit, rsdScriptSetGlobalVar, rsdScriptSetGlobalBind, Loading @@ -39,16 +52,134 @@ static RsdHalFunctions FunctionTable = { }; static void * HelperThreadProc(void *vrsc) { Context *rsc = static_cast<Context *>(vrsc); RsHal *dc = (RsHal *)rsc->mHal.drv; uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount); //LOGV("RS helperThread starting %p idx=%i", rsc, idx); dc->mWorkers.mLaunchSignals[idx].init(); dc->mWorkers.mNativeThreadId[idx] = gettid(); #if 0 typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; cpu_set_t cpuset; memset(&cpuset, 0, sizeof(cpuset)); cpuset.bits[idx / 64] |= 1ULL << (idx % 64); int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], sizeof(cpuset), &cpuset); LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); #endif int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct); if (status) { LOGE("pthread_setspecific %i", status); } while (!dc->mExit) { dc->mWorkers.mLaunchSignals[idx].wait(); if (dc->mWorkers.mLaunchCallback) { dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx); } android_atomic_dec(&dc->mWorkers.mRunningCount); dc->mWorkers.mCompleteSignal.set(); } //LOGV("RS helperThread exited %p idx=%i", rsc, idx); return NULL; } void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) { RsHal *dc = (RsHal *)rsc->mHal.drv; dc->mWorkers.mLaunchData = data; dc->mWorkers.mLaunchCallback = cbk; android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { dc->mWorkers.mLaunchSignals[ct].set(); } while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { dc->mWorkers.mCompleteSignal.wait(); } } bool rsdHalInit(Context *rsc, uint32_t version_major, uint32_t version_minor) { rsc->mHal.funcs = FunctionTable; /* rsc->mHal.drv = (RsHal *)calloc(1, sizeof(RsHal)); RsHal *dc = (RsHal *)calloc(1, sizeof(RsHal)); if (!rsc->mHal.drv) { return false; } */ rsc->mHal.drv = dc; int cpu = sysconf(_SC_NPROCESSORS_ONLN); LOGV("RS Launching thread(s), reported CPU count %i", cpu); if (cpu < 2) cpu = 0; dc->mWorkers.mCount = (uint32_t)cpu; dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t)); dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t)); dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount]; dc->mWorkers.mLaunchCallback = NULL; dc->mWorkers.mCompleteSignal.init(); android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); android_atomic_release_store(0, &dc->mWorkers.mLaunchCount); int status; pthread_attr_t threadAttr; status = pthread_attr_init(&threadAttr); if (status) { LOGE("Failed to init thread attribute."); return false; } for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc); if (status) { dc->mWorkers.mCount = ct; LOGE("Created fewer than expected number of RS threads."); break; } } while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { usleep(100); } pthread_attr_destroy(&threadAttr); return true; } void SetPriority(const Context *rsc, int32_t priority) { RsHal *dc = (RsHal *)rsc->mHal.drv; for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority); } } void Shutdown(Context *rsc) { RsHal *dc = (RsHal *)rsc->mHal.drv; dc->mExit = true; dc->mWorkers.mLaunchData = NULL; dc->mWorkers.mLaunchCallback = NULL; android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { dc->mWorkers.mLaunchSignals[ct].set(); } int status; void *res; for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { status = pthread_join(dc->mWorkers.mThreadId[ct], &res); } rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0); }
libs/rs/driver/rsdCore.h +21 −13 Original line number Diff line number Diff line Loading @@ -20,28 +20,36 @@ #include <rs_hal.h> #include <bcc/bcc.h> #include "rsMutex.h" #include "rsSignal.h" typedef void (* InvokeFunc_t)(void); typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); struct RsHalRec { typedef struct RsHalRec { uint32_t version_major; uint32_t version_minor; }; struct RsHalProgramStoreRec { }; struct RsHalProgramRasterRec { }; struct RsHalProgramVertexRec { }; struct RsHalProgramFragmentRec { struct Workers { volatile int mRunningCount; volatile int mLaunchCount; uint32_t mCount; pthread_t *mThreadId; pid_t *mNativeThreadId; android::renderscript::Signal mCompleteSignal; android::renderscript::Signal *mLaunchSignals; WorkerCallback_t mLaunchCallback; void *mLaunchData; }; Workers mWorkers; bool mExit; } RsHal; void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data); #endif
libs/rs/rsContext.cpp +4 −86 Original line number Diff line number Diff line Loading @@ -554,56 +554,6 @@ void Context::destroyWorkerThreadResources() { mExit = true; } void * Context::helperThreadProc(void *vrsc) { Context *rsc = static_cast<Context *>(vrsc); uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount); //LOGV("RS helperThread starting %p idx=%i", rsc, idx); rsc->mWorkers.mLaunchSignals[idx].init(); rsc->mWorkers.mNativeThreadId[idx] = gettid(); #if 0 typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; cpu_set_t cpuset; memset(&cpuset, 0, sizeof(cpuset)); cpuset.bits[idx / 64] |= 1ULL << (idx % 64); int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], sizeof(cpuset), &cpuset); LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); #endif setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority); int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct); if (status) { LOGE("pthread_setspecific %i", status); } while (!rsc->mExit) { rsc->mWorkers.mLaunchSignals[idx].wait(); if (rsc->mWorkers.mLaunchCallback) { rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx); } android_atomic_dec(&rsc->mWorkers.mRunningCount); rsc->mWorkers.mCompleteSignal.set(); } //LOGV("RS helperThread exited %p idx=%i", rsc, idx); return NULL; } void Context::launchThreads(WorkerCallback_t cbk, void *data) { mWorkers.mLaunchData = data; mWorkers.mLaunchCallback = cbk; android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { mWorkers.mLaunchSignals[ct].set(); } while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { mWorkers.mCompleteSignal.wait(); } } void Context::setPriority(int32_t p) { // Note: If we put this in the proper "background" policy // the wallpapers can become completly unresponsive at times. Loading @@ -620,9 +570,6 @@ void Context::setPriority(int32_t p) { } #else setpriority(PRIO_PROCESS, mNativeThreadId, p); for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p); } #endif } Loading Loading @@ -691,16 +638,8 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) { if (!rsdHalInit(this, 0, 0)) { return false; } mHal.funcs.setPriority(this, mThreadPriority); int cpu = sysconf(_SC_NPROCESSORS_ONLN); LOGV("RS Launching thread(s), reported CPU count %i", cpu); if (cpu < 2) cpu = 0; mWorkers.mCount = (uint32_t)cpu; mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t)); mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t)); mWorkers.mLaunchSignals = new Signal[mWorkers.mCount]; mWorkers.mLaunchCallback = NULL; status = pthread_create(&mThreadId, &threadAttr, threadProc, this); if (status) { LOGE("Failed to start rs context thread."); Loading @@ -714,20 +653,6 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) { return false; } mWorkers.mCompleteSignal.init(); android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); android_atomic_release_store(0, &mWorkers.mLaunchCount); for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this); if (status) { mWorkers.mCount = ct; LOGE("Created fewer than expected number of RS threads."); break; } } while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { usleep(100); } pthread_attr_destroy(&threadAttr); return true; } Loading @@ -744,17 +669,10 @@ Context::~Context() { mIO.shutdown(); int status = pthread_join(mThreadId, &res); // Cleanup compute threads. mWorkers.mLaunchData = NULL; mWorkers.mLaunchCallback = NULL; android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { mWorkers.mLaunchSignals[ct].set(); } for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { status = pthread_join(mWorkers.mThreadId[ct], &res); if (mHal.funcs.shutdownDriver) { mHal.funcs.shutdownDriver(this); } rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0); // Global structure cleanup. pthread_mutex_lock(&gInitMutex); Loading