Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a4d3cbef authored by Wonsik Kim, committed by android-build-merger
Browse files

Merge "vpxdec: parallel execution of 10bit format conversion" into qt-dev

am: 86bd6afc

Change-Id: I18407ad96675f00e60aa54265a4663b1815d32c7
parents 303a2d32 86bd6afc
Loading
Loading
Loading
Loading
+88 −9
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@
#define LOG_TAG "C2SoftVpxDec"
#include <log/log.h>

#include <algorithm>
#include <utility>

#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/foundation/MediaDefs.h>

@@ -303,13 +305,43 @@ private:
#endif
};

// Builds a converter worker bound to the shared conversion queue it will
// service. Construction does not start the thread; the caller starts it
// separately via run().
C2SoftVpxDec::ConverterThread::ConverterThread(
        const std::shared_ptr<Mutexed<ConversionQueue>> &queue)
    : Thread(false),  // NOTE(review): presumably "canCallJava = false" — confirm against Thread.
      mQueue(queue) {
}

// One iteration of the converter worker.
//
// Takes at most one conversion job from the shared queue, runs it with the
// queue unlocked, then decrements the pending-job counter and wakes every
// waiter once the counter reaches zero.
//
// Always returns true.
// NOTE(review): presumably the Thread base keeps re-invoking threadLoop()
// while it returns true and no exit was requested — confirm against Thread.
bool C2SoftVpxDec::ConverterThread::threadLoop() {
    Mutexed<ConversionQueue>::Locked queue(*mQueue);
    if (queue->entries.empty()) {
        // Wait a single time. If we were woken without work (e.g. a wake-up
        // meant for another waiter on the same condition), simply return and
        // let the next iteration re-evaluate the queue.
        queue.waitForCondition(queue->cond);
        if (queue->entries.empty()) {
            return true;
        }
    }
    // Move the job out instead of copying it: the entry is popped right away,
    // and copying the closure (with all its captures) while holding the queue
    // lock is wasted work inside the critical section.
    std::function<void()> convert = std::move(queue->entries.front());
    queue->entries.pop_front();
    if (!queue->entries.empty()) {
        // More jobs remain; wake another waiter so it can pick one up.
        queue->cond.signal();
    }
    // Run the conversion without holding the lock so jobs execute in parallel.
    queue.unlock();

    convert();

    queue.lock();
    // The last job to finish wakes everyone waiting on the condition, which
    // includes whoever is waiting for numPending to reach zero.
    if (--queue->numPending == 0u) {
        queue->cond.broadcast();
    }
    return true;
}

// Constructs the decoder component. The name, node id and interface
// implementation are wrapped in a SimpleInterface and handed to the
// SimpleC2Component base; the remaining members get their initial values.
//
// NOTE(review): this text comes from a rendered diff. The first
// `mCodecCtx(nullptr) {` line below appears to be the pre-change line and the
// three initializer lines after it its replacement — confirm against the
// actual source file before treating this as compilable code.
C2SoftVpxDec::C2SoftVpxDec(
        const char *name,
        c2_node_id_t id,
        const std::shared_ptr<IntfImpl> &intfImpl)
    : SimpleC2Component(std::make_shared<SimpleInterface<IntfImpl>>(name, id, intfImpl)),
      mIntf(intfImpl),
      mCodecCtx(nullptr) {
      mCodecCtx(nullptr),
      // Initial value only; initDecoder() overwrites it with GetCPUCoreCount().
      mCoreCount(1),
      // Queue handed to every ConverterThread created in initDecoder().
      mQueue(new Mutexed<ConversionQueue>) {
}

C2SoftVpxDec::~C2SoftVpxDec() {
@@ -399,7 +431,7 @@ status_t C2SoftVpxDec::initDecoder() {

    vpx_codec_dec_cfg_t cfg;
    memset(&cfg, 0, sizeof(vpx_codec_dec_cfg_t));
    cfg.threads = GetCPUCoreCount();
    cfg.threads = mCoreCount = GetCPUCoreCount();

    vpx_codec_flags_t flags;
    memset(&flags, 0, sizeof(vpx_codec_flags_t));
@@ -413,6 +445,18 @@ status_t C2SoftVpxDec::initDecoder() {
        return UNKNOWN_ERROR;
    }

    if (mMode == MODE_VP9) {
        using namespace std::string_literals;
        for (int i = 0; i < mCoreCount; ++i) {
            sp<ConverterThread> thread(new ConverterThread(mQueue));
            mConverterThreads.push_back(thread);
            if (thread->run(("vp9conv #"s + std::to_string(i)).c_str(),
                            ANDROID_PRIORITY_AUDIO) != OK) {
                return UNKNOWN_ERROR;
            }
        }
    }

    return OK;
}

@@ -422,6 +466,21 @@ status_t C2SoftVpxDec::destroyDecoder() {
        delete mCodecCtx;
        mCodecCtx = nullptr;
    }
    bool running = true;
    for (const sp<ConverterThread> &thread : mConverterThreads) {
        thread->requestExit();
    }
    while (running) {
        mQueue->lock()->cond.broadcast();
        running = false;
        for (const sp<ConverterThread> &thread : mConverterThreads) {
            if (thread->isRunning()) {
                running = true;
                break;
            }
        }
    }
    mConverterThreads.clear();

    return OK;
}
@@ -759,10 +818,30 @@ bool C2SoftVpxDec::outputBuffer(
        const uint16_t *srcV = (const uint16_t *)img->planes[VPX_PLANE_V];

        if (format == HAL_PIXEL_FORMAT_RGBA_1010102) {
            convertYUV420Planar16ToY410((uint32_t *)dst, srcY, srcU, srcV, srcYStride / 2,
                                    srcUStride / 2, srcVStride / 2,
                                    dstYStride / sizeof(uint32_t),
                                    mWidth, mHeight);
            Mutexed<ConversionQueue>::Locked queue(*mQueue);
            size_t i = 0;
            constexpr size_t kHeight = 64;
            for (; i < mHeight; i += kHeight) {
                queue->entries.push_back(
                        [dst, srcY, srcU, srcV,
                         srcYStride, srcUStride, srcVStride, dstYStride,
                         width = mWidth, height = std::min(mHeight - i, kHeight)] {
                            convertYUV420Planar16ToY410(
                                    (uint32_t *)dst, srcY, srcU, srcV, srcYStride / 2,
                                    srcUStride / 2, srcVStride / 2, dstYStride / sizeof(uint32_t),
                                    width, height);
                        });
                srcY += srcYStride / 2 * kHeight;
                srcU += srcUStride / 2 * (kHeight / 2);
                srcV += srcVStride / 2 * (kHeight / 2);
                dst += dstYStride * kHeight;
            }
            CHECK_EQ(0u, queue->numPending);
            queue->numPending = queue->entries.size();
            while (queue->numPending > 0) {
                queue->cond.signal();
                queue.waitForCondition(queue->cond);
            }
        } else {
            convertYUV420Planar16ToYUV420Planar(dst, srcY, srcU, srcV, srcYStride / 2,
                                                srcUStride / 2, srcVStride / 2,
+22 −0
Original line number Diff line number Diff line
@@ -50,6 +50,19 @@ struct C2SoftVpxDec : public SimpleC2Component {
        MODE_VP9,
    } mMode;

    struct ConversionQueue;

    // Worker thread that executes conversion jobs taken from a shared
    // ConversionQueue.
    class ConverterThread : public Thread {
    public:
        // Binds the worker to the queue it will service.
        explicit ConverterThread(const std::shared_ptr<Mutexed<ConversionQueue>> &queue);

        // Performs one pass of the worker loop.
        bool threadLoop() override;

        ~ConverterThread() override = default;

    private:
        // Queue shared with the component that created this thread.
        std::shared_ptr<Mutexed<ConversionQueue>> mQueue;
    };

    std::shared_ptr<IntfImpl> mIntf;
    vpx_codec_ctx_t *mCodecCtx;
    bool mFrameParallelMode;  // Frame parallel is only supported by VP9 decoder.
@@ -59,6 +72,15 @@ struct C2SoftVpxDec : public SimpleC2Component {
    bool mSignalledOutputEos;
    bool mSignalledError;

    int mCoreCount;
    // State shared between the code that enqueues conversion jobs and the
    // converter threads; it is accessed through Mutexed<ConversionQueue>.
    struct ConversionQueue {
        // Conversion jobs that no worker has picked up yet.
        std::list<std::function<void()>> entries;

        // Single condition used both to announce available jobs and to
        // announce that numPending has dropped to zero.
        Condition cond;

        // Number of enqueued jobs that have not finished running.
        size_t numPending = 0u;
    };
    std::shared_ptr<Mutexed<ConversionQueue>> mQueue;
    std::vector<sp<ConverterThread>> mConverterThreads;

    status_t initDecoder();
    status_t destroyDecoder();
    void finishWork(uint64_t index, const std::unique_ptr<C2Work> &work,