Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ead6d37a authored by Xusong Wang's avatar Xusong Wang
Browse files

Introduce reusable burst to canonical interface -- HAL.

This CL modifies the canonical interface for reusable burst executions:
- Add new method IBurst::createExecution

The reusable burst execution will not fall back to another execution path
if sending the request packet fails. The behavior of single-use burst
execution remains unchanged.

Additionally, this CL enables pointer -> shared memory conversion in
1.2/1.3 burst implementation.

Bug: 184073769
Test: NNT_static
Test: neuralnetworks_utils_hal_1_0_test
Test: neuralnetworks_utils_hal_1_1_test
Test: neuralnetworks_utils_hal_1_2_test
Test: neuralnetworks_utils_hal_1_3_test
Test: neuralnetworks_utils_hal_common_test
Change-Id: Iaac81668d247c2cb76d70e6abbd10f00b397b19f
parent 727a7b21
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -48,6 +48,10 @@ class Burst final : public nn::IBurst {
            const nn::OptionalTimePoint& deadline,
            const nn::OptionalDuration& loopTimeoutDuration) const override;

    nn::GeneralResult<nn::SharedExecution> createReusableExecution(
            const nn::Request& request, nn::MeasureTiming measure,
            const nn::OptionalDuration& loopTimeoutDuration) const override;

  private:
    const nn::SharedPreparedModel kPreparedModel;
};
+6 −0
Original line number Diff line number Diff line
@@ -55,4 +55,10 @@ nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Burst::
    return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration);
}

// Creates a reusable execution by delegating directly to the wrapped prepared
// model (kPreparedModel); this V1_0 Burst adapter adds no burst-specific state
// of its own, so no extra setup or teardown is needed here.
nn::GeneralResult<nn::SharedExecution> Burst::createReusableExecution(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalDuration& loopTimeoutDuration) const {
    return kPreparedModel->createReusableExecution(request, measure, loopTimeoutDuration);
}

}  // namespace android::hardware::neuralnetworks::V1_0::utils
+22 −8
Original line number Diff line number Diff line
@@ -28,9 +28,11 @@
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>
#include <nnapi/IBurst.h>
#include <nnapi/IExecution.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Result.h>
#include <nnapi/Types.h>
#include <nnapi/hal/CommonUtils.h>
#include <nnapi/hal/ProtectCallback.h>

#include <atomic>
@@ -51,14 +53,14 @@ namespace android::hardware::neuralnetworks::V1_2::utils {
 * across FMQ, making it appear to the runtime as a regular synchronous inference. Additionally,
 * this class manages the burst's memory cache.
 */
class ExecutionBurstController final : public nn::IBurst {
class ExecutionBurstController final
    : public nn::IBurst,
      public std::enable_shared_from_this<ExecutionBurstController> {
    struct PrivateConstructorTag {};

  public:
    using FallbackFunction =
            std::function<nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>(
                    const nn::Request&, nn::MeasureTiming, const nn::OptionalTimePoint&,
                    const nn::OptionalDuration&)>;
    using FallbackFunction = std::function<
            nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>()>;

    /**
     * NN runtime memory cache.
@@ -154,10 +156,10 @@ class ExecutionBurstController final : public nn::IBurst {
     * @return ExecutionBurstController Execution burst controller object.
     */
    static nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> create(
            const sp<IPreparedModel>& preparedModel, FallbackFunction fallback,
            nn::SharedPreparedModel preparedModel, const sp<IPreparedModel>& hidlPreparedModel,
            std::chrono::microseconds pollingTimeWindow);

    ExecutionBurstController(PrivateConstructorTag tag, FallbackFunction fallback,
    ExecutionBurstController(PrivateConstructorTag tag, nn::SharedPreparedModel preparedModel,
                             std::unique_ptr<RequestChannelSender> requestChannelSender,
                             std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
                             sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
@@ -173,9 +175,21 @@ class ExecutionBurstController final : public nn::IBurst {
            const nn::OptionalTimePoint& deadline,
            const nn::OptionalDuration& loopTimeoutDuration) const override;

    // See IBurst::createReusableExecution for information on this method.
    nn::GeneralResult<nn::SharedExecution> createReusableExecution(
            const nn::Request& request, nn::MeasureTiming measure,
            const nn::OptionalDuration& loopTimeoutDuration) const override;

    // If fallback is not nullptr, this method will invoke the fallback function to try another
    // execution path if the packet could not be sent. Otherwise, failing to send the packet will
    // result in an error.
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> executeInternal(
            const std::vector<FmqRequestDatum>& requestPacket,
            const hal::utils::RequestRelocation& relocation, FallbackFunction fallback) const;

  private:
    mutable std::atomic_flag mExecutionInFlight = ATOMIC_FLAG_INIT;
    const FallbackFunction kFallback;
    const nn::SharedPreparedModel kPreparedModel;
    const std::unique_ptr<RequestChannelSender> mRequestChannelSender;
    const std::unique_ptr<ResultChannelReceiver> mResultChannelReceiver;
    const sp<ExecutionBurstCallback> mBurstCallback;
+158 −24
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <nnapi/Types.h>
#include <nnapi/Validation.h>
#include <nnapi/hal/1.0/Conversions.h>
#include <nnapi/hal/CommonUtils.h>
#include <nnapi/hal/HandleError.h>
#include <nnapi/hal/ProtectCallback.h>
#include <nnapi/hal/TransferValue.h>
@@ -50,6 +51,35 @@
namespace android::hardware::neuralnetworks::V1_2::utils {
namespace {

// Reusable execution object for the V1_2 burst path.
//
// A BurstExecution stores a pre-serialized FMQ request packet, the relocation
// info needed to flush pointer-backed request arguments, and cache holds
// obtained from the controller's memory cache (presumably keeping the
// request's memories cached for this object's lifetime — confirm against
// MemoryCache semantics). compute() replays the packet through the owning
// ExecutionBurstController with fallback disabled.
class BurstExecution final : public nn::IExecution,
                             public std::enable_shared_from_this<BurstExecution> {
    // Restricts construction to create() while still permitting
    // std::make_shared to invoke the public constructor.
    struct PrivateConstructorTag {};

  public:
    // Validates the arguments (non-null controller) and builds the execution.
    static nn::GeneralResult<std::shared_ptr<const BurstExecution>> create(
            std::shared_ptr<const ExecutionBurstController> controller,
            std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
            std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds);

    BurstExecution(PrivateConstructorTag tag,
                   std::shared_ptr<const ExecutionBurstController> controller,
                   std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
                   std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds);

    // Sends the cached request packet via the controller. No fallback is
    // attempted: a failure to send the packet is surfaced as an error.
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> compute(
            const nn::OptionalTimePoint& deadline) const override;

    // Fenced execution is not supported on burst objects; always fails with
    // GENERAL_FAILURE.
    nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> computeFenced(
            const std::vector<nn::SyncFence>& waitFor, const nn::OptionalTimePoint& deadline,
            const nn::OptionalDuration& timeoutDurationAfterFence) const override;

  private:
    const std::shared_ptr<const ExecutionBurstController> kController;
    const std::vector<FmqRequestDatum> kRequest;      // pre-serialized FMQ request packet
    const hal::utils::RequestRelocation kRelocation;  // pointer -> shared-memory relocation
    const std::vector<ExecutionBurstController::OptionalCacheHold> kCacheHolds;
};

nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback(
        V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) {
    HANDLE_HAL_STATUS(status) << "IPreparedModel::configureExecutionBurst failed with status "
@@ -209,10 +239,10 @@ Return<void> ExecutionBurstController::ExecutionBurstCallback::getMemories(
// ExecutionBurstController methods

nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> ExecutionBurstController::create(
        const sp<V1_2::IPreparedModel>& preparedModel, FallbackFunction fallback,
        nn::SharedPreparedModel preparedModel, const sp<V1_2::IPreparedModel>& hidlPreparedModel,
        std::chrono::microseconds pollingTimeWindow) {
    // check inputs
    if (preparedModel == nullptr) {
    if (preparedModel == nullptr || hidlPreparedModel == nullptr) {
        return NN_ERROR() << "ExecutionBurstController::create passed a nullptr";
    }

@@ -236,7 +266,7 @@ nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> ExecutionBurs
    auto cb = hal::utils::CallbackValue(executionBurstResultCallback);

    // configure burst
    const Return<void> ret = preparedModel->configureExecutionBurst(
    const Return<void> ret = hidlPreparedModel->configureExecutionBurst(
            burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb);
    HANDLE_TRANSPORT_FAILURE(ret);

@@ -250,18 +280,18 @@ nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> ExecutionBurs

    // make and return controller
    return std::make_shared<const ExecutionBurstController>(
            PrivateConstructorTag{}, std::move(fallback), std::move(requestChannelSender),
            PrivateConstructorTag{}, std::move(preparedModel), std::move(requestChannelSender),
            std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext),
            std::move(memoryCache), std::move(deathHandler));
}

ExecutionBurstController::ExecutionBurstController(
        PrivateConstructorTag /*tag*/, FallbackFunction fallback,
        PrivateConstructorTag /*tag*/, nn::SharedPreparedModel preparedModel,
        std::unique_ptr<RequestChannelSender> requestChannelSender,
        std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
        sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
        std::shared_ptr<MemoryCache> memoryCache, neuralnetworks::utils::DeathHandler deathHandler)
    : kFallback(std::move(fallback)),
    : kPreparedModel(std::move(preparedModel)),
      mRequestChannelSender(std::move(requestChannelSender)),
      mResultChannelReceiver(std::move(resultChannelReceiver)),
      mBurstCallback(std::move(callback)),
@@ -283,49 +313,113 @@ ExecutionBurstController::execute(const nn::Request& request, nn::MeasureTiming
    // systraces. Note that the first point we can begin collecting systraces in
    // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so
    // ExecutionBurstServer collects systraces at different points in the code.
    NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::execute");
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::execute");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (const auto version = NN_TRY(hal::utils::makeExecutionFailure(nn::validate(request)));
        version > nn::Version::ANDROID_Q) {
        // fallback to another execution path if the packet could not be sent
        if (kFallback) {
            return kFallback(request, measure, deadline, loopTimeoutDuration);
        }
        return NN_ERROR() << "Request object has features not supported by IBurst::execute";
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration);
    }

    // ensure that request is ready for IPC
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared =
            NN_TRY(hal::utils::makeExecutionFailure(hal::utils::convertRequestFromPointerToShared(
                    &request, &maybeRequestInShared, &relocation)));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools =
            nn::Request{.inputs = request.inputs, .outputs = request.outputs, .pools = {}};
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(
            hal::utils::makeExecutionFailure(V1_0::utils::unvalidatedConvert(requestWithoutPools)));
    const auto hidlMeasure = NN_TRY(hal::utils::makeExecutionFailure(convert(measure)));

    // Ensure that at most one execution is in flight at any given time.
    const bool alreadyInFlight = mExecutionInFlight.test_and_set();
    if (alreadyInFlight) {
        return NN_ERROR() << "IBurst already has an execution in flight";
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }
    const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); });

    // send request packet
    const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    const auto fallback = [this, &request, measure, &deadline, &loopTimeoutDuration] {
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration);
    };
    return executeInternal(requestPacket, relocation, fallback);
}

// See IBurst::createReusableExecution for information on this method.
nn::GeneralResult<nn::SharedExecution> ExecutionBurstController::createReusableExecution(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalDuration& loopTimeoutDuration) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::createReusableExecution");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (const auto version = NN_TRY(hal::utils::makeGeneralFailure(nn::validate(request)));
        version > nn::Version::ANDROID_Q) {
        // fallback to another execution path if the packet could not be sent
        return kPreparedModel->createReusableExecution(request, measure, loopTimeoutDuration);
    }

    // ensure that request is ready for IPC
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, &maybeRequestInShared, &relocation));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools));
    const auto hidlMeasure = NN_TRY(convert(measure));

    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(request.pools.size());
    holds.reserve(request.pools.size());
    for (const auto& memoryPool : request.pools) {
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    return BurstExecution::create(shared_from_this(), std::move(requestPacket),
                                  std::move(relocation), std::move(holds));
}

nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>
ExecutionBurstController::executeInternal(const std::vector<FmqRequestDatum>& requestPacket,
                                          const hal::utils::RequestRelocation& relocation,
                                          FallbackFunction fallback) const {
    NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
                 "ExecutionBurstController::executeInternal");

    // Ensure that at most one execution is in flight at any given time.
    const bool alreadyInFlight = mExecutionInFlight.test_and_set();
    if (alreadyInFlight) {
        return NN_ERROR() << "IBurst already has an execution in flight";
    }
    const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); });

    if (relocation.input) {
        relocation.input->flush();
    }

    // send request packet
    const auto sendStatus = mRequestChannelSender->send(hidlRequest, hidlMeasure, slots);
    const auto sendStatus = mRequestChannelSender->sendPacket(requestPacket);
    if (!sendStatus.ok()) {
        // fallback to another execution path if the packet could not be sent
        if (kFallback) {
            return kFallback(request, measure, deadline, loopTimeoutDuration);
        if (fallback) {
            return fallback();
        }
        return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error();
    }
@@ -333,7 +427,47 @@ ExecutionBurstController::execute(const nn::Request& request, nn::MeasureTiming
    // get result packet
    const auto [status, outputShapes, timing] =
            NN_TRY(hal::utils::makeExecutionFailure(mResultChannelReceiver->getBlocking()));

    if (relocation.output) {
        relocation.output->flush();
    }
    return executionCallback(status, outputShapes, timing);
}

// Factory for BurstExecution: rejects a null controller, otherwise forwards
// every argument into the private constructor via std::make_shared.
nn::GeneralResult<std::shared_ptr<const BurstExecution>> BurstExecution::create(
        std::shared_ptr<const ExecutionBurstController> controller,
        std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
        std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds) {
    if (controller != nullptr) {
        return std::make_shared<const BurstExecution>(
                PrivateConstructorTag{}, std::move(controller), std::move(request),
                std::move(relocation), std::move(cacheHolds));
    }
    return NN_ERROR() << "V1_2::utils::BurstExecution::create must have non-null controller";
}

// Constructor: takes ownership of the controller reference, the serialized
// request packet, the relocation info, and the memory-cache holds. The tag
// parameter only restricts construction to BurstExecution::create.
BurstExecution::BurstExecution(PrivateConstructorTag /*tag*/,
                               std::shared_ptr<const ExecutionBurstController> controller,
                               std::vector<FmqRequestDatum> request,
                               hal::utils::RequestRelocation relocation,
                               std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds)
    : kController(std::move(controller)),
      kRequest(std::move(request)),
      kRelocation(std::move(relocation)),
      kCacheHolds(std::move(cacheHolds)) {}

// Replays the pre-serialized request packet through the controller. The
// deadline is unused, and fallback is explicitly disabled (nullptr): if the
// packet cannot be sent, the error propagates instead of retrying another
// execution path.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> BurstExecution::compute(
        const nn::OptionalTimePoint& /*deadline*/) const {
    return kController->executeInternal(kRequest, kRelocation, /*fallback=*/nullptr);
}

// Fenced execution is not implemented for burst objects; all parameters are
// ignored and a GENERAL_FAILURE error is returned unconditionally.
nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
BurstExecution::computeFenced(const std::vector<nn::SyncFence>& /*waitFor*/,
                              const nn::OptionalTimePoint& /*deadline*/,
                              const nn::OptionalDuration& /*timeoutDurationAfterFence*/) const {
    return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
           << "IExecution::computeFenced is not supported on burst object";
}

}  // namespace android::hardware::neuralnetworks::V1_2::utils
+1 −1
Original line number Diff line number Diff line
@@ -158,7 +158,7 @@ nn::GeneralResult<nn::SharedBurst> PreparedModel::configureExecutionBurst() cons
        return preparedModel->execute(request, measure, deadline, loopTimeoutDuration);
    };
    const auto pollingTimeWindow = getBurstControllerPollingTimeWindow();
    return ExecutionBurstController::create(kPreparedModel, std::move(fallback), pollingTimeWindow);
    return ExecutionBurstController::create(shared_from_this(), kPreparedModel, pollingTimeWindow);
}

std::any PreparedModel::getUnderlyingResource() const {
Loading