Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8a8ff0f3 authored by Michael Butler; committed by Android (Google) Code Review
Browse files

Merge "Implement full canonical Burst in NN util code"

parents 4ef12fea acff4063
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -27,7 +27,6 @@ cc_library_static {
    name: "neuralnetworks_utils_hal_1_2",
    defaults: ["neuralnetworks_utils_defaults"],
    srcs: ["src/*"],
    exclude_srcs: ["src/ExecutionBurst*"],
    local_include_dirs: ["include/nnapi/hal/1.2/"],
    export_include_dirs: ["include"],
    cflags: ["-Wthread-safety"],
@@ -41,10 +40,16 @@ cc_library_static {
        "android.hardware.neuralnetworks@1.0",
        "android.hardware.neuralnetworks@1.1",
        "android.hardware.neuralnetworks@1.2",
        "libfmq",
    ],
    export_static_lib_headers: [
        "neuralnetworks_utils_hal_common",
    ],
    product_variables: {
        debuggable: { // eng and userdebug builds
            cflags: ["-DNN_DEBUGGABLE"],
        },
    },
}

cc_test {
+1 −0
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@ GeneralResult<Capabilities> convert(const hal::V1_2::Capabilities& capabilities)
GeneralResult<Model> convert(const hal::V1_2::Model& model);
GeneralResult<MeasureTiming> convert(const hal::V1_2::MeasureTiming& measureTiming);
GeneralResult<Timing> convert(const hal::V1_2::Timing& timing);
GeneralResult<SharedMemory> convert(const hardware::hidl_memory& memory);

GeneralResult<std::vector<Extension>> convert(
        const hardware::hidl_vec<hal::V1_2::Extension>& extensions);
+113 −110
Original line number Diff line number Diff line
@@ -14,23 +14,28 @@
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H
#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H

#include "ExecutionBurstUtils.h"

#include <android-base/macros.h>
#include <android-base/thread_annotations.h>
#include <android/hardware/neuralnetworks/1.0/types.h>
#include <android/hardware/neuralnetworks/1.1/types.h>
#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
#include <android/hardware/neuralnetworks/1.2/types.h>
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>
#include <nnapi/IBurst.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Result.h>
#include <nnapi/Types.h>
#include <nnapi/hal/ProtectCallback.h>

#include <atomic>
#include <chrono>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
@@ -39,147 +44,145 @@
#include <utility>
#include <vector>

namespace android::nn {
namespace android::hardware::neuralnetworks::V1_2::utils {

/**
 * The ExecutionBurstController class manages both the serialization and
 * deserialization of data across FMQ, making it appear to the runtime as a
 * regular synchronous inference. Additionally, this class manages the burst's
 * memory cache.
 * The ExecutionBurstController class manages both the serialization and deserialization of data
 * across FMQ, making it appear to the runtime as a regular synchronous inference. Additionally,
 * this class manages the burst's memory cache.
 */
class ExecutionBurstController {
    DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);
class ExecutionBurstController final : public nn::IBurst {
    struct PrivateConstructorTag {};

  public:
    using FallbackFunction =
            std::function<nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>(
                    const nn::Request&, nn::MeasureTiming)>;

    /**
     * NN runtime burst callback object and memory cache.
     * NN runtime memory cache.
     *
     * MemoryCache associates a Memory object with a slot number to be passed across FMQ. The
     * ExecutionBurstServer can use this callback to retrieve a hidl_memory corresponding to the
     * slot via HIDL.
     *
     * ExecutionBurstCallback associates a hidl_memory object with a slot number
     * to be passed across FMQ. The ExecutionBurstServer can use this callback
     * to retrieve this hidl_memory corresponding to the slot via HIDL.
     * Whenever a hidl_memory object is copied, it will duplicate the underlying file descriptor.
     * Because the NN runtime currently copies the hidl_memory on each execution, it is difficult to
     * associate hidl_memory objects with previously cached hidl_memory objects. For this reason,
     * callers of this class must pair each hidl_memory object with an associated key. For
     * efficiency, if two hidl_memory objects represent the same underlying buffer, they must use
     * the same key.
     *
     * Whenever a hidl_memory object is copied, it will duplicate the underlying
     * file descriptor. Because the NN runtime currently copies the hidl_memory
     * on each execution, it is difficult to associate hidl_memory objects with
     * previously cached hidl_memory objects. For this reason, callers of this
     * class must pair each hidl_memory object with an associated key. For
     * efficiency, if two hidl_memory objects represent the same underlying
     * buffer, they must use the same key.
     * This class is thread-safe.
     */
    class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {
        DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);
    class MemoryCache : public std::enable_shared_from_this<MemoryCache> {
        struct PrivateConstructorTag {};

      public:
        ExecutionBurstCallback() = default;
        using Task = std::function<void()>;
        using Cleanup = base::ScopeGuard<Task>;
        using SharedCleanup = std::shared_ptr<const Cleanup>;
        using WeakCleanup = std::weak_ptr<const Cleanup>;

        hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,
                                           getMemories_cb cb) override;
        // Custom constructor to pre-allocate cache sizes.
        MemoryCache();

        /**
         * This function performs one of two different actions:
         * 1) If a key corresponding to a memory resource is unrecognized by the
         *    ExecutionBurstCallback object, the ExecutionBurstCallback object
         *    will allocate a slot, bind the memory to the slot, and return the
         *    slot identifier.
         * 2) If a key corresponding to a memory resource is recognized by the
         *    ExecutionBurstCallback object, the ExecutionBurstCallback object
         *    will return the existing slot identifier.
         * Add a burst context to the MemoryCache object.
         *
         * @param memories Memory resources used in an inference.
         * @param keys Unique identifiers where each element corresponds to a
         *     memory resource element in "memories".
         * @return Unique slot identifiers where each returned slot element
         *     corresponds to a memory resource element in "memories".
         * If this method is called, it must be called before MemoryCache::cacheMemory or
         * MemoryCache::getMemory is used.
         *
         * @param burstContext Burst context to be added to the MemoryCache object.
         */
        std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,
                                      const std::vector<intptr_t>& keys);
        void setBurstContext(sp<IBurstContext> burstContext);

        /*
         * This function performs two different actions:
         * 1) Removes an entry from the cache (if present), including the local
         *    storage of the hidl_memory object. Note that this call does not
         *    free any corresponding hidl_memory object in ExecutionBurstServer,
         *    which is separately freed via IBurstContext::freeMemory.
         *    2) Returns whether a cache entry was removed and which slot was removed if
         *       found. If the key did not correspond to any entry in the cache, a
         *       slot number of 0 is returned. The slot number and whether the entry
         *       existed are useful so the same slot can be freed in the
         *       ExecutionBurstServer's cache via IBurstContext::freeMemory.
        /**
         * Cache a memory object in the MemoryCache object.
         *
         * @param memory Memory object to be cached while the returned `SharedCleanup` is alive.
         * @return A pair of (1) a unique identifier for the cache entry and (2) a ref-counted
         *     "hold" object which preserves the cache as long as the hold object is alive.
         */
        std::pair<int32_t, SharedCleanup> cacheMemory(const nn::SharedMemory& memory);

        /**
         * Get the memory object corresponding to a slot identifier.
         *
         * @param slot Slot which identifies the memory object to retrieve.
         * @return The memory object corresponding to slot, otherwise GeneralError.
         */
        std::pair<bool, int32_t> freeMemory(intptr_t key);
        nn::GeneralResult<nn::SharedMemory> getMemory(int32_t slot);

      private:
        int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);
        int32_t allocateSlotLocked();
        void freeMemory(const nn::SharedMemory& memory);
        int32_t allocateSlotLocked() REQUIRES(mMutex);

        std::mutex mMutex;
        std::stack<int32_t, std::vector<int32_t>> mFreeSlots;
        std::map<intptr_t, int32_t> mMemoryIdToSlot;
        std::vector<hardware::hidl_memory> mMemoryCache;
        std::condition_variable mCond;
        sp<IBurstContext> mBurstContext GUARDED_BY(mMutex);
        std::stack<int32_t, std::vector<int32_t>> mFreeSlots GUARDED_BY(mMutex);
        std::map<nn::SharedMemory, int32_t> mMemoryIdToSlot GUARDED_BY(mMutex);
        std::vector<nn::SharedMemory> mMemoryCache GUARDED_BY(mMutex);
        std::vector<WeakCleanup> mCacheCleaner GUARDED_BY(mMutex);
    };

    /**
     * HIDL Callback class to pass memory objects to the Burst server when given corresponding
     * slots.
     */
    class ExecutionBurstCallback : public IBurstCallback {
      public:
        // Precondition: memoryCache must be non-null.
        explicit ExecutionBurstCallback(const std::shared_ptr<MemoryCache>& memoryCache);

        // See IBurstCallback::getMemories for information on this method.
        Return<void> getMemories(const hidl_vec<int32_t>& slots, getMemories_cb cb) override;

      private:
        const std::weak_ptr<MemoryCache> kMemoryCache;
    };

    /**
     * Creates a burst controller on a prepared model.
     *
     * Prefer this over ExecutionBurstController's constructor.
     *
     * @param preparedModel Model prepared for execution to execute on.
     * @param pollingTimeWindow How much time (in microseconds) the
     *     ExecutionBurstController is allowed to poll the FMQ before waiting on
     *     the blocking futex. Polling may result in lower latencies at the
     *     potential cost of more power usage.
     * @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstController is
     *     allowed to poll the FMQ before waiting on the blocking futex. Polling may result in lower
     *     latencies at the potential cost of more power usage.
     * @return ExecutionBurstController Execution burst controller object.
     */
    static std::unique_ptr<ExecutionBurstController> create(
            const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,
    static nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> create(
            const sp<IPreparedModel>& preparedModel, FallbackFunction fallback,
            std::chrono::microseconds pollingTimeWindow);

    // prefer calling ExecutionBurstController::create
    ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
                             const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
                             const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,
                             const sp<ExecutionBurstCallback>& callback,
                             const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);
    ExecutionBurstController(PrivateConstructorTag tag, FallbackFunction fallback,
                             std::unique_ptr<RequestChannelSender> requestChannelSender,
                             std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
                             sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
                             std::shared_ptr<MemoryCache> memoryCache,
                             neuralnetworks::utils::DeathHandler deathHandler);

    // explicit destructor to unregister the death recipient
    ~ExecutionBurstController();
    // See IBurst::cacheMemory for information on this method.
    OptionalCacheHold cacheMemory(const nn::SharedMemory& memory) const override;

    /**
     * Execute a request on a model.
     *
     * @param request Arguments to be executed on a model.
     * @param measure Whether to collect timing measurements, either YES or NO
     * @param memoryIds Identifiers corresponding to each memory object in the
     *     request's pools.
     * @return A tuple of:
     *     - result code of the execution
     *     - dynamic output shapes from the execution
     *     - any execution time measurements of the execution
     *     - whether or not a failed burst execution should be re-run using a
     *       different path (e.g., IPreparedModel::executeSynchronously)
     */
    std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
               hardware::neuralnetworks::V1_2::Timing, bool>
    compute(const hardware::neuralnetworks::V1_0::Request& request,
            hardware::neuralnetworks::V1_2::MeasureTiming measure,
            const std::vector<intptr_t>& memoryIds);

    /**
     * Propagate a user's freeing of memory to the service.
     *
     * @param key Key corresponding to the memory object.
     */
    void freeMemory(intptr_t key);
    // See IBurst::execute for information on this method.
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> execute(
            const nn::Request& request, nn::MeasureTiming measure) const override;

  private:
    std::mutex mMutex;
    const std::shared_ptr<RequestChannelSender> mRequestChannelSender;
    const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;
    const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;
    const sp<ExecutionBurstCallback> mMemoryCache;
    const sp<hardware::hidl_death_recipient> mDeathHandler;
    mutable std::atomic_flag mExecutionInFlight = ATOMIC_FLAG_INIT;
    const FallbackFunction kFallback;
    const std::unique_ptr<RequestChannelSender> mRequestChannelSender;
    const std::unique_ptr<ResultChannelReceiver> mResultChannelReceiver;
    const sp<ExecutionBurstCallback> mBurstCallback;
    const sp<IBurstContext> mBurstContext;
    const std::shared_ptr<MemoryCache> mMemoryCache;
    // `kDeathHandler` must come after `mRequestChannelSender` and `mResultChannelReceiver` because
    // it holds references to both objects.
    const neuralnetworks::utils::DeathHandler kDeathHandler;
};

}  // namespace android::nn
}  // namespace android::hardware::neuralnetworks::V1_2::utils

#endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H
+74 −127

File changed.

Preview size limit exceeded, changes collapsed.

+109 −143

File changed.

Preview size limit exceeded, changes collapsed.

Loading