Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit acff4063 authored by Michael Butler
Browse files

Implement full canonical Burst in NN util code

Bug: 180492058
Bug: 177267324
Test: mma
Test: presubmit
Change-Id: I5018f6cf2dbaf705f74f4f46318142c64433e19d
parent 9aed5c79
Loading
Loading
Loading
Loading
+6 −1
Original line number Original line Diff line number Diff line
@@ -27,7 +27,6 @@ cc_library_static {
    name: "neuralnetworks_utils_hal_1_2",
    name: "neuralnetworks_utils_hal_1_2",
    defaults: ["neuralnetworks_utils_defaults"],
    defaults: ["neuralnetworks_utils_defaults"],
    srcs: ["src/*"],
    srcs: ["src/*"],
    exclude_srcs: ["src/ExecutionBurst*"],
    local_include_dirs: ["include/nnapi/hal/1.2/"],
    local_include_dirs: ["include/nnapi/hal/1.2/"],
    export_include_dirs: ["include"],
    export_include_dirs: ["include"],
    cflags: ["-Wthread-safety"],
    cflags: ["-Wthread-safety"],
@@ -41,10 +40,16 @@ cc_library_static {
        "android.hardware.neuralnetworks@1.0",
        "android.hardware.neuralnetworks@1.0",
        "android.hardware.neuralnetworks@1.1",
        "android.hardware.neuralnetworks@1.1",
        "android.hardware.neuralnetworks@1.2",
        "android.hardware.neuralnetworks@1.2",
        "libfmq",
    ],
    ],
    export_static_lib_headers: [
    export_static_lib_headers: [
        "neuralnetworks_utils_hal_common",
        "neuralnetworks_utils_hal_common",
    ],
    ],
    product_variables: {
        debuggable: { // eng and userdebug builds
            cflags: ["-DNN_DEBUGGABLE"],
        },
    },
}
}


cc_test {
cc_test {
+1 −0
Original line number Original line Diff line number Diff line
@@ -52,6 +52,7 @@ GeneralResult<Capabilities> convert(const hal::V1_2::Capabilities& capabilities)
GeneralResult<Model> convert(const hal::V1_2::Model& model);
GeneralResult<Model> convert(const hal::V1_2::Model& model);
GeneralResult<MeasureTiming> convert(const hal::V1_2::MeasureTiming& measureTiming);
GeneralResult<MeasureTiming> convert(const hal::V1_2::MeasureTiming& measureTiming);
GeneralResult<Timing> convert(const hal::V1_2::Timing& timing);
GeneralResult<Timing> convert(const hal::V1_2::Timing& timing);
GeneralResult<SharedMemory> convert(const hardware::hidl_memory& memory);


GeneralResult<std::vector<Extension>> convert(
GeneralResult<std::vector<Extension>> convert(
        const hardware::hidl_vec<hal::V1_2::Extension>& extensions);
        const hardware::hidl_vec<hal::V1_2::Extension>& extensions);
+113 −110
Original line number Original line Diff line number Diff line
@@ -14,23 +14,28 @@
 * limitations under the License.
 * limitations under the License.
 */
 */


#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H


#include "ExecutionBurstUtils.h"
#include "ExecutionBurstUtils.h"


#include <android-base/macros.h>
#include <android-base/thread_annotations.h>
#include <android/hardware/neuralnetworks/1.0/types.h>
#include <android/hardware/neuralnetworks/1.0/types.h>
#include <android/hardware/neuralnetworks/1.1/types.h>
#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
#include <android/hardware/neuralnetworks/1.2/types.h>
#include <android/hardware/neuralnetworks/1.2/types.h>
#include <fmq/MessageQueue.h>
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>
#include <hidl/MQDescriptor.h>
#include <nnapi/IBurst.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Result.h>
#include <nnapi/Types.h>
#include <nnapi/hal/ProtectCallback.h>


#include <atomic>
#include <atomic>
#include <chrono>
#include <chrono>
#include <functional>
#include <map>
#include <map>
#include <memory>
#include <memory>
#include <mutex>
#include <mutex>
@@ -39,147 +44,145 @@
#include <utility>
#include <utility>
#include <vector>
#include <vector>


namespace android::nn {
namespace android::hardware::neuralnetworks::V1_2::utils {


/**
/**
 * The ExecutionBurstController class manages both the serialization and
 * The ExecutionBurstController class manages both the serialization and deserialization of data
 * deserialization of data across FMQ, making it appear to the runtime as a
 * across FMQ, making it appear to the runtime as a regular synchronous inference. Additionally,
 * regular synchronous inference. Additionally, this class manages the burst's
 * this class manages the burst's memory cache.
 * memory cache.
 */
 */
class ExecutionBurstController {
class ExecutionBurstController final : public nn::IBurst {
    DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);
    struct PrivateConstructorTag {};


  public:
  public:
    using FallbackFunction =
            std::function<nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>(
                    const nn::Request&, nn::MeasureTiming)>;

    /**
    /**
     * NN runtime burst callback object and memory cache.
     * NN runtime memory cache.
     *
     * MemoryCache associates a Memory object with a slot number to be passed across FMQ. The
     * ExecutionBurstServer can use this callback to retrieve a hidl_memory corresponding to the
     * slot via HIDL.
     *
     *
     * ExecutionBurstCallback associates a hidl_memory object with a slot number
     * Whenever a hidl_memory object is copied, it will duplicate the underlying file descriptor.
     * to be passed across FMQ. The ExecutionBurstServer can use this callback
     * Because the NN runtime currently copies the hidl_memory on each execution, it is difficult to
     * to retrieve this hidl_memory corresponding to the slot via HIDL.
     * associate hidl_memory objects with previously cached hidl_memory objects. For this reason,
     * callers of this class must pair each hidl_memory object with an associated key. For
     * efficiency, if two hidl_memory objects represent the same underlying buffer, they must use
     * the same key.
     *
     *
     * Whenever a hidl_memory object is copied, it will duplicate the underlying
     * This class is thread-safe.
     * file descriptor. Because the NN runtime currently copies the hidl_memory
     * on each execution, it is difficult to associate hidl_memory objects with
     * previously cached hidl_memory objects. For this reason, callers of this
     * class must pair each hidl_memory object with an associated key. For
     * efficiency, if two hidl_memory objects represent the same underlying
     * buffer, they must use the same key.
     */
     */
    class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {
    class MemoryCache : public std::enable_shared_from_this<MemoryCache> {
        DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);
        struct PrivateConstructorTag {};


      public:
      public:
        ExecutionBurstCallback() = default;
        using Task = std::function<void()>;
        using Cleanup = base::ScopeGuard<Task>;
        using SharedCleanup = std::shared_ptr<const Cleanup>;
        using WeakCleanup = std::weak_ptr<const Cleanup>;


        hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,
        // Custom constructor to pre-allocate cache sizes.
                                           getMemories_cb cb) override;
        MemoryCache();


        /**
        /**
         * This function performs one of two different actions:
         * Add a burst context to the MemoryCache object.
         * 1) If a key corresponding to a memory resource is unrecognized by the
         *    ExecutionBurstCallback object, the ExecutionBurstCallback object
         *    will allocate a slot, bind the memory to the slot, and return the
         *    slot identifier.
         * 2) If a key corresponding to a memory resource is recognized by the
         *    ExecutionBurstCallback object, the ExecutionBurstCallback object
         *    will return the existing slot identifier.
         *
         *
         * @param memories Memory resources used in an inference.
         * If this method is called, it must be called before MemoryCache::cacheMemory or
         * @param keys Unique identifiers where each element corresponds to a
         * MemoryCache::getMemory is used.
         *     memory resource element in "memories".
         *
         * @return Unique slot identifiers where each returned slot element
         * @param burstContext Burst context to be added to the MemoryCache object.
         *     corresponds to a memory resource element in "memories".
         */
         */
        std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,
        void setBurstContext(sp<IBurstContext> burstContext);
                                      const std::vector<intptr_t>& keys);


        /*
        /**
         * This function performs two different actions:
         * Cache a memory object in the MemoryCache object.
         * 1) Removes an entry from the cache (if present), including the local
         *
         *    storage of the hidl_memory object. Note that this call does not
         * @param memory Memory object to be cached while the returned `SharedCleanup` is alive.
         *    free any corresponding hidl_memory object in ExecutionBurstServer,
         * @return A pair of (1) a unique identifier for the cache entry and (2) a ref-counted
         *    which is separately freed via IBurstContext::freeMemory.
         *     "hold" object which preserves the cache as long as the hold object is alive.
         * 2) Return whether a cache entry was removed and which slot was removed if
         */
         *    found. If the key did not correspond to any entry in the cache, a
        std::pair<int32_t, SharedCleanup> cacheMemory(const nn::SharedMemory& memory);
         *    slot number of 0 is returned. The slot number and whether the entry

         *    existed is useful so the same slot can be freed in the
        /**
         *    ExecutionBurstServer's cache via IBurstContext::freeMemory.
         * Get the memory object corresponding to a slot identifier.
         *
         * @param slot Slot which identifies the memory object to retrieve.
         * @return The memory object corresponding to slot, otherwise GeneralError.
         */
         */
        std::pair<bool, int32_t> freeMemory(intptr_t key);
        nn::GeneralResult<nn::SharedMemory> getMemory(int32_t slot);


      private:
      private:
        int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);
        void freeMemory(const nn::SharedMemory& memory);
        int32_t allocateSlotLocked();
        int32_t allocateSlotLocked() REQUIRES(mMutex);


        std::mutex mMutex;
        std::mutex mMutex;
        std::stack<int32_t, std::vector<int32_t>> mFreeSlots;
        std::condition_variable mCond;
        std::map<intptr_t, int32_t> mMemoryIdToSlot;
        sp<IBurstContext> mBurstContext GUARDED_BY(mMutex);
        std::vector<hardware::hidl_memory> mMemoryCache;
        std::stack<int32_t, std::vector<int32_t>> mFreeSlots GUARDED_BY(mMutex);
        std::map<nn::SharedMemory, int32_t> mMemoryIdToSlot GUARDED_BY(mMutex);
        std::vector<nn::SharedMemory> mMemoryCache GUARDED_BY(mMutex);
        std::vector<WeakCleanup> mCacheCleaner GUARDED_BY(mMutex);
    };

    /**
     * HIDL Callback class to pass memory objects to the Burst server when given corresponding
     * slots.
     */
    class ExecutionBurstCallback : public IBurstCallback {
      public:
        // Precondition: memoryCache must be non-null.
        explicit ExecutionBurstCallback(const std::shared_ptr<MemoryCache>& memoryCache);

        // See IBurstCallback::getMemories for information on this method.
        Return<void> getMemories(const hidl_vec<int32_t>& slots, getMemories_cb cb) override;

      private:
        const std::weak_ptr<MemoryCache> kMemoryCache;
    };
    };


    /**
    /**
     * Creates a burst controller on a prepared model.
     * Creates a burst controller on a prepared model.
     *
     *
     * Prefer this over ExecutionBurstController's constructor.
     *
     * @param preparedModel Model prepared for execution to execute on.
     * @param preparedModel Model prepared for execution to execute on.
     * @param pollingTimeWindow How much time (in microseconds) the
     * @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstController is
     *     ExecutionBurstController is allowed to poll the FMQ before waiting on
     *     allowed to poll the FMQ before waiting on the blocking futex. Polling may result in lower
     *     the blocking futex. Polling may result in lower latencies at the
     *     latencies at the potential cost of more power usage.
     *     potential cost of more power usage.
     * @return ExecutionBurstController Execution burst controller object.
     * @return ExecutionBurstController Execution burst controller object.
     */
     */
    static std::unique_ptr<ExecutionBurstController> create(
    static nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> create(
            const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,
            const sp<IPreparedModel>& preparedModel, FallbackFunction fallback,
            std::chrono::microseconds pollingTimeWindow);
            std::chrono::microseconds pollingTimeWindow);


    // prefer calling ExecutionBurstController::create
    ExecutionBurstController(PrivateConstructorTag tag, FallbackFunction fallback,
    ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
                             std::unique_ptr<RequestChannelSender> requestChannelSender,
                             const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
                             std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
                             const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,
                             sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
                             const sp<ExecutionBurstCallback>& callback,
                             std::shared_ptr<MemoryCache> memoryCache,
                             const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);
                             neuralnetworks::utils::DeathHandler deathHandler);


    // explicit destructor to unregister the death recipient
    // See IBurst::cacheMemory for information on this method.
    ~ExecutionBurstController();
    OptionalCacheHold cacheMemory(const nn::SharedMemory& memory) const override;


    /**
    // See IBurst::execute for information on this method.
     * Execute a request on a model.
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> execute(
     *
            const nn::Request& request, nn::MeasureTiming measure) const override;
     * @param request Arguments to be executed on a model.
     * @param measure Whether to collect timing measurements, either YES or NO
     * @param memoryIds Identifiers corresponding to each memory object in the
     *     request's pools.
     * @return A tuple of:
     *     - result code of the execution
     *     - dynamic output shapes from the execution
     *     - any execution time measurements of the execution
     *     - whether or not a failed burst execution should be re-run using a
     *       different path (e.g., IPreparedModel::executeSynchronously)
     */
    std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
               hardware::neuralnetworks::V1_2::Timing, bool>
    compute(const hardware::neuralnetworks::V1_0::Request& request,
            hardware::neuralnetworks::V1_2::MeasureTiming measure,
            const std::vector<intptr_t>& memoryIds);

    /**
     * Propagate a user's freeing of memory to the service.
     *
     * @param key Key corresponding to the memory object.
     */
    void freeMemory(intptr_t key);


  private:
  private:
    std::mutex mMutex;
    mutable std::atomic_flag mExecutionInFlight = ATOMIC_FLAG_INIT;
    const std::shared_ptr<RequestChannelSender> mRequestChannelSender;
    const FallbackFunction kFallback;
    const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;
    const std::unique_ptr<RequestChannelSender> mRequestChannelSender;
    const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;
    const std::unique_ptr<ResultChannelReceiver> mResultChannelReceiver;
    const sp<ExecutionBurstCallback> mMemoryCache;
    const sp<ExecutionBurstCallback> mBurstCallback;
    const sp<hardware::hidl_death_recipient> mDeathHandler;
    const sp<IBurstContext> mBurstContext;
    const std::shared_ptr<MemoryCache> mMemoryCache;
    // `kDeathHandler` must come after `mRequestChannelSender` and `mResultChannelReceiver` because
    // it holds references to both objects.
    const neuralnetworks::utils::DeathHandler kDeathHandler;
};
};


}  // namespace android::nn
}  // namespace android::hardware::neuralnetworks::V1_2::utils


#endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H
+74 −127

File changed.

Preview size limit exceeded, changes collapsed.

+109 −143

File changed.

Preview size limit exceeded, changes collapsed.

Loading