Implement full canonical Burst in NN util code (acff4063) · Commits · e / os / android_hardware_interfaces

neuralnetworks/1.2/utils/Android.bp

+6 −1

Original line number	Original line	Diff line number	Diff line
	@@ -27,7 +27,6 @@ cc_library_static {
	name: "neuralnetworks_utils_hal_1_2",		name: "neuralnetworks_utils_hal_1_2",
	defaults: ["neuralnetworks_utils_defaults"],		defaults: ["neuralnetworks_utils_defaults"],
	srcs: ["src/*"],		srcs: ["src/*"],
	exclude_srcs: ["src/ExecutionBurst*"],
	local_include_dirs: ["include/nnapi/hal/1.2/"],		local_include_dirs: ["include/nnapi/hal/1.2/"],
	export_include_dirs: ["include"],		export_include_dirs: ["include"],
	cflags: ["-Wthread-safety"],		cflags: ["-Wthread-safety"],
	@@ -41,10 +40,16 @@ cc_library_static {
	"android.hardware.neuralnetworks@1.0",		"android.hardware.neuralnetworks@1.0",
	"android.hardware.neuralnetworks@1.1",		"android.hardware.neuralnetworks@1.1",
	"android.hardware.neuralnetworks@1.2",		"android.hardware.neuralnetworks@1.2",
			"libfmq",
	],		],
	export_static_lib_headers: [		export_static_lib_headers: [
	"neuralnetworks_utils_hal_common",		"neuralnetworks_utils_hal_common",
	],		],
			product_variables: {
			debuggable: { // eng and userdebug builds
			cflags: ["-DNN_DEBUGGABLE"],
			},
			},
	}		}

	cc_test {		cc_test {

neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h

+1 −0

Original line number	Original line	Diff line number	Diff line
	@@ -52,6 +52,7 @@ GeneralResult<Capabilities> convert(const hal::V1_2::Capabilities& capabilities)
	GeneralResult<Model> convert(const hal::V1_2::Model& model);		GeneralResult<Model> convert(const hal::V1_2::Model& model);
	GeneralResult<MeasureTiming> convert(const hal::V1_2::MeasureTiming& measureTiming);		GeneralResult<MeasureTiming> convert(const hal::V1_2::MeasureTiming& measureTiming);
	GeneralResult<Timing> convert(const hal::V1_2::Timing& timing);		GeneralResult<Timing> convert(const hal::V1_2::Timing& timing);
			GeneralResult<SharedMemory> convert(const hardware::hidl_memory& memory);

	GeneralResult<std::vector<Extension>> convert(		GeneralResult<std::vector<Extension>> convert(
	const hardware::hidl_vec<hal::V1_2::Extension>& extensions);		const hardware::hidl_vec<hal::V1_2::Extension>& extensions);

neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h

+113 −110

Original line number	Original line	Diff line number	Diff line
	@@ -14,23 +14,28 @@
	* limitations under the License.		* limitations under the License.
	*/		*/

	#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H		#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H
	#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H		#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H

	#include "ExecutionBurstUtils.h"		#include "ExecutionBurstUtils.h"

	#include <android-base/macros.h>		#include <android-base/thread_annotations.h>
	#include <android/hardware/neuralnetworks/1.0/types.h>		#include <android/hardware/neuralnetworks/1.0/types.h>
	#include <android/hardware/neuralnetworks/1.1/types.h>
	#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>		#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
	#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>		#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
	#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>		#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
	#include <android/hardware/neuralnetworks/1.2/types.h>		#include <android/hardware/neuralnetworks/1.2/types.h>
	#include <fmq/MessageQueue.h>		#include <fmq/MessageQueue.h>
	#include <hidl/MQDescriptor.h>		#include <hidl/MQDescriptor.h>
			#include <nnapi/IBurst.h>
			#include <nnapi/IPreparedModel.h>
			#include <nnapi/Result.h>
			#include <nnapi/Types.h>
			#include <nnapi/hal/ProtectCallback.h>

	#include <atomic>		#include <atomic>
	#include <chrono>		#include <chrono>
			#include <functional>
	#include <map>		#include <map>
	#include <memory>		#include <memory>
	#include <mutex>		#include <mutex>
	@@ -39,147 +44,145 @@
	#include <utility>		#include <utility>
	#include <vector>		#include <vector>

	namespace android::nn {		namespace android::hardware::neuralnetworks::V1_2::utils {

	/**		/**
	* The ExecutionBurstController class manages both the serialization and		* The ExecutionBurstController class manages both the serialization and deserialization of data
	* deserialization of data across FMQ, making it appear to the runtime as a		* across FMQ, making it appear to the runtime as a regular synchronous inference. Additionally,
	* regular synchronous inference. Additionally, this class manages the burst's		* this class manages the burst's memory cache.
	* memory cache.
	*/		*/
	class ExecutionBurstController {		class ExecutionBurstController final : public nn::IBurst {
	DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);		struct PrivateConstructorTag {};

	public:		public:
			using FallbackFunction =
			std::function<nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>(
			const nn::Request&, nn::MeasureTiming)>;

	/**		/**
	* NN runtime burst callback object and memory cache.		* NN runtime memory cache.
			*
			* MemoryCache associates a Memory object with a slot number to be passed across FMQ. The
			* ExecutionBurstServer can use this callback to retrieve a hidl_memory corresponding to the
			* slot via HIDL.
	*		*
	* ExecutionBurstCallback associates a hidl_memory object with a slot number		* Whenever a hidl_memory object is copied, it will duplicate the underlying file descriptor.
	* to be passed across FMQ. The ExecutionBurstServer can use this callback		* Because the NN runtime currently copies the hidl_memory on each execution, it is difficult to
	* to retrieve this hidl_memory corresponding to the slot via HIDL.		* associate hidl_memory objects with previously cached hidl_memory objects. For this reason,
			* callers of this class must pair each hidl_memory object with an associated key. For
			* efficiency, if two hidl_memory objects represent the same underlying buffer, they must use
			* the same key.
	*		*
	* Whenever a hidl_memory object is copied, it will duplicate the underlying		* This class is thread-safe.
	* file descriptor. Because the NN runtime currently copies the hidl_memory
	* on each execution, it is difficult to associate hidl_memory objects with
	* previously cached hidl_memory objects. For this reason, callers of this
	* class must pair each hidl_memory object with an associated key. For
	* efficiency, if two hidl_memory objects represent the same underlying
	* buffer, they must use the same key.
	*/		*/
	class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {		class MemoryCache : public std::enable_shared_from_this<MemoryCache> {
	DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);		struct PrivateConstructorTag {};

	public:		public:
	ExecutionBurstCallback() = default;		using Task = std::function<void()>;
			using Cleanup = base::ScopeGuard<Task>;
			using SharedCleanup = std::shared_ptr<const Cleanup>;
			using WeakCleanup = std::weak_ptr<const Cleanup>;

	hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,		// Custom constructor to pre-allocate cache sizes.
	getMemories_cb cb) override;		MemoryCache();

	/**		/**
	* This function performs one of two different actions:		* Add a burst context to the MemoryCache object.
	* 1) If a key corresponding to a memory resource is unrecognized by the
	* ExecutionBurstCallback object, the ExecutionBurstCallback object
	* will allocate a slot, bind the memory to the slot, and return the
	* slot identifier.
	* 2) If a key corresponding to a memory resource is recognized by the
	* ExecutionBurstCallback object, the ExecutionBurstCallback object
	* will return the existing slot identifier.
	*		*
	* @param memories Memory resources used in an inference.		* If this method is called, it must be called before MemoryCache::cacheMemory or
	* @param keys Unique identifiers where each element corresponds to a		* MemoryCache::getMemory is used.
	* memory resource element in "memories".		*
	* @return Unique slot identifiers where each returned slot element		* @param burstContext Burst context to be added to the MemoryCache object.
	* corresponds to a memory resource element in "memories".
	*/		*/
	std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,		void setBurstContext(sp<IBurstContext> burstContext);
	const std::vector<intptr_t>& keys);

	/*		/**
	* This function performs two different actions:		* Cache a memory object in the MemoryCache object.
	* 1) Removes an entry from the cache (if present), including the local		*
	* storage of the hidl_memory object. Note that this call does not		* @param memory Memory object to be cached while the returned `SharedCleanup` is alive.
	* free any corresponding hidl_memory object in ExecutionBurstServer,		* @return A pair of (1) a unique identifier for the cache entry and (2) a ref-counted
	* which is separately freed via IBurstContext::freeMemory.		* "hold" object which preserves the cache as long as the hold object is alive.
	* 2) Return whether a cache entry was removed and which slot was removed if		*/
	* found. If the key did not correspond to any entry in the cache, a		std::pair<int32_t, SharedCleanup> cacheMemory(const nn::SharedMemory& memory);
	* slot number of 0 is returned. The slot number and whether the entry
	* existed is useful so the same slot can be freed in the		/**
	* ExecutionBurstServer's cache via IBurstContext::freeMemory.		* Get the memory object corresponding to a slot identifier.
			*
			* @param slot Slot which identifies the memory object to retrieve.
			* @return The memory object corresponding to slot, otherwise GeneralError.
	*/		*/
	std::pair<bool, int32_t> freeMemory(intptr_t key);		nn::GeneralResult<nn::SharedMemory> getMemory(int32_t slot);

	private:		private:
	int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);		void freeMemory(const nn::SharedMemory& memory);
	int32_t allocateSlotLocked();		int32_t allocateSlotLocked() REQUIRES(mMutex);

	std::mutex mMutex;		std::mutex mMutex;
	std::stack<int32_t, std::vector<int32_t>> mFreeSlots;		std::condition_variable mCond;
	std::map<intptr_t, int32_t> mMemoryIdToSlot;		sp<IBurstContext> mBurstContext GUARDED_BY(mMutex);
	std::vector<hardware::hidl_memory> mMemoryCache;		std::stack<int32_t, std::vector<int32_t>> mFreeSlots GUARDED_BY(mMutex);
			std::map<nn::SharedMemory, int32_t> mMemoryIdToSlot GUARDED_BY(mMutex);
			std::vector<nn::SharedMemory> mMemoryCache GUARDED_BY(mMutex);
			std::vector<WeakCleanup> mCacheCleaner GUARDED_BY(mMutex);
			};

			/**
			* HIDL Callback class to pass memory objects to the Burst server when given corresponding
			* slots.
			*/
			class ExecutionBurstCallback : public IBurstCallback {
			public:
			// Precondition: memoryCache must be non-null.
			explicit ExecutionBurstCallback(const std::shared_ptr<MemoryCache>& memoryCache);

			// See IBurstCallback::getMemories for information on this method.
			Return<void> getMemories(const hidl_vec<int32_t>& slots, getMemories_cb cb) override;

			private:
			const std::weak_ptr<MemoryCache> kMemoryCache;
	};		};

	/**		/**
	* Creates a burst controller on a prepared model.		* Creates a burst controller on a prepared model.
	*		*
	* Prefer this over ExecutionBurstController's constructor.
	*
	* @param preparedModel Model prepared for execution to execute on.		* @param preparedModel Model prepared for execution to execute on.
	* @param pollingTimeWindow How much time (in microseconds) the		* @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstController is
	* ExecutionBurstController is allowed to poll the FMQ before waiting on		* allowed to poll the FMQ before waiting on the blocking futex. Polling may result in lower
	* the blocking futex. Polling may result in lower latencies at the		* latencies at the potential cost of more power usage.
	* potential cost of more power usage.
	* @return ExecutionBurstController Execution burst controller object.		* @return ExecutionBurstController Execution burst controller object.
	*/		*/
	static std::unique_ptr<ExecutionBurstController> create(		static nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> create(
	const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,		const sp<IPreparedModel>& preparedModel, FallbackFunction fallback,
	std::chrono::microseconds pollingTimeWindow);		std::chrono::microseconds pollingTimeWindow);

	// prefer calling ExecutionBurstController::create		ExecutionBurstController(PrivateConstructorTag tag, FallbackFunction fallback,
	ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,		std::unique_ptr<RequestChannelSender> requestChannelSender,
	const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,		std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
	const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,		sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
	const sp<ExecutionBurstCallback>& callback,		std::shared_ptr<MemoryCache> memoryCache,
	const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);		neuralnetworks::utils::DeathHandler deathHandler);

	// explicit destructor to unregister the death recipient		// See IBurst::cacheMemory for information on this method.
	~ExecutionBurstController();		OptionalCacheHold cacheMemory(const nn::SharedMemory& memory) const override;

	/**		// See IBurst::execute for information on this method.
	* Execute a request on a model.		nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> execute(
	*		const nn::Request& request, nn::MeasureTiming measure) const override;
	* @param request Arguments to be executed on a model.
	* @param measure Whether to collect timing measurements, either YES or NO
	* @param memoryIds Identifiers corresponding to each memory object in the
	* request's pools.
	* @return A tuple of:
	* - result code of the execution
	* - dynamic output shapes from the execution
	* - any execution time measurements of the execution
	* - whether or not a failed burst execution should be re-run using a
	* different path (e.g., IPreparedModel::executeSynchronously)
	*/
	std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
	hardware::neuralnetworks::V1_2::Timing, bool>
	compute(const hardware::neuralnetworks::V1_0::Request& request,
	hardware::neuralnetworks::V1_2::MeasureTiming measure,
	const std::vector<intptr_t>& memoryIds);

	/**
	* Propagate a user's freeing of memory to the service.
	*
	* @param key Key corresponding to the memory object.
	*/
	void freeMemory(intptr_t key);

	private:		private:
	std::mutex mMutex;		mutable std::atomic_flag mExecutionInFlight = ATOMIC_FLAG_INIT;
	const std::shared_ptr<RequestChannelSender> mRequestChannelSender;		const FallbackFunction kFallback;
	const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;		const std::unique_ptr<RequestChannelSender> mRequestChannelSender;
	const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;		const std::unique_ptr<ResultChannelReceiver> mResultChannelReceiver;
	const sp<ExecutionBurstCallback> mMemoryCache;		const sp<ExecutionBurstCallback> mBurstCallback;
	const sp<hardware::hidl_death_recipient> mDeathHandler;		const sp<IBurstContext> mBurstContext;
			const std::shared_ptr<MemoryCache> mMemoryCache;
			// `kDeathHandler` must come after `mRequestChannelSender` and `mResultChannelReceiver` because
			// it holds references to both objects.
			const neuralnetworks::utils::DeathHandler kDeathHandler;
	};		};

	} // namespace android::nn		} // namespace android::hardware::neuralnetworks::V1_2::utils

	#endif // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H		#endif // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H

neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h

+74 −127

File changed.

Preview size limit exceeded, changes collapsed.

neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h

+109 −143

File changed.

Preview size limit exceeded, changes collapsed.