Update neuralnetworks HAL to allow collecting execution duration. am: 55a3d328b7 (0aec977a) · Commits · e / os / android_hardware_interfaces

neuralnetworks/1.0/vts/functional/Callbacks.cpp

+10 −1

Original line number	Diff line number	Diff line
		@@ -135,14 +135,18 @@ ExecutionCallback::~ExecutionCallback() {}

		// Completion callback for executions that report only an error status.
		// Stores the status, leaves the output shapes empty, and sets both timing
		// fields to UINT64_MAX (the value used to mean "measurement not available")
		// before signalling completion through CallbackBase::notify().
		Return<void> ExecutionCallback::notify(ErrorStatus errorStatus) {
		mErrorStatus = errorStatus;
		mOutputShapes = {};
		// This overload receives no Timing argument, so no duration can be reported.
		mTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
		// All results are stored before this call; presumably it releases callers
		// blocked in wait() -- confirm against CallbackBase.
		CallbackBase::notify();
		return Void();
		}

		Return<void> ExecutionCallback::notify_1_2(ErrorStatus errorStatus,
		const hidl_vec<OutputShape>& outputShapes) {
		const hidl_vec<OutputShape>& outputShapes,
		const Timing& timing) {
		mErrorStatus = errorStatus;
		mOutputShapes = outputShapes;
		mTiming = timing;
		CallbackBase::notify();
		return Void();
		}
		@@ -157,6 +161,11 @@ const std::vector<OutputShape>& ExecutionCallback::getOutputShapes() {
		return mOutputShapes;
		}

		// Returns a copy of the Timing stored by notify()/notify_1_2().
		// Calls wait() first, so mTiming is read only after wait() has returned.
		Timing ExecutionCallback::getTiming() {
		wait();
		return mTiming;
		}

		} // namespace implementation
		} // namespace V1_2
		} // namespace neuralnetworks

neuralnetworks/1.0/vts/functional/Callbacks.h

+30 −3

Original line number	Diff line number	Diff line
		@@ -308,8 +308,20 @@ class ExecutionCallback : public CallbackBase, public IExecutionCallback {
		* of the output operand in the Request outputs vector.
		* outputShapes must be empty unless the status is either
		* NONE or OUTPUT_INSUFFICIENT_SIZE.
		* @return Timing Duration of execution. Unless MeasureTiming::YES was passed when
		* launching the execution and status is NONE, all times must
		* be reported as UINT64_MAX. A driver may choose to report
		* any time as UINT64_MAX, indicating that particular measurement is
		* not available.
		*/
		Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes) override;
		Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes,
		const Timing& timing) override;

		// An overload of the latest notify interface to hide the version from ExecutionBuilder.
		// Forwards status, outputShapes and timing unchanged to notify_1_2 and
		// returns whatever notify_1_2 returns.
		Return<void> notify(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes,
		const Timing& timing) {
		return notify_1_2(status, outputShapes, timing);
		}

		/**
		* Retrieves the error status returned from the asynchronous task launched
		@@ -350,9 +362,24 @@ class ExecutionCallback : public CallbackBase, public IExecutionCallback {
		*/
		const std::vector<OutputShape>& getOutputShapes();

		/**
		* Retrieves the duration of execution of the asynchronous task launched
		* by IPreparedModel::execute_1_2. If IPreparedModel::execute_1_2 has not finished
		* asynchronously executing, this call will block until the asynchronous task
		* notifies the object.
		*
		* If the asynchronous task was launched by IPreparedModel::execute, every time
		* must be UINT64_MAX.
		*
		* @return timing Duration of the execution. Every time must be UINT64_MAX unless
		* the status is NONE.
		*/
		Timing getTiming();

		private:
		ErrorStatus mErrorStatus;
		std::vector<OutputShape> mOutputShapes;
		ErrorStatus mErrorStatus = ErrorStatus::GENERAL_FAILURE;
		std::vector<OutputShape> mOutputShapes = {};
		Timing mTiming = {};
		};

neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp

+79 −42

Original line number	Diff line number	Diff line
		@@ -45,20 +45,16 @@ using ::test_helper::bool8;
		using ::test_helper::compare;
		using ::test_helper::expectMultinomialDistributionWithinTolerance;
		using ::test_helper::filter;
		using ::test_helper::Float32Operands;
		using ::test_helper::for_all;
		using ::test_helper::for_each;
		using ::test_helper::Int32Operands;
		using ::test_helper::MixedTyped;
		using ::test_helper::MixedTypedExample;
		using ::test_helper::MixedTypedIndex;
		using ::test_helper::Quant8Operands;
		using ::test_helper::resize_accordingly;

		template <typename T>
		void copy_back_(MixedTyped* dst, const std::vector<RequestArgument>& ra, char* src) {
		MixedTyped& test = *dst;
		for_each<T>(test, [&ra, src](int index, std::vector<T>& m) {
		void copy_back_(std::map<int, std::vector<T>>* dst, const std::vector<RequestArgument>& ra,
		char* src) {
		for_each<T>(*dst, [&ra, src](int index, std::vector<T>& m) {
		ASSERT_EQ(m.size(), ra[index].location.length / sizeof(T));
		char* begin = src + ra[index].location.offset;
		memcpy(m.data(), begin, ra[index].location.length);
		@@ -66,42 +62,47 @@ void copy_back_(MixedTyped* dst, const std::vector<RequestArgument>& ra, char* s
		}

		void copy_back(MixedTyped* dst, const std::vector<RequestArgument>& ra, char* src) {
		copy_back_<float>(dst, ra, src);
		copy_back_<int32_t>(dst, ra, src);
		copy_back_<uint8_t>(dst, ra, src);
		copy_back_<int16_t>(dst, ra, src);
		copy_back_<_Float16>(dst, ra, src);
		copy_back_<bool8>(dst, ra, src);
		copy_back_<int8_t>(dst, ra, src);
		static_assert(7 == std::tuple_size<MixedTyped>::value,
		copy_back_(&dst->float32Operands, ra, src);
		copy_back_(&dst->int32Operands, ra, src);
		copy_back_(&dst->quant8AsymmOperands, ra, src);
		copy_back_(&dst->quant16SymmOperands, ra, src);
		copy_back_(&dst->float16Operands, ra, src);
		copy_back_(&dst->bool8Operands, ra, src);
		copy_back_(&dst->quant8ChannelOperands, ra, src);
		copy_back_(&dst->quant16AsymmOperands, ra, src);
		static_assert(8 == MixedTyped::kNumTypes,
		"Number of types in MixedTyped changed, but copy_back function wasn't updated");
		}

		// Top level driver for models and examples generated by test_generator.py
		// Test driver for those generated from ml/nn/runtime/test/spec
		static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>& preparedModel,
		const Request& request,
		const Request& request, MeasureTiming,
		sp<ExecutionCallback>& callback) {
		return preparedModel->execute(request, callback);
		}
		static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
		const Request& request,
		const Request& request, MeasureTiming measure,
		sp<ExecutionCallback>& callback) {
		return preparedModel->execute_1_2(request, callback);
		return preparedModel->execute_1_2(request, measure, callback);
		}
		static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>&, const Request&,
		hidl_vec<OutputShape>*) {
		MeasureTiming, hidl_vec<OutputShape>*, Timing*) {
		ADD_FAILURE() << "asking for synchronous execution at V1_0";
		return ErrorStatus::GENERAL_FAILURE;
		}
		static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
		const Request& request,
		hidl_vec<OutputShape>* outputShapes) {
		const Request& request, MeasureTiming measure,
		hidl_vec<OutputShape>* outputShapes,
		Timing* timing) {
		ErrorStatus result;
		Return<void> ret = preparedModel->executeSynchronously(
		request, [&result, &outputShapes](ErrorStatus error, const hidl_vec<OutputShape>& shapes) {
		request, measure,
		[&result, outputShapes, timing](ErrorStatus error, const hidl_vec<OutputShape>& shapes,
		const Timing& time) {
		result = error;
		*outputShapes = shapes;
		*timing = time;
		});
		if (!ret.isOk()) {
		return ErrorStatus::GENERAL_FAILURE;
		@@ -114,8 +115,8 @@ const float kDefaultRtol = 1e-5f;
		template <typename T_IPreparedModel>
		void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
		const std::vector<MixedTypedExample>& examples,
		bool hasRelaxedFloat32Model = false, float fpAtol = kDefaultAtol,
		float fpRtol = kDefaultRtol, Synchronously sync = Synchronously::NO) {
		bool hasRelaxedFloat32Model, float fpAtol, float fpRtol,
		Synchronously sync, MeasureTiming measure, bool testDynamicOutputShape) {
		const uint32_t INPUT = 0;
		const uint32_t OUTPUT = 1;

		@@ -125,7 +126,7 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
		const MixedTyped& inputs = example.operands.first;
		const MixedTyped& golden = example.operands.second;

		const bool hasFloat16Inputs = !std::get<MixedTypedIndex<_Float16>::index>(inputs).empty();
		const bool hasFloat16Inputs = !inputs.float16Operands.empty();
		if (hasRelaxedFloat32Model || hasFloat16Inputs) {
		// TODO: Adjust the error limit based on testing.
		// If in relaxed mode, set the absolute tolerance to be 5ULP of FP16.
		@@ -210,6 +211,7 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo

		ErrorStatus executionStatus;
		hidl_vec<OutputShape> outputShapes;
		Timing timing;
		if (sync == Synchronously::NO) {
		SCOPED_TRACE("asynchronous");

		@@ -218,7 +220,7 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
		ASSERT_NE(nullptr, executionCallback.get());
		Return<ErrorStatus> executionLaunchStatus = ExecutePreparedModel(
		preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
		executionCallback);
		measure, executionCallback);
		ASSERT_TRUE(executionLaunchStatus.isOk());
		EXPECT_EQ(ErrorStatus::NONE, static_cast<ErrorStatus>(executionLaunchStatus));

		@@ -226,21 +228,44 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
		executionCallback->wait();
		executionStatus = executionCallback->getStatus();
		outputShapes = executionCallback->getOutputShapes();
		timing = executionCallback->getTiming();
		} else {
		SCOPED_TRACE("synchronous");

		// execute
		Return<ErrorStatus> executionReturnStatus = ExecutePreparedModel(
		preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
		&outputShapes);
		measure, &outputShapes, &timing);
		ASSERT_TRUE(executionReturnStatus.isOk());
		executionStatus = static_cast<ErrorStatus>(executionReturnStatus);
		}

		if (testDynamicOutputShape && executionStatus != ErrorStatus::NONE) {
		LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
		"execute model that it does not support.";
		std::cout << "[ ] Early termination of test because vendor service cannot "
		"execute model that it does not support."
		<< std::endl;
		return;
		}
		ASSERT_EQ(ErrorStatus::NONE, executionStatus);
		// TODO(xusongw): Check if the returned output shapes match with expectation once the
		// sample driver implementation of dynamic output shape is finished.
		ASSERT_EQ(outputShapes.size(), 0);
		if (measure == MeasureTiming::NO) {
		EXPECT_EQ(UINT64_MAX, timing.timeOnDevice);
		EXPECT_EQ(UINT64_MAX, timing.timeInDriver);
		} else {
		if (timing.timeOnDevice != UINT64_MAX && timing.timeInDriver != UINT64_MAX) {
		EXPECT_LE(timing.timeOnDevice, timing.timeInDriver);
		}
		}

		// Go through all outputs, overwrite output dimensions with returned output shapes
		if (testDynamicOutputShape) {
		ASSERT_NE(outputShapes.size(), 0);
		for_each<uint32_t>(test.operandDimensions,
		[&outputShapes](int idx, std::vector<uint32_t>& dim) {
		dim = outputShapes[idx].dimensions;
		});
		}

		// validate results
		outputMemory->read();
		@@ -261,9 +286,10 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
		template <typename T_IPreparedModel>
		void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
		const std::vector<MixedTypedExample>& examples,
		bool hasRelaxedFloat32Model, Synchronously sync) {
		bool hasRelaxedFloat32Model, Synchronously sync, MeasureTiming measure,
		bool testDynamicOutputShape) {
		EvaluatePreparedModel(preparedModel, is_ignored, examples, hasRelaxedFloat32Model, kDefaultAtol,
		kDefaultRtol, sync);
		kDefaultRtol, sync, measure, testDynamicOutputShape);
		}

		static void getPreparedModel(sp<PreparedModelCallback> callback,
		@@ -319,7 +345,8 @@ void Execute(const sp<V1_0::IDevice>& device, std::function<V1_0::Model(void)> c

		float fpAtol = 1e-5f, fpRtol = 5.0f * 1.1920928955078125e-7f;
		EvaluatePreparedModel(preparedModel, is_ignored, examples,
		/*hasRelaxedFloat32Model=*/false, fpAtol, fpRtol);
		/*hasRelaxedFloat32Model=*/false, fpAtol, fpRtol, Synchronously::NO,
		MeasureTiming::NO, /*testDynamicOutputShape=*/false);
		}

		void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> create_model,
		@@ -365,12 +392,14 @@ void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> c
		ASSERT_NE(nullptr, preparedModel.get());

		EvaluatePreparedModel(preparedModel, is_ignored, examples,
		model.relaxComputationFloat32toFloat16);
		model.relaxComputationFloat32toFloat16, 1e-5f, 1e-5f, Synchronously::NO,
		MeasureTiming::NO, /*testDynamicOutputShape=*/false);
		}

		// TODO: Reduce code duplication.
		void Execute(const sp<V1_2::IDevice>& device, std::function<V1_2::Model(void)> create_model,
		std::function<bool(int)> is_ignored, const std::vector<MixedTypedExample>& examples) {
		std::function<bool(int)> is_ignored, const std::vector<MixedTypedExample>& examples,
		bool testDynamicOutputShape) {
		V1_2::Model model = create_model();

		// see if service can handle model
		@@ -412,9 +441,17 @@ void Execute(const sp<V1_2::IDevice>& device, std::function<V1_2::Model(void)> c
		ASSERT_NE(nullptr, preparedModel.get());

		EvaluatePreparedModel(preparedModel, is_ignored, examples,
		model.relaxComputationFloat32toFloat16, Synchronously::NO);
		model.relaxComputationFloat32toFloat16, Synchronously::NO,
		MeasureTiming::NO, testDynamicOutputShape);
		EvaluatePreparedModel(preparedModel, is_ignored, examples,
		model.relaxComputationFloat32toFloat16, Synchronously::YES,
		MeasureTiming::NO, testDynamicOutputShape);
		EvaluatePreparedModel(preparedModel, is_ignored, examples,
		model.relaxComputationFloat32toFloat16, Synchronously::NO,
		MeasureTiming::YES, testDynamicOutputShape);
		EvaluatePreparedModel(preparedModel, is_ignored, examples,
		model.relaxComputationFloat32toFloat16, Synchronously::YES);
		model.relaxComputationFloat32toFloat16, Synchronously::YES,
		MeasureTiming::YES, testDynamicOutputShape);
		}

		} // namespace generated_tests

neuralnetworks/1.2/IExecutionCallback.hal

+6 −2

Original line number	Diff line number	Diff line
		@@ -18,7 +18,6 @@ package android.hardware.neuralnetworks@1.2;

		import @1.0::ErrorStatus;
		import @1.0::IExecutionCallback;
		import OutputShape;

		/**
		* IExecutionCallback must be used to return the error status result from an
		@@ -50,6 +49,11 @@ interface IExecutionCallback extends @1.0::IExecutionCallback {
		* of the output operand in the Request outputs vector.
		* outputShapes must be empty unless the status is either
		* NONE or OUTPUT_INSUFFICIENT_SIZE.
		* @return Timing Duration of execution. Unless MeasureTiming::YES was passed when
		* launching the execution and status is NONE, all times must
		* be reported as UINT64_MAX. A driver may choose to report
		* any time as UINT64_MAX, indicating that particular measurement is
		* not available.
		*/
		oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes);
		oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
		};

neuralnetworks/1.2/IPreparedModel.hal

+15 −3

Original line number	Diff line number	Diff line
		@@ -59,6 +59,10 @@ interface IPreparedModel extends @1.0::IPreparedModel {
		*
		* @param request The input and output information on which the prepared
		* model is to be executed.
		* @param measure Specifies whether or not to measure duration of the execution.
		* The duration runs from the time the driver sees the call
		* to the execute_1_2 function to the time the driver invokes
		* the callback.
		* @param callback A callback object used to return the error status of
		* the execution. The callback object's notify function must
		* be called exactly once, even if the execution was
		@@ -72,7 +76,7 @@ interface IPreparedModel extends @1.0::IPreparedModel {
		* - INVALID_ARGUMENT if one of the input arguments is
		* invalid
		*/
		execute_1_2(Request request, IExecutionCallback callback)
		execute_1_2(Request request, MeasureTiming measure, IExecutionCallback callback)
		generates (ErrorStatus status);

		/**
		@@ -98,6 +102,10 @@ interface IPreparedModel extends @1.0::IPreparedModel {
		*
		* @param request The input and output information on which the prepared
		* model is to be executed.
		* @param measure Specifies whether or not to measure duration of the execution.
		* The duration runs from the time the driver sees the call
		* to the executeSynchronously function to the time the driver
		* returns from the function.
		* @return status Error status of the execution, must be:
		* - NONE if execution is performed successfully
		* - DEVICE_UNAVAILABLE if driver is offline or busy
		@@ -112,9 +120,13 @@ interface IPreparedModel extends @1.0::IPreparedModel {
		* of the output operand in the Request outputs vector.
		* outputShapes must be empty unless the status is either
		* NONE or OUTPUT_INSUFFICIENT_SIZE.
		* @return Timing Duration of execution. Unless measure is YES and status is
		* NONE, all times must be reported as UINT64_MAX. A driver may
		* choose to report any time as UINT64_MAX, indicating that
		* measurement is not available.
		*/
		executeSynchronously(Request request)
		generates (ErrorStatus status, vec<OutputShape> outputShapes);
		executeSynchronously(Request request, MeasureTiming measure)
		generates (ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);

		/**
		* Configure a Burst object used to execute multiple inferences on a