Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 95c82191 authored by David Gross, committed by Android (Google) Code Review
Browse files

Merge "Update neuralnetworks HAL to allow collecting execution duration."

parents ad9e2152 e301349b
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -135,14 +135,18 @@ ExecutionCallback::~ExecutionCallback() {}

Return<void> ExecutionCallback::notify(ErrorStatus errorStatus) {
    mErrorStatus = errorStatus;
    mOutputShapes = {};
    mTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
    CallbackBase::notify();
    return Void();
}

Return<void> ExecutionCallback::notify_1_2(ErrorStatus errorStatus,
                                           const hidl_vec<OutputShape>& outputShapes) {
                                           const hidl_vec<OutputShape>& outputShapes,
                                           const Timing& timing) {
    mErrorStatus = errorStatus;
    mOutputShapes = outputShapes;
    mTiming = timing;
    CallbackBase::notify();
    return Void();
}
@@ -157,6 +161,11 @@ const std::vector<OutputShape>& ExecutionCallback::getOutputShapes() {
    return mOutputShapes;
}

Timing ExecutionCallback::getTiming() {
    wait();
    return mTiming;
}

}  // namespace implementation
}  // namespace V1_2
}  // namespace neuralnetworks
+30 −3
Original line number Diff line number Diff line
@@ -308,8 +308,20 @@ class ExecutionCallback : public CallbackBase, public IExecutionCallback {
     *                     of the output operand in the Request outputs vector.
     *                     outputShapes must be empty unless the status is either
     *                     NONE or OUTPUT_INSUFFICIENT_SIZE.
     * @return Timing Duration of execution. Unless MeasureTiming::YES was passed when
     *                launching the execution and status is NONE, all times must
     *                be reported as UINT64_MAX. A driver may choose to report
     *                any time as UINT64_MAX, indicating that particular measurement is
     *                not available.
     */
    Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes) override;
    Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes,
                            const Timing& timing) override;

    // An overload of the latest notify interface to hide the version from ExecutionBuilder.
    Return<void> notify(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes,
                        const Timing& timing) {
        return notify_1_2(status, outputShapes, timing);
    }

    /**
     * Retrieves the error status returned from the asynchronous task launched
@@ -350,9 +362,24 @@ class ExecutionCallback : public CallbackBase, public IExecutionCallback {
     */
    const std::vector<OutputShape>& getOutputShapes();

    /**
     * Retrieves the duration of execution of the asynchronous task launched
     * by IPreparedModel::execute_1_2. If IPreparedModel::execute_1_2 has not finished
     * asynchronously executing, this call will block until the asynchronous task
     * notifies the object.
     *
     * If the asynchronous task was launched by IPreparedModel::execute, every time
     * must be UINT64_MAX.
     *
     * @return timing Duration of the execution. Every time must be UINT64_MAX unless
     *                the status is NONE.
     */
    Timing getTiming();

   private:
    ErrorStatus mErrorStatus;
    std::vector<OutputShape> mOutputShapes;
    ErrorStatus mErrorStatus = ErrorStatus::GENERAL_FAILURE;
    std::vector<OutputShape> mOutputShapes = {};
    Timing mTiming = {};
};


+42 −23
Original line number Diff line number Diff line
@@ -77,28 +77,32 @@ void copy_back(MixedTyped* dst, const std::vector<RequestArgument>& ra, char* sr
// Top level driver for models and examples generated by test_generator.py
// Test driver for those generated from ml/nn/runtime/test/spec
static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>& preparedModel,
                                                const Request& request,
                                                const Request& request, MeasureTiming,
                                                sp<ExecutionCallback>& callback) {
    return preparedModel->execute(request, callback);
}
static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
                                                const Request& request,
                                                const Request& request, MeasureTiming measure,
                                                sp<ExecutionCallback>& callback) {
    return preparedModel->execute_1_2(request, callback);
    return preparedModel->execute_1_2(request, measure, callback);
}
static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>&, const Request&,
                                                hidl_vec<OutputShape>*) {
                                                MeasureTiming, hidl_vec<OutputShape>*, Timing*) {
    ADD_FAILURE() << "asking for synchronous execution at V1_0";
    return ErrorStatus::GENERAL_FAILURE;
}
static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
                                                const Request& request,
                                                hidl_vec<OutputShape>* outputShapes) {
                                                const Request& request, MeasureTiming measure,
                                                hidl_vec<OutputShape>* outputShapes,
                                                Timing* timing) {
    ErrorStatus result;
    Return<void> ret = preparedModel->executeSynchronously(
        request, [&result, &outputShapes](ErrorStatus error, const hidl_vec<OutputShape>& shapes) {
            request, measure,
            [&result, outputShapes, timing](ErrorStatus error, const hidl_vec<OutputShape>& shapes,
                                            const Timing& time) {
                result = error;
                *outputShapes = shapes;
                *timing = time;
            });
    if (!ret.isOk()) {
        return ErrorStatus::GENERAL_FAILURE;
@@ -111,9 +115,8 @@ const float kDefaultRtol = 1e-5f;
template <typename T_IPreparedModel>
void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
                           const std::vector<MixedTypedExample>& examples,
                           bool hasRelaxedFloat32Model = false, float fpAtol = kDefaultAtol,
                           float fpRtol = kDefaultRtol, Synchronously sync = Synchronously::NO,
                           bool testDynamicOutputShape = false) {
                           bool hasRelaxedFloat32Model, float fpAtol, float fpRtol,
                           Synchronously sync, MeasureTiming measure, bool testDynamicOutputShape) {
    const uint32_t INPUT = 0;
    const uint32_t OUTPUT = 1;

@@ -208,6 +211,7 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo

        ErrorStatus executionStatus;
        hidl_vec<OutputShape> outputShapes;
        Timing timing;
        if (sync == Synchronously::NO) {
            SCOPED_TRACE("asynchronous");

@@ -216,7 +220,7 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
            ASSERT_NE(nullptr, executionCallback.get());
            Return<ErrorStatus> executionLaunchStatus = ExecutePreparedModel(
                    preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
                executionCallback);
                    measure, executionCallback);
            ASSERT_TRUE(executionLaunchStatus.isOk());
            EXPECT_EQ(ErrorStatus::NONE, static_cast<ErrorStatus>(executionLaunchStatus));

@@ -224,13 +228,14 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
            executionCallback->wait();
            executionStatus = executionCallback->getStatus();
            outputShapes = executionCallback->getOutputShapes();
            timing = executionCallback->getTiming();
        } else {
            SCOPED_TRACE("synchronous");

            // execute
            Return<ErrorStatus> executionReturnStatus = ExecutePreparedModel(
                    preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
                &outputShapes);
                    measure, &outputShapes, &timing);
            ASSERT_TRUE(executionReturnStatus.isOk());
            executionStatus = static_cast<ErrorStatus>(executionReturnStatus);
        }
@@ -244,6 +249,14 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
            return;
        }
        ASSERT_EQ(ErrorStatus::NONE, executionStatus);
        if (measure == MeasureTiming::NO) {
            EXPECT_EQ(UINT64_MAX, timing.timeOnDevice);
            EXPECT_EQ(UINT64_MAX, timing.timeInDriver);
        } else {
            if (timing.timeOnDevice != UINT64_MAX && timing.timeInDriver != UINT64_MAX) {
                EXPECT_LE(timing.timeOnDevice, timing.timeInDriver);
            }
        }

        // Go through all outputs, overwrite output dimensions with returned output shapes
        if (testDynamicOutputShape) {
@@ -273,10 +286,10 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo
template <typename T_IPreparedModel>
void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
                           const std::vector<MixedTypedExample>& examples,
                           bool hasRelaxedFloat32Model, Synchronously sync,
                           bool hasRelaxedFloat32Model, Synchronously sync, MeasureTiming measure,
                           bool testDynamicOutputShape) {
    EvaluatePreparedModel(preparedModel, is_ignored, examples, hasRelaxedFloat32Model, kDefaultAtol,
                          kDefaultRtol, sync, testDynamicOutputShape);
                          kDefaultRtol, sync, measure, testDynamicOutputShape);
}

static void getPreparedModel(sp<PreparedModelCallback> callback,
@@ -333,7 +346,7 @@ void Execute(const sp<V1_0::IDevice>& device, std::function<V1_0::Model(void)> c
    float fpAtol = 1e-5f, fpRtol = 5.0f * 1.1920928955078125e-7f;
    EvaluatePreparedModel(preparedModel, is_ignored, examples,
                          /*hasRelaxedFloat32Model=*/false, fpAtol, fpRtol, Synchronously::NO,
                          /*testDynamicOutputShape=*/false);
                          MeasureTiming::NO, /*testDynamicOutputShape=*/false);
}

void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> create_model,
@@ -380,7 +393,7 @@ void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> c

    EvaluatePreparedModel(preparedModel, is_ignored, examples,
                          model.relaxComputationFloat32toFloat16, 1e-5f, 1e-5f, Synchronously::NO,
                          /*testDynamicOutputShape=*/false);
                          MeasureTiming::NO, /*testDynamicOutputShape=*/false);
}

// TODO: Reduce code duplication.
@@ -429,10 +442,16 @@ void Execute(const sp<V1_2::IDevice>& device, std::function<V1_2::Model(void)> c

    EvaluatePreparedModel(preparedModel, is_ignored, examples,
                          model.relaxComputationFloat32toFloat16, Synchronously::NO,
                          testDynamicOutputShape);
                          MeasureTiming::NO, testDynamicOutputShape);
    EvaluatePreparedModel(preparedModel, is_ignored, examples,
                          model.relaxComputationFloat32toFloat16, Synchronously::YES,
                          MeasureTiming::NO, testDynamicOutputShape);
    EvaluatePreparedModel(preparedModel, is_ignored, examples,
                          model.relaxComputationFloat32toFloat16, Synchronously::NO,
                          MeasureTiming::YES, testDynamicOutputShape);
    EvaluatePreparedModel(preparedModel, is_ignored, examples,
                          model.relaxComputationFloat32toFloat16, Synchronously::YES,
                          testDynamicOutputShape);
                          MeasureTiming::YES, testDynamicOutputShape);
}

}  // namespace generated_tests
+6 −2
Original line number Diff line number Diff line
@@ -18,7 +18,6 @@ package android.hardware.neuralnetworks@1.2;

import @1.0::ErrorStatus;
import @1.0::IExecutionCallback;
import OutputShape;

/**
 * IExecutionCallback must be used to return the error status result from an
@@ -50,6 +49,11 @@ interface IExecutionCallback extends @1.0::IExecutionCallback {
     *                     of the output operand in the Request outputs vector.
     *                     outputShapes must be empty unless the status is either
     *                     NONE or OUTPUT_INSUFFICIENT_SIZE.
     * @return Timing Duration of execution. Unless MeasureTiming::YES was passed when
     *                launching the execution and status is NONE, all times must
     *                be reported as UINT64_MAX. A driver may choose to report
     *                any time as UINT64_MAX, indicating that particular measurement is
     *                not available.
     */
    oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes);
  oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
};
+15 −3
Original line number Diff line number Diff line
@@ -59,6 +59,10 @@ interface IPreparedModel extends @1.0::IPreparedModel {
     *
     * @param request The input and output information on which the prepared
     *                model is to be executed.
     * @param measure Specifies whether or not to measure duration of the execution.
     *                The duration runs from the time the driver sees the call
     *                to the execute_1_2 function to the time the driver invokes
     *                the callback.
     * @param callback A callback object used to return the error status of
     *                 the execution. The callback object's notify function must
     *                 be called exactly once, even if the execution was
@@ -72,7 +76,7 @@ interface IPreparedModel extends @1.0::IPreparedModel {
     *                - INVALID_ARGUMENT if one of the input arguments is
     *                  invalid
     */
    execute_1_2(Request request, IExecutionCallback callback)
    execute_1_2(Request request, MeasureTiming measure, IExecutionCallback callback)
        generates (ErrorStatus status);

    /**
@@ -98,6 +102,10 @@ interface IPreparedModel extends @1.0::IPreparedModel {
     *
     * @param request The input and output information on which the prepared
     *                model is to be executed.
     * @param measure Specifies whether or not to measure duration of the execution.
     *                The duration runs from the time the driver sees the call
     *                to the executeSynchronously function to the time the driver
     *                returns from the function.
     * @return status Error status of the execution, must be:
     *                - NONE if execution is performed successfully
     *                - DEVICE_UNAVAILABLE if driver is offline or busy
@@ -112,9 +120,13 @@ interface IPreparedModel extends @1.0::IPreparedModel {
     *                      of the output operand in the Request outputs vector.
     *                      outputShapes must be empty unless the status is either
     *                      NONE or OUTPUT_INSUFFICIENT_SIZE.
     * @return Timing Duration of execution. Unless measure is YES and status is
     *                NONE, all times must be reported as UINT64_MAX. A driver may
     *                choose to report any time as UINT64_MAX, indicating that
     *                measurement is not available.
     */
    executeSynchronously(Request request)
        generates (ErrorStatus status, vec<OutputShape> outputShapes);
    executeSynchronously(Request request, MeasureTiming measure)
            generates (ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);

    /**
     * Configure a Burst object used to execute multiple inferences on a
Loading