diff options
| author | David Gross <dgross@google.com> | 2019-01-23 14:01:52 -0800 |
|---|---|---|
| committer | David Gross <dgross@google.com> | 2019-01-23 14:01:52 -0800 |
| commit | e301349b0597f2cc136703f2aaa182e6a546b50c (patch) | |
| tree | 0626b199a0f7a8fdc1ef596b0d2dc280b7df5b9b | |
| parent | f96dc6ed8f09cbb08bee1de4ce2d12e38c80f2bd (diff) | |
Update neuralnetworks HAL to allow collecting execution duration.
Test: VtsHalNeuralnetworksV1_0TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.0::IDevice/sample-all
Test: VtsHalNeuralnetworksV1_1TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.1::IDevice/sample-all
Test: VtsHalNeuralnetworksV1_2TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.2::IDevice/sample-all
Bug: 115390094
Change-Id: If67a5ffe39cfdd78498e01f26251734fdc8e66c7
| -rw-r--r-- | neuralnetworks/1.0/vts/functional/Callbacks.cpp | 11 | ||||
| -rw-r--r-- | neuralnetworks/1.0/vts/functional/Callbacks.h | 33 | ||||
| -rw-r--r-- | neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp | 65 | ||||
| -rw-r--r-- | neuralnetworks/1.2/IExecutionCallback.hal | 8 | ||||
| -rw-r--r-- | neuralnetworks/1.2/IPreparedModel.hal | 18 | ||||
| -rw-r--r-- | neuralnetworks/1.2/types.hal | 55 | ||||
| -rw-r--r-- | neuralnetworks/1.2/vts/functional/ValidateRequest.cpp | 29 |
7 files changed, 177 insertions, 42 deletions
diff --git a/neuralnetworks/1.0/vts/functional/Callbacks.cpp b/neuralnetworks/1.0/vts/functional/Callbacks.cpp index 03afcd0751..c30702cd99 100644 --- a/neuralnetworks/1.0/vts/functional/Callbacks.cpp +++ b/neuralnetworks/1.0/vts/functional/Callbacks.cpp @@ -135,14 +135,18 @@ ExecutionCallback::~ExecutionCallback() {} Return<void> ExecutionCallback::notify(ErrorStatus errorStatus) { mErrorStatus = errorStatus; + mOutputShapes = {}; + mTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX}; CallbackBase::notify(); return Void(); } Return<void> ExecutionCallback::notify_1_2(ErrorStatus errorStatus, - const hidl_vec<OutputShape>& outputShapes) { + const hidl_vec<OutputShape>& outputShapes, + const Timing& timing) { mErrorStatus = errorStatus; mOutputShapes = outputShapes; + mTiming = timing; CallbackBase::notify(); return Void(); } @@ -157,6 +161,11 @@ const std::vector<OutputShape>& ExecutionCallback::getOutputShapes() { return mOutputShapes; } +Timing ExecutionCallback::getTiming() { + wait(); + return mTiming; +} + } // namespace implementation } // namespace V1_2 } // namespace neuralnetworks diff --git a/neuralnetworks/1.0/vts/functional/Callbacks.h b/neuralnetworks/1.0/vts/functional/Callbacks.h index 46f29a60e7..4707d0a251 100644 --- a/neuralnetworks/1.0/vts/functional/Callbacks.h +++ b/neuralnetworks/1.0/vts/functional/Callbacks.h @@ -308,8 +308,20 @@ class ExecutionCallback : public CallbackBase, public IExecutionCallback { * of the output operand in the Request outputs vector. * outputShapes must be empty unless the status is either * NONE or OUTPUT_INSUFFICIENT_SIZE. + * @return Timing Duration of execution. Unless MeasureTiming::YES was passed when + * launching the execution and status is NONE, all times must + * be reported as UINT64_MAX. A driver may choose to report + * any time as UINT64_MAX, indicating that particular measurement is + * not available. 
*/ - Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes) override; + Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes, + const Timing& timing) override; + + // An overload of the latest notify interface to hide the version from ExecutionBuilder. + Return<void> notify(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes, + const Timing& timing) { + return notify_1_2(status, outputShapes, timing); + } /** * Retrieves the error status returned from the asynchronous task launched @@ -350,9 +362,24 @@ class ExecutionCallback : public CallbackBase, public IExecutionCallback { */ const std::vector<OutputShape>& getOutputShapes(); + /** + * Retrieves the duration of execution of the asynchronous task launched + * by IPreparedModel::execute_1_2. If IPreparedModel::execute_1_2 has not finished + * asynchronously executing, this call will block until the asynchronous task + * notifies the object. + * + * If the asynchronous task was launched by IPreparedModel::execute, every time + * must be UINT64_MAX. + * + * @return timing Duration of the execution. Every time must be UINT64_MAX unless + * the status is NONE. 
+ */ + Timing getTiming(); + private: - ErrorStatus mErrorStatus; - std::vector<OutputShape> mOutputShapes; + ErrorStatus mErrorStatus = ErrorStatus::GENERAL_FAILURE; + std::vector<OutputShape> mOutputShapes = {}; + Timing mTiming = {}; }; diff --git a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp index d45922e1c4..65c425ee1e 100644 --- a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp +++ b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp @@ -77,29 +77,33 @@ void copy_back(MixedTyped* dst, const std::vector<RequestArgument>& ra, char* sr // Top level driver for models and examples generated by test_generator.py // Test driver for those generated from ml/nn/runtime/test/spec static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>& preparedModel, - const Request& request, + const Request& request, MeasureTiming, sp<ExecutionCallback>& callback) { return preparedModel->execute(request, callback); } static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel, - const Request& request, + const Request& request, MeasureTiming measure, sp<ExecutionCallback>& callback) { - return preparedModel->execute_1_2(request, callback); + return preparedModel->execute_1_2(request, measure, callback); } static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>&, const Request&, - hidl_vec<OutputShape>*) { + MeasureTiming, hidl_vec<OutputShape>*, Timing*) { ADD_FAILURE() << "asking for synchronous execution at V1_0"; return ErrorStatus::GENERAL_FAILURE; } static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel, - const Request& request, - hidl_vec<OutputShape>* outputShapes) { + const Request& request, MeasureTiming measure, + hidl_vec<OutputShape>* outputShapes, + Timing* timing) { ErrorStatus result; Return<void> ret = preparedModel->executeSynchronously( - request, [&result, 
&outputShapes](ErrorStatus error, const hidl_vec<OutputShape>& shapes) { - result = error; - *outputShapes = shapes; - }); + request, measure, + [&result, outputShapes, timing](ErrorStatus error, const hidl_vec<OutputShape>& shapes, + const Timing& time) { + result = error; + *outputShapes = shapes; + *timing = time; + }); if (!ret.isOk()) { return ErrorStatus::GENERAL_FAILURE; } @@ -111,9 +115,8 @@ const float kDefaultRtol = 1e-5f; template <typename T_IPreparedModel> void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored, const std::vector<MixedTypedExample>& examples, - bool hasRelaxedFloat32Model = false, float fpAtol = kDefaultAtol, - float fpRtol = kDefaultRtol, Synchronously sync = Synchronously::NO, - bool testDynamicOutputShape = false) { + bool hasRelaxedFloat32Model, float fpAtol, float fpRtol, + Synchronously sync, MeasureTiming measure, bool testDynamicOutputShape) { const uint32_t INPUT = 0; const uint32_t OUTPUT = 1; @@ -208,6 +211,7 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo ErrorStatus executionStatus; hidl_vec<OutputShape> outputShapes; + Timing timing; if (sync == Synchronously::NO) { SCOPED_TRACE("asynchronous"); @@ -215,8 +219,8 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo sp<ExecutionCallback> executionCallback = new ExecutionCallback(); ASSERT_NE(nullptr, executionCallback.get()); Return<ErrorStatus> executionLaunchStatus = ExecutePreparedModel( - preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools}, - executionCallback); + preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools}, + measure, executionCallback); ASSERT_TRUE(executionLaunchStatus.isOk()); EXPECT_EQ(ErrorStatus::NONE, static_cast<ErrorStatus>(executionLaunchStatus)); @@ -224,13 +228,14 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo executionCallback->wait(); 
executionStatus = executionCallback->getStatus(); outputShapes = executionCallback->getOutputShapes(); + timing = executionCallback->getTiming(); } else { SCOPED_TRACE("synchronous"); // execute Return<ErrorStatus> executionReturnStatus = ExecutePreparedModel( - preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools}, - &outputShapes); + preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools}, + measure, &outputShapes, &timing); ASSERT_TRUE(executionReturnStatus.isOk()); executionStatus = static_cast<ErrorStatus>(executionReturnStatus); } @@ -244,6 +249,14 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo return; } ASSERT_EQ(ErrorStatus::NONE, executionStatus); + if (measure == MeasureTiming::NO) { + EXPECT_EQ(UINT64_MAX, timing.timeOnDevice); + EXPECT_EQ(UINT64_MAX, timing.timeInDriver); + } else { + if (timing.timeOnDevice != UINT64_MAX && timing.timeInDriver != UINT64_MAX) { + EXPECT_LE(timing.timeOnDevice, timing.timeInDriver); + } + } // Go through all outputs, overwrite output dimensions with returned output shapes if (testDynamicOutputShape) { @@ -273,10 +286,10 @@ void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bo template <typename T_IPreparedModel> void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored, const std::vector<MixedTypedExample>& examples, - bool hasRelaxedFloat32Model, Synchronously sync, + bool hasRelaxedFloat32Model, Synchronously sync, MeasureTiming measure, bool testDynamicOutputShape) { EvaluatePreparedModel(preparedModel, is_ignored, examples, hasRelaxedFloat32Model, kDefaultAtol, - kDefaultRtol, sync, testDynamicOutputShape); + kDefaultRtol, sync, measure, testDynamicOutputShape); } static void getPreparedModel(sp<PreparedModelCallback> callback, @@ -333,7 +346,7 @@ void Execute(const sp<V1_0::IDevice>& device, std::function<V1_0::Model(void)> c float fpAtol = 1e-5f, fpRtol = 5.0f * 
1.1920928955078125e-7f; EvaluatePreparedModel(preparedModel, is_ignored, examples, /*hasRelaxedFloat32Model=*/false, fpAtol, fpRtol, Synchronously::NO, - /*testDynamicOutputShape=*/false); + MeasureTiming::NO, /*testDynamicOutputShape=*/false); } void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> create_model, @@ -380,7 +393,7 @@ void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> c EvaluatePreparedModel(preparedModel, is_ignored, examples, model.relaxComputationFloat32toFloat16, 1e-5f, 1e-5f, Synchronously::NO, - /*testDynamicOutputShape=*/false); + MeasureTiming::NO, /*testDynamicOutputShape=*/false); } // TODO: Reduce code duplication. @@ -429,10 +442,16 @@ void Execute(const sp<V1_2::IDevice>& device, std::function<V1_2::Model(void)> c EvaluatePreparedModel(preparedModel, is_ignored, examples, model.relaxComputationFloat32toFloat16, Synchronously::NO, - testDynamicOutputShape); + MeasureTiming::NO, testDynamicOutputShape); + EvaluatePreparedModel(preparedModel, is_ignored, examples, + model.relaxComputationFloat32toFloat16, Synchronously::YES, + MeasureTiming::NO, testDynamicOutputShape); + EvaluatePreparedModel(preparedModel, is_ignored, examples, + model.relaxComputationFloat32toFloat16, Synchronously::NO, + MeasureTiming::YES, testDynamicOutputShape); EvaluatePreparedModel(preparedModel, is_ignored, examples, model.relaxComputationFloat32toFloat16, Synchronously::YES, - testDynamicOutputShape); + MeasureTiming::YES, testDynamicOutputShape); } } // namespace generated_tests diff --git a/neuralnetworks/1.2/IExecutionCallback.hal b/neuralnetworks/1.2/IExecutionCallback.hal index 47de1b60ec..7f6c9eeffe 100644 --- a/neuralnetworks/1.2/IExecutionCallback.hal +++ b/neuralnetworks/1.2/IExecutionCallback.hal @@ -18,7 +18,6 @@ package android.hardware.neuralnetworks@1.2; import @1.0::ErrorStatus; import @1.0::IExecutionCallback; -import OutputShape; /** * IExecutionCallback must be used to return the error status 
result from an @@ -50,6 +49,11 @@ interface IExecutionCallback extends @1.0::IExecutionCallback { * of the output operand in the Request outputs vector. * outputShapes must be empty unless the status is either * NONE or OUTPUT_INSUFFICIENT_SIZE. + * @return Timing Duration of execution. Unless MeasureTiming::YES was passed when + * launching the execution and status is NONE, all times must + * be reported as UINT64_MAX. A driver may choose to report + * any time as UINT64_MAX, indicating that particular measurement is + * not available. */ - oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes); + oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes, Timing timing); }; diff --git a/neuralnetworks/1.2/IPreparedModel.hal b/neuralnetworks/1.2/IPreparedModel.hal index 2d4e572333..5d2d80ff71 100644 --- a/neuralnetworks/1.2/IPreparedModel.hal +++ b/neuralnetworks/1.2/IPreparedModel.hal @@ -59,6 +59,10 @@ interface IPreparedModel extends @1.0::IPreparedModel { * * @param request The input and output information on which the prepared * model is to be executed. + * @param measure Specifies whether or not to measure duration of the execution. + * The duration runs from the time the driver sees the call + * to the execute_1_2 function to the time the driver invokes + * the callback. * @param callback A callback object used to return the error status of * the execution. 
The callback object's notify function must * be called exactly once, even if the execution was @@ -72,7 +76,7 @@ interface IPreparedModel extends @1.0::IPreparedModel { * - INVALID_ARGUMENT if one of the input arguments is * invalid */ - execute_1_2(Request request, IExecutionCallback callback) + execute_1_2(Request request, MeasureTiming measure, IExecutionCallback callback) generates (ErrorStatus status); /** @@ -98,6 +102,10 @@ interface IPreparedModel extends @1.0::IPreparedModel { * * @param request The input and output information on which the prepared * model is to be executed. + * @param measure Specifies whether or not to measure duration of the execution. + * The duration runs from the time the driver sees the call + * to the executeSynchronously function to the time the driver + * returns from the function. * @return status Error status of the execution, must be: * - NONE if execution is performed successfully * - DEVICE_UNAVAILABLE if driver is offline or busy @@ -112,9 +120,13 @@ interface IPreparedModel extends @1.0::IPreparedModel { * of the output operand in the Request outputs vector. * outputShapes must be empty unless the status is either * NONE or OUTPUT_INSUFFICIENT_SIZE. + * @return Timing Duration of execution. Unless measure is YES and status is + * NONE, all times must be reported as UINT64_MAX. A driver may + * choose to report any time as UINT64_MAX, indicating that + * measurement is not available. 
*/ - executeSynchronously(Request request) - generates (ErrorStatus status, vec<OutputShape> outputShapes); + executeSynchronously(Request request, MeasureTiming measure) + generates (ErrorStatus status, vec<OutputShape> outputShapes, Timing timing); /** * Configure a Burst object used to execute multiple inferences on a diff --git a/neuralnetworks/1.2/types.hal b/neuralnetworks/1.2/types.hal index ce993d71a9..8bc28b4885 100644 --- a/neuralnetworks/1.2/types.hal +++ b/neuralnetworks/1.2/types.hal @@ -447,8 +447,34 @@ struct OutputShape { }; /** - * FmqRequestDatum is a single element of a serialized representation of a - * {@link @1.0::Request} object which is sent across FastMessageQueue. + * Specifies whether or not to measure timing information during execution. + */ +enum MeasureTiming : int32_t { + NO = 0, + YES = 1, +}; + +/** + + * Timing information measured during execution. Each time is a duration from + * the beginning of some task to the end of that task, including time when that + * task is not active (for example, preempted by some other task, or + * waiting for some resource to become available). + * + * Times are measured in microseconds. + * When a time is not available, it must be reported as UINT64_MAX. + */ +struct Timing { + /** Execution time on device (not driver, which runs on host processor). */ + uint64_t timeOnDevice; + /** Execution time in driver (including time on device). */ + uint64_t timeInDriver; +}; + +/** + * FmqRequestDatum is a single element of a serialized representation of an + * execution request (a {@link @1.0::Request} object and a {@link MeasureTiming} + * value) which is sent across FastMessageQueue. * * The serialized representation for a particular execution is referred to later * in these descriptions as a 'packet'. @@ -456,7 +482,7 @@ struct OutputShape { * FastMessageQueue can only pass HIDL-defined types that do not involve nested * buffers, handles, or interfaces. 
* - * The {@link @1.0::Request} is serialized as follows: + * The request is serialized as follows: * 1) 'packetInformation' * 2) For each input operand: * 2.1) 'inputOperandInformation' @@ -468,6 +494,7 @@ struct OutputShape { * 3.2.1) 'outputOperandDimensionValue' * 4) For each pool: * 4.1) 'poolIdentifier' + * 5) 'measureTiming' */ safe_union FmqRequestDatum { /** @@ -561,12 +588,21 @@ safe_union FmqRequestDatum { * identifier. */ int32_t poolIdentifier; + + /** + * Specifies whether or not to measure duration of the execution. The + * duration runs from the time the driver dequeues the request from a + * FastMessageQueue to the time the driver enqueues results to a + * FastMessageQueue. + */ + MeasureTiming measureTiming; }; /** * FmqResultDatum is a single element of a serialized representation of the - * values returned from an execution ({@link @1.0::ErrorStatus} and - * vec<{@link OutputShape}>) which is returned via FastMessageQueue. + * values returned from an execution ({@link @1.0::ErrorStatus}, + * vec<{@link OutputShape}>, and {@link Timing}) which is returned via + * FastMessageQueue. * * The serialized representation for a particular execution is referred to later * in these descriptions as a 'packet'. @@ -581,6 +617,7 @@ safe_union FmqRequestDatum { * 2.1) 'operandInformation' * 2.2) For each dimension element of the operand: * 2.2.1) 'operandDimensionValue' + * 3) 'executionTiming' */ safe_union FmqResultDatum { /** @@ -636,4 +673,12 @@ safe_union FmqResultDatum { * Element of the dimensions vector. */ uint32_t operandDimensionValue; + + /** + * Duration of execution. Unless measurement was requested and execution + * succeeds, all times must be reported as UINT64_MAX. A driver may choose + * to report any time as UINT64_MAX, indicating that measurement is not + * available. 
+ */ + Timing executionTiming; }; diff --git a/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp b/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp index 1eaea4b9a6..00a7c3ec4f 100644 --- a/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp +++ b/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp @@ -42,6 +42,10 @@ using test_helper::MixedTypedExample; ///////////////////////// UTILITY FUNCTIONS ///////////////////////// +static bool badTiming(Timing timing) { + return timing.timeOnDevice == UINT64_MAX && timing.timeInDriver == UINT64_MAX; +} + static void createPreparedModel(const sp<IDevice>& device, const Model& model, sp<IPreparedModel>* preparedModel) { ASSERT_NE(nullptr, preparedModel); @@ -98,31 +102,46 @@ static void validate(const sp<IPreparedModel>& preparedModel, const std::string& Request request, const std::function<void(Request*)>& mutation) { mutation(&request); + // We'd like to test both with timing requested and without timing + // requested. Rather than running each test both ways, we'll decide whether + // to request timing by hashing the message. We do not use std::hash because + // it is not guaranteed stable across executions. + char hash = 0; + for (auto c : message) { + hash ^= c; + }; + MeasureTiming measure = (hash & 1) ? 
MeasureTiming::YES : MeasureTiming::NO; + { SCOPED_TRACE(message + " [execute_1_2]"); sp<ExecutionCallback> executionCallback = new ExecutionCallback(); ASSERT_NE(nullptr, executionCallback.get()); Return<ErrorStatus> executeLaunchStatus = - preparedModel->execute_1_2(request, executionCallback); + preparedModel->execute_1_2(request, measure, executionCallback); ASSERT_TRUE(executeLaunchStatus.isOk()); ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, static_cast<ErrorStatus>(executeLaunchStatus)); executionCallback->wait(); ErrorStatus executionReturnStatus = executionCallback->getStatus(); const auto& outputShapes = executionCallback->getOutputShapes(); + Timing timing = executionCallback->getTiming(); ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionReturnStatus); ASSERT_EQ(outputShapes.size(), 0); + ASSERT_TRUE(badTiming(timing)); } { SCOPED_TRACE(message + " [executeSynchronously]"); Return<void> executeStatus = preparedModel->executeSynchronously( - request, [](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes) { - ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, error); - EXPECT_EQ(outputShapes.size(), 0); - }); + request, measure, + [](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes, + const Timing& timing) { + ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, error); + EXPECT_EQ(outputShapes.size(), 0); + EXPECT_TRUE(badTiming(timing)); + }); ASSERT_TRUE(executeStatus.isOk()); } } |
