diff options
author | Michael Butler <butlermichael@google.com> | 2020-12-19 01:55:32 -0800 |
---|---|---|
committer | Xusong Wang <xusongw@google.com> | 2021-03-16 11:57:19 -0700 |
commit | 76e491fa46e9dffba38fd32f080c7b2d0ebcf1b1 (patch) | |
tree | 7b92e025bcad95815ba16d608620b3dad55a46af | |
parent | 900c28a250297964cf9aa65653b9adce4e6db27a (diff) |
Implement full canonical Burst in NN util code
Bug: 180492058
Bug: 177267324
Test: mma
Test: presubmit
Change-Id: I5018f6cf2dbaf705f74f4f46318142c64433e19d
Merged-In: I5018f6cf2dbaf705f74f4f46318142c64433e19d
(cherry picked from commit acff4063b63c04cbb28af87eab61e9a1fa70980a)
16 files changed, 1013 insertions, 1060 deletions
diff --git a/neuralnetworks/1.2/utils/Android.bp b/neuralnetworks/1.2/utils/Android.bp index 2921141484..41281ee955 100644 --- a/neuralnetworks/1.2/utils/Android.bp +++ b/neuralnetworks/1.2/utils/Android.bp @@ -27,7 +27,6 @@ cc_library_static { name: "neuralnetworks_utils_hal_1_2", defaults: ["neuralnetworks_utils_defaults"], srcs: ["src/*"], - exclude_srcs: ["src/ExecutionBurst*"], local_include_dirs: ["include/nnapi/hal/1.2/"], export_include_dirs: ["include"], cflags: ["-Wthread-safety"], @@ -41,10 +40,16 @@ cc_library_static { "android.hardware.neuralnetworks@1.0", "android.hardware.neuralnetworks@1.1", "android.hardware.neuralnetworks@1.2", + "libfmq", ], export_static_lib_headers: [ "neuralnetworks_utils_hal_common", ], + product_variables: { + debuggable: { // eng and userdebug builds + cflags: ["-DNN_DEBUGGABLE"], + }, + }, } cc_test { diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h index 6fd13379ef..272cee7e88 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h @@ -52,6 +52,7 @@ GeneralResult<Capabilities> convert(const hal::V1_2::Capabilities& capabilities) GeneralResult<Model> convert(const hal::V1_2::Model& model); GeneralResult<MeasureTiming> convert(const hal::V1_2::MeasureTiming& measureTiming); GeneralResult<Timing> convert(const hal::V1_2::Timing& timing); +GeneralResult<SharedMemory> convert(const hardware::hidl_memory& memory); GeneralResult<std::vector<Extension>> convert( const hardware::hidl_vec<hal::V1_2::Extension>& extensions); diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h index 5356a912bd..6b6fc71f65 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h @@ -14,23 +14,28 @@ * limitations under the License. */ -#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H -#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H +#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H +#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H #include "ExecutionBurstUtils.h" -#include <android-base/macros.h> +#include <android-base/thread_annotations.h> #include <android/hardware/neuralnetworks/1.0/types.h> -#include <android/hardware/neuralnetworks/1.1/types.h> #include <android/hardware/neuralnetworks/1.2/IBurstCallback.h> #include <android/hardware/neuralnetworks/1.2/IBurstContext.h> #include <android/hardware/neuralnetworks/1.2/IPreparedModel.h> #include <android/hardware/neuralnetworks/1.2/types.h> #include <fmq/MessageQueue.h> #include <hidl/MQDescriptor.h> +#include <nnapi/IBurst.h> +#include <nnapi/IPreparedModel.h> +#include <nnapi/Result.h> +#include <nnapi/Types.h> +#include <nnapi/hal/ProtectCallback.h> #include <atomic> #include <chrono> +#include <functional> #include <map> #include <memory> #include <mutex> @@ -39,147 +44,145 @@ #include <utility> #include <vector> -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { /** - * The ExecutionBurstController class manages both the serialization and - * deserialization of data across FMQ, making it appear to the runtime as a - * regular synchronous inference. Additionally, this class manages the burst's - * memory cache. + * The ExecutionBurstController class manages both the serialization and deserialization of data + * across FMQ, making it appear to the runtime as a regular synchronous inference. Additionally, + * this class manages the burst's memory cache. */ -class ExecutionBurstController { - DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController); +class ExecutionBurstController final : public nn::IBurst { + struct PrivateConstructorTag {}; public: + using FallbackFunction = + std::function<nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>( + const nn::Request&, nn::MeasureTiming)>; + /** - * NN runtime burst callback object and memory cache. + * NN runtime memory cache. + * + * MemoryCache associates a Memory object with a slot number to be passed across FMQ. The + * ExecutionBurstServer can use this callback to retrieve a hidl_memory corresponding to the + * slot via HIDL. * - * ExecutionBurstCallback associates a hidl_memory object with a slot number - * to be passed across FMQ. The ExecutionBurstServer can use this callback - * to retrieve this hidl_memory corresponding to the slot via HIDL. + * Whenever a hidl_memory object is copied, it will duplicate the underlying file descriptor. + * Because the NN runtime currently copies the hidl_memory on each execution, it is difficult to + * associate hidl_memory objects with previously cached hidl_memory objects. For this reason, + * callers of this class must pair each hidl_memory object with an associated key. For + * efficiency, if two hidl_memory objects represent the same underlying buffer, they must use + * the same key. * - * Whenever a hidl_memory object is copied, it will duplicate the underlying - * file descriptor. Because the NN runtime currently copies the hidl_memory - * on each execution, it is difficult to associate hidl_memory objects with - * previously cached hidl_memory objects. For this reason, callers of this - * class must pair each hidl_memory object with an associated key. For - * efficiency, if two hidl_memory objects represent the same underlying - * buffer, they must use the same key. + * This class is thread-safe. */ - class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback { - DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback); + class MemoryCache : public std::enable_shared_from_this<MemoryCache> { + struct PrivateConstructorTag {}; public: - ExecutionBurstCallback() = default; + using Task = std::function<void()>; + using Cleanup = base::ScopeGuard<Task>; + using SharedCleanup = std::shared_ptr<const Cleanup>; + using WeakCleanup = std::weak_ptr<const Cleanup>; - hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots, - getMemories_cb cb) override; + // Custom constructor to pre-allocate cache sizes. + MemoryCache(); /** - * This function performs one of two different actions: - * 1) If a key corresponding to a memory resource is unrecognized by the - * ExecutionBurstCallback object, the ExecutionBurstCallback object - * will allocate a slot, bind the memory to the slot, and return the - * slot identifier. - * 2) If a key corresponding to a memory resource is recognized by the - * ExecutionBurstCallback object, the ExecutionBurstCallback object - * will return the existing slot identifier. + * Add a burst context to the MemoryCache object. * - * @param memories Memory resources used in an inference. - * @param keys Unique identifiers where each element corresponds to a - * memory resource element in "memories". - * @return Unique slot identifiers where each returned slot element - * corresponds to a memory resource element in "memories". + * If this method is called, it must be called before the MemoryCache::cacheMemory or + * MemoryCache::getMemory is used. + * + * @param burstContext Burst context to be added to the MemoryCache object. */ - std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories, - const std::vector<intptr_t>& keys); - - /* - * This function performs two different actions: - * 1) Removes an entry from the cache (if present), including the local - * storage of the hidl_memory object. Note that this call does not - * free any corresponding hidl_memory object in ExecutionBurstServer, - * which is separately freed via IBurstContext::freeMemory. - * 2) Return whether a cache entry was removed and which slot was removed if - * found. If the key did not to correspond to any entry in the cache, a - * slot number of 0 is returned. The slot number and whether the entry - * existed is useful so the same slot can be freed in the - * ExecutionBurstServer's cache via IBurstContext::freeMemory. + void setBurstContext(sp<IBurstContext> burstContext); + + /** + * Cache a memory object in the MemoryCache object. + * + * @param memory Memory object to be cached while the returned `SharedCleanup` is alive. + * @return A pair of (1) a unique identifier for the cache entry and (2) a ref-counted + * "hold" object which preserves the cache as long as the hold object is alive. */ - std::pair<bool, int32_t> freeMemory(intptr_t key); + std::pair<int32_t, SharedCleanup> cacheMemory(const nn::SharedMemory& memory); + + /** + * Get the memory object corresponding to a slot identifier. + * + * @param slot Slot which identifies the memory object to retrieve. + * @return The memory object corresponding to slot, otherwise GeneralError. + */ + nn::GeneralResult<nn::SharedMemory> getMemory(int32_t slot); private: - int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key); - int32_t allocateSlotLocked(); + void freeMemory(const nn::SharedMemory& memory); + int32_t allocateSlotLocked() REQUIRES(mMutex); std::mutex mMutex; - std::stack<int32_t, std::vector<int32_t>> mFreeSlots; - std::map<intptr_t, int32_t> mMemoryIdToSlot; - std::vector<hardware::hidl_memory> mMemoryCache; + std::condition_variable mCond; + sp<IBurstContext> mBurstContext GUARDED_BY(mMutex); + std::stack<int32_t, std::vector<int32_t>> mFreeSlots GUARDED_BY(mMutex); + std::map<nn::SharedMemory, int32_t> mMemoryIdToSlot GUARDED_BY(mMutex); + std::vector<nn::SharedMemory> mMemoryCache GUARDED_BY(mMutex); + std::vector<WeakCleanup> mCacheCleaner GUARDED_BY(mMutex); + }; + + /** + * HIDL Callback class to pass memory objects to the Burst server when given corresponding + * slots. + */ + class ExecutionBurstCallback : public IBurstCallback { + public: + // Precondition: memoryCache must be non-null. + explicit ExecutionBurstCallback(const std::shared_ptr<MemoryCache>& memoryCache); + + // See IBurstCallback::getMemories for information on this method. + Return<void> getMemories(const hidl_vec<int32_t>& slots, getMemories_cb cb) override; + + private: + const std::weak_ptr<MemoryCache> kMemoryCache; }; /** * Creates a burst controller on a prepared model. * - * Prefer this over ExecutionBurstController's constructor. - * * @param preparedModel Model prepared for execution to execute on. - * @param pollingTimeWindow How much time (in microseconds) the - * ExecutionBurstController is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. + * @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstController is + * allowed to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. * @return ExecutionBurstController Execution burst controller object. */ - static std::unique_ptr<ExecutionBurstController> create( - const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel, + static nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> create( + const sp<IPreparedModel>& preparedModel, FallbackFunction fallback, std::chrono::microseconds pollingTimeWindow); - // prefer calling ExecutionBurstController::create - ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender, - const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver, - const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext, - const sp<ExecutionBurstCallback>& callback, - const sp<hardware::hidl_death_recipient>& deathHandler = nullptr); + ExecutionBurstController(PrivateConstructorTag tag, FallbackFunction fallback, + std::unique_ptr<RequestChannelSender> requestChannelSender, + std::unique_ptr<ResultChannelReceiver> resultChannelReceiver, + sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext, + std::shared_ptr<MemoryCache> memoryCache, + neuralnetworks::utils::DeathHandler deathHandler); - // explicit destructor to unregister the death recipient - ~ExecutionBurstController(); + // See IBurst::cacheMemory for information on this method. + OptionalCacheHold cacheMemory(const nn::SharedMemory& memory) const override; - /** - * Execute a request on a model. - * - * @param request Arguments to be executed on a model. - * @param measure Whether to collect timing measurements, either YES or NO - * @param memoryIds Identifiers corresponding to each memory object in the - * request's pools. - * @return A tuple of: - * - result code of the execution - * - dynamic output shapes from the execution - * - any execution time measurements of the execution - * - whether or not a failed burst execution should be re-run using a - * different path (e.g., IPreparedModel::executeSynchronously) - */ - std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>, - hardware::neuralnetworks::V1_2::Timing, bool> - compute(const hardware::neuralnetworks::V1_0::Request& request, - hardware::neuralnetworks::V1_2::MeasureTiming measure, - const std::vector<intptr_t>& memoryIds); - - /** - * Propagate a user's freeing of memory to the service. - * - * @param key Key corresponding to the memory object. - */ - void freeMemory(intptr_t key); + // See IBurst::execute for information on this method. + nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> execute( + const nn::Request& request, nn::MeasureTiming measure) const override; private: - std::mutex mMutex; - const std::shared_ptr<RequestChannelSender> mRequestChannelSender; - const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver; - const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext; - const sp<ExecutionBurstCallback> mMemoryCache; - const sp<hardware::hidl_death_recipient> mDeathHandler; + mutable std::atomic_flag mExecutionInFlight = ATOMIC_FLAG_INIT; + const FallbackFunction kFallback; + const std::unique_ptr<RequestChannelSender> mRequestChannelSender; + const std::unique_ptr<ResultChannelReceiver> mResultChannelReceiver; + const sp<ExecutionBurstCallback> mBurstCallback; + const sp<IBurstContext> mBurstContext; + const std::shared_ptr<MemoryCache> mMemoryCache; + // `kDeathHandler` must come after `mRequestChannelSender` and `mResultChannelReceiver` because + // it holds references to both objects. + const neuralnetworks::utils::DeathHandler kDeathHandler; }; -} // namespace android::nn +} // namespace android::hardware::neuralnetworks::V1_2::utils -#endif // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H +#endif // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h index 2e109b2de7..f7926f5835 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h @@ -14,19 +14,22 @@ * limitations under the License. */ -#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H -#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H +#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H +#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H #include "ExecutionBurstUtils.h" -#include <android-base/macros.h> +#include <android-base/thread_annotations.h> #include <android/hardware/neuralnetworks/1.0/types.h> -#include <android/hardware/neuralnetworks/1.1/types.h> #include <android/hardware/neuralnetworks/1.2/IBurstCallback.h> #include <android/hardware/neuralnetworks/1.2/IPreparedModel.h> #include <android/hardware/neuralnetworks/1.2/types.h> #include <fmq/MessageQueue.h> #include <hidl/MQDescriptor.h> +#include <nnapi/IBurst.h> +#include <nnapi/Result.h> +#include <nnapi/Types.h> +#include <nnapi/hal/ProtectCallback.h> #include <atomic> #include <chrono> @@ -36,84 +39,61 @@ #include <tuple> #include <vector> -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { /** - * The ExecutionBurstServer class is responsible for waiting for and - * deserializing a request object from a FMQ, performing the inference, and - * serializing the result back across another FMQ. + * The ExecutionBurstServer class is responsible for waiting for and deserializing a request object + * from a FMQ, performing the inference, and serializing the result back across another FMQ. */ -class ExecutionBurstServer : public hardware::neuralnetworks::V1_2::IBurstContext { - DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstServer); +class ExecutionBurstServer : public IBurstContext { + struct PrivateConstructorTag {}; public: /** - * IBurstExecutorWithCache is a callback object passed to - * ExecutionBurstServer's factory function that is used to perform an - * execution. Because some memory resources are needed across multiple - * executions, this object also contains a local cache that can directly be - * used in the execution. + * Class to cache the memory objects for a burst object. * - * ExecutionBurstServer will never access its IBurstExecutorWithCache object - * with concurrent calls. + * This class is thread-safe. */ - class IBurstExecutorWithCache { - DISALLOW_COPY_AND_ASSIGN(IBurstExecutorWithCache); - + class MemoryCache { public: - IBurstExecutorWithCache() = default; - virtual ~IBurstExecutorWithCache() = default; - - /** - * Checks if a cache entry specified by a slot is present in the cache. - * - * @param slot Identifier of the cache entry. - * @return 'true' if the cache entry is present in the cache, 'false' - * otherwise. - */ - virtual bool isCacheEntryPresent(int32_t slot) const = 0; + // Precondition: burstExecutor != nullptr + // Precondition: burstCallback != nullptr + MemoryCache(nn::SharedBurst burstExecutor, sp<IBurstCallback> burstCallback); /** - * Adds an entry specified by a slot to the cache. + * Get the cached memory objects corresponding to provided slot identifiers. * - * The caller of this function must ensure that the cache entry that is - * being added is not already present in the cache. This can be checked - * via isCacheEntryPresent. + * If the slot entry is not present in the cache, this class will use IBurstCallback to + * retrieve those entries that are not present in the cache, then cache them. * - * @param memory Memory resource to be cached. - * @param slot Slot identifier corresponding to the memory resource. + * @param slots Identifiers of memory objects to be retrieved. + * @return A vector where each element is the memory object and a ref-counted cache "hold" + * object to preserve the cache entry of the IBurst object as long as the "hold" object + * is alive, otherwise GeneralError. Each element of the vector corresponds to the + * element of slot. */ - virtual void addCacheEntry(const hardware::hidl_memory& memory, int32_t slot) = 0; + nn::GeneralResult<std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>> + getCacheEntries(const std::vector<int32_t>& slots); /** - * Removes an entry specified by a slot from the cache. - * - * If the cache entry corresponding to the slot number does not exist, - * the call does nothing. + * Remove an entry from the cache. * - * @param slot Slot identifier corresponding to the memory resource. + * @param slot Identifier of the memory object to be removed from the cache. */ - virtual void removeCacheEntry(int32_t slot) = 0; - - /** - * Perform an execution. - * - * @param request Request object with inputs and outputs specified. - * Request::pools is empty, and DataLocation::poolIndex instead - * refers to the 'slots' argument as if it were Request::pools. - * @param slots Slots corresponding to the cached memory entries to be - * used. - * @param measure Whether timing information is requested for the - * execution. - * @return Result of the execution, including the status of the - * execution, dynamic output shapes, and any timing information. - */ - virtual std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus, - hardware::hidl_vec<hardware::neuralnetworks::V1_2::OutputShape>, - hardware::neuralnetworks::V1_2::Timing> - execute(const hardware::neuralnetworks::V1_0::Request& request, - const std::vector<int32_t>& slots, - hardware::neuralnetworks::V1_2::MeasureTiming measure) = 0; + void removeCacheEntry(int32_t slot); + + private: + nn::GeneralResult<void> ensureCacheEntriesArePresentLocked( + const std::vector<int32_t>& slots) REQUIRES(mMutex); + nn::GeneralResult<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> + getCacheEntryLocked(int32_t slot) REQUIRES(mMutex); + void addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) REQUIRES(mMutex); + + std::mutex mMutex; + std::map<int32_t, std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> mCache + GUARDED_BY(mMutex); + nn::SharedBurst kBurstExecutor; + const sp<IBurstCallback> kBurstCallback; }; /** @@ -124,85 +104,52 @@ class ExecutionBurstServer : public hardware::neuralnetworks::V1_2::IBurstContex * 2) Execute a model with the given information * 3) Send the result to the created FMQ * - * @param callback Callback used to retrieve memories corresponding to - * unrecognized slots. - * @param requestChannel Input FMQ channel through which the client passes the - * request to the service. - * @param resultChannel Output FMQ channel from which the client can retrieve - * the result of the execution. - * @param executorWithCache Object which maintains a local cache of the - * memory pools and executes using the cached memory pools. - * @param pollingTimeWindow How much time (in microseconds) the - * ExecutionBurstServer is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. - * @result IBurstContext Handle to the burst context. - */ - static sp<ExecutionBurstServer> create( - const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback, - const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel, - std::shared_ptr<IBurstExecutorWithCache> executorWithCache, - std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); - - /** - * Create automated context to manage FMQ-based executions. - * - * This function is intended to be used by a service to automatically: - * 1) Receive data from a provided FMQ - * 2) Execute a model with the given information - * 3) Send the result to the created FMQ - * - * @param callback Callback used to retrieve memories corresponding to - * unrecognized slots. - * @param requestChannel Input FMQ channel through which the client passes the - * request to the service. - * @param resultChannel Output FMQ channel from which the client can retrieve - * the result of the execution. - * @param preparedModel PreparedModel that the burst object was created from. - * IPreparedModel::executeSynchronously will be used to perform the + * @param callback Callback used to retrieve memories corresponding to unrecognized slots. + * @param requestChannel Input FMQ channel through which the client passes the request to the + * service. + * @param resultChannel Output FMQ channel from which the client can retrieve the result of the * execution. - * @param pollingTimeWindow How much time (in microseconds) the - * ExecutionBurstServer is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. - * @result IBurstContext Handle to the burst context. + * @param burstExecutor Object which maintains a local cache of the memory pools and executes + * using the cached memory pools. + * @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstServer is allowed + * to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. + * @return IBurstContext Handle to the burst context. */ - static sp<ExecutionBurstServer> create( - const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback, - const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel, - hardware::neuralnetworks::V1_2::IPreparedModel* preparedModel, + static nn::GeneralResult<sp<ExecutionBurstServer>> create( + const sp<IBurstCallback>& callback, + const MQDescriptorSync<FmqRequestDatum>& requestChannel, + const MQDescriptorSync<FmqResultDatum>& resultChannel, nn::SharedBurst burstExecutor, std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); - ExecutionBurstServer(const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback, + ExecutionBurstServer(PrivateConstructorTag tag, const sp<IBurstCallback>& callback, std::unique_ptr<RequestChannelReceiver> requestChannel, std::unique_ptr<ResultChannelSender> resultChannel, - std::shared_ptr<IBurstExecutorWithCache> cachedExecutor); + nn::SharedBurst burstExecutor); ~ExecutionBurstServer(); - // Used by the NN runtime to preemptively remove any stored memory. - hardware::Return<void> freeMemory(int32_t slot) override; + // Used by the NN runtime to preemptively remove any stored memory. See + // IBurstContext::freeMemory for more information. + Return<void> freeMemory(int32_t slot) override; private: - // Ensures all cache entries contained in mExecutorWithCache are present in - // the cache. If they are not present, they are retrieved (via - // IBurstCallback::getMemories) and added to mExecutorWithCache. - // - // This method is locked via mMutex when it is called. - void ensureCacheEntriesArePresentLocked(const std::vector<int32_t>& slots); - - // Work loop that will continue processing execution requests until the - // ExecutionBurstServer object is freed. + // Work loop that will continue processing execution requests until the ExecutionBurstServer + // object is freed. void task(); + nn::ExecutionResult<std::pair<hidl_vec<OutputShape>, Timing>> execute( + const V1_0::Request& requestWithoutPools, const std::vector<int32_t>& slotsOfPools, + MeasureTiming measure); + std::thread mWorker; - std::mutex mMutex; std::atomic<bool> mTeardown{false}; - const sp<hardware::neuralnetworks::V1_2::IBurstCallback> mCallback; + const sp<IBurstCallback> mCallback; const std::unique_ptr<RequestChannelReceiver> mRequestChannelReceiver; const std::unique_ptr<ResultChannelSender> mResultChannelSender; - const std::shared_ptr<IBurstExecutorWithCache> mExecutorWithCache; + const nn::SharedBurst mBurstExecutor; + MemoryCache mMemoryCache; }; -} // namespace android::nn +} // namespace android::hardware::neuralnetworks::V1_2::utils -#endif // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H +#endif // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h index 8a4159122e..c662bc3eed 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h @@ -18,15 +18,16 @@ #define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_UTILS_H #include <android/hardware/neuralnetworks/1.0/types.h> -#include <android/hardware/neuralnetworks/1.1/types.h> #include <android/hardware/neuralnetworks/1.2/types.h> #include <fmq/MessageQueue.h> #include <hidl/MQDescriptor.h> +#include <nnapi/Result.h> +#include <nnapi/Types.h> +#include <nnapi/hal/ProtectCallback.h> #include <atomic> #include <chrono> #include <memory> -#include <optional> #include <tuple> #include <utility> #include <vector> @@ -38,159 +39,139 @@ namespace android::hardware::neuralnetworks::V1_2::utils { */ constexpr const size_t kExecutionBurstChannelLength = 1024; -using FmqRequestDescriptor = MQDescriptorSync<FmqRequestDatum>; -using FmqResultDescriptor = MQDescriptorSync<FmqResultDatum>; +/** + * Get how long the burst controller should poll while waiting for results to be returned. + * + * This time can be affected by the property "debug.nn.burst-controller-polling-window". + * + * @return Polling time in microseconds. + */ +std::chrono::microseconds getBurstControllerPollingTimeWindow(); /** - * Function to serialize a request. + * Get how long the burst server should poll while waiting for a request to be received. * - * Prefer calling RequestChannelSender::send. + * This time can be affected by the property "debug.nn.burst-server-polling-window". + * + * @return Polling time in microseconds. + */ +std::chrono::microseconds getBurstServerPollingTimeWindow(); + +/** + * Function to serialize a request. * * @param request Request object without the pool information. * @param measure Whether to collect timing information for the execution. - * @param memoryIds Slot identifiers corresponding to memory resources for the - * request. + * @param memoryIds Slot identifiers corresponding to memory resources for the request. * @return Serialized FMQ request data. */ -std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum> serialize( - const hardware::neuralnetworks::V1_0::Request& request, - hardware::neuralnetworks::V1_2::MeasureTiming measure, const std::vector<int32_t>& slots); +std::vector<FmqRequestDatum> serialize(const V1_0::Request& request, MeasureTiming measure, + const std::vector<int32_t>& slots); /** * Deserialize the FMQ request data. * - * The three resulting fields are the Request object (where Request::pools is - * empty), slot identifiers (which are stand-ins for Request::pools), and - * whether timing information must be collected for the run. + * The three resulting fields are the Request object (where Request::pools is empty), slot + * identifiers (which are stand-ins for Request::pools), and whether timing information must be + * collected for the run. * * @param data Serialized FMQ request data. - * @return Request object if successfully deserialized, std::nullopt otherwise. + * @return Request object if successfully deserialized, otherwise an error message. */ -std::optional<std::tuple<hardware::neuralnetworks::V1_0::Request, std::vector<int32_t>, - hardware::neuralnetworks::V1_2::MeasureTiming>> -deserialize(const std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>& data); +nn::Result<std::tuple<V1_0::Request, std::vector<int32_t>, MeasureTiming>> deserialize( + const std::vector<FmqRequestDatum>& data); /** * Function to serialize results. * - * Prefer calling ResultChannelSender::send. - * * @param errorStatus Status of the execution. * @param outputShapes Dynamic shapes of the output tensors. * @param timing Timing information of the execution. * @return Serialized FMQ result data. */ -std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum> serialize( - hardware::neuralnetworks::V1_0::ErrorStatus errorStatus, - const std::vector<hardware::neuralnetworks::V1_2::OutputShape>& outputShapes, - hardware::neuralnetworks::V1_2::Timing timing); +std::vector<FmqResultDatum> serialize(V1_0::ErrorStatus errorStatus, + const std::vector<OutputShape>& outputShapes, Timing timing); /** * Deserialize the FMQ result data. * - * The three resulting fields are the status of the execution, the dynamic - * shapes of the output tensors, and the timing information of the execution. + * The three resulting fields are the status of the execution, the dynamic shapes of the output + * tensors, and the timing information of the execution. * * @param data Serialized FMQ result data. - * @return Result object if successfully deserialized, std::nullopt otherwise. + * @return Result object if successfully deserialized, otherwise an error message. */ -std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus, - std::vector<hardware::neuralnetworks::V1_2::OutputShape>, - hardware::neuralnetworks::V1_2::Timing>> -deserialize(const std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>& data); +nn::Result<std::tuple<V1_0::ErrorStatus, std::vector<OutputShape>, Timing>> deserialize( + const std::vector<FmqResultDatum>& data); /** - * Convert result code to error status. - * - * @param resultCode Result code to be converted. - * @return ErrorStatus Resultant error status. - */ -hardware::neuralnetworks::V1_0::ErrorStatus legacyConvertResultCodeToErrorStatus(int resultCode); - -/** - * RequestChannelSender is responsible for serializing the result packet of - * information, sending it on the result channel, and signaling that the data is - * available. + * RequestChannelSender is responsible for serializing the result packet of information, sending it + * on the result channel, and signaling that the data is available. */ -class RequestChannelSender { - using FmqRequestDescriptor = - hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqRequestDatum>; - using FmqRequestChannel = - hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqRequestDatum, - hardware::kSynchronizedReadWrite>; +class RequestChannelSender final : public neuralnetworks::utils::IProtectedCallback { + struct PrivateConstructorTag {}; public: /** * Create the sending end of a request channel. * - * Prefer this call over the constructor. - * * @param channelLength Number of elements in the FMQ. - * @return A pair of ResultChannelReceiver and the FMQ descriptor on - * successful creation, both nullptr otherwise. + * @return A pair of ResultChannelReceiver and the FMQ descriptor on successful creation, + * GeneralError otherwise. */ - static std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> create( - size_t channelLength); + static nn::GeneralResult<std::pair<std::unique_ptr<RequestChannelSender>, + const MQDescriptorSync<FmqRequestDatum>*>> + create(size_t channelLength); /** * Send the request to the channel. * * @param request Request object without the pool information. * @param measure Whether to collect timing information for the execution. - * @param memoryIds Slot identifiers corresponding to memory resources for - * the request. - * @return 'true' on successful send, 'false' otherwise. + * @param slots Slot identifiers corresponding to memory resources for the request. + * @return An empty `Result` on successful send, otherwise an error message. */ - bool send(const hardware::neuralnetworks::V1_0::Request& request, - hardware::neuralnetworks::V1_2::MeasureTiming measure, - const std::vector<int32_t>& slots); + nn::Result<void> send(const V1_0::Request& request, MeasureTiming measure, + const std::vector<int32_t>& slots); /** - * Method to mark the channel as invalid, causing all future calls to - * RequestChannelSender::send to immediately return false without attempting - * to send a message across the FMQ. + * Method to mark the channel as invalid, causing all future calls to RequestChannelSender::send + * to immediately return false without attempting to send a message across the FMQ. */ - void invalidate(); + void notifyAsDeadObject() override; // prefer calling RequestChannelSender::send - bool sendPacket(const std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>& packet); + nn::Result<void> sendPacket(const std::vector<FmqRequestDatum>& packet); - RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel); + RequestChannelSender(PrivateConstructorTag tag, size_t channelLength); private: - const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel; + MessageQueue<FmqRequestDatum, kSynchronizedReadWrite> mFmqRequestChannel; std::atomic<bool> mValid{true}; }; /** - * RequestChannelReceiver is responsible for waiting on the channel until the - * packet is available, extracting the packet from the channel, and - * deserializing the packet. + * RequestChannelReceiver is responsible for waiting on the channel until the packet is available, + * extracting the packet from the channel, and deserializing the packet. * - * Because the receiver can wait on a packet that may never come (e.g., because - * the sending side of the packet has been closed), this object can be - * invalidated, unblocking the receiver. + * Because the receiver can wait on a packet that may never come (e.g., because the sending side of + * the packet has been closed), this object can be invalidated, unblocking the receiver. */ -class RequestChannelReceiver { - using FmqRequestChannel = - hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqRequestDatum, - hardware::kSynchronizedReadWrite>; +class RequestChannelReceiver final { + struct PrivateConstructorTag {}; public: /** * Create the receiving end of a request channel. * - * Prefer this call over the constructor. - * * @param requestChannel Descriptor for the request channel. - * @param pollingTimeWindow How much time (in microseconds) the - * RequestChannelReceiver is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. + * @param pollingTimeWindow How much time (in microseconds) the RequestChannelReceiver is + * allowed to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. * @return RequestChannelReceiver on successful creation, nullptr otherwise. */ - static std::unique_ptr<RequestChannelReceiver> create( - const FmqRequestDescriptor& requestChannel, + static nn::GeneralResult<std::unique_ptr<RequestChannelReceiver>> create( + const MQDescriptorSync<FmqRequestDatum>& requestChannel, std::chrono::microseconds pollingTimeWindow); /** @@ -200,49 +181,45 @@ class RequestChannelReceiver { * 1) The packet has been retrieved, or * 2) The receiver has been invalidated * - * @return Request object if successfully received, std::nullopt if error or - * if the receiver object was invalidated. + * @return Request object if successfully received, an appropriate message if error or if the + * receiver object was invalidated. */ - std::optional<std::tuple<hardware::neuralnetworks::V1_0::Request, std::vector<int32_t>, - hardware::neuralnetworks::V1_2::MeasureTiming>> - getBlocking(); + nn::Result<std::tuple<V1_0::Request, std::vector<int32_t>, MeasureTiming>> getBlocking(); /** - * Method to mark the channel as invalid, unblocking any current or future - * calls to RequestChannelReceiver::getBlocking. + * Method to mark the channel as invalid, unblocking any current or future calls to + * RequestChannelReceiver::getBlocking. */ void invalidate(); - RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, + RequestChannelReceiver(PrivateConstructorTag tag, + const MQDescriptorSync<FmqRequestDatum>& requestChannel, std::chrono::microseconds pollingTimeWindow); private: - std::optional<std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>> getPacketBlocking(); + nn::Result<std::vector<FmqRequestDatum>> getPacketBlocking(); - const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel; + MessageQueue<FmqRequestDatum, kSynchronizedReadWrite> mFmqRequestChannel; std::atomic<bool> mTeardown{false}; const std::chrono::microseconds kPollingTimeWindow; }; /** - * ResultChannelSender is responsible for serializing the result packet of - * information, sending it on the result channel, and signaling that the data is - * available. + * ResultChannelSender is responsible for serializing the result packet of information, sending it + * on the result channel, and signaling that the data is available. */ -class ResultChannelSender { - using FmqResultChannel = hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqResultDatum, - hardware::kSynchronizedReadWrite>; +class ResultChannelSender final { + struct PrivateConstructorTag {}; public: /** * Create the sending end of a result channel. * - * Prefer this call over the constructor. - * * @param resultChannel Descriptor for the result channel. * @return ResultChannelSender on successful creation, nullptr otherwise. */ - static std::unique_ptr<ResultChannelSender> create(const FmqResultDescriptor& resultChannel); + static nn::GeneralResult<std::unique_ptr<ResultChannelSender>> create( + const MQDescriptorSync<FmqResultDatum>& resultChannel); /** * Send the result to the channel. @@ -250,52 +227,44 @@ class ResultChannelSender { * @param errorStatus Status of the execution. * @param outputShapes Dynamic shapes of the output tensors. * @param timing Timing information of the execution. - * @return 'true' on successful send, 'false' otherwise. */ - bool send(hardware::neuralnetworks::V1_0::ErrorStatus errorStatus, - const std::vector<hardware::neuralnetworks::V1_2::OutputShape>& outputShapes, - hardware::neuralnetworks::V1_2::Timing timing); + void send(V1_0::ErrorStatus errorStatus, const std::vector<OutputShape>& outputShapes, + Timing timing); // prefer calling ResultChannelSender::send - bool sendPacket(const std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>& packet); + void sendPacket(const std::vector<FmqResultDatum>& packet); - ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel); + ResultChannelSender(PrivateConstructorTag tag, + const MQDescriptorSync<FmqResultDatum>& resultChannel); private: - const std::unique_ptr<FmqResultChannel> mFmqResultChannel; + MessageQueue<FmqResultDatum, kSynchronizedReadWrite> mFmqResultChannel; }; /** - * ResultChannelReceiver is responsible for waiting on the channel until the - * packet is available, extracting the packet from the channel, and - * deserializing the packet. + * ResultChannelReceiver is responsible for waiting on the channel until the packet is available, + * extracting the packet from the channel, and deserializing the packet. * - * Because the receiver can wait on a packet that may never come (e.g., because - * the sending side of the packet has been closed), this object can be - * invalidated, unblocking the receiver. + * Because the receiver can wait on a packet that may never come (e.g., because the sending side of + * the packet has been closed), this object can be invalidated, unblocking the receiver. */ -class ResultChannelReceiver { - using FmqResultDescriptor = - hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqResultDatum>; - using FmqResultChannel = hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqResultDatum, - hardware::kSynchronizedReadWrite>; +class ResultChannelReceiver final : public neuralnetworks::utils::IProtectedCallback { + struct PrivateConstructorTag {}; public: /** * Create the receiving end of a result channel. * - * Prefer this call over the constructor. - * * @param channelLength Number of elements in the FMQ. - * @param pollingTimeWindow How much time (in microseconds) the - * ResultChannelReceiver is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. - * @return A pair of ResultChannelReceiver and the FMQ descriptor on - * successful creation, both nullptr otherwise. + * @param pollingTimeWindow How much time (in microseconds) the ResultChannelReceiver is allowed + * to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. + * @return A pair of ResultChannelReceiver and the FMQ descriptor on successful creation, or + * GeneralError otherwise. */ - static std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> create( - size_t channelLength, std::chrono::microseconds pollingTimeWindow); + static nn::GeneralResult<std::pair<std::unique_ptr<ResultChannelReceiver>, + const MQDescriptorSync<FmqResultDatum>*>> + create(size_t channelLength, std::chrono::microseconds pollingTimeWindow); /** * Get the result from the channel. @@ -304,28 +273,25 @@ class ResultChannelReceiver { * 1) The packet has been retrieved, or * 2) The receiver has been invalidated * - * @return Result object if successfully received, std::nullopt if error or + * @return Result object if successfully received, otherwise an appropriate message if error or * if the receiver object was invalidated. */ - std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus, - std::vector<hardware::neuralnetworks::V1_2::OutputShape>, - hardware::neuralnetworks::V1_2::Timing>> - getBlocking(); + nn::Result<std::tuple<V1_0::ErrorStatus, std::vector<OutputShape>, Timing>> getBlocking(); /** - * Method to mark the channel as invalid, unblocking any current or future - * calls to ResultChannelReceiver::getBlocking. + * Method to mark the channel as invalid, unblocking any current or future calls to + * ResultChannelReceiver::getBlocking. */ - void invalidate(); + void notifyAsDeadObject() override; // prefer calling ResultChannelReceiver::getBlocking - std::optional<std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>> getPacketBlocking(); + nn::Result<std::vector<FmqResultDatum>> getPacketBlocking(); - ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel, + ResultChannelReceiver(PrivateConstructorTag tag, size_t channelLength, std::chrono::microseconds pollingTimeWindow); private: - const std::unique_ptr<FmqResultChannel> mFmqResultChannel; + MessageQueue<FmqResultDatum, kSynchronizedReadWrite> mFmqResultChannel; std::atomic<bool> mValid{true}; const std::chrono::microseconds kPollingTimeWindow; }; diff --git a/neuralnetworks/1.2/utils/src/Conversions.cpp b/neuralnetworks/1.2/utils/src/Conversions.cpp index 86a417a352..2c45583d0c 100644 --- a/neuralnetworks/1.2/utils/src/Conversions.cpp +++ b/neuralnetworks/1.2/utils/src/Conversions.cpp @@ -331,6 +331,10 @@ GeneralResult<Timing> convert(const hal::V1_2::Timing& timing) { return validatedConvert(timing); } +GeneralResult<SharedMemory> convert(const hardware::hidl_memory& memory) { + return validatedConvert(memory); +} + GeneralResult<std::vector<Extension>> convert(const hidl_vec<hal::V1_2::Extension>& extensions) { return validatedConvert(extensions); } diff --git a/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp b/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp index 2265861b41..eedf5916bc 100644 --- a/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp +++ b/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp @@ -17,283 +17,321 @@ #define LOG_TAG "ExecutionBurstController" #include "ExecutionBurstController.h" +#include "ExecutionBurstUtils.h" #include <android-base/logging.h> +#include <android-base/thread_annotations.h> +#include <nnapi/IBurst.h> +#include <nnapi/IPreparedModel.h> +#include <nnapi/Result.h> +#include <nnapi/TypeUtils.h> +#include <nnapi/Types.h> +#include <nnapi/Validation.h> +#include <nnapi/hal/1.0/Conversions.h> +#include <nnapi/hal/HandleError.h> +#include <nnapi/hal/ProtectCallback.h> +#include <nnapi/hal/TransferValue.h> #include <algorithm> #include <cstring> #include <limits> #include <memory> #include <string> +#include <thread> #include <tuple> #include <utility> #include <vector> -#include "ExecutionBurstUtils.h" -#include "HalInterfaces.h" +#include "Callbacks.h" +#include "Conversions.h" #include "Tracing.h" #include "Utils.h" -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { namespace { -class BurstContextDeathHandler : public hardware::hidl_death_recipient { - public: - using Callback = std::function<void()>; - - BurstContextDeathHandler(const Callback& onDeathCallback) : mOnDeathCallback(onDeathCallback) { - CHECK(onDeathCallback != nullptr); +nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback( + V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) { + HANDLE_HAL_STATUS(status) << "IPreparedModel::configureExecutionBurst failed with status " + << toString(status); + if (burstContext == nullptr) { + return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE) + << "IPreparedModel::configureExecutionBurst returned nullptr for burst"; } + return burstContext; +} - void serviceDied(uint64_t /*cookie*/, const wp<hidl::base::V1_0::IBase>& /*who*/) override { - LOG(ERROR) << "BurstContextDeathHandler::serviceDied -- service unexpectedly died!"; - mOnDeathCallback(); +nn::GeneralResult<hidl_vec<hidl_memory>> getMemoriesHelper( + const hidl_vec<int32_t>& slots, + const std::shared_ptr<ExecutionBurstController::MemoryCache>& memoryCache) { + hidl_vec<hidl_memory> memories(slots.size()); + for (size_t i = 0; i < slots.size(); ++i) { + const int32_t slot = slots[i]; + const auto memory = NN_TRY(memoryCache->getMemory(slot)); + memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory)); + if (!memories[i].valid()) { + return NN_ERROR() << "memory at slot " << slot << " is invalid"; + } } + return memories; +} - private: - const Callback mOnDeathCallback; -}; - -} // anonymous namespace - -hardware::Return<void> ExecutionBurstController::ExecutionBurstCallback::getMemories( - const hardware::hidl_vec<int32_t>& slots, getMemories_cb cb) { - std::lock_guard<std::mutex> guard(mMutex); +} // namespace - // get all memories - hardware::hidl_vec<hardware::hidl_memory> memories(slots.size()); - std::transform(slots.begin(), slots.end(), memories.begin(), [this](int32_t slot) { - return slot < mMemoryCache.size() ? mMemoryCache[slot] : hardware::hidl_memory{}; - }); +// MemoryCache methods - // ensure all memories are valid - if (!std::all_of(memories.begin(), memories.end(), - [](const hardware::hidl_memory& memory) { return memory.valid(); })) { - cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}); - return hardware::Void(); - } +ExecutionBurstController::MemoryCache::MemoryCache() { + constexpr size_t kPreallocatedCount = 1024; + std::vector<int32_t> freeSlotsSpace; + freeSlotsSpace.reserve(kPreallocatedCount); + mFreeSlots = std::stack<int32_t, std::vector<int32_t>>(std::move(freeSlotsSpace)); + mMemoryCache.reserve(kPreallocatedCount); + mCacheCleaner.reserve(kPreallocatedCount); +} - // return successful - cb(V1_0::ErrorStatus::NONE, std::move(memories)); - return hardware::Void(); +void ExecutionBurstController::MemoryCache::setBurstContext(sp<IBurstContext> burstContext) { + std::lock_guard guard(mMutex); + mBurstContext = std::move(burstContext); } -std::vector<int32_t> ExecutionBurstController::ExecutionBurstCallback::getSlots( - const hardware::hidl_vec<hardware::hidl_memory>& memories, - const std::vector<intptr_t>& keys) { - std::lock_guard<std::mutex> guard(mMutex); +std::pair<int32_t, ExecutionBurstController::MemoryCache::SharedCleanup> +ExecutionBurstController::MemoryCache::cacheMemory(const nn::SharedMemory& memory) { + std::unique_lock lock(mMutex); + base::ScopedLockAssertion lockAssert(mMutex); - // retrieve (or bind) all slots corresponding to memories - std::vector<int32_t> slots; - slots.reserve(memories.size()); - for (size_t i = 0; i < memories.size(); ++i) { - slots.push_back(getSlotLocked(memories[i], keys[i])); + // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is + // not currently being freed. + auto iter = mMemoryIdToSlot.find(memory); + while (iter != mMemoryIdToSlot.end()) { + const int32_t slot = iter->second; + if (auto cleaner = mCacheCleaner.at(slot).lock()) { + return std::make_pair(slot, std::move(cleaner)); + } + + // If the code reaches this point, the Memory object was in the cache, but is currently + // being destroyed. This code waits until the cache entry has been freed, then loops to + // ensure the cache entry has been freed or has been made present by another thread. + mCond.wait(lock); + iter = mMemoryIdToSlot.find(memory); } - return slots; -} -std::pair<bool, int32_t> ExecutionBurstController::ExecutionBurstCallback::freeMemory( - intptr_t key) { - std::lock_guard<std::mutex> guard(mMutex); + // Allocate a new cache entry. + const int32_t slot = allocateSlotLocked(); + mMemoryIdToSlot[memory] = slot; + mMemoryCache[slot] = memory; + + // Create reference-counted self-cleaning cache object. + auto self = weak_from_this(); + Task cleanup = [memory, memoryCache = std::move(self)] { + if (const auto lock = memoryCache.lock()) { + lock->freeMemory(memory); + } + }; + auto cleaner = std::make_shared<const Cleanup>(std::move(cleanup)); + mCacheCleaner[slot] = cleaner; + + return std::make_pair(slot, std::move(cleaner)); +} - auto iter = mMemoryIdToSlot.find(key); - if (iter == mMemoryIdToSlot.end()) { - return {false, 0}; +nn::GeneralResult<nn::SharedMemory> ExecutionBurstController::MemoryCache::getMemory(int32_t slot) { + std::lock_guard guard(mMutex); + if (slot < 0 || static_cast<size_t>(slot) >= mMemoryCache.size()) { + return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size(); } - const int32_t slot = iter->second; - mMemoryIdToSlot.erase(key); - mMemoryCache[slot] = {}; - mFreeSlots.push(slot); - return {true, slot}; + return mMemoryCache[slot]; } -int32_t ExecutionBurstController::ExecutionBurstCallback::getSlotLocked( - const hardware::hidl_memory& memory, intptr_t key) { - auto iter = mMemoryIdToSlot.find(key); - if (iter == mMemoryIdToSlot.end()) { - const int32_t slot = allocateSlotLocked(); - mMemoryIdToSlot[key] = slot; - mMemoryCache[slot] = memory; - return slot; - } else { - const int32_t slot = iter->second; - return slot; +void ExecutionBurstController::MemoryCache::freeMemory(const nn::SharedMemory& memory) { + { + std::lock_guard guard(mMutex); + const int32_t slot = mMemoryIdToSlot.at(memory); + if (mBurstContext) { + mBurstContext->freeMemory(slot); + } + mMemoryIdToSlot.erase(memory); + mMemoryCache[slot] = {}; + mCacheCleaner[slot].reset(); + mFreeSlots.push(slot); } + mCond.notify_all(); } -int32_t ExecutionBurstController::ExecutionBurstCallback::allocateSlotLocked() { +int32_t ExecutionBurstController::MemoryCache::allocateSlotLocked() { constexpr size_t kMaxNumberOfSlots = std::numeric_limits<int32_t>::max(); - // if there is a free slot, use it - if (mFreeSlots.size() > 0) { + // If there is a free slot, use it. + if (!mFreeSlots.empty()) { const int32_t slot = mFreeSlots.top(); mFreeSlots.pop(); return slot; } - // otherwise use a slot for the first time - CHECK(mMemoryCache.size() < kMaxNumberOfSlots) << "Exceeded maximum number of slots!"; + // Use a slot for the first time. + CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!"; const int32_t slot = static_cast<int32_t>(mMemoryCache.size()); mMemoryCache.emplace_back(); + mCacheCleaner.emplace_back(); return slot; } -std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create( - const sp<V1_2::IPreparedModel>& preparedModel, +// ExecutionBurstCallback methods + +ExecutionBurstController::ExecutionBurstCallback::ExecutionBurstCallback( + const std::shared_ptr<MemoryCache>& memoryCache) + : kMemoryCache(memoryCache) { + CHECK(memoryCache != nullptr); +} + +Return<void> ExecutionBurstController::ExecutionBurstCallback::getMemories( + const hidl_vec<int32_t>& slots, getMemories_cb cb) { + const auto memoryCache = kMemoryCache.lock(); + if (memoryCache == nullptr) { + LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories called after " + "the MemoryCache has been freed"; + cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}); + return Void(); + } + + const auto maybeMemories = getMemoriesHelper(slots, memoryCache); + if (!maybeMemories.has_value()) { + const auto& [message, code] = maybeMemories.error(); + LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories failed with " + << code << ": " << message; + cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}); + return Void(); + } + + cb(V1_0::ErrorStatus::NONE, maybeMemories.value()); + return Void(); +} + +// ExecutionBurstController methods + +nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> ExecutionBurstController::create( + const sp<V1_2::IPreparedModel>& preparedModel, FallbackFunction fallback, std::chrono::microseconds pollingTimeWindow) { // check inputs if (preparedModel == nullptr) { - LOG(ERROR) << "ExecutionBurstController::create passed a nullptr"; - return nullptr; + return NN_ERROR() << "ExecutionBurstController::create passed a nullptr"; } - // create callback object - sp<ExecutionBurstCallback> callback = new ExecutionBurstCallback(); - // create FMQ objects - auto [requestChannelSenderTemp, requestChannelDescriptor] = - RequestChannelSender::create(kExecutionBurstChannelLength); - auto [resultChannelReceiverTemp, resultChannelDescriptor] = - ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow); - std::shared_ptr<RequestChannelSender> requestChannelSender = - std::move(requestChannelSenderTemp); - std::shared_ptr<ResultChannelReceiver> resultChannelReceiver = - std::move(resultChannelReceiverTemp); + auto [requestChannelSender, requestChannelDescriptor] = + NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength)); + auto [resultChannelReceiver, resultChannelDescriptor] = + NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow)); // check FMQ objects - if (!requestChannelSender || !resultChannelReceiver || !requestChannelDescriptor || - !resultChannelDescriptor) { - LOG(ERROR) << "ExecutionBurstController::create failed to create FastMessageQueue"; - return nullptr; - } + CHECK(requestChannelSender != nullptr); + CHECK(requestChannelDescriptor != nullptr); + CHECK(resultChannelReceiver != nullptr); + CHECK(resultChannelDescriptor != nullptr); + + // create memory cache + auto memoryCache = std::make_shared<MemoryCache>(); + + // create callback object + auto burstCallback = sp<ExecutionBurstCallback>::make(memoryCache); + auto cb = hal::utils::CallbackValue(executionBurstResultCallback); // configure burst - V1_0::ErrorStatus errorStatus; - sp<IBurstContext> burstContext; - const hardware::Return<void> ret = preparedModel->configureExecutionBurst( - callback, *requestChannelDescriptor, *resultChannelDescriptor, - [&errorStatus, &burstContext](V1_0::ErrorStatus status, - const sp<IBurstContext>& context) { - errorStatus = status; - burstContext = context; - }); - - // check burst - if (!ret.isOk()) { - LOG(ERROR) << "IPreparedModel::configureExecutionBurst failed with description " - << ret.description(); - return nullptr; - } - if (errorStatus != V1_0::ErrorStatus::NONE) { - LOG(ERROR) << "IPreparedModel::configureExecutionBurst failed with status " - << toString(errorStatus); - return nullptr; - } - if (burstContext == nullptr) { - LOG(ERROR) << "IPreparedModel::configureExecutionBurst returned nullptr for burst"; - return nullptr; - } + const Return<void> ret = preparedModel->configureExecutionBurst( + burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb); + HANDLE_TRANSPORT_FAILURE(ret); + + auto burstContext = NN_TRY(cb.take()); + memoryCache->setBurstContext(burstContext); // create death handler object - BurstContextDeathHandler::Callback onDeathCallback = [requestChannelSender, - resultChannelReceiver] { - requestChannelSender->invalidate(); - resultChannelReceiver->invalidate(); - }; - const sp<BurstContextDeathHandler> deathHandler = new BurstContextDeathHandler(onDeathCallback); - - // linkToDeath registers a callback that will be invoked on service death to - // proactively handle service crashes. If the linkToDeath call fails, - // asynchronous calls are susceptible to hangs if the service crashes before - // providing the response. - const hardware::Return<bool> deathHandlerRet = burstContext->linkToDeath(deathHandler, 0); - if (!deathHandlerRet.isOk() || deathHandlerRet != true) { - LOG(ERROR) << "ExecutionBurstController::create -- Failed to register a death recipient " - "for the IBurstContext object."; - return nullptr; - } + auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext)); + deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get()); + deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get()); // make and return controller - return std::make_unique<ExecutionBurstController>(requestChannelSender, resultChannelReceiver, - burstContext, callback, deathHandler); + return std::make_shared<const ExecutionBurstController>( + PrivateConstructorTag{}, std::move(fallback), std::move(requestChannelSender), + std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext), + std::move(memoryCache), std::move(deathHandler)); } ExecutionBurstController::ExecutionBurstController( - const std::shared_ptr<RequestChannelSender>& requestChannelSender, - const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver, - const sp<IBurstContext>& burstContext, const sp<ExecutionBurstCallback>& callback, - const sp<hardware::hidl_death_recipient>& deathHandler) - : mRequestChannelSender(requestChannelSender), - mResultChannelReceiver(resultChannelReceiver), - mBurstContext(burstContext), - mMemoryCache(callback), - mDeathHandler(deathHandler) {} - -ExecutionBurstController::~ExecutionBurstController() { - // It is safe to ignore any errors resulting from this unlinkToDeath call - // because the ExecutionBurstController object is already being destroyed - // and its underlying IBurstContext object is no longer being used by the NN - // runtime. - if (mDeathHandler) { - mBurstContext->unlinkToDeath(mDeathHandler).isOk(); - } + PrivateConstructorTag /*tag*/, FallbackFunction fallback, + std::unique_ptr<RequestChannelSender> requestChannelSender, + std::unique_ptr<ResultChannelReceiver> resultChannelReceiver, + sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext, + std::shared_ptr<MemoryCache> memoryCache, neuralnetworks::utils::DeathHandler deathHandler) + : kFallback(std::move(fallback)), + mRequestChannelSender(std::move(requestChannelSender)), + mResultChannelReceiver(std::move(resultChannelReceiver)), + mBurstCallback(std::move(callback)), + mBurstContext(std::move(burstContext)), + mMemoryCache(std::move(memoryCache)), + kDeathHandler(std::move(deathHandler)) {} + +ExecutionBurstController::OptionalCacheHold ExecutionBurstController::cacheMemory( + const nn::SharedMemory& memory) const { + auto [slot, hold] = mMemoryCache->cacheMemory(memory); + return hold; } -static std::tuple<int, std::vector<V1_2::OutputShape>, V1_2::Timing, bool> getExecutionResult( - V1_0::ErrorStatus status, std::vector<V1_2::OutputShape> outputShapes, V1_2::Timing timing, - bool fallback) { - auto [n, checkedOutputShapes, checkedTiming] = - getExecutionResult(convertToV1_3(status), std::move(outputShapes), timing); - return {n, convertToV1_2(checkedOutputShapes), convertToV1_2(checkedTiming), fallback}; -} +nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> +ExecutionBurstController::execute(const nn::Request& request, nn::MeasureTiming measure) const { + // This is the first point when we know an execution is occurring, so begin to collect + // systraces. Note that the first point we can begin collecting systraces in + // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so + // ExecutionBurstServer collects systraces at different points in the code. + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::execute"); + + // if the request is valid but of a higher version than what's supported in burst execution, + // fall back to another execution path + if (const auto version = NN_TRY(hal::utils::makeExecutionFailure(nn::validate(request))); + version > nn::Version::ANDROID_Q) { + // fallback to another execution path if the packet could not be sent + if (kFallback) { + return kFallback(request, measure); + } + return NN_ERROR() << "Request object has features not supported by IBurst::execute"; + } -std::tuple<int, std::vector<V1_2::OutputShape>, V1_2::Timing, bool> -ExecutionBurstController::compute(const V1_0::Request& request, V1_2::MeasureTiming measure, - const std::vector<intptr_t>& memoryIds) { - // This is the first point when we know an execution is occurring, so begin - // to collect systraces. Note that the first point we can begin collecting - // systraces in ExecutionBurstServer is when the RequestChannelReceiver - // realizes there is data in the FMQ, so ExecutionBurstServer collects - // systraces at different points in the code. - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::compute"); + // clear pools field of request, as they will be provided via slots + const auto requestWithoutPools = + nn::Request{.inputs = request.inputs, .outputs = request.outputs, .pools = {}}; + auto hidlRequest = NN_TRY( + hal::utils::makeExecutionFailure(V1_0::utils::unvalidatedConvert(requestWithoutPools))); + const auto hidlMeasure = NN_TRY(hal::utils::makeExecutionFailure(convert(measure))); + + // Ensure that at most one execution is in flight at any given time. + const bool alreadyInFlight = mExecutionInFlight.test_and_set(); + if (alreadyInFlight) { + return NN_ERROR() << "IBurst already has an execution in flight"; + } + const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); }); - std::lock_guard<std::mutex> guard(mMutex); + std::vector<int32_t> slots; + std::vector<OptionalCacheHold> holds; + slots.reserve(request.pools.size()); + holds.reserve(request.pools.size()); + for (const auto& memoryPool : request.pools) { + auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool)); + slots.push_back(slot); + holds.push_back(std::move(hold)); + } // send request packet - const std::vector<int32_t> slots = mMemoryCache->getSlots(request.pools, memoryIds); - const bool success = mRequestChannelSender->send(request, measure, slots); - if (!success) { - LOG(ERROR) << "Error sending FMQ packet"; - // only use fallback execution path if the packet could not be sent - return getExecutionResult(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming12, - /*fallback=*/true); + const auto sendStatus = mRequestChannelSender->send(hidlRequest, hidlMeasure, slots); + if (!sendStatus.ok()) { + // fallback to another execution path if the packet could not be sent + if (kFallback) { + return kFallback(request, measure); + } + return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error(); } // get result packet - const auto result = mResultChannelReceiver->getBlocking(); - if (!result) { - LOG(ERROR) << "Error retrieving FMQ packet"; - // only use fallback execution path if the packet could not be sent - return getExecutionResult(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming12, - /*fallback=*/false); - } - - // unpack results and return (only use fallback execution path if the - // packet could not be sent) - auto [status, outputShapes, timing] = std::move(*result); - return getExecutionResult(status, std::move(outputShapes), timing, /*fallback=*/false); -} - -void ExecutionBurstController::freeMemory(intptr_t key) { - std::lock_guard<std::mutex> guard(mMutex); - - bool valid; - int32_t slot; - std::tie(valid, slot) = mMemoryCache->freeMemory(key); - if (valid) { - mBurstContext->freeMemory(slot).isOk(); - } + const auto [status, outputShapes, timing] = + NN_TRY(hal::utils::makeExecutionFailure(mResultChannelReceiver->getBlocking())); + return executionCallback(status, outputShapes, timing); } -} // namespace android::nn +} // namespace android::hardware::neuralnetworks::V1_2::utils diff --git a/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp b/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp index 022548dcd4..50af881d23 100644 --- a/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp +++ b/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp @@ -17,8 +17,19 @@ #define LOG_TAG "ExecutionBurstServer" #include "ExecutionBurstServer.h" +#include "Conversions.h" +#include "ExecutionBurstUtils.h" #include <android-base/logging.h> +#include <nnapi/IBurst.h> +#include <nnapi/Result.h> +#include <nnapi/TypeUtils.h> +#include <nnapi/Types.h> +#include <nnapi/Validation.h> +#include <nnapi/hal/1.0/Conversions.h> +#include <nnapi/hal/HandleError.h> +#include <nnapi/hal/ProtectCallback.h> +#include <nnapi/hal/TransferValue.h> #include <algorithm> #include <cstring> @@ -29,134 +40,146 @@ #include <utility> #include <vector> -#include "ExecutionBurstUtils.h" -#include "HalInterfaces.h" #include "Tracing.h" -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { namespace { -// DefaultBurstExecutorWithCache adapts an IPreparedModel so that it can be -// used as an IBurstExecutorWithCache. Specifically, the cache simply stores the -// hidl_memory object, and the execution forwards calls to the provided -// IPreparedModel's "executeSynchronously" method. With this class, hidl_memory -// must be mapped and unmapped for each execution. -class DefaultBurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCache { - public: - DefaultBurstExecutorWithCache(V1_2::IPreparedModel* preparedModel) - : mpPreparedModel(preparedModel) {} - - bool isCacheEntryPresent(int32_t slot) const override { - const auto it = mMemoryCache.find(slot); - return (it != mMemoryCache.end()) && it->second.valid(); +using neuralnetworks::utils::makeExecutionFailure; + +constexpr V1_2::Timing kNoTiming = {std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()}; + +nn::GeneralResult<std::vector<nn::SharedMemory>> getMemoriesCallback( + V1_0::ErrorStatus status, const hidl_vec<hidl_memory>& memories) { + HANDLE_HAL_STATUS(status) << "getting burst memories failed with " << toString(status); + std::vector<nn::SharedMemory> canonicalMemories; + canonicalMemories.reserve(memories.size()); + for (const auto& memory : memories) { + canonicalMemories.push_back(NN_TRY(nn::convert(memory))); } + return canonicalMemories; +} + +} // anonymous namespace + +ExecutionBurstServer::MemoryCache::MemoryCache(nn::SharedBurst burstExecutor, + sp<IBurstCallback> burstCallback) + : kBurstExecutor(std::move(burstExecutor)), kBurstCallback(std::move(burstCallback)) { + CHECK(kBurstExecutor != nullptr); + CHECK(kBurstCallback != nullptr); +} + +nn::GeneralResult<std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>> +ExecutionBurstServer::MemoryCache::getCacheEntries(const std::vector<int32_t>& slots) { + std::lock_guard guard(mMutex); + NN_TRY(ensureCacheEntriesArePresentLocked(slots)); - void addCacheEntry(const hardware::hidl_memory& memory, int32_t slot) override { - mMemoryCache[slot] = memory; + std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> results; + results.reserve(slots.size()); + for (int32_t slot : slots) { + results.push_back(NN_TRY(getCacheEntryLocked(slot))); } - void removeCacheEntry(int32_t slot) override { mMemoryCache.erase(slot); } - - std::tuple<V1_0::ErrorStatus, hardware::hidl_vec<V1_2::OutputShape>, V1_2::Timing> execute( - const V1_0::Request& request, const std::vector<int32_t>& slots, - V1_2::MeasureTiming measure) override { - // convert slots to pools - hardware::hidl_vec<hardware::hidl_memory> pools(slots.size()); - std::transform(slots.begin(), slots.end(), pools.begin(), - [this](int32_t slot) { return mMemoryCache[slot]; }); - - // create full request - V1_0::Request fullRequest = request; - fullRequest.pools = std::move(pools); - - // setup execution - V1_0::ErrorStatus returnedStatus = V1_0::ErrorStatus::GENERAL_FAILURE; - hardware::hidl_vec<V1_2::OutputShape> returnedOutputShapes; - V1_2::Timing returnedTiming; - auto cb = [&returnedStatus, &returnedOutputShapes, &returnedTiming]( - V1_0::ErrorStatus status, - const hardware::hidl_vec<V1_2::OutputShape>& outputShapes, - const V1_2::Timing& timing) { - returnedStatus = status; - returnedOutputShapes = outputShapes; - returnedTiming = timing; - }; - - // execute - const hardware::Return<void> ret = - mpPreparedModel->executeSynchronously(fullRequest, measure, cb); - if (!ret.isOk() || returnedStatus != V1_0::ErrorStatus::NONE) { - LOG(ERROR) << "IPreparedModelAdapter::execute -- Error executing"; - return {returnedStatus, std::move(returnedOutputShapes), kNoTiming}; - } + return results; +} + +nn::GeneralResult<void> ExecutionBurstServer::MemoryCache::ensureCacheEntriesArePresentLocked( + const std::vector<int32_t>& slots) { + const auto slotIsKnown = [this](int32_t slot) + REQUIRES(mMutex) { return mCache.count(slot) > 0; }; + + // find unique unknown slots + std::vector<int32_t> unknownSlots = slots; + std::sort(unknownSlots.begin(), unknownSlots.end()); + auto unknownSlotsEnd = std::unique(unknownSlots.begin(), unknownSlots.end()); + unknownSlotsEnd = std::remove_if(unknownSlots.begin(), unknownSlotsEnd, slotIsKnown); + unknownSlots.erase(unknownSlotsEnd, unknownSlots.end()); - return std::make_tuple(returnedStatus, std::move(returnedOutputShapes), returnedTiming); + // quick-exit if all slots are known + if (unknownSlots.empty()) { + return {}; } - private: - V1_2::IPreparedModel* const mpPreparedModel; - std::map<int32_t, hardware::hidl_memory> mMemoryCache; -}; + auto cb = neuralnetworks::utils::CallbackValue(getMemoriesCallback); -} // anonymous namespace + const auto ret = kBurstCallback->getMemories(unknownSlots, cb); + HANDLE_TRANSPORT_FAILURE(ret); -// ExecutionBurstServer methods + auto returnedMemories = NN_TRY(cb.take()); -sp<ExecutionBurstServer> ExecutionBurstServer::create( - const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel, - const MQDescriptorSync<FmqResultDatum>& resultChannel, - std::shared_ptr<IBurstExecutorWithCache> executorWithCache, - std::chrono::microseconds pollingTimeWindow) { - // check inputs - if (callback == nullptr || executorWithCache == nullptr) { - LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr"; - return nullptr; + if (returnedMemories.size() != unknownSlots.size()) { + return NN_ERROR() + << "ExecutionBurstServer::MemoryCache::ensureCacheEntriesArePresentLocked: Error " + "retrieving memories -- count mismatch between requested memories (" + << unknownSlots.size() << ") and returned memories (" << returnedMemories.size() + << ")"; } - // create FMQ objects - std::unique_ptr<RequestChannelReceiver> requestChannelReceiver = - RequestChannelReceiver::create(requestChannel, pollingTimeWindow); - std::unique_ptr<ResultChannelSender> resultChannelSender = - ResultChannelSender::create(resultChannel); + // add memories to unknown slots + for (size_t i = 0; i < unknownSlots.size(); ++i) { + addCacheEntryLocked(unknownSlots[i], std::move(returnedMemories[i])); + } - // check FMQ objects - if (!requestChannelReceiver || !resultChannelSender) { - LOG(ERROR) << "ExecutionBurstServer::create failed to create FastMessageQueue"; - return nullptr; + return {}; +} + +nn::GeneralResult<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> +ExecutionBurstServer::MemoryCache::getCacheEntryLocked(int32_t slot) { + if (const auto iter = mCache.find(slot); iter != mCache.end()) { + return iter->second; } + return NN_ERROR() + << "ExecutionBurstServer::MemoryCache::getCacheEntryLocked failed because slot " << slot + << " is not present in the cache"; +} - // make and return context - return new ExecutionBurstServer(callback, std::move(requestChannelReceiver), - std::move(resultChannelSender), std::move(executorWithCache)); +void ExecutionBurstServer::MemoryCache::addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) { + auto hold = kBurstExecutor->cacheMemory(memory); + mCache.emplace(slot, std::make_pair(std::move(memory), std::move(hold))); +} + +void ExecutionBurstServer::MemoryCache::removeCacheEntry(int32_t slot) { + std::lock_guard guard(mMutex); + mCache.erase(slot); } -sp<ExecutionBurstServer> ExecutionBurstServer::create( +// ExecutionBurstServer methods + +nn::GeneralResult<sp<ExecutionBurstServer>> ExecutionBurstServer::create( const sp<IBurstCallback>& callback, const MQDescriptorSync<FmqRequestDatum>& requestChannel, - const MQDescriptorSync<FmqResultDatum>& resultChannel, V1_2::IPreparedModel* preparedModel, + const MQDescriptorSync<FmqResultDatum>& resultChannel, nn::SharedBurst burstExecutor, std::chrono::microseconds pollingTimeWindow) { - // check relevant input - if (preparedModel == nullptr) { - LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr"; - return nullptr; + // check inputs + if (callback == nullptr || burstExecutor == nullptr) { + return NN_ERROR() << "ExecutionBurstServer::create passed a nullptr"; } - // adapt IPreparedModel to have caching - const std::shared_ptr<DefaultBurstExecutorWithCache> preparedModelAdapter = - std::make_shared<DefaultBurstExecutorWithCache>(preparedModel); + // create FMQ objects + auto requestChannelReceiver = + NN_TRY(RequestChannelReceiver::create(requestChannel, pollingTimeWindow)); + auto resultChannelSender = NN_TRY(ResultChannelSender::create(resultChannel)); + + // check FMQ objects + CHECK(requestChannelReceiver != nullptr); + CHECK(resultChannelSender != nullptr); // make and return context - return ExecutionBurstServer::create(callback, requestChannel, resultChannel, - preparedModelAdapter, pollingTimeWindow); + return sp<ExecutionBurstServer>::make(PrivateConstructorTag{}, callback, + std::move(requestChannelReceiver), + std::move(resultChannelSender), std::move(burstExecutor)); } -ExecutionBurstServer::ExecutionBurstServer( - const sp<IBurstCallback>& callback, std::unique_ptr<RequestChannelReceiver> requestChannel, - std::unique_ptr<ResultChannelSender> resultChannel, - std::shared_ptr<IBurstExecutorWithCache> executorWithCache) +ExecutionBurstServer::ExecutionBurstServer(PrivateConstructorTag /*tag*/, + const sp<IBurstCallback>& callback, + std::unique_ptr<RequestChannelReceiver> requestChannel, + std::unique_ptr<ResultChannelSender> resultChannel, + nn::SharedBurst burstExecutor) : mCallback(callback), mRequestChannelReceiver(std::move(requestChannel)), mResultChannelSender(std::move(resultChannel)), - mExecutorWithCache(std::move(executorWithCache)) { + mBurstExecutor(std::move(burstExecutor)), + mMemoryCache(mBurstExecutor, mCallback) { // TODO: highly document the threading behavior of this class mWorker = std::thread([this] { task(); }); } @@ -170,51 +193,9 @@ ExecutionBurstServer::~ExecutionBurstServer() { mWorker.join(); } -hardware::Return<void> ExecutionBurstServer::freeMemory(int32_t slot) { - std::lock_guard<std::mutex> hold(mMutex); - mExecutorWithCache->removeCacheEntry(slot); - return hardware::Void(); -} - -void ExecutionBurstServer::ensureCacheEntriesArePresentLocked(const std::vector<int32_t>& slots) { - const auto slotIsKnown = [this](int32_t slot) { - return mExecutorWithCache->isCacheEntryPresent(slot); - }; - - // find unique unknown slots - std::vector<int32_t> unknownSlots = slots; - auto unknownSlotsEnd = unknownSlots.end(); - std::sort(unknownSlots.begin(), unknownSlotsEnd); - unknownSlotsEnd = std::unique(unknownSlots.begin(), unknownSlotsEnd); - unknownSlotsEnd = std::remove_if(unknownSlots.begin(), unknownSlotsEnd, slotIsKnown); - unknownSlots.erase(unknownSlotsEnd, unknownSlots.end()); - - // quick-exit if all slots are known - if (unknownSlots.empty()) { - return; - } - - V1_0::ErrorStatus errorStatus = V1_0::ErrorStatus::GENERAL_FAILURE; - std::vector<hardware::hidl_memory> returnedMemories; - auto cb = [&errorStatus, &returnedMemories]( - V1_0::ErrorStatus status, - const hardware::hidl_vec<hardware::hidl_memory>& memories) { - errorStatus = status; - returnedMemories = memories; - }; - - const hardware::Return<void> ret = mCallback->getMemories(unknownSlots, cb); - - if (!ret.isOk() || errorStatus != V1_0::ErrorStatus::NONE || - returnedMemories.size() != unknownSlots.size()) { - LOG(ERROR) << "Error retrieving memories"; - return; - } - - // add memories to unknown slots - for (size_t i = 0; i < unknownSlots.size(); ++i) { - mExecutorWithCache->addCacheEntry(returnedMemories[i], unknownSlots[i]); - } +Return<void> ExecutionBurstServer::freeMemory(int32_t slot) { + mMemoryCache.removeCacheEntry(slot); + return Void(); } void ExecutionBurstServer::task() { @@ -223,38 +204,65 @@ void ExecutionBurstServer::task() { // receive request auto arguments = mRequestChannelReceiver->getBlocking(); - // if the request packet was not properly received, return a generic - // error and skip the execution + // if the request packet was not properly received, return a generic error and skip the + // execution // - // if the burst is being torn down, skip the execution so the "task" - // function can end - if (!arguments) { + // if the burst is being torn down, skip the execution so the "task" function can end + if (!arguments.has_value()) { if (!mTeardown) { mResultChannelSender->send(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming); } continue; } - // otherwise begin tracing execution - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, - "ExecutionBurstServer getting memory, executing, and returning results"); - - // unpack the arguments; types are Request, std::vector<int32_t>, and - // MeasureTiming, respectively - const auto [requestWithoutPools, slotsOfPools, measure] = std::move(*arguments); + // unpack the arguments; types are Request, std::vector<int32_t>, and MeasureTiming, + // respectively + const auto [requestWithoutPools, slotsOfPools, measure] = std::move(arguments).value(); - // ensure executor with cache has required memory - std::lock_guard<std::mutex> hold(mMutex); - ensureCacheEntriesArePresentLocked(slotsOfPools); - - // perform computation; types are ErrorStatus, hidl_vec<OutputShape>, - // and Timing, respectively - const auto [errorStatus, outputShapes, returnedTiming] = - mExecutorWithCache->execute(requestWithoutPools, slotsOfPools, measure); + auto result = execute(requestWithoutPools, slotsOfPools, measure); // return result - mResultChannelSender->send(errorStatus, outputShapes, returnedTiming); + if (result.has_value()) { + const auto& [outputShapes, timing] = result.value(); + mResultChannelSender->send(V1_0::ErrorStatus::NONE, outputShapes, timing); + } else { + const auto& [message, code, outputShapes] = result.error(); + LOG(ERROR) << "IBurst::execute failed with " << code << ": " << message; + mResultChannelSender->send(convert(code).value(), convert(outputShapes).value(), + kNoTiming); + } } } -} // namespace android::nn +nn::ExecutionResult<std::pair<hidl_vec<OutputShape>, Timing>> ExecutionBurstServer::execute( + const V1_0::Request& requestWithoutPools, const std::vector<int32_t>& slotsOfPools, + MeasureTiming measure) { + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, + "ExecutionBurstServer getting memory, executing, and returning results"); + + // ensure executor with cache has required memory + const auto cacheEntries = + NN_TRY(makeExecutionFailure(mMemoryCache.getCacheEntries(slotsOfPools))); + + // convert request, populating its pools + // This code performs an unvalidated convert because the request object without its pools is + // invalid because it is incomplete. Instead, the validation is performed after the memory pools + // have been added to the request. + auto canonicalRequest = + NN_TRY(makeExecutionFailure(nn::unvalidatedConvert(requestWithoutPools))); + CHECK(canonicalRequest.pools.empty()); + std::transform(cacheEntries.begin(), cacheEntries.end(), + std::back_inserter(canonicalRequest.pools), + [](const auto& cacheEntry) { return cacheEntry.first; }); + NN_TRY(makeExecutionFailure(validate(canonicalRequest))); + + nn::MeasureTiming canonicalMeasure = NN_TRY(makeExecutionFailure(nn::convert(measure))); + + const auto [outputShapes, timing] = + NN_TRY(mBurstExecutor->execute(canonicalRequest, canonicalMeasure)); + + return std::make_pair(NN_TRY(makeExecutionFailure(convert(outputShapes))), + NN_TRY(makeExecutionFailure(convert(timing)))); +} + +} // namespace android::hardware::neuralnetworks::V1_2::utils diff --git a/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp b/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp index f0275f933a..ca3a52c17b 100644 --- a/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp +++ b/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp @@ -19,11 +19,15 @@ #include "ExecutionBurstUtils.h" #include <android-base/logging.h> +#include <android-base/properties.h> #include <android/hardware/neuralnetworks/1.0/types.h> #include <android/hardware/neuralnetworks/1.1/types.h> #include <android/hardware/neuralnetworks/1.2/types.h> #include <fmq/MessageQueue.h> #include <hidl/MQDescriptor.h> +#include <nnapi/Result.h> +#include <nnapi/Types.h> +#include <nnapi/hal/ProtectCallback.h> #include <atomic> #include <chrono> @@ -39,84 +43,97 @@ namespace { constexpr V1_2::Timing kNoTiming = {std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::max()}; +std::chrono::microseconds getPollingTimeWindow(const std::string& property) { + constexpr int32_t kDefaultPollingTimeWindow = 0; +#ifdef NN_DEBUGGABLE + constexpr int32_t kMinPollingTimeWindow = 0; + const int32_t selectedPollingTimeWindow = + base::GetIntProperty(property, kDefaultPollingTimeWindow, kMinPollingTimeWindow); + return std::chrono::microseconds(selectedPollingTimeWindow); +#else + (void)property; + return std::chrono::microseconds(kDefaultPollingTimeWindow); +#endif // NN_DEBUGGABLE +} + +} // namespace + +std::chrono::microseconds getBurstControllerPollingTimeWindow() { + return getPollingTimeWindow("debug.nn.burst-controller-polling-window"); +} + +std::chrono::microseconds getBurstServerPollingTimeWindow() { + return getPollingTimeWindow("debug.nn.burst-server-polling-window"); } // serialize a request into a packet std::vector<FmqRequestDatum> serialize(const V1_0::Request& request, V1_2::MeasureTiming measure, const std::vector<int32_t>& slots) { // count how many elements need to be sent for a request - size_t count = 2 + request.inputs.size() + request.outputs.size() + request.pools.size(); + size_t count = 2 + request.inputs.size() + request.outputs.size() + slots.size(); for (const auto& input : request.inputs) { count += input.dimensions.size(); } for (const auto& output : request.outputs) { count += output.dimensions.size(); } + CHECK_LE(count, std::numeric_limits<uint32_t>::max()); // create buffer to temporarily store elements std::vector<FmqRequestDatum> data; data.reserve(count); // package packetInfo - { - FmqRequestDatum datum; - datum.packetInformation( - {/*.packetSize=*/static_cast<uint32_t>(count), - /*.numberOfInputOperands=*/static_cast<uint32_t>(request.inputs.size()), - /*.numberOfOutputOperands=*/static_cast<uint32_t>(request.outputs.size()), - /*.numberOfPools=*/static_cast<uint32_t>(request.pools.size())}); - data.push_back(datum); - } + data.emplace_back(); + data.back().packetInformation( + {.packetSize = static_cast<uint32_t>(count), + .numberOfInputOperands = static_cast<uint32_t>(request.inputs.size()), + .numberOfOutputOperands = static_cast<uint32_t>(request.outputs.size()), + .numberOfPools = static_cast<uint32_t>(slots.size())}); // package input data for (const auto& input : request.inputs) { // package operand information - FmqRequestDatum datum; - datum.inputOperandInformation( - {/*.hasNoValue=*/input.hasNoValue, - /*.location=*/input.location, - /*.numberOfDimensions=*/static_cast<uint32_t>(input.dimensions.size())}); - data.push_back(datum); + data.emplace_back(); + data.back().inputOperandInformation( + {.hasNoValue = input.hasNoValue, + .location = input.location, + .numberOfDimensions = static_cast<uint32_t>(input.dimensions.size())}); // package operand dimensions for (uint32_t dimension : input.dimensions) { - FmqRequestDatum datum; - datum.inputOperandDimensionValue(dimension); - data.push_back(datum); + data.emplace_back(); + data.back().inputOperandDimensionValue(dimension); } } // package output data for (const auto& output : request.outputs) { // package operand information - FmqRequestDatum datum; - datum.outputOperandInformation( - {/*.hasNoValue=*/output.hasNoValue, - /*.location=*/output.location, - /*.numberOfDimensions=*/static_cast<uint32_t>(output.dimensions.size())}); - data.push_back(datum); + data.emplace_back(); + data.back().outputOperandInformation( + {.hasNoValue = output.hasNoValue, + .location = output.location, + .numberOfDimensions = static_cast<uint32_t>(output.dimensions.size())}); // package operand dimensions for (uint32_t dimension : output.dimensions) { - FmqRequestDatum datum; - datum.outputOperandDimensionValue(dimension); - data.push_back(datum); + data.emplace_back(); + data.back().outputOperandDimensionValue(dimension); } } // package pool identifier for (int32_t slot : slots) { - FmqRequestDatum datum; - datum.poolIdentifier(slot); - data.push_back(datum); + data.emplace_back(); + data.back().poolIdentifier(slot); } // package measureTiming - { - FmqRequestDatum datum; - datum.measureTiming(measure); - data.push_back(datum); - } + data.emplace_back(); + data.back().measureTiming(measure); + + CHECK_EQ(data.size(), count); // return packet return data; @@ -137,46 +154,38 @@ std::vector<FmqResultDatum> serialize(V1_0::ErrorStatus errorStatus, data.reserve(count); // package packetInfo - { - FmqResultDatum datum; - datum.packetInformation({/*.packetSize=*/static_cast<uint32_t>(count), - /*.errorStatus=*/errorStatus, - /*.numberOfOperands=*/static_cast<uint32_t>(outputShapes.size())}); - data.push_back(datum); - } + data.emplace_back(); + data.back().packetInformation({.packetSize = static_cast<uint32_t>(count), + .errorStatus = errorStatus, + .numberOfOperands = static_cast<uint32_t>(outputShapes.size())}); // package output shape data for (const auto& operand : outputShapes) { // package operand information - FmqResultDatum::OperandInformation info{}; - info.isSufficient = operand.isSufficient; - info.numberOfDimensions = static_cast<uint32_t>(operand.dimensions.size()); - - FmqResultDatum datum; - datum.operandInformation(info); - data.push_back(datum); + data.emplace_back(); + data.back().operandInformation( + {.isSufficient = operand.isSufficient, + .numberOfDimensions = static_cast<uint32_t>(operand.dimensions.size())}); // package operand dimensions for (uint32_t dimension : operand.dimensions) { - FmqResultDatum datum; - datum.operandDimensionValue(dimension); - data.push_back(datum); + data.emplace_back(); + data.back().operandDimensionValue(dimension); } } // package executionTiming - { - FmqResultDatum datum; - datum.executionTiming(timing); - data.push_back(datum); - } + data.emplace_back(); + data.back().executionTiming(timing); + + CHECK_EQ(data.size(), count); // return result return data; } // deserialize request -std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTiming>> deserialize( +nn::Result<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTiming>> deserialize( const std::vector<FmqRequestDatum>& data) { using discriminator = FmqRequestDatum::hidl_discriminator; @@ -184,8 +193,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin // validate packet information if (data.size() == 0 || data[index].getDiscriminator() != discriminator::packetInformation) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage packet information @@ -198,8 +206,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin // verify packet size if (data.size() != packetSize) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage input operands @@ -208,8 +215,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin for (size_t operand = 0; operand < numberOfInputOperands; ++operand) { // validate input operand information if (data[index].getDiscriminator() != discriminator::inputOperandInformation) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage operand information @@ -226,8 +232,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin for (size_t i = 0; i < numberOfDimensions; ++i) { // validate dimension if (data[index].getDiscriminator() != discriminator::inputOperandDimensionValue) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage dimension @@ -240,7 +245,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin // store result inputs.push_back( - {/*.hasNoValue=*/hasNoValue, /*.location=*/location, /*.dimensions=*/dimensions}); + {.hasNoValue = hasNoValue, .location = location, .dimensions = dimensions}); } // unpackage output operands @@ -249,8 +254,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin for (size_t operand = 0; operand < numberOfOutputOperands; ++operand) { // validate output operand information if (data[index].getDiscriminator() != discriminator::outputOperandInformation) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage operand information @@ -267,8 +271,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin for (size_t i = 0; i < numberOfDimensions; ++i) { // validate dimension if (data[index].getDiscriminator() != discriminator::outputOperandDimensionValue) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage dimension @@ -281,7 +284,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin // store result outputs.push_back( - {/*.hasNoValue=*/hasNoValue, /*.location=*/location, /*.dimensions=*/dimensions}); + {.hasNoValue = hasNoValue, .location = location, .dimensions = dimensions}); } // unpackage pools @@ -290,8 +293,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin for (size_t pool = 0; pool < numberOfPools; ++pool) { // validate input operand information if (data[index].getDiscriminator() != discriminator::poolIdentifier) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage operand information @@ -304,8 +306,7 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin // validate measureTiming if (data[index].getDiscriminator() != discriminator::measureTiming) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage measureTiming @@ -314,27 +315,23 @@ std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTimin // validate packet information if (index != packetSize) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // return request - V1_0::Request request = {/*.inputs=*/inputs, /*.outputs=*/outputs, /*.pools=*/{}}; + V1_0::Request request = {.inputs = inputs, .outputs = outputs, .pools = {}}; return std::make_tuple(std::move(request), std::move(slots), measure); } // deserialize a packet into the result -std::optional<std::tuple<V1_0::ErrorStatus, std::vector<V1_2::OutputShape>, V1_2::Timing>> -deserialize(const std::vector<FmqResultDatum>& data) { +nn::Result<std::tuple<V1_0::ErrorStatus, std::vector<V1_2::OutputShape>, V1_2::Timing>> deserialize( + const std::vector<FmqResultDatum>& data) { using discriminator = FmqResultDatum::hidl_discriminator; - - std::vector<V1_2::OutputShape> outputShapes; size_t index = 0; // validate packet information if (data.size() == 0 || data[index].getDiscriminator() != discriminator::packetInformation) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage packet information @@ -346,16 +343,16 @@ deserialize(const std::vector<FmqResultDatum>& data) { // verify packet size if (data.size() != packetSize) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage operands + std::vector<V1_2::OutputShape> outputShapes; + outputShapes.reserve(numberOfOperands); for (size_t operand = 0; operand < numberOfOperands; ++operand) { // validate operand information if (data[index].getDiscriminator() != discriminator::operandInformation) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage operand information @@ -370,8 +367,7 @@ deserialize(const std::vector<FmqResultDatum>& data) { for (size_t i = 0; i < numberOfDimensions; ++i) { // validate dimension if (data[index].getDiscriminator() != discriminator::operandDimensionValue) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage dimension @@ -383,13 +379,12 @@ deserialize(const std::vector<FmqResultDatum>& data) { } // store result - outputShapes.push_back({/*.dimensions=*/dimensions, /*.isSufficient=*/isSufficient}); + outputShapes.push_back({.dimensions = dimensions, .isSufficient = isSufficient}); } // validate execution timing if (data[index].getDiscriminator() != discriminator::executionTiming) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage execution timing @@ -398,123 +393,113 @@ deserialize(const std::vector<FmqResultDatum>& data) { // validate packet information if (index != packetSize) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // return result return std::make_tuple(errorStatus, std::move(outputShapes), timing); } -V1_0::ErrorStatus legacyConvertResultCodeToErrorStatus(int resultCode) { - return convertToV1_0(convertResultCodeToErrorStatus(resultCode)); -} - // RequestChannelSender methods -std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> +nn::GeneralResult< + std::pair<std::unique_ptr<RequestChannelSender>, const MQDescriptorSync<FmqRequestDatum>*>> RequestChannelSender::create(size_t channelLength) { - std::unique_ptr<FmqRequestChannel> fmqRequestChannel = - std::make_unique<FmqRequestChannel>(channelLength, /*confEventFlag=*/true); - if (!fmqRequestChannel->isValid()) { - LOG(ERROR) << "Unable to create RequestChannelSender"; - return {nullptr, nullptr}; + auto requestChannelSender = + std::make_unique<RequestChannelSender>(PrivateConstructorTag{}, channelLength); + if (!requestChannelSender->mFmqRequestChannel.isValid()) { + return NN_ERROR() << "Unable to create RequestChannelSender"; } - const FmqRequestDescriptor* descriptor = fmqRequestChannel->getDesc(); - return std::make_pair(std::make_unique<RequestChannelSender>(std::move(fmqRequestChannel)), - descriptor); + const MQDescriptorSync<FmqRequestDatum>* descriptor = + requestChannelSender->mFmqRequestChannel.getDesc(); + return std::make_pair(std::move(requestChannelSender), descriptor); } -RequestChannelSender::RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel) - : mFmqRequestChannel(std::move(fmqRequestChannel)) {} +RequestChannelSender::RequestChannelSender(PrivateConstructorTag /*tag*/, size_t channelLength) + : mFmqRequestChannel(channelLength, /*configureEventFlagWord=*/true) {} -bool RequestChannelSender::send(const V1_0::Request& request, V1_2::MeasureTiming measure, - const std::vector<int32_t>& slots) { +nn::Result<void> RequestChannelSender::send(const V1_0::Request& request, + V1_2::MeasureTiming measure, + const std::vector<int32_t>& slots) { const std::vector<FmqRequestDatum> serialized = serialize(request, measure, slots); return sendPacket(serialized); } -bool RequestChannelSender::sendPacket(const std::vector<FmqRequestDatum>& packet) { +nn::Result<void> RequestChannelSender::sendPacket(const std::vector<FmqRequestDatum>& packet) { if (!mValid) { - return false; + return NN_ERROR() << "FMQ object is invalid"; } - if (packet.size() > mFmqRequestChannel->availableToWrite()) { - LOG(ERROR) - << "RequestChannelSender::sendPacket -- packet size exceeds size available in FMQ"; - return false; + if (packet.size() > mFmqRequestChannel.availableToWrite()) { + return NN_ERROR() + << "RequestChannelSender::sendPacket -- packet size exceeds size available in FMQ"; + } + + // Always send the packet with "blocking" because this signals the futex and unblocks the + // consumer if it is waiting on the futex. + const bool success = mFmqRequestChannel.writeBlocking(packet.data(), packet.size()); + if (!success) { + return NN_ERROR() + << "RequestChannelSender::sendPacket -- FMQ's writeBlocking returned an error"; } - // Always send the packet with "blocking" because this signals the futex and - // unblocks the consumer if it is waiting on the futex. - return mFmqRequestChannel->writeBlocking(packet.data(), packet.size()); + return {}; } -void RequestChannelSender::invalidate() { +void RequestChannelSender::notifyAsDeadObject() { mValid = false; } // RequestChannelReceiver methods -std::unique_ptr<RequestChannelReceiver> RequestChannelReceiver::create( - const FmqRequestDescriptor& requestChannel, std::chrono::microseconds pollingTimeWindow) { - std::unique_ptr<FmqRequestChannel> fmqRequestChannel = - std::make_unique<FmqRequestChannel>(requestChannel); +nn::GeneralResult<std::unique_ptr<RequestChannelReceiver>> RequestChannelReceiver::create( + const MQDescriptorSync<FmqRequestDatum>& requestChannel, + std::chrono::microseconds pollingTimeWindow) { + auto requestChannelReceiver = std::make_unique<RequestChannelReceiver>( + PrivateConstructorTag{}, requestChannel, pollingTimeWindow); - if (!fmqRequestChannel->isValid()) { - LOG(ERROR) << "Unable to create RequestChannelReceiver"; - return nullptr; + if (!requestChannelReceiver->mFmqRequestChannel.isValid()) { + return NN_ERROR() << "Unable to create RequestChannelReceiver"; } - if (fmqRequestChannel->getEventFlagWord() == nullptr) { - LOG(ERROR) - << "RequestChannelReceiver::create was passed an MQDescriptor without an EventFlag"; - return nullptr; + if (requestChannelReceiver->mFmqRequestChannel.getEventFlagWord() == nullptr) { + return NN_ERROR() + << "RequestChannelReceiver::create was passed an MQDescriptor without an EventFlag"; } - return std::make_unique<RequestChannelReceiver>(std::move(fmqRequestChannel), - pollingTimeWindow); + return requestChannelReceiver; } -RequestChannelReceiver::RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, - std::chrono::microseconds pollingTimeWindow) - : mFmqRequestChannel(std::move(fmqRequestChannel)), kPollingTimeWindow(pollingTimeWindow) {} +RequestChannelReceiver::RequestChannelReceiver( + PrivateConstructorTag /*tag*/, const MQDescriptorSync<FmqRequestDatum>& requestChannel, + std::chrono::microseconds pollingTimeWindow) + : mFmqRequestChannel(requestChannel), kPollingTimeWindow(pollingTimeWindow) {} -std::optional<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTiming>> +nn::Result<std::tuple<V1_0::Request, std::vector<int32_t>, V1_2::MeasureTiming>> RequestChannelReceiver::getBlocking() { - const auto packet = getPacketBlocking(); - if (!packet) { - return std::nullopt; - } - - return deserialize(*packet); + const auto packet = NN_TRY(getPacketBlocking()); + return deserialize(packet); } void RequestChannelReceiver::invalidate() { mTeardown = true; // force unblock - // ExecutionBurstServer is by default waiting on a request packet. If the - // client process destroys its burst object, the server may still be waiting - // on the futex. This force unblock wakes up any thread waiting on the - // futex. - // TODO: look for a different/better way to signal/notify the futex to wake - // up any thread waiting on it - FmqRequestDatum datum; - datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0, - /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0}); - mFmqRequestChannel->writeBlocking(&datum, 1); + // ExecutionBurstServer is by default waiting on a request packet. If the client process + // destroys its burst object, the server may still be waiting on the futex. This force unblock + // wakes up any thread waiting on the futex. + const auto data = serialize(V1_0::Request{}, V1_2::MeasureTiming::NO, {}); + mFmqRequestChannel.writeBlocking(data.data(), data.size()); } -std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlocking() { +nn::Result<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlocking() { if (mTeardown) { - return std::nullopt; + return NN_ERROR() << "FMQ object is being torn down"; } - // First spend time polling if results are available in FMQ instead of - // waiting on the futex. Polling is more responsive (yielding lower - // latencies), but can take up more power, so only poll for a limited period - // of time. + // First spend time polling if results are available in FMQ instead of waiting on the futex. + // Polling is more responsive (yielding lower latencies), but can take up more power, so only + // poll for a limited period of time. auto& getCurrentTime = std::chrono::high_resolution_clock::now; const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow; @@ -522,173 +507,144 @@ std::optional<std::vector<FmqRequestDatum>> RequestChannelReceiver::getPacketBlo while (getCurrentTime() < timeToStopPolling) { // if class is being torn down, immediately return if (mTeardown.load(std::memory_order_relaxed)) { - return std::nullopt; + return NN_ERROR() << "FMQ object is being torn down"; } - // Check if data is available. If it is, immediately retrieve it and - // return. - const size_t available = mFmqRequestChannel->availableToRead(); + // Check if data is available. If it is, immediately retrieve it and return. + const size_t available = mFmqRequestChannel.availableToRead(); if (available > 0) { - // This is the first point when we know an execution is occurring, - // so begin to collect systraces. Note that a similar systrace does - // not exist at the corresponding point in - // ResultChannelReceiver::getPacketBlocking because the execution is - // already in flight. - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, - "ExecutionBurstServer getting packet"); std::vector<FmqRequestDatum> packet(available); - const bool success = mFmqRequestChannel->read(packet.data(), available); + const bool success = mFmqRequestChannel.readBlocking(packet.data(), available); if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } } - // If we get to this point, we either stopped polling because it was taking - // too long or polling was not allowed. Instead, perform a blocking call - // which uses a futex to save power. + // If we get to this point, we either stopped polling because it was taking too long or polling + // was not allowed. Instead, perform a blocking call which uses a futex to save power. // wait for request packet and read first element of request packet FmqRequestDatum datum; - bool success = mFmqRequestChannel->readBlocking(&datum, 1); - - // This is the first point when we know an execution is occurring, so begin - // to collect systraces. Note that a similar systrace does not exist at the - // corresponding point in ResultChannelReceiver::getPacketBlocking because - // the execution is already in flight. - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstServer getting packet"); + bool success = mFmqRequestChannel.readBlocking(&datum, 1); // retrieve remaining elements - // NOTE: all of the data is already available at this point, so there's no - // need to do a blocking wait to wait for more data. This is known because - // in FMQ, all writes are published (made available) atomically. Currently, - // the producer always publishes the entire packet in one function call, so - // if the first element of the packet is available, the remaining elements - // are also available. - const size_t count = mFmqRequestChannel->availableToRead(); + // NOTE: all of the data is already available at this point, so there's no need to do a blocking + // wait to wait for more data. This is known because in FMQ, all writes are published (made + // available) atomically. Currently, the producer always publishes the entire packet in one + // function call, so if the first element of the packet is available, the remaining elements are + // also available. + const size_t count = mFmqRequestChannel.availableToRead(); std::vector<FmqRequestDatum> packet(count + 1); std::memcpy(&packet.front(), &datum, sizeof(datum)); - success &= mFmqRequestChannel->read(packet.data() + 1, count); + success &= mFmqRequestChannel.read(packet.data() + 1, count); // terminate loop if (mTeardown) { - return std::nullopt; + return NN_ERROR() << "FMQ object is being torn down"; } // ensure packet was successfully received if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } // ResultChannelSender methods -std::unique_ptr<ResultChannelSender> ResultChannelSender::create( - const FmqResultDescriptor& resultChannel) { - std::unique_ptr<FmqResultChannel> fmqResultChannel = - std::make_unique<FmqResultChannel>(resultChannel); +nn::GeneralResult<std::unique_ptr<ResultChannelSender>> ResultChannelSender::create( + const MQDescriptorSync<FmqResultDatum>& resultChannel) { + auto resultChannelSender = + std::make_unique<ResultChannelSender>(PrivateConstructorTag{}, resultChannel); - if (!fmqResultChannel->isValid()) { - LOG(ERROR) << "Unable to create RequestChannelSender"; - return nullptr; + if (!resultChannelSender->mFmqResultChannel.isValid()) { + return NN_ERROR() << "Unable to create RequestChannelSender"; } - if (fmqResultChannel->getEventFlagWord() == nullptr) { - LOG(ERROR) << "ResultChannelSender::create was passed an MQDescriptor without an EventFlag"; - return nullptr; + if (resultChannelSender->mFmqResultChannel.getEventFlagWord() == nullptr) { + return NN_ERROR() + << "ResultChannelSender::create was passed an MQDescriptor without an EventFlag"; } - return std::make_unique<ResultChannelSender>(std::move(fmqResultChannel)); + return resultChannelSender; } -ResultChannelSender::ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel) - : mFmqResultChannel(std::move(fmqResultChannel)) {} +ResultChannelSender::ResultChannelSender(PrivateConstructorTag /*tag*/, + const MQDescriptorSync<FmqResultDatum>& resultChannel) + : mFmqResultChannel(resultChannel) {} -bool ResultChannelSender::send(V1_0::ErrorStatus errorStatus, +void ResultChannelSender::send(V1_0::ErrorStatus errorStatus, const std::vector<V1_2::OutputShape>& outputShapes, V1_2::Timing timing) { const std::vector<FmqResultDatum> serialized = serialize(errorStatus, outputShapes, timing); - return sendPacket(serialized); + sendPacket(serialized); } -bool ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet) { - if (packet.size() > mFmqResultChannel->availableToWrite()) { +void ResultChannelSender::sendPacket(const std::vector<FmqResultDatum>& packet) { + if (packet.size() > mFmqResultChannel.availableToWrite()) { LOG(ERROR) << "ResultChannelSender::sendPacket -- packet size exceeds size available in FMQ"; const std::vector<FmqResultDatum> errorPacket = serialize(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming); - // Always send the packet with "blocking" because this signals the futex - // and unblocks the consumer if it is waiting on the futex. - return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size()); + // Always send the packet with "blocking" because this signals the futex and unblocks the + // consumer if it is waiting on the futex. + mFmqResultChannel.writeBlocking(errorPacket.data(), errorPacket.size()); + } else { + // Always send the packet with "blocking" because this signals the futex and unblocks the + // consumer if it is waiting on the futex. + mFmqResultChannel.writeBlocking(packet.data(), packet.size()); } - - // Always send the packet with "blocking" because this signals the futex and - // unblocks the consumer if it is waiting on the futex. - return mFmqResultChannel->writeBlocking(packet.data(), packet.size()); } // ResultChannelReceiver methods -std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> +nn::GeneralResult< + std::pair<std::unique_ptr<ResultChannelReceiver>, const MQDescriptorSync<FmqResultDatum>*>> ResultChannelReceiver::create(size_t channelLength, std::chrono::microseconds pollingTimeWindow) { - std::unique_ptr<FmqResultChannel> fmqResultChannel = - std::make_unique<FmqResultChannel>(channelLength, /*confEventFlag=*/true); - if (!fmqResultChannel->isValid()) { - LOG(ERROR) << "Unable to create ResultChannelReceiver"; - return {nullptr, nullptr}; + auto resultChannelReceiver = std::make_unique<ResultChannelReceiver>( + PrivateConstructorTag{}, channelLength, pollingTimeWindow); + if (!resultChannelReceiver->mFmqResultChannel.isValid()) { + return NN_ERROR() << "Unable to create ResultChannelReceiver"; } - const FmqResultDescriptor* descriptor = fmqResultChannel->getDesc(); - return std::make_pair( - std::make_unique<ResultChannelReceiver>(std::move(fmqResultChannel), pollingTimeWindow), - descriptor); + const MQDescriptorSync<FmqResultDatum>* descriptor = + resultChannelReceiver->mFmqResultChannel.getDesc(); + return std::make_pair(std::move(resultChannelReceiver), descriptor); } -ResultChannelReceiver::ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel, +ResultChannelReceiver::ResultChannelReceiver(PrivateConstructorTag /*tag*/, size_t channelLength, std::chrono::microseconds pollingTimeWindow) - : mFmqResultChannel(std::move(fmqResultChannel)), kPollingTimeWindow(pollingTimeWindow) {} + : mFmqResultChannel(channelLength, /*configureEventFlagWord=*/true), + kPollingTimeWindow(pollingTimeWindow) {} -std::optional<std::tuple<V1_0::ErrorStatus, std::vector<V1_2::OutputShape>, V1_2::Timing>> +nn::Result<std::tuple<V1_0::ErrorStatus, std::vector<V1_2::OutputShape>, V1_2::Timing>> ResultChannelReceiver::getBlocking() { - const auto packet = getPacketBlocking(); - if (!packet) { - return std::nullopt; - } - - return deserialize(*packet); + const auto packet = NN_TRY(getPacketBlocking()); + return deserialize(packet); } -void ResultChannelReceiver::invalidate() { +void ResultChannelReceiver::notifyAsDeadObject() { mValid = false; // force unblock - // ExecutionBurstController waits on a result packet after sending a - // request. If the driver containing ExecutionBurstServer crashes, the - // controller may be waiting on the futex. This force unblock wakes up any - // thread waiting on the futex. - // TODO: look for a different/better way to signal/notify the futex to - // wake up any thread waiting on it - FmqResultDatum datum; - datum.packetInformation({/*.packetSize=*/0, - /*.errorStatus=*/V1_0::ErrorStatus::GENERAL_FAILURE, - /*.numberOfOperands=*/0}); - mFmqResultChannel->writeBlocking(&datum, 1); + // ExecutionBurstController waits on a result packet after sending a request. If the driver + // containing ExecutionBurstServer crashes, the controller may be waiting on the futex. This + // force unblock wakes up any thread waiting on the futex. + const auto data = serialize(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming); + mFmqResultChannel.writeBlocking(data.data(), data.size()); } -std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlocking() { +nn::Result<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlocking() { if (!mValid) { - return std::nullopt; + return NN_ERROR() << "FMQ object is invalid"; } - // First spend time polling if results are available in FMQ instead of - // waiting on the futex. Polling is more responsive (yielding lower - // latencies), but can take up more power, so only poll for a limited period - // of time. + // First spend time polling if results are available in FMQ instead of waiting on the futex. + // Polling is more responsive (yielding lower latencies), but can take up more power, so only + // poll for a limited period of time. auto& getCurrentTime = std::chrono::high_resolution_clock::now; const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow; @@ -696,54 +652,49 @@ std::optional<std::vector<FmqResultDatum>> ResultChannelReceiver::getPacketBlock while (getCurrentTime() < timeToStopPolling) { // if class is being torn down, immediately return if (!mValid.load(std::memory_order_relaxed)) { - return std::nullopt; + return NN_ERROR() << "FMQ object is invalid"; } - // Check if data is available. If it is, immediately retrieve it and - // return. - const size_t available = mFmqResultChannel->availableToRead(); + // Check if data is available. If it is, immediately retrieve it and return. + const size_t available = mFmqResultChannel.availableToRead(); if (available > 0) { std::vector<FmqResultDatum> packet(available); - const bool success = mFmqResultChannel->read(packet.data(), available); + const bool success = mFmqResultChannel.readBlocking(packet.data(), available); if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } } - // If we get to this point, we either stopped polling because it was taking - // too long or polling was not allowed. Instead, perform a blocking call - // which uses a futex to save power. + // If we get to this point, we either stopped polling because it was taking too long or polling + // was not allowed. Instead, perform a blocking call which uses a futex to save power. // wait for result packet and read first element of result packet FmqResultDatum datum; - bool success = mFmqResultChannel->readBlocking(&datum, 1); + bool success = mFmqResultChannel.readBlocking(&datum, 1); // retrieve remaining elements - // NOTE: all of the data is already available at this point, so there's no - // need to do a blocking wait to wait for more data. This is known because - // in FMQ, all writes are published (made available) atomically. Currently, - // the producer always publishes the entire packet in one function call, so - // if the first element of the packet is available, the remaining elements - // are also available. - const size_t count = mFmqResultChannel->availableToRead(); + // NOTE: all of the data is already available at this point, so there's no need to do a blocking + // wait to wait for more data. This is known because in FMQ, all writes are published (made + // available) atomically. Currently, the producer always publishes the entire packet in one + // function call, so if the first element of the packet is available, the remaining elements are + // also available. + const size_t count = mFmqResultChannel.availableToRead(); std::vector<FmqResultDatum> packet(count + 1); std::memcpy(&packet.front(), &datum, sizeof(datum)); - success &= mFmqResultChannel->read(packet.data() + 1, count); + success &= mFmqResultChannel.read(packet.data() + 1, count); if (!mValid) { - return std::nullopt; + return NN_ERROR() << "FMQ object is invalid"; } // ensure packet was successfully received if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } } // namespace android::hardware::neuralnetworks::V1_2::utils diff --git a/neuralnetworks/1.2/utils/src/PreparedModel.cpp b/neuralnetworks/1.2/utils/src/PreparedModel.cpp index 6841c5e007..71a4ea872b 100644 --- a/neuralnetworks/1.2/utils/src/PreparedModel.cpp +++ b/neuralnetworks/1.2/utils/src/PreparedModel.cpp @@ -18,6 +18,8 @@ #include "Callbacks.h" #include "Conversions.h" +#include "ExecutionBurstController.h" +#include "ExecutionBurstUtils.h" #include "Utils.h" #include <android/hardware/neuralnetworks/1.0/types.h> @@ -27,12 +29,12 @@ #include <nnapi/IPreparedModel.h> #include <nnapi/Result.h> #include <nnapi/Types.h> -#include <nnapi/hal/1.0/Burst.h> #include <nnapi/hal/1.0/Conversions.h> #include <nnapi/hal/CommonUtils.h> #include <nnapi/hal/HandleError.h> #include <nnapi/hal/ProtectCallback.h> +#include <chrono> #include <memory> #include <tuple> #include <utility> @@ -119,7 +121,14 @@ PreparedModel::executeFenced(const nn::Request& /*request*/, } nn::GeneralResult<nn::SharedBurst> PreparedModel::configureExecutionBurst() const { - return V1_0::utils::Burst::create(shared_from_this()); + auto self = shared_from_this(); + auto fallback = [preparedModel = std::move(self)](const nn::Request& request, + nn::MeasureTiming measure) + -> nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> { + return preparedModel->execute(request, measure, {}, {}); + }; + const auto pollingTimeWindow = getBurstControllerPollingTimeWindow(); + return ExecutionBurstController::create(kPreparedModel, std::move(fallback), pollingTimeWindow); } std::any PreparedModel::getUnderlyingResource() const { diff --git a/neuralnetworks/1.3/utils/Android.bp b/neuralnetworks/1.3/utils/Android.bp index 2b1dcc40bb..28c036a8ea 100644 --- a/neuralnetworks/1.3/utils/Android.bp +++ b/neuralnetworks/1.3/utils/Android.bp @@ -42,6 +42,7 @@ cc_library_static { "android.hardware.neuralnetworks@1.1", "android.hardware.neuralnetworks@1.2", "android.hardware.neuralnetworks@1.3", + "libfmq", ], export_static_lib_headers: [ "neuralnetworks_utils_hal_common", diff --git a/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h b/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h index 8e1cdb82c9..b677c62505 100644 --- a/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h +++ b/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h @@ -59,7 +59,6 @@ GeneralResult<OptionalDuration> convert( GeneralResult<ErrorStatus> convert(const hal::V1_3::ErrorStatus& errorStatus); GeneralResult<SharedHandle> convert(const hardware::hidl_handle& handle); -GeneralResult<SharedMemory> convert(const hardware::hidl_memory& memory); GeneralResult<std::vector<BufferRole>> convert( const hardware::hidl_vec<hal::V1_3::BufferRole>& bufferRoles); diff --git a/neuralnetworks/1.3/utils/src/Conversions.cpp b/neuralnetworks/1.3/utils/src/Conversions.cpp index 320c74c2c6..9788fe1b9d 100644 --- a/neuralnetworks/1.3/utils/src/Conversions.cpp +++ b/neuralnetworks/1.3/utils/src/Conversions.cpp @@ -352,10 +352,6 @@ GeneralResult<SharedHandle> convert(const hardware::hidl_handle& handle) { return validatedConvert(handle); } -GeneralResult<SharedMemory> convert(const hardware::hidl_memory& memory) { - return validatedConvert(memory); -} - GeneralResult<std::vector<BufferRole>> convert( const hardware::hidl_vec<hal::V1_3::BufferRole>& bufferRoles) { return validatedConvert(bufferRoles); diff --git a/neuralnetworks/1.3/utils/src/PreparedModel.cpp b/neuralnetworks/1.3/utils/src/PreparedModel.cpp index 725e4f546a..64275a3729 100644 --- a/neuralnetworks/1.3/utils/src/PreparedModel.cpp +++ b/neuralnetworks/1.3/utils/src/PreparedModel.cpp @@ -29,8 +29,9 @@ #include <nnapi/Result.h> #include <nnapi/TypeUtils.h> #include <nnapi/Types.h> -#include <nnapi/hal/1.0/Burst.h> #include <nnapi/hal/1.2/Conversions.h> +#include <nnapi/hal/1.2/ExecutionBurstController.h> +#include <nnapi/hal/1.2/ExecutionBurstUtils.h> #include <nnapi/hal/CommonUtils.h> #include <nnapi/hal/HandleError.h> #include <nnapi/hal/ProtectCallback.h> @@ -199,7 +200,15 @@ PreparedModel::executeFenced(const nn::Request& request, const std::vector<nn::S } nn::GeneralResult<nn::SharedBurst> PreparedModel::configureExecutionBurst() const { - return V1_0::utils::Burst::create(shared_from_this()); + auto self = shared_from_this(); + auto fallback = [preparedModel = std::move(self)](const nn::Request& request, + nn::MeasureTiming measure) + -> nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> { + return preparedModel->execute(request, measure, {}, {}); + }; + const auto pollingTimeWindow = V1_2::utils::getBurstControllerPollingTimeWindow(); + return V1_2::utils::ExecutionBurstController::create(kPreparedModel, std::move(fallback), + pollingTimeWindow); } std::any PreparedModel::getUnderlyingResource() const { diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h b/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h index c9218857ac..05110bc364 100644 --- a/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h +++ b/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h @@ -56,7 +56,7 @@ class IProtectedCallback { // Thread safe class class DeathRecipient final : public hidl_death_recipient { public: - void serviceDied(uint64_t /*cookie*/, const wp<hidl::base::V1_0::IBase>& /*who*/) override; + void serviceDied(uint64_t cookie, const wp<hidl::base::V1_0::IBase>& who) override; // Precondition: `killable` must be non-null. void add(IProtectedCallback* killable) const; // Precondition: `killable` must be non-null. @@ -64,6 +64,7 @@ class DeathRecipient final : public hidl_death_recipient { private: mutable std::mutex mMutex; + mutable bool mIsDeadObject GUARDED_BY(mMutex) = false; mutable std::vector<IProtectedCallback*> mObjects GUARDED_BY(mMutex); }; @@ -78,14 +79,21 @@ class DeathHandler final { ~DeathHandler(); using Cleanup = std::function<void()>; + using Hold = base::ScopeGuard<Cleanup>; + + // Precondition: `killable` must be non-null. + // `killable` must outlive the return value `Hold`. + [[nodiscard]] Hold protectCallback(IProtectedCallback* killable) const; + // Precondition: `killable` must be non-null. - [[nodiscard]] base::ScopeGuard<Cleanup> protectCallback(IProtectedCallback* killable) const; + // `killable` must outlive the `DeathHandler`. + void protectCallbackForLifetimeOfDeathHandler(IProtectedCallback* killable) const; private: DeathHandler(sp<hidl::base::V1_0::IBase> object, sp<DeathRecipient> deathRecipient); - sp<hidl::base::V1_0::IBase> kObject; - sp<DeathRecipient> kDeathRecipient; + sp<hidl::base::V1_0::IBase> mObject; + sp<DeathRecipient> mDeathRecipient; }; } // namespace android::hardware::neuralnetworks::utils diff --git a/neuralnetworks/utils/common/src/ProtectCallback.cpp b/neuralnetworks/utils/common/src/ProtectCallback.cpp index abe4cb675e..18e1f3bf0b 100644 --- a/neuralnetworks/utils/common/src/ProtectCallback.cpp +++ b/neuralnetworks/utils/common/src/ProtectCallback.cpp @@ -35,19 +35,25 @@ void DeathRecipient::serviceDied(uint64_t /*cookie*/, const wp<hidl::base::V1_0: std::lock_guard guard(mMutex); std::for_each(mObjects.begin(), mObjects.end(), [](IProtectedCallback* killable) { killable->notifyAsDeadObject(); }); + mObjects.clear(); + mIsDeadObject = true; } void DeathRecipient::add(IProtectedCallback* killable) const { CHECK(killable != nullptr); std::lock_guard guard(mMutex); - mObjects.push_back(killable); + if (mIsDeadObject) { + killable->notifyAsDeadObject(); + } else { + mObjects.push_back(killable); + } } void DeathRecipient::remove(IProtectedCallback* killable) const { CHECK(killable != nullptr); std::lock_guard guard(mMutex); - const auto removedIter = std::remove(mObjects.begin(), mObjects.end(), killable); - mObjects.erase(removedIter); + const auto newEnd = std::remove(mObjects.begin(), mObjects.end(), killable); + mObjects.erase(newEnd, mObjects.end()); } nn::GeneralResult<DeathHandler> DeathHandler::create(sp<hidl::base::V1_0::IBase> object) { @@ -67,19 +73,16 @@ nn::GeneralResult<DeathHandler> DeathHandler::create(sp<hidl::base::V1_0::IBase> } DeathHandler::DeathHandler(sp<hidl::base::V1_0::IBase> object, sp<DeathRecipient> deathRecipient) - : kObject(std::move(object)), kDeathRecipient(std::move(deathRecipient)) { - CHECK(kObject != nullptr); - CHECK(kDeathRecipient != nullptr); + : mObject(std::move(object)), mDeathRecipient(std::move(deathRecipient)) { + CHECK(mObject != nullptr); + CHECK(mDeathRecipient != nullptr); } DeathHandler::~DeathHandler() { - if (kObject != nullptr && kDeathRecipient != nullptr) { - const auto ret = kObject->unlinkToDeath(kDeathRecipient); - const auto maybeSuccess = handleTransportError(ret); - if (!maybeSuccess.has_value()) { - LOG(ERROR) << maybeSuccess.error().message; - } else if (!maybeSuccess.value()) { - LOG(ERROR) << "IBase::linkToDeath returned false"; + if (mObject != nullptr && mDeathRecipient != nullptr) { + const auto successful = mObject->unlinkToDeath(mDeathRecipient).isOk(); + if (!successful) { + LOG(ERROR) << "IBase::linkToDeath failed"; } } } @@ -87,9 +90,14 @@ DeathHandler::~DeathHandler() { [[nodiscard]] base::ScopeGuard<DeathHandler::Cleanup> DeathHandler::protectCallback( IProtectedCallback* killable) const { CHECK(killable != nullptr); - kDeathRecipient->add(killable); + mDeathRecipient->add(killable); return base::make_scope_guard( - [deathRecipient = kDeathRecipient, killable] { deathRecipient->remove(killable); }); + [deathRecipient = mDeathRecipient, killable] { deathRecipient->remove(killable); }); +} + +void DeathHandler::protectCallbackForLifetimeOfDeathHandler(IProtectedCallback* killable) const { + CHECK(killable != nullptr); + mDeathRecipient->add(killable); } } // namespace android::hardware::neuralnetworks::utils |