From bd2b4e78ac087e992a167e0a7795f350c4f38406 Mon Sep 17 00:00:00 2001
From: Lev Proleev
Date: Thu, 8 Aug 2019 14:08:31 +0100
Subject: [PATCH] Create NNAPI HAL v1.3 and add TENSOR_QUANT8_ASYMM_SIGNED OperandType

Bug: 137828494
Bug: 139120468
Bug: 136735770
Test: mma
Change-Id: I1f615047d1c0c208a90082ffb6ffc43f252f77b4
Merged-In: I1f615047d1c0c208a90082ffb6ffc43f252f77b4
(cherry picked from commit 5a7b67ab8fafd171f07d0ba99338f85c42993e0f)
---
 current.txt                                  |   2 +
 neuralnetworks/1.2/vts/functional/Android.bp |   1 +
 neuralnetworks/1.3/Android.bp                |  21 ++
 neuralnetworks/1.3/IDevice.hal               | 171 +++++++++
 neuralnetworks/1.3/types.hal                 | 361 +++++++++++++++++++
 5 files changed, 556 insertions(+)
 create mode 100644 neuralnetworks/1.3/Android.bp
 create mode 100644 neuralnetworks/1.3/IDevice.hal
 create mode 100644 neuralnetworks/1.3/types.hal

diff --git a/current.txt b/current.txt
index 972fe16a57..aa2494eae9 100644
--- a/current.txt
+++ b/current.txt
@@ -586,3 +586,5 @@ fd65298e1e09e0e3c781ab18305920d757dbe55a3b459ce17814ec5cf6dfee99 android.hardwar
 # HALs released in Android R
 07d0a252b2d8fa35887908a996ba395cf392968395fc30afab791f46e0c22a52 android.hardware.boot@1.1::IBootControl
 74049a402be913963edfdd80828a53736570e9d8124a1bf18166b6ed46a6b0ab android.hardware.boot@1.1::types
+34515afa2bb792d3c6d8495a5f5d907d179c8507ca5e55c10050d02ae1d516ef android.hardware.neuralnetworks@1.3::IDevice
+e2d20d4eb24f40b44a3766d05f77052581cb3f4df35fb48c0cc5d9cdcf5c872e android.hardware.neuralnetworks@1.3::types
diff --git a/neuralnetworks/1.2/vts/functional/Android.bp b/neuralnetworks/1.2/vts/functional/Android.bp
index 3ba8879ae9..bfb871986b 100644
--- a/neuralnetworks/1.2/vts/functional/Android.bp
+++ b/neuralnetworks/1.2/vts/functional/Android.bp
@@ -37,6 +37,7 @@ cc_test {
         "android.hardware.neuralnetworks@1.0",
         "android.hardware.neuralnetworks@1.1",
         "android.hardware.neuralnetworks@1.2",
+        "android.hardware.neuralnetworks@1.3",
         "android.hidl.allocator@1.0",
         "android.hidl.memory@1.0",
         "libgmock",
diff --git a/neuralnetworks/1.3/Android.bp b/neuralnetworks/1.3/Android.bp
new file mode 100644
index 0000000000..0615ec67dd
--- /dev/null
+++ b/neuralnetworks/1.3/Android.bp
@@ -0,0 +1,21 @@
+// This file is autogenerated by hidl-gen -Landroidbp.
+
+hidl_interface {
+    name: "android.hardware.neuralnetworks@1.3",
+    root: "android.hardware",
+    vndk: {
+        enabled: true,
+    },
+    srcs: [
+        "types.hal",
+        "IDevice.hal",
+    ],
+    interfaces: [
+        "android.hardware.neuralnetworks@1.0",
+        "android.hardware.neuralnetworks@1.1",
+        "android.hardware.neuralnetworks@1.2",
+        "android.hidl.base@1.0",
+        "android.hidl.safe_union@1.0",
+    ],
+    gen_java: false,
+}
diff --git a/neuralnetworks/1.3/IDevice.hal b/neuralnetworks/1.3/IDevice.hal
new file mode 100644
index 0000000000..ee36fb4e51
--- /dev/null
+++ b/neuralnetworks/1.3/IDevice.hal
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.0::ErrorStatus;
+import @1.1::ExecutionPreference;
+import @1.2::Constant;
+import @1.2::DeviceType;
+import @1.2::Extension;
+import @1.2::IDevice;
+import @1.2::IPreparedModelCallback;
+
+/**
+ * This interface represents a device driver.
+ */
+interface IDevice extends @1.2::IDevice {
+    /**
+     * Gets the capabilities of a driver.
+     *
+     * @return status Error status of the call, must be:
+     *     - NONE if successful
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     * @return capabilities Capabilities of the driver.
+     */
+    getCapabilities_1_3() generates (ErrorStatus status, Capabilities capabilities);
+
+    /**
+     * Gets the supported operations in a model.
+     *
+     * getSupportedOperations indicates which operations of a model are fully
+     * supported by the vendor driver. If an operation may not be supported for
+     * any reason, getSupportedOperations must return false for that operation.
+     *
+     * @param model A model whose operations--and their corresponding operands--
+     *     are to be verified by the driver.
+     * @return status Error status of the call, must be:
+     *     - NONE if successful
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if provided model is invalid
+     * @return supportedOperations A list of supported operations, where true
+     *     indicates the operation is supported and false indicates the
+     *     operation is not supported. The index of "supported" corresponds with
+     *     the index of the operation it is describing.
+     */
+    getSupportedOperations_1_3(Model model)
+        generates (ErrorStatus status, vec<bool> supportedOperations);
+
+    /**
+     * Asynchronously creates a prepared model for execution and optionally
+     * saves it into cache files.
+     *
+     * prepareModel is used to make any necessary transformations or
+     * alternative representations to a model for execution, possibly including
+     * transformations on the constant data, optimization on the model's graph,
+     * or compilation into the device's native binary format. The model itself
+     * is not changed.
+     *
+     * Optionally, caching information may be provided for the driver to save
+     * the prepared model to cache files for faster model compilation time when
+     * the same model preparation is requested in the future. There are two
+     * types of cache file handles provided to the driver: model cache and data
+     * cache. For more information on the two types of cache handles, refer to
+     * getNumberOfCacheFilesNeeded.
+     *
+     * The file descriptors must be opened with read and write permission. A
+     * file may have any size, and the corresponding file descriptor may have
+     * any offset. The driver must truncate a file to zero size before writing
+     * to that file. The file descriptors may be closed by the client once the
+     * asynchronous preparation has finished. The driver must dup a file
+     * descriptor if it wants to get access to the cache file later.
+     *
+     * The model is prepared asynchronously with respect to the caller. The
+     * prepareModel function must verify the inputs to the prepareModel
+     * function related to preparing the model (as opposed to saving the
+     * prepared model to cache) are correct. If there is an error, prepareModel
+     * must immediately invoke the callback with the appropriate ErrorStatus
+     * value and nullptr for the IPreparedModel, then return with the same
+     * ErrorStatus. If the inputs to the prepareModel function that are related
+     * to preparing the model are valid and there is no error, prepareModel must
+     * launch an asynchronous task to prepare the model in the background, and
+     * immediately return from prepareModel with ErrorStatus::NONE. If the
+     * asynchronous task fails to launch, prepareModel must immediately invoke
+     * the callback with ErrorStatus::GENERAL_FAILURE and nullptr for the
+     * IPreparedModel, then return with ErrorStatus::GENERAL_FAILURE.
+     *
+     * When the asynchronous task has finished preparing the model, it must
+     * immediately invoke the callback function provided as an input to
+     * prepareModel. If the model was prepared successfully, the callback object
+     * must be invoked with an error status of ErrorStatus::NONE and the
+     * produced IPreparedModel object. If an error occurred preparing the model,
+     * the callback object must be invoked with the appropriate ErrorStatus
+     * value and nullptr for the IPreparedModel.
+     *
+     * Optionally, the driver may save the prepared model to cache during the
+     * asynchronous preparation. Any error that occurs when saving to cache must
+     * not affect the status of preparing the model. Even if the input arguments
+     * related to the cache are invalid, or the driver fails to save to cache,
+     * the prepareModel function must finish preparing the model. The driver
+     * may choose not to save to cache even if the caching information is
+     * provided and valid.
+     *
+     * The only information that may be unknown to the model at this stage is
+     * the shape of the tensors, which may only be known at execution time. As
+     * such, some driver services may return partially prepared models, where
+     * the prepared model may only be finished when it is paired with a set of
+     * inputs to the model. Note that the same prepared model object may be used
+     * with different shapes of inputs on different (possibly concurrent)
+     * executions.
+     *
+     * Multiple threads may call prepareModel on the same model concurrently.
+     *
+     * @param model The model to be prepared for execution.
+     * @param preference Indicates the intended execution behavior of a prepared
+     *     model.
+     * @param modelCache A vector of handles with each entry holding exactly one
+     *     cache file descriptor for the security-sensitive cache. The length of
+     *     the vector must either be 0 indicating that caching information is
+     *     not provided, or match the numModelCache returned from
+     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
+     *     the same order when retrieving the preparedModel from cache files
+     *     with prepareModelFromCache.
+     * @param dataCache A vector of handles with each entry holding exactly one
+     *     cache file descriptor for the constants' cache. The length of the
+     *     vector must either be 0 indicating that caching information is not
+     *     provided, or match the numDataCache returned from
+     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
+     *     the same order when retrieving the preparedModel from cache files
+     *     with prepareModelFromCache.
+     * @param token A caching token of length Constant::BYTE_SIZE_OF_CACHE_TOKEN
+     *     identifying the prepared model. The same token will be provided when
+     *     retrieving the prepared model from the cache files with
+     *     prepareModelFromCache. Tokens should be chosen to have a low rate of
+     *     collision for a particular application. The driver cannot detect a
+     * collision; a collision will result in a failed execution or in a
+     * successful execution that produces incorrect output values. If both
+     * modelCache and dataCache are empty indicating that caching
+     * information is not provided, this token must be ignored.
+     * @param callback A callback object used to return the error status of
+     *     preparing the model for execution and the prepared model if
+     *     successful, nullptr otherwise. The callback object's notify function
+     *     must be called exactly once, even if the model could not be prepared.
+     * @return status Error status of launching a task which prepares the model
+     *     in the background; must be:
+     *     - NONE if preparation task is successfully launched
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if one of the input arguments related to preparing
+     *       the model is invalid
+     */
+    prepareModel_1_3(Model model, ExecutionPreference preference,
+                     vec<handle> modelCache, vec<handle> dataCache,
+                     uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
+                     IPreparedModelCallback callback)
+        generates (ErrorStatus status);
+};
diff --git a/neuralnetworks/1.3/types.hal b/neuralnetworks/1.3/types.hal
new file mode 100644
index 0000000000..db5dd51017
--- /dev/null
+++ b/neuralnetworks/1.3/types.hal
@@ -0,0 +1,361 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.0::DataLocation;
+import @1.0::OperandLifeTime;
+import @1.0::PerformanceInfo;
+import @1.2::OperandType;
+import @1.2::OperationType;
+import @1.2::SymmPerChannelQuantParams;
+
+import android.hidl.safe_union@1.0::Monostate;
+
+/**
+ * NOTE: Since NNAPI 1.2, OEM operation and data type are deprecated. Extensions
+ * are the preferred alternative.
+ *
+ * NOTE: Adding a new fundamental type requires updating the value of
+ * OperandTypeRange::FUNDAMENTAL_MAX.
+ */
+enum OperandType : @1.2::OperandType {
+    /**
+     * A tensor of 8 bit signed integers that represent real numbers.
+     *
+     * Attached to this tensor are two numbers that can be used to convert the
+     * 8 bit integer to the real value and vice versa. These two numbers are:
+     * - scale: a 32 bit floating point value greater than zero.
+     * - zeroPoint: a 32 bit integer, in range [-128, 127].
+     *
+     * The formula is:
+     * real_value = (integer_value - zeroPoint) * scale.
+     *
+     * Available since API level 30.
+     */
+    TENSOR_QUANT8_ASYMM_SIGNED = 14,
+};
+
+/**
+ * The range of operand values in the OperandType enum.
+ */
+enum OperandTypeRange : uint32_t {
+    BASE_MIN        = 0,
+    FUNDAMENTAL_MIN = 0,
+    FUNDAMENTAL_MAX = 14,
+    OEM_MIN         = 10000,
+    OEM_MAX         = 10001,
+    BASE_MAX        = 0xFFFF,
+};
+
+
+/**
+ * The capabilities of a driver.
+ *
+ * Performance of an operation comes from the type of its first operand.
+ * This represents performance for non-extension operand types.
+ */
+struct Capabilities {
+    /**
+     * Driver performance when operating on float32 data but performing
+     * calculations with range and/or precision as low as that of the IEEE
+     * 754 16-bit floating-point format.
+     */
+    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
+    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;
+
+    /**
+     * Driver performance when operating on a particular data type.
+     * In the case of float32 data, this is used when the calculations
+     * are not relaxed.
+     */
+    struct OperandPerformance {
+        OperandType type;
+        PerformanceInfo info;
+    };
+
+    /**
+     * Performance by operand type. Must be sorted by OperandType.
+     * If a particular OperandType is not present in operandPerformance,
+     * its performance is treated as
+     * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
+     */
+    vec<OperandPerformance> operandPerformance;
+};
+
+/**
+ * Describes one operand of the model's graph.
+ */
+struct Operand {
+    /**
+     * The data type.
+     *
+     * Besides the values listed in {@link OperandType}, any value above
+     * {@link OperandTypeRange::BASE_MAX} is possible and should be interpreted
+     * as an extension type according to {@link Model::extensionNameToPrefix}.
+     */
+    OperandType type;
+
+    /**
+     * Dimensions of the operand.
+     *
+     * For a scalar operand, dimensions.size() must be 0.
+     *
+     * A tensor operand with all dimensions specified has "fully
+     * specified" dimensions. Whenever possible (i.e., whenever the
+     * dimensions are known at model construction time), a tensor
+     * operand should have (but is not required to have) fully
+     * specified dimensions, in order to enable the best possible
+     * performance.
+     *
+     * If a tensor operand's dimensions are not fully specified, the
+     * dimensions of the operand are deduced from the operand
+     * dimensions and values of the operation for which that operand
+     * is an output.
+     *
+     * In the following situations, a tensor operand's dimensions must
+     * be fully specified:
+     *
+     *     . The operand has lifetime CONSTANT_COPY or
+     *       CONSTANT_REFERENCE.
+     *
+     *     . The operand has lifetime MODEL_INPUT. Fully
+     *       specified dimensions must either be present in the
+     *       Operand or they must be provided in the corresponding
+     *       RequestArgument.
+     *       EXCEPTION: If the input is optional and omitted
+     *       (by setting the hasNoValue field of the corresponding
+     *       RequestArgument to true) then it need not have fully
+     *       specified dimensions.
+     *
+     * A tensor operand with some number of unspecified dimensions is
+     * represented by setting each unspecified dimension to 0.
+     *
+     * A tensor operand with unspecified rank is represented by providing
+     * an empty dimensions vector.
+     */
+    vec<uint32_t> dimensions;
+
+    /**
+     * The number of times this operand appears as an operation input.
+     *
+     * (For example, if this operand appears once in one operation's
+     * input list, and three times in another operation's input list,
+     * then numberOfConsumers = 4.)
+     */
+    uint32_t numberOfConsumers;
+
+    /**
+     * Quantized scale of the operand.
+     *
+     * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM or
+     * TENSOR_INT32.
+     */
+    float scale;
+
+    /**
+     * Quantized zero-point offset of the operand.
+     *
+     * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM.
+     */
+    int32_t zeroPoint;
+
+    /**
+     * How the operand is used.
+     */
+    OperandLifeTime lifetime;
+
+    /**
+     * Where to find the data for this operand.
+     * If the lifetime is TEMPORARY_VARIABLE, MODEL_INPUT, MODEL_OUTPUT, or
+     * NO_VALUE:
+     * - All the fields must be 0.
+     * If the lifetime is CONSTANT_COPY:
+     * - location.poolIndex is 0.
+     * - location.offset is the offset in bytes into Model.operandValues.
+     * - location.length is set.
+     * If the lifetime is CONSTANT_REFERENCE:
+     * - location.poolIndex is set.
+     * - location.offset is the offset in bytes into the specified pool.
+     * - location.length is set.
+     */
+    DataLocation location;
+
+    /**
+     * Additional parameters specific to a particular operand type.
+     */
+    safe_union ExtraParams {
+        /**
+         * No additional parameters.
+         */
+        Monostate none;
+
+        /**
+         * Symmetric per-channel quantization parameters.
+         *
+         * Only applicable to operands of type TENSOR_QUANT8_SYMM_PER_CHANNEL.
+         */
+        SymmPerChannelQuantParams channelQuant;
+
+        /**
+         * Extension operand parameters.
+         *
+         * The framework treats this as an opaque data blob.
+         * The format is up to individual extensions.
+         */
+        vec<uint8_t> extension;
+    } extraParams;
+};
+
+/**
+ * Describes one operation of the model's graph.
+ */
+struct Operation {
+    /**
+     * The operation type.
+     */
+    OperationType type;
+
+    /**
+     * Describes the table that contains the indexes of the inputs of the
+     * operation. The offset is the index in the operandIndexes table.
+     */
+    vec<uint32_t> inputs;
+
+    /**
+     * Describes the table that contains the indexes of the outputs of the
+     * operation. The offset is the index in the operandIndexes table.
+     */
+    vec<uint32_t> outputs;
+};
+
+/**
+ * A Neural Network Model.
+ *
+ * This includes not only the execution graph, but also constant data such as
+ * weights or scalars added at construction time. The only information that
+ * may not be known is the shape of the input tensors.
+ */
+struct Model {
+    /**
+     * All operands included in the model.
+     */
+    vec<Operand> operands;
+
+    /**
+     * All operations included in the model.
+     *
+     * The operations are sorted into execution order. Every operand
+     * with lifetime MODEL_OUTPUT or TEMPORARY_VARIABLE must be
+     * written before it is read.
+     */
+    vec<Operation> operations;
+
+    /**
+     * Input indexes of the model. There must be at least one.
+     *
+     * Each value corresponds to the index of the operand in "operands".
+     */
+    vec<uint32_t> inputIndexes;
+
+    /**
+     * Output indexes of the model. There must be at least one.
+     *
+     * Each value corresponds to the index of the operand in "operands".
+     */
+    vec<uint32_t> outputIndexes;
+
+    /**
+     * A byte buffer containing operand data that were copied into the model.
+     *
+     * An operand's value must be located here if and only if Operand::lifetime
+     * equals OperandLifeTime::CONSTANT_COPY.
+     */
+    vec<uint8_t> operandValues;
+
+    /**
+     * A collection of shared memory pools containing operand values.
+     *
+     * An operand's value must be located here if and only if Operand::lifetime
+     * equals OperandLifeTime::CONSTANT_REFERENCE.
+     */
+    vec<memory> pools;
+
+    /**
+     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
+     * precision as low as that of the IEEE 754 16-bit floating-point format.
+     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
+     * range and precision of the IEEE 754 32-bit floating-point format.
+     */
+    bool relaxComputationFloat32toFloat16;
+
+    /**
+     * The mapping between extension names and prefixes of operand and
+     * operation type values.
+     *
+     * An operand or operation whose numeric type value is above
+     * {@link OperandTypeRange::BASE_MAX} or
+     * {@link OperationTypeRange::BASE_MAX} respectively should be interpreted
+     * as an extension operand. The low
+     * {@link Model::ExtensionTypeEncoding::LOW_BITS_TYPE} bits of the value
+     * correspond to the type ID within the extension and the high
+     * {@link Model::ExtensionTypeEncoding::HIGH_BITS_PREFIX} bits encode
+     * the "prefix", which maps uniquely to the extension name.
+     *
+     * For example, if a model contains an operation whose value is
+     * 0xAAAABBBB and extensionNameToPrefix contains an entry with
+     * prefix=0xAAAA and name="vendor.test.test_extension", then
+     * the operation should be interpreted as the operation 0xBBBB
+     * of the extension named vendor.test.test_extension.
+     *
+     * This is a one-to-one correspondence. That is, there must be at most one
+     * prefix corresponding to each extension name and at most one extension
+     * name corresponding to each prefix.
+     */
+    vec<ExtensionNameAndPrefix> extensionNameToPrefix;
+
+    /**
+     * A correspondence between an extension name and a prefix of operand and
+     * operation type values.
+     */
+    struct ExtensionNameAndPrefix {
+        /**
+         * The extension name.
+         *
+         * See {@link Extension::name} for the format specification.
+         */
+        string name;
+
+        /**
+         * The unique extension identifier within the model.
+         *
+         * See {@link Model::extensionNameToPrefix}.
+         */
+        uint16_t prefix;
+    };
+
+    /**
+     * Numeric values of extension operand and operation types have the
+     * following structure:
+     * - 16 high bits represent the "prefix", which corresponds uniquely to the
+     *   extension name.
+     * - 16 low bits represent the type ID within the extension.
+     */
+    enum ExtensionTypeEncoding : uint8_t {
+        HIGH_BITS_PREFIX = 16,
+        LOW_BITS_TYPE = 16,
+    };
+};
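
The asynchronous contract documented for prepareModel_1_3 in IDevice.hal above (validate synchronously, report launch status through the return value, and deliver the prepared model or an error exactly once through the callback) can be summarized with a simplified control-flow sketch in C++. This is editorial material, not part of the patch; the types, helper functions, and callback shape are stand-ins, not the generated HIDL bindings.

#include <functional>
#include <memory>
#include <thread>

// Simplified stand-ins for the HIDL types (illustrative only).
enum class ErrorStatus { NONE, GENERAL_FAILURE, INVALID_ARGUMENT };
struct Model {};
struct PreparedModel {};
using PreparedModelCallback =
        std::function<void(ErrorStatus, std::shared_ptr<PreparedModel>)>;

// Hypothetical helpers standing in for driver-specific validation/compilation.
static bool validateModel(const Model&) { return true; }
static std::shared_ptr<PreparedModel> compileModel(const Model&) {
    return std::make_shared<PreparedModel>();
}

// Mirrors the documented flow: invalid input -> invoke the callback and return
// the same error; otherwise launch background preparation and return NONE
// immediately. The callback is invoked exactly once on every path.
ErrorStatus prepareModel_1_3(const Model& model, const PreparedModelCallback& callback) {
    if (!validateModel(model)) {
        callback(ErrorStatus::INVALID_ARGUMENT, nullptr);
        return ErrorStatus::INVALID_ARGUMENT;
    }
    try {
        std::thread([model, callback] {
            auto prepared = compileModel(model);
            callback(prepared ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE,
                     prepared);
        }).detach();
    } catch (const std::exception&) {  // asynchronous task failed to launch
        callback(ErrorStatus::GENERAL_FAILURE, nullptr);
        return ErrorStatus::GENERAL_FAILURE;
    }
    return ErrorStatus::NONE;
}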
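
As a worked illustration of the quantization scheme documented for TENSOR_QUANT8_ASYMM_SIGNED in types.hal (real_value = (integer_value - zeroPoint) * scale), here is a small standalone C++ sketch. It is not part of the patch, and the helper name is hypothetical.

#include <cstdint>
#include <vector>

// Hypothetical helper: converts TENSOR_QUANT8_ASYMM_SIGNED storage values to
// real values using real_value = (integer_value - zeroPoint) * scale.
std::vector<float> dequantizeQuant8AsymmSigned(const std::vector<int8_t>& values,
                                               float scale, int32_t zeroPoint) {
    std::vector<float> out;
    out.reserve(values.size());
    for (int8_t v : values) {
        out.push_back((static_cast<int32_t>(v) - zeroPoint) * scale);
    }
    return out;
}

// Example: with scale = 0.5 and zeroPoint = -128, the stored value -126
// represents (-126 - (-128)) * 0.5 = 1.0, and the representable real range is
// [0.0, 127.5].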
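
Capabilities::operandPerformance is required to be sorted by OperandType, and a missing entry is defined to behave as { .execTime = FLT_MAX, .powerUsage = FLT_MAX }. The following sketch shows a lookup that follows those rules; it uses simplified stand-in structs rather than the generated HIDL types.

#include <algorithm>
#include <cfloat>
#include <cstdint>
#include <vector>

// Simplified stand-ins for the generated HIDL types (illustrative only).
struct PerformanceInfo { float execTime; float powerUsage; };
struct OperandPerformance { int32_t type; PerformanceInfo info; };

// Binary search over the type-sorted vector; operand types without an entry
// fall back to the documented worst-case performance.
PerformanceInfo lookupOperandPerformance(const std::vector<OperandPerformance>& perf,
                                         int32_t type) {
    auto it = std::lower_bound(perf.begin(), perf.end(), type,
                               [](const OperandPerformance& entry, int32_t t) {
                                   return entry.type < t;
                               });
    if (it != perf.end() && it->type == type) {
        return it->info;
    }
    return {FLT_MAX, FLT_MAX};
}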
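
The extension type encoding described in Model::extensionNameToPrefix and Model::ExtensionTypeEncoding can be made concrete with one more C++ sketch (also editorial, not part of the patch); the 16-bit split mirrors HIGH_BITS_PREFIX and LOW_BITS_TYPE.

#include <cstdint>
#include <utility>

// Splits an extension operand/operation type value into its prefix (high 16
// bits, mapped to an extension name via extensionNameToPrefix) and the type ID
// within the extension (low 16 bits).
constexpr std::pair<uint16_t, uint16_t> decodeExtensionType(uint32_t value) {
    const uint16_t prefix = static_cast<uint16_t>(value >> 16);
    const uint16_t typeWithinExtension = static_cast<uint16_t>(value & 0xFFFF);
    return {prefix, typeWithinExtension};
}

// decodeExtensionType(0xAAAABBBB) yields {0xAAAA, 0xBBBB}; combined with an
// entry {prefix = 0xAAAA, name = "vendor.test.test_extension"}, the value
// denotes operation 0xBBBB of that extension, matching the example in the
// types.hal comment.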