Create NNAPI HAL v1.3 and add TENSOR_QUANT8_ASYMM_SIGNED OperandType

Bug: 137828494
Bug: 139120468
Bug: 136735770
Test: mma
Change-Id: I28f74e4b364fec1d7431a96cf5687256b3106069
Merged-In: I28f74e4b364fec1d7431a96cf5687256b3106069
(cherry picked from commit 5a7b67ab8f)
This commit is contained in:
Lev Proleev 2019-08-08 14:08:31 +01:00
parent f49cfb4462
commit ce8c632a09
5 changed files with 556 additions and 0 deletions

View file

@ -586,6 +586,8 @@ fd65298e1e09e0e3c781ab18305920d757dbe55a3b459ce17814ec5cf6dfee99 android.hardwar
# HALs released in Android R
07d0a252b2d8fa35887908a996ba395cf392968395fc30afab791f46e0c22a52 android.hardware.boot@1.1::IBootControl
74049a402be913963edfdd80828a53736570e9d8124a1bf18166b6ed46a6b0ab android.hardware.boot@1.1::types
34515afa2bb792d3c6d8495a5f5d907d179c8507ca5e55c10050d02ae1d516ef android.hardware.neuralnetworks@1.3::IDevice
e2d20d4eb24f40b44a3766d05f77052581cb3f4df35fb48c0cc5d9cdcf5c872e android.hardware.neuralnetworks@1.3::types
544049dcda3f943ad67d83d5277f06681a3782982a9af5a78b5d4e8d295d061a android.hardware.vibrator@1.4::IVibrator
5e1c12efbbba89c9143d10b1b90eceff8bc79aa079f5106215b528e104fef101 android.hardware.vibrator@1.4::IVibratorCallback
033eae03c09ebc75e82db37bc39995dfaa9086745577b44d9e14e9ccb48bd8cc android.hardware.vibrator@1.4::types

View file

@ -37,6 +37,7 @@ cc_test {
"android.hardware.neuralnetworks@1.0",
"android.hardware.neuralnetworks@1.1",
"android.hardware.neuralnetworks@1.2",
"android.hardware.neuralnetworks@1.3",
"android.hidl.allocator@1.0",
"android.hidl.memory@1.0",
"libgmock",

View file

@ -0,0 +1,21 @@
// This file is autogenerated by hidl-gen -Landroidbp.

// Build definition for the android.hardware.neuralnetworks@1.3 HIDL package.
hidl_interface {
    name: "android.hardware.neuralnetworks@1.3",
    root: "android.hardware",
    vndk: {
        enabled: true,
    },
    // The .hal sources that make up this package version.
    srcs: [
        "types.hal",
        "IDevice.hal",
    ],
    // HIDL packages this package depends on: the earlier NNAPI HAL versions
    // plus the android.hidl support packages.
    interfaces: [
        "android.hardware.neuralnetworks@1.0",
        "android.hardware.neuralnetworks@1.1",
        "android.hardware.neuralnetworks@1.2",
        "android.hidl.base@1.0",
        "android.hidl.safe_union@1.0",
    ],
    // Java generation is turned off for this package.
    gen_java: false,
}

View file

@ -0,0 +1,171 @@
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.hardware.neuralnetworks@1.3;
import @1.0::ErrorStatus;
import @1.1::ExecutionPreference;
import @1.2::Constant;
import @1.2::DeviceType;
import @1.2::Extension;
import @1.2::IDevice;
import @1.2::IPreparedModelCallback;
/**
 * This interface represents a device driver.
 *
 * Version 1.3 adds the *_1_3 methods below. Their Model and Capabilities
 * types are the ones declared in this package's types.hal.
 */
interface IDevice extends @1.2::IDevice {
    /**
     * Gets the capabilities of a driver.
     *
     * @return status Error status of the call, must be:
     *     - NONE if successful
     *     - DEVICE_UNAVAILABLE if driver is offline or busy
     *     - GENERAL_FAILURE if there is an unspecified error
     * @return capabilities Capabilities of the driver.
     */
    getCapabilities_1_3() generates (ErrorStatus status, Capabilities capabilities);

    /**
     * Gets the supported operations in a model.
     *
     * getSupportedOperations indicates which operations of a model are fully
     * supported by the vendor driver. If an operation may not be supported for
     * any reason, getSupportedOperations must return false for that operation.
     *
     * @param model A model whose operations--and their corresponding operands--
     *     are to be verified by the driver.
     * @return status Error status of the call, must be:
     *     - NONE if successful
     *     - DEVICE_UNAVAILABLE if driver is offline or busy
     *     - GENERAL_FAILURE if there is an unspecified error
     *     - INVALID_ARGUMENT if provided model is invalid
     * @return supportedOperations A list of supported operations, where true
     *     indicates the operation is supported and false indicates the
     *     operation is not supported. The index of each entry in
     *     supportedOperations corresponds with the index of the operation it
     *     is describing.
     */
    getSupportedOperations_1_3(Model model)
        generates (ErrorStatus status, vec<bool> supportedOperations);

    /**
     * Asynchronously creates a prepared model for execution and optionally
     * saves it into cache files.
     *
     * prepareModel is used to make any necessary transformations to or
     * alternative representations to a model for execution, possibly including
     * transformations on the constant data, optimization on the model's graph,
     * or compilation into the device's native binary format. The model itself
     * is not changed.
     *
     * Optionally, caching information may be provided for the driver to save
     * the prepared model to cache files for faster model compilation time when
     * the same model preparation is requested in the future. There are two
     * types of cache file handles provided to the driver: model cache and data
     * cache. For more information on the two types of cache handles, refer to
     * getNumberOfCacheFilesNeeded.
     *
     * The file descriptors must be opened with read and write permission. A
     * file may have any size, and the corresponding file descriptor may have
     * any offset. The driver must truncate a file to zero size before writing
     * to that file. The file descriptors may be closed by the client once the
     * asynchronous preparation has finished. The driver must dup a file
     * descriptor if it wants to get access to the cache file later.
     *
     * The model is prepared asynchronously with respect to the caller. The
     * prepareModel function must verify the inputs to the prepareModel
     * function related to preparing the model (as opposed to saving the
     * prepared model to cache) are correct. If there is an error, prepareModel
     * must immediately invoke the callback with the appropriate ErrorStatus
     * value and nullptr for the IPreparedModel, then return with the same
     * ErrorStatus. If the inputs to the prepareModel function that are related
     * to preparing the model are valid and there is no error, prepareModel must
     * launch an asynchronous task to prepare the model in the background, and
     * immediately return from prepareModel with ErrorStatus::NONE. If the
     * asynchronous task fails to launch, prepareModel must immediately invoke
     * the callback with ErrorStatus::GENERAL_FAILURE and nullptr for the
     * IPreparedModel, then return with ErrorStatus::GENERAL_FAILURE.
     *
     * When the asynchronous task has finished preparing the model, it must
     * immediately invoke the callback function provided as an input to
     * prepareModel. If the model was prepared successfully, the callback object
     * must be invoked with an error status of ErrorStatus::NONE and the
     * produced IPreparedModel object. If an error occurred preparing the model,
     * the callback object must be invoked with the appropriate ErrorStatus
     * value and nullptr for the IPreparedModel.
     *
     * Optionally, the driver may save the prepared model to cache during the
     * asynchronous preparation. Any error that occurs when saving to cache must
     * not affect the status of preparing the model. Even if the input arguments
     * related to the cache may be invalid, or the driver may fail to save to
     * cache, the prepareModel function must finish preparing the model. The
     * driver may choose not to save to cache even if the caching information is
     * provided and valid.
     *
     * The only information that may be unknown to the model at this stage is
     * the shape of the tensors, which may only be known at execution time. As
     * such, some driver services may return partially prepared models, where
     * the prepared model may only be finished when it is paired with a set of
     * inputs to the model. Note that the same prepared model object may be used
     * with different shapes of inputs on different (possibly concurrent)
     * executions.
     *
     * Multiple threads may call prepareModel on the same model concurrently.
     *
     * @param model The model to be prepared for execution.
     * @param preference Indicates the intended execution behavior of a prepared
     *     model.
     * @param modelCache A vector of handles with each entry holding exactly one
     *     cache file descriptor for the security-sensitive cache. The length of
     *     the vector must either be 0 indicating that caching information is
     *     not provided, or match the numModelCache returned from
     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
     *     the same order when retrieving the preparedModel from cache files
     *     with prepareModelFromCache.
     * @param dataCache A vector of handles with each entry holding exactly one
     *     cache file descriptor for the constants' cache. The length of the
     *     vector must either be 0 indicating that caching information is not
     *     provided, or match the numDataCache returned from
     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
     *     the same order when retrieving the preparedModel from cache files
     *     with prepareModelFromCache.
     * @param token A caching token of length Constant::BYTE_SIZE_OF_CACHE_TOKEN
     *     identifying the prepared model. The same token will be provided when
     *     retrieving the prepared model from the cache files with
     *     prepareModelFromCache. Tokens should be chosen to have a low rate of
     *     collision for a particular application. The driver cannot detect a
     *     collision; a collision will result in a failed execution or in a
     *     successful execution that produces incorrect output values. If both
     *     modelCache and dataCache are empty indicating that caching
     *     information is not provided, this token must be ignored.
     * @param callback A callback object used to return the error status of
     *     preparing the model for execution and the prepared model if
     *     successful, nullptr otherwise. The callback object's notify function
     *     must be called exactly once, even if the model could not be prepared.
     * @return status Error status of launching a task which prepares the model
     *     in the background; must be:
     *     - NONE if preparation task is successfully launched
     *     - DEVICE_UNAVAILABLE if driver is offline or busy
     *     - GENERAL_FAILURE if there is an unspecified error
     *     - INVALID_ARGUMENT if one of the input arguments related to preparing
     *       the model is invalid
     */
    prepareModel_1_3(Model model, ExecutionPreference preference,
                     vec<handle> modelCache, vec<handle> dataCache,
                     uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
                     IPreparedModelCallback callback)
        generates (ErrorStatus status);
};

View file

@ -0,0 +1,361 @@
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.hardware.neuralnetworks@1.3;
import @1.0::DataLocation;
import @1.0::OperandLifeTime;
import @1.0::PerformanceInfo;
import @1.2::OperandType;
import @1.2::OperationType;
import @1.2::SymmPerChannelQuantParams;
import android.hidl.safe_union@1.0::Monostate;
/**
 * Operand types. Extends @1.2::OperandType with the signed 8 bit asymmetric
 * quantized tensor type TENSOR_QUANT8_ASYMM_SIGNED.
 *
 * NOTE: Since NNAPI 1.2, OEM operation and data type are deprecated. Extensions
 * are the preferred alternative.
 *
 * NOTE: Adding a new fundamental type requires updating the value of
 * OperandTypeRange::FUNDAMENTAL_MAX.
 */
enum OperandType : @1.2::OperandType {
    /**
     * A tensor of 8 bit signed integers that represent real numbers.
     *
     * Attached to this tensor are two numbers that can be used to convert the
     * 8 bit integer to the real value and vice versa. These two numbers are:
     * - scale: a 32 bit floating point value greater than zero.
     * - zeroPoint: a 32 bit integer, in range [-128, 127].
     *
     * The formula is:
     * real_value = (integer_value - zeroPoint) * scale.
     *
     * Available since API level 30.
     */
    TENSOR_QUANT8_ASYMM_SIGNED = 14,
};
/**
 * The range of operand values in the OperandType enum.
 */
enum OperandTypeRange : uint32_t {
    BASE_MIN        = 0,
    FUNDAMENTAL_MIN = 0,
    /**
     * Equal to OperandType::TENSOR_QUANT8_ASYMM_SIGNED. Must be updated
     * whenever a new fundamental type is added to OperandType.
     */
    FUNDAMENTAL_MAX = 14,
    OEM_MIN         = 10000,
    OEM_MAX         = 10001,
    /**
     * Operand type values above BASE_MAX are interpreted as extension types
     * (see Operand::type).
     */
    BASE_MAX        = 0xFFFF,
};
/**
 * The capabilities of a driver.
 *
 * Performance of an operation comes from the type of its first operand.
 * This represents performance for non extension operand types.
 */
struct Capabilities {
    /**
     * Driver performance when operating on float32 data but performing
     * calculations with range and/or precision as low as that of the IEEE
     * 754 16-bit floating-point format.
     */
    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;

    /**
     * Driver performance when operating on a particular data type.
     * In the case of float32 data, this is used when the calculations
     * are not relaxed.
     */
    struct OperandPerformance {
        /**
         * The operand type this entry describes.
         */
        OperandType type;

        /**
         * Driver performance when operating on data of that type.
         */
        PerformanceInfo info;
    };

    /**
     * Performance by operand type. Must be sorted by OperandType.
     * If a particular OperandType is not present in operandPerformance,
     * its performance is treated as
     * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
     */
    vec<OperandPerformance> operandPerformance;
};
/**
 * Describes one operand of the model's graph.
 */
struct Operand {
    /**
     * The data type.
     *
     * Besides the values listed in {@link OperandType}, any value above
     * {@link OperandTypeRange::BASE_MAX} is possible and should be interpreted
     * as an extension type according to {@link Model::extensionNameToPrefix}.
     */
    OperandType type;

    /**
     * Dimensions of the operand.
     *
     * For a scalar operand, dimensions.size() must be 0.
     *
     * A tensor operand with all dimensions specified has "fully
     * specified" dimensions. Whenever possible (i.e., whenever the
     * dimensions are known at model construction time), a tensor
     * operand should have (but is not required to have) fully
     * specified dimensions, in order to enable the best possible
     * performance.
     *
     * If a tensor operand's dimensions are not fully specified, the
     * dimensions of the operand are deduced from the operand
     * dimensions and values of the operation for which that operand
     * is an output.
     *
     * In the following situations, a tensor operand's dimensions must
     * be fully specified:
     *
     *     . The operand has lifetime CONSTANT_COPY or
     *       CONSTANT_REFERENCE.
     *
     *     . The operand has lifetime MODEL_INPUT. Fully
     *       specified dimensions must either be present in the
     *       Operand or they must be provided in the corresponding
     *       RequestArgument.
     *       EXCEPTION: If the input is optional and omitted
     *       (by setting the hasNoValue field of the corresponding
     *       RequestArgument to true) then it need not have fully
     *       specified dimensions.
     *
     * A tensor operand with some number of unspecified dimensions is
     * represented by setting each unspecified dimension to 0.
     *
     * A tensor operand with unspecified rank is represented by providing
     * an empty dimensions vector.
     */
    vec<uint32_t> dimensions;

    /**
     * The number of times this operand appears as an operation input.
     *
     * (For example, if this operand appears once in one operation's
     * input list, and three times in another operation's input list,
     * then numberOfConsumers = 4.)
     */
    uint32_t numberOfConsumers;

    /**
     * Quantized scale of the operand.
     *
     * Applicable to quantized operand types, including TENSOR_QUANT8_ASYMM,
     * TENSOR_QUANT8_ASYMM_SIGNED and TENSOR_INT32. See {@link OperandType}
     * for the parameters each type uses.
     */
    float scale;

    /**
     * Quantized zero-point offset of the operand.
     *
     * Applicable to asymmetric quantized operand types, including
     * TENSOR_QUANT8_ASYMM and TENSOR_QUANT8_ASYMM_SIGNED. For
     * TENSOR_QUANT8_ASYMM_SIGNED the value must be in range [-128, 127].
     */
    int32_t zeroPoint;

    /**
     * How the operand is used.
     */
    OperandLifeTime lifetime;

    /**
     * Where to find the data for this operand.
     * If the lifetime is TEMPORARY_VARIABLE, MODEL_INPUT, MODEL_OUTPUT, or
     * NO_VALUE:
     * - All the fields must be 0.
     * If the lifetime is CONSTANT_COPY:
     * - location.poolIndex is 0.
     * - location.offset is the offset in bytes into Model.operandValues.
     * - location.length is set.
     * If the lifetime is CONSTANT_REFERENCE:
     * - location.poolIndex is set.
     * - location.offset is the offset in bytes into the specified pool.
     * - location.length is set.
     */
    DataLocation location;

    /**
     * Additional parameters specific to a particular operand type.
     */
    safe_union ExtraParams {
        /**
         * No additional parameters.
         */
        Monostate none;

        /**
         * Symmetric per-channel quantization parameters.
         *
         * Only applicable to operands of type TENSOR_QUANT8_SYMM_PER_CHANNEL.
         */
        SymmPerChannelQuantParams channelQuant;

        /**
         * Extension operand parameters.
         *
         * The framework treats this as an opaque data blob.
         * The format is up to individual extensions.
         */
        vec<uint8_t> extension;
    } extraParams;
};
/**
 * Describes one operation of the model's graph.
 */
struct Operation {
    /**
     * The operation type.
     */
    OperationType type;

    /**
     * Describes the table that contains the indexes of the inputs of the
     * operation. The offset is the index in the operandIndexes table.
     *
     * NOTE(review): no "operandIndexes" table is declared alongside this
     * struct; the entries presumably index Model::operands -- confirm.
     */
    vec<uint32_t> inputs;

    /**
     * Describes the table that contains the indexes of the outputs of the
     * operation. The offset is the index in the operandIndexes table.
     *
     * NOTE(review): no "operandIndexes" table is declared alongside this
     * struct; the entries presumably index Model::operands -- confirm.
     */
    vec<uint32_t> outputs;
};
/**
 * A Neural Network Model.
 *
 * This includes not only the execution graph, but also constant data such as
 * weights or scalars added at construction time. The only information that
 * may not be known is the shape of the input tensors.
 */
struct Model {
    /**
     * All operands included in the model.
     */
    vec<Operand> operands;

    /**
     * All operations included in the model.
     *
     * The operations are sorted into execution order. Every operand
     * with lifetime MODEL_OUTPUT or TEMPORARY_VARIABLE must be
     * written before it is read.
     */
    vec<Operation> operations;

    /**
     * Input indexes of the model. There must be at least one.
     *
     * Each value corresponds to the index of the operand in "operands".
     */
    vec<uint32_t> inputIndexes;

    /**
     * Output indexes of the model. There must be at least one.
     *
     * Each value corresponds to the index of the operand in "operands".
     */
    vec<uint32_t> outputIndexes;

    /**
     * A byte buffer containing operand data that were copied into the model.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals OperandLifeTime::CONSTANT_COPY.
     */
    vec<uint8_t> operandValues;

    /**
     * A collection of shared memory pools containing operand values.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals OperandLifeTime::CONSTANT_REFERENCE.
     */
    vec<memory> pools;

    /**
     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
     * precision as low as that of the IEEE 754 16-bit floating-point format.
     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
     * range and precision of the IEEE 754 32-bit floating-point format.
     */
    bool relaxComputationFloat32toFloat16;

    /**
     * The mapping between extension names and prefixes of operand and
     * operation type values.
     *
     * An operand or operation whose numeric type value is above
     * {@link OperandTypeRange::BASE_MAX} or
     * {@link OperationTypeRange::BASE_MAX} respectively should be interpreted
     * as an extension operand or operation. The low
     * {@link Model::ExtensionTypeEncoding::LOW_BITS_TYPE} bits of the value
     * correspond to the type ID within the extension and the high
     * {@link Model::ExtensionTypeEncoding::HIGH_BITS_PREFIX} bits encode
     * the "prefix", which maps uniquely to the extension name.
     *
     * For example, if a model contains an operation whose value is
     * 0xAAAABBBB and extensionNameToPrefix contains an entry with
     * prefix=0xAAAA and name="vendor.test.test_extension", then
     * the operation should be interpreted as the operation 0xBBBB
     * of the extension named vendor.test.test_extension.
     *
     * This is a one-to-one correspondence. That is, there must be at most one
     * prefix corresponding to each extension name and at most one extension
     * name corresponding to each prefix.
     */
    vec<ExtensionNameAndPrefix> extensionNameToPrefix;

    /**
     * A correspondence between an extension name and a prefix of operand and
     * operation type values.
     */
    struct ExtensionNameAndPrefix {
        /**
         * The extension name.
         *
         * See {@link Extension::name} for the format specification.
         */
        string name;

        /**
         * The unique extension identifier within the model.
         *
         * See {@link Model::extensionNameToPrefix}.
         */
        uint16_t prefix;
    };

    /**
     * Numeric values of extension operand and operation types have the
     * following structure:
     * - 16 high bits represent the "prefix", which corresponds uniquely to the
     *   extension name.
     * - 16 low bits represent the type ID within the extension.
     *
     * The enumerators below hold those two bit widths, not masks or shifts.
     */
    enum ExtensionTypeEncoding : uint8_t {
        HIGH_BITS_PREFIX = 16,
        LOW_BITS_TYPE = 16,
    };
};