Create NNAPI HAL v1.3 and add TENSOR_QUANT8_ASYMM_SIGNED OperandType

Bug: 137828494 Bug: 139120468 Bug: 136735770 Test: mma Change-Id: I28f74e4b364fec1d7431a96cf5687256b3106069 Merged-In: I28f74e4b364fec1d7431a96cf5687256b3106069 (cherry picked from commit 5a7b67ab8f)
2019-08-08 14:08:31 +01:00 · 2019-08-08 14:08:31 +01:00 · ce8c632a09
commit ce8c632a09
parent f49cfb4462
5 changed files with 556 additions and 0 deletions
--- a/current.txt
+++ b/current.txt
@ -586,6 +586,8 @@ fd65298e1e09e0e3c781ab18305920d757dbe55a3b459ce17814ec5cf6dfee99 android.hardwar
 # HALs released in Android R
 07d0a252b2d8fa35887908a996ba395cf392968395fc30afab791f46e0c22a52 android.hardware.boot@1.1::IBootControl
 74049a402be913963edfdd80828a53736570e9d8124a1bf18166b6ed46a6b0ab android.hardware.boot@1.1::types
 34515afa2bb792d3c6d8495a5f5d907d179c8507ca5e55c10050d02ae1d516ef android.hardware.neuralnetworks@1.3::IDevice
 e2d20d4eb24f40b44a3766d05f77052581cb3f4df35fb48c0cc5d9cdcf5c872e android.hardware.neuralnetworks@1.3::types
 544049dcda3f943ad67d83d5277f06681a3782982a9af5a78b5d4e8d295d061a android.hardware.vibrator@1.4::IVibrator
 5e1c12efbbba89c9143d10b1b90eceff8bc79aa079f5106215b528e104fef101 android.hardware.vibrator@1.4::IVibratorCallback
 033eae03c09ebc75e82db37bc39995dfaa9086745577b44d9e14e9ccb48bd8cc android.hardware.vibrator@1.4::types
--- a/neuralnetworks/1.2/vts/functional/Android.bp
+++ b/neuralnetworks/1.2/vts/functional/Android.bp
@ -37,6 +37,7 @@ cc_test {
        "android.hardware.neuralnetworks@1.0",
        "android.hardware.neuralnetworks@1.1",
        "android.hardware.neuralnetworks@1.2",
        "android.hardware.neuralnetworks@1.3",
        "android.hidl.allocator@1.0",
        "android.hidl.memory@1.0",
        "libgmock",
--- a/neuralnetworks/1.3/Android.bp
+++ b/neuralnetworks/1.3/Android.bp
@ -0,0 +1,21 @@
 // This file is autogenerated by hidl-gen -Landroidbp.
 hidl_interface {
    name: "android.hardware.neuralnetworks@1.3",
    root: "android.hardware",
    vndk: {
        enabled: true,
    },
    srcs: [
        "types.hal",
        "IDevice.hal",
    ],
    interfaces: [
        "android.hardware.neuralnetworks@1.0",
        "android.hardware.neuralnetworks@1.1",
        "android.hardware.neuralnetworks@1.2",
        "android.hidl.base@1.0",
        "android.hidl.safe_union@1.0",
    ],
    gen_java: false,
 }
--- a/neuralnetworks/1.3/IDevice.hal
+++ b/neuralnetworks/1.3/IDevice.hal
@ -0,0 +1,171 @@
 /*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package android.hardware.neuralnetworks@1.3;
 import @1.0::ErrorStatus;
 import @1.1::ExecutionPreference;
 import @1.2::Constant;
 import @1.2::DeviceType;
 import @1.2::Extension;
 import @1.2::IDevice;
 import @1.2::IPreparedModelCallback;
 /**
 * This interface represents a device driver.
 */
 interface IDevice extends @1.2::IDevice {
    /**
     * Gets the capabilities of a driver.
     *
     * @return status Error status of the call, must be:
     *                - NONE if successful
     *                - DEVICE_UNAVAILABLE if driver is offline or busy
     *                - GENERAL_FAILURE if there is an unspecified error
     * @return capabilities Capabilities of the driver.
     */
    getCapabilities_1_3() generates (ErrorStatus status, Capabilities capabilities);
    /**
     * Gets the supported operations in a model.
     *
     * getSupportedOperations indicates which operations of a model are fully
     * supported by the vendor driver. If an operation may not be supported for
     * any reason, getSupportedOperations must return false for that operation.
     *
     * @param model A model whose operations--and their corresponding operands--
     *     are to be verified by the driver.
     * @return status Error status of the call, must be:
     *     - NONE if successful
     *     - DEVICE_UNAVAILABLE if driver is offline or busy
     *     - GENERAL_FAILURE if there is an unspecified error
     *     - INVALID_ARGUMENT if provided model is invalid
     * @return supportedOperations A list of supported operations, where true
     *     indicates the operation is supported and false indicates the
     *     operation is not supported. The index of "supported" corresponds with
     *     the index of the operation it is describing.
     */
    getSupportedOperations_1_3(Model model)
        generates (ErrorStatus status, vec<bool> supportedOperations);
    /**
     * Asynchronously creates a prepared model for execution and optionally
     * saves it into cache files.
     *
     * prepareModel is used to make any necessary transformations to or
     * alternative representations to a model for execution, possibly including
     * transformations on the constant data, optimization on the model's graph,
     * or compilation into the device's native binary format. The model itself
     * is not changed.
     *
     * Optionally, caching information may be provided for the driver to save
     * the prepared model to cache files for faster model compilation time when
     * the same model preparation is requested in the future. There are two
     * types of cache file handles provided to the driver: model cache and data
     * cache. For more information on the two types of cache handles, refer to
     * getNumberOfCacheFilesNeeded.
     *
     * The file descriptors must be opened with read and write permission. A
     * file may have any size, and the corresponding file descriptor may have
     * any offset. The driver must truncate a file to zero size before writing
     * to that file. The file descriptors may be closed by the client once the
     * asynchronous preparation has finished. The driver must dup a file
     * descriptor if it wants to get access to the cache file later.
     *
     * The model is prepared asynchronously with respect to the caller. The
     * prepareModel function must verify the inputs to the preparedModel
     * function related to preparing the model (as opposed to saving the
     * prepared model to cache) are correct. If there is an error, prepareModel
     * must immediately invoke the callback with the appropriate ErrorStatus
     * value and nullptr for the IPreparedModel, then return with the same
     * ErrorStatus. If the inputs to the prepareModel function that are related
     * to preparing the model are valid and there is no error, prepareModel must
     * launch an asynchronous task to prepare the model in the background, and
     * immediately return from prepareModel with ErrorStatus::NONE. If the
     * asynchronous task fails to launch, prepareModel must immediately invoke
     * the callback with ErrorStatus::GENERAL_FAILURE and nullptr for the
     * IPreparedModel, then return with ErrorStatus::GENERAL_FAILURE.
     *
     * When the asynchronous task has finished preparing the model, it must
     * immediately invoke the callback function provided as an input to
     * prepareModel. If the model was prepared successfully, the callback object
     * must be invoked with an error status of ErrorStatus::NONE and the
     * produced IPreparedModel object. If an error occurred preparing the model,
     * the callback object must be invoked with the appropriate ErrorStatus
     * value and nullptr for the IPreparedModel.
     *
     * Optionally, the driver may save the prepared model to cache during the
     * asynchronous preparation. Any error that occurs when saving to cache must
     * not affect the status of preparing the model. Even if the input arguments
     * related to the cache may be invalid, or the driver may fail to save to
     * cache, the prepareModel function must finish preparing the model. The
     * driver may choose not to save to cache even if the caching information is
     * provided and valid.
     *
     * The only information that may be unknown to the model at this stage is
     * the shape of the tensors, which may only be known at execution time. As
     * such, some driver services may return partially prepared models, where
     * the prepared model may only be finished when it is paired with a set of
     * inputs to the model. Note that the same prepared model object may be used
     * with different shapes of inputs on different (possibly concurrent)
     * executions.
     *
     * Multiple threads may call prepareModel on the same model concurrently.
     *
     * @param model The model to be prepared for execution.
     * @param preference Indicates the intended execution behavior of a prepared
     *     model.
     * @param modelCache A vector of handles with each entry holding exactly one
     *     cache file descriptor for the security-sensitive cache. The length of
     *     the vector must either be 0 indicating that caching information is
     *     not provided, or match the numModelCache returned from
     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
     *     the same order when retrieving the preparedModel from cache files
     *     with prepareModelFromCache.
     * @param dataCache A vector of handles with each entry holding exactly one
     *     cache file descriptor for the constants' cache. The length of the
     *     vector must either be 0 indicating that caching information is not
     *     provided, or match the numDataCache returned from
     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
     *     the same order when retrieving the preparedModel from cache files
     *     with prepareModelFromCache.
     * @param token A caching token of length Constant::BYTE_SIZE_OF_CACHE_TOKEN
     *     identifying the prepared model. The same token will be provided when
     *     retrieving the prepared model from the cache files with
     *     prepareModelFromCache.  Tokens should be chosen to have a low rate of
     *     collision for a particular application. The driver cannot detect a
     *     collision; a collision will result in a failed execution or in a
     *     successful execution that produces incorrect output values. If both
     *     modelCache and dataCache are empty indicating that caching
     *     information is not provided, this token must be ignored.
     * @param callback A callback object used to return the error status of
     *     preparing the model for execution and the prepared model if
     *     successful, nullptr otherwise. The callback object's notify function
     *     must be called exactly once, even if the model could not be prepared.
     * @return status Error status of launching a task which prepares the model
     *     in the background; must be:
     *     - NONE if preparation task is successfully launched
     *     - DEVICE_UNAVAILABLE if driver is offline or busy
     *     - GENERAL_FAILURE if there is an unspecified error
     *     - INVALID_ARGUMENT if one of the input arguments related to preparing
     *       the model is invalid
     */
    prepareModel_1_3(Model model, ExecutionPreference preference,
                     vec<handle> modelCache, vec<handle> dataCache,
                     uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
                     IPreparedModelCallback callback)
        generates (ErrorStatus status);
 };
--- a/neuralnetworks/1.3/types.hal
+++ b/neuralnetworks/1.3/types.hal
@ -0,0 +1,361 @@
 /*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package android.hardware.neuralnetworks@1.3;
 import @1.0::DataLocation;
 import @1.0::OperandLifeTime;
 import @1.0::PerformanceInfo;
 import @1.2::OperandType;
 import @1.2::OperationType;
 import @1.2::SymmPerChannelQuantParams;
 import android.hidl.safe_union@1.0::Monostate;
 /**
 * NOTE: Since NNAPI 1.2, OEM operation and data type are deprecated. Extensions
 * are the preferred alternative.
 *
 * NOTE: Adding a new fundamental type requires updating the value of
 * OperandTypeRange::FUNDAMENTAL_MAX.
 */
 enum OperandType : @1.2::OperandType {
    /**
     * A tensor of 8 bit signed integers that represent real numbers.
     *
     * Attached to this tensor are two numbers that can be used to convert the
     * 8 bit integer to the real value and vice versa. These two numbers are:
     * - scale: a 32 bit floating point value greater than zero.
     * - zeroPoint: a 32 bit integer, in range [-128, 127].
     *
     * The formula is:
     * real_value = (integer_value - zeroPoint) * scale.
     *
     * Available since API level 30.
     */
    TENSOR_QUANT8_ASYMM_SIGNED = 14,
 };
 /**
 * The range of operand values in the OperandType enum.
 */
 enum OperandTypeRange : uint32_t {
    BASE_MIN        = 0,
    FUNDAMENTAL_MIN = 0,
    FUNDAMENTAL_MAX = 14,
    OEM_MIN         = 10000,
    OEM_MAX         = 10001,
    BASE_MAX        = 0xFFFF,
 };
 /**
 * The capabilities of a driver.
 *
 * Performance of an operation comes from the type of its first operand.
 * This represents performance for non extension operand types.
 */
 struct Capabilities {
    /**
     * Driver performance when operating on float32 data but performing
     * calculations with range and/or precision as low as that of the IEEE
     * 754 16-bit floating-point format.
     */
    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;
    /**
     * Driver performance when operating on a particular data type.
     * In the case of float32 data, this is used when the calculations
     * are not relaxed.
     */
    struct OperandPerformance {
        OperandType type;
        PerformanceInfo info;
    };
    /**
     * Performance by operand type. Must be sorted by OperandType.
     * If a particular OperandType is not present in operandPerformance,
     * its performance is treated as
     * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
     */
    vec<OperandPerformance> operandPerformance;
 };
 /**
 * Describes one operand of the model's graph.
 */
 struct Operand {
    /**
     * The data type.
     *
     * Besides the values listed in {@link OperandType}, any value above
     * {@link OperandTypeRange::BASE_MAX} is possible and should be interpreted
     * as an extension type according to {@link Model::extensionNameToPrefix}.
     */
    OperandType type;
    /**
     * Dimensions of the operand.
     *
     * For a scalar operand, dimensions.size() must be 0.
     *
     * A tensor operand with all dimensions specified has "fully
     * specified" dimensions. Whenever possible (i.e., whenever the
     * dimensions are known at model construction time), a tensor
     * operand should have (but is not required to have) fully
     * specified dimensions, in order to enable the best possible
     * performance.
     *
     * If a tensor operand's dimensions are not fully specified, the
     * dimensions of the operand are deduced from the operand
     * dimensions and values of the operation for which that operand
     * is an output.
     *
     * In the following situations, a tensor operand's dimensions must
     * be fully specified:
     *
     *     . The operand has lifetime CONSTANT_COPY or
     *       CONSTANT_REFERENCE.
     *
     *     . The operand has lifetime MODEL_INPUT. Fully
     *       specified dimensions must either be present in the
     *       Operand or they must be provided in the corresponding
     *       RequestArgument.
     *       EXCEPTION: If the input is optional and omitted
     *       (by setting the hasNoValue field of the corresponding
     *       RequestArgument to true) then it need not have fully
     *       specified dimensions.
     *
     * A tensor operand with some number of unspecified dimensions is
     * represented by setting each unspecified dimension to 0.
     *
     * A tensor operand with unspecified rank is represented by providing
     * an empty dimensions vector.
     */
    vec<uint32_t> dimensions;
    /**
     * The number of times this operand appears as an operation input.
     *
     * (For example, if this operand appears once in one operation's
     * input list, and three times in another operation's input list,
     * then numberOfConsumers = 4.)
     */
    uint32_t numberOfConsumers;
    /**
     * Quantized scale of the operand.
     *
     * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM or
     * TENSOR_INT32.
     */
    float scale;
    /**
     * Quantized zero-point offset of the operand.
     *
     * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM.
     */
    int32_t zeroPoint;
    /**
     * How the operand is used.
     */
    OperandLifeTime lifetime;
    /**
     * Where to find the data for this operand.
     * If the lifetime is TEMPORARY_VARIABLE, MODEL_INPUT, MODEL_OUTPUT, or
     * NO_VALUE:
     * - All the fields must be 0.
     * If the lifetime is CONSTANT_COPY:
     * - location.poolIndex is 0.
     * - location.offset is the offset in bytes into Model.operandValues.
     * - location.length is set.
     * If the lifetime is CONSTANT_REFERENCE:
     * - location.poolIndex is set.
     * - location.offset is the offset in bytes into the specified pool.
     * - location.length is set.
     */
    DataLocation location;
    /**
     * Additional parameters specific to a particular operand type.
     */
    safe_union ExtraParams {
       /**
        * No additional parameters.
        */
       Monostate none;
       /**
        * Symmetric per-channel quantization parameters.
        *
        * Only applicable to operands of type TENSOR_QUANT8_SYMM_PER_CHANNEL.
        */
       SymmPerChannelQuantParams channelQuant;
       /**
        * Extension operand parameters.
        *
        * The framework treats this as an opaque data blob.
        * The format is up to individual extensions.
        */
       vec<uint8_t> extension;
    } extraParams;
 };
 /**
 * Describes one operation of the model's graph.
 */
 struct Operation {
    /**
     * The operation type.
     */
    OperationType type;
    /**
     * Describes the table that contains the indexes of the inputs of the
     * operation. The offset is the index in the operandIndexes table.
     */
    vec<uint32_t> inputs;
    /**
     * Describes the table that contains the indexes of the outputs of the
     * operation. The offset is the index in the operandIndexes table.
     */
    vec<uint32_t> outputs;
 };
 /**
 * A Neural Network Model.
 *
 * This includes not only the execution graph, but also constant data such as
 * weights or scalars added at construction time. The only information that
 * may not be known is the shape of the input tensors.
 */
 struct Model {
    /**
     * All operands included in the model.
     */
    vec<Operand> operands;
    /**
     * All operations included in the model.
     *
     * The operations are sorted into execution order. Every operand
     * with lifetime MODEL_OUTPUT or TEMPORARY_VARIABLE must be
     * written before it is read.
     */
    vec<Operation> operations;
    /**
     * Input indexes of the model. There must be at least one.
     *
     * Each value corresponds to the index of the operand in "operands".
     */
    vec<uint32_t> inputIndexes;
    /**
     * Output indexes of the model. There must be at least one.
     *
     * Each value corresponds to the index of the operand in "operands".
     */
    vec<uint32_t> outputIndexes;
    /**
     * A byte buffer containing operand data that were copied into the model.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals OperandLifeTime::CONSTANT_COPY.
     */
    vec<uint8_t> operandValues;
    /**
     * A collection of shared memory pools containing operand values.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals OperandLifeTime::CONSTANT_REFERENCE.
     */
    vec<memory> pools;
    /**
     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
     * precision as low as that of the IEEE 754 16-bit floating-point format.
     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
     * range and precision of the IEEE 754 32-bit floating-point format.
     */
    bool relaxComputationFloat32toFloat16;
    /**
     * The mapping between extension names and prefixes of operand and
     * operation type values.
     *
     * An operand or operation whose numeric type value is above
     * {@link OperandTypeRange::BASE_MAX} or
     * {@link OperationTypeRange::BASE_MAX} respectively should be interpreted
     * as an extension operand. The low
     * {@link Model::ExtensionTypeEncoding::LOW_BITS_TYPE} bits of the value
     * correspond to the type ID within the extension and the high
     * {@link Model::ExtensionTypeEncoding::HIGH_BITS_PREFIX} bits encode
     * the "prefix", which maps uniquely to the extension name.
     *
     * For example, if a model contains an operation whose value is
     * 0xAAAABBBB and extensionNameToPrefix contains an entry with
     * prefix=0xAAAA and name="vendor.test.test_extension", then
     * the operation should be interpreted as the operation 0xBBBB
     * of the extension named vendor.test.test_extension.
     *
     * This is a one-to-one correspondence. That is, there must be at most one
     * prefix corresponding to each extension name and at most one extension
     * name corresponding to each prefix.
     */
    vec<ExtensionNameAndPrefix> extensionNameToPrefix;
    /**
     * A correspondence between an extension name and a prefix of operand and
     * operation type values.
     */
    struct ExtensionNameAndPrefix {
        /**
         * The extension name.
         *
         * See {@link Extension::name} for the format specification.
         */
        string name;
        /**
         * The unique extension identifier within the model.
         *
         * See {@link Model::extensionNameToPrefix}.
         */
        uint16_t prefix;
    };
    /**
     * Numeric values of extension operand and operation types have the
     * following structure:
     * - 16 high bits represent the "prefix", which corresponds uniquely to the
     *   extension name.
     * - 16 low bits represent the type ID within the extension.
     */
    enum ExtensionTypeEncoding : uint8_t {
        HIGH_BITS_PREFIX = 16,
        LOW_BITS_TYPE = 16,
    };
 };