Create NNAPI HAL v1.3 and add TENSOR_QUANT8_ASYMM_SIGNED OperandType

Bug: 137828494 Bug: 139120468 Bug: 136735770 Test: mma Change-Id: I28f74e4b364fec1d7431a96cf5687256b3106069 Merged-In: I28f74e4b364fec1d7431a96cf5687256b3106069 (cherry picked from commit 5a7b67ab8f)
2019-08-08 14:08:31 +01:00 · 2019-08-08 14:08:31 +01:00 · ce8c632a09
commit ce8c632a09
parent f49cfb4462
5 changed files with 556 additions and 0 deletions
--- a/current.txt
+++ b/current.txt
@ -586,6 +586,8 @@ fd65298e1e09e0e3c781ab18305920d757dbe55a3b459ce17814ec5cf6dfee99 android.hardwar
 # HALs released in Android R
 07d0a252b2d8fa35887908a996ba395cf392968395fc30afab791f46e0c22a52 android.hardware.boot@1.1::IBootControl
 74049a402be913963edfdd80828a53736570e9d8124a1bf18166b6ed46a6b0ab android.hardware.boot@1.1::types
+34515afa2bb792d3c6d8495a5f5d907d179c8507ca5e55c10050d02ae1d516ef android.hardware.neuralnetworks@1.3::IDevice
+e2d20d4eb24f40b44a3766d05f77052581cb3f4df35fb48c0cc5d9cdcf5c872e android.hardware.neuralnetworks@1.3::types
 544049dcda3f943ad67d83d5277f06681a3782982a9af5a78b5d4e8d295d061a android.hardware.vibrator@1.4::IVibrator
 5e1c12efbbba89c9143d10b1b90eceff8bc79aa079f5106215b528e104fef101 android.hardware.vibrator@1.4::IVibratorCallback
 033eae03c09ebc75e82db37bc39995dfaa9086745577b44d9e14e9ccb48bd8cc android.hardware.vibrator@1.4::types
--- a/neuralnetworks/1.2/vts/functional/Android.bp
+++ b/neuralnetworks/1.2/vts/functional/Android.bp
@ -37,6 +37,7 @@ cc_test {
        "android.hardware.neuralnetworks@1.0",
        "android.hardware.neuralnetworks@1.1",
        "android.hardware.neuralnetworks@1.2",
+        "android.hardware.neuralnetworks@1.3",
        "android.hidl.allocator@1.0",
        "android.hidl.memory@1.0",
        "libgmock",
--- a/neuralnetworks/1.3/Android.bp
+++ b/neuralnetworks/1.3/Android.bp
@ -0,0 +1,21 @@
+// This file is autogenerated by hidl-gen -Landroidbp.
+
+hidl_interface {
+    name: "android.hardware.neuralnetworks@1.3",
+    root: "android.hardware",
+    vndk: {
+        enabled: true,
+    },
+    srcs: [
+        "types.hal",
+        "IDevice.hal",
+    ],
+    interfaces: [
+        "android.hardware.neuralnetworks@1.0",
+        "android.hardware.neuralnetworks@1.1",
+        "android.hardware.neuralnetworks@1.2",
+        "android.hidl.base@1.0",
+        "android.hidl.safe_union@1.0",
+    ],
+    gen_java: false,
+}
--- a/neuralnetworks/1.3/IDevice.hal
+++ b/neuralnetworks/1.3/IDevice.hal
@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.0::ErrorStatus;
+import @1.1::ExecutionPreference;
+import @1.2::Constant;
+import @1.2::DeviceType;
+import @1.2::Extension;
+import @1.2::IDevice;
+import @1.2::IPreparedModelCallback;
+
+/**
+ * This interface represents a device driver.
+ */
+interface IDevice extends @1.2::IDevice {
+    /**
+     * Gets the capabilities of a driver.
+     *
+     * @return status Error status of the call, must be:
+     *                - NONE if successful
+     *                - DEVICE_UNAVAILABLE if driver is offline or busy
+     *                - GENERAL_FAILURE if there is an unspecified error
+     * @return capabilities Capabilities of the driver.
+     */
+    getCapabilities_1_3() generates (ErrorStatus status, Capabilities capabilities);
+
+    /**
+     * Gets the supported operations in a model.
+     *
+     * getSupportedOperations indicates which operations of a model are fully
+     * supported by the vendor driver. If an operation may not be supported for
+     * any reason, getSupportedOperations must return false for that operation.
+     *
+     * @param model A model whose operations--and their corresponding operands--
+     *     are to be verified by the driver.
+     * @return status Error status of the call, must be:
+     *     - NONE if successful
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if provided model is invalid
+     * @return supportedOperations A list of supported operations, where true
+     *     indicates the operation is supported and false indicates the
+     *     operation is not supported. The index of "supported" corresponds with
+     *     the index of the operation it is describing.
+     */
+    getSupportedOperations_1_3(Model model)
+        generates (ErrorStatus status, vec<bool> supportedOperations);
+
+    /**
+     * Asynchronously creates a prepared model for execution and optionally
+     * saves it into cache files.
+     *
+     * prepareModel is used to make any necessary transformations to or
+     * alternative representations to a model for execution, possibly including
+     * transformations on the constant data, optimization on the model's graph,
+     * or compilation into the device's native binary format. The model itself
+     * is not changed.
+     *
+     * Optionally, caching information may be provided for the driver to save
+     * the prepared model to cache files for faster model compilation time when
+     * the same model preparation is requested in the future. There are two
+     * types of cache file handles provided to the driver: model cache and data
+     * cache. For more information on the two types of cache handles, refer to
+     * getNumberOfCacheFilesNeeded.
+     *
+     * The file descriptors must be opened with read and write permission. A
+     * file may have any size, and the corresponding file descriptor may have
+     * any offset. The driver must truncate a file to zero size before writing
+     * to that file. The file descriptors may be closed by the client once the
+     * asynchronous preparation has finished. The driver must dup a file
+     * descriptor if it wants to get access to the cache file later.
+     *
+     * The model is prepared asynchronously with respect to the caller. The
+     * prepareModel function must verify the inputs to the preparedModel
+     * function related to preparing the model (as opposed to saving the
+     * prepared model to cache) are correct. If there is an error, prepareModel
+     * must immediately invoke the callback with the appropriate ErrorStatus
+     * value and nullptr for the IPreparedModel, then return with the same
+     * ErrorStatus. If the inputs to the prepareModel function that are related
+     * to preparing the model are valid and there is no error, prepareModel must
+     * launch an asynchronous task to prepare the model in the background, and
+     * immediately return from prepareModel with ErrorStatus::NONE. If the
+     * asynchronous task fails to launch, prepareModel must immediately invoke
+     * the callback with ErrorStatus::GENERAL_FAILURE and nullptr for the
+     * IPreparedModel, then return with ErrorStatus::GENERAL_FAILURE.
+     *
+     * When the asynchronous task has finished preparing the model, it must
+     * immediately invoke the callback function provided as an input to
+     * prepareModel. If the model was prepared successfully, the callback object
+     * must be invoked with an error status of ErrorStatus::NONE and the
+     * produced IPreparedModel object. If an error occurred preparing the model,
+     * the callback object must be invoked with the appropriate ErrorStatus
+     * value and nullptr for the IPreparedModel.
+     *
+     * Optionally, the driver may save the prepared model to cache during the
+     * asynchronous preparation. Any error that occurs when saving to cache must
+     * not affect the status of preparing the model. Even if the input arguments
+     * related to the cache may be invalid, or the driver may fail to save to
+     * cache, the prepareModel function must finish preparing the model. The
+     * driver may choose not to save to cache even if the caching information is
+     * provided and valid.
+     *
+     * The only information that may be unknown to the model at this stage is
+     * the shape of the tensors, which may only be known at execution time. As
+     * such, some driver services may return partially prepared models, where
+     * the prepared model may only be finished when it is paired with a set of
+     * inputs to the model. Note that the same prepared model object may be used
+     * with different shapes of inputs on different (possibly concurrent)
+     * executions.
+     *
+     * Multiple threads may call prepareModel on the same model concurrently.
+     *
+     * @param model The model to be prepared for execution.
+     * @param preference Indicates the intended execution behavior of a prepared
+     *     model.
+     * @param modelCache A vector of handles with each entry holding exactly one
+     *     cache file descriptor for the security-sensitive cache. The length of
+     *     the vector must either be 0 indicating that caching information is
+     *     not provided, or match the numModelCache returned from
+     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
+     *     the same order when retrieving the preparedModel from cache files
+     *     with prepareModelFromCache.
+     * @param dataCache A vector of handles with each entry holding exactly one
+     *     cache file descriptor for the constants' cache. The length of the
+     *     vector must either be 0 indicating that caching information is not
+     *     provided, or match the numDataCache returned from
+     *     getNumberOfCacheFilesNeeded. The cache handles will be provided in
+     *     the same order when retrieving the preparedModel from cache files
+     *     with prepareModelFromCache.
+     * @param token A caching token of length Constant::BYTE_SIZE_OF_CACHE_TOKEN
+     *     identifying the prepared model. The same token will be provided when
+     *     retrieving the prepared model from the cache files with
+     *     prepareModelFromCache.  Tokens should be chosen to have a low rate of
+     *     collision for a particular application. The driver cannot detect a
+     *     collision; a collision will result in a failed execution or in a
+     *     successful execution that produces incorrect output values. If both
+     *     modelCache and dataCache are empty indicating that caching
+     *     information is not provided, this token must be ignored.
+     * @param callback A callback object used to return the error status of
+     *     preparing the model for execution and the prepared model if
+     *     successful, nullptr otherwise. The callback object's notify function
+     *     must be called exactly once, even if the model could not be prepared.
+     * @return status Error status of launching a task which prepares the model
+     *     in the background; must be:
+     *     - NONE if preparation task is successfully launched
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if one of the input arguments related to preparing
+     *       the model is invalid
+     */
+    prepareModel_1_3(Model model, ExecutionPreference preference,
+                     vec<handle> modelCache, vec<handle> dataCache,
+                     uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
+                     IPreparedModelCallback callback)
+        generates (ErrorStatus status);
+};
--- a/neuralnetworks/1.3/types.hal
+++ b/neuralnetworks/1.3/types.hal
@ -0,0 +1,361 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.0::DataLocation;
+import @1.0::OperandLifeTime;
+import @1.0::PerformanceInfo;
+import @1.2::OperandType;
+import @1.2::OperationType;
+import @1.2::SymmPerChannelQuantParams;
+
+import android.hidl.safe_union@1.0::Monostate;
+
+/**
+ * NOTE: Since NNAPI 1.2, OEM operation and data type are deprecated. Extensions
+ * are the preferred alternative.
+ *
+ * NOTE: Adding a new fundamental type requires updating the value of
+ * OperandTypeRange::FUNDAMENTAL_MAX.
+ */
+enum OperandType : @1.2::OperandType {
+    /**
+     * A tensor of 8 bit signed integers that represent real numbers.
+     *
+     * Attached to this tensor are two numbers that can be used to convert the
+     * 8 bit integer to the real value and vice versa. These two numbers are:
+     * - scale: a 32 bit floating point value greater than zero.
+     * - zeroPoint: a 32 bit integer, in range [-128, 127].
+     *
+     * The formula is:
+     * real_value = (integer_value - zeroPoint) * scale.
+     *
+     * Available since API level 30.
+     */
+    TENSOR_QUANT8_ASYMM_SIGNED = 14,
+};
+
+/**
+ * The range of operand values in the OperandType enum.
+ */
+enum OperandTypeRange : uint32_t {
+    BASE_MIN        = 0,
+    FUNDAMENTAL_MIN = 0,
+    FUNDAMENTAL_MAX = 14,
+    OEM_MIN         = 10000,
+    OEM_MAX         = 10001,
+    BASE_MAX        = 0xFFFF,
+};
+
+
+/**
+ * The capabilities of a driver.
+ *
+ * Performance of an operation comes from the type of its first operand.
+ * This represents performance for non extension operand types.
+ */
+struct Capabilities {
+    /**
+     * Driver performance when operating on float32 data but performing
+     * calculations with range and/or precision as low as that of the IEEE
+     * 754 16-bit floating-point format.
+     */
+    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
+    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;
+
+    /**
+     * Driver performance when operating on a particular data type.
+     * In the case of float32 data, this is used when the calculations
+     * are not relaxed.
+     */
+    struct OperandPerformance {
+        OperandType type;
+        PerformanceInfo info;
+    };
+
+    /**
+     * Performance by operand type. Must be sorted by OperandType.
+     * If a particular OperandType is not present in operandPerformance,
+     * its performance is treated as
+     * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
+     */
+    vec<OperandPerformance> operandPerformance;
+};
+
+/**
+ * Describes one operand of the model's graph.
+ */
+struct Operand {
+    /**
+     * The data type.
+     *
+     * Besides the values listed in {@link OperandType}, any value above
+     * {@link OperandTypeRange::BASE_MAX} is possible and should be interpreted
+     * as an extension type according to {@link Model::extensionNameToPrefix}.
+     */
+    OperandType type;
+
+    /**
+     * Dimensions of the operand.
+     *
+     * For a scalar operand, dimensions.size() must be 0.
+     *
+     * A tensor operand with all dimensions specified has "fully
+     * specified" dimensions. Whenever possible (i.e., whenever the
+     * dimensions are known at model construction time), a tensor
+     * operand should have (but is not required to have) fully
+     * specified dimensions, in order to enable the best possible
+     * performance.
+     *
+     * If a tensor operand's dimensions are not fully specified, the
+     * dimensions of the operand are deduced from the operand
+     * dimensions and values of the operation for which that operand
+     * is an output.
+     *
+     * In the following situations, a tensor operand's dimensions must
+     * be fully specified:
+     *
+     *     . The operand has lifetime CONSTANT_COPY or
+     *       CONSTANT_REFERENCE.
+     *
+     *     . The operand has lifetime MODEL_INPUT. Fully
+     *       specified dimensions must either be present in the
+     *       Operand or they must be provided in the corresponding
+     *       RequestArgument.
+     *       EXCEPTION: If the input is optional and omitted
+     *       (by setting the hasNoValue field of the corresponding
+     *       RequestArgument to true) then it need not have fully
+     *       specified dimensions.
+     *
+     * A tensor operand with some number of unspecified dimensions is
+     * represented by setting each unspecified dimension to 0.
+     *
+     * A tensor operand with unspecified rank is represented by providing
+     * an empty dimensions vector.
+     */
+    vec<uint32_t> dimensions;
+
+    /**
+     * The number of times this operand appears as an operation input.
+     *
+     * (For example, if this operand appears once in one operation's
+     * input list, and three times in another operation's input list,
+     * then numberOfConsumers = 4.)
+     */
+    uint32_t numberOfConsumers;
+
+    /**
+     * Quantized scale of the operand.
+     *
+     * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM or
+     * TENSOR_INT32.
+     */
+    float scale;
+
+    /**
+     * Quantized zero-point offset of the operand.
+     *
+     * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM.
+     */
+    int32_t zeroPoint;
+
+    /**
+     * How the operand is used.
+     */
+    OperandLifeTime lifetime;
+
+    /**
+     * Where to find the data for this operand.
+     * If the lifetime is TEMPORARY_VARIABLE, MODEL_INPUT, MODEL_OUTPUT, or
+     * NO_VALUE:
+     * - All the fields must be 0.
+     * If the lifetime is CONSTANT_COPY:
+     * - location.poolIndex is 0.
+     * - location.offset is the offset in bytes into Model.operandValues.
+     * - location.length is set.
+     * If the lifetime is CONSTANT_REFERENCE:
+     * - location.poolIndex is set.
+     * - location.offset is the offset in bytes into the specified pool.
+     * - location.length is set.
+     */
+    DataLocation location;
+
+    /**
+     * Additional parameters specific to a particular operand type.
+     */
+    safe_union ExtraParams {
+       /**
+        * No additional parameters.
+        */
+       Monostate none;
+
+       /**
+        * Symmetric per-channel quantization parameters.
+        *
+        * Only applicable to operands of type TENSOR_QUANT8_SYMM_PER_CHANNEL.
+        */
+       SymmPerChannelQuantParams channelQuant;
+
+       /**
+        * Extension operand parameters.
+        *
+        * The framework treats this as an opaque data blob.
+        * The format is up to individual extensions.
+        */
+       vec<uint8_t> extension;
+    } extraParams;
+};
+
+/**
+ * Describes one operation of the model's graph.
+ */
+struct Operation {
+    /**
+     * The operation type.
+     */
+    OperationType type;
+
+    /**
+     * Describes the table that contains the indexes of the inputs of the
+     * operation. The offset is the index in the operandIndexes table.
+     */
+    vec<uint32_t> inputs;
+
+    /**
+     * Describes the table that contains the indexes of the outputs of the
+     * operation. The offset is the index in the operandIndexes table.
+     */
+    vec<uint32_t> outputs;
+};
+
+/**
+ * A Neural Network Model.
+ *
+ * This includes not only the execution graph, but also constant data such as
+ * weights or scalars added at construction time. The only information that
+ * may not be known is the shape of the input tensors.
+ */
+struct Model {
+    /**
+     * All operands included in the model.
+     */
+    vec<Operand> operands;
+
+    /**
+     * All operations included in the model.
+     *
+     * The operations are sorted into execution order. Every operand
+     * with lifetime MODEL_OUTPUT or TEMPORARY_VARIABLE must be
+     * written before it is read.
+     */
+    vec<Operation> operations;
+
+    /**
+     * Input indexes of the model. There must be at least one.
+     *
+     * Each value corresponds to the index of the operand in "operands".
+     */
+    vec<uint32_t> inputIndexes;
+
+    /**
+     * Output indexes of the model. There must be at least one.
+     *
+     * Each value corresponds to the index of the operand in "operands".
+     */
+    vec<uint32_t> outputIndexes;
+
+    /**
+     * A byte buffer containing operand data that were copied into the model.
+     *
+     * An operand's value must be located here if and only if Operand::lifetime
+     * equals OperandLifeTime::CONSTANT_COPY.
+     */
+    vec<uint8_t> operandValues;
+
+    /**
+     * A collection of shared memory pools containing operand values.
+     *
+     * An operand's value must be located here if and only if Operand::lifetime
+     * equals OperandLifeTime::CONSTANT_REFERENCE.
+     */
+    vec<memory> pools;
+
+    /**
+     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
+     * precision as low as that of the IEEE 754 16-bit floating-point format.
+     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
+     * range and precision of the IEEE 754 32-bit floating-point format.
+     */
+    bool relaxComputationFloat32toFloat16;
+
+    /**
+     * The mapping between extension names and prefixes of operand and
+     * operation type values.
+     *
+     * An operand or operation whose numeric type value is above
+     * {@link OperandTypeRange::BASE_MAX} or
+     * {@link OperationTypeRange::BASE_MAX} respectively should be interpreted
+     * as an extension operand. The low
+     * {@link Model::ExtensionTypeEncoding::LOW_BITS_TYPE} bits of the value
+     * correspond to the type ID within the extension and the high
+     * {@link Model::ExtensionTypeEncoding::HIGH_BITS_PREFIX} bits encode
+     * the "prefix", which maps uniquely to the extension name.
+     *
+     * For example, if a model contains an operation whose value is
+     * 0xAAAABBBB and extensionNameToPrefix contains an entry with
+     * prefix=0xAAAA and name="vendor.test.test_extension", then
+     * the operation should be interpreted as the operation 0xBBBB
+     * of the extension named vendor.test.test_extension.
+     *
+     * This is a one-to-one correspondence. That is, there must be at most one
+     * prefix corresponding to each extension name and at most one extension
+     * name corresponding to each prefix.
+     */
+    vec<ExtensionNameAndPrefix> extensionNameToPrefix;
+
+    /**
+     * A correspondence between an extension name and a prefix of operand and
+     * operation type values.
+     */
+    struct ExtensionNameAndPrefix {
+        /**
+         * The extension name.
+         *
+         * See {@link Extension::name} for the format specification.
+         */
+        string name;
+
+        /**
+         * The unique extension identifier within the model.
+         *
+         * See {@link Model::extensionNameToPrefix}.
+         */
+        uint16_t prefix;
+    };
+
+    /**
+     * Numeric values of extension operand and operation types have the
+     * following structure:
+     * - 16 high bits represent the "prefix", which corresponds uniquely to the
+     *   extension name.
+     * - 16 low bits represent the type ID within the extension.
+     */
+    enum ExtensionTypeEncoding : uint8_t {
+        HIGH_BITS_PREFIX = 16,
+        LOW_BITS_TYPE = 16,
+    };
+};