diff --git a/tools/protos/metadata_file.proto b/tools/protos/metadata_file.proto index ac1129abee..47562c580d 100644 --- a/tools/protos/metadata_file.proto +++ b/tools/protos/metadata_file.proto @@ -92,6 +92,8 @@ message ThirdParty { SBOMRef sbom_ref = 10; } + // Identifiers for the package. + repeated Identifier identifier = 11; } // URL associated with a third-party package. @@ -278,4 +280,136 @@ message SBOMRef { // https://spdx.github.io/spdx-spec/v2.3/package-information/#72-package-spdx-identifier-field or // https://spdx.github.io/spdx-spec/v2.3/file-information/#82-file-spdx-identifier-field optional string element_id = 3; +} + +// Identifier for a third-party package. +// See go/tp-metadata-id. +message Identifier { + // The type of the identifier. Either an "ecosystem" value from + // https://ossf.github.io/osv-schema/#affectedpackage-field such as "Go", + // "npm" or "PyPI". The "value" and "version" fields follow the same rules as + // defined in the OSV spec. + + // Or one of: + // - "Git": The "value" field is the URL of the upstream git repository this + // package is retrieved from. + // For example: + // - https://github.com/git/git + // - git://git.kernel.org/pub/scm/git/git + // + // Use of a git URL requires that the package "version" value must specify a + // specific git tag or revision. This must not be a branch name. + // + // - "SVN": The "value" field is the URL of the upstream SVN repository this + // package is retrieved from. + // For example: + // - http://llvm.org/svn/llvm-project/llvm/ + // + // Use of an SVN URL requires that the package "version" value must specify + // a specific SVN tag or revision. This must not be a branch name. + // + // - "Hg": The "value" field is the URL of the upstream mercurial repository + // this package is retrieved from.
+ // For example: + // - https://mercurial-scm.org/repo/evolve + // + // Use of a mercurial URL requires that the package "version" value must + // specify a specific tag or revision. This must not be a branch name. + // + // - "Darcs": the "value" field is the URL of the upstream darcs repository + // this package is retrieved from. + // For example: + // - https://hub.darcs.net/hu.dwim/hu.dwim.util + // + // Use of a Darcs URL requires that the package "version" value must + // specify a specific tag or revision. This must not be a branch name. + // + // - "Piper": The "value" field is the URL of the upstream piper location. + // This is primarily used when a package is being migrated into third_party + // from elsewhere in Piper, or when a package is being newly developed in + // third_party. + // + // - "VCS": This is a generic fallback for an unlisted VCS system. The + // "value" field is the URL of the repository for this VCS. + // + // - "Archive": The "value" field is the URL of the archive containing the + // source code for the package, for example a zip or tgz file. + // + // - "PrebuiltByAlphabet": This type should be used for archives of primarily + // Google-owned source code (may contain non-Google-owned dependencies), + // which has been built using production Google infrastructure, and copied + // into third_party. + // + // - "LocalSource": The "value" field is the URL identifying where the local + // copy of the package source code can be found. + // Examples: + // - https://android.googlesource.com/platform/external/apache-http/ + // + // Typically, the metadata files describing a package reside in the same + // directory as the source code for the package. In a few rare cases where + // they are separate, the LocalSource URL identifies where to find the + // source code. This only describes where to find the local copy of the + // source; there should always be an additional URL describing where the + // package was retrieved from. 
+ // + // - "Other": An identifier that does not fit any other type. This may also + // indicate that the Source code was received via email or some other + // out-of-band way. This is most commonly used with commercial software + // received directly from the Vendor. In the case of email, the "value" field + // can be used to provide additional information about how it was received. + optional string type = 1; + + // A human-readable string to indicate why a third-party package does not + // have this identifier type set. + // Example: + // identifier { + // type: "PyPI" + // omission_reason: "Only on Git. Not published to PyPI." + // } + optional string omission_reason = 2; + + // The value of the package identifier as defined by the "type". + // Example: + // identifier { + // type: "PyPI" + // value: "django" + // version: "3.2.8" + // } + optional string value = 3; + + // The version associated with this package as defined by the "type". + // Example: + // identifier { + // type: "PyPI" + // value: "django" + // version: "3.2.8" + // } + optional string version = 4; + + // The closest version associated with this package as defined by the "type". + // This should only be set by automated infrastructure by applying automated + // heuristics, such as the closest git tag or package version from a package + // manifest file (e.g. pom.xml). + // + // For most identifier types, only one of `version` or `closest_version` + // should be set (not both). The exception is source repository types such as + // "Git", where `version` will refer to a git commit, and `closest_version` + // refers to a git tag. + // Example: + // identifier { + // type: "Git" + // value: "https://github.com/my/repo" + // version: "e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e" + // closest_version: "v1.4" + // } + optional string closest_version = 5; + + // When `true`, this Identifier represents the location from which the source + // code for this package was originally obtained.
# Identifier types accepted for third_party.identifier.type in a METADATA file.
# NOTE(review): metadata_file.proto also documents a "Piper" type that is not
# listed here -- confirm whether that omission is intentional.
THIRD_PARTY_IDENTIFIER_TYPES = [
    # Types defined in metadata_file.proto
    'Git',
    'SVN',
    'Hg',
    'Darcs',
    'VCS',
    'Archive',
    'PrebuiltByAlphabet',
    'LocalSource',
    'Other',
    # OSV ecosystems defined at https://ossf.github.io/osv-schema/#affectedpackage-field.
    'Go',
    'npm',
    'OSS-Fuzz',
    'PyPI',
    'RubyGems',
    'crates.io',
    'Hackage',
    'GHC',
    'Packagist',
    'Maven',
    'NuGet',
    'Linux',
    'Debian',
    'Alpine',
    'Hex',
    'Android',
    'GitHub Actions',
    'Pub',
    'ConanCenter',
    'Rocky Linux',
    'AlmaLinux',
    'Bitnami',
    'Photon OS',
    'CRAN',
    'Bioconductor',
    'SwiftURL',
]


def validate_package_metadata(metadata_file_path, package_metadata):
    """Validate the identifiers declared in a package's METADATA.

    Two rules are enforced over package_metadata.third_party.identifier:
      1) every identifier's type must be in THIRD_PARTY_IDENTIFIER_TYPES;
      2) at most one identifier may have primary_source set to true.

    Args:
      metadata_file_path: Directory containing the METADATA file; used only to
        build the error message.
      package_metadata: Parsed metadata object exposing
        third_party.identifier, an iterable of entries with `type` and
        `primary_source` attributes.

    Raises:
      SystemExit: via sys.exit() with a descriptive message on the first
        violation found.
    """
    primary_source_found = False
    for identifier in package_metadata.third_party.identifier:
        if identifier.type not in THIRD_PARTY_IDENTIFIER_TYPES:
            sys.exit(f'Unknown value of third_party.identifier.type in {metadata_file_path}/METADATA: {identifier.type}.')
        if not identifier.primary_source:
            continue
        if primary_source_found:
            sys.exit(
                f'Field "primary_source" is set to true in multiple third_party.identifier in {metadata_file_path}/METADATA.')
        # Keep the flag sticky: once a primary source has been seen it must stay
        # set, otherwise a non-primary identifier in between two primary ones
        # would reset it and hide the duplicate.
        primary_source_found = True