diff --git a/scripts/hiddenapi/verify_overlaps.py b/scripts/hiddenapi/verify_overlaps.py index 8579321aa..a4a423ed6 100755 --- a/scripts/hiddenapi/verify_overlaps.py +++ b/scripts/hiddenapi/verify_overlaps.py @@ -19,12 +19,244 @@ Verify that one set of hidden API flags is a subset of another. import argparse import csv +import sys from itertools import chain +class InteriorNode: + """ + An interior node in a trie. + + Each interior node has a dict that maps from an element of a signature to + either another interior node or a leaf. Each interior node represents either + a package, class or nested class. Class members are represented by a Leaf. + + Associating the set of flags [public-api] with the signature + "Ljava/lang/Object;->String()Ljava/lang/String;" will cause the following + nodes to be created: + Node() + ^- package:java -> Node() + ^- package:lang -> Node() + ^- class:Object -> Node() + ^- member:String()Ljava/lang/String; -> Leaf([public-api]) + + Associating the set of flags [blocked,core-platform-api] with the signature + "Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;" + will cause the following nodes to be created: + Node() + ^- package:java -> Node() + ^- package:lang -> Node() + ^- class:Character -> Node() + ^- class:UnicodeScript -> Node() + ^- member:of(I)Ljava/lang/Character$UnicodeScript; + -> Leaf([blocked,core-platform-api]) + + Attributes: + nodes: a dict from an element of the signature to the Node/Leaf + containing the next element/value. + """ + def __init__(self): + self.nodes = {} + + def signatureToElements(self, signature): + """ + Split a signature or a prefix into a number of elements: + 1. The packages (excluding the leading L preceding the first package). + 2. The class names, from outermost to innermost. + 3. The member signature. + + e.g. Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript; + will be broken down into these elements: + 1. package:java + 2. package:lang + 3. class:Character + 4. class:UnicodeScript + 5. member:of(I)Ljava/lang/Character$UnicodeScript; + """ + # Remove the leading L. + # - java/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript; + text = signature.removeprefix("L") + # Split the signature between qualified class name and the class member + # signature. + # 0 - java/lang/Character$UnicodeScript + # 1 - of(I)Ljava/lang/Character$UnicodeScript; + parts = text.split(";->") + member = parts[1:] + # Split the qualified class name into packages, and class name. + # 0 - java + # 1 - lang + # 2 - Character$UnicodeScript + elements = parts[0].split("/") + packages = elements[0:-1] + className = elements[-1] + if className == "*" or className == "**": + # Cannot specify a wildcard and target a specific member + if len(member) != 0: + raise Exception("Invalid signature %s: contains wildcard %s and member signature %s" + % (signature, className, member[0])) + wildcard = [className] + # Assemble the parts into a single list, adding prefixes to identify + # the different parts. + # 0 - package:java + # 1 - package:lang + # 2 - * + return list(chain(map(lambda x : "package:" + x, packages), + wildcard)) + else: + # Split the class name into outer / inner classes + # 0 - Character + # 1 - UnicodeScript + classes = className.split("$") + # Assemble the parts into a single list, adding prefixes to identify + # the different parts. + # 0 - package:java + # 1 - package:lang + # 2 - class:Character + # 3 - class:UnicodeScript + # 4 - member:of(I)Ljava/lang/Character$UnicodeScript; + return list(chain(map(lambda x : "package:" + x, packages), + map(lambda x : "class:" + x, classes), + map(lambda x : "member:" + x, member))) + + def add(self, signature, value): + """ + Associate the value with the specific signature. + :param signature: the member signature + :param value: the value to associated with the signature + :return: n/a + """ + # Split the signature into elements. + elements = self.signatureToElements(signature) + # Find the Node associated with the deepest class. + node = self + for element in elements[:-1]: + if element in node.nodes: + node = node.nodes[element] + else: + next = InteriorNode() + node.nodes[element] = next + node = next + # Add a Leaf containing the value and associate it with the member + # signature within the class. + lastElement = elements[-1] + if not lastElement.startswith("member:"): + raise Exception("Invalid signature: %s, does not identify a specific member" % signature) + if lastElement in node.nodes: + raise Exception("Duplicate signature: %s" % signature) + node.nodes[lastElement] = Leaf(value) + + def getMatchingRows(self, pattern): + """ + Get the values (plural) associated with the pattern. + + e.g. If the pattern is a full signature then this will return a list + containing the value associated with that signature. + + If the pattern is a class then this will return a list containing the + values associated with all members of that class. + + If the pattern is a package then this will return a list containing the + values associated with all the members of all the classes in that + package and sub-packages. + + If the pattern ends with "*" then the preceding part is treated as a + package and this will return a list containing the values associated + with all the members of all the classes in that package. + + If the pattern ends with "**" then the preceding part is treated + as a package and this will return a list containing the values + associated with all the members of all the classes in that package and + all sub-packages. + + :param pattern: the pattern which could be a complete signature or a + class, or package wildcard. + :return: an iterable containing all the values associated with the + pattern. + """ + elements = self.signatureToElements(pattern) + node = self + # Include all values from this node and all its children. + selector = lambda x : True + lastElement = elements[-1] + if lastElement == "*" or lastElement == "**": + elements = elements[:-1] + if lastElement == "*": + # Do not include values from sub-packages. + selector = lambda x : not x.startswith("package:") + for element in elements: + if element in node.nodes: + node = node.nodes[element] + else: + return [] + return chain.from_iterable(node.values(selector)) + + def values(self, selector): + """ + :param selector: a function that can be applied to a key in the nodes + attribute to determine whether to return its values. + :return: A list of iterables of all the values associated with this + node and its children. + """ + values = [] + self.appendValues(values, selector) + return values + + def appendValues(self, values, selector): + """ + Append the values associated with this node and its children to the + list. + + For each item (key, child) in nodes the child node's values are returned + if and only if the selector returns True when called on its key. A child + node's values are all the values associated with it and all its + descendant nodes. + + :param selector: a function that can be applied to a key in the nodes + attribute to determine whether to return its values. + :param values: a list of a iterables of values. + """ + for key, node in self.nodes.items(): + if selector(key): + node.appendValues(values, lambda x : True) + +class Leaf: + """ + A leaf of the trie + + Attributes: + value: the value associated with this leaf. + """ + def __init__(self, value): + self.value = value + + def values(self, selector): + """ + :return: A list of a list of the value associated with this node. + """ + return [[self.value]] + + def appendValues(self, values, selector): + """ + Appends a list of the value associated with this node to the list. + :param values: a list of a iterables of values. + """ + values.append([self.value]) + def dict_reader(input): return csv.DictReader(input, delimiter=',', quotechar='|', fieldnames=['signature']) -def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict, patternsFile): +def read_flag_trie_from_file(file): + with open(file, 'r') as stream: + return read_flag_trie_from_stream(stream) + +def read_flag_trie_from_stream(stream): + trie = InteriorNode() + reader = dict_reader(stream) + for row in reader: + signature = row['signature'] + trie.add(signature, row) + return trie + +def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicTrie, patternsFile): """ Extract a subset of flags from the dict containing all the monolithic flags. @@ -34,21 +266,24 @@ def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict, :return: the dict from signature to row. """ with open(patternsFile, 'r') as stream: - return extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicFlagsDict, stream) + return extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicTrie, stream) -def extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicFlagsDict, stream): +def extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicTrie, stream): """ - Extract a subset of flags from the dict containing all the monolithic flags. + Extract a subset of flags from the trie containing all the monolithic flags. - :param monolithicFlagsDict: the dict containing all the monolithic flags. + :param monolithicTrie: the trie containing all the monolithic flags. :param stream: a stream containing a list of signature patterns that define the subset. :return: the dict from signature to row. """ dict = {} - for signature in stream: - signature = signature.rstrip() - dict[signature] = monolithicFlagsDict.get(signature, {}) + for pattern in stream: + pattern = pattern.rstrip() + rows = monolithicTrie.getMatchingRows(pattern) + for row in rows: + signature = row['signature'] + dict[signature] = row return dict def read_signature_csv_from_stream_as_dict(stream): @@ -108,20 +343,20 @@ def main(argv): args_parser.add_argument('modularFlags', nargs=argparse.REMAINDER, help='Flags produced by individual bootclasspath_fragment modules') args = args_parser.parse_args(argv[1:]) - # Read in the monolithic flags into a dict indexed by signature + # Read in all the flags into the trie monolithicFlagsPath = args.monolithicFlags - monolithicFlagsDict = read_signature_csv_from_file_as_dict(monolithicFlagsPath) + monolithicTrie = read_flag_trie_from_file(monolithicFlagsPath) # For each subset specified on the command line, create dicts for the flags - # provided by the subset and the corresponding flags from the complete set of - # flags and compare them. + # provided by the subset and the corresponding flags from the complete set + # of flags and compare them. failed = False for modularPair in args.modularFlags: parts = modularPair.split(":") modularFlagsPath = parts[0] modularPatternsPath = parts[1] modularFlagsDict = read_signature_csv_from_file_as_dict(modularFlagsPath) - monolithicFlagsSubsetDict = extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict, modularPatternsPath) + monolithicFlagsSubsetDict = extract_subset_from_monolithic_flags_as_dict_from_file(monolithicTrie, modularPatternsPath) mismatchingSignatures = compare_signature_flags(monolithicFlagsSubsetDict, modularFlagsDict) if mismatchingSignatures: failed = True diff --git a/scripts/hiddenapi/verify_overlaps_test.py b/scripts/hiddenapi/verify_overlaps_test.py index b6d5fa38e..747725458 100755 --- a/scripts/hiddenapi/verify_overlaps_test.py +++ b/scripts/hiddenapi/verify_overlaps_test.py @@ -20,8 +20,52 @@ import unittest from verify_overlaps import * +class TestSignatureToElements(unittest.TestCase): + + def signatureToElements(self, signature): + return InteriorNode().signatureToElements(signature) + + def test_signatureToElements_1(self): + expected = [ + 'package:java', + 'package:lang', + 'class:ProcessBuilder', + 'class:Redirect', + 'class:1', + 'member:()V', + ] + self.assertEqual(expected, self.signatureToElements( + "Ljava/lang/ProcessBuilder$Redirect$1;->()V")) + + def test_signatureToElements_2(self): + expected = [ + 'package:java', + 'package:lang', + 'class:Object', + 'member:hashCode()I', + ] + self.assertEqual(expected, self.signatureToElements( + "Ljava/lang/Object;->hashCode()I")) + + def test_signatureToElements_3(self): + expected = [ + 'package:java', + 'package:lang', + 'class:CharSequence', + 'class:', + 'class:ExternalSyntheticLambda0', + 'member:(Ljava/lang/CharSequence;)V', + ] + self.assertEqual(expected, self.signatureToElements( + "Ljava/lang/CharSequence$$ExternalSyntheticLambda0;" + "->(Ljava/lang/CharSequence;)V")) + class TestDetectOverlaps(unittest.TestCase): + def read_flag_trie_from_string(self, csv): + with io.StringIO(csv) as f: + return read_flag_trie_from_stream(f) + def read_signature_csv_from_string_as_dict(self, csv): with io.StringIO(csv) as f: return read_signature_csv_from_stream_as_dict(f) @@ -33,13 +77,14 @@ class TestDetectOverlaps(unittest.TestCase): extractInput = ''' Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api Ljava/lang/Object;->toString()Ljava/lang/String;,blocked +Ljava/util/zip/ZipFile;->()V,blocked +Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;,blocked +Ljava/lang/Character;->serialVersionUID:J,sdk +Ljava/lang/ProcessBuilder$Redirect$1;->()V,blocked ''' - def test_extract_subset(self): - monolithic = self.read_signature_csv_from_string_as_dict(TestDetectOverlaps.extractInput) - modular = self.read_signature_csv_from_string_as_dict(''' -Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api -''') + def test_extract_subset_signature(self): + monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput) patterns = 'Ljava/lang/Object;->hashCode()I' @@ -52,6 +97,144 @@ Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api } self.assertEqual(expected, subset) + def test_extract_subset_class(self): + monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput) + + patterns = 'java/lang/Object' + + subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns) + expected = { + 'Ljava/lang/Object;->hashCode()I': { + None: ['public-api', 'system-api', 'test-api'], + 'signature': 'Ljava/lang/Object;->hashCode()I', + }, + 'Ljava/lang/Object;->toString()Ljava/lang/String;': { + None: ['blocked'], + 'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;', + }, + } + self.assertEqual(expected, subset) + + def test_extract_subset_outer_class(self): + monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput) + + patterns = 'java/lang/Character' + + subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns) + expected = { + 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': { + None: ['blocked'], + 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;', + }, + 'Ljava/lang/Character;->serialVersionUID:J': { + None: ['sdk'], + 'signature': 'Ljava/lang/Character;->serialVersionUID:J', + }, + } + self.assertEqual(expected, subset) + + def test_extract_subset_nested_class(self): + monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput) + + patterns = 'java/lang/Character$UnicodeScript' + + subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns) + expected = { + 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': { + None: ['blocked'], + 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;', + }, + } + self.assertEqual(expected, subset) + + def test_extract_subset_package(self): + monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput) + + patterns = 'java/lang/*' + + subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns) + expected = { + 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': { + None: ['blocked'], + 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;', + }, + 'Ljava/lang/Character;->serialVersionUID:J': { + None: ['sdk'], + 'signature': 'Ljava/lang/Character;->serialVersionUID:J', + }, + 'Ljava/lang/Object;->hashCode()I': { + None: ['public-api', 'system-api', 'test-api'], + 'signature': 'Ljava/lang/Object;->hashCode()I', + }, + 'Ljava/lang/Object;->toString()Ljava/lang/String;': { + None: ['blocked'], + 'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;', + }, + 'Ljava/lang/ProcessBuilder$Redirect$1;->()V': { + None: ['blocked'], + 'signature': 'Ljava/lang/ProcessBuilder$Redirect$1;->()V', + }, + } + self.assertEqual(expected, subset) + + def test_extract_subset_recursive_package(self): + monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput) + + patterns = 'java/**' + + subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns) + expected = { + 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': { + None: ['blocked'], + 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;', + }, + 'Ljava/lang/Character;->serialVersionUID:J': { + None: ['sdk'], + 'signature': 'Ljava/lang/Character;->serialVersionUID:J', + }, + 'Ljava/lang/Object;->hashCode()I': { + None: ['public-api', 'system-api', 'test-api'], + 'signature': 'Ljava/lang/Object;->hashCode()I', + }, + 'Ljava/lang/Object;->toString()Ljava/lang/String;': { + None: ['blocked'], + 'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;', + }, + 'Ljava/lang/ProcessBuilder$Redirect$1;->()V': { + None: ['blocked'], + 'signature': 'Ljava/lang/ProcessBuilder$Redirect$1;->()V', + }, + 'Ljava/util/zip/ZipFile;->()V': { + None: ['blocked'], + 'signature': 'Ljava/util/zip/ZipFile;->()V', + }, + } + self.assertEqual(expected, subset) + + def test_extract_subset_invalid_pattern_wildcard_and_member(self): + monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput) + + patterns = 'Ljava/lang/*;->hashCode()I' + + with self.assertRaises(Exception) as context: + self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns) + self.assertTrue("contains wildcard * and member signature hashCode()I" in str(context.exception)) + + def test_read_trie_duplicate(self): + with self.assertRaises(Exception) as context: + self.read_flag_trie_from_string(''' +Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api +Ljava/lang/Object;->hashCode()I,blocked +''') + self.assertTrue("Duplicate signature: Ljava/lang/Object;->hashCode()I" in str(context.exception)) + + def test_read_trie_missing_member(self): + with self.assertRaises(Exception) as context: + self.read_flag_trie_from_string(''' +Ljava/lang/Object,public-api,system-api,test-api +''') + self.assertTrue("Invalid signature: Ljava/lang/Object, does not identify a specific member" in str(context.exception)) + def test_match(self): monolithic = self.read_signature_csv_from_string_as_dict(''' Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api