Merge "Use trie to store monolithic hidden API flags"

This commit is contained in:
Paul Duffin 2021-08-10 09:26:56 +00:00 committed by Gerrit Code Review
commit 8f34b0e19d
2 changed files with 436 additions and 18 deletions

View file

@ -19,12 +19,244 @@ Verify that one set of hidden API flags is a subset of another.
import argparse import argparse
import csv import csv
import sys
from itertools import chain from itertools import chain
class InteriorNode:
"""
An interior node in a trie.
Each interior node has a dict that maps from an element of a signature to
either another interior node or a leaf. Each interior node represents either
a package, class or nested class. Class members are represented by a Leaf.
Associating the set of flags [public-api] with the signature
"Ljava/lang/Object;->String()Ljava/lang/String;" will cause the following
nodes to be created:
Node()
^- package:java -> Node()
^- package:lang -> Node()
^- class:Object -> Node()
^- member:String()Ljava/lang/String; -> Leaf([public-api])
Associating the set of flags [blocked,core-platform-api] with the signature
"Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;"
will cause the following nodes to be created:
Node()
^- package:java -> Node()
^- package:lang -> Node()
^- class:Character -> Node()
^- class:UnicodeScript -> Node()
^- member:of(I)Ljava/lang/Character$UnicodeScript;
-> Leaf([blocked,core-platform-api])
Attributes:
nodes: a dict from an element of the signature to the Node/Leaf
containing the next element/value.
"""
def __init__(self):
self.nodes = {}
def signatureToElements(self, signature):
"""
Split a signature or a prefix into a number of elements:
1. The packages (excluding the leading L preceding the first package).
2. The class names, from outermost to innermost.
3. The member signature.
e.g. Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;
will be broken down into these elements:
1. package:java
2. package:lang
3. class:Character
4. class:UnicodeScript
5. member:of(I)Ljava/lang/Character$UnicodeScript;
"""
# Remove the leading L.
# - java/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;
text = signature.removeprefix("L")
# Split the signature between qualified class name and the class member
# signature.
# 0 - java/lang/Character$UnicodeScript
# 1 - of(I)Ljava/lang/Character$UnicodeScript;
parts = text.split(";->")
member = parts[1:]
# Split the qualified class name into packages, and class name.
# 0 - java
# 1 - lang
# 2 - Character$UnicodeScript
elements = parts[0].split("/")
packages = elements[0:-1]
className = elements[-1]
if className == "*" or className == "**":
# Cannot specify a wildcard and target a specific member
if len(member) != 0:
raise Exception("Invalid signature %s: contains wildcard %s and member signature %s"
% (signature, className, member[0]))
wildcard = [className]
# Assemble the parts into a single list, adding prefixes to identify
# the different parts.
# 0 - package:java
# 1 - package:lang
# 2 - *
return list(chain(map(lambda x : "package:" + x, packages),
wildcard))
else:
# Split the class name into outer / inner classes
# 0 - Character
# 1 - UnicodeScript
classes = className.split("$")
# Assemble the parts into a single list, adding prefixes to identify
# the different parts.
# 0 - package:java
# 1 - package:lang
# 2 - class:Character
# 3 - class:UnicodeScript
# 4 - member:of(I)Ljava/lang/Character$UnicodeScript;
return list(chain(map(lambda x : "package:" + x, packages),
map(lambda x : "class:" + x, classes),
map(lambda x : "member:" + x, member)))
def add(self, signature, value):
"""
Associate the value with the specific signature.
:param signature: the member signature
:param value: the value to associated with the signature
:return: n/a
"""
# Split the signature into elements.
elements = self.signatureToElements(signature)
# Find the Node associated with the deepest class.
node = self
for element in elements[:-1]:
if element in node.nodes:
node = node.nodes[element]
else:
next = InteriorNode()
node.nodes[element] = next
node = next
# Add a Leaf containing the value and associate it with the member
# signature within the class.
lastElement = elements[-1]
if not lastElement.startswith("member:"):
raise Exception("Invalid signature: %s, does not identify a specific member" % signature)
if lastElement in node.nodes:
raise Exception("Duplicate signature: %s" % signature)
node.nodes[lastElement] = Leaf(value)
def getMatchingRows(self, pattern):
"""
Get the values (plural) associated with the pattern.
e.g. If the pattern is a full signature then this will return a list
containing the value associated with that signature.
If the pattern is a class then this will return a list containing the
values associated with all members of that class.
If the pattern is a package then this will return a list containing the
values associated with all the members of all the classes in that
package and sub-packages.
If the pattern ends with "*" then the preceding part is treated as a
package and this will return a list containing the values associated
with all the members of all the classes in that package.
If the pattern ends with "**" then the preceding part is treated
as a package and this will return a list containing the values
associated with all the members of all the classes in that package and
all sub-packages.
:param pattern: the pattern which could be a complete signature or a
class, or package wildcard.
:return: an iterable containing all the values associated with the
pattern.
"""
elements = self.signatureToElements(pattern)
node = self
# Include all values from this node and all its children.
selector = lambda x : True
lastElement = elements[-1]
if lastElement == "*" or lastElement == "**":
elements = elements[:-1]
if lastElement == "*":
# Do not include values from sub-packages.
selector = lambda x : not x.startswith("package:")
for element in elements:
if element in node.nodes:
node = node.nodes[element]
else:
return []
return chain.from_iterable(node.values(selector))
def values(self, selector):
"""
:param selector: a function that can be applied to a key in the nodes
attribute to determine whether to return its values.
:return: A list of iterables of all the values associated with this
node and its children.
"""
values = []
self.appendValues(values, selector)
return values
def appendValues(self, values, selector):
"""
Append the values associated with this node and its children to the
list.
For each item (key, child) in nodes the child node's values are returned
if and only if the selector returns True when called on its key. A child
node's values are all the values associated with it and all its
descendant nodes.
:param selector: a function that can be applied to a key in the nodes
attribute to determine whether to return its values.
:param values: a list of a iterables of values.
"""
for key, node in self.nodes.items():
if selector(key):
node.appendValues(values, lambda x : True)
class Leaf:
"""
A leaf of the trie
Attributes:
value: the value associated with this leaf.
"""
def __init__(self, value):
self.value = value
def values(self, selector):
"""
:return: A list of a list of the value associated with this node.
"""
return [[self.value]]
def appendValues(self, values, selector):
"""
Appends a list of the value associated with this node to the list.
:param values: a list of a iterables of values.
"""
values.append([self.value])
def dict_reader(input): def dict_reader(input):
return csv.DictReader(input, delimiter=',', quotechar='|', fieldnames=['signature']) return csv.DictReader(input, delimiter=',', quotechar='|', fieldnames=['signature'])
def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict, patternsFile): def read_flag_trie_from_file(file):
with open(file, 'r') as stream:
return read_flag_trie_from_stream(stream)
def read_flag_trie_from_stream(stream):
trie = InteriorNode()
reader = dict_reader(stream)
for row in reader:
signature = row['signature']
trie.add(signature, row)
return trie
def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicTrie, patternsFile):
""" """
Extract a subset of flags from the dict containing all the monolithic flags. Extract a subset of flags from the dict containing all the monolithic flags.
@ -34,21 +266,24 @@ def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict,
:return: the dict from signature to row. :return: the dict from signature to row.
""" """
with open(patternsFile, 'r') as stream: with open(patternsFile, 'r') as stream:
return extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicFlagsDict, stream) return extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicTrie, stream)
def extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicFlagsDict, stream): def extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicTrie, stream):
""" """
Extract a subset of flags from the dict containing all the monolithic flags. Extract a subset of flags from the trie containing all the monolithic flags.
:param monolithicFlagsDict: the dict containing all the monolithic flags. :param monolithicTrie: the trie containing all the monolithic flags.
:param stream: a stream containing a list of signature patterns that define :param stream: a stream containing a list of signature patterns that define
the subset. the subset.
:return: the dict from signature to row. :return: the dict from signature to row.
""" """
dict = {} dict = {}
for signature in stream: for pattern in stream:
signature = signature.rstrip() pattern = pattern.rstrip()
dict[signature] = monolithicFlagsDict.get(signature, {}) rows = monolithicTrie.getMatchingRows(pattern)
for row in rows:
signature = row['signature']
dict[signature] = row
return dict return dict
def read_signature_csv_from_stream_as_dict(stream): def read_signature_csv_from_stream_as_dict(stream):
@ -108,20 +343,20 @@ def main(argv):
args_parser.add_argument('modularFlags', nargs=argparse.REMAINDER, help='Flags produced by individual bootclasspath_fragment modules') args_parser.add_argument('modularFlags', nargs=argparse.REMAINDER, help='Flags produced by individual bootclasspath_fragment modules')
args = args_parser.parse_args(argv[1:]) args = args_parser.parse_args(argv[1:])
# Read in the monolithic flags into a dict indexed by signature # Read in all the flags into the trie
monolithicFlagsPath = args.monolithicFlags monolithicFlagsPath = args.monolithicFlags
monolithicFlagsDict = read_signature_csv_from_file_as_dict(monolithicFlagsPath) monolithicTrie = read_flag_trie_from_file(monolithicFlagsPath)
# For each subset specified on the command line, create dicts for the flags # For each subset specified on the command line, create dicts for the flags
# provided by the subset and the corresponding flags from the complete set of # provided by the subset and the corresponding flags from the complete set
# flags and compare them. # of flags and compare them.
failed = False failed = False
for modularPair in args.modularFlags: for modularPair in args.modularFlags:
parts = modularPair.split(":") parts = modularPair.split(":")
modularFlagsPath = parts[0] modularFlagsPath = parts[0]
modularPatternsPath = parts[1] modularPatternsPath = parts[1]
modularFlagsDict = read_signature_csv_from_file_as_dict(modularFlagsPath) modularFlagsDict = read_signature_csv_from_file_as_dict(modularFlagsPath)
monolithicFlagsSubsetDict = extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict, modularPatternsPath) monolithicFlagsSubsetDict = extract_subset_from_monolithic_flags_as_dict_from_file(monolithicTrie, modularPatternsPath)
mismatchingSignatures = compare_signature_flags(monolithicFlagsSubsetDict, modularFlagsDict) mismatchingSignatures = compare_signature_flags(monolithicFlagsSubsetDict, modularFlagsDict)
if mismatchingSignatures: if mismatchingSignatures:
failed = True failed = True

View file

@ -20,8 +20,52 @@ import unittest
from verify_overlaps import * from verify_overlaps import *
class TestSignatureToElements(unittest.TestCase):
def signatureToElements(self, signature):
return InteriorNode().signatureToElements(signature)
def test_signatureToElements_1(self):
expected = [
'package:java',
'package:lang',
'class:ProcessBuilder',
'class:Redirect',
'class:1',
'member:<init>()V',
]
self.assertEqual(expected, self.signatureToElements(
"Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V"))
def test_signatureToElements_2(self):
expected = [
'package:java',
'package:lang',
'class:Object',
'member:hashCode()I',
]
self.assertEqual(expected, self.signatureToElements(
"Ljava/lang/Object;->hashCode()I"))
def test_signatureToElements_3(self):
expected = [
'package:java',
'package:lang',
'class:CharSequence',
'class:',
'class:ExternalSyntheticLambda0',
'member:<init>(Ljava/lang/CharSequence;)V',
]
self.assertEqual(expected, self.signatureToElements(
"Ljava/lang/CharSequence$$ExternalSyntheticLambda0;"
"-><init>(Ljava/lang/CharSequence;)V"))
class TestDetectOverlaps(unittest.TestCase): class TestDetectOverlaps(unittest.TestCase):
def read_flag_trie_from_string(self, csv):
with io.StringIO(csv) as f:
return read_flag_trie_from_stream(f)
def read_signature_csv_from_string_as_dict(self, csv): def read_signature_csv_from_string_as_dict(self, csv):
with io.StringIO(csv) as f: with io.StringIO(csv) as f:
return read_signature_csv_from_stream_as_dict(f) return read_signature_csv_from_stream_as_dict(f)
@ -33,13 +77,14 @@ class TestDetectOverlaps(unittest.TestCase):
extractInput = ''' extractInput = '''
Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
Ljava/lang/Object;->toString()Ljava/lang/String;,blocked Ljava/lang/Object;->toString()Ljava/lang/String;,blocked
Ljava/util/zip/ZipFile;-><clinit>()V,blocked
Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;,blocked
Ljava/lang/Character;->serialVersionUID:J,sdk
Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V,blocked
''' '''
def test_extract_subset(self): def test_extract_subset_signature(self):
monolithic = self.read_signature_csv_from_string_as_dict(TestDetectOverlaps.extractInput) monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
modular = self.read_signature_csv_from_string_as_dict('''
Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
''')
patterns = 'Ljava/lang/Object;->hashCode()I' patterns = 'Ljava/lang/Object;->hashCode()I'
@ -52,6 +97,144 @@ Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
} }
self.assertEqual(expected, subset) self.assertEqual(expected, subset)
def test_extract_subset_class(self):
monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
patterns = 'java/lang/Object'
subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
expected = {
'Ljava/lang/Object;->hashCode()I': {
None: ['public-api', 'system-api', 'test-api'],
'signature': 'Ljava/lang/Object;->hashCode()I',
},
'Ljava/lang/Object;->toString()Ljava/lang/String;': {
None: ['blocked'],
'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;',
},
}
self.assertEqual(expected, subset)
def test_extract_subset_outer_class(self):
monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
patterns = 'java/lang/Character'
subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
expected = {
'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
None: ['blocked'],
'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
},
'Ljava/lang/Character;->serialVersionUID:J': {
None: ['sdk'],
'signature': 'Ljava/lang/Character;->serialVersionUID:J',
},
}
self.assertEqual(expected, subset)
def test_extract_subset_nested_class(self):
monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
patterns = 'java/lang/Character$UnicodeScript'
subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
expected = {
'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
None: ['blocked'],
'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
},
}
self.assertEqual(expected, subset)
def test_extract_subset_package(self):
monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
patterns = 'java/lang/*'
subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
expected = {
'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
None: ['blocked'],
'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
},
'Ljava/lang/Character;->serialVersionUID:J': {
None: ['sdk'],
'signature': 'Ljava/lang/Character;->serialVersionUID:J',
},
'Ljava/lang/Object;->hashCode()I': {
None: ['public-api', 'system-api', 'test-api'],
'signature': 'Ljava/lang/Object;->hashCode()I',
},
'Ljava/lang/Object;->toString()Ljava/lang/String;': {
None: ['blocked'],
'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;',
},
'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V': {
None: ['blocked'],
'signature': 'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V',
},
}
self.assertEqual(expected, subset)
def test_extract_subset_recursive_package(self):
monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
patterns = 'java/**'
subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
expected = {
'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
None: ['blocked'],
'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
},
'Ljava/lang/Character;->serialVersionUID:J': {
None: ['sdk'],
'signature': 'Ljava/lang/Character;->serialVersionUID:J',
},
'Ljava/lang/Object;->hashCode()I': {
None: ['public-api', 'system-api', 'test-api'],
'signature': 'Ljava/lang/Object;->hashCode()I',
},
'Ljava/lang/Object;->toString()Ljava/lang/String;': {
None: ['blocked'],
'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;',
},
'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V': {
None: ['blocked'],
'signature': 'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V',
},
'Ljava/util/zip/ZipFile;-><clinit>()V': {
None: ['blocked'],
'signature': 'Ljava/util/zip/ZipFile;-><clinit>()V',
},
}
self.assertEqual(expected, subset)
def test_extract_subset_invalid_pattern_wildcard_and_member(self):
monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
patterns = 'Ljava/lang/*;->hashCode()I'
with self.assertRaises(Exception) as context:
self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
self.assertTrue("contains wildcard * and member signature hashCode()I" in str(context.exception))
def test_read_trie_duplicate(self):
with self.assertRaises(Exception) as context:
self.read_flag_trie_from_string('''
Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
Ljava/lang/Object;->hashCode()I,blocked
''')
self.assertTrue("Duplicate signature: Ljava/lang/Object;->hashCode()I" in str(context.exception))
def test_read_trie_missing_member(self):
with self.assertRaises(Exception) as context:
self.read_flag_trie_from_string('''
Ljava/lang/Object,public-api,system-api,test-api
''')
self.assertTrue("Invalid signature: Ljava/lang/Object, does not identify a specific member" in str(context.exception))
def test_match(self): def test_match(self):
monolithic = self.read_signature_csv_from_string_as_dict(''' monolithic = self.read_signature_csv_from_string_as_dict('''
Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api