Create custom diff tool to compare stub contents
Context: - Create a tool to analyze loose equivalence of the stubs in two directories - The tool can also analyze strict equivalence of the directory structure of the stubs in two directories - Analyze text to compare loose equivalence of the stub contents; add functionality to pass `skip_words` as an argument, an optional list of words that the tool does not count as a diff - The tool can be used locally to compare stub contents, and does not contribute to the build process Test: m Change-Id: I74563a9a24ecdde939be2ce37b9096a9aeb4920a
This commit is contained in:
parent
03b846ff37
commit
3d38b6d9c8
1 changed files with 328 additions and 0 deletions
328
tools/stub_diff_analyzer.py
Normal file
328
tools/stub_diff_analyzer.py
Normal file
|
@ -0,0 +1,328 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (C) 2022 The Android Open Source Project
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from sys import exit
|
||||
from typing import List
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from difflib import Differ
|
||||
from re import split
|
||||
from tqdm import tqdm
|
||||
import argparse
|
||||
|
||||
|
||||
# Number of leading characters of each difflib.Differ output line that hold
# the line's code; the remainder of the line is the compared text.
DIFFER_CODE_LEN = 2
|
||||
|
||||
class DifferCodes:
    """Two-character line prefixes emitted by ``difflib.Differ.compare``.

    Every code is exactly two characters long so that it can be compared
    against the first two characters of a Differ output line.
    """

    # Line present in both sequences (two spaces).
    COMMON = '  '
    # Line present only in the first sequence.
    UNIQUE_FIRST = '- '
    # Line present only in the second sequence.
    UNIQUE_SECOND = '+ '
    # Hint line marking intraline differences; present in neither sequence.
    DIFF_IDENT = '? '
|
||||
|
||||
class FilesDiffAnalyzer:
|
||||
def __init__(self, args) -> None:
|
||||
self.out_dir = args.out_dir
|
||||
self.show_diff = args.show_diff
|
||||
self.skip_words = args.skip_words
|
||||
self.first_dir = args.first_dir
|
||||
self.second_dir = args.second_dir
|
||||
self.include_common = args.include_common
|
||||
|
||||
self.first_dir_files = self.get_files(self.first_dir)
|
||||
self.second_dir_files = self.get_files(self.second_dir)
|
||||
self.common_file_map = defaultdict(set)
|
||||
|
||||
self.map_common_files(self.first_dir_files, self.first_dir)
|
||||
self.map_common_files(self.second_dir_files, self.second_dir)
|
||||
|
||||
def get_files(self, dir: str) -> List[str]:
|
||||
"""Get all files directory in the input directory including the files in the subdirectories
|
||||
|
||||
Recursively finds all files in the input directory.
|
||||
Returns a list of file directory strings, which do not include directories but only files.
|
||||
List is sorted in alphabetical order of the file directories.
|
||||
|
||||
Args:
|
||||
dir: Directory to get the files. String.
|
||||
|
||||
Returns:
|
||||
A list of file directory strings within the input directory.
|
||||
Sorted in Alphabetical order.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: An error occurred accessing the non-existing directory
|
||||
"""
|
||||
|
||||
if not dir_exists(dir):
|
||||
raise FileNotFoundError("Directory does not exist")
|
||||
|
||||
if dir[:-2] != "**":
|
||||
if dir[:-1] != "/":
|
||||
dir += "/"
|
||||
dir += "**"
|
||||
|
||||
return [file for file in sorted(glob(dir, recursive=True)) if Path(file).is_file()]
|
||||
|
||||
def map_common_files(self, files: List[str], dir: str) -> None:
|
||||
for file in files:
|
||||
file_name = file.split(dir, 1)[-1]
|
||||
self.common_file_map[file_name].add(dir)
|
||||
return
|
||||
|
||||
def compare_file_contents(self, first_file: str, second_file: str) -> List[str]:
|
||||
"""Compare the contents of the files and return different lines
|
||||
|
||||
Given two file directory strings, compare the contents of the two files
|
||||
and return the list of file contents string prepended with unique identifier codes.
|
||||
The identifier codes include:
|
||||
- ' '(two empty space characters): Line common to two files
|
||||
- '- '(minus followed by a space) : Line unique to first file
|
||||
- '+ '(plus followed by a space) : Line unique to second file
|
||||
|
||||
Args:
|
||||
first_file: First file directory string to compare the content
|
||||
second_file: Second file directory string to compare the content
|
||||
|
||||
Returns:
|
||||
A list of the file content strings. For example:
|
||||
|
||||
[
|
||||
" Foo",
|
||||
"- Bar",
|
||||
"+ Baz"
|
||||
]
|
||||
"""
|
||||
|
||||
d = Differ()
|
||||
first_file_contents = sort_methods(get_file_contents(first_file))
|
||||
second_file_contents = sort_methods(get_file_contents(second_file))
|
||||
diff = list(d.compare(first_file_contents, second_file_contents))
|
||||
ret = [f"diff {first_file} {second_file}"]
|
||||
|
||||
idx = 0
|
||||
while idx < len(diff):
|
||||
line = diff[idx]
|
||||
line_code = line[:DIFFER_CODE_LEN]
|
||||
|
||||
match line_code:
|
||||
case DifferCodes.COMMON:
|
||||
if self.include_common:
|
||||
ret.append(line)
|
||||
|
||||
case DifferCodes.UNIQUE_FIRST:
|
||||
# Should compare line
|
||||
if (idx < len(diff) - 1 and
|
||||
(next_line_code := diff[idx + 1][:DIFFER_CODE_LEN])
|
||||
not in (DifferCodes.UNIQUE_FIRST, DifferCodes.COMMON)):
|
||||
delta = 1 if next_line_code == DifferCodes.UNIQUE_SECOND else 2
|
||||
line_to_compare = diff[idx + delta]
|
||||
if self.lines_differ(line, line_to_compare):
|
||||
ret.extend([line, line_to_compare])
|
||||
else:
|
||||
if self.include_common:
|
||||
ret.append(DifferCodes.COMMON +
|
||||
line[DIFFER_CODE_LEN:])
|
||||
idx += delta
|
||||
else:
|
||||
ret.append(line)
|
||||
|
||||
case DifferCodes.UNIQUE_SECOND:
|
||||
ret.append(line)
|
||||
|
||||
case DifferCodes.DIFF_IDENT:
|
||||
pass
|
||||
idx += 1
|
||||
return ret
|
||||
|
||||
def lines_differ(self, line1: str, line2: str) -> bool:
|
||||
"""Check if the input lines are different or not
|
||||
|
||||
Compare the two lines word by word and check if the two lines are different or not.
|
||||
If the different words in the comparing lines are included in skip_words,
|
||||
the lines are not considered different.
|
||||
|
||||
Args:
|
||||
line1: first line to compare
|
||||
line2: second line to compare
|
||||
|
||||
Returns:
|
||||
Boolean value indicating if the two lines are different or not
|
||||
|
||||
"""
|
||||
# Split by '.' or ' '(whitespace)
|
||||
def split_words(line: str) -> List[str]:
|
||||
return split('\\s|\\.', line[DIFFER_CODE_LEN:])
|
||||
|
||||
line1_words, line2_words = split_words(line1), split_words(line2)
|
||||
if len(line1_words) != len(line2_words):
|
||||
return True
|
||||
|
||||
for word1, word2 in zip(line1_words, line2_words):
|
||||
if word1 != word2:
|
||||
# not check if words are equal to skip word, but
|
||||
# check if words contain skip word as substring
|
||||
if all(sw not in word1 and sw not in word2 for sw in self.skip_words):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def analyze(self) -> None:
|
||||
"""Analyze file contents in both directories and write to output or console.
|
||||
"""
|
||||
for file in tqdm(sorted(self.common_file_map.keys())):
|
||||
val = self.common_file_map[file]
|
||||
|
||||
# When file exists in both directories
|
||||
lines = list()
|
||||
if val == set([self.first_dir, self.second_dir]):
|
||||
lines = self.compare_file_contents(
|
||||
self.first_dir + file, self.second_dir + file)
|
||||
else:
|
||||
existing_dir, not_existing_dir = (
|
||||
(self.first_dir, self.second_dir) if self.first_dir in val
|
||||
else (self.second_dir, self.first_dir))
|
||||
|
||||
lines = [f"{not_existing_dir}{file} does not exist."]
|
||||
|
||||
if self.show_diff:
|
||||
lines.append(f"Content of {existing_dir}{file}: \n")
|
||||
lines.extend(get_file_contents(existing_dir + file))
|
||||
|
||||
self.write(lines)
|
||||
|
||||
def write(self, lines: List[str]) -> None:
|
||||
if self.out_dir == "":
|
||||
pprint(lines)
|
||||
else:
|
||||
write_lines(self.out_dir, lines)
|
||||
|
||||
###
|
||||
# Helper functions
|
||||
###
|
||||
|
||||
def sort_methods(lines: List[str]) -> List[str]:
    """Sort class methods in the file contents by alphabetical order.

    Given lines of Java file contents, return lines with class methods sorted
    in alphabetical order. Also omit empty lines or lines with spaces.

    A class body starts at the first line containing the text "class" and
    ends at the next line whose first character is "}". If the input ends
    before such a closing line is seen, the lines collected so far are still
    emitted (sorted) rather than being dropped.

    For example:
        l = [
            "package android.test;",
            "",
            "public static final int ORANGE = 1;",
            "",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "}"
        ]
        sort_methods(l) returns
        [
            "package android.test;",
            "public static final int ORANGE = 1;",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "}"
        ]

    Args:
        lines: List of strings consisted of Java file contents.

    Returns:
        A list of string with sorted class methods.
    """
    def is_not_blank(l: str) -> bool:
        return bool(l) and not l.isspace()

    ret = []
    in_class = False
    # Non-blank lines of the class body currently being collected.
    buffer = []

    for line in lines:
        if not in_class:
            if "class" in line:
                in_class = True
                ret.append(line)
            elif is_not_blank(line):
                # Package info, static variables, etc. are kept in order.
                ret.append(line)
        elif line.startswith("}"):
            # End of class: emit the collected body sorted, then the brace.
            in_class = False
            ret.extend(sorted(buffer))
            buffer = []
            ret.append(line)
        elif is_not_blank(line):
            buffer.append(line)

    # A class body that was never closed must not lose its content.
    ret.extend(sorted(buffer))
    return ret
|
||||
|
||||
def get_file_contents(file_path: str) -> List[str]:
    """Read a text file and return its lines without the trailing newline.

    Args:
        file_path: Path of the file to read.

    Returns:
        The lines of the file in order, each with trailing '\\n' removed.
    """
    with open(file_path) as source:
        return [raw_line.rstrip('\n') for raw_line in source]
|
||||
|
||||
def pprint(l: List[str]) -> None:
    """Print every element of the list on its own line of standard output."""
    if l:
        print(*l, sep="\n")
|
||||
|
||||
def write_lines(out_dir: str, lines: List[str]) -> None:
    """Append the lines to a file, followed by one blank separator line.

    Args:
        out_dir: Path of the file to append to; it is opened in append mode.
        lines: Lines to write; a newline is added after each of them.
    """
    with open(out_dir, "a") as sink:
        for entry in lines:
            sink.write(entry + '\n')
        # Blank line separating this batch from the next one.
        sink.write("\n")
|
||||
|
||||
def dir_exists(dir: str) -> bool:
    """Report whether the given path exists on the file system.

    The check is a plain existence test, so it is also true for a path that
    points at a regular file.
    """
    target = Path(dir)
    return target.exists()
|
||||
|
||||
if __name__ == '__main__':
    # Command line entry point: parse the options, then compare both trees.
    parser = argparse.ArgumentParser()
    parser.add_argument('first_dir', action='store', type=str,
                        help="first path to compare file directory and contents")
    parser.add_argument('second_dir', action='store', type=str,
                        help="second path to compare file directory and contents")
    parser.add_argument('--out', dest='out_dir',
                        action='store', default="", type=str,
                        help="optional directory to write log. If not set, will print to console")
    parser.add_argument('--show-diff-file', dest='show_diff',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the content of the file unique to each directories")
    # Adjacent literals instead of a backslash continuation, so that the
    # indentation of the source does not end up inside the help text.
    parser.add_argument('--include-common', dest='include_common',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the contents common to both files as well, "
                             "instead of printing only diff lines.")
    parser.add_argument('--skip-words', nargs='+',
                        dest='skip_words', default=[], help="optional words to skip in comparison")

    args = parser.parse_args()

    # argparse already rejects missing positionals; this catches paths given
    # as empty strings. parser.error prints the usage and exits with a
    # non-zero status so that callers can detect the failed invocation.
    if not args.first_dir or not args.second_dir:
        parser.error("first_dir and second_dir must be non-empty paths")

    analyzer = FilesDiffAnalyzer(args)
    analyzer.analyze()
|
Loading…
Reference in a new issue