#!/usr/bin/env python
#
# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Compare the files found under two directories and report the differences.

Every file path (relative to its root directory) is looked up in both
directories. Files present in both are diffed line by line after their class
members have been sorted; files present in only one directory are reported as
missing from the other. The report is printed to the console or appended to a
log file.
"""

import argparse
import os
from collections import defaultdict
from difflib import Differ
from glob import escape, glob
from pathlib import Path
from re import split
from sys import exit
from typing import List

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; fall back to plain iteration so the
    # tool still works when tqdm is not installed.
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged (stand-in for tqdm.tqdm)."""
        return iterable


# Length of the prefix that difflib.Differ puts in front of every line.
DIFFER_CODE_LEN = 2


class DifferCodes:
    """Two-character line prefixes emitted by difflib.Differ."""

    # Built from DIFFER_CODE_LEN so the prefix is unambiguously two spaces.
    COMMON = " " * DIFFER_CODE_LEN  # line common to both files
    UNIQUE_FIRST = "- "             # line unique to the first file
    UNIQUE_SECOND = "+ "            # line unique to the second file
    DIFF_IDENT = "? "               # intraline hint, present in neither file


class FilesDiffAnalyzer:
    """Diff the contents of two directories file by file."""

    def __init__(self, args) -> None:
        """Collect the files of both directories.

        Args:
            args: Parsed command line namespace providing ``out_dir``,
                ``show_diff``, ``skip_words``, ``first_dir``, ``second_dir``
                and ``include_common``.

        Raises:
            FileNotFoundError: If either directory does not exist.
        """
        self.out_dir = args.out_dir
        self.show_diff = args.show_diff
        self.skip_words = args.skip_words
        self.first_dir = args.first_dir
        self.second_dir = args.second_dir
        self.include_common = args.include_common

        self.first_dir_files = self.get_files(self.first_dir)
        self.second_dir_files = self.get_files(self.second_dir)
        # Relative file path -> set of root directories that contain it.
        self.common_file_map = defaultdict(set)

        self.map_common_files(self.first_dir_files, self.first_dir)
        self.map_common_files(self.second_dir_files, self.second_dir)

    def get_files(self, dir: str) -> List[str]:
        """Get the paths of all files in the directory, including subdirectories.

        Args:
            dir: Directory to search recursively.

        Returns:
            Paths of the regular files found under ``dir`` (directories are
            excluded), sorted in alphabetical order.

        Raises:
            FileNotFoundError: If ``dir`` is not an existing directory.
        """
        if not dir_exists(dir):
            raise FileNotFoundError("Directory does not exist")

        # os.path.join copes with an optional trailing separator; escape()
        # keeps glob metacharacters in the directory name literal.
        pattern = os.path.join(escape(dir), "**")
        return [path for path in sorted(glob(pattern, recursive=True))
                if Path(path).is_file()]

    def map_common_files(self, files: List[str], dir: str) -> None:
        """Record, for every file, that its relative path exists under ``dir``.

        Args:
            files: File paths located under ``dir``.
            dir: Root directory the paths are made relative to.
        """
        for file in files:
            # A relative path is the same key no matter whether ``dir`` was
            # given with or without a trailing separator.
            self.common_file_map[os.path.relpath(file, dir)].add(dir)

    def compare_file_contents(self, first_file: str, second_file: str) -> List[str]:
        """Compare the contents of two files and return the differing lines.

        Both files are read, passed through ``sort_methods`` and diffed with
        ``difflib.Differ``. Each returned line is prefixed with a code:
            - two space characters: line common to both files
            - '- ' (minus followed by a space): line unique to the first file
            - '+ ' (plus followed by a space): line unique to the second file
        Common lines are only included when ``include_common`` is set. A
        '-'/'+' pair that ``lines_differ`` considers equal is reported as a
        single common line (or dropped when common lines are not included).

        Args:
            first_file: Path of the first file to compare.
            second_file: Path of the second file to compare.

        Returns:
            A header line ``diff <first_file> <second_file>`` followed by the
            prefixed content lines.
        """
        first_lines = sort_methods(get_file_contents(first_file))
        second_lines = sort_methods(get_file_contents(second_file))
        diff = list(Differ().compare(first_lines, second_lines))
        ret = [f"diff {first_file} {second_file}"]

        idx = 0
        while idx < len(diff):
            line = diff[idx]
            line_code = line[:DIFFER_CODE_LEN]

            if line_code == DifferCodes.COMMON:
                if self.include_common:
                    ret.append(line)
            elif line_code == DifferCodes.UNIQUE_FIRST:
                next_code = (diff[idx + 1][:DIFFER_CODE_LEN]
                             if idx + 1 < len(diff) else None)
                if next_code in (DifferCodes.UNIQUE_SECOND, DifferCodes.DIFF_IDENT):
                    # The '+ ' counterpart directly follows, or follows the
                    # '? ' hint line (Differ always emits '+ ' after it).
                    delta = 1 if next_code == DifferCodes.UNIQUE_SECOND else 2
                    line_to_compare = diff[idx + delta]
                    if self.lines_differ(line, line_to_compare):
                        ret.extend([line, line_to_compare])
                    elif self.include_common:
                        ret.append(DifferCodes.COMMON + line[DIFFER_CODE_LEN:])
                    # Skip the counterpart line that was just consumed.
                    idx += delta
                else:
                    ret.append(line)
            elif line_code == DifferCodes.UNIQUE_SECOND:
                ret.append(line)
            # DifferCodes.DIFF_IDENT lines are hints only and are dropped.
            idx += 1
        return ret

    def lines_differ(self, line1: str, line2: str) -> bool:
        """Check whether two Differ-prefixed lines are different.

        The lines are split on whitespace and '.' and compared word by word.
        A pair of unequal words is tolerated when any skip word occurs as a
        substring of either word.

        Args:
            line1: First line to compare, including its Differ prefix.
            line2: Second line to compare, including its Differ prefix.

        Returns:
            True if the lines are considered different, False otherwise.
        """
        def split_words(line: str) -> List[str]:
            return split(r'\s|\.', line[DIFFER_CODE_LEN:])

        line1_words, line2_words = split_words(line1), split_words(line2)
        if len(line1_words) != len(line2_words):
            return True

        for word1, word2 in zip(line1_words, line2_words):
            # Skip words are matched as substrings, not as whole words.
            if word1 != word2 and all(
                    sw not in word1 and sw not in word2 for sw in self.skip_words):
                return True

        return False

    def analyze(self) -> None:
        """Analyze the files of both directories and write the result.

        Files present in both directories are diffed. For a file present in
        only one directory a "does not exist" line is reported, followed by
        the file content when ``show_diff`` is set.
        """
        both_dirs = {self.first_dir, self.second_dir}
        for file in tqdm(sorted(self.common_file_map)):
            containing_dirs = self.common_file_map[file]

            if containing_dirs == both_dirs:
                lines = self.compare_file_contents(
                    os.path.join(self.first_dir, file),
                    os.path.join(self.second_dir, file))
            else:
                existing_dir, not_existing_dir = (
                    (self.first_dir, self.second_dir)
                    if self.first_dir in containing_dirs
                    else (self.second_dir, self.first_dir))
                existing_file = os.path.join(existing_dir, file)
                missing_file = os.path.join(not_existing_dir, file)

                lines = [f"{missing_file} does not exist."]
                if self.show_diff:
                    lines.append(f"Content of {existing_file}: \n")
                    lines.extend(get_file_contents(existing_file))

            self.write(lines)

    def write(self, lines: List[str]) -> None:
        """Print ``lines`` to the console, or append them to ``out_dir`` if set."""
        if not self.out_dir:
            pprint(lines)
        else:
            write_lines(self.out_dir, lines)


###
# Helper functions
###

def sort_methods(lines: List[str]) -> List[str]:
    """Sort class methods in the file contents by alphabetical order.

    Given lines of Java file contents, return lines with class methods sorted
    in alphabetical order. Empty lines and lines with only spaces are omitted.

    For example:
        l = [
            "package android.test;",
            "",
            "public static final int ORANGE = 1;",
            "",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "}"
        ]
        sort_methods(l) returns
        [
            "package android.test;",
            "public static final int ORANGE = 1;",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "}"
        ]

    Args:
        lines: Lines of a Java file.

    Returns:
        The non-blank lines, with the lines of each class body sorted.
    """
    def is_not_blank(l: str) -> bool:
        return bool(l) and not l.isspace()

    ret = []
    in_class = False
    buffer = []
    for line in lines:
        if not in_class:
            # Heuristic: any line containing "class" opens a class body.
            if "class" in line:
                in_class = True
                ret.append(line)
            elif is_not_blank(line):
                # Static variables, package info, etc.
                ret.append(line)
        elif line and line[0] == "}":
            # A closing brace in the first column ends the class.
            in_class = False
            ret.extend(sorted(buffer))
            buffer = []
            ret.append(line)
        elif is_not_blank(line):
            buffer.append(line)

    # Do not lose class members when the closing brace is never seen.
    ret.extend(sorted(buffer))
    return ret


def get_file_contents(file_path: str) -> List[str]:
    """Return the lines of ``file_path`` with trailing newlines removed."""
    with open(file_path) as f:
        return [line.rstrip('\n') for line in f]


def pprint(l: List[str]) -> None:
    """Print every line of ``l`` to the console."""
    for line in l:
        print(line)


def write_lines(out_dir: str, lines: List[str]) -> None:
    """Append ``lines`` and a trailing blank line to the file at ``out_dir``."""
    with open(out_dir, "a") as f:
        f.writelines(line + '\n' for line in lines)
        f.write("\n")


def dir_exists(dir: str) -> bool:
    """Return True if ``dir`` is an existing directory."""
    return Path(dir).is_dir()


def main() -> None:
    """Parse the command line arguments and run the analysis."""
    parser = argparse.ArgumentParser()
    parser.add_argument('first_dir', action='store', type=str,
                        help="first path to compare file directory and contents")
    parser.add_argument('second_dir', action='store', type=str,
                        help="second path to compare file directory and contents")
    parser.add_argument('--out', dest='out_dir',
                        action='store', default="", type=str,
                        help="optional directory to write log. "
                             "If not set, will print to console")
    parser.add_argument('--show-diff-file', dest='show_diff',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the content "
                             "of the file unique to each directories")
    parser.add_argument('--include-common', dest='include_common',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the contents "
                             "common to both files as well, "
                             "instead of printing only diff lines.")
    parser.add_argument('--skip-words', nargs='+',
                        dest='skip_words', default=[],
                        help="optional words to skip in comparison")

    args = parser.parse_args()

    if not args.first_dir or not args.second_dir:
        parser.print_usage()
        exit(0)

    analyzer = FilesDiffAnalyzer(args)
    analyzer.analyze()


if __name__ == '__main__':
    main()