329 lines
12 KiB
Python
329 lines
12 KiB
Python
|
#!/usr/bin/env python
|
||
|
#
|
||
|
# Copyright (C) 2022 The Android Open Source Project
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
from sys import exit
|
||
|
from typing import List
|
||
|
from glob import glob
|
||
|
from pathlib import Path
|
||
|
from collections import defaultdict
|
||
|
from difflib import Differ
|
||
|
from re import split
|
||
|
from tqdm import tqdm
|
||
|
import argparse
|
||
|
|
||
|
|
||
|
# Number of leading characters of each diff line that hold its code; the
# rest of the line is the file content (see the `line[:DIFFER_CODE_LEN]` and
# `line[DIFFER_CODE_LEN:]` slices below).
DIFFER_CODE_LEN = 2
|
||
|
|
||
|
class DifferCodes:
    """Two-character codes that prefix every line produced by difflib.Differ.

    Each code is exactly two characters long so that it can be compared
    against the two-character prefix sliced off a diff line.
    """

    # Line present in both inputs (two spaces, not one).
    COMMON = '  '
    # Line present only in the first input.
    UNIQUE_FIRST = '- '
    # Line present only in the second input.
    UNIQUE_SECOND = '+ '
    # Hint line marking intraline differences; not part of either input.
    DIFF_IDENT = '? '
|
||
|
|
||
|
class FilesDiffAnalyzer:
|
||
|
def __init__(self, args) -> None:
|
||
|
self.out_dir = args.out_dir
|
||
|
self.show_diff = args.show_diff
|
||
|
self.skip_words = args.skip_words
|
||
|
self.first_dir = args.first_dir
|
||
|
self.second_dir = args.second_dir
|
||
|
self.include_common = args.include_common
|
||
|
|
||
|
self.first_dir_files = self.get_files(self.first_dir)
|
||
|
self.second_dir_files = self.get_files(self.second_dir)
|
||
|
self.common_file_map = defaultdict(set)
|
||
|
|
||
|
self.map_common_files(self.first_dir_files, self.first_dir)
|
||
|
self.map_common_files(self.second_dir_files, self.second_dir)
|
||
|
|
||
|
def get_files(self, dir: str) -> List[str]:
|
||
|
"""Get all files directory in the input directory including the files in the subdirectories
|
||
|
|
||
|
Recursively finds all files in the input directory.
|
||
|
Returns a list of file directory strings, which do not include directories but only files.
|
||
|
List is sorted in alphabetical order of the file directories.
|
||
|
|
||
|
Args:
|
||
|
dir: Directory to get the files. String.
|
||
|
|
||
|
Returns:
|
||
|
A list of file directory strings within the input directory.
|
||
|
Sorted in Alphabetical order.
|
||
|
|
||
|
Raises:
|
||
|
FileNotFoundError: An error occurred accessing the non-existing directory
|
||
|
"""
|
||
|
|
||
|
if not dir_exists(dir):
|
||
|
raise FileNotFoundError("Directory does not exist")
|
||
|
|
||
|
if dir[:-2] != "**":
|
||
|
if dir[:-1] != "/":
|
||
|
dir += "/"
|
||
|
dir += "**"
|
||
|
|
||
|
return [file for file in sorted(glob(dir, recursive=True)) if Path(file).is_file()]
|
||
|
|
||
|
def map_common_files(self, files: List[str], dir: str) -> None:
|
||
|
for file in files:
|
||
|
file_name = file.split(dir, 1)[-1]
|
||
|
self.common_file_map[file_name].add(dir)
|
||
|
return
|
||
|
|
||
|
def compare_file_contents(self, first_file: str, second_file: str) -> List[str]:
|
||
|
"""Compare the contents of the files and return different lines
|
||
|
|
||
|
Given two file directory strings, compare the contents of the two files
|
||
|
and return the list of file contents string prepended with unique identifier codes.
|
||
|
The identifier codes include:
|
||
|
- ' '(two empty space characters): Line common to two files
|
||
|
- '- '(minus followed by a space) : Line unique to first file
|
||
|
- '+ '(plus followed by a space) : Line unique to second file
|
||
|
|
||
|
Args:
|
||
|
first_file: First file directory string to compare the content
|
||
|
second_file: Second file directory string to compare the content
|
||
|
|
||
|
Returns:
|
||
|
A list of the file content strings. For example:
|
||
|
|
||
|
[
|
||
|
" Foo",
|
||
|
"- Bar",
|
||
|
"+ Baz"
|
||
|
]
|
||
|
"""
|
||
|
|
||
|
d = Differ()
|
||
|
first_file_contents = sort_methods(get_file_contents(first_file))
|
||
|
second_file_contents = sort_methods(get_file_contents(second_file))
|
||
|
diff = list(d.compare(first_file_contents, second_file_contents))
|
||
|
ret = [f"diff {first_file} {second_file}"]
|
||
|
|
||
|
idx = 0
|
||
|
while idx < len(diff):
|
||
|
line = diff[idx]
|
||
|
line_code = line[:DIFFER_CODE_LEN]
|
||
|
|
||
|
match line_code:
|
||
|
case DifferCodes.COMMON:
|
||
|
if self.include_common:
|
||
|
ret.append(line)
|
||
|
|
||
|
case DifferCodes.UNIQUE_FIRST:
|
||
|
# Should compare line
|
||
|
if (idx < len(diff) - 1 and
|
||
|
(next_line_code := diff[idx + 1][:DIFFER_CODE_LEN])
|
||
|
not in (DifferCodes.UNIQUE_FIRST, DifferCodes.COMMON)):
|
||
|
delta = 1 if next_line_code == DifferCodes.UNIQUE_SECOND else 2
|
||
|
line_to_compare = diff[idx + delta]
|
||
|
if self.lines_differ(line, line_to_compare):
|
||
|
ret.extend([line, line_to_compare])
|
||
|
else:
|
||
|
if self.include_common:
|
||
|
ret.append(DifferCodes.COMMON +
|
||
|
line[DIFFER_CODE_LEN:])
|
||
|
idx += delta
|
||
|
else:
|
||
|
ret.append(line)
|
||
|
|
||
|
case DifferCodes.UNIQUE_SECOND:
|
||
|
ret.append(line)
|
||
|
|
||
|
case DifferCodes.DIFF_IDENT:
|
||
|
pass
|
||
|
idx += 1
|
||
|
return ret
|
||
|
|
||
|
def lines_differ(self, line1: str, line2: str) -> bool:
|
||
|
"""Check if the input lines are different or not
|
||
|
|
||
|
Compare the two lines word by word and check if the two lines are different or not.
|
||
|
If the different words in the comparing lines are included in skip_words,
|
||
|
the lines are not considered different.
|
||
|
|
||
|
Args:
|
||
|
line1: first line to compare
|
||
|
line2: second line to compare
|
||
|
|
||
|
Returns:
|
||
|
Boolean value indicating if the two lines are different or not
|
||
|
|
||
|
"""
|
||
|
# Split by '.' or ' '(whitespace)
|
||
|
def split_words(line: str) -> List[str]:
|
||
|
return split('\\s|\\.', line[DIFFER_CODE_LEN:])
|
||
|
|
||
|
line1_words, line2_words = split_words(line1), split_words(line2)
|
||
|
if len(line1_words) != len(line2_words):
|
||
|
return True
|
||
|
|
||
|
for word1, word2 in zip(line1_words, line2_words):
|
||
|
if word1 != word2:
|
||
|
# not check if words are equal to skip word, but
|
||
|
# check if words contain skip word as substring
|
||
|
if all(sw not in word1 and sw not in word2 for sw in self.skip_words):
|
||
|
return True
|
||
|
|
||
|
return False
|
||
|
|
||
|
def analyze(self) -> None:
|
||
|
"""Analyze file contents in both directories and write to output or console.
|
||
|
"""
|
||
|
for file in tqdm(sorted(self.common_file_map.keys())):
|
||
|
val = self.common_file_map[file]
|
||
|
|
||
|
# When file exists in both directories
|
||
|
lines = list()
|
||
|
if val == set([self.first_dir, self.second_dir]):
|
||
|
lines = self.compare_file_contents(
|
||
|
self.first_dir + file, self.second_dir + file)
|
||
|
else:
|
||
|
existing_dir, not_existing_dir = (
|
||
|
(self.first_dir, self.second_dir) if self.first_dir in val
|
||
|
else (self.second_dir, self.first_dir))
|
||
|
|
||
|
lines = [f"{not_existing_dir}{file} does not exist."]
|
||
|
|
||
|
if self.show_diff:
|
||
|
lines.append(f"Content of {existing_dir}{file}: \n")
|
||
|
lines.extend(get_file_contents(existing_dir + file))
|
||
|
|
||
|
self.write(lines)
|
||
|
|
||
|
def write(self, lines: List[str]) -> None:
|
||
|
if self.out_dir == "":
|
||
|
pprint(lines)
|
||
|
else:
|
||
|
write_lines(self.out_dir, lines)
|
||
|
|
||
|
###
|
||
|
# Helper functions
|
||
|
###
|
||
|
|
||
|
def sort_methods(lines: List[str]) -> List[str]:
    """Sort class methods in the file contents by alphabetical order

    Given lines of Java file contents, return lines with class methods sorted in alphabetical order.
    Also omit empty lines or lines with spaces.
    For example:
        l = [
            "package android.test;",
            "",
            "public static final int ORANGE = 1;",
            "",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "}"
        ]
        sort_methods(l) returns
        [
            "package android.test;",
            "public static final int ORANGE = 1;",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "}"
        ]

    A class starts at any line containing "class" and ends at the next line
    whose first character is "}". If the input ends before such a closing
    line, the lines collected so far are still emitted (sorted) rather than
    dropped.

    Args:
        lines: List of strings consisted of Java file contents.

    Returns:
        A list of string with sorted class methods.

    """
    def is_not_blank(l: str) -> bool:
        return bool(l) and not l.isspace()

    ret = []

    in_class = False
    # Non-blank lines of the class body currently being collected.
    buffer = []
    for line in lines:
        if not in_class:
            if "class" in line:
                in_class = True
                ret.append(line)
            elif is_not_blank(line):
                # Adding static variables, package info, etc.
                # Skipping empty or space lines.
                ret.append(line)
        elif line.startswith("}"):
            # End of class
            in_class = False
            ret.extend(sorted(buffer))
            buffer = []
            ret.append(line)
        elif is_not_blank(line):
            buffer.append(line)

    # Unterminated class: keep its body instead of silently losing it.
    ret.extend(sorted(buffer))

    return ret
|
||
|
|
||
|
def get_file_contents(file_path: str) -> List[str]:
    """Read a text file and return its lines without trailing newlines.

    Args:
        file_path: Path of the file to read.

    Returns:
        The lines of the file in order, each with trailing '\\n' characters
        removed.
    """
    # The context manager closes the file when the block is left.
    with open(file_path) as source:
        return [raw_line.rstrip('\n') for raw_line in source]
|
||
|
|
||
|
def pprint(l: List[str]) -> None:
    """Print every element of the list on its own line.

    Args:
        l: Lines to print. Nothing is printed for an empty list.
    """
    if l:
        print(*l, sep="\n")
|
||
|
|
||
|
def write_lines(out_dir: str, lines: List[str]) -> None:
    """Append the lines to a file, followed by one blank separator line.

    Args:
        out_dir: Path of the file to append to.
        lines: Lines to write; a newline is added after each one.
    """
    # The context manager closes the file when the block is left.
    with open(out_dir, "a") as log:
        for entry in lines:
            log.write(entry + '\n')
        log.write("\n")
|
||
|
|
||
|
def dir_exists(dir: str) -> bool:
    """Return whether the path exists and is a directory.

    A path that exists but is a regular file is not a directory and yields
    False.

    Args:
        dir: Path to check.

    Returns:
        True if dir is an existing directory, False otherwise.
    """
    return Path(dir).is_dir()
|
||
|
|
||
|
# Script entry point: parse the command line and run the analysis.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('first_dir', action='store', type=str,
                        help="first path to compare file directory and contents")
    parser.add_argument('second_dir', action='store', type=str,
                        help="second path to compare file directory and contents")
    parser.add_argument('--out', dest='out_dir',
                        action='store', default="", type=str,
                        help="optional directory to write log. If not set, will print to console")
    parser.add_argument('--show-diff-file', dest='show_diff',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the content of the file unique to each directories")
    # Adjacent literals keep "as well," and "instead" separated by exactly
    # one space; a backslash continuation inside the literal does not.
    parser.add_argument('--include-common', dest='include_common',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the contents common to both files as well, "
                             "instead of printing only diff lines.")
    parser.add_argument('--skip-words', nargs='+',
                        dest='skip_words', default=[], help="optional words to skip in comparison")

    args = parser.parse_args()

    # Empty strings satisfy argparse's required positionals; reject them.
    if not args.first_dir or not args.second_dir:
        parser.print_usage()
        exit(0)

    analyzer = FilesDiffAnalyzer(args)
    analyzer.analyze()
|