208 lines
5.1 KiB
Python
Executable file
208 lines
5.1 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# Run with directory (or file) arguments from any directory; with no
# arguments the current directory is scanned. No special setup is required.
|
|
|
|
import os
|
|
from pathlib import Path
|
|
import re
|
|
import sys
|
|
from typing import Sequence
|
|
|
|
# When True, warn_verbose() forwards its messages to warn(); when False they
# are silently dropped.
VERBOSE = False

# Every distinct cleaned-up copyright notice collected so far.
# extract_copyright_at() adds to this set and main() prints it in sorted order.
copyrights = set()
|
|
|
|
|
|
def warn(s):
    """Write ``s`` to standard error as a single ``warning: ...`` line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
|
|
|
|
|
|
def warn_verbose(s):
    """Forward ``s`` to warn(), but only when the module-level VERBOSE is set."""
    if not VERBOSE:
        return
    warn(s)
|
|
|
|
|
|
def is_interesting(path_str: str) -> bool:
    """Decide whether a file should be scanned for copyright notices.

    The whole path is lower-cased first, so the suffix and file-name
    comparisons below are case-insensitive.

    Args:
        path_str: Path of the candidate file.

    Returns:
        False when the file has one of the skipped extensions, is named
        "notice", "readme" or "pylintrc", or matches the "*~" backup-file
        pattern; True otherwise.
    """
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    )
    skipped_names = ("notice", "readme", "pylintrc")

    candidate = Path(path_str.lower())
    is_boring = (
        candidate.suffix in skipped_suffixes
        or candidate.name in skipped_names
        # Backup files for some editors.
        or candidate.match("*~")
    )
    return not is_boring
|
|
|
|
|
|
def is_auto_generated(content):
    """Report whether ``content`` carries a known auto-generation marker.

    Args:
        content: Full text of a file.

    Returns:
        True when any of the marker phrases below occurs anywhere in
        ``content`` (case-sensitive substring match); False otherwise.
    """
    markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in markers)
|
|
|
|
|
|
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Tell whether ``line`` terminates a copyright header.

    Args:
        line: The line being examined.
        first_line_was_hash: True when the header started on a line that
            begins with "#"; in that case an empty line ends the header.

    Returns:
        True when ``line`` is empty in a "#"-style header, or when it
        contains any of the terminator markers below; False otherwise.
    """
    if first_line_was_hash and not line:
        return True

    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(marker in line for marker in terminators)
|
|
|
|
|
|
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright header around ``lines[i]`` and record it.

    Starting from the trigger line ``lines[i]``, the header is scanned
    forward until is_copyright_end() reports a terminator. For headers that
    do not start with "#", the start is moved back to just after the nearest
    preceding line containing "/*" (or to the top of the file). Comment
    decoration is then stripped and the resulting text is added to the
    module-level ``copyrights`` set.

    Args:
        lines: All lines of the file being scanned.
        i: Index of the line that triggered the extraction.

    Returns:
        The index at which the caller should resume scanning: the index of
        the terminating line, or ``len(lines)`` when no terminator was found.
        The result is always greater than ``i`` so the caller makes progress.
    """
    trigger = i
    first_line_was_hash = lines[trigger].startswith("#")

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i], first_line_was_hash):
        i += 1

    if i == trigger:
        # The trigger line is itself a terminator (for example a one-line
        # "/* Copyright ... */" comment), so there is no header body.
        # Recording lines[start:trigger] would capture unrelated text, and
        # returning the unchanged index would make the caller spin forever.
        warn("ignoring copyright line with no header body: %s"
             % lines[trigger].strip())
        return trigger + 1

    # Do we need to back up to find the start of the copyright header?
    start = trigger
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Trim trailing cruft.
    trailing_cruft = {
        " *", " * ===================================================="
    }
    end = i
    while end > 0 and lines[end - 1] in trailing_cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. Index bounds are used instead of
    # repeated slicing, and both loops stop safely when every line is blank.
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1

    notice = clean_lines[head:tail]
    if notice:
        # Only record real text; an empty notice carries no information.
        copyrights.add("\n".join(notice))

    return i
|
|
|
|
|
|
def do_file(path: str) -> None:
    """Scan one file and record every copyright header found in it.

    The file is read as bytes and decoded as UTF-8, falling back to
    ISO-8859-1 (with a warning) when it is not valid UTF-8. Files of four
    lines or fewer, auto-generated files and public-domain files are
    skipped; a file with no "Copyright" text at all produces a warning.

    Args:
        path: Path of the file to scan.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        # ISO-8859-1 maps every byte value, so this decode cannot fail.
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        line = lines[i]
        if "Copyright" in line and "@(#) Copyright" not in line:
            # Always move forward by at least one line: if the helper hands
            # back the index it was given (the trigger line was itself a
            # header terminator), this loop would otherwise never finish.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
|
|
|
|
|
|
def do_dir(arg):
    """Walk the directory tree under ``arg`` and scan each interesting file.

    Subdirectories and files are visited in sorted order, and ".git"
    directories are not descended into.

    Args:
        arg: Root directory of the tree to scan.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk() only honours changes made to this list in place, so both
        # the pruning and the ordering must mutate it. Rebinding the name to
        # sorted(...) would leave the traversal order untouched.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
|
|
|
|
|
|
def main() -> None:
    """Scan each command-line argument, then print the collected notices.

    Directory arguments are walked with do_dir(); anything else is handed to
    do_file(). With no arguments the current directory is scanned. Each
    notice in the module-level ``copyrights`` set is printed in sorted
    order, followed by a blank line, a 67-dash rule and another blank line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        handler = do_dir if os.path.isdir(target) else do_file
        handler(target)

    separator = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(separator)
        print()
|
|
|
|
|
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|