platform_bionic/libc/tools/generate-NOTICE.py

#!/usr/bin/env python
# Run with directory and/or file arguments (default: the current directory)
# from any directory, with no special setup required.

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile

# When True, warn_verbose() forwards its messages to warn(); when False they
# are dropped. It is used for routine skips (short or auto-generated files).
VERBOSE = False

def warn(s):
    """Emit `s` on stderr as one line carrying a "warning: " prefix."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)

def warn_verbose(s):
    """Forward `s` to warn(), but only while the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)

def is_interesting(path):
    """Return True if the file at `path` should be scanned for copyrights.

    The check is case-insensitive. Build files, scripts, editor swap files
    and text files are skipped by extension, as is any file whose final
    path component (after a "/") is "notice" or "readme".
    """
    lowered = path.lower()
    boring_suffixes = (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt")
    extension = os.path.splitext(lowered)[1]
    if extension in boring_suffixes:
        return False
    return not lowered.endswith(("/notice", "/readme"))

def is_auto_generated(content):
    """Return True if `content` carries one of the known generator banners.

    The match is a case-sensitive substring search over the whole text.
    """
    generator_markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for marker in generator_markers:
        if marker in content:
            return True
    return False

# Unique copyright texts collected so far; extract_copyright_at() adds to it.
copyrights = set()

# Substrings that end a copyright header: version-control ident strings and
# provenance notes that follow the license text.
_HEADER_TERMINATORS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Raw comment lines that are dropped when they trail the header text.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def _ends_header(line, in_hash_comment):
    """Return True if `line` terminates the copyright header being read."""
    if "*/" in line:
        return True
    # A '#'-style header ends at the first completely empty line.
    if in_hash_comment and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_TERMINATORS)


def _clean_comment_line(line):
    """Strip C/assembler/'#' comment formatting from `line`.

    Returns the bare text ("" for a blank comment line), or None when the
    line should be left out of the header entirely.
    """
    line = line.replace("\t", "    ")
    line = line.replace("/* ", "")
    line = re.sub(r"^ \* ", "", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if "SPDX-License-Identifier:" in line:
        return None
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in ("#", " *", "**", "-"):
        return ""
    return line


def extract_copyright_at(lines, i):
    """Record the copyright header around lines[i] and say where to resume.

    Args:
        lines: the file's text as a list of lines without newline characters.
        i: index of a line that mentions "Copyright".

    Returns:
        The index at which the caller should continue scanning. This is the
        line that ended the header (or len(lines) if none did) and is always
        greater than `i`, so a caller looping on the result makes progress.

    Side effects:
        Adds the cleaned-up header text to the module-level `copyrights` set
        unless nothing is left after cleaning.
    """
    first = i
    in_hash_comment = lines[i].startswith("#")

    # For anything but a '#' comment, back up to just after the line that
    # opens the enclosing /* comment (or to the top of the file) so that text
    # preceding the "Copyright" line is kept.
    start = i
    if not in_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not _ends_header(lines[i], in_hash_comment):
        i += 1

    header = lines[start:i]
    if i == first:
        # The "Copyright" line itself ends the header, typically a one-line
        # /* ... */ comment. Keep its text up to the comment terminator and
        # step past it; returning `first` would make the caller spin forever.
        header.append(lines[first].split("*/")[0])
        i += 1

    # Trim trailing cruft.
    while header and header[-1] in _TRAILING_CRUFT:
        header.pop()

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for raw_line in header:
        cleaned = _clean_comment_line(raw_line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail (the list may end up empty).
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i


def do_file(path):
    """Collect the copyright headers found in the file at `path`.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a warning)
    when it is not valid UTF-8. Nothing is collected from files that split
    into four or fewer lines, auto-generated files, or files that never
    mention "Copyright"; the latter produce a warning. Every other file is
    scanned line by line and each header is handed to extract_copyright_at().
    """
    # Read the raw bytes once and close the file before decoding. Decoding
    # bytes explicitly behaves the same on Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward by at least one line so that a header which
            # ends on its own "Copyright" line cannot stall the scan.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1


def do_dir(path):
    """Run do_file() on every interesting file beneath the directory `path`.

    ".git" directories are not entered. Sub-directories and files are visited
    in sorted order so that warnings appear in a stable order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place, so prune
        # and sort the list object itself rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)


# Scan every command-line argument (the current directory if there are none),
# then print each unique copyright text, in sorted order, followed by a
# separator line.
args = sys.argv[1:]
if not args:
    args = [ "." ]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Emit UTF-8 bytes directly: Python 3 exposes the byte stream as
# sys.stdout.buffer, while on Python 2 sys.stdout itself accepts bytes.
out = getattr(sys.stdout, "buffer", sys.stdout)
separator = b"\n-------------------------------------------------------------------\n\n"
for notice in sorted(copyrights):
    out.write(notice.encode("utf-8") + b"\n")
    out.write(separator)
out.flush()

sys.exit(0)
Use env to invoke python /usr/bin/python may be python3. We should respect PATH to find the python executable so it can be locally overridden to be python2. Test: Build libc, repo upload Change-Id: Iaddd7cd4a1c2177c32786e4fa0fc664ab0ad36de 2017-09-01 00:08:26 +02:00			`#!/usr/bin/env python`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`# Run with directory arguments from any directory, with no special setup required.`

			`import ftplib`
			`import hashlib`
			`import os`
			`import re`
			`import shutil`
			`import string`
			`import subprocess`
			`import sys`
			`import tarfile`
			`import tempfile`

Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`VERBOSE = False`

			`def warn(s):`
			`sys.stderr.write("warning: %s\n" % s)`

			`def warn_verbose(s):`
			`if VERBOSE:`
			`warn(s)`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00
Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`def is_interesting(path):`
			`path = path.lower()`
			`uninteresting_extensions = [`
			`".bp",`
			`".map",`
			`".mk",`
			`".py",`
			`".pyc",`
			`".swp",`
			`".txt",`
			`]`
			`if os.path.splitext(path)[1] in uninteresting_extensions:`
			`return False`
			`if path.endswith("/notice") or path.endswith("/readme"):`
			`return False`
			`return True`

			`def is_auto_generated(content):`
Bring the generate-NOTICE script up to date. Change-Id: I63981a2f3cb0a4d7ee7e1a2b4ce00f77898ac25b 2014-03-07 00:10:22 +01:00			`if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`return True`
			`if "This header was automatically generated from a Linux kernel header" in content:`
			`return True`
			`return False`

			`copyrights = set()`

Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`def extract_copyright_at(lines, i):`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`hash = lines[i].startswith("#")`

Sort NOTICE entries. Also make sure we get the whole of any copyright header, in case there's text before the "Copyright" line. Change-Id: Iabcc5e0931a39c0107b833539fec7c5a3d134592 2012-08-15 00:04:05 +02:00			`# Do we need to back up to find the start of the copyright header?`
			`start = i`
			`if not hash:`
			`while start > 0:`
			`if "/*" in lines[start - 1]:`
			`break`
			`start -= 1`

Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`# Read comment lines until we hit something that terminates a`
			`# copyright header.`
			`while i < len(lines):`
			`if "*/" in lines[i]:`
			`break`
			`if hash and len(lines[i]) == 0:`
			`break`
			`if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:`
			`break`
			`if "\tcitrus Id: " in lines[i]:`
			`break`
Update the NOTICE files and improve the script slightly. Change-Id: Ia131634cd97ca9523e1b63dce41f66247cbaa311 2014-05-05 23:58:17 +02:00			`if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`break`
			`if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:`
			`break`
Update the NOTICE files and improve the script slightly. Change-Id: Ia131634cd97ca9523e1b63dce41f66247cbaa311 2014-05-05 23:58:17 +02:00			`# OpenBSD likes to say where stuff originally came from:`
			`if "Original version ID:" in lines[i]:`
			`break`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`i += 1`

			`end = i`

			`# Trim trailing cruft.`
			`while end > 0:`
			`if lines[end - 1] != " " and lines[end - 1] != " ====================================================":`
			`break`
			`end -= 1`

			`# Remove C/assembler comment formatting, pulling out just the text.`
			`clean_lines = []`
			`for line in lines[start:end]:`
			`line = line.replace("\t", " ")`
			`line = line.replace("/* ", "")`
Fix a couple of bugs in generate-NOTICE and regenerate the NOTICE files. Change-Id: Id6fcb74292e661504d0758bfce24abdc18cb8d32 2014-07-23 06:24:47 +02:00			`line = re.sub("^ \* ", "", line)`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`line = line.replace("** ", "")`
			`line = line.replace("# ", "")`
Update to FreeBSD libm r336665. This reverts commit 253a8306316cedfd6fd3e3a169fbffe4cac04035 and moves us forward to a revision that contains fixes for the problem with the previous attempt. This also makes sincos(3)/sincosf(3)/sincosl(3) available to `_BSD_SOURCE` as well as `_GNU_SOURCE`. The new FreeBSD libm code requires the FreeBSD `__CONCAT` macro, and all our existing callers are FreeBSD too, so update that. There's also an assumption that <complex.h> drags in <math.h> which isn't true for us, so work around that with `-include` in the makefile. This then causes clang to recognize a bug -- returning from a void function -- in our fake (LP32) sincosl(3), so fix that too. Bug: http://b/111710419 Change-Id: I84703ad844f8afde6ec6b11604ab3c096ccb62c3 Test: ran tests 2018-07-24 02:01:52 +02:00			`if "SPDX-License-Identifier:" in line:`
			`continue`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`if line.startswith("++Copyright++"):`
			`continue`
			`line = line.replace("--Copyright--", "")`
			`line = line.rstrip()`
			`# These come last and take care of "blank" comment lines.`
			`if line == "#" or line == " " or line == "*" or line == "-":`
			`line = ""`
			`clean_lines.append(line)`

			`# Trim blank lines from head and tail.`
			`while clean_lines[0] == "":`
			`clean_lines = clean_lines[1:]`
			`while clean_lines[len(clean_lines) - 1] == "":`
			`clean_lines = clean_lines[0:(len(clean_lines) - 1)]`

			`copyright = "\n".join(clean_lines)`
			`copyrights.add(copyright)`

			`return i`


Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`def do_file(path):`
			`with open(path, "r") as the_file:`
			`try:`
			`content = open(path, "r").read().decode("utf-8")`
			`except UnicodeDecodeError:`
			`warn("bad UTF-8 in %s" % path)`
			`content = open(path, "r").read().decode("iso-8859-1")`

			`lines = content.split("\n")`

			`if len(lines) <= 4:`
			`warn_verbose("ignoring short file %s" % path)`
			`return`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00
Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`if is_auto_generated(content):`
			`warn_verbose("ignoring auto-generated file %s" % path)`
			`return`

			`if not "Copyright" in content:`
			`if "public domain" in content.lower():`
			`warn("ignoring public domain file %s" % path)`
			`return`
			`warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))`
			`return`

			`# Manually iterate because extract_copyright_at tells us how many lines to skip.`
			`i = 0`
			`while i < len(lines):`
			`if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:`
			`i = extract_copyright_at(lines, i)`
			`else:`
			`i += 1`


			`def do_dir(path):`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`for directory, sub_directories, filenames in os.walk(arg):`
			`if ".git" in sub_directories:`
			`sub_directories.remove(".git")`
			`sub_directories = sorted(sub_directories)`

			`for filename in sorted(filenames):`
			`path = os.path.join(directory, filename)`
Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`if is_interesting(path):`
			`do_file(path)`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00

Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`args = sys.argv[1:]`
			`if len(args) == 0:`
			`args = [ "." ]`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00
Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`for arg in args:`
			`if os.path.isdir(arg):`
			`do_dir(arg)`
			`else:`
			`do_file(arg)`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00
Sort NOTICE entries. Also make sure we get the whole of any copyright header, in case there's text before the "Copyright" line. Change-Id: Iabcc5e0931a39c0107b833539fec7c5a3d134592 2012-08-15 00:04:05 +02:00			`for copyright in sorted(copyrights):`
Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`print copyright.encode("utf-8")`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`print`
Allow passing filenames to generate-NOTICE.py. For the libandroid_support NOTICE file, we need to combine all the files in that directory, plus the specific files pulled from bionic. Also cleaned up some of the Python style. Bug: N/A Test: used for libandroid_support Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1 2017-07-14 19:00:32 +02:00			`print "-------------------------------------------------------------------"`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`print`

			`sys.exit(0)`