platform_bionic/libc/tools/generate-NOTICE.py

#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile

def IsUninteresting(path):
    """Return True if `path` should be skipped when hunting for copyright headers.

    The match is case-insensitive and purely suffix-based: build files,
    Python sources/bytecode, text files and ".3" files are skipped, as are
    files named notice/readme/caveats/tzdata and the zoneinfo "generate" file
    (each of those only when preceded by a "/").
    """
    skipped_suffixes = (
        # File extensions.
        ".mk", ".py", ".pyc", ".txt", ".3",
        # Whole file names (must follow a directory separator).
        "/notice", "/readme", "/caveats",
        "/tzdata", "/zoneinfo/generate",
    )
    return path.lower().endswith(skipped_suffixes)

def IsAutoGenerated(content):
    """Return True if `content` carries one of the known auto-generation banners.

    The check is a case-sensitive substring search over the whole file text.
    """
    generator_banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in generator_banners)

# Unique copyright notices collected so far; ExtractCopyrightAt adds to it.
copyrights = set()

def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] and add it to `copyrights`.

    Args:
        lines: list of strings, one per source line (the comparisons below
            assume the lines carry no trailing newline characters).
        i: index of a line in `lines`; the caller passes a line containing
            "Copyright".

    Returns:
        The index at which scanning stopped: the first line at or after `i`
        that terminates the header, or len(lines) if none does.

    If nothing is left once the comment decoration has been stripped (for
    example a one-line "/* Copyright ... */" comment, where "*/" ends the scan
    on the very first line), no notice is recorded and the stop index is still
    returned; previously this case raised IndexError.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # For non-'#' comments, walk back until the previous line opens a
    # "/*" comment (or the top of the file is reached).
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Substrings that mark the end of a copyright header: the comment close
    # and the various version-control/provenance tags that follow the header.
    terminators = (
        "*/",
        "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
        "\tcitrus Id: ",
        "\t$Citrus: ", "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
        "$FreeBSD$", "$Citrus$",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
    )

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        # A '#'-style header ends at the first completely empty line.
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in terminators):
            break
        i += 1

    # Trim trailing cruft (blank comment lines and separator rules).
    trailing_cruft = (" *", " * ====================================================")
    end = i
    while end > start and lines[end - 1] in trailing_cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = line.replace(" * ", "")
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail; the bounds checks keep this safe
    # when every line is blank or the list is empty.
    first = 0
    while first < len(clean_lines) and clean_lines[first] == "":
        first += 1
    clean_lines = clean_lines[first:]
    while clean_lines and clean_lines[-1] == "":
        clean_lines.pop()

    # Nothing usable was found: don't record an empty notice.
    if not clean_lines:
        return i

    copyrights.add("\n".join(clean_lines))

    return i

args = sys.argv[1:]
if len(args) == 0:
    args = [ "." ]

for arg in args:
    sys.stderr.write('Searching for source files in "%s"...\n' % arg)

    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories = sorted(sub_directories)

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if IsUninteresting(path):
                #print "ignoring uninteresting file %s" % path
                continue

            try:
                content = open(path, 'r').read().decode('utf-8')
            except:
                # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
                sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
                content = open(path, 'r').read().decode('iso-8859-1')

            lines = content.split("\n")

            if len(lines) <= 4:
                #print "ignoring short file %s" % path
                continue

            if IsAutoGenerated(content):
                #print "ignoring auto-generated file %s" % path
                continue

            if not "Copyright" in content:
                if "public domain" in content.lower():
                    #print "ignoring public domain file %s" % path
                    continue
                sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                continue

            i = 0
            while i < len(lines):
                if "Copyright" in lines[i]:
                    i = ExtractCopyrightAt(lines, i)
                i += 1

            #print path

for copyright in sorted(copyrights):
    print copyright.encode('utf-8')
    print
    print '-------------------------------------------------------------------'
    print

sys.exit(0)
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`#!/usr/bin/python`
			`# Run with directory arguments from any directory, with no special setup required.`
Update libc/NOTICE and record the incantation. Change-Id: I0673d6263de2c5a21cd549f18c0648d0fa4bb40f 2012-09-14 01:51:57 +02:00			`# Or:`
			`# for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00
			`import ftplib`
			`import hashlib`
			`import os`
			`import re`
			`import shutil`
			`import string`
			`import subprocess`
			`import sys`
			`import tarfile`
			`import tempfile`

			`def IsUninteresting(path):`
			`path = path.lower()`
			`if path.endswith(".mk") or path.endswith(".py") or path.endswith(".pyc") or path.endswith(".txt") or path.endswith(".3"):`
			`return True`
			`if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/caveats"):`
			`return True`
Update generate-NOTICE.py to know about tzdata. Also regenerate the NOTICE files. One had slipped behind. Bug: 7012465 Change-Id: Ice2e909b521472eb1acd53d8151038bebe19984a 2012-10-19 23:55:19 +02:00			`if path.endswith("/tzdata") or path.endswith("/zoneinfo/generate"):`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`return True`
			`return False`

			`def IsAutoGenerated(content):`
Bring the generate-NOTICE script up to date. Change-Id: I63981a2f3cb0a4d7ee7e1a2b4ce00f77898ac25b 2014-03-07 00:10:22 +01:00			`if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`return True`
			`if "This header was automatically generated from a Linux kernel header" in content:`
			`return True`
			`return False`

			`copyrights = set()`

			`def ExtractCopyrightAt(lines, i):`
			`hash = lines[i].startswith("#")`

Sort NOTICE entries. Also make sure we get the whole of any copyright header, in case there's text before the "Copyright" line. Change-Id: Iabcc5e0931a39c0107b833539fec7c5a3d134592 2012-08-15 00:04:05 +02:00			`# Do we need to back up to find the start of the copyright header?`
			`start = i`
			`if not hash:`
			`while start > 0:`
			`if "/*" in lines[start - 1]:`
			`break`
			`start -= 1`

Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`# Read comment lines until we hit something that terminates a`
			`# copyright header.`
			`while i < len(lines):`
			`if "*/" in lines[i]:`
			`break`
			`if hash and len(lines[i]) == 0:`
			`break`
			`if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:`
			`break`
			`if "\tcitrus Id: " in lines[i]:`
			`break`
Update the NOTICE files and improve the script slightly. Change-Id: Ia131634cd97ca9523e1b63dce41f66247cbaa311 2014-05-05 23:58:17 +02:00			`if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`break`
			`if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:`
			`break`
Update the NOTICE files and improve the script slightly. Change-Id: Ia131634cd97ca9523e1b63dce41f66247cbaa311 2014-05-05 23:58:17 +02:00			`# OpenBSD likes to say where stuff originally came from:`
			`if "Original version ID:" in lines[i]:`
			`break`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`i += 1`

			`end = i`

			`# Trim trailing cruft.`
			`while end > 0:`
			`if lines[end - 1] != " " and lines[end - 1] != " ====================================================":`
			`break`
			`end -= 1`

			`# Remove C/assembler comment formatting, pulling out just the text.`
			`clean_lines = []`
			`for line in lines[start:end]:`
			`line = line.replace("\t", " ")`
			`line = line.replace("/* ", "")`
			`line = line.replace(" * ", "")`
			`line = line.replace("** ", "")`
			`line = line.replace("# ", "")`
			`if line.startswith("++Copyright++"):`
			`continue`
			`line = line.replace("--Copyright--", "")`
			`line = line.rstrip()`
			`# These come last and take care of "blank" comment lines.`
			`if line == "#" or line == " " or line == "*" or line == "-":`
			`line = ""`
			`clean_lines.append(line)`

			`# Trim blank lines from head and tail.`
			`while clean_lines[0] == "":`
			`clean_lines = clean_lines[1:]`
			`while clean_lines[len(clean_lines) - 1] == "":`
			`clean_lines = clean_lines[0:(len(clean_lines) - 1)]`

			`copyright = "\n".join(clean_lines)`
			`copyrights.add(copyright)`

			`return i`

			`args = sys.argv[1:]`
			`if len(args) == 0:`
			`args = [ "." ]`

			`for arg in args:`
			`sys.stderr.write('Searching for source files in "%s"...\n' % arg)`

			`for directory, sub_directories, filenames in os.walk(arg):`
			`if ".git" in sub_directories:`
			`sub_directories.remove(".git")`
			`sub_directories = sorted(sub_directories)`

			`for filename in sorted(filenames):`
			`path = os.path.join(directory, filename)`
			`if IsUninteresting(path):`
			`#print "ignoring uninteresting file %s" % path`
			`continue`

			`try:`
			`content = open(path, 'r').read().decode('utf-8')`
			`except:`
			`# TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.`
			`sys.stderr.write('warning: bad UTF-8 in %s\n' % path)`
			`content = open(path, 'r').read().decode('iso-8859-1')`

			`lines = content.split("\n")`

			`if len(lines) <= 4:`
			`#print "ignoring short file %s" % path`
			`continue`

			`if IsAutoGenerated(content):`
			`#print "ignoring auto-generated file %s" % path`
			`continue`

			`if not "Copyright" in content:`
			`if "public domain" in content.lower():`
			`#print "ignoring public domain file %s" % path`
			`continue`
			`sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))`
			`continue`

			`i = 0`
			`while i < len(lines):`
			`if "Copyright" in lines[i]:`
			`i = ExtractCopyrightAt(lines, i)`
			`i += 1`

			`#print path`

Sort NOTICE entries. Also make sure we get the whole of any copyright header, in case there's text before the "Copyright" line. Change-Id: Iabcc5e0931a39c0107b833539fec7c5a3d134592 2012-08-15 00:04:05 +02:00			`for copyright in sorted(copyrights):`
Auto-generate a complete NOTICE file. Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d 2012-08-10 00:17:46 +02:00			`print copyright.encode('utf-8')`
			`print`
			`print '-------------------------------------------------------------------'`
			`print`

			`sys.exit(0)`