Create reproducible NOTICE collections

Sort the lists of notice files instead of relying on the filesystem
ordering returned by os.walk().

Bug: 23972440
Change-Id: Id32be9ce8e540244ad9d67b6fc7883fc2963fe5c
This commit is contained in:
Dan Willemsen 2015-11-10 12:37:23 -08:00
parent 8b2c37c953
commit 8ae4984c2c

View file

@ -99,7 +99,7 @@ def combine_notice_files_html(file_hash, input_dir, output_filename):
# most browsers, but href's to table row ids do) # most browsers, but href's to table row ids do)
id_table = {} id_table = {}
id_count = 0 id_count = 0
for value in file_hash.values(): for value in file_hash:
for filename in value: for filename in value:
id_table[filename] = id_count id_table[filename] = id_count
id_count += 1 id_count += 1
@ -116,7 +116,7 @@ def combine_notice_files_html(file_hash, input_dir, output_filename):
print >> output_file, "<ul>" print >> output_file, "<ul>"
# Flatten the list of lists into a single list of filenames # Flatten the list of lists into a single list of filenames
sorted_filenames = sorted(itertools.chain.from_iterable(file_hash.values())) sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))
# Print out a nice table of contents # Print out a nice table of contents
for filename in sorted_filenames: for filename in sorted_filenames:
@ -127,11 +127,11 @@ def combine_notice_files_html(file_hash, input_dir, output_filename):
print >> output_file, "</div><!-- table of contents -->" print >> output_file, "</div><!-- table of contents -->"
# Output the individual notice file lists # Output the individual notice file lists
print >>output_file, '<table cellpadding="0" cellspacing="0" border="0">' print >>output_file, '<table cellpadding="0" cellspacing="0" border="0">'
for value in file_hash.values(): for value in file_hash:
print >> output_file, '<tr id="id%d"><td class="same-license">' % id_table.get(value[0]) print >> output_file, '<tr id="id%d"><td class="same-license">' % id_table.get(value[0])
print >> output_file, '<div class="label">Notices for file(s):</div>' print >> output_file, '<div class="label">Notices for file(s):</div>'
print >> output_file, '<div class="file-list">' print >> output_file, '<div class="file-list">'
for filename in sorted(value): for filename in value:
print >> output_file, "%s <br/>" % (SRC_DIR_STRIP_RE.sub(r"\1", filename)) print >> output_file, "%s <br/>" % (SRC_DIR_STRIP_RE.sub(r"\1", filename))
print >> output_file, "</div><!-- file-list -->" print >> output_file, "</div><!-- file-list -->"
print >> output_file print >> output_file
@ -154,10 +154,10 @@ def combine_notice_files_text(file_hash, input_dir, output_filename, file_title)
SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt") SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
output_file = open(output_filename, "wb") output_file = open(output_filename, "wb")
print >> output_file, file_title print >> output_file, file_title
for value in file_hash.values(): for value in file_hash:
print >> output_file, "============================================================" print >> output_file, "============================================================"
print >> output_file, "Notices for file(s):" print >> output_file, "Notices for file(s):"
for filename in sorted(value): for filename in value:
print >> output_file, SRC_DIR_STRIP_RE.sub(r"\1", filename) print >> output_file, SRC_DIR_STRIP_RE.sub(r"\1", filename)
print >> output_file, "------------------------------------------------------------" print >> output_file, "------------------------------------------------------------"
print >> output_file, open(value[0]).read() print >> output_file, open(value[0]).read()
@ -178,11 +178,12 @@ def main(args):
file_md5sum = md5sum(filename) file_md5sum = md5sum(filename)
files_with_same_hash[file_md5sum].append(filename) files_with_same_hash[file_md5sum].append(filename)
filesets = [sorted(files_with_same_hash[md5]) for md5 in sorted(files_with_same_hash.keys())]
print "Combining NOTICE files into HTML" print "Combining NOTICE files into HTML"
combine_notice_files_html(files_with_same_hash, input_dir, html_output_file) combine_notice_files_html(filesets, input_dir, html_output_file)
print "Combining NOTICE files into text" print "Combining NOTICE files into text"
combine_notice_files_text(files_with_same_hash, input_dir, txt_output_file, file_title) combine_notice_files_text(filesets, input_dir, txt_output_file, file_title)
if __name__ == "__main__": if __name__ == "__main__":
main(args) main(args)