diff options
Diffstat (limited to 'libc/tools/generate_notice.py')
-rwxr-xr-x | libc/tools/generate_notice.py | 208 |
1 files changed, 208 insertions, 0 deletions
#!/usr/bin/env python3
"""Collect the distinct copyright headers found in source files.

Every path given on the command line (default: the current directory) is
scanned; each unique copyright header is printed to stdout, followed by a
line of dashes.
"""
# Run with directory arguments from any directory, with no special setup
# required.

import os
from pathlib import Path
import re
import sys
from typing import Sequence

VERBOSE = False

# Every distinct copyright notice found so far, as cleaned-up text.
copyrights = set()

# NOTE(review): this file was recovered from a whitespace-collapsed copy, so
# runs of spaces inside the string literals below (and in the tab replacement
# in extract_copyright_at) may originally have been wider -- confirm against
# the repository before relying on them.

# File suffixes (lower-case) that are never scanned.
_UNINTERESTING_EXTENSIONS = frozenset({
    ".bp",
    ".map",
    ".md",
    ".mk",
    ".py",
    ".pyc",
    ".swp",
    ".txt",
})

# File names (lower-case) that are never scanned.
_UNINTERESTING_NAMES = frozenset({"notice", "readme", "pylintrc"})

# Substrings that mark the line on which a copyright header stops.
_COPYRIGHT_ENDINGS = (
    " $FreeBSD: ",
    "$Citrus$",
    "$FreeBSD$",
    "*/",
    "From: @(#)",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
    "\t$Citrus: ",
    "\t$NetBSD: ",
    "\t$OpenBSD: ",
    "\t@(#)",
    "\tcitrus Id: ",
    "\tfrom: @(#)",
    "from OpenBSD:",
)

# Raw lines dropped from the tail of a header before it is cleaned.
_TRAILING_CRUFT = frozenset({
    " *", " * ===================================================="
})

# Cleaned lines that are really just an empty comment line.
_BLANK_COMMENT_LINES = frozenset({"#", " *", "**", "-"})


def warn(s):
    """Write `s` to stderr, prefixed with "warning: "."""
    sys.stderr.write("warning: %s\n" % s)


def warn_verbose(s):
    """Emit `s` as a warning only when VERBOSE is enabled."""
    if VERBOSE:
        warn(s)


def is_interesting(path_str: str) -> bool:
    """Return True if the file at `path_str` should be scanned.

    The comparison is case-insensitive: the path is lower-cased first.
    Files are skipped by suffix, by exact name, or when the name ends in
    "~".
    """
    path = Path(path_str.lower())
    if path.suffix in _UNINTERESTING_EXTENSIONS:
        return False
    if path.name in _UNINTERESTING_NAMES:
        return False
    # Backup files for some editors.
    if path.match("*~"):
        return False
    return True


def is_auto_generated(content):
    """Return True if `content` carries one of the known generator banners."""
    markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in markers)


def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return True if `line` terminates a copyright header.

    For "#"-style headers (`first_line_was_hash`) an empty line ends the
    header; for every style, any of the known terminator substrings does.
    """
    if first_line_was_hash and not line:
        return True
    return any(ending in line for ending in _COPYRIGHT_ENDINGS)


def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Record the copyright header around line `i` in `copyrights`.

    Args:
        lines: The whole file, split into lines.
        i: Index of a line that contains "Copyright".

    Returns:
        The index at which the caller should resume scanning. It is always
        greater than the `i` passed in, so a caller looping on the result
        cannot get stuck.
    """
    first = i
    first_line_was_hash = lines[first].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = first
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    # If the terminator sits on the "Copyright" line itself (for example a
    # one-line "/* Copyright ... */" comment) the scan above made no
    # progress. Treat that line as the whole notice and step past it;
    # otherwise the caller would call us again with the same index forever.
    single_line = i == first
    if single_line:
        start = first
        i = first + 1

    end = i

    # Trim trailing cruft.
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # Only a one-line notice can still carry its comment closer.
        if single_line and line.endswith("*/"):
            line = line[:-2].rstrip()
        # These come last and take care of "blank" comment lines.
        if line in _BLANK_COMMENT_LINES:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. Bounded by indices so that an
    # empty or all-blank header cannot raise IndexError.
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1

    # Nothing left after cleaning means there is no notice to record.
    if head < tail:
        copyrights.add("\n".join(clean_lines[head:tail]))

    return i


def do_file(path: str) -> None:
    """Scan the file at `path` and record every copyright header in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when that fails. Files of four lines or fewer, auto-generated
    files, and files without the word "Copyright" are skipped.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            i = extract_copyright_at(lines, i)
        else:
            i += 1


def do_dir(arg):
    """Scan every interesting file below directory `arg`, skipping ".git"."""
    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk only honours changes made to the list
        # object it handed us, so rebinding the name would have no effect.
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)


def main() -> None:
    """Scan the paths in sys.argv (default ".") and print the notices found."""
    args = sys.argv[1:] or ["."]

    for arg in args:
        if os.path.isdir(arg):
            do_dir(arg)
        else:
            do_file(arg)

    for notice in sorted(copyrights):
        print(notice)
        print()
        print("-" * 67)
        print()


if __name__ == "__main__":
    main()