platform_build/tools/warn/warn_common.py

# python3
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Grep warnings messages and output HTML tables or warning counts in CSV.

Default is to output warnings in HTML tables grouped by warning severity.
Use option --byproject to output tables grouped by source file projects.
Use option --gencsv to output warning counts in CSV format.

Default input file is build.log, which can be changed with the --log flag.
"""

# List of important data structures and functions in this script.
#
# To parse and keep warning message in the input file:
#   severity:                classification of message severity
#   warn_patterns:
#   warn_patterns[w]['category']     tool that issued the warning, not used now
#   warn_patterns[w]['description']  table heading
#   warn_patterns[w]['members']      matched warnings from input
#   warn_patterns[w]['patterns']     regular expressions to match warnings
#   warn_patterns[w]['projects'][p]  number of warnings of pattern w in p
#   warn_patterns[w]['severity']     severity tuple
#   project_list[p][0]               project name
#   project_list[p][1]               regular expression to match a project path
#   project_patterns[p]              re.compile(project_list[p][1])
#   project_names[p]                 project_list[p][0]
#   warning_messages     array of each warning message, without source url
#   warning_links        array of each warning code search link; for 'chrome'
#   warning_records      array of [idx to warn_patterns,
#                                  idx to project_names,
#                                  idx to warning_messages,
#                                  idx to warning_links]
#   parse_input_file
#
import argparse
import io
import multiprocessing
import os
import re
import sys

# pylint:disable=relative-beyond-top-level,no-name-in-module
# suppress false positive of no-name-in-module warnings
from . import android_project_list
from . import chrome_project_list
from . import cpp_warn_patterns as cpp_patterns
from . import html_writer
from . import java_warn_patterns as java_patterns
from . import make_warn_patterns as make_patterns
from . import other_warn_patterns as other_patterns
from . import tidy_warn_patterns as tidy_patterns


def parse_args(use_google3):
  """Define and parse the args. Return the parse_args() result."""
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('--capacitor_path', default='',
                      help='Save capacitor warning file to the passed absolute'
                      ' path')
  # csvpath has a different naming than the above path because historically the
  # original Android script used csvpath, so other scripts rely on it
  parser.add_argument('--csvpath', default='',
                      help='Save CSV warning file to the passed path')
  parser.add_argument('--gencsv', action='store_true',
                      help='Generate CSV file with number of various warnings')
  parser.add_argument('--csvwithdescription', default='',
                      help="""Save CSV warning file to the passed path this csv
                            will contain all the warning descriptions""")
  parser.add_argument('--byproject', action='store_true',
                      help='Separate warnings in HTML output by project names')
  parser.add_argument('--url', default='',
                      help='Root URL of an Android source code tree prefixed '
                      'before files in warnings')
  parser.add_argument('--separator', default='?l=',
                      help='Separator between the end of a URL and the line '
                      'number argument. e.g. #')
  parser.add_argument('--processes', default=multiprocessing.cpu_count(),
                      type=int,
                      help='Number of parallel processes to process warnings')
  # Old Android build scripts call warn.py without --platform,
  # so the default platform is set to 'android'.
  parser.add_argument('--platform', default='android',
                      choices=['chrome', 'android'],
                      help='Platform of the build log')
  # Old Android build scripts call warn.py with only a build.log file path.
  parser.add_argument('--log', help='Path to build log file')
  parser.add_argument(dest='buildlog', metavar='build.log',
                      default='build.log', nargs='?',
                      help='Path to build.log file')
  flags = parser.parse_args()
  if not flags.log:
    flags.log = flags.buildlog
  if not use_google3 and not os.path.exists(flags.log):
    sys.exit('Cannot find log file: ' + flags.log)
  return flags


def get_project_names(project_list):
  """Get project_names from project_list."""
  return [p[0] for p in project_list]


def find_project_index(line, project_patterns):
  """Return the index to the project pattern array."""
  for idx, pattern in enumerate(project_patterns):
    if pattern.match(line):
      return idx
  return -1


def classify_one_warning(warning, link, results, project_patterns,
                         warn_patterns):
  """Classify one warning line."""
  for idx, pattern in enumerate(warn_patterns):
    for cpat in pattern['compiled_patterns']:
      if cpat.match(warning):
        project_idx = find_project_index(warning, project_patterns)
        results.append([warning, link, idx, project_idx])
        return
  # If we end up here, there was a problem parsing the log
  # probably caused by 'make -j' mixing the output from
  # 2 or more concurrent compiles


def remove_prefix(src, sub):
  """Remove everything before last occurrence of substring sub in string src."""
  if sub in src:
    inc_sub = src.rfind(sub)
    return src[inc_sub:]
  return src


# TODO(emmavukelj): Don't have any generate_*_cs_link functions call
# normalize_path a second time (the first time being in parse_input_file)
def generate_cs_link(warning_line, flags, android_root=None):
  """Try to add code search HTTP URL prefix."""
  if flags.platform == 'chrome':
    return generate_chrome_cs_link(warning_line, flags)
  if flags.platform == 'android':
    return generate_android_cs_link(warning_line, flags, android_root)
  return 'https://cs.corp.google.com/'


def generate_android_cs_link(warning_line, flags, android_root):
  """Generate the code search link for a warning line in Android."""
  # max_splits=2 -> only 3 items
  raw_path, line_number_str, _ = warning_line.split(':', 2)
  normalized_path = normalize_path(raw_path, flags, android_root)
  if not flags.url:
    return normalized_path
  link_path = flags.url + '/' + normalized_path
  if line_number_str.isdigit():
    link_path += flags.separator + line_number_str
  return link_path


def generate_chrome_cs_link(warning_line, flags):
  """Generate the code search link for a warning line in Chrome."""
  split_line = warning_line.split(':')
  raw_path = split_line[0]
  normalized_path = normalize_path(raw_path, flags)
  link_base = 'https://cs.chromium.org/'
  link_add = 'chromium'
  link_path = None

  # Basically just going through a few specific directory cases and specifying
  # the proper behavior for that case. This list of cases was accumulated
  # through trial and error manually going through the warnings.
  #
  # This code pattern of using case-specific "if"s instead of "elif"s looks
  # possibly accidental and mistaken but it is intentional because some paths
  # fall under several cases (e.g. third_party/lib/nghttp2_frame.c) and for
  # those we want the most specific case to be applied. If there is reliable
  # knowledge of exactly where these occur, this could be changed to "elif"s
  # but there is no reliable set of paths falling under multiple cases at the
  # moment.
  if '/src/third_party' in raw_path:
    link_path = remove_prefix(raw_path, '/src/third_party/')
  if '/chrome_root/src_internal/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src_internal/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/chrome_root/src/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/libassistant/' in raw_path:
    link_add = 'eureka_internal/chromium/src'
    link_base = 'https://cs.corp.google.com/'  # internal data
    link_path = remove_prefix(normalized_path, '/libassistant/')
  if raw_path.startswith('gen/'):
    link_path = '/src/out/Debug/gen/' + normalized_path
  if '/gen/' in raw_path:
    return '%s?q=file:%s' % (link_base, remove_prefix(normalized_path, '/gen/'))

  if not link_path and (raw_path.startswith('src/') or
                        raw_path.startswith('src_internal/')):
    link_path = '/%s' % raw_path

  if not link_path:  # can't find specific link, send a query
    return '%s?q=file:%s' % (link_base, normalized_path)

  line_number = int(split_line[1])
  link = '%s%s%s?l=%d' % (link_base, link_add, link_path, line_number)
  return link


def find_warn_py_and_android_root(path):
  """Return android source root path if warn.py is found."""
  parts = path.split('/')
  for idx in reversed(range(2, len(parts))):
    root_path = '/'.join(parts[:idx])
    # Android root directory should contain this script.
    if os.path.exists(root_path + '/build/make/tools/warn.py'):
      return root_path
  return ''


def find_android_root(buildlog):
  """Guess android source root from common prefix of file paths."""
  # Use the longest common prefix of the absolute file paths
  # of the first 10000 warning messages as the android_root.
  warning_lines = []
  warning_pattern = re.compile('^/[^ ]*/[^ ]*: warning: .*')
  count = 0
  for line in buildlog:
    # We want to find android_root of a local build machine.
    # Do not use RBE warning lines, which has '/b/f/w/' path prefix.
    # Do not use /tmp/ file warnings.
    if warning_pattern.match(line) and (
        '/b/f/w' not in line and not line.startswith('/tmp/')):
      warning_lines.append(line)
      count += 1
      if count > 9999:
        break
      # Try to find warn.py and use its location to find
      # the source tree root.
      if count < 100:
        path = os.path.normpath(re.sub(':.*$', '', line))
        android_root = find_warn_py_and_android_root(path)
        if android_root:
          return android_root
  # Do not use common prefix of a small number of paths.
  if count > 10:
    # pytype: disable=wrong-arg-types
    root_path = os.path.commonprefix(warning_lines)
    # pytype: enable=wrong-arg-types
    if len(root_path) > 2 and root_path[len(root_path) - 1] == '/':
      return root_path[:-1]
  return ''


def remove_android_root_prefix(path, android_root):
  """Remove android_root prefix from path if it is found."""
  if path.startswith(android_root):
    return path[1 + len(android_root):]
  return path


def normalize_path(path, flags, android_root=None):
  """Normalize file path relative to src/ or src-internal/ directory."""
  path = os.path.normpath(path)

  if flags.platform == 'android':
    if android_root:
      return remove_android_root_prefix(path, android_root)
    return path

  # Remove known prefix of root path and normalize the suffix.
  idx = path.find('chrome_root/')
  if idx >= 0:
    # remove chrome_root/, we want path relative to that
    return path[idx + len('chrome_root/'):]
  return path


def normalize_warning_line(line, flags, android_root=None):
  """Normalize file path relative to src directory in a warning line."""
  line = re.sub(u'[\u2018\u2019]', '\'', line)
  # replace non-ASCII chars to spaces
  line = re.sub(u'[^\x00-\x7f]', ' ', line)
  line = line.strip()
  first_column = line.find(':')
  return normalize_path(line[:first_column], flags,
                        android_root) + line[first_column:]


def parse_input_file_chrome(infile, flags):
  """Parse Chrome input file, collect parameters and warning lines."""
  platform_version = 'unknown'
  board_name = 'unknown'
  architecture = 'unknown'

  # only handle warning lines of format 'file_path:line_no:col_no: warning: ...'
  # Bug: http://198657613, This might need change to handle RBE output.
  chrome_warning_pattern = r'^[^ ]*/[^ ]*:[0-9]+:[0-9]+: warning: .*'

  warning_pattern = re.compile(chrome_warning_pattern)

  # Collect all unique warning lines
  # Remove the duplicated warnings save ~8% of time when parsing
  # one typical build log than before
  unique_warnings = dict()
  for line in infile:
    if warning_pattern.match(line):
      normalized_line = normalize_warning_line(line, flags)
      if normalized_line not in unique_warnings:
        unique_warnings[normalized_line] = generate_cs_link(line, flags)
    elif (platform_version == 'unknown' or board_name == 'unknown' or
          architecture == 'unknown'):
      result = re.match(r'.+Package:.+chromeos-base/chromeos-chrome-', line)
      if result is not None:
        platform_version = 'R' + line.split('chrome-')[1].split('_')[0]
        continue
      result = re.match(r'.+Source\sunpacked\sin\s(.+)', line)
      if result is not None:
        board_name = result.group(1).split('/')[2]
        continue
      result = re.match(r'.+USE:\s*([^\s]*).*', line)
      if result is not None:
        architecture = result.group(1)
        continue

  header_str = '%s - %s - %s' % (platform_version, board_name, architecture)
  return unique_warnings, header_str


def add_normalized_line_to_warnings(line, flags, android_root, unique_warnings):
  """Parse/normalize path, updating warning line and add to warnings dict."""
  normalized_line = normalize_warning_line(line, flags, android_root)
  if normalized_line not in unique_warnings:
    unique_warnings[normalized_line] = generate_cs_link(line, flags,
                                                        android_root)
  return unique_warnings


def parse_input_file_android(infile, flags):
  """Parse Android input file, collect parameters and warning lines."""
  # pylint:disable=too-many-locals,too-many-branches
  platform_version = 'unknown'
  target_product = 'unknown'
  target_variant = 'unknown'
  build_id = 'unknown'
  use_rbe = False
  android_root = find_android_root(infile)
  infile.seek(0)

  # rustc warning messages have two lines that should be combined:
  #     warning: description
  #        --> file_path:line_number:column_number
  # Some warning messages have no file name:
  #     warning: macro replacement list ... [bugprone-macro-parentheses]
  # Some makefile warning messages have no line number:
  #     some/path/file.mk: warning: description
  # C/C++ compiler warning messages have line and column numbers:
  #     some/path/file.c:line_number:column_number: warning: description
  warning_pattern = re.compile('(^[^ ]*/[^ ]*: warning: .*)|(^warning: .*)')
  warning_without_file = re.compile('^warning: .*')
  rustc_file_position = re.compile('^[ ]+--> [^ ]*/[^ ]*:[0-9]+:[0-9]+')

  # If RBE was used, try to reclaim some warning lines mixed with some
  # leading chars from other concurrent job's stderr output .
  # The leading characters can be any character, including digits and spaces.
  # It's impossible to correctly identify the starting point of the source
  # file path without the file directory name knowledge.
  # Here we can only be sure to recover lines containing "/b/f/w/".
  rbe_warning_pattern = re.compile('.*/b/f/w/[^ ]*: warning: .*')

   # Collect all unique warning lines
  # Remove the duplicated warnings save ~8% of time when parsing
  # one typical build log than before
  unique_warnings = dict()
  line_counter = 0
  prev_warning = ''
  for line in infile:
    if prev_warning:
      if rustc_file_position.match(line):
        # must be a rustc warning, combine 2 lines into one warning
        line = line.strip().replace('--> ', '') + ': ' + prev_warning
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
        prev_warning = ''
        continue
      # add prev_warning, and then process the current line
      prev_warning = 'unknown_source_file: ' + prev_warning
      unique_warnings = add_normalized_line_to_warnings(
          prev_warning, flags, android_root, unique_warnings)
      prev_warning = ''

    if use_rbe and rbe_warning_pattern.match(line):
      cleaned_up_line = re.sub('.*/b/f/w/', '', line)
      unique_warnings = add_normalized_line_to_warnings(
          cleaned_up_line, flags, android_root, unique_warnings)
      continue

    if warning_pattern.match(line):
      if warning_without_file.match(line):
        # save this line and combine it with the next line
        prev_warning = line
      else:
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
      continue

    if line_counter < 100:
      # save a little bit of time by only doing this for the first few lines
      line_counter += 1
      result = re.search('(?<=^PLATFORM_VERSION=).*', line)
      if result is not None:
        platform_version = result.group(0)
        continue
      result = re.search('(?<=^TARGET_PRODUCT=).*', line)
      if result is not None:
        target_product = result.group(0)
        continue
      result = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
      if result is not None:
        target_variant = result.group(0)
        continue
      result = re.search('(?<=^BUILD_ID=).*', line)
      if result is not None:
        build_id = result.group(0)
        continue
      result = re.search('(?<=^TOP=).*', line)
      if result is not None:
        android_root = result.group(1)
        continue
      if re.search('USE_RBE=', line) is not None:
        use_rbe = True
        continue

  if android_root:
    new_unique_warnings = dict()
    for warning_line in unique_warnings:
      normalized_line = normalize_warning_line(warning_line, flags,
                                               android_root)
      new_unique_warnings[normalized_line] = generate_android_cs_link(
          warning_line, flags, android_root)
    unique_warnings = new_unique_warnings

  header_str = '%s - %s - %s (%s)' % (
      platform_version, target_product, target_variant, build_id)
  return unique_warnings, header_str


def parse_input_file(infile, flags):
  """Parse one input file for chrome or android."""
  if flags.platform == 'chrome':
    return parse_input_file_chrome(infile, flags)
  if flags.platform == 'android':
    return parse_input_file_android(infile, flags)
  raise RuntimeError('parse_input_file not defined for platform %s' %
                     flags.platform)


def parse_compiler_output(compiler_output):
  """Parse compiler output for relevant info."""
  split_output = compiler_output.split(':', 3)  # 3 = max splits
  file_path = split_output[0]
  line_number = int(split_output[1])
  col_number = int(split_output[2].split(' ')[0])
  warning_message = split_output[3]
  return file_path, line_number, col_number, warning_message


def get_warn_patterns(platform):
  """Get and initialize warn_patterns."""
  warn_patterns = []
  if platform == 'chrome':
    warn_patterns = cpp_patterns.warn_patterns
  elif platform == 'android':
    warn_patterns = (make_patterns.warn_patterns + cpp_patterns.warn_patterns +
                     java_patterns.warn_patterns + tidy_patterns.warn_patterns +
                     other_patterns.warn_patterns)
  else:
    raise Exception('platform name %s is not valid' % platform)
  for pattern in warn_patterns:
    pattern['members'] = []
    # Each warning pattern has a 'projects' dictionary, that
    # maps a project name to number of warnings in that project.
    pattern['projects'] = {}
  return warn_patterns


def get_project_list(platform):
  """Return project list for appropriate platform."""
  if platform == 'chrome':
    return chrome_project_list.project_list
  if platform == 'android':
    return android_project_list.project_list
  raise Exception('platform name %s is not valid' % platform)


def parallel_classify_warnings(warning_data, args, project_names,
                               project_patterns, warn_patterns,
                               use_google3, create_launch_subprocs_fn,
                               classify_warnings_fn):
  """Classify all warning lines with num_cpu parallel processes."""
  # pylint:disable=too-many-arguments,too-many-locals
  num_cpu = args.processes
  group_results = []

  if num_cpu > 1:
    # set up parallel processing for this...
    warning_groups = [[] for _ in range(num_cpu)]
    i = 0
    for warning, link in warning_data.items():
      warning_groups[i].append((warning, link))
      i = (i + 1) % num_cpu
    arg_groups = [[] for _ in range(num_cpu)]
    for i, group in enumerate(warning_groups):
      arg_groups[i] = [{
          'group': group,
          'project_patterns': project_patterns,
          'warn_patterns': warn_patterns,
          'num_processes': num_cpu
      }]

    group_results = create_launch_subprocs_fn(num_cpu,
                                              classify_warnings_fn,
                                              arg_groups,
                                              group_results)
  else:
    group_results = []
    for warning, link in warning_data.items():
      classify_one_warning(warning, link, group_results,
                           project_patterns, warn_patterns)
    group_results = [group_results]

  warning_messages = []
  warning_links = []
  warning_records = []
  if use_google3:
    group_results = [group_results]
  for group_result in group_results:
    for result in group_result:
      for line, link, pattern_idx, project_idx in result:
        pattern = warn_patterns[pattern_idx]
        pattern['members'].append(line)
        message_idx = len(warning_messages)
        warning_messages.append(line)
        link_idx = len(warning_links)
        warning_links.append(link)
        warning_records.append([pattern_idx, project_idx, message_idx,
                                link_idx])
        pname = '???' if project_idx < 0 else project_names[project_idx]
        # Count warnings by project.
        if pname in pattern['projects']:
          pattern['projects'][pname] += 1
        else:
          pattern['projects'][pname] = 1
  return warning_messages, warning_links, warning_records


def process_log(logfile, flags, project_names, project_patterns, warn_patterns,
                html_path, use_google3, create_launch_subprocs_fn,
                classify_warnings_fn, logfile_object):
  # pylint does not recognize g-doc-*
  # pylint: disable=bad-option-value,g-doc-args
  # pylint: disable=bad-option-value,g-doc-return-or-yield
  # pylint: disable=too-many-arguments,too-many-locals
  """Function that handles processing of a log.

  This is isolated into its own function (rather than just taking place in main)
  so that it can be used by both warn.py and the borg job process_gs_logs.py, to
  avoid duplication of code.
  Note that if the arguments to this function change, process_gs_logs.py must
  be updated accordingly.
  """
  if logfile_object is None:
    with io.open(logfile, encoding='utf-8') as log:
      warning_lines_and_links, header_str = parse_input_file(log, flags)
  else:
    warning_lines_and_links, header_str = parse_input_file(
        logfile_object, flags)
  warning_messages, warning_links, warning_records = parallel_classify_warnings(
      warning_lines_and_links, flags, project_names, project_patterns,
      warn_patterns, use_google3, create_launch_subprocs_fn,
      classify_warnings_fn)

  html_writer.write_html(flags, project_names, warn_patterns, html_path,
                         warning_messages, warning_links, warning_records,
                         header_str)

  return warning_messages, warning_links, warning_records, header_str


def common_main(use_google3, create_launch_subprocs_fn, classify_warnings_fn,
                logfile_object=None):
  """Shared main function for Google3 and non-Google3 versions of warn.py."""
  flags = parse_args(use_google3)
  warn_patterns = get_warn_patterns(flags.platform)
  project_list = get_project_list(flags.platform)

  project_names = get_project_names(project_list)
  project_patterns = [re.compile(p[1]) for p in project_list]

  # html_path=None because we output html below if not outputting CSV
  warning_messages, warning_links, warning_records, header_str = process_log(
      logfile=flags.log, flags=flags, project_names=project_names,
      project_patterns=project_patterns, warn_patterns=warn_patterns,
      html_path=None, use_google3=use_google3,
      create_launch_subprocs_fn=create_launch_subprocs_fn,
      classify_warnings_fn=classify_warnings_fn,
      logfile_object=logfile_object)

  html_writer.write_out_csv(flags, warn_patterns, warning_messages,
                            warning_links, warning_records, header_str,
                            project_names)

  # Return these values, so that caller can use them, if desired.
  return flags, warning_messages, warning_records, warn_patterns