# python3
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Grep warnings messages and output HTML tables or warning counts in CSV.

Default is to output warnings in HTML tables grouped by warning severity.
Use option --byproject to output tables grouped by source file projects.
Use option --gencsv to output warning counts in CSV format.

Default input file is build.log, which can be changed with the --log flag.
"""

# List of important data structures and functions in this script.
#
# To parse and keep warning messages in the input file:
#   severity:                classification of message severity
#   warn_patterns:
#   warn_patterns[w]['category']     tool that issued the warning, not used now
#   warn_patterns[w]['description']  table heading
#   warn_patterns[w]['members']      matched warnings from input
#   warn_patterns[w]['patterns']     regular expressions to match warnings
#   warn_patterns[w]['projects'][p]  number of warnings of pattern w in p
#   warn_patterns[w]['severity']     severity tuple
#   project_list[p][0]               project name
#   project_list[p][1]               regular expression to match a project path
#   project_patterns[p]              re.compile(project_list[p][1])
#   project_names[p]                 project_list[p][0]
#   warning_messages     array of each warning message, without source url
#   warning_links        array of each warning code search link; for 'chrome'
#   warning_records      array of [idx to warn_patterns,
#                                  idx to project_names,
#                                  idx to warning_messages,
#                                  idx to warning_links]
#   parse_input_file
#
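# Illustrative sketch of these structures (all values are made up for
# clarity, not taken from a real build log):
#   warning_messages[3]  'art/runtime/thread.cc:293:12: warning: unused variable'
#   warning_links[3]     code search link for that message
#   warning_records[3]   [w, p, 3, 3]  where w indexes warn_patterns and
#                        p indexes project_names
#   warn_patterns[w]['projects']  {'art': 2, 'bionic': 1}
#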
import argparse
import io
import multiprocessing
import os
import re
import sys

# pylint:disable=relative-beyond-top-level,no-name-in-module
# suppress false positive of no-name-in-module warnings
from . import android_project_list
from . import chrome_project_list
from . import cpp_warn_patterns as cpp_patterns
from . import html_writer
from . import java_warn_patterns as java_patterns
from . import make_warn_patterns as make_patterns
from . import other_warn_patterns as other_patterns
from . import tidy_warn_patterns as tidy_patterns


def parse_args(use_google3):
  """Define and parse the args. Return the parse_args() result."""
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('--capacitor_path', default='',
                      help='Save capacitor warning file to the passed absolute'
                      ' path')
  # csvpath is named differently from the path above because the original
  # Android script historically used csvpath, and other scripts rely on it.
  parser.add_argument('--csvpath', default='',
                      help='Save CSV warning file to the passed path')
  parser.add_argument('--gencsv', action='store_true',
                      help='Generate CSV file with number of various warnings')
  parser.add_argument('--csvwithdescription', default='',
                      help="""Save CSV warning file to the passed path this csv
                            will contain all the warning descriptions""")
  parser.add_argument('--byproject', action='store_true',
                      help='Separate warnings in HTML output by project names')
  parser.add_argument('--url', default='',
                      help='Root URL of an Android source code tree prefixed '
                      'before files in warnings')
  parser.add_argument('--separator', default='?l=',
                      help='Separator between the end of a URL and the line '
                      'number argument. e.g. #')
  parser.add_argument('--processes', default=multiprocessing.cpu_count(),
                      type=int,
                      help='Number of parallel processes to process warnings')
  # Old Android build scripts call warn.py without --platform,
  # so the default platform is set to 'android'.
  parser.add_argument('--platform', default='android',
                      choices=['chrome', 'android'],
                      help='Platform of the build log')
  # Old Android build scripts call warn.py with only a build.log file path.
  parser.add_argument('--log', help='Path to build log file')
  parser.add_argument(dest='buildlog', metavar='build.log',
                      default='build.log', nargs='?',
                      help='Path to build.log file')
  flags = parser.parse_args()
  if not flags.log:
    flags.log = flags.buildlog
  if not use_google3 and not os.path.exists(flags.log):
    sys.exit('Cannot find log file: ' + flags.log)
  return flags
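
# Illustrative invocations of warn.py, which uses these flags (file names
# and URL are made up):
#   warn.py --gencsv --csvpath=warnings.csv build.log
#   warn.py --url=https://android.example.com --separator='#' build.log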


def get_project_names(project_list):
  """Get project_names from project_list."""
  return [p[0] for p in project_list]


def find_project_index(line, project_patterns):
  """Return the index to the project pattern array."""
  for idx, pattern in enumerate(project_patterns):
    if pattern.match(line):
      return idx
  return -1


def classify_one_warning(warning, link, results, project_patterns,
                         warn_patterns):
  """Classify one warning line."""
  for idx, pattern in enumerate(warn_patterns):
    for cpat in pattern['compiled_patterns']:
      if cpat.match(warning):
        project_idx = find_project_index(warning, project_patterns)
        results.append([warning, link, idx, project_idx])
        return
  # If we end up here, there was a problem parsing the log, probably caused
  # by 'make -j' mixing the output from two or more concurrent compiles.


def remove_prefix(src, sub):
  """Remove everything before last occurrence of substring sub in string src."""
  if sub in src:
    inc_sub = src.rfind(sub)
    return src[inc_sub:]
  return src
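
# Illustrative behavior (note that, despite the name, the matched substring
# itself is kept; only the text before its last occurrence is removed):
#   remove_prefix('/b/w/src/third_party/foo.c', '/src/third_party/')
#     returns '/src/third_party/foo.c'
#   remove_prefix('foo.c', '/src/') returns 'foo.c' unchanged (sub not found)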


# TODO(emmavukelj): Don't have any generate_*_cs_link functions call
# normalize_path a second time (the first time being in parse_input_file)
def generate_cs_link(warning_line, flags, android_root=None):
  """Try to add code search HTTP URL prefix."""
  if flags.platform == 'chrome':
    return generate_chrome_cs_link(warning_line, flags)
  if flags.platform == 'android':
    return generate_android_cs_link(warning_line, flags, android_root)
  return 'https://cs.corp.google.com/'


def generate_android_cs_link(warning_line, flags, android_root):
  """Generate the code search link for a warning line in Android."""
  # max_splits=2 -> only 3 items
  raw_path, line_number_str, _ = warning_line.split(':', 2)
  normalized_path = normalize_path(raw_path, flags, android_root)
  if not flags.url:
    return normalized_path
  link_path = flags.url + '/' + normalized_path
  if line_number_str.isdigit():
    link_path += flags.separator + line_number_str
  return link_path
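
# Illustrative example (URL and warning line are made up): with
# flags.url='https://cs.example.com' and flags.separator='?l=', the line
# 'frameworks/base/core/Foo.java:42: warning: unchecked cast' yields
# 'https://cs.example.com/frameworks/base/core/Foo.java?l=42'.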


def generate_chrome_cs_link(warning_line, flags):
  """Generate the code search link for a warning line in Chrome."""
  split_line = warning_line.split(':')
  raw_path = split_line[0]
  normalized_path = normalize_path(raw_path, flags)
  link_base = 'https://cs.chromium.org/'
  link_add = 'chromium'
  link_path = None

  # Go through a few specific directory cases and specify the proper behavior
  # for each. This list of cases was accumulated through trial and error while
  # manually going through the warnings.
  #
  # This code pattern of using case-specific "if"s instead of "elif"s looks
  # possibly accidental and mistaken but it is intentional because some paths
  # fall under several cases (e.g. third_party/lib/nghttp2_frame.c) and for
  # those we want the most specific case to be applied. If there is reliable
  # knowledge of exactly where these occur, this could be changed to "elif"s
  # but there is no reliable set of paths falling under multiple cases at the
  # moment.
  if '/src/third_party' in raw_path:
    link_path = remove_prefix(raw_path, '/src/third_party/')
  if '/chrome_root/src_internal/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src_internal/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/chrome_root/src/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/libassistant/' in raw_path:
    link_add = 'eureka_internal/chromium/src'
    link_base = 'https://cs.corp.google.com/'  # internal data
    link_path = remove_prefix(normalized_path, '/libassistant/')
  if raw_path.startswith('gen/'):
    link_path = '/src/out/Debug/gen/' + normalized_path
  if '/gen/' in raw_path:
    return '%s?q=file:%s' % (link_base, remove_prefix(normalized_path, '/gen/'))

  if not link_path and (raw_path.startswith('src/') or
                        raw_path.startswith('src_internal/')):
    link_path = '/%s' % raw_path

  if not link_path:  # can't find specific link, send a query
    return '%s?q=file:%s' % (link_base, normalized_path)

  line_number = int(split_line[1])
  link = '%s%s%s?l=%d' % (link_base, link_add, link_path, line_number)
  return link
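
# Illustrative example (path is made up): 'src/foo/bar.cc:12:3: warning: ...'
# matches none of the special cases above, so it falls through to the
# startswith('src/') branch and yields
# 'https://cs.chromium.org/chromium/src/foo/bar.cc?l=12'.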


def find_warn_py_and_android_root(path):
  """Return android source root path if warn.py is found."""
  parts = path.split('/')
  for idx in reversed(range(2, len(parts))):
    root_path = '/'.join(parts[:idx])
    # Android root directory should contain this script.
    if os.path.exists(root_path + '/build/make/tools/warn.py'):
      return root_path
  return ''
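
# Illustrative search order (path is made up): for
# '/aosp/frameworks/base/core/A.java' the loop probes
# '/aosp/frameworks/base/core', '/aosp/frameworks/base', '/aosp/frameworks',
# and '/aosp', returning the first candidate that contains
# build/make/tools/warn.py.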


def find_android_root(buildlog):
  """Guess android source root from common prefix of file paths."""
  # Use the longest common prefix of the absolute file paths
  # of the first 10000 warning messages as the android_root.
  warning_lines = []
  warning_pattern = re.compile('^/[^ ]*/[^ ]*: warning: .*')
  count = 0
  for line in buildlog:
    if warning_pattern.match(line):
      warning_lines.append(line)
      count += 1
      if count > 9999:
        break
      # Try to find warn.py and use its location to find
      # the source tree root.
      if count < 100:
        path = os.path.normpath(re.sub(':.*$', '', line))
        android_root = find_warn_py_and_android_root(path)
        if android_root:
          return android_root
  # Do not use common prefix of a small number of paths.
  if count > 10:
    # pytype: disable=wrong-arg-types
    root_path = os.path.commonprefix(warning_lines)
    # pytype: enable=wrong-arg-types
    if len(root_path) > 2 and root_path.endswith('/'):
      return root_path[:-1]
  return ''


def remove_android_root_prefix(path, android_root):
  """Remove android_root prefix from path if it is found."""
  if path.startswith(android_root):
    return path[1 + len(android_root):]
  return path
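
# Illustrative example (paths are made up):
#   remove_android_root_prefix('/aosp/frameworks/base/A.java', '/aosp')
#     returns 'frameworks/base/A.java'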


def normalize_path(path, flags, android_root=None):
  """Normalize file path relative to src/ or src-internal/ directory."""
  path = os.path.normpath(path)

  if flags.platform == 'android':
    if android_root:
      return remove_android_root_prefix(path, android_root)
    return path

  # Remove known prefix of root path and normalize the suffix.
  idx = path.find('chrome_root/')
  if idx >= 0:
    # Remove the prefix through chrome_root/; we want the path relative to it.
    return path[idx + len('chrome_root/'):]
  return path


def normalize_warning_line(line, flags, android_root=None):
  """Normalize file path relative to src directory in a warning line."""
  line = re.sub(u'[\u2018\u2019]', '\'', line)
  # Replace non-ASCII characters with spaces.
  line = re.sub(u'[^\x00-\x7f]', ' ', line)
  line = line.strip()
  first_column = line.find(':')
  return normalize_path(line[:first_column], flags,
                        android_root) + line[first_column:]
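
# Illustrative example (path is made up): on the chrome platform,
# '/b/w/chrome_root/src/foo/bar.cc:3:1: warning: unused variable' is
# normalized to 'src/foo/bar.cc:3:1: warning: unused variable'.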


def parse_input_file_chrome(infile, flags):
  """Parse Chrome input file, collect parameters and warning lines."""
  platform_version = 'unknown'
  board_name = 'unknown'
  architecture = 'unknown'

  # only handle warning lines of format 'file_path:line_no:col_no: warning: ...'
  chrome_warning_pattern = r'^[^ ]*/[^ ]*:[0-9]+:[0-9]+: warning: .*'

  warning_pattern = re.compile(chrome_warning_pattern)

  # Collect all unique warning lines.
  # Removing duplicated warnings saves ~8% of the time spent parsing a
  # typical build log.
  unique_warnings = dict()
  for line in infile:
    if warning_pattern.match(line):
      normalized_line = normalize_warning_line(line, flags)
      if normalized_line not in unique_warnings:
        unique_warnings[normalized_line] = generate_cs_link(line, flags)
    elif (platform_version == 'unknown' or board_name == 'unknown' or
          architecture == 'unknown'):
      result = re.match(r'.+Package:.+chromeos-base/chromeos-chrome-', line)
      if result is not None:
        platform_version = 'R' + line.split('chrome-')[1].split('_')[0]
        continue
      result = re.match(r'.+Source\sunpacked\sin\s(.+)', line)
      if result is not None:
        board_name = result.group(1).split('/')[2]
        continue
      result = re.match(r'.+USE:\s*([^\s]*).*', line)
      if result is not None:
        architecture = result.group(1)
        continue

  header_str = '%s - %s - %s' % (platform_version, board_name, architecture)
  return unique_warnings, header_str


def add_normalized_line_to_warnings(line, flags, android_root, unique_warnings):
  """Parse/normalize path, updating warning line and add to warnings dict."""
  normalized_line = normalize_warning_line(line, flags, android_root)
  if normalized_line not in unique_warnings:
    unique_warnings[normalized_line] = generate_cs_link(line, flags,
                                                        android_root)
  return unique_warnings


def parse_input_file_android(infile, flags):
  """Parse Android input file, collect parameters and warning lines."""
  # pylint:disable=too-many-locals,too-many-branches
  platform_version = 'unknown'
  target_product = 'unknown'
  target_variant = 'unknown'
  android_root = find_android_root(infile)
  infile.seek(0)

  # rustc warning messages have two lines that should be combined:
  #     warning: description
  #        --> file_path:line_number:column_number
  # Some warning messages have no file name:
  #     warning: macro replacement list ... [bugprone-macro-parentheses]
  # Some makefile warning messages have no line number:
  #     some/path/file.mk: warning: description
  # C/C++ compiler warning messages have line and column numbers:
  #     some/path/file.c:line_number:column_number: warning: description
  warning_pattern = re.compile('(^[^ ]*/[^ ]*: warning: .*)|(^warning: .*)')
  warning_without_file = re.compile('^warning: .*')
  rustc_file_position = re.compile('^[ ]+--> [^ ]*/[^ ]*:[0-9]+:[0-9]+')

  # Collect all unique warning lines.
  # Removing duplicated warnings saves ~8% of the time spent parsing a
  # typical build log.
  unique_warnings = dict()
  line_counter = 0
  prev_warning = ''
  for line in infile:
    if prev_warning:
      if rustc_file_position.match(line):
        # must be a rustc warning, combine 2 lines into one warning
        line = line.strip().replace('--> ', '') + ': ' + prev_warning
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
        prev_warning = ''
        continue
      # add prev_warning, and then process the current line
      prev_warning = 'unknown_source_file: ' + prev_warning
      unique_warnings = add_normalized_line_to_warnings(
          prev_warning, flags, android_root, unique_warnings)
      prev_warning = ''

    if warning_pattern.match(line):
      if warning_without_file.match(line):
        # save this line and combine it with the next line
        prev_warning = line
      else:
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
      continue

    if line_counter < 100:
      # save a little bit of time by only doing this for the first few lines
      line_counter += 1
      result = re.search('(?<=^PLATFORM_VERSION=).*', line)
      if result is not None:
        platform_version = result.group(0)
      result = re.search('(?<=^TARGET_PRODUCT=).*', line)
      if result is not None:
        target_product = result.group(0)
      result = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
      if result is not None:
        target_variant = result.group(0)
      result = re.search('(?<=^TOP=).*', line)
      if result is not None:
        android_root = result.group(0)

  if android_root:
    new_unique_warnings = dict()
    for warning_line in unique_warnings:
      normalized_line = normalize_warning_line(warning_line, flags,
                                               android_root)
      new_unique_warnings[normalized_line] = generate_android_cs_link(
          warning_line, flags, android_root)
    unique_warnings = new_unique_warnings

  header_str = '%s - %s - %s' % (platform_version, target_product,
                                 target_variant)
  return unique_warnings, header_str


def parse_input_file(infile, flags):
  """Parse one input file for chrome or android."""
  if flags.platform == 'chrome':
    return parse_input_file_chrome(infile, flags)
  if flags.platform == 'android':
    return parse_input_file_android(infile, flags)
  raise RuntimeError('parse_input_file not defined for platform %s' %
                     flags.platform)


def parse_compiler_output(compiler_output):
  """Parse compiler output for relevant info."""
  split_output = compiler_output.split(':', 3)  # 3 = max splits
  file_path = split_output[0]
  line_number = int(split_output[1])
  col_number = int(split_output[2].split(' ')[0])
  warning_message = split_output[3]
  return file_path, line_number, col_number, warning_message
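
# Illustrative example (compiler line is made up):
#   parse_compiler_output('a/b.cc:10:5: warning: unused variable [-Wfoo]')
#     returns ('a/b.cc', 10, 5, ' warning: unused variable [-Wfoo]')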


def get_warn_patterns(platform):
  """Get and initialize warn_patterns."""
  warn_patterns = []
  if platform == 'chrome':
    warn_patterns = cpp_patterns.warn_patterns
  elif platform == 'android':
    warn_patterns = (make_patterns.warn_patterns + cpp_patterns.warn_patterns +
                     java_patterns.warn_patterns + tidy_patterns.warn_patterns +
                     other_patterns.warn_patterns)
  else:
    raise Exception('platform name %s is not valid' % platform)
  for pattern in warn_patterns:
    pattern['members'] = []
    # Each warning pattern has a 'projects' dictionary that maps
    # a project name to the number of warnings in that project.
    pattern['projects'] = {}
  return warn_patterns


def get_project_list(platform):
  """Return project list for appropriate platform."""
  if platform == 'chrome':
    return chrome_project_list.project_list
  if platform == 'android':
    return android_project_list.project_list
  raise Exception('platform name %s is not valid' % platform)


def parallel_classify_warnings(warning_data, args, project_names,
                               project_patterns, warn_patterns,
                               use_google3, create_launch_subprocs_fn,
                               classify_warnings_fn):
  """Classify all warning lines with num_cpu parallel processes."""
  # pylint:disable=too-many-arguments,too-many-locals
  num_cpu = args.processes
  group_results = []

  if num_cpu > 1:
    # Distribute warnings round-robin into num_cpu groups and classify
    # each group in a parallel subprocess.
    warning_groups = [[] for _ in range(num_cpu)]
    i = 0
    for warning, link in warning_data.items():
      warning_groups[i].append((warning, link))
      i = (i + 1) % num_cpu
    arg_groups = [[] for _ in range(num_cpu)]
    for i, group in enumerate(warning_groups):
      arg_groups[i] = [{
          'group': group,
          'project_patterns': project_patterns,
          'warn_patterns': warn_patterns,
          'num_processes': num_cpu
      }]

    group_results = create_launch_subprocs_fn(num_cpu,
                                              classify_warnings_fn,
                                              arg_groups,
                                              group_results)
  else:
    group_results = []
    for warning, link in warning_data.items():
      classify_one_warning(warning, link, group_results,
                           project_patterns, warn_patterns)
    group_results = [group_results]

  warning_messages = []
  warning_links = []
  warning_records = []
  if use_google3:
    group_results = [group_results]
  for group_result in group_results:
    for result in group_result:
      for line, link, pattern_idx, project_idx in result:
        pattern = warn_patterns[pattern_idx]
        pattern['members'].append(line)
        message_idx = len(warning_messages)
        warning_messages.append(line)
        link_idx = len(warning_links)
        warning_links.append(link)
        warning_records.append([pattern_idx, project_idx, message_idx,
                                link_idx])
        pname = '???' if project_idx < 0 else project_names[project_idx]
        # Count warnings by project.
        if pname in pattern['projects']:
          pattern['projects'][pname] += 1
        else:
          pattern['projects'][pname] = 1
  return warning_messages, warning_links, warning_records


def process_log(logfile, flags, project_names, project_patterns, warn_patterns,
                html_path, use_google3, create_launch_subprocs_fn,
                classify_warnings_fn, logfile_object):
  # pylint does not recognize g-doc-*
  # pylint: disable=bad-option-value,g-doc-args
  # pylint: disable=bad-option-value,g-doc-return-or-yield
  # pylint: disable=too-many-arguments,too-many-locals
  """Function that handles processing of a log.

  This is isolated into its own function (rather than just taking place in main)
  so that it can be used by both warn.py and the borg job process_gs_logs.py, to
  avoid duplication of code.
  Note that if the arguments to this function change, process_gs_logs.py must
  be updated accordingly.
  """
  if logfile_object is None:
    with io.open(logfile, encoding='utf-8') as log:
      warning_lines_and_links, header_str = parse_input_file(log, flags)
  else:
    warning_lines_and_links, header_str = parse_input_file(
        logfile_object, flags)
  warning_messages, warning_links, warning_records = parallel_classify_warnings(
      warning_lines_and_links, flags, project_names, project_patterns,
      warn_patterns, use_google3, create_launch_subprocs_fn,
      classify_warnings_fn)

  html_writer.write_html(flags, project_names, warn_patterns, html_path,
                         warning_messages, warning_links, warning_records,
                         header_str)

  return warning_messages, warning_links, warning_records, header_str


def common_main(use_google3, create_launch_subprocs_fn, classify_warnings_fn,
                logfile_object=None):
  """Shared main function for Google3 and non-Google3 versions of warn.py."""
  flags = parse_args(use_google3)
  warn_patterns = get_warn_patterns(flags.platform)
  project_list = get_project_list(flags.platform)

  project_names = get_project_names(project_list)
  project_patterns = [re.compile(p[1]) for p in project_list]

  # html_path=None because we output html below if not outputting CSV
  warning_messages, warning_links, warning_records, header_str = process_log(
      logfile=flags.log, flags=flags, project_names=project_names,
      project_patterns=project_patterns, warn_patterns=warn_patterns,
      html_path=None, use_google3=use_google3,
      create_launch_subprocs_fn=create_launch_subprocs_fn,
      classify_warnings_fn=classify_warnings_fn,
      logfile_object=logfile_object)

  html_writer.write_out_csv(flags, warn_patterns, warning_messages,
                            warning_links, warning_records, header_str,
                            project_names)

  # Return these values so that the caller can use them, if desired.
  return flags, warning_messages, warning_records, warn_patterns