utils/update_mir_test_checks.py

   1 #!/usr/bin/env python
   2
   3 """Updates FileCheck checks in MIR tests.
   4
   5 This script is a utility to update MIR based tests with new FileCheck
   6 patterns.
   7
   8 The checks added by this script will cover the entire body of each
   9 function it handles. Virtual registers used are given names via
  10 FileCheck patterns, so if you do want to check a subset of the body it
  11 should be straightforward to trim out the irrelevant parts. None of
  12 the YAML metadata will be checked, other than function names.
  13
  14 If there are multiple llc commands in a test, the full set of checks
  15 will be repeated for each different check pattern. Checks for patterns
  16 that are common between different commands will be left as-is by
  17 default, or removed if the --remove-common-prefixes flag is provided.
  18 """
  19
  20 from __future__ import print_function
  21
  22 import argparse
  23 import collections
  24 import os
  25 import re
  26 import subprocess
  27 import sys
  28
  29 RUN_LINE_RE = re.compile('^\s*[;#]\s*RUN:\s*(.*)$')
  30 TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
  31 MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
  32 TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
  33 CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?[= ](\S+)')
  34 CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
  35
  36 MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
  37 MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
  38 MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
  39 VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
  40 VREG_DEF_RE = re.compile(
  41     r'^ *(?P<vregs>{0}(?:, {0})*) '
  42     r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
  43 MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')
  44 VREG_CLASS_RE = re.compile(r'^ *- *{ id: ([0-9]+), class: ([a-z0-9_]+)', re.M)
  45
  46 IR_FUNC_NAME_RE = re.compile(
  47     r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>\w+)\s*\(')
  48 IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')
  49
  50 MIR_FUNC_RE = re.compile(
  51     r'^---$'
  52     r'\n'
  53     r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
  54     r'(?:.*?(?P<vregs>^ *registers: *(?:\n *- {[^\n]+$)*))?'
  55     r'.*?'
  56     r'^ *body: *\|\n'
  57     r'(?P<body>.*?)\n'
  58     r'^\.\.\.$',
  59     flags=(re.M | re.S))
  60
  61 class LLC:
  62     def __init__(self, bin):
  63         self.bin = bin
  64
  65     def __call__(self, args, ir):
  66         if ir.endswith('.mir'):
  67             args = '{} -x mir'.format(args)
  68         with open(ir) as ir_file:
  69             stdout = subprocess.check_output('{} {}'.format(self.bin, args),
  70                                              shell=True, stdin=ir_file)
  71             # Fix line endings to unix CR style.
  72             stdout = stdout.replace('\r\n', '\n')
  73         return stdout
  74
  75
  76 class Run:
  77     def __init__(self, prefixes, cmd_args, triple):
  78         self.prefixes = prefixes
  79         self.cmd_args = cmd_args
  80         self.triple = triple
  81
  82     def __getitem__(self, index):
  83         return [self.prefixes, self.cmd_args, self.triple][index]
  84
  85
  86 def log(msg, verbose=True):
  87     if verbose:
  88         print(msg, file=sys.stderr)
  89
  90
  91 def warn(msg, test_file=None):
  92     if test_file:
  93         msg = '{}: {}'.format(test_file, msg)
  94     print('WARNING: {}'.format(msg), file=sys.stderr)
  95
  96
  97 def find_triple_in_ir(lines, verbose=False):
  98     for l in lines:
  99         m = TRIPLE_IR_RE.match(l)
 100         if m:
 101             return m.group(1)
 102     return None
 103
 104
 105 def find_run_lines(test, lines, verbose=False):
 106     raw_lines = [m.group(1)
 107                  for m in [RUN_LINE_RE.match(l) for l in lines] if m]
 108     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 109     for l in raw_lines[1:]:
 110         if run_lines[-1].endswith("\\"):
 111             run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 112         else:
 113             run_lines.append(l)
 114     if verbose:
 115         log('Found {} RUN lines:'.format(len(run_lines)))
 116         for l in run_lines:
 117             log('  RUN: {}'.format(l))
 118     return run_lines
 119
 120
 121 def build_run_list(test, run_lines, verbose=False):
 122     run_list = []
 123     all_prefixes = []
 124     for l in run_lines:
 125         commands = [cmd.strip() for cmd in l.split('|', 1)]
 126         llc_cmd = commands[0]
 127         filecheck_cmd = commands[1] if len(commands) > 1 else ''
 128
 129         if not llc_cmd.startswith('llc '):
 130             warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
 131             continue
 132         if not filecheck_cmd.startswith('FileCheck '):
 133             warn('Skipping non-FileChecked RUN line: {}'.format(l),
 134                  test_file=test)
 135             continue
 136
 137         triple = None
 138         m = TRIPLE_ARG_RE.search(llc_cmd)
 139         if m:
 140             triple = m.group(1)
 141         # If we find -march but not -mtriple, use that.
 142         m = MARCH_ARG_RE.search(llc_cmd)
 143         if m and not triple:
 144             triple = '{}--'.format(m.group(1))
 145
 146         cmd_args = llc_cmd[len('llc'):].strip()
 147         cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
 148
 149         check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
 150                           for item in m.group(1).split(',')]
 151         if not check_prefixes:
 152             check_prefixes = ['CHECK']
 153         all_prefixes += check_prefixes
 154
 155         run_list.append(Run(check_prefixes, cmd_args, triple))
 156
 157     # Remove any common prefixes. We'll just leave those entirely alone.
 158     common_prefixes = set([prefix for prefix in all_prefixes
 159                            if all_prefixes.count(prefix) > 1])
 160     for run in run_list:
 161         run.prefixes = [p for p in run.prefixes if p not in common_prefixes]
 162
 163     return run_list, common_prefixes
 164
 165
 166 def find_functions_with_one_bb(lines, verbose=False):
 167     result = []
 168     cur_func = None
 169     bbs = 0
 170     for line in lines:
 171         m = MIR_FUNC_NAME_RE.match(line)
 172         if m:
 173             if bbs == 1:
 174                 result.append(cur_func)
 175             cur_func = m.group('func')
 176             bbs = 0
 177         m = MIR_BASIC_BLOCK_RE.match(line)
 178         if m:
 179             bbs += 1
 180     if bbs == 1:
 181         result.append(cur_func)
 182     return result
 183
 184
 185 def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
 186                                    func_dict, verbose):
 187     for m in MIR_FUNC_RE.finditer(raw_tool_output):
 188         func = m.group('func')
 189         body = m.group('body')
 190         if verbose:
 191             log('Processing function: {}'.format(func))
 192             for l in body.splitlines():
 193                 log('  {}'.format(l))
 194         for prefix in prefixes:
 195             if func in func_dict[prefix] and func_dict[prefix][func] != body:
 196                 warn('Found conflicting asm for prefix: {}'.format(prefix),
 197                      test_file=test)
 198             func_dict[prefix][func] = body
 199             func_dict[prefix]['{}:vregs'.format(func)] = m.group('vregs')
 200
 201
 202 def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
 203                             add_vreg_checks, single_bb, verbose=False):
 204     printed_prefixes = set()
 205     for run in run_list:
 206         for prefix in run.prefixes:
 207             if prefix in printed_prefixes:
 208                 continue
 209             if not func_dict[prefix][func_name]:
 210                 continue
 211             # if printed_prefixes:
 212             #     # Add some space between different check prefixes.
 213             #     output_lines.append('')
 214             printed_prefixes.add(prefix)
 215             log('Adding {} lines for {}'.format(prefix, func_name), verbose)
 216             vregs = None
 217             if add_vreg_checks:
 218                 vregs = func_dict[prefix]['{}:vregs'.format(func_name)]
 219             add_check_lines(test, output_lines, prefix, func_name, single_bb,
 220                             func_dict[prefix][func_name].splitlines(), vregs)
 221             break
 222     return output_lines
 223
 224
 225 def add_check_lines(test, output_lines, prefix, func_name, single_bb,
 226                     func_body, vreg_data):
 227     if single_bb:
 228         # Don't bother checking the basic block label for a single BB
 229         func_body.pop(0)
 230
 231     if not func_body:
 232         warn('Function has no instructions to check: {}'.format(func_name),
 233              test_file=test)
 234         return
 235
 236     first_line = func_body[0]
 237     indent = len(first_line) - len(first_line.lstrip(' '))
 238     # A check comment, indented the appropriate amount
 239     check = '{:>{}}; {}'.format('', indent, prefix)
 240
 241     output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
 242
 243     if vreg_data:
 244         output_lines.append('{}: registers:'.format(check))
 245         for m in VREG_CLASS_RE.finditer(vreg_data):
 246             output_lines.append('{}-NEXT: id: {}, class: {}'.format(
 247                 check, m.group(1), m.group(2)))
 248
 249     vreg_map = {}
 250     for func_line in func_body:
 251         if not func_line.strip():
 252             continue
 253         m = VREG_DEF_RE.match(func_line)
 254         if m:
 255             for vreg in VREG_RE.finditer(m.group('vregs')):
 256                 name = mangle_vreg(m.group('opcode'), vreg_map.values())
 257                 vreg_map[vreg.group(1)] = name
 258                 func_line = func_line.replace(
 259                     vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
 260         for number, name in vreg_map.items():
 261             func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
 262                                func_line)
 263         check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
 264         output_lines.append(check_line)
 265
 266
 267 def mangle_vreg(opcode, current_names):
 268     base = opcode
 269     # Simplify some common prefixes and suffixes
 270     if opcode.startswith('G_'):
 271         base = base[len('G_'):]
 272     if opcode.endswith('_PSEUDO'):
 273         base = base[:len('_PSEUDO')]
 274     # Shorten some common opcodes with long-ish names
 275     base = dict(IMPLICIT_DEF='DEF',
 276                 GLOBAL_VALUE='GV',
 277                 CONSTANT='C',
 278                 FCONSTANT='C',
 279                 MERGE_VALUES='MV',
 280                 UNMERGE_VALUES='UV',
 281                 INTRINSIC='INT',
 282                 INTRINSIC_W_SIDE_EFFECTS='INT',
 283                 INSERT_VECTOR_ELT='IVEC',
 284                 EXTRACT_VECTOR_ELT='EVEC',
 285                 SHUFFLE_VECTOR='SHUF').get(base, base)
 286     # Avoid ambiguity when opcodes end in numbers
 287     if len(base.rstrip('0123456789')) < len(base):
 288         base += '_'
 289
 290     i = 0
 291     for name in current_names:
 292         if name.rstrip('0123456789') == base:
 293             i += 1
 294     if i:
 295         return '{}{}'.format(base, i)
 296     return base
 297
 298
 299 def should_add_line_to_output(input_line, prefix_set):
 300     # Skip any check lines that we're handling.
 301     m = CHECK_RE.match(input_line)
 302     if m and m.group(1) in prefix_set:
 303         return False
 304     return True
 305
 306
 307 def update_test_file(llc, test, remove_common_prefixes=False,
 308                      add_vreg_checks=False, verbose=False):
 309     log('Scanning for RUN lines in test file: {}'.format(test), verbose)
 310     with open(test) as fd:
 311         input_lines = [l.rstrip() for l in fd]
 312
 313     triple_in_ir = find_triple_in_ir(input_lines, verbose)
 314     run_lines = find_run_lines(test, input_lines, verbose)
 315     run_list, common_prefixes = build_run_list(test, run_lines, verbose)
 316
 317     simple_functions = find_functions_with_one_bb(input_lines, verbose)
 318
 319     func_dict = {}
 320     for run in run_list:
 321         for prefix in run.prefixes:
 322             func_dict.update({prefix: dict()})
 323     for prefixes, llc_args, triple_in_cmd in run_list:
 324         log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
 325         log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)
 326
 327         raw_tool_output = llc(llc_args, test)
 328         if not triple_in_cmd and not triple_in_ir:
 329             warn('No triple found: skipping file', test_file=test)
 330             return
 331
 332         build_function_body_dictionary(test, raw_tool_output,
 333                                        triple_in_cmd or triple_in_ir,
 334                                        prefixes, func_dict, verbose)
 335
 336     state = 'toplevel'
 337     func_name = None
 338     prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
 339     log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)
 340
 341     if remove_common_prefixes:
 342         prefix_set.update(common_prefixes)
 343     elif common_prefixes:
 344         warn('Ignoring common prefixes: {}'.format(common_prefixes),
 345              test_file=test)
 346
 347     comment_char = '#' if test.endswith('.mir') else ';'
 348     autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
 349                           'utils/{}'.format(comment_char,
 350                                             os.path.basename(__file__)))
 351     output_lines = []
 352     output_lines.append(autogenerated_note)
 353
 354     for input_line in input_lines:
 355         if input_line == autogenerated_note:
 356             continue
 357
 358         if state == 'toplevel':
 359             m = IR_FUNC_NAME_RE.match(input_line)
 360             if m:
 361                 state = 'ir function prefix'
 362                 func_name = m.group('func')
 363             if input_line.strip() == '---':
 364                 state = 'document'
 365             output_lines.append(input_line)
 366         elif state == 'document':
 367             m = MIR_FUNC_NAME_RE.match(input_line)
 368             if m:
 369                 state = 'mir function metadata'
 370                 func_name = m.group('func')
 371             if input_line.strip() == '...':
 372                 state = 'toplevel'
 373                 func_name = None
 374             if should_add_line_to_output(input_line, prefix_set):
 375                 output_lines.append(input_line)
 376         elif state == 'mir function metadata':
 377             if should_add_line_to_output(input_line, prefix_set):
 378                 output_lines.append(input_line)
 379             m = MIR_BODY_BEGIN_RE.match(input_line)
 380             if m:
 381                 if func_name in simple_functions:
 382                     # If there's only one block, put the checks inside it
 383                     state = 'mir function prefix'
 384                     continue
 385                 state = 'mir function body'
 386                 add_checks_for_function(test, output_lines, run_list,
 387                                         func_dict, func_name, add_vreg_checks,
 388                                         single_bb=False, verbose=verbose)
 389         elif state == 'mir function prefix':
 390             m = MIR_PREFIX_DATA_RE.match(input_line)
 391             if not m:
 392                 state = 'mir function body'
 393                 add_checks_for_function(test, output_lines, run_list,
 394                                         func_dict, func_name, add_vreg_checks,
 395                                         single_bb=True, verbose=verbose)
 396
 397             if should_add_line_to_output(input_line, prefix_set):
 398                 output_lines.append(input_line)
 399         elif state == 'mir function body':
 400             if input_line.strip() == '...':
 401                 state = 'toplevel'
 402                 func_name = None
 403             if should_add_line_to_output(input_line, prefix_set):
 404                 output_lines.append(input_line)
 405         elif state == 'ir function prefix':
 406             m = IR_PREFIX_DATA_RE.match(input_line)
 407             if not m:
 408                 state = 'ir function body'
 409                 add_checks_for_function(test, output_lines, run_list,
 410                                         func_dict, func_name, add_vreg_checks,
 411                                         single_bb=False, verbose=verbose)
 412
 413             if should_add_line_to_output(input_line, prefix_set):
 414                 output_lines.append(input_line)
 415         elif state == 'ir function body':
 416             if input_line.strip() == '}':
 417                 state = 'toplevel'
 418                 func_name = None
 419             if should_add_line_to_output(input_line, prefix_set):
 420                 output_lines.append(input_line)
 421
 422
 423     log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)
 424
 425     with open(test, 'wb') as fd:
 426         fd.writelines([l + '\n' for l in output_lines])
 427
 428
 429 def main():
 430     parser = argparse.ArgumentParser(
 431         description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
 432     parser.add_argument('-v', '--verbose', action='store_true',
 433                         help='Show verbose output')
 434     parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
 435                         help='The "llc" binary to generate the test case with')
 436     parser.add_argument('--remove-common-prefixes', action='store_true',
 437                         help='Remove existing check lines whose prefixes are '
 438                              'shared between multiple commands')
 439     parser.add_argument('--add-vreg-checks', action='store_true',
 440                         help='Add checks for the "registers:" block')
 441     parser.add_argument('tests', nargs='+')
 442     args = parser.parse_args()
 443
 444     for test in args.tests:
 445         try:
 446             update_test_file(args.llc, test, args.remove_common_prefixes,
 447                              args.add_vreg_checks, verbose=args.verbose)
 448         except Exception:
 449             warn('Error processing file', test_file=test)
 450             raise
 451
 452
 453 if __name__ == '__main__':
 454   main()