tests/test-zstd-speed.py

   1 #! /usr/bin/env python3
   2
   3 # ################################################################
   4 # Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
   5 # All rights reserved.
   6 #
   7 # This source code is licensed under both the BSD-style license (found in the
   8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
   9 # in the COPYING file in the root directory of this source tree).
  10 # ##########################################################################
  11
  12 # Limitations:
  13 # - doesn't support filenames with spaces
  14 # - dir1/zstd and dir2/zstd will be merged in a single results file
  15
  16 import argparse
  17 import os           # getloadavg
  18 import string
  19 import subprocess
  20 import time         # strftime
  21 import traceback
  22 import hashlib
  23 import platform     # system
  24
  25 script_version = 'v1.1.2 (2017-03-26)'
  26 default_repo_url = 'https://github.com/facebook/zstd.git'
  27 working_dir_name = 'speedTest'
  28 working_path = os.getcwd() + '/' + working_dir_name     # /path/to/zstd/tests/speedTest
  29 clone_path = working_path + '/' + 'zstd'                # /path/to/zstd/tests/speedTest/zstd
  30 email_header = 'ZSTD_speedTest'
  31 pid = str(os.getpid())
  32 verbose = False
  33 clang_version = "unknown"
  34 gcc_version = "unknown"
  35 args = None
  36
  37
  38 def hashfile(hasher, fname, blocksize=65536):
  39     with open(fname, "rb") as f:
  40         for chunk in iter(lambda: f.read(blocksize), b""):
  41             hasher.update(chunk)
  42     return hasher.hexdigest()
  43
  44
  45 def log(text):
  46     print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text)
  47
  48
  49 def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
  50     if print_command:
  51         log("> " + command)
  52     popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd)
  53     stdout_lines, stderr_lines = popen.communicate(timeout=args.timeout)
  54     stderr_lines = stderr_lines.decode("utf-8")
  55     stdout_lines = stdout_lines.decode("utf-8")
  56     if print_output:
  57         if stdout_lines:
  58             print(stdout_lines)
  59         if stderr_lines:
  60             print(stderr_lines)
  61     if popen.returncode is not None and popen.returncode != 0:
  62         if stderr_lines and not print_output and print_error:
  63             print(stderr_lines)
  64         raise RuntimeError(stdout_lines + stderr_lines)
  65     return (stdout_lines + stderr_lines).splitlines()
  66 execute.cwd = None
  67
  68
  69 def does_command_exist(command):
  70     try:
  71         execute(command, verbose, False, False)
  72     except Exception:
  73         return False
  74     return True
  75
  76
  77 def send_email(emails, topic, text, have_mutt, have_mail):
  78     logFileName = working_path + '/' + 'tmpEmailContent'
  79     with open(logFileName, "w") as myfile:
  80         myfile.writelines(text)
  81         myfile.close()
  82         if have_mutt:
  83             execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
  84         elif have_mail:
  85             execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
  86         else:
  87             log("e-mail cannot be sent (mail or mutt not found)")
  88
  89
  90 def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
  91                                 logFileName, have_mutt, have_mail):
  92     with open(logFileName, "w") as myfile:
  93         myfile.writelines(text)
  94         myfile.close()
  95         email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \
  96                       % (email_header, pid, branch, commit, last_commit,
  97                          args.lowerLimit, args.ratioLimit)
  98         if have_mutt:
  99             execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files
 100                     + ' < ' + logFileName)
 101         elif have_mail:
 102             execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName)
 103         else:
 104             log("e-mail cannot be sent (mail or mutt not found)")
 105
 106
 107 def git_get_branches():
 108     execute('git fetch -p', verbose)
 109     branches = execute('git branch -rl', verbose)
 110     output = []
 111     for line in branches:
 112         if ("HEAD" not in line) and ("coverity_scan" not in line) and ("gh-pages" not in line):
 113             output.append(line.strip())
 114     return output
 115
 116
 117 def git_get_changes(branch, commit, last_commit):
 118     fmt = '--format="%h: (%an) %s, %ar"'
 119     if last_commit is None:
 120         commits = execute('git log -n 10 %s %s' % (fmt, commit))
 121     else:
 122         commits = execute('git --no-pager log %s %s..%s' % (fmt, last_commit, commit))
 123     return str('Changes in %s since %s:\n' % (branch, last_commit)) + '\n'.join(commits)
 124
 125
 126 def get_last_results(resultsFileName):
 127     if not os.path.isfile(resultsFileName):
 128         return None, None, None, None
 129     commit = None
 130     csize = []
 131     cspeed = []
 132     dspeed = []
 133     with open(resultsFileName, 'r') as f:
 134         for line in f:
 135             words = line.split()
 136             if len(words) <= 4:   # branch + commit + compilerVer + md5
 137                 commit = words[1]
 138                 csize = []
 139                 cspeed = []
 140                 dspeed = []
 141             if (len(words) == 8) or (len(words) == 9):  # results: "filename" or "XX files"
 142                 csize.append(int(words[1]))
 143                 cspeed.append(float(words[3]))
 144                 dspeed.append(float(words[5]))
 145     return commit, csize, cspeed, dspeed
 146
 147
 148 def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName,
 149                           testFilePath, fileName, last_csize, last_cspeed, last_dspeed):
 150     sleepTime = 30
 151     while os.getloadavg()[0] > args.maxLoadAvg:
 152         log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds"
 153             % (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
 154         time.sleep(sleepTime)
 155     start_load = str(os.getloadavg())
 156     osType = platform.system()
 157     if osType == 'Linux':
 158         cpuSelector = "taskset --cpu-list 0"
 159     else:
 160         cpuSelector = ""
 161     if args.dictionary:
 162         result = execute('%s programs/%s -rqi5b1e%s -D %s %s' % (cpuSelector, executableName, args.lastCLevel, args.dictionary, testFilePath), print_output=True)
 163     else:
 164         result = execute('%s programs/%s -rqi5b1e%s %s' % (cpuSelector, executableName, args.lastCLevel, testFilePath), print_output=True)
 165     end_load = str(os.getloadavg())
 166     linesExpected = args.lastCLevel + 1
 167     if len(result) != linesExpected:
 168         raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result)))
 169     with open(resultsFileName, "a") as myfile:
 170         myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum))
 171         myfile.write('\n'.join(result) + '\n')
 172         myfile.close()
 173         if (last_cspeed == None):
 174             log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName))
 175             return ""
 176         commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
 177         text = ""
 178         for i in range(0, min(len(cspeed), len(last_cspeed))):
 179             print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], float(last_csize[i])/csize[i], fileName))
 180             if (cspeed[i]/last_cspeed[i] < args.lowerLimit):
 181                 text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName)
 182             if (dspeed[i]/last_dspeed[i] < args.lowerLimit):
 183                 text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName)
 184             if (float(last_csize[i])/csize[i] < args.ratioLimit):
 185                 text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, i+1, csize[i], last_csize[i], float(last_csize[i])/csize[i], fileName)
 186         if text:
 187             text = args.message + ("\nmaxLoadAvg=%s  load average at start=%s end=%s\n%s  last_commit=%s  md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text
 188         return text
 189
 190
 191 def update_config_file(branch, commit):
 192     last_commit = None
 193     commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt"
 194     if os.path.isfile(commitFileName):
 195         with open(commitFileName, 'r') as infile:
 196             last_commit = infile.read()
 197     with open(commitFileName, 'w') as outfile:
 198         outfile.write(commit)
 199     return last_commit
 200
 201
 202 def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName):
 203     last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
 204     if not args.dry_run:
 205         text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
 206         if text:
 207             log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
 208             text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
 209     return text
 210
 211
 212 def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
 213     local_branch = branch.split('/')[1]
 214     version = local_branch.rpartition('-')[2] + '_' + commit
 215     if not args.dry_run:
 216         execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version +
 217                 'mv programs/zstd programs/zstd_clang && ' +
 218                 'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version)
 219     md5_zstd = hashfile(hashlib.md5(), clone_path + '/programs/zstd')
 220     md5_zstd32 = hashfile(hashlib.md5(), clone_path + '/programs/zstd32')
 221     md5_zstd_clang = hashfile(hashlib.md5(), clone_path + '/programs/zstd_clang')
 222     print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang))
 223     print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version))
 224
 225     logFileName = working_path + "/log_" + branch.replace("/", "_") + ".txt"
 226     text_to_send = []
 227     results_files = ""
 228     if args.dictionary:
 229         dictName = args.dictionary.rpartition('/')[2]
 230     else:
 231         dictName = None
 232
 233     for filePath in testFilePaths:
 234         fileName = filePath.rpartition('/')[2]
 235         if dictName:
 236             resultsFileName = working_path + "/" + dictName.replace(".", "_") + "_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
 237         else:
 238             resultsFileName = working_path + "/results_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
 239         text = double_check(branch, commit, args, 'zstd', md5_zstd, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName)
 240         if text:
 241             text_to_send.append(text)
 242             results_files += resultsFileName + " "
 243         resultsFileName = working_path + "/results32_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
 244         text = double_check(branch, commit, args, 'zstd32', md5_zstd32, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName)
 245         if text:
 246             text_to_send.append(text)
 247             results_files += resultsFileName + " "
 248         resultsFileName = working_path + "/resultsClang_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
 249         text = double_check(branch, commit, args, 'zstd_clang', md5_zstd_clang, 'clang_version='+clang_version, resultsFileName, filePath, fileName)
 250         if text:
 251             text_to_send.append(text)
 252             results_files += resultsFileName + " "
 253     if text_to_send:
 254         send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail)
 255
 256
 257 if __name__ == '__main__':
 258     parser = argparse.ArgumentParser()
 259     parser.add_argument('testFileNames', help='file or directory names list for speed benchmark')
 260     parser.add_argument('emails', help='list of e-mail addresses to send warnings')
 261     parser.add_argument('--dictionary', '-D', help='path to the dictionary')
 262     parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="")
 263     parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url)
 264     parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98)
 265     parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999)
 266     parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
 267     parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
 268     parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300)
 269     parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800)
 270     parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
 271     parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False)
 272     args = parser.parse_args()
 273     verbose = args.verbose
 274
 275     # check if test files are accessible
 276     testFileNames = args.testFileNames.split()
 277     testFilePaths = []
 278     for fileName in testFileNames:
 279         fileName = os.path.expanduser(fileName)
 280         if os.path.isfile(fileName) or os.path.isdir(fileName):
 281             testFilePaths.append(os.path.abspath(fileName))
 282         else:
 283             log("ERROR: File/directory not found: " + fileName)
 284             exit(1)
 285
 286     # check if dictionary is accessible
 287     if args.dictionary:
 288         args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary))
 289         if not os.path.isfile(args.dictionary):
 290             log("ERROR: Dictionary not found: " + args.dictionary)
 291             exit(1)
 292
 293     # check availability of e-mail senders
 294     have_mutt = does_command_exist("mutt -h")
 295     have_mail = does_command_exist("mail -V")
 296     if not have_mutt and not have_mail:
 297         log("ERROR: e-mail senders 'mail' or 'mutt' not found")
 298         exit(1)
 299
 300     clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0];
 301     gcc_version = execute("gcc -dumpversion", verbose)[0];
 302
 303     if verbose:
 304         print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
 305         print("working_path=%s" % working_path)
 306         print("clone_path=%s" % clone_path)
 307         print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
 308         print("message=%s" % args.message)
 309         print("emails=%s" % args.emails)
 310         print("dictionary=%s" % args.dictionary)
 311         print("maxLoadAvg=%s" % args.maxLoadAvg)
 312         print("lowerLimit=%s" % args.lowerLimit)
 313         print("ratioLimit=%s" % args.ratioLimit)
 314         print("lastCLevel=%s" % args.lastCLevel)
 315         print("sleepTime=%s" % args.sleepTime)
 316         print("timeout=%s" % args.timeout)
 317         print("dry_run=%s" % args.dry_run)
 318         print("verbose=%s" % args.verbose)
 319         print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))
 320
 321     # clone ZSTD repo if needed
 322     if not os.path.isdir(working_path):
 323         os.mkdir(working_path)
 324     if not os.path.isdir(clone_path):
 325         execute.cwd = working_path
 326         execute('git clone ' + args.repoURL)
 327     if not os.path.isdir(clone_path):
 328         log("ERROR: ZSTD clone not found: " + clone_path)
 329         exit(1)
 330     execute.cwd = clone_path
 331
 332     # check if speedTest.pid already exists
 333     pidfile = "./speedTest.pid"
 334     if os.path.isfile(pidfile):
 335         log("ERROR: %s already exists, exiting" % pidfile)
 336         exit(1)
 337
 338     send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
 339     with open(pidfile, 'w') as the_file:
 340         the_file.write(pid)
 341
 342     branch = ""
 343     commit = ""
 344     first_time = True
 345     while True:
 346         try:
 347             if first_time:
 348                 first_time = False
 349             else:
 350                 time.sleep(args.sleepTime)
 351             loadavg = os.getloadavg()[0]
 352             if (loadavg <= args.maxLoadAvg):
 353                 branches = git_get_branches()
 354                 for branch in branches:
 355                     commit = execute('git show -s --format=%h ' + branch, verbose)[0]
 356                     last_commit = update_config_file(branch, commit)
 357                     if commit == last_commit:
 358                         log("skipping branch %s: head %s already processed" % (branch, commit))
 359                     else:
 360                         log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit))
 361                         execute('git checkout -- . && git checkout ' + branch)
 362                         print(git_get_changes(branch, commit, last_commit))
 363                         test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail)
 364             else:
 365                 log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
 366             if verbose:
 367                 log("sleep for %s seconds" % args.sleepTime)
 368         except Exception as e:
 369             stack = traceback.format_exc()
 370             email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit)
 371             send_email(args.emails, email_topic, stack, have_mutt, have_mail)
 372             print(stack)
 373         except KeyboardInterrupt:
 374             os.unlink(pidfile)
 375             send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
 376             exit(0)