dolt/benchmark-tools/sysbench_wrapper.py
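
"""
Wrapper around sysbench for benchmarking Dolt's SQL server: it sets up a
throwaway Dolt repo, starts a SQL server against it, runs each requested
sysbench test, parses the report, and writes the results to
output/<committish>.csv.

Illustrative invocation (assumes sysbench and Dolt are on the PATH):

    python sysbench_wrapper.py --committish=<committish> --tests=bulk_insert
"""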
import argparse
import csv
import getpass
import logging
import os
import platform
import tempfile
from datetime import datetime
from subprocess import Popen, PIPE
from typing import List, Tuple

from doltpy.core import Dolt

logger = logging.getLogger(__name__)

# This is the list of benchmarks that we have validated can successfully run with Dolt
SUPPORTED_BENCHMARKS = [
    'bulk_insert'
]

TEST_TABLE = 'sbtest1'
RESULTS_TABLE = 'sysbench_benchmark'

OUTPUT_MAPPING = {
    'read': 'sql_read_queries',
    'write': 'sql_write_queries',
    'other': 'sql_other_queries',
    'total': 'sql_total_queries',
    'transactions': 'sql_transactions',
    'ignored errors': 'sql_ignored_errors',
    'reconnects': 'sql_reconnects',
    'total time': 'total_time',
    'total number of events': 'total_number_of_events',
    'min': 'latency_minimum',
    'avg': 'latency_average',
    'max': 'latency_maximum',
    '95th percentile': 'latency_percentile_95th',
    'sum': 'latency_sum'
}
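
# These keys match lines in sysbench's 'SQL statistics', 'General statistics',
# and 'Latency' report sections; an illustrative report line:
#     transactions:                        2048   (204.70 per sec.)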


class SysbenchFailureException(Exception):

    def __init__(self, test: str, stage: str, message: str):
        self.test = test
        self.stage = stage
        self.message = message

    def __str__(self):
        return '{} failed to {} with message:\n{}'.format(self.test, self.stage, self.message)


def setup() -> Dolt:
    # Set up a test repository and start the server
    logger.info('Setting up test repo for benchmarking')
    test_repo = init_empty_test_repo()
    logger.info('Test repo directory {}, starting Dolt SQL server'.format(test_repo.repo_dir()))
    test_repo.sql_server()
    return test_repo


def init_empty_test_repo() -> Dolt:
    temp_dir = tempfile.mkdtemp()
    repo_path, repo_data_dir = get_repo_path_tmp_path(temp_dir)
    assert not os.path.exists(repo_data_dir)
    return Dolt.init(repo_path)


def get_repo_path_tmp_path(path: str, subpath: str = None) -> Tuple[str, str]:
    if subpath:
        return os.path.join(path, subpath), os.path.join(path, subpath, '.dolt')
    else:
        return path, os.path.join(path, '.dolt')
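
# Illustrative: get_repo_path_tmp_path('/tmp/abc') returns
# ('/tmp/abc', '/tmp/abc/.dolt'), and get_repo_path_tmp_path('/tmp/abc', 'repo')
# returns ('/tmp/abc/repo', '/tmp/abc/repo/.dolt').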


def test_loop(test_list: List[str], test_repo: Dolt) -> List[dict]:
    """
    This is the main loop for running the tests and collecting the output
    :param test_list: list of sysbench test names to run
    :param test_repo: Dolt repo the tests are run against
    :return: list of parsed result dicts, one per test that succeeded
    """
    result = []
    for test in test_list:
        try:
            test_output = run_test(test_repo, test)
            cur_test_res = parse_output(test_output)
            cur_test_res['test_name'] = test
            result.append(cur_test_res)
        except SysbenchFailureException as e:
            logger.error('Test {} failed to produce output, moving on, error was:\n{}'.format(test, e))
        except ValueError as e:
            logger.error('Failed to parse test output:\n{}'.format(e))
    return result


def run_test(test_repo: Dolt, test: str) -> str:
    # Ensure any table left over from a previous run is removed
    if TEST_TABLE in [t.name for t in test_repo.ls()]:
        test_repo.table_rm(TEST_TABLE)

    sysbench_args = [
        'sysbench',
        test,
        '--table-size=1000000',
        '--db-driver=mysql',
        '--mysql-db={}'.format(test_repo.repo_name),
        '--mysql-user=root',
        '--mysql-host=127.0.0.1',
    ]

    # Prepare the test
    prepare_exitcode, prepare_output = _execute(sysbench_args + ['prepare'], os.getcwd())
    if prepare_exitcode != 0:
        logger.error(prepare_output)
        raise SysbenchFailureException(test, 'prepare', prepare_output)

    # Run the test
    run_exitcode, run_output = _execute(sysbench_args + ['run'], os.getcwd())
    if run_exitcode != 0:
        logger.error(run_output)
        raise SysbenchFailureException(test, 'run', run_output)

    return run_output


def _execute(args: List[str], cwd: str) -> Tuple[int, str]:
    # Run a subprocess and return its exit code and decoded stdout;
    # stderr is captured but not returned
    proc = Popen(args=args, cwd=cwd, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    exitcode = proc.returncode
    return exitcode, out.decode('utf-8')


def parse_output(to_parse: str) -> dict:
    result = {}
    split = to_parse.split('\n')
    processing_lines = False
    for line in split:
        clean_line = line.strip()
        if not clean_line:
            pass
        elif clean_line.startswith('SQL statistics'):
            processing_lines = True
        elif clean_line.startswith('Threads fairness'):
            return result
        elif processing_lines and clean_line:
            line_split = clean_line.split(':')
            raw_name = line_split[0]
            if len(line_split) > 1 and line_split[1] and raw_name in OUTPUT_MAPPING:
                # Drop parenthesized rates such as '(204.70 per sec.)' and the
                # trailing 's' that sysbench appends to durations
                value_split = line_split[1].strip().split('(')
                clean_value = value_split[0].rstrip('s').rstrip()
                final_value = float(clean_value) if '.' in clean_value else int(clean_value)
                result[OUTPUT_MAPPING[raw_name]] = final_value
    raise ValueError('Could not parse the following output:\n{}'.format(to_parse))
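
# Illustrative example: for report text containing
#     SQL statistics:
#         transactions:                        2048   (204.70 per sec.)
#     General statistics:
#         total time:                          10.0012s
#     Threads fairness:
# parse_output returns {'sql_transactions': 2048, 'total_time': 10.0012}.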


def get_os_detail() -> str:
    return '{}-{}-{}'.format(os.name, platform.system(), platform.release())
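
# Illustrative output: 'posix-Linux-5.4.0-80-generic' or 'posix-Darwin-19.6.0'.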


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--committish', help='Commit used to build the Dolt binary being tested', required=True)
    parser.add_argument('--tests', help='Comma-separated list of benchmarks to run', type=str, required=True)
    parser.add_argument('--username', type=str, required=False, default=getpass.getuser())
    parser.add_argument('--note', type=str, required=False, default=None)
    return parser.parse_args()


def write_output_file(committish: str, username: str, output: List[dict]):
    if not os.path.exists('output'):
        os.mkdir('output')
    output_file = 'output/{}.csv'.format(committish)
    logger.info('Writing output file to {}'.format(output_file))
    with open(output_file, 'w', newline='') as csvfile:
        metadata = {
            'database': 'dolt',
            'username': username,
            'committish': committish,
            'timestamp': datetime.now(),
            'system_info': get_os_detail()
        }
        fieldnames = list(metadata.keys()) + ['test_name'] + list(OUTPUT_MAPPING.values())
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in output:
            to_write = {**row, **metadata}
            writer.writerow(to_write)


def main():
    args = get_args()
    test_list = args.tests.split(',')
    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide a list of supported tests'
    test_db = setup()
    logger.info('Executing the following tests in sysbench: {}'.format(test_list))
    results = test_loop(test_list, test_db)
    write_output_file(args.committish, args.username, results)


if __name__ == '__main__':
    main()