From 91f65b7bb5d919372c080befdd72f4669de3c037 Mon Sep 17 00:00:00 2001
From: Oscar Batori
Date: Mon, 30 Nov 2020 14:48:20 -0800
Subject: [PATCH] Enhance the way output is written, update README

---
 benchmark/perf_tools/README.md             | 16 +++++--------
 .../python/push_output_to_dolthub.py       | 12 ++++++----
 .../perf_tools/python/sysbench_wrapper.py  | 24 +++++++++++++------
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/benchmark/perf_tools/README.md b/benchmark/perf_tools/README.md
index 84632f4374..5c1ac8af54 100644
--- a/benchmark/perf_tools/README.md
+++ b/benchmark/perf_tools/README.md
@@ -20,21 +20,17 @@ We also execute the same set of benchmarks against MySQL for comparison. All the
 ## Example
 A common use-case might be to compare Dolt built from the current working set in your local checkout to MySQL. To do this we can run the following:
 ```
-./run_benchmarks.sh \
-    bulk_insert,oltp_read_only,oltp_insert,oltp_point_select,select_random_points,select_random_ranges
-    \
-    current
+$ ./run_benchmarks.sh all current
 ```
-This takes the current checkout of Dolt, builds a binary, and executes the benchmarks in a `docker-compose` setup. It does the same for MySQL. This produces two CSV files containing the results in `bencharm/perf_tools/output`. An example would be:
+This takes the current checkout of Dolt, builds a binary, and executes the supported benchmarks in a `docker-compose` setup. It does the same for MySQL. Each invocation of `run_benchmarks.sh` is associated with a run ID, for example `58296063ab3c2a6701f8f986`. This run ID names the CSV file containing the results:
 ```
-ls -ltr output
+$ ls -ltr output
 total 16
--rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv
--rw-r--r--@ 1 oscarbatori staff 1539 Nov 29 22:04 mysql.csv
+-rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 58296063ab3c2a6701f8f986.csv
 ```
-This indicates that the results for `current`, the HEAD of the currently checked branch that has uncommitted changes in the working set, are stored in `cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv`, where "dirty" implies the presence of uncommitted changes. The `mysql.csv` file contains results for MySQL.
+Each row in the file corresponds to a single test invocation against either MySQL or a particular build of Dolt, and each row records which it was.
 
 ## Requirements
 To run this stack a few things are required:
@@ -45,7 +41,7 @@ To run this stack a few things are required:
 ## Uploading to DoltHub
 We can upload the results to DoltHub using `push_output_to_dolthub.py` as follows:
 ```
-python push_outputp_to_dolthub.py --result-directory output --remote-results-db dolthub/dolt-benchmarks-test --branch test-run
+$ python push_output_to_dolthub.py --results-file output/58296063ab3c2a6701f8f986.csv --remote-results-db dolthub/dolt-benchmarks-test --remote-results-db-branch test-run
 ```
 These results will then be available to the team for analysis, and via our API for rendering on our benchmarking documentation.
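The per-run CSV is also handy for a quick local sanity check before uploading. Below is a minimal sketch, assuming pandas is available; the metadata columns (`run_id`, `database`, `committish`, `test_name`) are the ones `sysbench_wrapper.py` writes, while `latency_avg` is a hypothetical stand-in for whichever metric columns `OUTPUT_MAPPING` actually emits:

```python
import pandas as pd

# Load the CSV produced by a single run_benchmarks.sh invocation (one run ID).
df = pd.read_csv('output/58296063ab3c2a6701f8f986.csv')

# Compare Dolt builds against MySQL per test. 'latency_avg' is a placeholder
# column name; substitute a metric column defined by OUTPUT_MAPPING.
summary = df.groupby(['database', 'committish', 'test_name'])['latency_avg'].mean()
print(summary)
```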
diff --git a/benchmark/perf_tools/python/push_output_to_dolthub.py b/benchmark/perf_tools/python/push_output_to_dolthub.py
index d91f209d5f..041fac7b38 100644
--- a/benchmark/perf_tools/python/push_output_to_dolthub.py
+++ b/benchmark/perf_tools/python/push_output_to_dolthub.py
@@ -11,21 +11,23 @@ RESULTS_TABLE = 'sysbench_benchmark'
 logger = get_logger(__name__)
 
 
-def write_results_to_dolt(results_dir: str, remote: str, branch: str):
-    dfs = [pd.read_csv(os.path.join(results_dir, filename)) for filename in os.listdir(results_dir)]
-    table_writer = get_df_table_writer(RESULTS_TABLE, lambda: pd.concat(dfs), RESULTS_TABLE_PKS, import_mode='update')
+def write_results_to_dolt(results_file: str, remote: str, branch: str):
+    table_writer = get_df_table_writer(RESULTS_TABLE,
+                                       lambda: pd.read_csv(results_file),
+                                       RESULTS_TABLE_PKS,
+                                       import_mode='update')
     loader = get_dolt_loader(table_writer, True, 'benchmark run', branch)
     load_to_dolthub(loader, clone=True, push=True, remote_name='origin', remote_url=remote)
 
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--results-directory', type=str, required=True)
+    parser.add_argument('--results-file', type=str, required=True)
     parser.add_argument('--remote-results-db', type=str, required=True)
     parser.add_argument('--remote-results-db-branch', type=str, required=False, default='master')
     args = parser.parse_args()
 
     logger.info('Writing the results of the tests')
-    write_results_to_dolt(args.results_directory, args.remote_results_db, args.remote_results_db_branch)
+    write_results_to_dolt(args.results_file, args.remote_results_db, args.remote_results_db_branch)
 
 
 if __name__ == '__main__':
diff --git a/benchmark/perf_tools/python/sysbench_wrapper.py b/benchmark/perf_tools/python/sysbench_wrapper.py
index 3a27a9e2a7..0c74cf7712 100755
--- a/benchmark/perf_tools/python/sysbench_wrapper.py
+++ b/benchmark/perf_tools/python/sysbench_wrapper.py
@@ -17,7 +17,12 @@ SUPPORTED_BENCHMARKS = [
     'oltp_insert',
     'oltp_point_select',
     'select_random_points',
-    'select_random_ranges'
+    'select_random_ranges',
+    'oltp_delete',
+    'oltp_write_only',
+    'oltp_read_write',
+    'oltp_update_index',
+    'oltp_update_non_index'
 ]
 
 TEST_TABLE = 'sbtest1'
@@ -56,7 +61,10 @@ def main():
     logger.setLevel(logging.INFO)
     args = get_args()
     test_list = args.tests.split(',')
-    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
+    if len(test_list) == 1 and test_list == ['all']:
+        test_list = SUPPORTED_BENCHMARKS
+    else:
+        assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
 
     logger.info('Running with run ID {}'.format(args.run_id))
 
     if args.committish:
@@ -71,7 +79,7 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument('--db-host', help='The host for the database we will connect to')
     parser.add_argument('--committish', help='Commit used to build Dolt binary being tested')
-    parser.add_argument('--tests', help='List of benchmarks', type=str, required=True)
+    parser.add_argument('--tests', help='List of benchmarks', type=str, default='all')
     parser.add_argument('--username', type=str, required=False, default=getpass.getuser())
     parser.add_argument('--note', type=str, required=False, default=None)
     parser.add_argument('--table-size', type=int, default=10000)
@@ -195,21 +203,23 @@ def write_output_file(run_id: str, table_size: int):
     if not os.path.exists('/output'):
         os.mkdir('/output')
 
-    output_file = '/output/{}.csv'.format(committish if committish else database_name)
+    output_file = '/output/{}.csv'.format(run_id)
+    file_exists = os.path.exists(output_file)
     logger.info('Writing output file to {}'.format(output_file))
-    with open(output_file, 'w', newline='') as csvfile:
+    with open(output_file, 'a' if file_exists else 'w', newline='') as csvfile:
         metadata = {
             'run_id': run_id,
             'database': database_name,
             'username': username,
-            'committish': committish or 'n/a',
+            'committish': committish or 'not-applicable',
             'timestamp': timestamp,
             'system_info': get_os_detail(),
             'table_size': table_size
         }
         fieldnames = list(metadata.keys()) + ['test_name'] + list(OUTPUT_MAPPING.values())
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
+        if not file_exists:
+            writer.writeheader()
         for row in output:
             to_write = {**row, **metadata}
             writer.writerow(to_write)
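The `write_output_file` change above means repeated `sysbench_wrapper.py` invocations that share a run ID accumulate rows in one CSV: the file is opened in append mode once it exists, and the header is written only when the file is first created. A standalone sketch of that pattern, using an illustrative path and field names rather than the wrapper's real ones:

```python
import csv
import os


def append_rows(path, fieldnames, rows):
    """Append dict rows to a CSV, writing the header only when the file is new."""
    file_exists = os.path.exists(path)
    with open(path, 'a' if file_exists else 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
        writer.writerows(rows)


# Two calls, e.g. one per benchmarked server, yield one file with a single header.
append_rows('example_run.csv', ['test_name', 'database'],
            [{'test_name': 'oltp_read_only', 'database': 'dolt'}])
append_rows('example_run.csv', ['test_name', 'database'],
            [{'test_name': 'oltp_read_only', 'database': 'mysql'}])
```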
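For completeness, the upload step in `push_output_to_dolthub.py` can also be driven from Python rather than the command line. This is only a sketch: it assumes `benchmark/perf_tools/python` is importable, and the remote and branch values are placeholders rather than a real results database:

```python
# Assumes benchmark/perf_tools/python is on PYTHONPATH; values are illustrative.
from push_output_to_dolthub import write_results_to_dolt

# Push a single run's CSV to a DoltHub results database.
write_results_to_dolt('output/58296063ab3c2a6701f8f986.csv',
                      'dolthub/dolt-benchmarks-test',
                      'test-run')
```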