From 91f65b7bb5d919372c080befdd72f4669de3c037 Mon Sep 17 00:00:00 2001
From: Oscar Batori
Date: Mon, 30 Nov 2020 14:48:20 -0800
Subject: [PATCH] Enhance the way output is written, update README

---
 benchmark/perf_tools/README.md             | 16 +++++--------
 .../python/push_output_to_dolthub.py       | 12 ++++++----
 .../perf_tools/python/sysbench_wrapper.py  | 24 +++++++++++++------
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/benchmark/perf_tools/README.md b/benchmark/perf_tools/README.md
index 84632f4374..5c1ac8af54 100644
--- a/benchmark/perf_tools/README.md
+++ b/benchmark/perf_tools/README.md
@@ -20,21 +20,17 @@ We also execute the same set of benchmarks against MySQL for comparison. All the
 ## Example
 A common use-case might be to compare Dolt built from the current working set in your local checkout to MySQL. To do this we can run the following:
 ```
-./run_benchmarks.sh \
-    bulk_insert,oltp_read_only,oltp_insert,oltp_point_select,select_random_points,select_random_ranges
-    \
-    current
+$ ./run_benchmarks.sh all current
 ```
-This takes the current checkout of Dolt, builds a binary, and executes the benchmarks in a `docker-compose` setup. It does the same for MySQL. This produces two CSV files containing the results in `bencharm/perf_tools/output`. An example would be:
+This takes the current checkout of Dolt, builds a binary, and executes the supported benchmarks in a `docker-compose` setup. It does the same for MySQL. Each invocation of `run_benchmarks.sh` is associated with a run ID, for example `58296063ab3c2a6701f8f986`. This run ID names the CSV file containing the results:
 ```
-ls -ltr output
+$ ls -ltr output
 total 16
--rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv
--rw-r--r--@ 1 oscarbatori staff 1539 Nov 29 22:04 mysql.csv
+-rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 58296063ab3c2a6701f8f986.csv
 ```
-This indicates that the results for `current`, the HEAD of the currently checked branch that has uncommitted changes in the working set, are stored in `cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv`, where "dirty" implies the presence of uncommitted changes. The `mysql.csv` file contains results for MySQL.
+Each row in the file corresponds to a single test invocation against either MySQL or a particular build of Dolt, and each row records which it was.
 
 ## Requirements
 To run this stack a few things are required:
@@ -45,7 +41,7 @@ To run this stack a few things are required:
 ## Uploading to DoltHub
 We can upload the results to DoltHub using `push_output_to_dolthub.py` as follows:
 ```
-python push_outputp_to_dolthub.py --result-directory output --remote-results-db dolthub/dolt-benchmarks-test --branch test-run
+$ python push_output_to_dolthub.py --results-file output/58296063ab3c2a6701f8f986.csv --remote-results-db dolthub/dolt-benchmarks-test --remote-results-db-branch test-run
 ```
 These results will then be available to the team for analysis, and via our API for rendering on our benchmarking documentation.
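The per-run CSV is also handy for a quick local sanity check before uploading. Below is a minimal sketch, assuming pandas is available; the metadata columns (`run_id`, `database`, `committish`, `test_name`) are the ones `sysbench_wrapper.py` writes, while `latency_avg` is a hypothetical stand-in for whichever metric columns `OUTPUT_MAPPING` actually emits:

```python
import pandas as pd

# Load the CSV produced by a single run_benchmarks.sh invocation (one run ID).
df = pd.read_csv('output/58296063ab3c2a6701f8f986.csv')

# Compare Dolt builds against MySQL per test. 'latency_avg' is a placeholder
# column name; substitute a metric column defined by OUTPUT_MAPPING.
summary = df.groupby(['database', 'committish', 'test_name'])['latency_avg'].mean()
print(summary)
```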
diff --git a/benchmark/perf_tools/python/push_output_to_dolthub.py b/benchmark/perf_tools/python/push_output_to_dolthub.py
index d91f209d5f..041fac7b38 100644
--- a/benchmark/perf_tools/python/push_output_to_dolthub.py
+++ b/benchmark/perf_tools/python/push_output_to_dolthub.py
@@ -11,21 +11,23 @@ RESULTS_TABLE = 'sysbench_benchmark'
 logger = get_logger(__name__)
 
 
-def write_results_to_dolt(results_dir: str, remote: str, branch: str):
-    dfs = [pd.read_csv(os.path.join(results_dir, filename)) for filename in os.listdir(results_dir)]
-    table_writer = get_df_table_writer(RESULTS_TABLE, lambda: pd.concat(dfs), RESULTS_TABLE_PKS, import_mode='update')
+def write_results_to_dolt(results_file: str, remote: str, branch: str):
+    table_writer = get_df_table_writer(RESULTS_TABLE,
+                                       lambda: pd.read_csv(results_file),
+                                       RESULTS_TABLE_PKS,
+                                       import_mode='update')
     loader = get_dolt_loader(table_writer, True, 'benchmark run', branch)
     load_to_dolthub(loader, clone=True, push=True, remote_name='origin', remote_url=remote)
 
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--results-directory', type=str, required=True)
+    parser.add_argument('--results-file', type=str, required=True)
     parser.add_argument('--remote-results-db', type=str, required=True)
     parser.add_argument('--remote-results-db-branch', type=str, required=False, default='master')
     args = parser.parse_args()
 
     logger.info('Writing the results of the tests')
-    write_results_to_dolt(args.results_directory, args.remote_results_db, args.remote_results_db_branch)
+    write_results_to_dolt(args.results_file, args.remote_results_db, args.remote_results_db_branch)
 
 
 if __name__ == '__main__':
diff --git a/benchmark/perf_tools/python/sysbench_wrapper.py b/benchmark/perf_tools/python/sysbench_wrapper.py
index 3a27a9e2a7..0c74cf7712 100755
--- a/benchmark/perf_tools/python/sysbench_wrapper.py
+++ b/benchmark/perf_tools/python/sysbench_wrapper.py
@@ -17,7 +17,12 @@ SUPPORTED_BENCHMARKS = [
     'oltp_insert',
     'oltp_point_select',
     'select_random_points',
-    'select_random_ranges'
+    'select_random_ranges',
+    'oltp_delete',
+    'oltp_write_only',
+    'oltp_read_write',
+    'oltp_update_index',
+    'oltp_update_non_index'
 ]
 
 TEST_TABLE = 'sbtest1'
@@ -56,7 +61,10 @@ def main():
     logger.setLevel(logging.INFO)
     args = get_args()
     test_list = args.tests.split(',')
-    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
+    if len(test_list) == 1 and test_list == ['all']:
+        test_list = SUPPORTED_BENCHMARKS
+    else:
+        assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
 
     logger.info('Running with run ID {}'.format(args.run_id))
 
     if args.committish:
@@ -71,7 +79,7 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument('--db-host', help='The host for the database we will connect to')
     parser.add_argument('--committish', help='Commit used to build Dolt binary being tested')
-    parser.add_argument('--tests', help='List of benchmarks', type=str, required=True)
+    parser.add_argument('--tests', help='List of benchmarks', type=str, default='all')
     parser.add_argument('--username', type=str, required=False, default=getpass.getuser())
     parser.add_argument('--note', type=str, required=False, default=None)
     parser.add_argument('--table-size', type=int, default=10000)
@@ -195,21 +203,23 @@ def write_output_file(run_id: str, table_size: int):
     if not os.path.exists('/output'):
         os.mkdir('/output')
 
-    output_file = '/output/{}.csv'.format(committish if committish else database_name)
+    output_file = '/output/{}.csv'.format(run_id)
+    file_exists = os.path.exists(output_file)
     logger.info('Writing output file to {}'.format(output_file))
-    with open(output_file, 'w', newline='') as csvfile:
+    with open(output_file, 'a' if file_exists else 'w', newline='') as csvfile:
         metadata = {
             'run_id': run_id,
             'database': database_name,
             'username': username,
-            'committish': committish or 'n/a',
+            'committish': committish or 'not-applicable',
             'timestamp': timestamp,
             'system_info': get_os_detail(),
             'table_size': table_size
         }
         fieldnames = list(metadata.keys()) + ['test_name'] + list(OUTPUT_MAPPING.values())
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
+        if not file_exists:
+            writer.writeheader()
         for row in output:
             to_write = {**row, **metadata}
             writer.writerow(to_write)
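The `write_output_file` change above means repeated `sysbench_wrapper.py` invocations that share a run ID accumulate rows in one CSV: the file is opened in append mode once it exists, and the header is written only when the file is first created. A standalone sketch of that pattern, using an illustrative path and field names rather than the wrapper's real ones:

```python
import csv
import os


def append_rows(path, fieldnames, rows):
    """Append dict rows to a CSV, writing the header only when the file is new."""
    file_exists = os.path.exists(path)
    with open(path, 'a' if file_exists else 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
        writer.writerows(rows)


# Two calls, e.g. one per benchmarked server, yield one file with a single header.
append_rows('example_run.csv', ['test_name', 'database'],
            [{'test_name': 'oltp_read_only', 'database': 'dolt'}])
append_rows('example_run.csv', ['test_name', 'database'],
            [{'test_name': 'oltp_read_only', 'database': 'mysql'}])
```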
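For completeness, the upload step in `push_output_to_dolthub.py` can also be driven from Python rather than the command line. This is only a sketch: it assumes `benchmark/perf_tools/python` is importable, and the remote and branch values are placeholders rather than a real results database:

```python
# Assumes benchmark/perf_tools/python is on PYTHONPATH; values are illustrative.
from push_output_to_dolthub import write_results_to_dolt

# Push a single run's CSV to a DoltHub results database.
write_results_to_dolt('output/58296063ab3c2a6701f8f986.csv',
                      'dolthub/dolt-benchmarks-test',
                      'test-run')
```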