Enhance the way output is written, update README

This commit is contained in:
Oscar Batori
2020-11-30 14:48:20 -08:00
parent c296d449e8
commit 91f65b7bb5
3 changed files with 30 additions and 22 deletions

View File

@@ -20,21 +20,17 @@ We also execute the same set of benchmarks against MySQL for comparison. All the
## Example
A common use-case might be to compare Dolt built from the current working set in your local checkout to MySQL. To do this we can run the following:
```
-./run_benchmarks.sh \
-    bulk_insert,oltp_read_only,oltp_insert,oltp_point_select,select_random_points,select_random_ranges \
-    <username> \
-    current
+$ ./run_benchmarks.sh all <your-username> current
```
-This takes the current checkout of Dolt, builds a binary, and executes the benchmarks in a `docker-compose` setup. It does the same for MySQL. This produces two CSV files containing the results in `benchmark/perf_tools/output`. An example would be:
+This takes the current checkout of Dolt, builds a binary, and executes the supported benchmarks in a `docker-compose` setup. It does the same for MySQL. Each invocation of `run_benchmarks.sh` is associated with a run ID, for example `58296063ab3c2a6701f8f986`, which names the CSV file containing the results:
```
-ls -ltr output
+$ ls -ltr output
total 16
--rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv
--rw-r--r--@ 1 oscarbatori staff 1539 Nov 29 22:04 mysql.csv
+-rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 58296063ab3c2a6701f8f986.csv
```
-This indicates that the results for `current`, the HEAD of the currently checked-out branch plus any uncommitted changes in the working set, are stored in `cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv`, where "dirty" indicates the presence of uncommitted changes. The `mysql.csv` file contains results for MySQL.
+Each row of this file corresponds to a single test run against either MySQL or a build of Dolt, and records which one it measured.
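The exact columns are assembled in `write_output_file` (see the final diff in this commit): run metadata first, then the test name, then the sysbench measurements. The measurement column names come from `OUTPUT_MAPPING`, which this commit does not show, so the tail of this header line is a placeholder:
```
run_id,database,username,committish,timestamp,system_info,table_size,test_name,<sysbench measurement columns>
```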
## Requirements
To run this stack a few things are required:
@@ -45,7 +41,7 @@ To run this stack a few things are required:
## Uploading to DoltHub
We can upload the results to DoltHub using `push_output_to_dolthub.py` as follows:
```
-python push_output_to_dolthub.py --results-directory output --remote-results-db dolthub/dolt-benchmarks-test --remote-results-db-branch test-run
+$ python push_output_to_dolthub.py --results-file output/58296063ab3c2a6701f8f986.csv --remote-results-db dolthub/dolt-benchmarks-test --remote-results-db-branch test-run
```
These results will then be available to the team for analysis, and via our API for rendering on our benchmarking documentation.

View File

@@ -11,21 +11,23 @@ RESULTS_TABLE = 'sysbench_benchmark'
logger = get_logger(__name__)
-def write_results_to_dolt(results_dir: str, remote: str, branch: str):
-    dfs = [pd.read_csv(os.path.join(results_dir, filename)) for filename in os.listdir(results_dir)]
-    table_writer = get_df_table_writer(RESULTS_TABLE, lambda: pd.concat(dfs), RESULTS_TABLE_PKS, import_mode='update')
+def write_results_to_dolt(results_file: str, remote: str, branch: str):
+    table_writer = get_df_table_writer(RESULTS_TABLE,
+                                       lambda: pd.read_csv(results_file),
+                                       RESULTS_TABLE_PKS,
+                                       import_mode='update')
    loader = get_dolt_loader(table_writer, True, 'benchmark run', branch)
    load_to_dolthub(loader, clone=True, push=True, remote_name='origin', remote_url=remote)

def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument('--results-directory', type=str, required=True)
+    parser.add_argument('--results-file', type=str, required=True)
    parser.add_argument('--remote-results-db', type=str, required=True)
    parser.add_argument('--remote-results-db-branch', type=str, required=False, default='master')
    args = parser.parse_args()
    logger.info('Writing the results of the tests')
-    write_results_to_dolt(args.results_directory, args.remote_results_db, args.remote_results_db_branch)
+    write_results_to_dolt(args.results_file, args.remote_results_db, args.remote_results_db_branch)

if __name__ == '__main__':
    main()
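A usage sketch tying the new flag to the README example (script name as in the README; values are the README's example values):
```
$ python push_output_to_dolthub.py \
    --results-file output/58296063ab3c2a6701f8f986.csv \
    --remote-results-db dolthub/dolt-benchmarks-test \
    --remote-results-db-branch test-run
```
Because the table writer is built with `import_mode='update'`, re-uploading the same results file should update rows matching the table's primary keys rather than append duplicates.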

View File

@@ -17,7 +17,12 @@ SUPPORTED_BENCHMARKS = [
    'oltp_insert',
    'oltp_point_select',
    'select_random_points',
-    'select_random_ranges'
+    'select_random_ranges',
+    'oltp_delete',
+    'oltp_write_only',
+    'oltp_read_write',
+    'oltp_update_index',
+    'oltp_update_non_index'
]

TEST_TABLE = 'sbtest1'
@@ -56,7 +61,10 @@ def main():
    logger.setLevel(logging.INFO)
    args = get_args()
    test_list = args.tests.split(',')
-    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
+    if test_list == ['all']:
+        test_list = SUPPORTED_BENCHMARKS
+    else:
+        assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide a list of supported tests'

    logger.info('Running with run ID {}'.format(args.run_id))
    if args.committish:
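For reference, the same expansion in isolation, runnable on its own; `SUPPORTED_BENCHMARKS` is truncated to three entries here:
```
SUPPORTED_BENCHMARKS = ['bulk_insert', 'oltp_read_only', 'oltp_insert']  # truncated

def resolve_tests(tests_arg):
    # Expand the literal 'all' into every supported benchmark; otherwise
    # validate that each named test is supported.
    test_list = tests_arg.split(',')
    if test_list == ['all']:
        return SUPPORTED_BENCHMARKS
    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide a list of supported tests'
    return test_list

print(resolve_tests('all'))                      # all three benchmarks
print(resolve_tests('oltp_insert,bulk_insert'))  # just the named subset
```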
@@ -71,7 +79,7 @@ def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--db-host', help='The host for the database we will connect to')
    parser.add_argument('--committish', help='Commit used to build the Dolt binary being tested')
-    parser.add_argument('--tests', help='List of benchmarks', type=str, required=True)
+    parser.add_argument('--tests', help='Comma-separated list of benchmarks, or "all"', type=str, default='all')
    parser.add_argument('--username', type=str, required=False, default=getpass.getuser())
    parser.add_argument('--note', type=str, required=False, default=None)
    parser.add_argument('--table-size', type=int, default=10000)
@@ -195,21 +203,23 @@ def write_output_file(run_id: str,
                      table_size: int):
    if not os.path.exists('/output'):
        os.mkdir('/output')
-    output_file = '/output/{}.csv'.format(committish if committish else database_name)
+    output_file = '/output/{}.csv'.format(run_id)
+    file_exists = os.path.exists(output_file)
    logger.info('Writing output file to {}'.format(output_file))
-    with open(output_file, 'w', newline='') as csvfile:
+    with open(output_file, 'a' if file_exists else 'w', newline='') as csvfile:
        metadata = {
            'run_id': run_id,
            'database': database_name,
            'username': username,
-            'committish': committish or 'n/a',
+            'committish': committish or 'not-applicable',
            'timestamp': timestamp,
            'system_info': get_os_detail(),
            'table_size': table_size
        }
        fieldnames = list(metadata.keys()) + ['test_name'] + list(OUTPUT_MAPPING.values())
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
+        if not file_exists:
+            writer.writeheader()
        for row in output:
            to_write = {**row, **metadata}
            writer.writerow(to_write)
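The effect of the append-mode change is that both database passes of a single run land in one run-ID file with one header. A minimal, self-contained illustration of the pattern (file and column names here are illustrative only):
```
import csv
import os

def append_rows(path, fieldnames, rows):
    # Open in append mode when the file already exists so successive
    # invocations accumulate rows under a single header.
    file_exists = os.path.exists(path)
    with open(path, 'a' if file_exists else 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
        writer.writerows(rows)

# The first call writes the header; the second only appends rows, so the
# Dolt and MySQL passes of one run share a single <run_id>.csv.
append_rows('58296063ab3c2a6701f8f986.csv', ['database', 'test_name'],
            [{'database': 'dolt', 'test_name': 'oltp_insert'}])
append_rows('58296063ab3c2a6701f8f986.csv', ['database', 'test_name'],
            [{'database': 'mysql', 'test_name': 'oltp_insert'}])
```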