Enhance the way output is written, update README

This commit is contained in:
Oscar Batori
2020-11-30 14:48:20 -08:00
parent c296d449e8
commit 91f65b7bb5
3 changed files with 30 additions and 22 deletions

View File

@@ -20,21 +20,17 @@ We also execute the same set of benchmarks against MySQL for comparison. All the
## Example
A common use-case might be to compare Dolt built from the current working set in your local checkout to MySQL. To do this we can run the following:
```
-./run_benchmarks.sh \
-    bulk_insert,oltp_read_only,oltp_insert,oltp_point_select,select_random_points,select_random_ranges \
-    <username> \
-    current
+$ ./run_benchmarks.sh all <your-username> current
```
-This takes the current checkout of Dolt, builds a binary, and executes the benchmarks in a `docker-compose` setup. It does the same for MySQL. This produces two CSV files containing the results in `benchmark/perf_tools/output`. An example would be:
+This takes the current checkout of Dolt, builds a binary, and executes the supported benchmarks in a `docker-compose` setup. It does the same for MySQL. Each invocation of `run_benchmarks.sh` is associated with a run ID, for example `58296063ab3c2a6701f8f986`, which names the CSV file containing the results:
```
-ls -ltr output
+$ ls -ltr output
total 16
--rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv
--rw-r--r--@ 1 oscarbatori staff 1539 Nov 29 22:04 mysql.csv
+-rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 58296063ab3c2a6701f8f986.csv
```
-This indicates that the results for `current`, the HEAD of the currently checked-out branch plus any uncommitted changes in the working set, are stored in `cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv`, where "dirty" indicates the presence of uncommitted changes. The `mysql.csv` file contains results for MySQL.
+Each row of this file corresponds to a single test run against either MySQL or a build of Dolt, and records which one it measured.
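The exact columns are assembled in `write_output_file` (see the final diff in this commit): run metadata first, then the test name, then the sysbench measurements. The measurement column names come from `OUTPUT_MAPPING`, which this commit does not show, so the tail of this header line is a placeholder:
```
run_id,database,username,committish,timestamp,system_info,table_size,test_name,<sysbench measurement columns>
```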
## Requirements
To run this stack a few things are required:
@@ -45,7 +41,7 @@ To run this stack a few things are required:
## Uploading to DoltHub
We can upload the results to DoltHub using `push_output_to_dolthub.py` as follows:
```
-python push_output_to_dolthub.py --results-directory output --remote-results-db dolthub/dolt-benchmarks-test --remote-results-db-branch test-run
+$ python push_output_to_dolthub.py --results-file output/58296063ab3c2a6701f8f986.csv --remote-results-db dolthub/dolt-benchmarks-test --remote-results-db-branch test-run
```
These results will then be available to the team for analysis, and via our API for rendering on our benchmarking documentation.

View File

@@ -11,21 +11,23 @@ RESULTS_TABLE = 'sysbench_benchmark'
logger = get_logger(__name__)
-def write_results_to_dolt(results_dir: str, remote: str, branch: str):
-    dfs = [pd.read_csv(os.path.join(results_dir, filename)) for filename in os.listdir(results_dir)]
-    table_writer = get_df_table_writer(RESULTS_TABLE, lambda: pd.concat(dfs), RESULTS_TABLE_PKS, import_mode='update')
+def write_results_to_dolt(results_file: str, remote: str, branch: str):
+    table_writer = get_df_table_writer(RESULTS_TABLE,
+                                       lambda: pd.read_csv(results_file),
+                                       RESULTS_TABLE_PKS,
+                                       import_mode='update')
    loader = get_dolt_loader(table_writer, True, 'benchmark run', branch)
    load_to_dolthub(loader, clone=True, push=True, remote_name='origin', remote_url=remote)

def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument('--results-directory', type=str, required=True)
+    parser.add_argument('--results-file', type=str, required=True)
    parser.add_argument('--remote-results-db', type=str, required=True)
    parser.add_argument('--remote-results-db-branch', type=str, required=False, default='master')
    args = parser.parse_args()
    logger.info('Writing the results of the tests')
-    write_results_to_dolt(args.results_directory, args.remote_results_db, args.remote_results_db_branch)
+    write_results_to_dolt(args.results_file, args.remote_results_db, args.remote_results_db_branch)

if __name__ == '__main__':
    main()
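A usage sketch tying the new flag to the README example (script name as in the README; values are the README's example values):
```
$ python push_output_to_dolthub.py \
    --results-file output/58296063ab3c2a6701f8f986.csv \
    --remote-results-db dolthub/dolt-benchmarks-test \
    --remote-results-db-branch test-run
```
Because the table writer is built with `import_mode='update'`, re-uploading the same results file should update rows matching the table's primary keys rather than append duplicates.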

View File

@@ -17,7 +17,12 @@ SUPPORTED_BENCHMARKS = [
    'oltp_insert',
    'oltp_point_select',
    'select_random_points',
-    'select_random_ranges'
+    'select_random_ranges',
+    'oltp_delete',
+    'oltp_write_only',
+    'oltp_read_write',
+    'oltp_update_index',
+    'oltp_update_non_index'
]

TEST_TABLE = 'sbtest1'
@@ -56,7 +61,10 @@ def main():
    logger.setLevel(logging.INFO)
    args = get_args()
    test_list = args.tests.split(',')
-    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
+    if test_list == ['all']:
+        test_list = SUPPORTED_BENCHMARKS
+    else:
+        assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide a list of supported tests'

    logger.info('Running with run ID {}'.format(args.run_id))
    if args.committish:
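For reference, the same expansion in isolation, runnable on its own; `SUPPORTED_BENCHMARKS` is truncated to three entries here:
```
SUPPORTED_BENCHMARKS = ['bulk_insert', 'oltp_read_only', 'oltp_insert']  # truncated

def resolve_tests(tests_arg):
    # Expand the literal 'all' into every supported benchmark; otherwise
    # validate that each named test is supported.
    test_list = tests_arg.split(',')
    if test_list == ['all']:
        return SUPPORTED_BENCHMARKS
    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide a list of supported tests'
    return test_list

print(resolve_tests('all'))                      # all three benchmarks
print(resolve_tests('oltp_insert,bulk_insert'))  # just the named subset
```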
@@ -71,7 +79,7 @@ def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--db-host', help='The host for the database we will connect to')
    parser.add_argument('--committish', help='Commit used to build the Dolt binary being tested')
-    parser.add_argument('--tests', help='List of benchmarks', type=str, required=True)
+    parser.add_argument('--tests', help='Comma-separated list of benchmarks, or "all"', type=str, default='all')
    parser.add_argument('--username', type=str, required=False, default=getpass.getuser())
    parser.add_argument('--note', type=str, required=False, default=None)
    parser.add_argument('--table-size', type=int, default=10000)
@@ -195,21 +203,23 @@ def write_output_file(run_id: str,
                      table_size: int):
    if not os.path.exists('/output'):
        os.mkdir('/output')
-    output_file = '/output/{}.csv'.format(committish if committish else database_name)
+    output_file = '/output/{}.csv'.format(run_id)
+    file_exists = os.path.exists(output_file)
    logger.info('Writing output file to {}'.format(output_file))
-    with open(output_file, 'w', newline='') as csvfile:
+    with open(output_file, 'a' if file_exists else 'w', newline='') as csvfile:
        metadata = {
            'run_id': run_id,
            'database': database_name,
            'username': username,
-            'committish': committish or 'n/a',
+            'committish': committish or 'not-applicable',
            'timestamp': timestamp,
            'system_info': get_os_detail(),
            'table_size': table_size
        }
        fieldnames = list(metadata.keys()) + ['test_name'] + list(OUTPUT_MAPPING.values())
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
+        if not file_exists:
+            writer.writeheader()
        for row in output:
            to_write = {**row, **metadata}
            writer.writerow(to_write)
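The effect of the append-mode change is that both database passes of a single run land in one run-ID file with one header. A minimal, self-contained illustration of the pattern (file and column names here are illustrative only):
```
import csv
import os

def append_rows(path, fieldnames, rows):
    # Open in append mode when the file already exists so successive
    # invocations accumulate rows under a single header.
    file_exists = os.path.exists(path)
    with open(path, 'a' if file_exists else 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
        writer.writerows(rows)

# The first call writes the header; the second only appends rows, so the
# Dolt and MySQL passes of one run share a single <run_id>.csv.
append_rows('58296063ab3c2a6701f8f986.csv', ['database', 'test_name'],
            [{'database': 'dolt', 'test_name': 'oltp_insert'}])
append_rows('58296063ab3c2a6701f8f986.csv', ['database', 'test_name'],
            [{'database': 'mysql', 'test_name': 'oltp_insert'}])
```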