Enhance the way output is written, update README
@@ -20,21 +20,17 @@ We also execute the same set of benchmarks against MySQL for comparison. All the
 ## Example
 A common use-case might be to compare Dolt built from the current working set in your local checkout to MySQL. To do this we can run the following:
 ```
-./run_benchmarks.sh \
-    bulk_insert,oltp_read_only,oltp_insert,oltp_point_select,select_random_points,select_random_ranges \
-    <username> \
-    current
+$ ./run_benchmarks.sh all <your-username> current
 ```

-This takes the current checkout of Dolt, builds a binary, and executes the benchmarks in a `docker-compose` setup. It does the same for MySQL. This produces two CSV files containing the results in `bencharm/perf_tools/output`. An example would be:
+This takes the current checkout of Dolt, builds a binary, and executes the supported benchmarks in a `docker-compose` setup. It does the same for MySQL. Each invocation of `run_benchmarks.sh` is associated with a run ID, for example `58296063ab3c2a6701f8f986`. This run ID identifies the CSV file:
 ```
-ls -ltr output
+$ ls -ltr output
 total 16
--rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv
--rw-r--r--@ 1 oscarbatori staff 1539 Nov 29 22:04 mysql.csv
+-rw-r--r-- 1 oscarbatori staff 1727 Nov 29 19:59 58296063ab3c2a6701f8f986.csv
 ```

-This indicates that the results for `current`, the HEAD of the currently checked branch that has uncommitted changes in the working set, are stored in `cedbe9b0d2516b4b05661af2b07f0765bc6f1816-dirty.csv`, where "dirty" implies the presence of uncommitted changes. The `mysql.csv` file contains results for MySQL.
+Each row of this file corresponds to an invocation of a test against either MySQL or a particular build of Dolt, and each row records which.

 ## Requirements
 To run this stack a few things are required:
@@ -45,7 +41,7 @@ To run this stack a few things are required:
 ## Uploading to DoltHub
 We can upload the results to DoltHub using `push_results_to_dolthub.py` as follows:
 ```
-python push_outputp_to_dolthub.py --result-directory output --remote-results-db dolthub/dolt-benchmarks-test --branch test-run
+$ python push_results_to_dolthub.py --results-file output/58296063ab3c2a6701f8f986.csv --remote-results-db dolthub/dolt-benchmarks-test --branch test-run
 ```

 These results will then be available to the team for analysis, and via our API for rendering on our benchmarking documentation.
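
For illustration only (this sketch is not part of the diff), a run's CSV can be inspected directly with pandas, which the push script already uses. The column names assumed here (`run_id`, `database`, `test_name`) are taken from the `write_output_file` changes further down; the metric columns defined by `OUTPUT_MAPPING` are not visible in this diff, so they are not referenced.

```
# Minimal sketch: load a single run's CSV and group rows by database so the
# Dolt and MySQL results from the same run can be compared test by test.
import pandas as pd

df = pd.read_csv('output/58296063ab3c2a6701f8f986.csv')

for database, rows in df.groupby('database'):
    # each row is one benchmark invocation recorded for this database
    print(database, sorted(rows['test_name'].unique()))
```

Because both databases now share one run ID, a single file carries everything needed for a side-by-side comparison.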
@@ -11,21 +11,23 @@ RESULTS_TABLE = 'sysbench_benchmark'
 logger = get_logger(__name__)


-def write_results_to_dolt(results_dir: str, remote: str, branch: str):
-    dfs = [pd.read_csv(os.path.join(results_dir, filename)) for filename in os.listdir(results_dir)]
-    table_writer = get_df_table_writer(RESULTS_TABLE, lambda: pd.concat(dfs), RESULTS_TABLE_PKS, import_mode='update')
+def write_results_to_dolt(results_file: str, remote: str, branch: str):
+    table_writer = get_df_table_writer(RESULTS_TABLE,
+                                       lambda: pd.read_csv(results_file),
+                                       RESULTS_TABLE_PKS,
+                                       import_mode='update')
     loader = get_dolt_loader(table_writer, True, 'benchmark run', branch)
     load_to_dolthub(loader, clone=True, push=True, remote_name='origin', remote_url=remote)


 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--results-directory', type=str, required=True)
+    parser.add_argument('--results-file', type=str, required=True)
     parser.add_argument('--remote-results-db', type=str, required=True)
     parser.add_argument('--remote-results-db-branch', type=str, required=False, default='master')
     args = parser.parse_args()
     logger.info('Writing the results of the tests')
-    write_results_to_dolt(args.results_directory, args.remote_results_db, args.remote_results_db_branch)
+    write_results_to_dolt(args.results_file, args.remote_results_db, args.remote_results_db_branch)


 if __name__ == '__main__':
@@ -17,7 +17,12 @@ SUPPORTED_BENCHMARKS = [
     'oltp_insert',
     'oltp_point_select',
     'select_random_points',
-    'select_random_ranges'
+    'select_random_ranges',
+    'oltp_delete',
+    'oltp_write_only',
+    'oltp_read_write',
+    'oltp_update_index',
+    'oltp_update_non_index'
 ]

 TEST_TABLE = 'sbtest1'
@@ -56,7 +61,10 @@ def main():
     logger.setLevel(logging.INFO)
     args = get_args()
     test_list = args.tests.split(',')
-    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
+    if len(test_list) == 1 and test_list == ['all']:
+        test_list = SUPPORTED_BENCHMARKS
+    else:
+        assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'

     logger.info('Running with run ID {}'.format(args.run_id))
     if args.committish:
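
For illustration (not part of the diff), the test-list handling added to `main()` above behaves like the following self-contained sketch; the helper name `resolve_tests` and the abbreviated benchmark list are introduced only for the example.

```
# Sketch of the new behaviour: 'all' expands to every supported benchmark,
# while an explicit comma-separated list is validated against the supported set.
SUPPORTED_BENCHMARKS = ['oltp_read_only', 'oltp_insert', 'oltp_point_select']  # abbreviated


def resolve_tests(tests_arg: str) -> list:
    test_list = tests_arg.split(',')
    if len(test_list) == 1 and test_list == ['all']:
        return SUPPORTED_BENCHMARKS
    assert all(test in SUPPORTED_BENCHMARKS for test in test_list), 'Must provide list of supported tests'
    return test_list


print(resolve_tests('all'))                            # the full supported list
print(resolve_tests('oltp_insert,oltp_point_select'))  # passes validation unchanged
```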
@@ -71,7 +79,7 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument('--db-host', help='The host for the database we will connect to')
     parser.add_argument('--committish', help='Commit used to build Dolt binary being tested')
-    parser.add_argument('--tests', help='List of benchmarks', type=str, required=True)
+    parser.add_argument('--tests', help='List of benchmarks', type=str, default=True)
     parser.add_argument('--username', type=str, required=False, default=getpass.getuser())
     parser.add_argument('--note', type=str, required=False, default=None)
     parser.add_argument('--table-size', type=int, default=10000)
@@ -195,21 +203,23 @@ def write_output_file(run_id: str,
                       table_size: int):
     if not os.path.exists('/output'):
         os.mkdir('/output')
-    output_file = '/output/{}.csv'.format(committish if committish else database_name)
+    output_file = '/output/{}.csv'.format(run_id)
+    file_exists = os.path.exists(output_file)
     logger.info('Writing output file to {}'.format(output_file))
-    with open(output_file, 'w', newline='') as csvfile:
+    with open(output_file, 'a' if file_exists else 'w', newline='') as csvfile:
         metadata = {
             'run_id': run_id,
             'database': database_name,
             'username': username,
-            'committish': committish or 'n/a',
+            'committish': committish or 'not-applicable',
             'timestamp': timestamp,
             'system_info': get_os_detail(),
             'table_size': table_size
         }
         fieldnames = list(metadata.keys()) + ['test_name'] + list(OUTPUT_MAPPING.values())
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
+        if not file_exists:
+            writer.writeheader()
         for row in output:
             to_write = {**row, **metadata}
             writer.writerow(to_write)
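
For illustration (not part of the diff), the write-or-append behaviour introduced in `write_output_file` reduces to the pattern below: the output file is keyed by run ID, a later write for the same run appends instead of truncating, and the header is written only on the first pass. The helper name and the sample rows are illustrative only.

```
# Sketch of the append pattern: the first database's results create the
# run-ID file and its header; the second database's results are appended
# to the same file without repeating the header.
import csv
import os


def append_rows(path, fieldnames, rows):
    file_exists = os.path.exists(path)
    with open(path, 'a' if file_exists else 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
        for row in rows:
            writer.writerow(row)


fields = ['run_id', 'database', 'test_name']
append_rows('58296063ab3c2a6701f8f986.csv', fields,
            [{'run_id': '58296063ab3c2a6701f8f986', 'database': 'dolt', 'test_name': 'oltp_insert'}])
append_rows('58296063ab3c2a6701f8f986.csv', fields,
            [{'run_id': '58296063ab3c2a6701f8f986', 'database': 'mysql', 'test_name': 'oltp_insert'}])
```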