mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-06 00:39:40 -06:00
846 lines
22 KiB
Perl
Executable File
846 lines
22 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
###################################################################################
|
|
#
|
|
#
|
|
# benchmark.pl - Dolt benchmarking script
|
|
#
|
|
#
|
|
# Description: Dolt benchmarking script.
|
|
# Author: Tim Sehn
|
|
# Date: March, 2019
|
|
#
|
|
###################################################################################
|
|
|
|
use strict;
|
|
|
|
use Data::Dumper;
|
|
use Getopt::Long;
|
|
use List::Util qw(shuffle);
|
|
use Pod::Usage;
|
|
|
|
# These are defaults and can be overridden with command line args.
|
|
use constant {
    BENCHMARK_ROOT  => '/var/tmp',     # default for --root
    DOLT_PATH       => '~/go/bin/',    # default for --dolt-path
    LOG_LEVEL       => 1,              # 0 = quiet, 1 = status, 2 = verbose
    UNSAFE          => 0,              # default for --unsafe
    PRESERVE_INPUTS => 0,              # default for --preserve
};
|
|
|
|
###################################################################################
|
|
#
|
|
# Configuration
|
|
#
|
|
###################################################################################
|
|
|
|
# Ideally, we will store the configuration in a dolt repository. We will pull down
|
|
# the repo and extract all this information from the repository. Then, we'll
|
|
# insert the output with the configuration version identifier in the output.
|
|
|
|
# Define the benchmarks we will run.
|
|
my $benchmark_config = do {
    # Every non-key column is randomly generated; only its name, type and
    # size differ, so the column specs are built from small tables. For int
    # columns generate_value() uses size as the upper bound of the random
    # value, for string columns as the string length.
    my $random_column = sub {
        my ($name, $type, $size) = @_;
        return {
            name    => $name,
            type    => $type,
            primary => 0,
            gen     => 'rand',
            size    => $size,
        };
    };
    my @int_sizes    = (10, 100, 1000, 10000, 100000);
    my @string_sizes = (1, 2, 4, 8, 16);

    # The csv files we create to represent a small, medium, and large
    # change. The second element is the percentage chance that a column
    # value is changed.
    my @change_sets = (
        [ small  => 0.001 ],
        [ medium => 0.01  ],
        [ large  => 0.05  ],
    );

    # One "<label> diff" test per change set, for each tool.
    my @git_diff_tests = map {
        my $label = $_->[0];
        +{
            prep       => ["cp ../$label-change.csv ./test.csv"],
            name       => "$label diff",
            command    => 'git diff test.csv',
            post       => [
                'git add test.csv',
                'git commit -m "' . ucfirst($label) . ' change"',
            ],
            check_disk => 1,
        };
    } @change_sets;

    my @dolt_diff_tests = map {
        my $label = $_->[0];
        +{
            prep       => [
                'dolt table rm test',
                "dolt table import -c -s ../test.schema test ../$label-change.csv",
            ],
            name       => "$label diff",
            command    => 'dolt diff test',
            post       => [
                'dolt add test',
                'dolt commit -m "' . $label . ' change"',
            ],
            check_disk => 1,
        };
    } @change_sets;

    my %config = (
        # Version the configuration to store with the output
        version => '0.0.1',

        # Define the schema and size of the test database.
        # This creates a set of csv files and a dolt schema file which are
        # used in the benchmark tests. The gen field is either increment or
        # rand. Types supported are int and string.
        seed => {
            name        => 'test.csv',
            size        => 1000000,
            schema_file => 'test.schema',
            schema      => [
                {
                    name    => 'id',
                    type    => 'int',
                    primary => 1,
                    gen     => 'increment',
                },
                ( map { $random_column->('int' . ($_ + 1), 'int', $int_sizes[$_]) }
                      0 .. $#int_sizes ),
                ( map { $random_column->('string' . ($_ + 1), 'string', $string_sizes[$_]) }
                      0 .. $#string_sizes ),
            ],
        },

        changes => [
            map { +{ file => $_->[0] . '-change.csv', pct => $_->[1] } } @change_sets
        ],

        benchmarks => {
            git => {
                root  => 'git-benchmark',
                tests => [
                    { name => 'raw',  command => 'git' },
                    { name => 'init', command => 'git init' },
                    {
                        prep    => ['cp ../test.csv .'],
                        name    => 'add',
                        command => 'git add test.csv',
                    },
                    {
                        name       => 'commit',
                        command    => 'git commit -m "first test commit"',
                        check_disk => 1,
                    },
                    @git_diff_tests,
                ],
            },
            dolt => {
                root  => 'dolt-benchmark',
                tests => [
                    { name => 'raw',  command => 'dolt' },
                    { name => 'init', command => 'dolt init' },
                    {
                        prep    => [
                            'dolt table create -s ../test.schema test',
                            'dolt table import -u test ../test.csv',
                        ],
                        name    => 'add',
                        command => 'dolt add test',
                    },
                    {
                        name       => 'commit',
                        command    => 'dolt commit -m "first test commit"',
                        check_disk => 1,
                    },
                    @dolt_diff_tests,
                ],
            },
        },
    );

    \%config;
};
|
|
|
|
# Where results are published: the results table of a local dolt repository.
# repo_root can be replaced with the --publish-repo command line option.
my $publish_config = {
    'table'     => 'results',
    'repo_root' => '/Users/timsehn/liquidata/dolt-repos/dolt-benchmark',
};
|
|
|
|
###################################################################################
|
|
#
|
|
# Execute the Benchmark
|
|
#
|
|
###################################################################################
|
|
|
|
# Process command line arguments. Every option starts at its compiled-in
# default and is overwritten only when given on the command line.
my $root         = BENCHMARK_ROOT;
my $log_level    = LOG_LEVEL;
my $unsafe       = UNSAFE;
my $preserve     = PRESERVE_INPUTS;
my $dolt_path    = DOLT_PATH;
my $publish      = 0;
my $publish_repo = '';
my $help         = 0;
my $man          = 0;

GetOptions(
    "root=s"         => \$root,
    "loglevel=i"     => \$log_level,
    "preserve"       => \$preserve,
    "unsafe"         => \$unsafe,
    "dolt-path=s"    => \$dolt_path,
    "publish"        => \$publish,
    "publish-repo=s" => \$publish_repo,
    'help|?'         => \$help,
    'man'            => \$man,
) or pod2usage(2);

pod2usage(1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;

# --publish-repo only overrides where results are published, so it is
# meaningless (and rejected) without --publish.
if ( $publish_repo ) {
    die("Cannot specify --publish-repo unless --publish is specified")
        unless $publish;
    $publish_config->{'repo_root'} = $publish_repo;
}
|
|
|
|
# Set up the environment

# A leading "~" in a PATH entry is not expanded when a command is exec'd
# directly, so the default dolt path ('~/go/bin/') would never be searched.
# Expand it to the home directory before adding it to PATH.
if ( defined $ENV{'HOME'} ) {
    $dolt_path =~ s{\A~(?=/|\z)}{$ENV{'HOME'}};
}
$ENV{'PATH'} = "$ENV{PATH}:$dolt_path";
$ENV{'NOMS_VERSION_NEXT'} = 1;

# The script changes back to $root from inside subdirectories of it, which
# only works when $root is absolute. Resolve a relative --root against the
# starting directory now, before the first chdir.
require File::Spec;
$root = File::Spec->rel2abs($root);

# Make sure root exists
if ( -d $root ) {
    output("Changing directory to $root", 2);
    chdir($root) or die("Could not cd to $root\n");
} else {
    die("Could not run benchmarks in $root. Directory does not exist.\n");
}
|
|
|
|
# Build input files: the dolt schema file, the seed CSV and one CSV per
# configured change set.
my ($test_csv, $schema_file, $schema, $rows)
    = @{ $benchmark_config->{'seed'} }{qw(name schema_file schema size)};
my $columns = @{$schema};
my $changes = $benchmark_config->{'changes'};

output("Building input files...$rows rows, $columns columns", 1);
generate_dolt_schema($schema_file, $schema);
create_test_input_csvs($test_csv, $rows, $schema, $changes);

# TO DO: Gather system information to insert into the output.
my $profile = {};
gather_profile_info($profile);
|
|
|
|
# Run the benchmarks.
# Results are collected as $data{test name}{benchmark name}{real|user|system|disk}.
my %data;
foreach my $benchmark ( keys %{$benchmark_config->{'benchmarks'}} ) {
    output("Executing $benchmark benchmark...", 1);

    # Build the root directory for the repository
    my $benchmarks     = $benchmark_config->{'benchmarks'};
    my $benchmark_root = $benchmarks->{$benchmark}{'root'};
    if ( -d $benchmark_root ) {
        if ( $unsafe ) {
            output("Deleting $root/$benchmark_root because it already exists", 2);
            run_command("rm -rf $benchmark_root");
        } else {
            # NOTE(review): error_exit() runs cleanup(), which removes every
            # benchmark root unless --preserve is set -- including the
            # pre-existing directory this branch refuses to touch. Confirm
            # whether that is intended.
            error_exit("$root/$benchmark_root must not exist to run benchmark");
        }
    }
    output("Changing directory to $benchmark_root\n", 2);
    mkdir($benchmark_root) or error_exit("Could not mkdir $benchmark_root");
    chdir($benchmark_root) or error_exit("Could not cd to $benchmark_root");

    # Run and time the commands in the root directory
    foreach my $test ( @{$benchmarks->{$benchmark}{'tests'}} ) {
        my $test_name = $test->{'name'};
        output("Running test: " . $test_name, 1);

        # Untimed setup commands
        foreach my $prep ( @{$test->{'prep'}} ) {
            run_command($prep);
        }

        @{ $data{$test_name}{$benchmark} }{qw(real user system)}
            = time_command($test->{'command'}, $log_level);

        # Untimed follow-up commands
        foreach my $post ( @{$test->{'post'}} ) {
            run_command($post);
        }

        if ( $test->{'check_disk'} ) {
            $data{$test_name}{$benchmark}{'disk'} = disk_usage();
        }
    }

    # Cleanup the repository. The chdir must succeed: the rm -rf below uses
    # a path relative to $root.
    output("Changing directory to $root and removing $benchmark_root", 2);
    chdir($root) or error_exit("Could not cd to $root");
    run_command("rm -rf $benchmark_root") unless $preserve;
}
|
|
|
|
# Cleanup the input files.
output("Cleaning up...", 1);
cleanup($root, $benchmark_config, $preserve, $unsafe);

# Output: optionally publish to the results repository, then print the
# summary.
if ( $publish ) {
    publish($publish_config, \%data, $profile, $benchmark_config, $root);
}
output_data(\%data, $benchmark_config->{'benchmarks'}, $log_level);

exit 0;
|
|
|
|
###################################################################################
|
|
#
|
|
# Functions
|
|
#
|
|
###################################################################################
|
|
|
|
# System utility functions
|
|
|
|
# Run a shell command under the shell's `time` and report how long it took.
#
# Arguments:
#   $command   - shell command line to run and time
#   $log_level - when greater than 1 the command's own stdout is captured
#                together with the timing report; otherwise it is discarded
#
# Returns the list ($real, $user, $system), each converted to milliseconds
# by convert_time_output_to_ms(). Calls error_exit() when the timing report
# cannot be found in the captured output.
sub time_command {
    my $command   = shift;
    my $log_level = shift;

    output("Running:\n\t$command", 2);

    # time outputs to STDERR so I'll trash STDOUT and grab STDERR from
    # STDOUT which `` writes to
    my $piped_command;
    if ( $log_level > 1 ) {
        $piped_command = "{ time $command ;} 2>&1";
    } else {
        $piped_command = "{ time $command ;} 2>&1 1>/dev/null";
    }

    my $output = `$piped_command`;
    $output = '' unless defined $output;
    # To Do: Some of these commands expect to exit 1. ie, git and dolt.
    # I need to build in an expect into the benchmark definition
    # if ($?) {
    #     die "Error running: $piped_command\n";
    # }

    output("Output:\n\t$output", 2) if $output;

    # Capture through a checked list assignment: after a failed match
    # $1..$3 would still hold the captures of some earlier match and bogus
    # timings would be recorded.
    my ($real_text, $user_text, $system_text)
        = $output =~ /real\s+(.+)\nuser\s+(.+)\nsys\s+(.+)\n/
        or error_exit("Could not find timing information in output of: $piped_command");

    my $real   = convert_time_output_to_ms($real_text);
    my $user   = convert_time_output_to_ms($user_text);
    my $system = convert_time_output_to_ms($system_text);

    return ($real, $user, $system);
}
|
|
|
|
# Report the disk usage of the current directory.
#
# Runs `du -h -d 0` and returns the size column of the "." line exactly as
# du printed it. Returns undef when the output cannot be parsed.
sub disk_usage {
    output("Checking disk usage...", 2);

    my $command = 'du -h -d 0';
    output("Running $command", 2);
    my $output = `$command`;
    output("Output:\n\t$output", 2) if $output;

    # Match in list context so a failed match yields undef instead of
    # whatever an earlier, unrelated match left in $1.
    my ($size) = defined $output
        ? $output =~ /^\s*([\d\w\.]+)\s+\./
        : ();

    return $size;
}
|
|
|
|
# Run a shell command, folding its stderr into the captured output.
# The output is only logged (at verbose level); a non-zero exit status ends
# the benchmark through error_exit().
sub run_command {
    my ($command) = @_;

    output("Running:\n\t$command", 2);
    my $output = `$command 2>&1`;
    if ( $output ) {
        output("Output:\n\t$output", 2);
    }
    error_exit("Error running: $command") if $?;
}
|
|
|
|
# Convert one field of the shell's `time` report, such as "1m2.345s", to
# whole milliseconds.
#
# The fractional part is scaled by its number of digits, so "0m1.5s" is
# 1500 ms; digits beyond millisecond precision are dropped. Either "." or
# "," is accepted as the decimal separator.
#
# Returns undef (an empty list in list context) when the text is undefined
# or does not look like a time.
sub convert_time_output_to_ms {
    my $time = shift;

    return unless defined $time;

    # Checked list assignment: a failed match must not fall through to
    # arithmetic on captures left over from an earlier match.
    my ($minutes, $seconds, $fraction)
        = $time =~ /(\d+)m(\d+)(?:[.,](\d+))?s/
        or return;

    # Right-pad or truncate the fraction to exactly three digits so it can
    # be read directly as milliseconds using integer arithmetic only.
    $fraction = '' unless defined $fraction;
    my $ms = substr($fraction . '000', 0, 3);

    return $ms + ($seconds * 1000) + ($minutes * 60 * 1000);
}
|
|
|
|
# CSV Creation functions
|
|
|
|
# Write the seed CSV and one changed copy of it per configured change set.
#
# Arguments:
#   $csv     - file name of the seed CSV
#   $size    - number of data rows to generate
#   $schema  - array ref of column definitions (name, type, gen, size)
#   $changes - array ref of change sets, each with a 'file' name and a 'pct'
#              chance that any single column value is regenerated
#
# Every file gets the same header row. The seed rows are kept in memory;
# each change file then receives those rows in shuffled order with some
# values regenerated. Files are written to the current directory. All
# filehandles are private to this function: nothing is stored in $changes.
sub create_test_input_csvs {
    my $csv     = shift;
    my $size    = shift;
    my $schema  = shift;
    my $changes = shift;

    # [file name, filehandle] pairs; the seed CSV first, then one pair per
    # change set in the same order as @{$changes}.
    my @outputs;
    open(my $csv_fh, '>', $csv) or error_exit("Could not open $csv: $!\n");
    push @outputs, [$csv, $csv_fh];

    foreach my $change ( @{$changes} ) {
        open(my $fh, '>', $change->{'file'})
            or error_exit("Could not open ". $change->{'file'} . ": $!");
        push @outputs, [$change->{'file'}, $fh];
    }
    my @all_filehandles = map { $_->[1] } @outputs;

    # Create header row and write it to all csvs
    my $header = join(',', map { $_->{'name'} } @{$schema}) . "\n";
    write_to_files($header, @all_filehandles);

    # Create mock data: build each row, remember it and write it to the
    # original CSV.
    my @values;
    foreach my $i ( 0 .. $size - 1 ) {
        my @row = map {
            scalar generate_value($_->{'type'}, $_->{'gen'}, $_->{'size'}, $i)
        } @{$schema};
        print {$csv_fh} join(',', @row), "\n";
        push @values, \@row;
    }

    # Shuffle the rows and change the values
    foreach my $index ( 0 .. $#{$changes} ) {
        my $change = $changes->[$index];
        my $fh     = $outputs[$index + 1][1];

        foreach my $row ( shuffle(@values) ) {
            my @changed_row;
            foreach my $col ( 0 .. $#{$schema} ) {
                my $column = $schema->[$col];
                my $value  = $row->[$col];

                if ( rand() < $change->{'pct'} ) {
                    # The row's first value is passed as the increment
                    # counter, so an 'increment' column keeps its value.
                    # Presumably relies on the first column being the
                    # incrementing key -- verify if the schema changes.
                    $value = generate_value($column->{'type'},
                                            $column->{'gen'},
                                            $column->{'size'},
                                            $row->[0]);
                }
                push @changed_row, $value;
            }
            print {$fh} join(',', @changed_row), "\n";
        }
    }

    # Buffered write errors (e.g. a full disk) only surface at close, so
    # check it for every file.
    foreach my $output_file ( @outputs ) {
        my ($name, $fh) = @{$output_file};
        close($fh) or error_exit("Could not close $name: $!");
    }
}
|
|
|
|
# Produce one cell value for a column.
#
# Arguments:
#   $type - 'int' or 'string'
#   $gen  - 'increment' (int only) or 'rand'
#   $size - int/rand: largest value that can be returned;
#           string/rand: length of the string
#   $i    - value returned unchanged for int/increment
#
# Any other type or generator ends the benchmark through error_exit().
sub generate_value {
    my ($type, $gen, $size, $i) = @_;

    unless ( $type eq 'int' || $type eq 'string' ) {
        error_exit("Do not understand type: $type");
    }

    # increment is only understood for int columns
    return $i if ( $type eq 'int' && $gen eq 'increment' );

    unless ( $gen eq 'rand' ) {
        error_exit("Do not understand generator: $gen");
    }

    return $type eq 'int'
        ? int(rand($size+1))
        : rndStr($size, 'a'..'z', 0..9);
}
|
|
|
|
# Print the same string to every filehandle given after it.
sub write_to_files {
    my ($string, @filehandles) = @_;

    print {$_} $string for @filehandles;
}
|
|
|
|
# Build a random string. The first argument is the length; every following
# argument is a candidate character, and each position is drawn
# independently from those candidates.
sub rndStr {
    my ($length, @alphabet) = @_;

    # rand @alphabet yields a fraction below the alphabet size; using it as
    # an array index truncates it to a whole position.
    return join('', map { $alphabet[ rand @alphabet ] } 1 .. $length);
}
|
|
|
|
# Gather Profile information
|
|
|
|
# Fill the given hash ref with information about this run:
#   uname        - output of `uname -a`
#   now          - current epoch time
#   git_version  - output of `git version`
#   dolt_version - output of `dolt version`
# Newlines are removed from the command outputs. Only a failing uname
# command is treated as an error.
sub gather_profile_info {
    my ($profile) = @_;

    output('Gathering profile information...', 1);
    my $uname_cmd = 'uname -a';
    output("Running $uname_cmd", 2);
    # TO DO: Turn this into structured data
    $profile->{'uname'} = `$uname_cmd`;
    $profile->{'uname'} =~ tr/\n//d;
    error_exit("Error running: $uname_cmd") if $?;
    output("uname is:\n\t$profile->{uname}", 2);

    $profile->{'now'} = time();

    foreach my $tool ( qw(git dolt) ) {
        my $key = $tool . '_version';
        $profile->{$key} = `$tool version`;
        $profile->{$key} =~ tr/\n//d;
    }
}
|
|
|
|
# Generate schema
|
|
|
|
# TO DO: Change these schema generation functions to build the proper perl
|
|
# data structure and use a JSON parser to output the proper JSON schema
|
|
# Write the dolt schema file for the given columns.
#
# Arguments:
#   $schema_file - name of the file to create
#   $schema      - array ref of column definitions
#
# The file holds a JSON object with a "columns" array; each column is
# written by generate_column_schema() and tagged with its position in
# $schema, starting at 0.
sub generate_dolt_schema {
    my ($schema_file, $schema) = @_;

    open(my $filehandle, '>', $schema_file)
        or error_exit("Could not open $schema_file");

    print {$filehandle} "{\n\"columns\":[\n";

    foreach my $tag ( 0 .. $#{$schema} ) {
        # Separator goes before every column except the first
        print {$filehandle} ",\n" if $tag > 0;
        generate_column_schema($schema->[$tag], $tag, $filehandle);
    }

    print {$filehandle} "]\n}\n";
}
|
|
|
|
# Write the JSON object describing one column to an open filehandle.
#
# Arguments:
#   $col_schema - column definition; name, type and primary are used
#   $tag        - number written as the column's tag
#   $filehandle - handle to print to
#
# Primary key columns get a not_null constraint; all other columns get an
# empty constraint list. No trailing newline or separator is written.
sub generate_column_schema {
    my ($col_schema, $tag, $filehandle) = @_;

    my $json = qq({\n"tag": $tag,\n)
             . qq("name":"$col_schema->{name}",\n)
             . qq("kind":"$col_schema->{type}",\n);

    if ( $col_schema->{primary} ) {
        $json .= qq("is_part_of_pk": true,\n)
               . qq("col_constraints": [\n{\n"constraint_type": "not_null",\n)
               . qq("params": null\n}\n]\n);
    } else {
        $json .= qq("is_part_of_pk": false,\n)
               . qq("col_constraints": []\n);
    }

    $json .= "}";

    print {$filehandle} $json;
}
|
|
|
|
# Remove everything the benchmark created under $root: the seed CSV, the
# schema file, the change CSVs and each benchmark's repository directory.
#
# Arguments:
#   $root             - directory the benchmark ran in
#   $benchmark_config - benchmark configuration (seed, changes, benchmarks)
#   $preserve         - when true nothing is removed
#   $unsafe           - accepted but not used here
sub cleanup {
    my ($root, $benchmark_config, $preserve, $unsafe) = @_;

    return if $preserve;

    chdir($root) or error_exit("Could not cd to $root");

    my $seed    = $benchmark_config->{'seed'}{'name'};
    my $schema  = $benchmark_config->{'seed'}{'schema_file'};
    my $changes = $benchmark_config->{'changes'};

    output("Removing $seed and $schema files", 2);
    foreach my $file ( $seed, $schema ) {
        unlink($file) if ( -e $file );
    }

    foreach my $change ( @{$changes} ) {
        my $file = $change->{'file'};
        output("Removing $file", 2);
        unlink($file) if ( -e $file );
    }

    output("Removing repository roots",2);
    foreach my $benchmark ( values %{$benchmark_config->{'benchmarks'}} ) {
        run_command("rm -rf " . $benchmark->{'root'});
    }
}
|
|
|
|
# Data
|
|
# Print the collected results to STDOUT.
#
# Arguments:
#   $data       - results keyed as {test name}{benchmark}{real|disk|...}
#   $benchmarks - benchmark definitions; the dolt test list decides which
#                 tests are printed and in which order
#   $log_level  - 0 prints nothing; 2 or more also dumps $data first
#
# Times are printed for every test; disk usage only for tests that have a
# dolt disk figure.
sub output_data {
    my ($data, $benchmarks, $log_level) = @_;

    return if ( $log_level == 0 );

    if ( $log_level >= 2 ) {
        print Dumper($data);
    }

    my @test_names = map { $_->{'name'} } @{$benchmarks->{'dolt'}{'tests'}};

    print "\n--- Times ---\n";
    foreach my $test_name ( @test_names ) {
        print "$test_name:\n"
            . "\tDolt: $data->{$test_name}{'dolt'}{'real'}ms\n"
            . "\tGit: $data->{$test_name}{'git'}{'real'}ms\n";
    }

    print "\n--- Disk ---\n";
    foreach my $test_name ( @test_names ) {
        next unless $data->{$test_name}{'dolt'}{'disk'};

        print "$test_name:\n"
            . "\tDolt: $data->{$test_name}{'dolt'}{'disk'}\n"
            . "\tGit: $data->{$test_name}{'git'}{'disk'}\n";
    }
}
|
|
|
|
# Insert the benchmark results into the results table of a dolt repository.
#
# Arguments:
#   $publish_config   - hash ref with 'repo_root' (directory of the dolt
#                       repository) and 'table' (results table name)
#   $data             - results keyed as {test name}{benchmark}{real|disk}
#   $profile          - hash ref with uname, now, git_version, dolt_version
#   $benchmark_config - benchmark configuration; only its version is used
#   $root             - directory to change back to when done
#
# One row is written per test with `dolt table put-row`. Any failure ends
# the script through error_exit().
sub publish {
    my $publish_config   = shift;
    my $data             = shift;
    my $profile          = shift;
    my $benchmark_config = shift;
    my $root             = shift;

    # Once we have remotes, we'll want to pull the repo down from DoltHub,
    # Make our inserts on a new branch, and then push the branch back to DoltHub.
    # Then, we can delete the repo or have a keep flag if users want to inspect
    # the results.

    # We'll assume the output repo is in a schema we understand
    my $data_repo_root = $publish_config->{'repo_root'};
    my $results_table  = $publish_config->{'table'};

    output('Publishing results to dolt...', 1);

    output("Changing directory to $data_repo_root...", 2);
    chdir($data_repo_root) or error_exit("Could not cd to $data_repo_root");

    # Make sure this is a valid dolt repo and the results table exists.
    # The table name is matched literally and as a whole word, so that
    # e.g. "results_old" does not count as "results" and regex
    # metacharacters in the name are not interpreted.
    my $output = `dolt ls`;
    error_exit("$data_repo_root does not contain a valid dolt repository") if ($?);
    error_exit("$results_table not found in dolt repository in $data_repo_root")
        unless ( $output =~ /(?<!\w)\Q$results_table\E(?!\w)/ );

    # Insert data into dolt with the following schema:
    # uname (pk), now (pk), benchmark version (pk), test name (pk),
    # dolt time, git time, dolt disk, git disk
    my $uname        = $profile->{'uname'};
    my $now          = $profile->{'now'};
    my $git_version  = $profile->{'git_version'};
    my $dolt_version = $profile->{'dolt_version'};
    my $version      = $benchmark_config->{version};

    foreach my $test ( keys %{$data} ) {
        my $dolt_time = $data->{$test}{'dolt'}{'real'};
        my $git_time  = $data->{$test}{'git'}{'real'};
        # Disk usage is only recorded for some tests; publish an empty
        # string for the others.
        my $dolt_disk = $data->{$test}{'dolt'}{'disk'} || "";
        my $git_disk  = $data->{$test}{'git'}{'disk'} || "";

        my $dolt_insert = "dolt table put-row $results_table uname:\"$uname\" " .
            "test_time:$now git_version:\"$git_version\" " .
            "dolt_version:\"$dolt_version\" benchmark_version:\"$version\" " .
            "test_name:\"$test\" dolt_time:$dolt_time git_time:$git_time " .
            "dolt_disk:\"$dolt_disk\" git_disk:\"$git_disk\"";

        run_command($dolt_insert);
    }

    output("Returning to $root directory...", 2);
    chdir($root) or error_exit("Could not cd to $root");
}
|
|
|
|
# Logging
|
|
|
|
# 0 = quiet, 1 = status, 2 = verbose
|
|
# Print a timestamped message when its level is within the configured
# verbosity.
#
# Arguments:
#   $message - text to print
#   $level   - verbosity the message belongs to
sub output {
    my ($message, $level) = @_;

    my $timestamp = localtime();

    # Take advantage of log level being global
    if ( $level <= $log_level ) {
        print $timestamp . ": $message\n";
    }
}
|
|
|
|
{
    # True once error_exit() has started. Deliberately declared without an
    # initial value: this code sits below the script's "exit 0", so the
    # block itself is never executed at run time and an initialiser would
    # never run. The sub still shares the variable.
    my $exit_in_progress;

    # Print an error message to STDERR, try to clean up and exit with
    # status 1.
    #
    # cleanup() reports its own failures through error_exit(); in that case
    # the message is still printed but cleanup is not attempted a second
    # time, so a persistently failing cleanup cannot recurse without end.
    sub error_exit {
        my $message = shift;

        print STDERR "$message\n";

        exit 1 if $exit_in_progress;
        $exit_in_progress = 1;

        print "Exiting early...attempting to cleanup...\n";

        # Take advantage that these are global so I don't have to pass them around.
        cleanup($root, $benchmark_config, $preserve, $unsafe);

        exit 1;
    }
}
|
|
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
benchmark.pl - Performs a Dolt benchmark against Git
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
benchmark.pl [options]
|
|
|
|
=head1 OPTIONS
|
|
|
|
=over 8
|
|
|
|
=item B<-root>
|
|
|
|
Override the root directory to perform the benchmark in. Defaults to /var/tmp.
|
|
|
|
=item B<-loglevel>
|
|
|
|
The verbosity of the output. 0 is quiet. 1 is status. 2 is verbose. Defaults to 1.
|
|
|
|
=item B<-dolt-path>
|
|
|
|
Override where the dolt utility is located. Defaults to ~/go/bin/.
|
|
|
|
=item B<-preserve>
|
|
|
|
Do not delete the CSV inputs, Dolt repo, and Git repo. Useful for debugging.
|
|
|
|
=item B<-unsafe>
|
|
|
|
Delete files and directories that are in the way of the benchmark doing its job.
|
|
|
|
=item B<-publish>
|
|
|
|
Publish the results to the shared benchmark results Dolt repository.
|
|
|
|
=item B<-publish-repo>
|
|
|
|
Specify the directory where you would like the dolt repository used to publish
|
|
results to be placed. -publish must also be specified.
|
|
|
|
=item B<-help>
|
|
|
|
Print a brief help message and exit.
|
|
|
|
=item B<-man>
|
|
|
|
Print the manual page and exit.
|
|
|
|
=back
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
B<benchmark.pl> will create a benchmark according to the benchmark configuration
|
|
specified in this script. The benchmark will entail creating random CSV input files
|
|
of a defined schema. These files will be imported into a Dolt and Git repository
|
|
and various commands will be timed. The disk usage will also be gathered at various
|
|
points. The benchmark output will be printed to the screen.
|
|
|
|
=cut
|