Support the bulk edit accumulator for dolt table import (#3591)

This commit is contained in:
Vinai Rachakonda
2022-06-14 13:28:23 -04:00
committed by GitHub
parent 40843d17f2
commit a4e28fa5b5
4 changed files with 20 additions and 17 deletions

View File

@@ -54,6 +54,7 @@ type SqlEngineConfig struct {
ServerUser string
ServerPass string
Autocommit bool
Bulk bool
}
// NewSqlEngine returns a SqlEngine
@@ -66,7 +67,7 @@ func NewSqlEngine(
parallelism := runtime.GOMAXPROCS(0)
dbs, err := CollectDBs(ctx, mrEnv)
dbs, err := CollectDBs(ctx, mrEnv, config.Bulk)
if err != nil {
return nil, err
}

View File

@@ -30,7 +30,7 @@ import (
// CollectDBs takes a MultiRepoEnv and creates Database objects from each environment and returns a slice of these
// objects.
func CollectDBs(ctx context.Context, mrEnv *env.MultiRepoEnv) ([]sqle.SqlDatabase, error) {
func CollectDBs(ctx context.Context, mrEnv *env.MultiRepoEnv, useBulkEditor bool) ([]sqle.SqlDatabase, error) {
var dbs []sqle.SqlDatabase
var db sqle.SqlDatabase
@@ -41,7 +41,7 @@ func CollectDBs(ctx context.Context, mrEnv *env.MultiRepoEnv) ([]sqle.SqlDatabas
}
dEnv.DoltDB.SetCommitHooks(ctx, postCommitHooks)
db = newDatabase(name, dEnv)
db = newDatabase(name, dEnv, useBulkEditor)
if _, remote, ok := sql.SystemVariables.GetGlobal(sqle.ReadReplicaRemoteKey); ok && remote != "" {
remoteName, ok := remote.(string)
@@ -84,9 +84,13 @@ func GetCommitHooks(ctx context.Context, dEnv *env.DoltEnv) ([]doltdb.CommitHook
return postCommitHooks, nil
}
func newDatabase(name string, dEnv *env.DoltEnv) sqle.Database {
func newDatabase(name string, dEnv *env.DoltEnv, useBulkEditor bool) sqle.Database {
deaf := dEnv.DbEaFactory()
if useBulkEditor {
deaf = dEnv.BulkDbEaFactory()
}
opts := editor.Options{
Deaf: dEnv.DbEaFactory(),
Deaf: deaf,
Tempdir: dEnv.TempTableFilesDir(),
}
return sqle.NewDatabase(name, dEnv.DbData(), opts)

View File

@@ -84,7 +84,8 @@ func NewSqlEngineTableWriter(ctx context.Context, dEnv *env.DoltEnv, createTable
PrivFilePath: "",
ServerUser: "root",
ServerPass: "",
Autocommit: true,
Autocommit: false, // We set autocommit to false to improve performance. Bulk import should not commit on each row.
Bulk: true,
}
se, err := engine.NewSqlEngine(
ctx,
@@ -107,11 +108,6 @@ func NewSqlEngineTableWriter(ctx context.Context, dEnv *env.DoltEnv, createTable
dsess.DSessFromSess(sqlCtx.Session).EnableBatchedMode()
err = sqlCtx.Session.SetSessionVariable(sqlCtx, sql.AutoCommitSessionVar, false)
if err != nil {
return nil, err
}
doltCreateTableSchema, err := sqlutil.FromDoltSchema(options.TableToWriteTo, createTableSchema)
if err != nil {
return nil, err

View File

@@ -27,22 +27,24 @@ python3 csv_gen.py '{
{"name":"c3", "type":"float"},
{"name":"c4", "type":"int"}
],
"row_count": 1000000
"row_count": 10000000
}' > benchmark.csv
# Run the current version of dolt TODO: Assumes no storage version changes... Change if there is
# Run the current version of dolt
echo "Running the current version of import"
rm -rf .dolt
dolt init
time dolt table import -c --pk=pk current_version benchmark.csv
# Run the current version of export
echo "Running the current version of export"
time dolt table export -f current_version export.csv
# Run the old version of dolt
rm -rf .dolt
./old-dolt init
echo "Running version 0.34.5"
time ./old-dolt table import -c --pk=pk old_version benchmark.csv
# Run the current version of export
echo "Running the current version of export"
time dolt table export current_version export.csv
# Run the old version of export
time ./old-dolt table export -f old_version export.csv