mirror of
https://github.com/dolthub/dolt.git
synced 2025-12-30 16:12:39 -06:00
Support the bulk edit accumulator for dolt table import (#3591)
This commit is contained in:
@@ -54,6 +54,7 @@ type SqlEngineConfig struct {
|
||||
ServerUser string
|
||||
ServerPass string
|
||||
Autocommit bool
|
||||
Bulk bool
|
||||
}
|
||||
|
||||
// NewSqlEngine returns a SqlEngine
|
||||
@@ -66,7 +67,7 @@ func NewSqlEngine(
|
||||
|
||||
parallelism := runtime.GOMAXPROCS(0)
|
||||
|
||||
dbs, err := CollectDBs(ctx, mrEnv)
|
||||
dbs, err := CollectDBs(ctx, mrEnv, config.Bulk)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ import (
|
||||
|
||||
// CollectDBs takes a MultiRepoEnv and creates Database objects from each environment and returns a slice of these
|
||||
// objects.
|
||||
func CollectDBs(ctx context.Context, mrEnv *env.MultiRepoEnv) ([]sqle.SqlDatabase, error) {
|
||||
func CollectDBs(ctx context.Context, mrEnv *env.MultiRepoEnv, useBulkEditor bool) ([]sqle.SqlDatabase, error) {
|
||||
var dbs []sqle.SqlDatabase
|
||||
var db sqle.SqlDatabase
|
||||
|
||||
@@ -41,7 +41,7 @@ func CollectDBs(ctx context.Context, mrEnv *env.MultiRepoEnv) ([]sqle.SqlDatabas
|
||||
}
|
||||
dEnv.DoltDB.SetCommitHooks(ctx, postCommitHooks)
|
||||
|
||||
db = newDatabase(name, dEnv)
|
||||
db = newDatabase(name, dEnv, useBulkEditor)
|
||||
|
||||
if _, remote, ok := sql.SystemVariables.GetGlobal(sqle.ReadReplicaRemoteKey); ok && remote != "" {
|
||||
remoteName, ok := remote.(string)
|
||||
@@ -84,9 +84,13 @@ func GetCommitHooks(ctx context.Context, dEnv *env.DoltEnv) ([]doltdb.CommitHook
|
||||
return postCommitHooks, nil
|
||||
}
|
||||
|
||||
func newDatabase(name string, dEnv *env.DoltEnv) sqle.Database {
|
||||
func newDatabase(name string, dEnv *env.DoltEnv, useBulkEditor bool) sqle.Database {
|
||||
deaf := dEnv.DbEaFactory()
|
||||
if useBulkEditor {
|
||||
deaf = dEnv.BulkDbEaFactory()
|
||||
}
|
||||
opts := editor.Options{
|
||||
Deaf: dEnv.DbEaFactory(),
|
||||
Deaf: deaf,
|
||||
Tempdir: dEnv.TempTableFilesDir(),
|
||||
}
|
||||
return sqle.NewDatabase(name, dEnv.DbData(), opts)
|
||||
|
||||
@@ -84,7 +84,8 @@ func NewSqlEngineTableWriter(ctx context.Context, dEnv *env.DoltEnv, createTable
|
||||
PrivFilePath: "",
|
||||
ServerUser: "root",
|
||||
ServerPass: "",
|
||||
Autocommit: true,
|
||||
Autocommit: false, // Autocommit is disabled to improve performance: a bulk import should not commit on each row.
|
||||
Bulk: true,
|
||||
}
|
||||
se, err := engine.NewSqlEngine(
|
||||
ctx,
|
||||
@@ -107,11 +108,6 @@ func NewSqlEngineTableWriter(ctx context.Context, dEnv *env.DoltEnv, createTable
|
||||
|
||||
dsess.DSessFromSess(sqlCtx.Session).EnableBatchedMode()
|
||||
|
||||
err = sqlCtx.Session.SetSessionVariable(sqlCtx, sql.AutoCommitSessionVar, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doltCreateTableSchema, err := sqlutil.FromDoltSchema(options.TableToWriteTo, createTableSchema)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -27,22 +27,24 @@ python3 csv_gen.py '{
|
||||
{"name":"c3", "type":"float"},
|
||||
{"name":"c4", "type":"int"}
|
||||
],
|
||||
"row_count": 1000000
|
||||
"row_count": 10000000
|
||||
}' > benchmark.csv
|
||||
|
||||
# Run the current version of dolt TODO: Assumes no storage version changes... Change if there is
|
||||
# Run the current version of dolt
|
||||
echo "Running the current version of import"
|
||||
rm -rf .dolt
|
||||
dolt init
|
||||
time dolt table import -c --pk=pk current_version benchmark.csv
|
||||
|
||||
# Run the current version of export
|
||||
echo "Running the current version of export"
|
||||
time dolt table export -f current_version export.csv
|
||||
|
||||
# Run the old version of dolt
|
||||
rm -rf .dolt
|
||||
./old-dolt init
|
||||
echo "Running version 0.34.5"
|
||||
time ./old-dolt table import -c --pk=pk old_version benchmark.csv
|
||||
|
||||
# Run the current version of export
|
||||
echo "Running the current version of export"
|
||||
time dolt table export current_version export.csv
|
||||
|
||||
# Run the old version of export
|
||||
time ./old-dolt table export -f old_version export.csv
|
||||
|
||||
Reference in New Issue
Block a user