From 9f5acfd559f05462757facb3bdb33834fe80545c Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Tue, 17 Sep 2019 12:08:16 -0700 Subject: [PATCH 01/11] go/{cmd, libraries}: First pass at replace table --- go/cmd/dolt/commands/tblcmds/import.go | 28 ++++++++++++++++--- go/libraries/doltcore/mvdata/data_loc.go | 3 ++ go/libraries/doltcore/mvdata/data_mover.go | 5 +++- go/libraries/doltcore/mvdata/file_data_loc.go | 19 +++++++++++++ .../doltcore/mvdata/stream_data_loc.go | 14 ++++++++++ .../doltcore/mvdata/table_data_loc.go | 20 +++++++++++++ 6 files changed, 84 insertions(+), 5 deletions(-) diff --git a/go/cmd/dolt/commands/tblcmds/import.go b/go/cmd/dolt/commands/tblcmds/import.go index 5a17712281..cf3ffd65a1 100644 --- a/go/cmd/dolt/commands/tblcmds/import.go +++ b/go/cmd/dolt/commands/tblcmds/import.go @@ -37,6 +37,7 @@ import ( const ( createParam = "create-table" updateParam = "update-table" + replaceParam = "replace-table" tableParam = "table" fileParam = "file" outSchemaParam = "schema" @@ -97,6 +98,14 @@ A mapping file can be used to map fields between the file being imported and the be used when creating a new table, or updating an existing table. ` + mappingFileHelp + + + `If --replace-table | -r is given the operation will replace with the contents of the file. The table's + existing schema will be used, and field names will be used to match file fields with table fields unless a mapping file is + specified. + + If the schema for the existing table does not match the schema for the new file, the import will be aborted by default. To + overwrite both the table and the schema, use -c -f.` + + ` In both create and update scenarios the file's extension is used to infer the type of the file. If a file does not have the expected extension then the --file-type parameter should be used to explicitly define the format of @@ -106,6 +115,7 @@ the file in one of the supported formats (csv, psv, nbf, json, xlsx). For files var importSynopsis = []string{ "-c [-f] [--pk ] [--schema ] [--map ] [--continue] [--file-type ]
", "-u [--map ] [--continue] [--file-type ]
", + "-r [--map ] [--file-type ]
", } func validateImportArgs(apr *argparser.ArgParseResults, usage cli.UsagePrinter) (mvdata.MoveOperation, mvdata.TableDataLocation, mvdata.DataLocation, interface{}) { @@ -116,15 +126,19 @@ func validateImportArgs(apr *argparser.ArgParseResults, usage cli.UsagePrinter) var mvOp mvdata.MoveOperation var srcOpts interface{} - if !apr.Contains(createParam) && !apr.Contains(updateParam) { - cli.PrintErrln("Must include '-c' for initial table import or -u to update existing table.") + if !apr.Contains(createParam) && !apr.Contains(updateParam) && !apr.Contains(replaceParam) { + cli.PrintErrln("Must include '-c' for initial table import or -u to update existing table or -r to replace existing table.") return mvdata.InvalidOp, mvdata.TableDataLocation{}, nil, nil } else if apr.Contains(createParam) { mvOp = mvdata.OverwriteOp } else { - mvOp = mvdata.UpdateOp + if apr.Contains(replaceParam) { + mvOp = mvdata.ReplaceOp + } else { + mvOp = mvdata.UpdateOp + } if apr.Contains(outSchemaParam) { - cli.PrintErrln("fatal:", outSchemaParam+" is not supported for update operations") + cli.PrintErrln("fatal:", outSchemaParam+" is not supported for update or replace operations") usage() return mvdata.InvalidOp, mvdata.TableDataLocation{}, nil, nil } @@ -250,6 +264,7 @@ func createArgParser() *argparser.ArgParser { ap.SupportsFlag(createParam, "c", "Create a new table, or overwrite an existing table (with the -f flag) from the imported data.") ap.SupportsFlag(updateParam, "u", "Update an existing table with the imported data.") ap.SupportsFlag(forceParam, "f", "If a create operation is being executed, data already exists in the destination, the Force flag will allow the target to be overwritten.") + ap.SupportsFlag(replaceParam, "r", "Replace existing table with imported data.") ap.SupportsFlag(contOnErrParam, "", "Continue importing when row import errors are encountered.") ap.SupportsString(outSchemaParam, "s", "schema_file", "The schema for the output data.") ap.SupportsString(mappingFileParam, "m", "mapping_file", "A file that lays out how fields should be mapped from input data to output data.") @@ -299,6 +314,11 @@ func executeMove(ctx context.Context, dEnv *env.DoltEnv, force bool, mvOpts *mvd } } + // not sure if this is where this should happen but it needs to happen at some point + // if mvOpts.Operation == mvdata.ReplaceOp && [schema check fails] { + // cli.PrintErrln(color.RedString("Schema does not match. Please upload file with same schema as table")) + // } + mover, nDMErr := mvdata.NewDataMover(ctx, root, dEnv.FS, mvOpts, importStatsCB) if nDMErr != nil { diff --git a/go/libraries/doltcore/mvdata/data_loc.go b/go/libraries/doltcore/mvdata/data_loc.go index 1cad3d953f..ce2bfcb88d 100644 --- a/go/libraries/doltcore/mvdata/data_loc.go +++ b/go/libraries/doltcore/mvdata/data_loc.go @@ -100,6 +100,9 @@ type DataLocation interface { // NewUpdatingWriter will create a TableWriteCloser for a DataLocation that will update and append rows based on // their primary key. NewUpdatingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) + + // NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table if it has the same schema. 
+ NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) } // NewDataLocation creates a DataLocation object from a path and a format string. If the path is the name of a table diff --git a/go/libraries/doltcore/mvdata/data_mover.go b/go/libraries/doltcore/mvdata/data_mover.go index 2ffb709310..d96ad36edc 100644 --- a/go/libraries/doltcore/mvdata/data_mover.go +++ b/go/libraries/doltcore/mvdata/data_mover.go @@ -36,6 +36,7 @@ type MoveOperation string const ( OverwriteOp MoveOperation = "overwrite" + ReplaceOp MoveOperation = "replace" UpdateOp MoveOperation = "update" InvalidOp MoveOperation = "invalid" ) @@ -140,6 +141,8 @@ func NewDataMover(ctx context.Context, root *doltdb.RootValue, fs filesys.Filesy var wr table.TableWriteCloser if mvOpts.Operation == OverwriteOp { wr, err = mvOpts.Dest.NewCreatingWriter(ctx, mvOpts, root, fs, srcIsSorted, outSch, statsCB) + } else if mvOpts.Operation == ReplaceOp { + wr, err = mvOpts.Dest.NewReplacingWriter(ctx, mvOpts, root, fs, srcIsSorted, outSch, statsCB) } else { wr, err = mvOpts.Dest.NewUpdatingWriter(ctx, mvOpts, root, fs, srcIsSorted, outSch, statsCB) } @@ -200,7 +203,7 @@ func maybeMapFields(transforms *pipeline.TransformCollection, mapping *rowconv.F } func getOutSchema(ctx context.Context, inSch schema.Schema, root *doltdb.RootValue, fs filesys.ReadableFS, mvOpts *MoveOptions) (schema.Schema, error) { - if mvOpts.Operation == UpdateOp { + if mvOpts.Operation == UpdateOp || mvOpts.Operation == ReplaceOp { // Get schema from target rd, _, err := mvOpts.Dest.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) diff --git a/go/libraries/doltcore/mvdata/file_data_loc.go b/go/libraries/doltcore/mvdata/file_data_loc.go index b6ee924fa9..e8d7d98875 100644 --- a/go/libraries/doltcore/mvdata/file_data_loc.go +++ b/go/libraries/doltcore/mvdata/file_data_loc.go @@ -155,3 +155,22 @@ func (dl FileDataLocation) NewCreatingWriter(ctx context.Context, mvOpts *MoveOp func (dl FileDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { panic("Updating of files is not supported") } + +// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using +// the same schema +func (dl FileDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { + switch dl.Format { + case CsvFile: + return csv.OpenCSVWriter(dl.Path, fs, outSch, csv.NewCSVInfo()) + case PsvFile: + return csv.OpenCSVWriter(dl.Path, fs, outSch, csv.NewCSVInfo().SetDelim("|")) + case XlsxFile: + panic("writing to xlsx files is not supported yet") + case JsonFile: + return json.OpenJSONWriter(dl.Path, fs, outSch, json.NewJSONInfo()) + case SqlFile: + return sqlexport.OpenSQLExportWriter(dl.Path, mvOpts.TableName, fs, outSch) + } + + panic("Invalid Data Format." 
+ string(dl.Format)) +} diff --git a/go/libraries/doltcore/mvdata/stream_data_loc.go b/go/libraries/doltcore/mvdata/stream_data_loc.go index 9343cd9d6e..33f9e3e46f 100644 --- a/go/libraries/doltcore/mvdata/stream_data_loc.go +++ b/go/libraries/doltcore/mvdata/stream_data_loc.go @@ -91,3 +91,17 @@ func (dl StreamDataLocation) NewCreatingWriter(ctx context.Context, mvOpts *Move func (dl StreamDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { panic("Updating is not supported for stdout") } + +// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using +// the same schema +func (dl StreamDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { + switch dl.Format { + case CsvFile: + return csv.NewCSVWriter(iohelp.NopWrCloser(dl.Writer), outSch, csv.NewCSVInfo()) + + case PsvFile: + return csv.NewCSVWriter(iohelp.NopWrCloser(dl.Writer), outSch, csv.NewCSVInfo().SetDelim("|")) + } + + return nil, errors.New(string(dl.Format) + "is an unsupported format to write to stdout") +} diff --git a/go/libraries/doltcore/mvdata/table_data_loc.go b/go/libraries/doltcore/mvdata/table_data_loc.go index 56ad757b11..46ae306a9e 100644 --- a/go/libraries/doltcore/mvdata/table_data_loc.go +++ b/go/libraries/doltcore/mvdata/table_data_loc.go @@ -119,3 +119,23 @@ func (dl TableDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveO return noms.NewNomsMapUpdater(ctx, root.VRW(), m, outSch, statsCB), nil } + +// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using +// the same schema +func (dl TableDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { + if outSch.GetPKCols().Size() == 0 { + return nil, ErrNoPK + } + + if srcIsSorted { + return noms.NewNomsMapCreator(ctx, root.VRW(), outSch), nil + } else { + m, err := types.NewMap(ctx, root.VRW()) + + if err != nil { + return nil, err + } + + return noms.NewNomsMapUpdater(ctx, root.VRW(), m, outSch, statsCB), nil + } +} From c381201a1a7b314538bda9e3b279d68a220f2f1d Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Tue, 17 Sep 2019 12:22:12 -0700 Subject: [PATCH 02/11] bats/replace-table.bats: Add bats tests for replace table --- bats/replace-tables.bats | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 bats/replace-tables.bats diff --git a/bats/replace-tables.bats b/bats/replace-tables.bats new file mode 100644 index 0000000000..04770d6e59 --- /dev/null +++ b/bats/replace-tables.bats @@ -0,0 +1,32 @@ +#!/usr/bin/env bats + +setup() { + load $BATS_TEST_DIRNAME/helper/common.bash + export PATH=$PATH:~/go/bin + export NOMS_VERSION_NEXT=1 + cd $BATS_TMPDIR + mkdir "dolt-repo-$$" + cd "dolt-repo-$$" + dolt init +} + +teardown() { + rm -rf "$BATS_TMPDIR/dolt-repo-$$" +} + +@test "replace table using csv" { + run dolt table create -s `batshelper 1pk5col-ints.schema` test + [ "$status" -eq 0 ] + run dolt table import -r test `batshelper 1pk5col-ints.csv` + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 2, Additions: 2, 
Modifications: 0, Had No Effect: 0" ]] || false + [[ "$output" =~ "Import completed successfully." ]] || false +} + +@test "replace table using schema with csv" { + run dolt table create -s `batshelper 1pk5col-ints.schema` test + [ "$status" -eq 0 ] + run dolt table import -u -s `batshelper 1pk5col-ints.schema` test `batshelper 1pk5col-ints.csv` + [ "$status" -eq 1 ] + [[ "$output" =~ "schema is not supported for update or replace operations" ]] || false +} \ No newline at end of file From 05b4e4a5514af4c4be44e11a0106b8051413b84d Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Thu, 19 Sep 2019 13:29:21 -0700 Subject: [PATCH 03/11] go/{cmd, libraries}: Import replace successfully verifies JSON schema --- go/cmd/dolt/commands/tblcmds/import.go | 10 ++--- go/cmd/dolt/commands/tblcmds/table.go | 2 +- go/libraries/doltcore/mvdata/data_loc.go | 2 +- go/libraries/doltcore/mvdata/data_loc_test.go | 2 +- go/libraries/doltcore/mvdata/data_mover.go | 12 +++++- go/libraries/doltcore/mvdata/file_data_loc.go | 41 +++++++------------ .../doltcore/mvdata/stream_data_loc.go | 18 +++----- .../doltcore/mvdata/table_data_loc.go | 32 +++++++-------- .../doltcore/table/typed/json/reader.go | 23 +++++++---- 9 files changed, 67 insertions(+), 75 deletions(-) diff --git a/go/cmd/dolt/commands/tblcmds/import.go b/go/cmd/dolt/commands/tblcmds/import.go index cf3ffd65a1..ce34f3f26a 100644 --- a/go/cmd/dolt/commands/tblcmds/import.go +++ b/go/cmd/dolt/commands/tblcmds/import.go @@ -314,11 +314,6 @@ func executeMove(ctx context.Context, dEnv *env.DoltEnv, force bool, mvOpts *mvd } } - // not sure if this is where this should happen but it needs to happen at some point - // if mvOpts.Operation == mvdata.ReplaceOp && [schema check fails] { - // cli.PrintErrln(color.RedString("Schema does not match. 
Please upload file with same schema as table")) - // } - mover, nDMErr := mvdata.NewDataMover(ctx, root, dEnv.FS, mvOpts, importStatsCB) if nDMErr != nil { @@ -391,6 +386,11 @@ func newDataMoverErrToVerr(mvOpts *mvdata.MoveOptions, err *mvdata.DataMoverCrea bdr.AddDetails(`Mapping File: "%s"`, mvOpts.MappingFile) return bdr.AddCause(err.Cause).Build() + case mvdata.ReplacingErr: + bdr := errhand.BuildDError("Error replacing table") + bdr.AddDetails("When adding to replace data with %s, could not validate schema.", mvOpts.Src.String()) + return bdr.AddCause(err.Cause).Build() + case mvdata.CreateMapperErr: bdr := errhand.BuildDError("Error creating input to output mapper.") details := fmt.Sprintf("When attempting to move data from %s to %s, could not create a mapper.", mvOpts.Src.String(), mvOpts.Dest.String()) diff --git a/go/cmd/dolt/commands/tblcmds/table.go b/go/cmd/dolt/commands/tblcmds/table.go index f0471f8019..d9ab3bcebe 100644 --- a/go/cmd/dolt/commands/tblcmds/table.go +++ b/go/cmd/dolt/commands/tblcmds/table.go @@ -20,7 +20,7 @@ import ( ) var Commands = cli.GenSubCommandHandler([]*cli.Command{ - {Name: "import", Desc: "Creates, overwrites, or updates a table from the data in a file.", Func: Import, ReqRepo: true, EventType: eventsapi.ClientEventType_TABLE_IMPORT}, + {Name: "import", Desc: "Creates, overwrites, replaces, or updates a table from the data in a file.", Func: Import, ReqRepo: true, EventType: eventsapi.ClientEventType_TABLE_IMPORT}, {Name: "export", Desc: "Export a table to a file.", Func: Export, ReqRepo: true, EventType: eventsapi.ClientEventType_TABLE_EXPORT}, {Name: "create", Desc: "Creates or overwrite an existing table with an empty table.", Func: Create, ReqRepo: true, EventType: eventsapi.ClientEventType_TABLE_CREATE}, {Name: "rm", Desc: "Deletes a table", Func: Rm, ReqRepo: true, EventType: eventsapi.ClientEventType_TABLE_RM}, diff --git a/go/libraries/doltcore/mvdata/data_loc.go b/go/libraries/doltcore/mvdata/data_loc.go index ce2bfcb88d..f685dd6d73 100644 --- a/go/libraries/doltcore/mvdata/data_loc.go +++ b/go/libraries/doltcore/mvdata/data_loc.go @@ -91,7 +91,7 @@ type DataLocation interface { Exists(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS) (bool, error) // NewReader creates a TableReadCloser for the DataLocation - NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) + NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite // an existing table. 
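With this change every DataLocation reader also reports whether the file's contents conform to the schema it was read against. A condensed sketch of how NewDataMover consumes that flag for replace imports, taken from the data_mover.go hunk below (the nested if statements are flattened here for brevity):

```go
// Replace imports without a mapping file are rejected up front when the
// reader reports that the file does not match the existing table's schema.
rd, srcIsSorted, fileMatchesSchema, err := mvOpts.Src.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions)
if err != nil {
	return nil, &DataMoverCreationError{CreateReaderErr, err}
}
if mvOpts.Operation == ReplaceOp && mvOpts.MappingFile == "" && !fileMatchesSchema {
	err := errors.New("Schema from file does not match existing table schema.")
	return nil, &DataMoverCreationError{ReplacingErr, err}
}
```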
diff --git a/go/libraries/doltcore/mvdata/data_loc_test.go b/go/libraries/doltcore/mvdata/data_loc_test.go index c1f3ed89ba..30fb4a9998 100644 --- a/go/libraries/doltcore/mvdata/data_loc_test.go +++ b/go/libraries/doltcore/mvdata/data_loc_test.go @@ -239,7 +239,7 @@ func TestCreateRdWr(t *testing.T) { } // TODO (oo): fix this for json path test - rd, _, err := loc.NewReader(context.Background(), root, fs, "schema.json", nil) + rd, _, _, err := loc.NewReader(context.Background(), root, fs, "schema.json", nil) if err != nil { t.Fatal("Unexpected error creating writer", err) diff --git a/go/libraries/doltcore/mvdata/data_mover.go b/go/libraries/doltcore/mvdata/data_mover.go index d96ad36edc..5d7671ad84 100644 --- a/go/libraries/doltcore/mvdata/data_mover.go +++ b/go/libraries/doltcore/mvdata/data_mover.go @@ -79,6 +79,7 @@ const ( NomsKindSchemaErr DataMoverCreationErrType = "Invalid schema error" SchemaErr DataMoverCreationErrType = "Schema error" MappingErr DataMoverCreationErrType = "Mapping error" + ReplacingErr DataMoverCreationErrType = "Replacing error" CreateMapperErr DataMoverCreationErrType = "Mapper creation error" CreateWriterErr DataMoverCreationErrType = "Create writer error" CreateSorterErr DataMoverCreationErrType = "Create sorter error" @@ -98,7 +99,7 @@ func NewDataMover(ctx context.Context, root *doltdb.RootValue, fs filesys.Filesy var err error transforms := pipeline.NewTransformCollection() - rd, srcIsSorted, err := mvOpts.Src.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) + rd, srcIsSorted, fileMatchesSchema, err := mvOpts.Src.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) if err != nil { return nil, &DataMoverCreationError{CreateReaderErr, err} @@ -119,6 +120,13 @@ func NewDataMover(ctx context.Context, root *doltdb.RootValue, fs filesys.Filesy return nil, &DataMoverCreationError{SchemaErr, err} } + if mvOpts.Operation == ReplaceOp && mvOpts.MappingFile == "" { + if !fileMatchesSchema { + err := errors.New("Schema from file does not match existing table schema.") + return nil, &DataMoverCreationError{ReplacingErr, err} + } + } + var mapping *rowconv.FieldMapping if mvOpts.MappingFile != "" { mapping, err = rowconv.MappingFromFile(mvOpts.MappingFile, fs, rd.GetSchema(), outSch) @@ -206,7 +214,7 @@ func getOutSchema(ctx context.Context, inSch schema.Schema, root *doltdb.RootVal if mvOpts.Operation == UpdateOp || mvOpts.Operation == ReplaceOp { // Get schema from target - rd, _, err := mvOpts.Dest.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) + rd, _, _, err := mvOpts.Dest.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) if err != nil { return nil, err diff --git a/go/libraries/doltcore/mvdata/file_data_loc.go b/go/libraries/doltcore/mvdata/file_data_loc.go index e8d7d98875..fc59da4d5e 100644 --- a/go/libraries/doltcore/mvdata/file_data_loc.go +++ b/go/libraries/doltcore/mvdata/file_data_loc.go @@ -71,13 +71,13 @@ func (dl FileDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, f } // NewReader creates a TableReadCloser for the DataLocation -func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { +func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) { exists, isDir := fs.Exists(dl.Path) if !exists { - return nil, 
false, os.ErrNotExist + return nil, false, false, os.ErrNotExist } else if isDir { - return nil, false, filesys.ErrIsDir + return nil, false, false, filesys.ErrIsDir } switch dl.Format { @@ -94,41 +94,41 @@ func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim)) - return rd, false, err + return rd, false, true, err case PsvFile: rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|")) - return rd, false, err + return rd, false, true, err case XlsxFile: xlsxOpts := opts.(XlsxOptions) rd, err := xlsx.OpenXLSXReader(root.VRW().Format(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName}) - return rd, false, err + return rd, false, true, err case JsonFile: var sch schema.Schema = nil if schPath == "" { if opts == nil { - return nil, false, errors.New("Unable to determine table name on JSON import") + return nil, false, false, errors.New("Unable to determine table name on JSON import") } jsonOpts, _ := opts.(JSONOptions) table, exists, err := root.GetTable(context.TODO(), jsonOpts.TableName) if !exists { - return nil, false, errors.New(fmt.Sprintf("The following table could not be found:\n%v", jsonOpts.TableName)) + return nil, false, false, errors.New(fmt.Sprintf("The following table could not be found:\n%v", jsonOpts.TableName)) } if err != nil { - return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error())) + return nil, false, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error())) } sch, err = table.GetSchema(context.TODO()) if err != nil { - return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error())) + return nil, false, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error())) } } - rd, err := json.OpenJSONReader(root.VRW().Format(), dl.Path, fs, json.NewJSONInfo(), sch, schPath) - return rd, false, err + rd, fileMatchesSchema, err := json.OpenJSONReader(root.VRW().Format(), dl.Path, fs, json.NewJSONInfo(), sch, schPath) + return rd, false, fileMatchesSchema, err } - return nil, false, errors.New("unsupported format") + return nil, false, false, errors.New("unsupported format") } // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite @@ -159,18 +159,5 @@ func (dl FileDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveOp // NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using // the same schema func (dl FileDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { - switch dl.Format { - case CsvFile: - return csv.OpenCSVWriter(dl.Path, fs, outSch, csv.NewCSVInfo()) - case PsvFile: - return csv.OpenCSVWriter(dl.Path, fs, outSch, csv.NewCSVInfo().SetDelim("|")) - case XlsxFile: - panic("writing to xlsx files is not supported yet") - case JsonFile: - return json.OpenJSONWriter(dl.Path, fs, outSch, json.NewJSONInfo()) - case SqlFile: - return sqlexport.OpenSQLExportWriter(dl.Path, mvOpts.TableName, fs, outSch) - } - - panic("Invalid Data Format." 
+ string(dl.Format)) + panic("Replacing files is not supported") } diff --git a/go/libraries/doltcore/mvdata/stream_data_loc.go b/go/libraries/doltcore/mvdata/stream_data_loc.go index 33f9e3e46f..c571d559ef 100644 --- a/go/libraries/doltcore/mvdata/stream_data_loc.go +++ b/go/libraries/doltcore/mvdata/stream_data_loc.go @@ -47,7 +47,7 @@ func (dl StreamDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, } // NewReader creates a TableReadCloser for the DataLocation -func (dl StreamDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { +func (dl StreamDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) { switch dl.Format { case CsvFile: delim := "," @@ -62,14 +62,14 @@ func (dl StreamDataLocation) NewReader(ctx context.Context, root *doltdb.RootVal rd, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim(delim)) - return rd, false, err + return rd, false, false, err case PsvFile: rd, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim("|")) - return rd, false, err + return rd, false, false, err } - return nil, false, errors.New(string(dl.Format) + "is an unsupported format to read from stdin") + return nil, false, false, errors.New(string(dl.Format) + "is an unsupported format to read from stdin") } // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite @@ -95,13 +95,5 @@ func (dl StreamDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *Move // NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using // the same schema func (dl StreamDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { - switch dl.Format { - case CsvFile: - return csv.NewCSVWriter(iohelp.NopWrCloser(dl.Writer), outSch, csv.NewCSVInfo()) - - case PsvFile: - return csv.NewCSVWriter(iohelp.NopWrCloser(dl.Writer), outSch, csv.NewCSVInfo().SetDelim("|")) - } - - return nil, errors.New(string(dl.Format) + "is an unsupported format to write to stdout") + panic("Replacing is not supported for stdout") } diff --git a/go/libraries/doltcore/mvdata/table_data_loc.go b/go/libraries/doltcore/mvdata/table_data_loc.go index 46ae306a9e..6e9bf7fcbf 100644 --- a/go/libraries/doltcore/mvdata/table_data_loc.go +++ b/go/libraries/doltcore/mvdata/table_data_loc.go @@ -46,36 +46,36 @@ func (dl TableDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, } // NewReader creates a TableReadCloser for the DataLocation -func (dl TableDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { +func (dl TableDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) { tbl, ok, err := root.GetTable(ctx, dl.Name) if err != nil { - return nil, false, err + return nil, false, false, err } if !ok { - return nil, false, 
doltdb.ErrTableNotFound + return nil, false, false, doltdb.ErrTableNotFound } sch, err := tbl.GetSchema(ctx) if err != nil { - return nil, false, err + return nil, false, false, err } rowData, err := tbl.GetRowData(ctx) if err != nil { - return nil, false, err + return nil, false, false, err } rd, err := noms.NewNomsMapReader(ctx, rowData, sch) if err != nil { - return nil, false, err + return nil, false, false, err } - return rd, true, nil + return rd, true, false, nil } // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite @@ -127,15 +127,15 @@ func (dl TableDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *Move return nil, ErrNoPK } - if srcIsSorted { - return noms.NewNomsMapCreator(ctx, root.VRW(), outSch), nil - } else { - m, err := types.NewMap(ctx, root.VRW()) + // if srcIsSorted { + // return noms.NewNomsMapCreator(ctx, root.VRW(), outSch), nil + // } else { + m, err := types.NewMap(ctx, root.VRW()) - if err != nil { - return nil, err - } - - return noms.NewNomsMapUpdater(ctx, root.VRW(), m, outSch, statsCB), nil + if err != nil { + return nil, err } + + return noms.NewNomsMapUpdater(ctx, root.VRW(), m, outSch, statsCB), nil + // } } diff --git a/go/libraries/doltcore/table/typed/json/reader.go b/go/libraries/doltcore/table/typed/json/reader.go index 0727c023f6..719cd7afbf 100644 --- a/go/libraries/doltcore/table/typed/json/reader.go +++ b/go/libraries/doltcore/table/typed/json/reader.go @@ -37,50 +37,55 @@ type JSONReader struct { ind int } -func OpenJSONReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *JSONFileInfo, sch schema.Schema, schPath string) (*JSONReader, error) { +func OpenJSONReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *JSONFileInfo, sch schema.Schema, schPath string) (*JSONReader, bool, error) { r, err := fs.OpenForRead(path) if err != nil { - return nil, err + return nil, false, err } return NewJSONReader(nbf, r, info, fs, sch, schPath, path) } -func NewJSONReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *JSONFileInfo, fs filesys.ReadableFS, sch schema.Schema, schPath string, tblPath string) (*JSONReader, error) { +func NewJSONReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *JSONFileInfo, fs filesys.ReadableFS, sch schema.Schema, schPath string, tblPath string) (*JSONReader, bool, error) { br := bufio.NewReaderSize(r, ReadBufSize) if sch == nil { if schPath == "" { - return nil, errors.New("schema must be provided") + return nil, false, errors.New("schema must be provided") } schData, err := fs.ReadFile(schPath) if err != nil { - return nil, err + return nil, false, err } jsonSchStr := string(schData) sch, err = encoding.UnmarshalJson(jsonSchStr) if err != nil { - return nil, err + return nil, false, err } } tblData, err := fs.ReadFile(tblPath) if err != nil { - return nil, err + return nil, false, err } jsonRows, err := UnmarshalFromJSON(tblData) if err != nil { - return nil, err + return nil, false, err } + fileMatchesSchema := true decodedRows, err := jsonRows.decodeJSONRows(nbf, sch) + // decodeJSONRows returns err if row doesn't match schema + if err != nil { + fileMatchesSchema = false + } info.SetRows(decodedRows) - return &JSONReader{r, br, info, sch, 0}, nil + return &JSONReader{r, br, info, sch, 0}, fileMatchesSchema, nil } // Close should release resources being held From 8304de36ce4d3e379a64b9de7aee20d44358bec2 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Thu, 19 Sep 2019 13:34:34 -0700 Subject: [PATCH 04/11] 
bats: Created bats tests for replace-table and updated update-table tests --- bats/helper/1pk5col-ints.json | 20 ++++++++ bats/helper/employees-tbl-new.json | 36 ++++++++++++++ bats/helper/employees-tbl-schema-wrong.csv | 3 ++ bats/helper/employees-tbl-schema-wrong.json | 32 ++++++++++++ bats/replace-tables.bats | 54 ++++++++++++++++++++- bats/update-tables.bats | 13 ++++- 6 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 bats/helper/1pk5col-ints.json create mode 100644 bats/helper/employees-tbl-new.json create mode 100644 bats/helper/employees-tbl-schema-wrong.csv create mode 100644 bats/helper/employees-tbl-schema-wrong.json diff --git a/bats/helper/1pk5col-ints.json b/bats/helper/1pk5col-ints.json new file mode 100644 index 0000000000..30c6167b68 --- /dev/null +++ b/bats/helper/1pk5col-ints.json @@ -0,0 +1,20 @@ +{ + "rows": [ + { + "pk": 0, + "c1": 1, + "c2": 2, + "c3": 3, + "c4": 4, + "c5": 5 + }, + { + "pk": 1, + "c1": 1, + "c2": 2, + "c3": 3, + "c4": 4, + "c5": 5 + } + ] +} diff --git a/bats/helper/employees-tbl-new.json b/bats/helper/employees-tbl-new.json new file mode 100644 index 0000000000..4a491d04dd --- /dev/null +++ b/bats/helper/employees-tbl-new.json @@ -0,0 +1,36 @@ +{ + "rows": [ + { + "id": 0, + "first name": "tim", + "last name": "sehn", + "title": "ceo", + "start date": "", + "end date": "" + }, + { + "id": 1, + "first name": "aaron", + "last name": "son", + "title": "founder", + "start date": "", + "end date": "" + }, + { + "id": 2, + "first name": "brian", + "last name": "hendricks", + "title": "founder", + "start date": "", + "end date": "" + }, + { + "id": 3, + "first name": "matt", + "last name": "jesuele", + "title": "software engineer", + "start date": "", + "end date": "" + } + ] +} diff --git a/bats/helper/employees-tbl-schema-wrong.csv b/bats/helper/employees-tbl-schema-wrong.csv new file mode 100644 index 0000000000..f93e2167ce --- /dev/null +++ b/bats/helper/employees-tbl-schema-wrong.csv @@ -0,0 +1,3 @@ +id, first name, last name, position +0, "tim", "sehn", "ceo" +1, "aaron", "son", "founder" diff --git a/bats/helper/employees-tbl-schema-wrong.json b/bats/helper/employees-tbl-schema-wrong.json new file mode 100644 index 0000000000..665fa446e2 --- /dev/null +++ b/bats/helper/employees-tbl-schema-wrong.json @@ -0,0 +1,32 @@ +{ + "rows": [ + { + "id": "0", + "first name": "tim", + "last name": "sehn", + "position": "ceo", + "children": true + }, + { + "id": "1", + "first name": "aaron", + "last name": "son", + "position": "founder", + "children": true + }, + { + "id": "2", + "first name": "brian", + "last name": "hendricks", + "position": "founder", + "children": true + }, + { + "id": "3", + "first name": "matt", + "last name": "jesuele", + "position": "software engineer", + "children": false + } + ] +} diff --git a/bats/replace-tables.bats b/bats/replace-tables.bats index 04770d6e59..0522834b2a 100644 --- a/bats/replace-tables.bats +++ b/bats/replace-tables.bats @@ -26,7 +26,59 @@ teardown() { @test "replace table using schema with csv" { run dolt table create -s `batshelper 1pk5col-ints.schema` test [ "$status" -eq 0 ] - run dolt table import -u -s `batshelper 1pk5col-ints.schema` test `batshelper 1pk5col-ints.csv` + run dolt table import -r -s `batshelper 1pk5col-ints.schema` test `batshelper 1pk5col-ints.csv` [ "$status" -eq 1 ] [[ "$output" =~ "schema is not supported for update or replace operations" ]] || false +} + +@test "replace table using json" { + run dolt table create -s `batshelper employees-sch.json` employees + [ 
"$status" -eq 0 ] + run dolt table import -r employees `batshelper employees-tbl.json` + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 3, Additions: 3, Modifications: 0, Had No Effect: 0" ]] || false + [[ "$output" =~ "Import completed successfully." ]] || false +} + +@test "replace table using wrong json" { + run dolt table create -s `batshelper employees-sch-wrong.json` employees + [ "$status" -eq 0 ] + run dolt table import -r employees `batshelper employees-tbl.json` + [ "$status" -eq 1 ] + [[ "$output" =~ "Error replacing table" ]] || false + [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false +} + +@test "replace table using schema with json" { + run dolt table create -s `batshelper employees-sch-wrong.json` employees + [ "$status" -eq 0 ] + run dolt table import -r -s `batshelper employees-sch.json` employees `batshelper employees-tbl.json` + [ "$status" -eq 1 ] + [[ "$output" =~ "fatal: schema is not supported for update or replace operations" ]] || false +} + +@test "replace table with json when table does not exist" { + run dolt table import -r employees `batshelper employees-tbl.json` + [ "$status" -eq 1 ] + [[ "$output" =~ "The following table could not be found:" ]] || false +} + +@test "replace table with existing imported data" { + run dolt table import -c -s `batshelper employees-sch.json` employees `batshelper employees-tbl.json` + [ "$status" -eq 0 ] + [[ "$output" =~ "Import completed successfully." ]] || false + run dolt table import -r employees `batshelper employees-tbl-new.json` + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 4, Additions: 4, Modifications: 0, Had No Effect: 0" ]] || false + [[ "$output" =~ "Import completed successfully." ]] || false +} + +@test "replace table with existing imported data with different schema" { + run dolt table import -c -s `batshelper employees-sch.json` employees `batshelper employees-tbl.json` + [ "$status" -eq 0 ] + [[ "$output" =~ "Import completed successfully." ]] || false + run dolt table import -r employees `batshelper employees-tbl-schema-wrong.json` + [ "$status" -eq 1 ] + [[ "$output" =~ "Error replacing table" ]] || false + [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false } \ No newline at end of file diff --git a/bats/update-tables.bats b/bats/update-tables.bats index 1af2a1530a..b4cdf7ed3d 100644 --- a/bats/update-tables.bats +++ b/bats/update-tables.bats @@ -28,7 +28,7 @@ teardown() { [ "$status" -eq 0 ] run dolt table import -u -s `batshelper 1pk5col-ints.schema` test `batshelper 1pk5col-ints.csv` [ "$status" -eq 1 ] - [[ "$output" =~ "schema is not supported for update operations" ]] || false + [[ "$output" =~ "fatal: schema is not supported for update or replace operations" ]] || false } @test "update table using csv with newlines" { @@ -61,7 +61,16 @@ teardown() { [ "$status" -eq 0 ] run dolt table import -u -s `batshelper employees-sch.json` employees `batshelper employees-tbl.json` [ "$status" -eq 1 ] - [[ "$output" =~ "schema is not supported for update operations" ]] || false + [[ "$output" =~ "fatal: schema is not supported for update or replace operations" ]] || false +} + +@test "update table with existing imported data with different schema" { + run dolt table import -c -s `batshelper employees-sch.json` employees `batshelper employees-tbl.json` + [ "$status" -eq 0 ] + [[ "$output" =~ "Import completed successfully." 
]] || false + run dolt table import -u employees `batshelper employees-tbl-schema-wrong.json` + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 0, Additions: 0, Modifications: 0, Had No Effect: 0" ]] || false } @test "update table with json when table does not exist" { From 6cdd524b152188534c9f2d25307d00951d97efa5 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Thu, 19 Sep 2019 15:36:19 -0700 Subject: [PATCH 05/11] go/libraries/doltcore: Add replace schema validation for xlsx files --- go/libraries/doltcore/mvdata/file_data_loc.go | 44 ++++++++++++------- .../doltcore/table/untyped/xlsx/reader.go | 31 ++++++++----- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/go/libraries/doltcore/mvdata/file_data_loc.go b/go/libraries/doltcore/mvdata/file_data_loc.go index fc59da4d5e..2f07e5b0f2 100644 --- a/go/libraries/doltcore/mvdata/file_data_loc.go +++ b/go/libraries/doltcore/mvdata/file_data_loc.go @@ -92,18 +92,22 @@ func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue } } - rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim)) - - return rd, false, true, err + rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim)) + return rd, false, fileMatchesSchema, err case PsvFile: - rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|")) - return rd, false, true, err + rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|")) + return rd, false, fileMatchesSchema, err case XlsxFile: + var outSch schema.Schema = nil xlsxOpts := opts.(XlsxOptions) - rd, err := xlsx.OpenXLSXReader(root.VRW().Format(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName}) - return rd, false, true, err + sch, tableExists, err := GetOutSchema(xlsxOpts.SheetName, root) + if tableExists { + outSch = sch + } + rd, fileMatchesSchema, err := xlsx.OpenXLSXReader(root.VRW().Format(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName}, outSch) + return rd, false, fileMatchesSchema, err case JsonFile: var sch schema.Schema = nil @@ -112,16 +116,9 @@ func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue return nil, false, false, errors.New("Unable to determine table name on JSON import") } jsonOpts, _ := opts.(JSONOptions) - table, exists, err := root.GetTable(context.TODO(), jsonOpts.TableName) - if !exists { - return nil, false, false, errors.New(fmt.Sprintf("The following table could not be found:\n%v", jsonOpts.TableName)) - } + sch, _, err = GetOutSchema(jsonOpts.TableName, root) if err != nil { - return nil, false, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error())) - } - sch, err = table.GetSchema(context.TODO()) - if err != nil { - return nil, false, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error())) + return nil, false, false, err } } rd, fileMatchesSchema, err := json.OpenJSONReader(root.VRW().Format(), dl.Path, fs, json.NewJSONInfo(), sch, schPath) @@ -161,3 +158,18 @@ func (dl FileDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveOp func (dl FileDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { panic("Replacing files is not supported") } + +func 
GetOutSchema(tableName string, root *doltdb.RootValue) (schema.Schema, bool, error) { + table, exists, err := root.GetTable(context.TODO(), tableName) + if !exists { + return nil, exists, errors.New(fmt.Sprintf("The following table could not be found:\n%v", tableName)) + } + if err != nil { + return nil, exists, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error())) + } + sch, err := table.GetSchema(context.TODO()) + if err != nil { + return nil, exists, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error())) + } + return sch, exists, nil +} diff --git a/go/libraries/doltcore/table/untyped/xlsx/reader.go b/go/libraries/doltcore/table/untyped/xlsx/reader.go index 89cdf685f5..db5f776fe9 100644 --- a/go/libraries/doltcore/table/untyped/xlsx/reader.go +++ b/go/libraries/doltcore/table/untyped/xlsx/reader.go @@ -38,11 +38,11 @@ type XLSXReader struct { rows []row.Row } -func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *XLSXFileInfo) (*XLSXReader, error) { +func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *XLSXFileInfo, outSch schema.Schema) (*XLSXReader, bool, error) { r, err := fs.OpenForRead(path) if err != nil { - return nil, err + return nil, false, err } br := bufio.NewReaderSize(r, ReadBufSize) @@ -51,18 +51,29 @@ func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS data, err := getXlsxRows(path, info.SheetName) if err != nil { - return nil, err + return nil, false, err } - _, sch := untyped.NewUntypedSchema(colStrs...) - - decodedRows, err := decodeXLSXRows(nbf, data, sch) - if err != nil { - r.Close() - return nil, err + fileMatchesSchema := true + var decodedRows []row.Row + var inSch schema.Schema + if outSch != nil { + inSch = outSch + decodedRows, err = decodeXLSXRows(nbf, data, outSch) + if err != nil { + fileMatchesSchema = false + } + } else { + _, sch := untyped.NewUntypedSchema(colStrs...) 
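+ // No target schema was supplied (a plain import rather than a replace), so fall
+ // back to an untyped schema built from the sheet's column headers. In this path
+ // a decode failure is still a hard error, whereas above it only clears fileMatchesSchema.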
+ inSch = sch + decodedRows, err = decodeXLSXRows(nbf, data, sch) + if err != nil { + r.Close() + return nil, false, err + } } - return &XLSXReader{r, br, info, sch, 0, decodedRows}, nil + return &XLSXReader{r, br, info, inSch, 0, decodedRows}, fileMatchesSchema, nil } func getColHeaders(path string, sheetName string) ([]string, error) { From 4903b9d4d81b5db6d4fa26bc5328c7f5447eab47 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Thu, 19 Sep 2019 15:38:00 -0700 Subject: [PATCH 06/11] bats: Add replace-tables bats tests for xlsx files --- bats/helper/employees-sch-2.json | 50 ++++++++++++++++++++++++++++++++ bats/replace-tables.bats | 27 +++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 bats/helper/employees-sch-2.json diff --git a/bats/helper/employees-sch-2.json b/bats/helper/employees-sch-2.json new file mode 100644 index 0000000000..d928352b3e --- /dev/null +++ b/bats/helper/employees-sch-2.json @@ -0,0 +1,50 @@ +{ + "columns": [ + { + "name": "id", + "kind": "string", + "tag": 0, + "is_part_of_pk": true, + "col_constraints": [ + { + "constraint_type": "not_null" + } + ] + }, + { + "name": "first", + "kind": "string", + "tag": 1, + "is_part_of_pk": false, + "col_constraints": [] + }, + { + "name": "last", + "kind": "string", + "tag": 2, + "is_part_of_pk": false, + "col_constraints": [] + }, + { + "name": "title", + "kind": "string", + "tag": 3, + "is_part_of_pk": false, + "col_constraints": [] + }, + { + "name": "start date", + "kind": "string", + "tag": 4, + "is_part_of_pk": false, + "col_constraints": [] + }, + { + "name": "end date", + "kind": "string", + "tag": 5, + "is_part_of_pk": false, + "col_constraints": [] + } + ] +} diff --git a/bats/replace-tables.bats b/bats/replace-tables.bats index 0522834b2a..7d5181f3a6 100644 --- a/bats/replace-tables.bats +++ b/bats/replace-tables.bats @@ -23,6 +23,15 @@ teardown() { [[ "$output" =~ "Import completed successfully." ]] || false } +@test "replace table using csv with wrong schema" { + run dolt table create -s `batshelper 1pk5col-ints.schema` test + [ "$status" -eq 0 ] + run dolt table import -r test `batshelper 2pk5col-ints.csv` + [ "$status" -eq 1 ] + [[ "$output" =~ "Error replacing table" ]] || false + [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false +} + @test "replace table using schema with csv" { run dolt table create -s `batshelper 1pk5col-ints.schema` test [ "$status" -eq 0 ] @@ -81,4 +90,22 @@ teardown() { [ "$status" -eq 1 ] [[ "$output" =~ "Error replacing table" ]] || false [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false +} + +@test "replace table using xlsx file" { + run dolt table create -s `batshelper employees-sch-2.json` employees + [ "$status" -eq 0 ] + run dolt table import -r employees `batshelper employees.xlsx` + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 3, Additions: 3, Modifications: 0, Had No Effect: 0" ]] || false + [[ "$output" =~ "Import completed successfully." 
]] || false +} + +@test "replace table using xlsx file with wrong schema" { + run dolt table create -s `batshelper employees-sch.json` employees + [ "$status" -eq 0 ] + run dolt table import -r employees `batshelper employees.xlsx` + [ "$status" -eq 1 ] + [[ "$output" =~ "Error replacing table" ]] || false + [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false } \ No newline at end of file From e228b2185ef5935b16291a01afb5597c9a650e46 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Thu, 19 Sep 2019 16:33:12 -0700 Subject: [PATCH 07/11] go/{cmd, libraries}: Add replace schema validation for csv and psv files --- go/cmd/dolt/commands/tblcmds/import.go | 2 + go/go.sum | 3 ++ go/libraries/doltcore/mvdata/data_mover.go | 3 +- go/libraries/doltcore/mvdata/file_data_loc.go | 25 +++++---- .../doltcore/mvdata/stream_data_loc.go | 4 +- .../doltcore/mvdata/table_data_loc.go | 4 -- .../doltcore/table/pipeline/transform_test.go | 8 +-- .../doltcore/table/typed/json/reader.go | 1 - .../doltcore/table/untyped/csv/reader.go | 54 ++++++++++++++++--- .../doltcore/table/untyped/csv/reader_test.go | 2 +- 10 files changed, 78 insertions(+), 28 deletions(-) diff --git a/go/cmd/dolt/commands/tblcmds/import.go b/go/cmd/dolt/commands/tblcmds/import.go index ce34f3f26a..31c65c64ef 100644 --- a/go/cmd/dolt/commands/tblcmds/import.go +++ b/go/cmd/dolt/commands/tblcmds/import.go @@ -190,6 +190,8 @@ func validateImportArgs(apr *argparser.ArgParseResults, usage cli.UsagePrinter) srcOpts = mvdata.XlsxOptions{SheetName: tableName} } else if val.Format == mvdata.JsonFile { srcOpts = mvdata.JSONOptions{TableName: tableName} + } else if val.Format == mvdata.CsvFile || val.Format == mvdata.PsvFile { + srcOpts = mvdata.CsvOptions{TableName: tableName} } case mvdata.StreamDataLocation: diff --git a/go/go.sum b/go/go.sum index 633e6e448d..5ed9291757 100644 --- a/go/go.sum +++ b/go/go.sum @@ -194,12 +194,15 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.1.1 h1:sJZmqHoEaY7f+NPP8pgLB/WxulyR3fewgCM2qaSlBb4= github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/liquidata-inc/dolt v0.9.8 h1:uup9UA59oSvYsqYd9A3zF0CFXUnF1qI8odE+Cxtn+pg= github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190821121850-0e0249cf7bc0 h1:PfmOkSsLcIete4/6vhuJg020i4BEHLaHnEhvclbFxS8= github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190821121850-0e0249cf7bc0/go.mod h1:DdWE0ku/mNfuLsRJIrHeHpDtB7am+6oopxEsQKmVkx8= github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190911213247-6dfc00bbb116 h1:1sjwK2ofL06EFX8f0wbA/RW2Y2C3su42YBg/O4Rn9bg= github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190911213247-6dfc00bbb116/go.mod h1:DdWE0ku/mNfuLsRJIrHeHpDtB7am+6oopxEsQKmVkx8= github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190912171848-afe92c9129ca h1:m+IjqmcLBpmNI/58CcE+GOjsD5gxwfK1etDdcVAUx/k= github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190912171848-afe92c9129ca/go.mod h1:DdWE0ku/mNfuLsRJIrHeHpDtB7am+6oopxEsQKmVkx8= +github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190916182737-521be194cc83 h1:6DUdp1E9YtjukV2VHIj7iHQx4XQQVqqFjmHGlArjsMI= +github.com/liquidata-inc/go-mysql-server v0.4.1-0.20190916182737-521be194cc83/go.mod h1:DdWE0ku/mNfuLsRJIrHeHpDtB7am+6oopxEsQKmVkx8= github.com/liquidata-inc/ishell v0.0.0-20190514193646-693241f1f2a0 h1:phMgajKClMUiIr+hF2LGt8KRuUa2Vd2GI1sNgHgSXoU= github.com/liquidata-inc/ishell 
v0.0.0-20190514193646-693241f1f2a0/go.mod h1:YC1rI9k5gx8D02ljlbxDfZe80s/iq8bGvaaQsvR+qxs= github.com/liquidata-inc/mmap-go v1.0.3 h1:2LndAeAtup9rpvUmu4wZSYCsjCQ0Zpc+NqE+6+PnT7g= diff --git a/go/libraries/doltcore/mvdata/data_mover.go b/go/libraries/doltcore/mvdata/data_mover.go index 5d7671ad84..a27f98bca8 100644 --- a/go/libraries/doltcore/mvdata/data_mover.go +++ b/go/libraries/doltcore/mvdata/data_mover.go @@ -42,7 +42,8 @@ const ( ) type CsvOptions struct { - Delim string + Delim string + TableName string } type XlsxOptions struct { diff --git a/go/libraries/doltcore/mvdata/file_data_loc.go b/go/libraries/doltcore/mvdata/file_data_loc.go index 2f07e5b0f2..2a0f2cb987 100644 --- a/go/libraries/doltcore/mvdata/file_data_loc.go +++ b/go/libraries/doltcore/mvdata/file_data_loc.go @@ -83,20 +83,27 @@ func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue switch dl.Format { case CsvFile: delim := "," - - if opts != nil { - csvOpts, _ := opts.(CsvOptions) - - if len(csvOpts.Delim) != 0 { - delim = csvOpts.Delim - } + csvOpts, _ := opts.(CsvOptions) + if len(csvOpts.Delim) != 0 { + delim = csvOpts.Delim } - rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim)) + var outSch schema.Schema = nil + sch, tableExists, err := GetOutSchema(csvOpts.TableName, root) + if tableExists { + outSch = sch + } + rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim), outSch) return rd, false, fileMatchesSchema, err case PsvFile: - rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|")) + var outSch schema.Schema = nil + csvOpts, _ := opts.(CsvOptions) + sch, tableExists, err := GetOutSchema(csvOpts.TableName, root) + if tableExists { + outSch = sch + } + rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|"), outSch) return rd, false, fileMatchesSchema, err case XlsxFile: diff --git a/go/libraries/doltcore/mvdata/stream_data_loc.go b/go/libraries/doltcore/mvdata/stream_data_loc.go index c571d559ef..73b5b7ebe9 100644 --- a/go/libraries/doltcore/mvdata/stream_data_loc.go +++ b/go/libraries/doltcore/mvdata/stream_data_loc.go @@ -60,12 +60,12 @@ func (dl StreamDataLocation) NewReader(ctx context.Context, root *doltdb.RootVal } } - rd, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim(delim)) + rd, _, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim(delim), nil) return rd, false, false, err case PsvFile: - rd, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim("|")) + rd, _, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim("|"), nil) return rd, false, false, err } diff --git a/go/libraries/doltcore/mvdata/table_data_loc.go b/go/libraries/doltcore/mvdata/table_data_loc.go index 6e9bf7fcbf..faee15a540 100644 --- a/go/libraries/doltcore/mvdata/table_data_loc.go +++ b/go/libraries/doltcore/mvdata/table_data_loc.go @@ -127,9 +127,6 @@ func (dl TableDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *Move return nil, ErrNoPK } - // if srcIsSorted { - // return noms.NewNomsMapCreator(ctx, root.VRW(), outSch), nil - // } else { m, err := types.NewMap(ctx, root.VRW()) if err != nil { @@ -137,5 +134,4 @@ func (dl TableDataLocation) NewReplacingWriter(ctx 
context.Context, mvOpts *Move } return noms.NewNomsMapUpdater(ctx, root.VRW(), m, outSch, statsCB), nil - // } } diff --git a/go/libraries/doltcore/table/pipeline/transform_test.go b/go/libraries/doltcore/table/pipeline/transform_test.go index 9cdd61e174..e57319a4a7 100644 --- a/go/libraries/doltcore/table/pipeline/transform_test.go +++ b/go/libraries/doltcore/table/pipeline/transform_test.go @@ -85,7 +85,7 @@ func TestPipeline(t *testing.T) { func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) + rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) tc := NewTransformCollection( @@ -123,7 +123,7 @@ func TestAddingStages(t *testing.T) { func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) + rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) tc := NewTransformCollection( @@ -193,7 +193,7 @@ Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) + rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) addedStages := []NamedTransform{ @@ -281,7 +281,7 @@ func TestAbort(t *testing.T) { func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) + rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) var wg = sync.WaitGroup{} diff --git a/go/libraries/doltcore/table/typed/json/reader.go b/go/libraries/doltcore/table/typed/json/reader.go index 719cd7afbf..6c85f01f76 100644 --- a/go/libraries/doltcore/table/typed/json/reader.go +++ b/go/libraries/doltcore/table/typed/json/reader.go @@ -79,7 +79,6 @@ func NewJSONReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *JSONFileInfo fileMatchesSchema := true decodedRows, err := jsonRows.decodeJSONRows(nbf, sch) - // decodeJSONRows returns err if row doesn't match schema if err != nil { fileMatchesSchema = false } diff --git a/go/libraries/doltcore/table/untyped/csv/reader.go b/go/libraries/doltcore/table/untyped/csv/reader.go index 7093ae39b2..a59d1c8d78 100644 --- a/go/libraries/doltcore/table/untyped/csv/reader.go +++ b/go/libraries/doltcore/table/untyped/csv/reader.go @@ -47,29 +47,37 @@ type CSVReader struct { // OpenCSVReader opens a reader at a given path within a given filesys. The CSVFileInfo should describe the csv file // being opened. 
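OpenCSVReader and NewCSVReader now take the target table's schema (nil when there is no table to compare against) and return a match flag computed by SchemasMatch, defined at the end of this hunk. One thing worth noting from that code: ColumnsMatch compares only column names and tags, not kinds, so a column whose type changed still counts as a match. A small illustrative check, under the assumption that untyped.NewUntypedSchema assigns tags 0..n-1 in column order:

```go
// Two header-derived schemas with the same ordered column names yield
// identical (name, tag) pairs, so SchemasMatch reports true.
_, fileSch := untyped.NewUntypedSchema("pk", "c1", "c2")
_, tblSch := untyped.NewUntypedSchema("pk", "c1", "c2")
match, err := SchemasMatch(fileSch, tblSch)
fmt.Println(match, err) // true <nil>

// Reordering the columns shifts the tags, so the same names no longer match.
_, reordered := untyped.NewUntypedSchema("pk", "c2", "c1")
match, err = SchemasMatch(fileSch, reordered)
fmt.Println(match, err) // false <nil>
```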
-func OpenCSVReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *CSVFileInfo) (*CSVReader, error) { +func OpenCSVReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *CSVFileInfo, outSch schema.Schema) (*CSVReader, bool, error) { r, err := fs.OpenForRead(path) if err != nil { - return nil, err + return nil, false, err } - return NewCSVReader(nbf, r, info) + return NewCSVReader(nbf, r, info, outSch) } // NewCSVReader creates a CSVReader from a given ReadCloser. The CSVFileInfo should describe the csv file being read. -func NewCSVReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *CSVFileInfo) (*CSVReader, error) { +func NewCSVReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *CSVFileInfo, outSch schema.Schema) (*CSVReader, bool, error) { br := bufio.NewReaderSize(r, ReadBufSize) colStrs, err := getColHeaders(br, info) if err != nil { r.Close() - return nil, err + return nil, false, err } _, sch := untyped.NewUntypedSchema(colStrs...) - return &CSVReader{r, br, info, sch, false, nbf}, nil + fileMatchesSchema := true + if outSch != nil { + fileMatchesSchema, err = SchemasMatch(sch, outSch) + if err != nil { + return nil, false, nil + } + } + + return &CSVReader{r, br, info, sch, false, nbf}, fileMatchesSchema, nil } func getColHeaders(br *bufio.Reader, info *CSVFileInfo) ([]string, error) { @@ -171,3 +179,37 @@ func (csvr *CSVReader) parseRow(line string) (row.Row, error) { return row.New(csvr.nbf, sch, taggedVals) } + +func SchemasMatch(sch1, sch2 schema.Schema) (bool, error) { + inSch := sch1.GetAllCols() + outSch := sch2.GetAllCols() + + if inSch.Size() != outSch.Size() { + return false, nil + } + + match := true + err := outSch.Iter(func(tag uint64, outCol schema.Column) (stop bool, err error) { + inCol, ok := inSch.GetByTag(tag) + + if !ok || !ColumnsMatch(inCol, outCol) { + match = false + return true, nil + } + + return false, nil + }) + + if err != nil { + return false, err + } + + return match, nil +} + +func ColumnsMatch(inCol, outCol schema.Column) bool { + if inCol.Name != outCol.Name || inCol.Tag != outCol.Tag { + return false + } + return true +} diff --git a/go/libraries/doltcore/table/untyped/csv/reader_test.go b/go/libraries/doltcore/table/untyped/csv/reader_test.go index f09e8f98ec..916082de2e 100644 --- a/go/libraries/doltcore/table/untyped/csv/reader_test.go +++ b/go/libraries/doltcore/table/untyped/csv/reader_test.go @@ -132,7 +132,7 @@ func readTestRows(t *testing.T, inputStr string, info *CSVFileInfo) ([]row.Row, const path = "/file.csv" fs := filesys.NewInMemFS(nil, map[string][]byte{path: []byte(inputStr)}, root) - csvR, err := OpenCSVReader(types.Format_7_18, path, fs, info) + csvR, _, err := OpenCSVReader(types.Format_7_18, path, fs, info, nil) defer csvR.Close(context.Background()) if err != nil { From b1a25ebf27b888611e044c7f578119dba7f4de88 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Thu, 19 Sep 2019 17:25:51 -0700 Subject: [PATCH 08/11] bats: Add more replace-table bats tests --- bats/replace-tables.bats | 51 ++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/bats/replace-tables.bats b/bats/replace-tables.bats index 7d5181f3a6..cd4271108c 100644 --- a/bats/replace-tables.bats +++ b/bats/replace-tables.bats @@ -29,7 +29,25 @@ teardown() { run dolt table import -r test `batshelper 2pk5col-ints.csv` [ "$status" -eq 1 ] [[ "$output" =~ "Error replacing table" ]] || false - [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || 
false + [[ "$output" =~ "cause: Schema from file does not match schema from existing table." ]] || false +} + +@test "replace table using psv" { + run dolt table create -s `batshelper 1pk5col-ints.schema` test + [ "$status" -eq 0 ] + run dolt table import -r test `batshelper 1pk5col-ints.psv` + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 2, Additions: 2, Modifications: 0, Had No Effect: 0" ]] || false + [[ "$output" =~ "Import completed successfully." ]] || false +} + +@test "replace table using psv with wrong schema" { + run dolt table create -s `batshelper 2pk5col-ints.schema` test + [ "$status" -eq 0 ] + run dolt table import -r test `batshelper 1pk5col-ints.psv` + [ "$status" -eq 1 ] + [[ "$output" =~ "Error replacing table" ]] || false + [[ "$output" =~ "cause: Schema from file does not match schema from existing table." ]] || false } @test "replace table using schema with csv" { @@ -49,13 +67,13 @@ teardown() { [[ "$output" =~ "Import completed successfully." ]] || false } -@test "replace table using wrong json" { +@test "replace table using json with wrong schema" { run dolt table create -s `batshelper employees-sch-wrong.json` employees [ "$status" -eq 0 ] run dolt table import -r employees `batshelper employees-tbl.json` [ "$status" -eq 1 ] [[ "$output" =~ "Error replacing table" ]] || false - [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false + [[ "$output" =~ "cause: Schema from file does not match schema from existing table." ]] || false } @test "replace table using schema with json" { @@ -72,7 +90,7 @@ teardown() { [[ "$output" =~ "The following table could not be found:" ]] || false } -@test "replace table with existing imported data" { +@test "replace table with existing data using json" { run dolt table import -c -s `batshelper employees-sch.json` employees `batshelper employees-tbl.json` [ "$status" -eq 0 ] [[ "$output" =~ "Import completed successfully." ]] || false @@ -82,14 +100,23 @@ teardown() { [[ "$output" =~ "Import completed successfully." ]] || false } -@test "replace table with existing imported data with different schema" { +@test "replace table with existing data with different schema" { run dolt table import -c -s `batshelper employees-sch.json` employees `batshelper employees-tbl.json` [ "$status" -eq 0 ] [[ "$output" =~ "Import completed successfully." ]] || false run dolt table import -r employees `batshelper employees-tbl-schema-wrong.json` [ "$status" -eq 1 ] [[ "$output" =~ "Error replacing table" ]] || false - [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false + [[ "$output" =~ "cause: Schema from file does not match schema from existing table." ]] || false +} + +@test "replace table with bad json" { + run dolt table create -s `batshelper employees-sch.json` employees + [ "$status" -eq 0 ] + run dolt table import -r employees `batshelper employees-tbl-bad.json` + [ "$status" -eq 1 ] + [[ "$output" =~ "Error creating reader" ]] || false + [[ "$output" =~ "employees-tbl-bad.json to" ]] || false } @test "replace table using xlsx file" { @@ -107,5 +134,15 @@ teardown() { run dolt table import -r employees `batshelper employees.xlsx` [ "$status" -eq 1 ] [[ "$output" =~ "Error replacing table" ]] || false - [[ "$output" =~ "cause: Schema from file does not match existing table schema" ]] || false + [[ "$output" =~ "cause: Schema from file does not match schema from existing table." 
]] || false +} + +@test "replace table with 2 primary keys with a csv with 1 primary key" { + run dolt table import -c --pk=pk1,pk2 test `batshelper 2pk5col-ints.csv` + [ "$status" -eq 0 ] + [[ "$output" =~ "Import completed successfully." ]] || false + run dolt table import -r test `batshelper 1pk5col-ints.csv` + [ "$status" -eq 1 ] + [[ "$output" =~ "Error replacing table" ]] || false + [[ "$output" =~ "cause: Schema from file does not match schema from existing table." ]] || false } \ No newline at end of file From 10aacb127386e2bb16255b03c187f37002e99378 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Thu, 19 Sep 2019 18:06:19 -0700 Subject: [PATCH 09/11] Some cleanup and rewording --- bats/helper/employees-tbl-schema-wrong.json | 12 ++++------ bats/replace-tables.bats | 12 +++++++++- go/cmd/dolt/commands/tblcmds/import.go | 22 +++++++++---------- go/libraries/doltcore/mvdata/data_mover.go | 8 +++---- go/libraries/doltcore/mvdata/file_data_loc.go | 4 ++-- .../doltcore/mvdata/stream_data_loc.go | 4 ++-- .../doltcore/mvdata/table_data_loc.go | 14 ++++++++---- 7 files changed, 43 insertions(+), 33 deletions(-) diff --git a/bats/helper/employees-tbl-schema-wrong.json b/bats/helper/employees-tbl-schema-wrong.json index 665fa446e2..ca4aab996e 100644 --- a/bats/helper/employees-tbl-schema-wrong.json +++ b/bats/helper/employees-tbl-schema-wrong.json @@ -4,29 +4,25 @@ "id": "0", "first name": "tim", "last name": "sehn", - "position": "ceo", - "children": true + "position": "ceo" }, { "id": "1", "first name": "aaron", "last name": "son", - "position": "founder", - "children": true + "position": "founder" }, { "id": "2", "first name": "brian", "last name": "hendricks", - "position": "founder", - "children": true + "position": "founder" }, { "id": "3", "first name": "matt", "last name": "jesuele", - "position": "software engineer", - "children": false + "position": "software engineer" } ] } diff --git a/bats/replace-tables.bats b/bats/replace-tables.bats index cd4271108c..4ecc7b0248 100644 --- a/bats/replace-tables.bats +++ b/bats/replace-tables.bats @@ -137,7 +137,7 @@ teardown() { [[ "$output" =~ "cause: Schema from file does not match schema from existing table." ]] || false } -@test "replace table with 2 primary keys with a csv with 1 primary key" { +@test "replace table with 2 primary keys with a csv with one primary key" { run dolt table import -c --pk=pk1,pk2 test `batshelper 2pk5col-ints.csv` [ "$status" -eq 0 ] [[ "$output" =~ "Import completed successfully." ]] || false @@ -145,4 +145,14 @@ teardown() { [ "$status" -eq 1 ] [[ "$output" =~ "Error replacing table" ]] || false [[ "$output" =~ "cause: Schema from file does not match schema from existing table." ]] || false +} + +@test "replace table with 2 primary keys with a csv with 2 primary keys" { + run dolt table import -c --pk=pk1,pk2 test `batshelper 2pk5col-ints.csv` + [ "$status" -eq 0 ] + [[ "$output" =~ "Import completed successfully." ]] || false + run dolt table import -r test `batshelper 2pk5col-ints.csv` + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 4, Additions: 4, Modifications: 0, Had No Effect: 0" ]] || false + [[ "$output" =~ "Import completed successfully." 
]] || false } \ No newline at end of file diff --git a/go/cmd/dolt/commands/tblcmds/import.go b/go/cmd/dolt/commands/tblcmds/import.go index 31c65c64ef..7186fcc004 100644 --- a/go/cmd/dolt/commands/tblcmds/import.go +++ b/go/cmd/dolt/commands/tblcmds/import.go @@ -94,20 +94,20 @@ schema will be used, and field names will be used to match file fields with tabl During import, if there is an error importing any row, the import will be aborted by default. Use the --continue flag to continue importing when an error is encountered. +If --replace-table | -r is given the operation will replace
the table with the contents of the file. The table's
+existing schema will be used, and field names will be used to match file fields with table fields unless a mapping file is
+specified.
+
+If the schema for the existing table does not match the schema for the new file, the import will be aborted by default. To
+overwrite both the table and the schema, use -c -f.
+
 A mapping file can be used to map fields between the file being imported and the table being written to. This can
-be used when creating a new table, or updating an existing table.
+be used when creating a new table, or updating or replacing an existing table.
 ` + mappingFileHelp +
 
-	`If --replace-table | -r is given the operation will replace
with the contents of the file. The table's
-	existing schema will be used, and field names will be used to match file fields with table fields unless a mapping file is
-	specified.
-
-	If the schema for the existing table does not match the schema for the new file, the import will be aborted by default. To
-	overwrite both the table and the schema, use -c -f.` +
-	`
-In both create and update scenarios the file's extension is used to infer the type of the file. If a file does not
+In create, update, and replace scenarios the file's extension is used to infer the type of the file. If a file does not
 have the expected extension then the --file-type parameter should be used to explicitly define the format of
 the file in one of the supported formats (csv, psv, nbf, json, xlsx). For files separated by a delimiter other than a
 ',' (type csv) or a '|' (type psv), the --delim parameter can be used to specify a delimiter`
 
@@ -266,7 +266,7 @@ func createArgParser() *argparser.ArgParser {
 	ap.SupportsFlag(createParam, "c", "Create a new table, or overwrite an existing table (with the -f flag) from the imported data.")
 	ap.SupportsFlag(updateParam, "u", "Update an existing table with the imported data.")
 	ap.SupportsFlag(forceParam, "f", "If a create operation is being executed, data already exists in the destination, the Force flag will allow the target to be overwritten.")
-	ap.SupportsFlag(replaceParam, "r", "Replace existing table with imported data.")
+	ap.SupportsFlag(replaceParam, "r", "Replace existing table with imported data while preserving the original schema.")
 	ap.SupportsFlag(contOnErrParam, "", "Continue importing when row import errors are encountered.")
 	ap.SupportsString(outSchemaParam, "s", "schema_file", "The schema for the output data.")
 	ap.SupportsString(mappingFileParam, "m", "mapping_file", "A file that lays out how fields should be mapped from input data to output data.")
@@ -390,7 +390,7 @@ func newDataMoverErrToVerr(mvOpts *mvdata.MoveOptions, err *mvdata.DataMoverCrea
 
 	case mvdata.ReplacingErr:
 		bdr := errhand.BuildDError("Error replacing table")
-		bdr.AddDetails("When adding to replace data with %s, could not validate schema.", mvOpts.Src.String())
+		bdr.AddDetails("When attempting to replace data with %s, could not validate schema.", mvOpts.Src.String())
 		return bdr.AddCause(err.Cause).Build()
 
 	case mvdata.CreateMapperErr:
diff --git a/go/libraries/doltcore/mvdata/data_mover.go b/go/libraries/doltcore/mvdata/data_mover.go
index a27f98bca8..fb3dcdb54f 100644
--- a/go/libraries/doltcore/mvdata/data_mover.go
+++ b/go/libraries/doltcore/mvdata/data_mover.go
@@ -121,11 +121,9 @@ func NewDataMover(ctx context.Context, root *doltdb.RootValue, fs filesys.Filesy
 		return nil, &DataMoverCreationError{SchemaErr, err}
 	}
 
-	if mvOpts.Operation == ReplaceOp && mvOpts.MappingFile == "" {
-		if !fileMatchesSchema {
-			err := errors.New("Schema from file does not match existing table schema.")
-			return nil, &DataMoverCreationError{ReplacingErr, err}
-		}
+	if mvOpts.Operation == ReplaceOp && mvOpts.MappingFile == "" && !fileMatchesSchema {
+		err := errors.New("Schema from file does not match schema from existing table.")
+		return nil, &DataMoverCreationError{ReplacingErr, err}
 	}
 
 	var mapping *rowconv.FieldMapping
diff --git a/go/libraries/doltcore/mvdata/file_data_loc.go b/go/libraries/doltcore/mvdata/file_data_loc.go
index 2a0f2cb987..0ab3fa1a81 100644
--- a/go/libraries/doltcore/mvdata/file_data_loc.go
+++ b/go/libraries/doltcore/mvdata/file_data_loc.go
@@ -160,8 +160,8 @@ func (dl FileDataLocation)
NewUpdatingWriter(ctx context.Context, mvOpts *MoveOp panic("Updating of files is not supported") } -// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using -// the same schema +// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table while +// preserving schema func (dl FileDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { panic("Replacing files is not supported") } diff --git a/go/libraries/doltcore/mvdata/stream_data_loc.go b/go/libraries/doltcore/mvdata/stream_data_loc.go index 73b5b7ebe9..b7340ace25 100644 --- a/go/libraries/doltcore/mvdata/stream_data_loc.go +++ b/go/libraries/doltcore/mvdata/stream_data_loc.go @@ -92,8 +92,8 @@ func (dl StreamDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *Move panic("Updating is not supported for stdout") } -// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using -// the same schema +// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table while +// preserving schema func (dl StreamDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { panic("Replacing is not supported for stdout") } diff --git a/go/libraries/doltcore/mvdata/table_data_loc.go b/go/libraries/doltcore/mvdata/table_data_loc.go index faee15a540..7651a02ba5 100644 --- a/go/libraries/doltcore/mvdata/table_data_loc.go +++ b/go/libraries/doltcore/mvdata/table_data_loc.go @@ -120,11 +120,17 @@ func (dl TableDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveO return noms.NewNomsMapUpdater(ctx, root.VRW(), m, outSch, statsCB), nil } -// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table using -// the same schema +// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table while +// preserving schema func (dl TableDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) { - if outSch.GetPKCols().Size() == 0 { - return nil, ErrNoPK + _, ok, err := root.GetTable(ctx, dl.Name) + + if err != nil { + return nil, err + } + + if !ok { + return nil, errors.New("Could not find table " + dl.Name) } m, err := types.NewMap(ctx, root.VRW()) From 93dd3680757856e2c247a017c28a193637b6fa7d Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Fri, 20 Sep 2019 15:44:14 -0700 Subject: [PATCH 10/11] go/{cmd, libraries}: Make schema verification a method of TableReader --- go/cmd/dolt/commands/tblcmds/import.go | 2 - go/libraries/doltcore/mvdata/data_loc.go | 2 +- go/libraries/doltcore/mvdata/data_loc_test.go | 2 +- go/libraries/doltcore/mvdata/data_mover.go | 19 +++-- go/libraries/doltcore/mvdata/file_data_loc.go | 79 +++++++------------ .../doltcore/mvdata/stream_data_loc.go | 12 +-- .../doltcore/mvdata/table_data_loc.go | 14 ++-- go/libraries/doltcore/schema/schema.go | 29 +++++++ go/libraries/doltcore/table/inmem_table.go | 4 + go/libraries/doltcore/table/io.go | 3 + 
.../doltcore/table/pipeline/transform_test.go | 8 +- .../doltcore/table/typed/json/file_info.go | 10 ++- .../doltcore/table/typed/json/reader.go | 26 +++--- .../doltcore/table/typed/noms/reader.go | 4 + .../doltcore/table/untyped/csv/reader.go | 59 +++----------- .../doltcore/table/untyped/csv/reader_test.go | 2 +- .../doltcore/table/untyped/xlsx/reader.go | 35 ++++---- 17 files changed, 150 insertions(+), 160 deletions(-) diff --git a/go/cmd/dolt/commands/tblcmds/import.go b/go/cmd/dolt/commands/tblcmds/import.go index 7186fcc004..7ffa4f6d73 100644 --- a/go/cmd/dolt/commands/tblcmds/import.go +++ b/go/cmd/dolt/commands/tblcmds/import.go @@ -190,8 +190,6 @@ func validateImportArgs(apr *argparser.ArgParseResults, usage cli.UsagePrinter) srcOpts = mvdata.XlsxOptions{SheetName: tableName} } else if val.Format == mvdata.JsonFile { srcOpts = mvdata.JSONOptions{TableName: tableName} - } else if val.Format == mvdata.CsvFile || val.Format == mvdata.PsvFile { - srcOpts = mvdata.CsvOptions{TableName: tableName} } case mvdata.StreamDataLocation: diff --git a/go/libraries/doltcore/mvdata/data_loc.go b/go/libraries/doltcore/mvdata/data_loc.go index f685dd6d73..ce2bfcb88d 100644 --- a/go/libraries/doltcore/mvdata/data_loc.go +++ b/go/libraries/doltcore/mvdata/data_loc.go @@ -91,7 +91,7 @@ type DataLocation interface { Exists(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS) (bool, error) // NewReader creates a TableReadCloser for the DataLocation - NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) + NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite // an existing table. 
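To make the interface change above concrete: after this patch a DataLocation's NewReader no longer reports whether the file matched, and the check moves to the reader's VerifySchema method, which the CSV, XLSX, noms, and in-memory readers implement by delegating to the new schema.VerifyInSchema (the JSON reader instead answers from its decode attempt). What follows is an illustrative, self-contained sketch of that matching rule -- equal column counts, and every column of the existing schema found by tag under the same name -- using simplified stand-in types rather than Dolt's actual schema package:

package main

import "fmt"

// column is a stand-in for schema.Column, reduced to the two fields the
// check actually reads.
type column struct {
	name string
	tag  uint64
}

// verifyInSchema mirrors the rule of schema.VerifyInSchema: the two column
// sets must be the same size, and every column of the existing (out) schema
// must be present in the incoming (in) schema under the same tag and name.
// Column types are deliberately not compared, which is what makes this check
// looser than SchemasAreEqual.
func verifyInSchema(in, out []column) bool {
	if len(in) != len(out) {
		return false
	}

	// index the incoming columns by tag, matching the GetByTag lookup above
	inByTag := make(map[uint64]column, len(in))
	for _, c := range in {
		inByTag[c.tag] = c
	}

	for _, outCol := range out {
		inCol, ok := inByTag[outCol.tag]
		if !ok || inCol.name != outCol.name {
			return false
		}
	}

	return true
}

func main() {
	existing := []column{{"pk", 0}, {"c1", 1}, {"c2", 2}}

	fmt.Println(verifyInSchema([]column{{"pk", 0}, {"c1", 1}, {"c2", 2}}, existing)) // true
	fmt.Println(verifyInSchema([]column{{"pk", 0}, {"c1", 1}}, existing))            // false: column count differs
	fmt.Println(verifyInSchema([]column{{"pk", 0}, {"c1", 1}, {"cX", 2}}, existing)) // false: name mismatch at tag 2
}

Because types are not compared, a file whose values merely parse differently still passes the gate; only a renamed, retagged, added, or dropped column aborts the replace.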
diff --git a/go/libraries/doltcore/mvdata/data_loc_test.go b/go/libraries/doltcore/mvdata/data_loc_test.go index 30fb4a9998..c1f3ed89ba 100644 --- a/go/libraries/doltcore/mvdata/data_loc_test.go +++ b/go/libraries/doltcore/mvdata/data_loc_test.go @@ -239,7 +239,7 @@ func TestCreateRdWr(t *testing.T) { } // TODO (oo): fix this for json path test - rd, _, _, err := loc.NewReader(context.Background(), root, fs, "schema.json", nil) + rd, _, err := loc.NewReader(context.Background(), root, fs, "schema.json", nil) if err != nil { t.Fatal("Unexpected error creating writer", err) diff --git a/go/libraries/doltcore/mvdata/data_mover.go b/go/libraries/doltcore/mvdata/data_mover.go index fb3dcdb54f..ff17508efc 100644 --- a/go/libraries/doltcore/mvdata/data_mover.go +++ b/go/libraries/doltcore/mvdata/data_mover.go @@ -42,8 +42,7 @@ const ( ) type CsvOptions struct { - Delim string - TableName string + Delim string } type XlsxOptions struct { @@ -100,7 +99,7 @@ func NewDataMover(ctx context.Context, root *doltdb.RootValue, fs filesys.Filesy var err error transforms := pipeline.NewTransformCollection() - rd, srcIsSorted, fileMatchesSchema, err := mvOpts.Src.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) + rd, srcIsSorted, err := mvOpts.Src.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) if err != nil { return nil, &DataMoverCreationError{CreateReaderErr, err} @@ -121,9 +120,15 @@ func NewDataMover(ctx context.Context, root *doltdb.RootValue, fs filesys.Filesy return nil, &DataMoverCreationError{SchemaErr, err} } - if mvOpts.Operation == ReplaceOp && mvOpts.MappingFile == "" && !fileMatchesSchema { - err := errors.New("Schema from file does not match schema from existing table.") - return nil, &DataMoverCreationError{ReplacingErr, err} + if mvOpts.Operation == ReplaceOp && mvOpts.MappingFile == "" { + fileMatchesSchema, err := rd.VerifySchema(outSch) + if err != nil { + return nil, &DataMoverCreationError{ReplacingErr, err} + } + if !fileMatchesSchema { + err := errors.New("Schema from file does not match schema from existing table.") + return nil, &DataMoverCreationError{ReplacingErr, err} + } } var mapping *rowconv.FieldMapping @@ -213,7 +218,7 @@ func getOutSchema(ctx context.Context, inSch schema.Schema, root *doltdb.RootVal if mvOpts.Operation == UpdateOp || mvOpts.Operation == ReplaceOp { // Get schema from target - rd, _, _, err := mvOpts.Dest.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) + rd, _, err := mvOpts.Dest.NewReader(ctx, root, fs, mvOpts.SchFile, mvOpts.SrcOptions) if err != nil { return nil, err diff --git a/go/libraries/doltcore/mvdata/file_data_loc.go b/go/libraries/doltcore/mvdata/file_data_loc.go index 0ab3fa1a81..ab6c3f9fba 100644 --- a/go/libraries/doltcore/mvdata/file_data_loc.go +++ b/go/libraries/doltcore/mvdata/file_data_loc.go @@ -71,68 +71,64 @@ func (dl FileDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, f } // NewReader creates a TableReadCloser for the DataLocation -func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) { +func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { exists, isDir := fs.Exists(dl.Path) if !exists { - return nil, false, false, os.ErrNotExist + return nil, false, os.ErrNotExist } else if isDir { - 
return nil, false, false, filesys.ErrIsDir
+		return nil, false, filesys.ErrIsDir
 	}
 
 	switch dl.Format {
 	case CsvFile:
 		delim := ","
-		csvOpts, _ := opts.(CsvOptions)
-		if len(csvOpts.Delim) != 0 {
-			delim = csvOpts.Delim
+
+		if opts != nil {
+			csvOpts, _ := opts.(CsvOptions)
+
+			if len(csvOpts.Delim) != 0 {
+				delim = csvOpts.Delim
+			}
 		}
 
-		var outSch schema.Schema = nil
-		sch, tableExists, err := GetOutSchema(csvOpts.TableName, root)
-		if tableExists {
-			outSch = sch
-		}
-		rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim), outSch)
-		return rd, false, fileMatchesSchema, err
+		rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim))
+
+		return rd, false, err
 
 	case PsvFile:
-		var outSch schema.Schema = nil
-		csvOpts, _ := opts.(CsvOptions)
-		sch, tableExists, err := GetOutSchema(csvOpts.TableName, root)
-		if tableExists {
-			outSch = sch
-		}
-		rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|"), outSch)
-		return rd, false, fileMatchesSchema, err
+		rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|"))
+		return rd, false, err
 
 	case XlsxFile:
-		var outSch schema.Schema = nil
 		xlsxOpts := opts.(XlsxOptions)
-		sch, tableExists, err := GetOutSchema(xlsxOpts.SheetName, root)
-		if tableExists {
-			outSch = sch
-		}
-		rd, fileMatchesSchema, err := xlsx.OpenXLSXReader(root.VRW().Format(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName}, outSch)
-		return rd, false, fileMatchesSchema, err
+		rd, err := xlsx.OpenXLSXReader(root.VRW().Format(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName})
+		return rd, false, err
 
 	case JsonFile:
 		var sch schema.Schema = nil
 		if schPath == "" {
 			if opts == nil {
-				return nil, false, false, errors.New("Unable to determine table name on JSON import")
+				return nil, false, errors.New("Unable to determine table name on JSON import")
 			}
 			jsonOpts, _ := opts.(JSONOptions)
-			sch, _, err = GetOutSchema(jsonOpts.TableName, root)
+			tbl, exists, err := root.GetTable(ctx, jsonOpts.TableName)
 			if err != nil {
-				return nil, false, false, err
+				return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error()))
+			}
+			if !exists {
+				return nil, false, errors.New(fmt.Sprintf("The following table could not be found:\n%v", jsonOpts.TableName))
+			}
+			sch, err = tbl.GetSchema(ctx)
+			if err != nil {
+				return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error()))
 			}
 		}
-		rd, fileMatchesSchema, err := json.OpenJSONReader(root.VRW().Format(), dl.Path, fs, json.NewJSONInfo(), sch, schPath)
-		return rd, false, fileMatchesSchema, err
+		rd, err := json.OpenJSONReader(root.VRW().Format(), dl.Path, fs, json.NewJSONInfo(), sch, schPath)
+		return rd, false, err
 	}
 
-	return nil, false, false, errors.New("unsupported format")
+	return nil, false, errors.New("unsupported format")
 }
 
 // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite
@@ -165,18 +161,3 @@ func (dl FileDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveOp
 func (dl FileDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) {
 	panic("Replacing files is not supported")
 }
-
-func
GetOutSchema(tableName string, root *doltdb.RootValue) (schema.Schema, bool, error) { - table, exists, err := root.GetTable(context.TODO(), tableName) - if !exists { - return nil, exists, errors.New(fmt.Sprintf("The following table could not be found:\n%v", tableName)) - } - if err != nil { - return nil, exists, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error())) - } - sch, err := table.GetSchema(context.TODO()) - if err != nil { - return nil, exists, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error())) - } - return sch, exists, nil -} diff --git a/go/libraries/doltcore/mvdata/stream_data_loc.go b/go/libraries/doltcore/mvdata/stream_data_loc.go index b7340ace25..133016faca 100644 --- a/go/libraries/doltcore/mvdata/stream_data_loc.go +++ b/go/libraries/doltcore/mvdata/stream_data_loc.go @@ -47,7 +47,7 @@ func (dl StreamDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, } // NewReader creates a TableReadCloser for the DataLocation -func (dl StreamDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) { +func (dl StreamDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { switch dl.Format { case CsvFile: delim := "," @@ -60,16 +60,16 @@ func (dl StreamDataLocation) NewReader(ctx context.Context, root *doltdb.RootVal } } - rd, _, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim(delim), nil) + rd, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim(delim)) - return rd, false, false, err + return rd, false, err case PsvFile: - rd, _, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim("|"), nil) - return rd, false, false, err + rd, err := csv.NewCSVReader(root.VRW().Format(), ioutil.NopCloser(dl.Reader), csv.NewCSVInfo().SetDelim("|")) + return rd, false, err } - return nil, false, false, errors.New(string(dl.Format) + "is an unsupported format to read from stdin") + return nil, false, errors.New(string(dl.Format) + "is an unsupported format to read from stdin") } // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite diff --git a/go/libraries/doltcore/mvdata/table_data_loc.go b/go/libraries/doltcore/mvdata/table_data_loc.go index 7651a02ba5..e620f6eb77 100644 --- a/go/libraries/doltcore/mvdata/table_data_loc.go +++ b/go/libraries/doltcore/mvdata/table_data_loc.go @@ -46,36 +46,36 @@ func (dl TableDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, } // NewReader creates a TableReadCloser for the DataLocation -func (dl TableDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, fileMatchesSchema bool, err error) { +func (dl TableDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { tbl, ok, err := root.GetTable(ctx, dl.Name) if err != nil { - return nil, false, false, err + return nil, false, err } if !ok { - return nil, false, false, doltdb.ErrTableNotFound + return nil, 
false, doltdb.ErrTableNotFound } sch, err := tbl.GetSchema(ctx) if err != nil { - return nil, false, false, err + return nil, false, err } rowData, err := tbl.GetRowData(ctx) if err != nil { - return nil, false, false, err + return nil, false, err } rd, err := noms.NewNomsMapReader(ctx, rowData, sch) if err != nil { - return nil, false, false, err + return nil, false, err } - return rd, true, false, nil + return rd, true, nil } // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite diff --git a/go/libraries/doltcore/schema/schema.go b/go/libraries/doltcore/schema/schema.go index af25311025..5889cf6219 100644 --- a/go/libraries/doltcore/schema/schema.go +++ b/go/libraries/doltcore/schema/schema.go @@ -84,6 +84,35 @@ func SchemasAreEqual(sch1, sch2 Schema) (bool, error) { return areEqual, nil } +// VerifyInSchema tests that the incoming schema matches the schema from the original table. +// The test for column equality is more flexible than SchemasAreEqual. +func VerifyInSchema(inSch, outSch Schema) (bool, error) { + inSchCols := inSch.GetAllCols() + outSchCols := outSch.GetAllCols() + + if inSchCols.Size() != outSchCols.Size() { + return false, nil + } + + match := true + err := outSchCols.Iter(func(tag uint64, outCol Column) (stop bool, err error) { + inCol, ok := inSchCols.GetByTag(tag) + + if !ok || inCol.Name != outCol.Name || inCol.Tag != outCol.Tag { + match = false + return true, nil + } + + return false, nil + }) + + if err != nil { + return false, err + } + + return match, nil +} + var randGen = rand.New(rand.NewSource(time.Now().UnixNano())) func AutoGenerateTag(sch Schema) uint64 { diff --git a/go/libraries/doltcore/table/inmem_table.go b/go/libraries/doltcore/table/inmem_table.go index 30f2d31e4d..0e4f2115f5 100644 --- a/go/libraries/doltcore/table/inmem_table.go +++ b/go/libraries/doltcore/table/inmem_table.go @@ -144,6 +144,10 @@ func (rd *InMemTableReader) GetSchema() schema.Schema { return rd.tt.sch } +func (rd *InMemTableReader) VerifySchema(outSch schema.Schema) (bool, error) { + return schema.VerifyInSchema(rd.tt.sch, outSch) +} + // InMemTableWriter is an implementation of a TableWriter for an InMemTable type InMemTableWriter struct { tt *InMemTable diff --git a/go/libraries/doltcore/table/io.go b/go/libraries/doltcore/table/io.go index c4233107ee..b7240fca0c 100644 --- a/go/libraries/doltcore/table/io.go +++ b/go/libraries/doltcore/table/io.go @@ -31,6 +31,9 @@ type TableReader interface { // ReadRow reads a row from a table. If there is a bad row the returned error will be non nil, and callin IsBadRow(err) // will be return true. This is a potentially non-fatal error and callers can decide if they want to continue on a bad row, or fail. 
ReadRow(ctx context.Context) (row.Row, error) + + // VerifySchema checks that the incoming schema matches the schema from the original table + VerifySchema(outSch schema.Schema) (bool, error) } // TableWriteCloser is an interface for writing rows to a table diff --git a/go/libraries/doltcore/table/pipeline/transform_test.go b/go/libraries/doltcore/table/pipeline/transform_test.go index e57319a4a7..9cdd61e174 100644 --- a/go/libraries/doltcore/table/pipeline/transform_test.go +++ b/go/libraries/doltcore/table/pipeline/transform_test.go @@ -85,7 +85,7 @@ func TestPipeline(t *testing.T) { func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) + rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) tc := NewTransformCollection( @@ -123,7 +123,7 @@ func TestAddingStages(t *testing.T) { func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) + rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) tc := NewTransformCollection( @@ -193,7 +193,7 @@ Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) + rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) addedStages := []NamedTransform{ @@ -281,7 +281,7 @@ func TestAbort(t *testing.T) { func() { csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} - rd, _, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo, nil) + rd, _ := csv.NewCSVReader(types.Format_7_18, ioutil.NopCloser(buf), csvInfo) wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) var wg = sync.WaitGroup{} diff --git a/go/libraries/doltcore/table/typed/json/file_info.go b/go/libraries/doltcore/table/typed/json/file_info.go index 2da611f91c..68691286a2 100644 --- a/go/libraries/doltcore/table/typed/json/file_info.go +++ b/go/libraries/doltcore/table/typed/json/file_info.go @@ -17,14 +17,20 @@ package json import "github.com/liquidata-inc/dolt/go/libraries/doltcore/row" type JSONFileInfo struct { - Rows []row.Row + Rows []row.Row + AbleToDecode bool } func NewJSONInfo() *JSONFileInfo { - return &JSONFileInfo{nil} + return &JSONFileInfo{nil, true} } func (info *JSONFileInfo) SetRows(rows []row.Row) *JSONFileInfo { info.Rows = rows return info } + +func (info *JSONFileInfo) SetAbleToDecode(able bool) *JSONFileInfo { + info.AbleToDecode = able + return info +} diff --git a/go/libraries/doltcore/table/typed/json/reader.go b/go/libraries/doltcore/table/typed/json/reader.go index 6c85f01f76..24b847d503 100644 --- a/go/libraries/doltcore/table/typed/json/reader.go +++ b/go/libraries/doltcore/table/typed/json/reader.go @@ -37,54 +37,55 @@ type JSONReader struct { ind int } -func OpenJSONReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *JSONFileInfo, sch schema.Schema, schPath string) (*JSONReader, bool, error) { +func 
OpenJSONReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *JSONFileInfo, sch schema.Schema, schPath string) (*JSONReader, error) { r, err := fs.OpenForRead(path) if err != nil { - return nil, false, err + return nil, err } return NewJSONReader(nbf, r, info, fs, sch, schPath, path) } -func NewJSONReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *JSONFileInfo, fs filesys.ReadableFS, sch schema.Schema, schPath string, tblPath string) (*JSONReader, bool, error) { +func NewJSONReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *JSONFileInfo, fs filesys.ReadableFS, sch schema.Schema, schPath string, tblPath string) (*JSONReader, error) { br := bufio.NewReaderSize(r, ReadBufSize) if sch == nil { if schPath == "" { - return nil, false, errors.New("schema must be provided") + return nil, errors.New("schema must be provided") } schData, err := fs.ReadFile(schPath) if err != nil { - return nil, false, err + return nil, err } jsonSchStr := string(schData) sch, err = encoding.UnmarshalJson(jsonSchStr) if err != nil { - return nil, false, err + return nil, err } } tblData, err := fs.ReadFile(tblPath) if err != nil { - return nil, false, err + return nil, err } jsonRows, err := UnmarshalFromJSON(tblData) if err != nil { - return nil, false, err + return nil, err } - fileMatchesSchema := true + ableToDecode := true decodedRows, err := jsonRows.decodeJSONRows(nbf, sch) if err != nil { - fileMatchesSchema = false + ableToDecode = false } info.SetRows(decodedRows) + info.SetAbleToDecode(ableToDecode) - return &JSONReader{r, br, info, sch, 0}, fileMatchesSchema, nil + return &JSONReader{r, br, info, sch, 0}, nil } // Close should release resources being held @@ -101,7 +102,10 @@ func (jsonr *JSONReader) Close(ctx context.Context) error { func (jsonr *JSONReader) GetSchema() schema.Schema { return jsonr.sch +} +func (jsonr *JSONReader) VerifySchema(sch schema.Schema) (bool, error) { + return jsonr.info.AbleToDecode, nil } func (jsonr *JSONReader) ReadRow(ctx context.Context) (row.Row, error) { diff --git a/go/libraries/doltcore/table/typed/noms/reader.go b/go/libraries/doltcore/table/typed/noms/reader.go index 1e04ac9140..c6586e659b 100644 --- a/go/libraries/doltcore/table/typed/noms/reader.go +++ b/go/libraries/doltcore/table/typed/noms/reader.go @@ -65,3 +65,7 @@ func (nmr *NomsMapReader) Close(ctx context.Context) error { nmr.itr = nil return nil } + +func (nmr *NomsMapReader) VerifySchema(outSch schema.Schema) (bool, error) { + return schema.VerifyInSchema(nmr.sch, outSch) +} diff --git a/go/libraries/doltcore/table/untyped/csv/reader.go b/go/libraries/doltcore/table/untyped/csv/reader.go index a59d1c8d78..46f58fd5f6 100644 --- a/go/libraries/doltcore/table/untyped/csv/reader.go +++ b/go/libraries/doltcore/table/untyped/csv/reader.go @@ -47,37 +47,29 @@ type CSVReader struct { // OpenCSVReader opens a reader at a given path within a given filesys. The CSVFileInfo should describe the csv file // being opened. -func OpenCSVReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *CSVFileInfo, outSch schema.Schema) (*CSVReader, bool, error) { +func OpenCSVReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *CSVFileInfo) (*CSVReader, error) { r, err := fs.OpenForRead(path) if err != nil { - return nil, false, err + return nil, err } - return NewCSVReader(nbf, r, info, outSch) + return NewCSVReader(nbf, r, info) } // NewCSVReader creates a CSVReader from a given ReadCloser. The CSVFileInfo should describe the csv file being read. 
-func NewCSVReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *CSVFileInfo, outSch schema.Schema) (*CSVReader, bool, error) {
+func NewCSVReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *CSVFileInfo) (*CSVReader, error) {
 	br := bufio.NewReaderSize(r, ReadBufSize)
 	colStrs, err := getColHeaders(br, info)
 
 	if err != nil {
 		r.Close()
-		return nil, false, err
+		return nil, err
 	}
 
 	_, sch := untyped.NewUntypedSchema(colStrs...)
 
-	fileMatchesSchema := true
-	if outSch != nil {
-		fileMatchesSchema, err = SchemasMatch(sch, outSch)
-		if err != nil {
-			return nil, false, err
-		}
-	}
-
-	return &CSVReader{r, br, info, sch, false, nbf}, fileMatchesSchema, nil
+	return &CSVReader{r, br, info, sch, false, nbf}, nil
 }
 
 func getColHeaders(br *bufio.Reader, info *CSVFileInfo) ([]string, error) {
@@ -140,6 +132,11 @@ func (csvr *CSVReader) GetSchema() schema.Schema {
 	return csvr.sch
 }
 
+// VerifySchema checks that the incoming schema matches the schema from the existing table
+func (csvr *CSVReader) VerifySchema(outSch schema.Schema) (bool, error) {
+	return schema.VerifyInSchema(csvr.sch, outSch)
+}
+
 // Close should release resources being held
 func (csvr *CSVReader) Close(ctx context.Context) error {
 	if csvr.closer != nil {
@@ -179,37 +176,3 @@ func (csvr *CSVReader) parseRow(line string) (row.Row, error) {
 
 	return row.New(csvr.nbf, sch, taggedVals)
 }
-
-func SchemasMatch(sch1, sch2 schema.Schema) (bool, error) {
-	inSch := sch1.GetAllCols()
-	outSch := sch2.GetAllCols()
-
-	if inSch.Size() != outSch.Size() {
-		return false, nil
-	}
-
-	match := true
-	err := outSch.Iter(func(tag uint64, outCol schema.Column) (stop bool, err error) {
-		inCol, ok := inSch.GetByTag(tag)
-
-		if !ok || !ColumnsMatch(inCol, outCol) {
-			match = false
-			return true, nil
-		}
-
-		return false, nil
-	})
-
-	if err != nil {
-		return false, err
-	}
-
-	return match, nil
-}
-
-func ColumnsMatch(inCol, outCol schema.Column) bool {
-	if inCol.Name != outCol.Name || inCol.Tag != outCol.Tag {
-		return false
-	}
-	return true
-}
diff --git a/go/libraries/doltcore/table/untyped/csv/reader_test.go b/go/libraries/doltcore/table/untyped/csv/reader_test.go
index 916082de2e..f09e8f98ec 100644
--- a/go/libraries/doltcore/table/untyped/csv/reader_test.go
+++ b/go/libraries/doltcore/table/untyped/csv/reader_test.go
@@ -132,7 +132,7 @@ func readTestRows(t *testing.T, inputStr string, info *CSVFileInfo) ([]row.Row,
 	const path = "/file.csv"
 	fs := filesys.NewInMemFS(nil, map[string][]byte{path: []byte(inputStr)}, root)
 
-	csvR, _, err := OpenCSVReader(types.Format_7_18, path, fs, info, nil)
+	csvR, err := OpenCSVReader(types.Format_7_18, path, fs, info)
 	defer csvR.Close(context.Background())
 
 	if err != nil {
diff --git a/go/libraries/doltcore/table/untyped/xlsx/reader.go b/go/libraries/doltcore/table/untyped/xlsx/reader.go
index db5f776fe9..c867fed1d1 100644
--- a/go/libraries/doltcore/table/untyped/xlsx/reader.go
+++ b/go/libraries/doltcore/table/untyped/xlsx/reader.go
@@ -38,11 +38,11 @@ type XLSXReader struct {
 	rows []row.Row
 }
 
-func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *XLSXFileInfo, outSch schema.Schema) (*XLSXReader, bool, error) {
+func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *XLSXFileInfo) (*XLSXReader, error) {
 	r, err := fs.OpenForRead(path)
 
 	if err != nil {
-		return nil, false, err
+		return nil, err
 	}
 
 	br := bufio.NewReaderSize(r, ReadBufSize)
 
@@ -51,29 +51,18 @@ func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS
 
 	data, err := getXlsxRows(path,
info.SheetName) if err != nil { - return nil, false, err + return nil, err } - fileMatchesSchema := true - var decodedRows []row.Row - var inSch schema.Schema - if outSch != nil { - inSch = outSch - decodedRows, err = decodeXLSXRows(nbf, data, outSch) - if err != nil { - fileMatchesSchema = false - } - } else { - _, sch := untyped.NewUntypedSchema(colStrs...) - inSch = sch - decodedRows, err = decodeXLSXRows(nbf, data, sch) - if err != nil { - r.Close() - return nil, false, err - } + _, sch := untyped.NewUntypedSchema(colStrs...) + + decodedRows, err := decodeXLSXRows(nbf, data, sch) + if err != nil { + r.Close() + return nil, err } - return &XLSXReader{r, br, info, inSch, 0, decodedRows}, fileMatchesSchema, nil + return &XLSXReader{r, br, info, sch, 0, decodedRows}, nil } func getColHeaders(path string, sheetName string) ([]string, error) { @@ -90,6 +79,10 @@ func (xlsxr *XLSXReader) GetSchema() schema.Schema { return xlsxr.sch } +func (xlsxr *XLSXReader) VerifySchema(outSch schema.Schema) (bool, error) { + return schema.VerifyInSchema(xlsxr.sch, outSch) +} + // Close should release resources being held func (xlsxr *XLSXReader) Close(ctx context.Context) error { if xlsxr.closer != nil { From 59536a44c621f9b23d09b6248dc941d64c239999 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Fri, 20 Sep 2019 17:09:25 -0700 Subject: [PATCH 11/11] go/libraries/doltcore/table: Add comments to VerifySchema declarations --- go/libraries/doltcore/table/inmem_table.go | 1 + go/libraries/doltcore/table/io.go | 2 +- go/libraries/doltcore/table/typed/json/reader.go | 2 ++ go/libraries/doltcore/table/typed/noms/reader.go | 1 + go/libraries/doltcore/table/untyped/xlsx/reader.go | 2 ++ 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/go/libraries/doltcore/table/inmem_table.go b/go/libraries/doltcore/table/inmem_table.go index 0e4f2115f5..9af294daf1 100644 --- a/go/libraries/doltcore/table/inmem_table.go +++ b/go/libraries/doltcore/table/inmem_table.go @@ -144,6 +144,7 @@ func (rd *InMemTableReader) GetSchema() schema.Schema { return rd.tt.sch } +// VerifySchema checks that the incoming schema matches the schema from the existing table func (rd *InMemTableReader) VerifySchema(outSch schema.Schema) (bool, error) { return schema.VerifyInSchema(rd.tt.sch, outSch) } diff --git a/go/libraries/doltcore/table/io.go b/go/libraries/doltcore/table/io.go index b7240fca0c..f6657ccf8c 100644 --- a/go/libraries/doltcore/table/io.go +++ b/go/libraries/doltcore/table/io.go @@ -32,7 +32,7 @@ type TableReader interface { // will be return true. This is a potentially non-fatal error and callers can decide if they want to continue on a bad row, or fail. 
ReadRow(ctx context.Context) (row.Row, error) - // VerifySchema checks that the incoming schema matches the schema from the original table + // VerifySchema checks that the incoming schema matches the schema from the existing table VerifySchema(outSch schema.Schema) (bool, error) } diff --git a/go/libraries/doltcore/table/typed/json/reader.go b/go/libraries/doltcore/table/typed/json/reader.go index 24b847d503..1800474f3b 100644 --- a/go/libraries/doltcore/table/typed/json/reader.go +++ b/go/libraries/doltcore/table/typed/json/reader.go @@ -100,10 +100,12 @@ func (jsonr *JSONReader) Close(ctx context.Context) error { } +// GetSchema gets the schema of the rows that this reader will return func (jsonr *JSONReader) GetSchema() schema.Schema { return jsonr.sch } +// VerifySchema checks that the incoming schema matches the schema from the existing table func (jsonr *JSONReader) VerifySchema(sch schema.Schema) (bool, error) { return jsonr.info.AbleToDecode, nil } diff --git a/go/libraries/doltcore/table/typed/noms/reader.go b/go/libraries/doltcore/table/typed/noms/reader.go index c6586e659b..d95196031f 100644 --- a/go/libraries/doltcore/table/typed/noms/reader.go +++ b/go/libraries/doltcore/table/typed/noms/reader.go @@ -66,6 +66,7 @@ func (nmr *NomsMapReader) Close(ctx context.Context) error { return nil } +// VerifySchema checks that the incoming schema matches the schema from the existing table func (nmr *NomsMapReader) VerifySchema(outSch schema.Schema) (bool, error) { return schema.VerifyInSchema(nmr.sch, outSch) } diff --git a/go/libraries/doltcore/table/untyped/xlsx/reader.go b/go/libraries/doltcore/table/untyped/xlsx/reader.go index c867fed1d1..59363bd977 100644 --- a/go/libraries/doltcore/table/untyped/xlsx/reader.go +++ b/go/libraries/doltcore/table/untyped/xlsx/reader.go @@ -75,10 +75,12 @@ func getColHeaders(path string, sheetName string) ([]string, error) { return colHeaders, nil } +// GetSchema gets the schema of the rows that this reader will return func (xlsxr *XLSXReader) GetSchema() schema.Schema { return xlsxr.sch } +// VerifySchema checks that the incoming schema matches the schema from the existing table func (xlsxr *XLSXReader) VerifySchema(outSch schema.Schema) (bool, error) { return schema.VerifyInSchema(xlsxr.sch, outSch) }
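Taking the series as a whole, the replace path now works like this: NewDataMover creates a reader for the source file, and when the operation is a replace with no mapping file it asks that reader to verify the incoming schema against the existing table's schema before any rows move. Below is a rough, self-contained sketch of that gate; the types and the names sliceReader and replaceGate are hypothetical stand-ins, not Dolt's actual API, and the positional name comparison is a simplification of the name-and-tag check schema.VerifyInSchema performs:

package main

import (
	"errors"
	"fmt"
)

// schemaStub stands in for schema.Schema; only column names are modeled.
type schemaStub struct {
	cols []string
}

// tableReader is a cut-down stand-in for the expanded table.TableReader
// contract: readers now answer schema questions themselves.
type tableReader interface {
	GetSchema() schemaStub
	VerifySchema(outSch schemaStub) (bool, error)
}

// sliceReader is a hypothetical reader over in-memory data.
type sliceReader struct {
	sch schemaStub
}

func (r sliceReader) GetSchema() schemaStub { return r.sch }

// VerifySchema compares column names positionally; the real readers compare
// name and tag via schema.VerifyInSchema.
func (r sliceReader) VerifySchema(outSch schemaStub) (bool, error) {
	if len(r.sch.cols) != len(outSch.cols) {
		return false, nil
	}
	for i, name := range r.sch.cols {
		if name != outSch.cols[i] {
			return false, nil
		}
	}
	return true, nil
}

// replaceGate mirrors the guard in NewDataMover: a replace without a mapping
// file must pass the reader's schema check before any writer is created.
func replaceGate(rd tableReader, existing schemaStub, isReplace bool, mappingFile string) error {
	if !isReplace || mappingFile != "" {
		return nil
	}

	ok, err := rd.VerifySchema(existing)
	if err != nil {
		return err
	}
	if !ok {
		return errors.New("Schema from file does not match schema from existing table.")
	}
	return nil
}

func main() {
	existing := schemaStub{cols: []string{"pk", "c1", "c2"}}

	good := sliceReader{sch: schemaStub{cols: []string{"pk", "c1", "c2"}}}
	fmt.Println(replaceGate(good, existing, true, "")) // <nil>: schemas match, replace proceeds

	bad := sliceReader{sch: schemaStub{cols: []string{"pk", "c1"}}}
	fmt.Println(replaceGate(bad, existing, true, "")) // mismatch error: import aborts
}

When the gate fails, the CLI surfaces it as the "Error replacing table" / "cause: Schema from file does not match schema from existing table." pair that the bats tests in this series assert on.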