go/libraries/doltcore: Add replace schema validation for xlsx files

This commit is contained in:
Taylor Bantle
2019-09-19 15:36:19 -07:00
parent 8304de36ce
commit 6cdd524b15
2 changed files with 49 additions and 26 deletions
+28 -16
View File
@@ -92,18 +92,22 @@ func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue
}
}
rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim))
return rd, false, true, err
rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim))
return rd, false, fileMatchesSchema, err
case PsvFile:
rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|"))
return rd, false, true, err
rd, fileMatchesSchema, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|"))
return rd, false, fileMatchesSchema, err
case XlsxFile:
var outSch schema.Schema = nil
xlsxOpts := opts.(XlsxOptions)
rd, err := xlsx.OpenXLSXReader(root.VRW().Format(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName})
return rd, false, true, err
sch, tableExists, err := GetOutSchema(xlsxOpts.SheetName, root)
if tableExists {
outSch = sch
}
rd, fileMatchesSchema, err := xlsx.OpenXLSXReader(root.VRW().Format(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName}, outSch)
return rd, false, fileMatchesSchema, err
case JsonFile:
var sch schema.Schema = nil
@@ -112,16 +116,9 @@ func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue
return nil, false, false, errors.New("Unable to determine table name on JSON import")
}
jsonOpts, _ := opts.(JSONOptions)
table, exists, err := root.GetTable(context.TODO(), jsonOpts.TableName)
if !exists {
return nil, false, false, errors.New(fmt.Sprintf("The following table could not be found:\n%v", jsonOpts.TableName))
}
sch, _, err = GetOutSchema(jsonOpts.TableName, root)
if err != nil {
return nil, false, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error()))
}
sch, err = table.GetSchema(context.TODO())
if err != nil {
return nil, false, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error()))
return nil, false, false, err
}
}
rd, fileMatchesSchema, err := json.OpenJSONReader(root.VRW().Format(), dl.Path, fs, json.NewJSONInfo(), sch, schPath)
@@ -161,3 +158,18 @@ func (dl FileDataLocation) NewUpdatingWriter(ctx context.Context, mvOpts *MoveOp
// NewReplacingWriter is not supported for FileDataLocation. It ignores all of
// its arguments and always panics with "Replacing files is not supported", so
// it never returns a writer or an error.
func (dl FileDataLocation) NewReplacingWriter(ctx context.Context, mvOpts *MoveOptions, root *doltdb.RootValue, fs filesys.WritableFS, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB) (table.TableWriteCloser, error) {
panic("Replacing files is not supported")
}
// GetOutSchema looks up the table named tableName in root and returns its
// schema.
//
// The boolean result is the existence flag reported by root.GetTable. A non-nil
// error is returned when the lookup itself fails, when the table does not
// exist, or when the table's schema cannot be read; the returned schema is nil
// in each of those cases. Callers that consider a missing table acceptable
// should inspect the boolean rather than the error.
func GetOutSchema(tableName string, root *doltdb.RootValue) (schema.Schema, bool, error) {
	// The local is named tbl so it does not shadow the imported table package.
	tbl, exists, err := root.GetTable(context.TODO(), tableName)

	// Check the lookup error before the existence flag: a failed read leaves
	// exists false, and would otherwise be misreported as a missing table.
	if err != nil {
		return nil, exists, fmt.Errorf("An error occurred attempting to read the table:\n%v", err)
	}
	if !exists {
		return nil, false, fmt.Errorf("The following table could not be found:\n%v", tableName)
	}

	sch, err := tbl.GetSchema(context.TODO())
	if err != nil {
		return nil, exists, fmt.Errorf("An error occurred attempting to read the table schema:\n%v", err)
	}

	return sch, exists, nil
}
@@ -38,11 +38,11 @@ type XLSXReader struct {
rows []row.Row
}
func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *XLSXFileInfo) (*XLSXReader, error) {
func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS, info *XLSXFileInfo, outSch schema.Schema) (*XLSXReader, bool, error) {
r, err := fs.OpenForRead(path)
if err != nil {
return nil, err
return nil, false, err
}
br := bufio.NewReaderSize(r, ReadBufSize)
@@ -51,18 +51,29 @@ func OpenXLSXReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS
data, err := getXlsxRows(path, info.SheetName)
if err != nil {
return nil, err
return nil, false, err
}
_, sch := untyped.NewUntypedSchema(colStrs...)
decodedRows, err := decodeXLSXRows(nbf, data, sch)
if err != nil {
r.Close()
return nil, err
fileMatchesSchema := true
var decodedRows []row.Row
var inSch schema.Schema
if outSch != nil {
inSch = outSch
decodedRows, err = decodeXLSXRows(nbf, data, outSch)
if err != nil {
fileMatchesSchema = false
}
} else {
_, sch := untyped.NewUntypedSchema(colStrs...)
inSch = sch
decodedRows, err = decodeXLSXRows(nbf, data, sch)
if err != nil {
r.Close()
return nil, false, err
}
}
return &XLSXReader{r, br, info, sch, 0, decodedRows}, nil
return &XLSXReader{r, br, info, inSch, 0, decodedRows}, fileMatchesSchema, nil
}
func getColHeaders(path string, sheetName string) ([]string, error) {