mirror of
https://github.com/dolthub/dolt.git
synced 2026-02-11 02:59:34 -06:00
Revert "Revert "Change csv importer API slightly""
This commit is contained in:
@@ -58,11 +58,21 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
r := csv.NewCSVReader(res, comma)
|
||||
|
||||
var headers []string
|
||||
if *header == "" {
|
||||
headers, err = r.Read()
|
||||
d.Exp.NoError(err)
|
||||
} else {
|
||||
headers = strings.Split(*header, string(comma))
|
||||
}
|
||||
|
||||
if *reportTypes {
|
||||
keys, kinds := csv.ReportValidFieldTypes(res, *header)
|
||||
d.Chk.Equal(len(keys), len(kinds))
|
||||
kinds := csv.ReportValidFieldTypes(r, headers)
|
||||
d.Chk.Equal(len(headers), len(kinds))
|
||||
fmt.Println("Possible types for each column:")
|
||||
for i, key := range keys {
|
||||
for i, key := range headers {
|
||||
fmt.Printf("%s: %s\n", key, strings.Join(csv.KindsToStrings(kinds[i]), ","))
|
||||
}
|
||||
return
|
||||
@@ -80,7 +90,7 @@ func main() {
|
||||
kinds = csv.StringsToKinds(strings.Split(*columnTypes, ","))
|
||||
}
|
||||
|
||||
value, _, _ := csv.Read(res, *name, *header, kinds, comma, ds.Store())
|
||||
value, _, _ := csv.Read(r, *name, headers, kinds, ds.Store())
|
||||
_, err = ds.Commit(value)
|
||||
d.Exp.NoError(err)
|
||||
}
|
||||
|
||||
@@ -59,7 +59,6 @@ func (s *testSuite) TestCSVImporter() {
|
||||
s.Equal(types.Uint8(i), st.Get("b"))
|
||||
i++
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func (s *testSuite) TestCSVImporterReportTypes() {
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"encoding/csv"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/d"
|
||||
@@ -49,21 +48,10 @@ func NewCSVReader(res io.Reader, comma rune) *csv.Reader {
|
||||
return r
|
||||
}
|
||||
|
||||
// ReportValidFieldTypes takes res, a reader expected to contain CSV data, and an optional header. Excluding the header (assumed to be the first row if no header is given), it returns a slice of types.NomsKind for each column in the data indicating what Noms types could be used to represent that row.
|
||||
// ReportValidFieldTypes takes a CSV reader and the headers. It returns one slice of types.NomsKind per column in the data, indicating which Noms types could be used to represent that column.
|
||||
// For example, if all values in a column are negative integers between -127 and 0, the slice for that column would be [types.Int8Kind, types.Int16Kind, types.Int32Kind, types.Int64Kind, types.Float32Kind, types.Float64Kind, types.StringKind]. If even one value in the column is a floating point number, however, all the integer kinds would be dropped. All values can be represented as a string, so that option is always provided.
|
||||
func ReportValidFieldTypes(res io.Reader, header string) ([]string, []KindSlice) {
|
||||
var input io.Reader
|
||||
if len(header) == 0 {
|
||||
input = res
|
||||
} else {
|
||||
input = io.MultiReader(strings.NewReader(header+"\n"), res)
|
||||
}
|
||||
|
||||
r := csv.NewReader(input)
|
||||
keys, err := r.Read()
|
||||
d.Exp.NoError(err, "Error decoding CSV")
|
||||
|
||||
options := newSchemaOptions(len(keys))
|
||||
func ReportValidFieldTypes(r *csv.Reader, headers []string) []KindSlice {
|
||||
options := newSchemaOptions(len(headers))
|
||||
rowChan := make(chan []string)
|
||||
doneChan := make(chan struct{})
|
||||
go func() {
|
||||
@@ -84,18 +72,15 @@ func ReportValidFieldTypes(res io.Reader, header string) ([]string, []KindSlice)
|
||||
rowChan <- row
|
||||
}
|
||||
<-doneChan
|
||||
return keys, options.ValidKinds()
|
||||
return options.ValidKinds()
|
||||
}
|
||||
|
||||
// MakeStructTypeFromHeader creates a struct type by reading the first row of the csv.Reader using |kinds| as the type of each field. If |kinds| is empty, default to strings.
|
||||
func MakeStructTypeFromHeader(r *csv.Reader, structName string, kinds KindSlice) (typeRef, typeDef types.Type) {
|
||||
keys, err := r.Read()
|
||||
d.Exp.NoError(err, "Error decoding CSV")
|
||||
|
||||
// MakeStructTypeFromHeaders creates a struct type from the headers using |kinds| as the type of each field. If |kinds| is empty, default to strings.
|
||||
func MakeStructTypeFromHeaders(headers []string, structName string, kinds KindSlice) (typeRef, typeDef types.Type) {
|
||||
useStringType := len(kinds) == 0
|
||||
d.Chk.True(useStringType || len(keys) == len(kinds))
|
||||
fields := make([]types.Field, len(keys))
|
||||
for i, key := range keys {
|
||||
d.Chk.True(useStringType || len(headers) == len(kinds))
|
||||
fields := make([]types.Field, len(headers))
|
||||
for i, key := range headers {
|
||||
kind := types.StringKind
|
||||
if !useStringType {
|
||||
kind = kinds[i]
|
||||
@@ -115,21 +100,11 @@ func MakeStructTypeFromHeader(r *csv.Reader, structName string, kinds KindSlice)
|
||||
return
|
||||
}
|
||||
|
||||
// Read takes comma-delineated data from res and parses it into a typed List of structs. Each row gets parsed into a struct named structName, optionally described by header. If header is empty, the first line of the file is used to guess the form of the struct into which rows are parsed. If kinds is non-empty, it will be used to type the fields in the generated structs; otherwise, they will be left as string-fields.
|
||||
// Read takes a CSV reader and reads it into a typed List of structs. Each row gets read into a struct named structName, described by headers. If the original data contained headers, it is expected that the input reader has already consumed them and is positioned at the first data row.
|
||||
// If kinds is non-empty, it will be used to type the fields in the generated structs; otherwise, they will be left as string-fields.
|
||||
// In addition to the list, Read returns the typeRef for the structs in the list, and last the typeDef of the structs.
|
||||
func Read(res io.Reader, structName, header string, kinds KindSlice, comma rune, cs chunks.ChunkStore) (l types.List, typeRef, typeDef types.Type) {
|
||||
var input io.Reader
|
||||
if len(header) == 0 {
|
||||
input = res
|
||||
} else {
|
||||
input = io.MultiReader(strings.NewReader(header+"\n"), res)
|
||||
}
|
||||
|
||||
r := csv.NewReader(input)
|
||||
r.Comma = comma
|
||||
r.FieldsPerRecord = 0 // Let first row determine the number of fields.
|
||||
|
||||
typeRef, typeDef = MakeStructTypeFromHeader(r, structName, kinds)
|
||||
func Read(r *csv.Reader, structName string, headers []string, kinds KindSlice, cs chunks.ChunkStore) (l types.List, typeRef, typeDef types.Type) {
|
||||
typeRef, typeDef = MakeStructTypeFromHeaders(headers, structName, kinds)
|
||||
valueChan := make(chan types.Value, 128) // TODO: Make this a function param?
|
||||
listType := types.MakeCompoundType(types.ListKind, typeRef)
|
||||
listChan := types.NewStreamingTypedList(listType, cs, valueChan)
|
||||
|
||||
@@ -297,8 +297,12 @@ func TestReportValidFieldTypes(t *testing.T) {
|
||||
for _, row := range data {
|
||||
dataString = dataString + strings.Join(row, ",") + "\n"
|
||||
}
|
||||
keys, kinds := ReportValidFieldTypes(bytes.NewBufferString(dataString), "")
|
||||
assert.Equal(data[0], keys)
|
||||
|
||||
r := NewCSVReader(bytes.NewBufferString(dataString), ',')
|
||||
headers, err := r.Read()
|
||||
assert.NoError(err)
|
||||
assert.Equal(data[0], headers)
|
||||
kinds := ReportValidFieldTypes(r, headers)
|
||||
for i, ks := range kinds {
|
||||
assert.Equal(expectedKinds[i], ks)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user