Revert "Revert "Change csv importer API slightly""

This commit is contained in:
Erik Arvidsson
2016-03-02 11:26:24 -08:00
parent 8929c63ab2
commit a8d41ff7cc
4 changed files with 33 additions and 45 deletions

View File

@@ -58,11 +58,21 @@ func main() {
return
}
r := csv.NewCSVReader(res, comma)
var headers []string
if *header == "" {
headers, err = r.Read()
d.Exp.NoError(err)
} else {
headers = strings.Split(*header, string(comma))
}
if *reportTypes {
keys, kinds := csv.ReportValidFieldTypes(res, *header)
d.Chk.Equal(len(keys), len(kinds))
kinds := csv.ReportValidFieldTypes(r, headers)
d.Chk.Equal(len(headers), len(kinds))
fmt.Println("Possible types for each column:")
for i, key := range keys {
for i, key := range headers {
fmt.Printf("%s: %s\n", key, strings.Join(csv.KindsToStrings(kinds[i]), ","))
}
return
@@ -80,7 +90,7 @@ func main() {
kinds = csv.StringsToKinds(strings.Split(*columnTypes, ","))
}
value, _, _ := csv.Read(res, *name, *header, kinds, comma, ds.Store())
value, _, _ := csv.Read(r, *name, headers, kinds, ds.Store())
_, err = ds.Commit(value)
d.Exp.NoError(err)
}

View File

@@ -59,7 +59,6 @@ func (s *testSuite) TestCSVImporter() {
s.Equal(types.Uint8(i), st.Get("b"))
i++
})
}
func (s *testSuite) TestCSVImporterReportTypes() {

View File

@@ -4,7 +4,6 @@ import (
"encoding/csv"
"io"
"log"
"strings"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/d"
@@ -49,21 +48,10 @@ func NewCSVReader(res io.Reader, comma rune) *csv.Reader {
return r
}
// ReportValidFieldTypes takes res, a reader expected to contain CSV data, and an optional header. Excluding the header (assumed to be the first row if no header is given), it returns a slice of types.NomsKind for each column in the data indicating what Noms types could be used to represent that row.
// ReportValidFieldTypes takes a CSV reader and the headers. It returns a slice of types.NomsKind for each column in the data indicating what Noms types could be used to represent the values in that column.
// For example, if all values in a column are negative integers between -127 and 0, the slice for that column would be [types.Int8Kind, types.Int16Kind, types.Int32Kind, types.Int64Kind, types.Float32Kind, types.Float64Kind, types.StringKind]. If even one value in the column is a floating point number, however, all the integer kinds would be dropped. All values can be represented as a string, so that option is always provided.
func ReportValidFieldTypes(res io.Reader, header string) ([]string, []KindSlice) {
var input io.Reader
if len(header) == 0 {
input = res
} else {
input = io.MultiReader(strings.NewReader(header+"\n"), res)
}
r := csv.NewReader(input)
keys, err := r.Read()
d.Exp.NoError(err, "Error decoding CSV")
options := newSchemaOptions(len(keys))
func ReportValidFieldTypes(r *csv.Reader, headers []string) []KindSlice {
options := newSchemaOptions(len(headers))
rowChan := make(chan []string)
doneChan := make(chan struct{})
go func() {
@@ -84,18 +72,15 @@ func ReportValidFieldTypes(res io.Reader, header string) ([]string, []KindSlice)
rowChan <- row
}
<-doneChan
return keys, options.ValidKinds()
return options.ValidKinds()
}
// MakeStructTypeFromHeader creates a struct type by reading the first row of the csv.Reader using |kinds| as the type of each field. If |kinds| is empty, default to strings.
func MakeStructTypeFromHeader(r *csv.Reader, structName string, kinds KindSlice) (typeRef, typeDef types.Type) {
keys, err := r.Read()
d.Exp.NoError(err, "Error decoding CSV")
// MakeStructTypeFromHeaders creates a struct type from the headers using |kinds| as the type of each field. If |kinds| is empty, default to strings.
func MakeStructTypeFromHeaders(headers []string, structName string, kinds KindSlice) (typeRef, typeDef types.Type) {
useStringType := len(kinds) == 0
d.Chk.True(useStringType || len(keys) == len(kinds))
fields := make([]types.Field, len(keys))
for i, key := range keys {
d.Chk.True(useStringType || len(headers) == len(kinds))
fields := make([]types.Field, len(headers))
for i, key := range headers {
kind := types.StringKind
if !useStringType {
kind = kinds[i]
@@ -115,21 +100,11 @@ func MakeStructTypeFromHeader(r *csv.Reader, structName string, kinds KindSlice)
return
}
// Read takes comma-delineated data from res and parses it into a typed List of structs. Each row gets parsed into a struct named structName, optionally described by header. If header is empty, the first line of the file is used to guess the form of the struct into which rows are parsed. If kinds is non-empty, it will be used to type the fields in the generated structs; otherwise, they will be left as string-fields.
// Read takes a CSV reader and reads it into a typed List of structs. Each row gets read into a struct named structName, described by headers. If the original data contained a header row, the caller is expected to have already read it, so that the reader is positioned at the first data row.
// If kinds is non-empty, it will be used to type the fields in the generated structs; otherwise, they will be left as string-fields.
// In addition to the list, Read returns the typeRef for the structs in the list, and last the typeDef of the structs.
func Read(res io.Reader, structName, header string, kinds KindSlice, comma rune, cs chunks.ChunkStore) (l types.List, typeRef, typeDef types.Type) {
var input io.Reader
if len(header) == 0 {
input = res
} else {
input = io.MultiReader(strings.NewReader(header+"\n"), res)
}
r := csv.NewReader(input)
r.Comma = comma
r.FieldsPerRecord = 0 // Let first row determine the number of fields.
typeRef, typeDef = MakeStructTypeFromHeader(r, structName, kinds)
func Read(r *csv.Reader, structName string, headers []string, kinds KindSlice, cs chunks.ChunkStore) (l types.List, typeRef, typeDef types.Type) {
typeRef, typeDef = MakeStructTypeFromHeaders(headers, structName, kinds)
valueChan := make(chan types.Value, 128) // TODO: Make this a function param?
listType := types.MakeCompoundType(types.ListKind, typeRef)
listChan := types.NewStreamingTypedList(listType, cs, valueChan)

View File

@@ -297,8 +297,12 @@ func TestReportValidFieldTypes(t *testing.T) {
for _, row := range data {
dataString = dataString + strings.Join(row, ",") + "\n"
}
keys, kinds := ReportValidFieldTypes(bytes.NewBufferString(dataString), "")
assert.Equal(data[0], keys)
r := NewCSVReader(bytes.NewBufferString(dataString), ',')
headers, err := r.Read()
assert.NoError(err)
assert.Equal(data[0], headers)
kinds := ReportValidFieldTypes(r, headers)
for i, ks := range kinds {
assert.Equal(expectedKinds[i], ks)
}