Merge pull request #1055 from arv/csv-lenient-column-count

noms.io: Make csv reader more lenient
Erik Arvidsson
2016-03-07 16:24:39 -08:00
3 changed files with 120 additions and 8 deletions


@@ -3,7 +3,6 @@ package csv
 import (
 	"encoding/csv"
 	"io"
-	"log"
 
 	"github.com/attic-labs/noms/chunks"
 	"github.com/attic-labs/noms/d"
@@ -44,7 +43,7 @@ func KindsToStrings(kinds KindSlice) []string {
 func NewCSVReader(res io.Reader, comma rune) *csv.Reader {
 	r := csv.NewReader(res)
 	r.Comma = comma
-	r.FieldsPerRecord = 0 // Let first row determine the number of fields.
+	r.FieldsPerRecord = -1 // Don't enforce number of fields.
 	return r
 }
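For reference, encoding/csv interprets FieldsPerRecord as follows: a positive value requires exactly that many fields in every record, 0 locks the count to whatever the first record has, and any negative value disables the check entirely. A standalone sketch of the difference (the sample data is made up for illustration):

package main

import (
	"encoding/csv"
	"fmt"
	"strings"
)

func main() {
	data := "a,b\nc,d,e\n" // second record has one extra field

	// FieldsPerRecord = 0 (the old setting): the first record fixes the
	// expected count, so the second record fails with a field-count error.
	strict := csv.NewReader(strings.NewReader(data))
	strict.FieldsPerRecord = 0
	_, err := strict.ReadAll()
	fmt.Println(err) // e.g. "record on line 2: wrong number of fields"

	// FieldsPerRecord = -1 (the new setting): ragged records are accepted.
	lenient := csv.NewReader(strings.NewReader(data))
	lenient.FieldsPerRecord = -1
	records, err := lenient.ReadAll()
	fmt.Println(len(records), err) // 2 <nil>
}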
@@ -117,13 +116,15 @@ func Read(r *csv.Reader, structName string, headers []string, kinds KindSlice, c
 			close(valueChan)
 			break
 		} else if err != nil {
-			log.Fatalln("Error decoding CSV: ", err)
+			panic(err)
 		}
 		fields := make(map[string]types.Value)
 		for i, v := range row {
-			f := structFields[i]
-			fields[f.Name] = StringToType(v, f.T.Kind())
+			if i < len(headers) {
+				f := structFields[i]
+				fields[f.Name] = StringToType(v, f.T.Kind())
+			}
 		}
 		valueChan <- types.NewStruct(typeRef, typeDef, fields)
 	}
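The net effect of the new bounds check: rows wider than the header list silently drop their surplus fields, and narrower rows simply produce structs with fewer fields set. A minimal sketch of the same pattern using plain strings rather than the repo's types.Value (rowToFields is a hypothetical helper, not part of this codebase):

package main

import "fmt"

// rowToFields maps row values to header names, ignoring surplus fields.
func rowToFields(headers, row []string) map[string]string {
	fields := make(map[string]string)
	for i, v := range row {
		if i < len(headers) { // same guard as above: skip extra columns
			fields[headers[i]] = v
		}
	}
	return fields
}

func main() {
	long := rowToFields([]string{"A", "B"}, []string{"a", "b", "extra"})
	short := rowToFields([]string{"A", "B"}, []string{"a"})
	fmt.Println(long)  // map[A:a B:b]; "extra" is dropped
	fmt.Println(short) // map[A:a]; the missing field is simply absent
}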

clients/csv/read_test.go (new file, 110 additions)

@@ -0,0 +1,110 @@
package csv

import (
	"bytes"
	"encoding/csv"
	"testing"

	"github.com/attic-labs/noms/chunks"
	"github.com/attic-labs/noms/types"
	"github.com/stretchr/testify/assert"
)

func TestRead(t *testing.T) {
	assert := assert.New(t)
	cs := chunks.NewMemoryStore()

	dataString := `a,1,true
b,2,false
`
	r := NewCSVReader(bytes.NewBufferString(dataString), ',')

	headers := []string{"A", "B", "C"}
	kinds := KindSlice{types.StringKind, types.Int8Kind, types.BoolKind}
	l, typeRef, typeDef := Read(r, "test", headers, kinds, cs)

	assert.Equal(uint64(2), l.Len())

	assert.True(typeRef.IsUnresolved())
	desc, ok := typeDef.Desc.(types.StructDesc)
	assert.True(ok)
	assert.Len(desc.Fields, 3)
	assert.Equal("A", desc.Fields[0].Name)
	assert.Equal("B", desc.Fields[1].Name)
	assert.Equal("C", desc.Fields[2].Name)

	assert.True(l.Get(0).(types.Struct).Get("A").Equals(types.NewString("a")))
	assert.True(l.Get(1).(types.Struct).Get("A").Equals(types.NewString("b")))
	assert.True(l.Get(0).(types.Struct).Get("B").Equals(types.Int8(1)))
	assert.True(l.Get(1).(types.Struct).Get("B").Equals(types.Int8(2)))
	assert.True(l.Get(0).(types.Struct).Get("C").Equals(types.Bool(true)))
	assert.True(l.Get(1).(types.Struct).Get("C").Equals(types.Bool(false)))
}

func testTrailingHelper(t *testing.T, dataString string) {
	assert := assert.New(t)
	cs := chunks.NewMemoryStore()

	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
	headers := []string{"A", "B"}
	kinds := KindSlice{types.StringKind, types.StringKind}
	l, typeRef, typeDef := Read(r, "test", headers, kinds, cs)
	assert.Equal(uint64(3), l.Len())

	assert.True(typeRef.IsUnresolved())
	desc, ok := typeDef.Desc.(types.StructDesc)
	assert.True(ok)
	assert.Len(desc.Fields, 2)
	assert.Equal("A", desc.Fields[0].Name)
	assert.Equal("B", desc.Fields[1].Name)
}

func TestReadTrailingHole(t *testing.T) {
	dataString := `a,b,
d,e,
g,h,
`
	testTrailingHelper(t, dataString)
}

func TestReadTrailingHoles(t *testing.T) {
	dataString := `a,b,,
d,e
g,h
`
	testTrailingHelper(t, dataString)
}

func TestReadTrailingValues(t *testing.T) {
	dataString := `a,b
d,e,f
g,h,i,j
`
	testTrailingHelper(t, dataString)
}

func TestReadParseError(t *testing.T) {
	assert := assert.New(t)
	cs := chunks.NewMemoryStore()

	dataString := `a,"b`
	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
	headers := []string{"A", "B"}
	kinds := KindSlice{types.StringKind, types.StringKind}
	func() {
		defer func() {
			r := recover()
			assert.NotNil(r)
			_, ok := r.(*csv.ParseError)
			assert.True(ok, "Should be a ParseError")
		}()
		Read(r, "test", headers, kinds, cs)
	}()
}
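These tests live in the same package as the reader, so they can be run on their own; assuming a standard GOPATH checkout of the repo, something like the following exercises just the cases above (the -run pattern is illustrative):

go test ./clients/csv -run 'TestRead.*'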


@@ -18,10 +18,11 @@ func newSchemaOptions(fieldCount int) schemaOptions {
 	return options
 }
 
-func (so schemaOptions) Test(values []string) {
-	d.Chk.True(len(so) == len(values))
+func (so schemaOptions) Test(fields []string) {
 	for i, t := range so {
-		t.Test(values[i])
+		if i < len(fields) {
+			t.Test(fields[i])
+		}
 	}
 }
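The removed d.Chk.True assertion would now fire on any ragged row, since a row handed to Test can legitimately be shorter than the option list; the i < len(fields) guard turns the missing positions into no-ops instead. A self-contained analogue of the tolerant loop (checks is a hypothetical stand-in for schemaOptions, not the repo's type):

package main

import "fmt"

// checks holds one validator per expected column.
type checks []func(string)

// Test mirrors the patched method: positions past the end of a short row
// are skipped instead of tripping a length assertion.
func (cs checks) Test(fields []string) {
	for i, c := range cs {
		if i < len(fields) {
			c(fields[i])
		}
	}
}

func main() {
	cs := checks{
		func(v string) { fmt.Println("col 0:", v) },
		func(v string) { fmt.Println("col 1:", v) },
	}
	cs.Test([]string{"only-one"}) // prints only "col 0: only-one"
}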