mirror of
https://github.com/dolthub/dolt.git
synced 2026-02-11 02:59:34 -06:00
Merge pull request #1055 from arv/csv-lenient-column-count
noms.io: Make csv reader more lenient
This commit is contained in:
@@ -3,7 +3,6 @@ package csv
|
||||
import (
|
||||
"encoding/csv"
|
||||
"io"
|
||||
"log"
|
||||
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/d"
|
||||
@@ -44,7 +43,7 @@ func KindsToStrings(kinds KindSlice) []string {
|
||||
// NewCSVReader returns a csv.Reader that reads from res and splits fields on
// comma. The number of fields per record is not enforced, so individual rows
// may have fewer or more fields than the first row.
func NewCSVReader(res io.Reader, comma rune) *csv.Reader {
	r := csv.NewReader(res)
	r.Comma = comma
	r.FieldsPerRecord = -1 // Don't enforce number of fields.
	return r
}
|
||||
|
||||
@@ -117,13 +116,15 @@ func Read(r *csv.Reader, structName string, headers []string, kinds KindSlice, c
|
||||
close(valueChan)
|
||||
break
|
||||
} else if err != nil {
|
||||
log.Fatalln("Error decoding CSV: ", err)
|
||||
panic(err)
|
||||
}
|
||||
|
||||
fields := make(map[string]types.Value)
|
||||
for i, v := range row {
|
||||
f := structFields[i]
|
||||
fields[f.Name] = StringToType(v, f.T.Kind())
|
||||
if i < len(headers) {
|
||||
f := structFields[i]
|
||||
fields[f.Name] = StringToType(v, f.T.Kind())
|
||||
}
|
||||
}
|
||||
valueChan <- types.NewStruct(typeRef, typeDef, fields)
|
||||
}
|
||||
|
||||
110
clients/csv/read_test.go
Normal file
110
clients/csv/read_test.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"testing"
|
||||
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestRead(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
cs := chunks.NewMemoryStore()
|
||||
|
||||
dataString := `a,1,true
|
||||
b,2,false
|
||||
`
|
||||
r := NewCSVReader(bytes.NewBufferString(dataString), ',')
|
||||
|
||||
headers := []string{"A", "B", "C"}
|
||||
kinds := KindSlice{types.StringKind, types.Int8Kind, types.BoolKind}
|
||||
l, typeRef, typeDef := Read(r, "test", headers, kinds, cs)
|
||||
|
||||
assert.Equal(uint64(2), l.Len())
|
||||
|
||||
assert.True(typeRef.IsUnresolved())
|
||||
|
||||
desc, ok := typeDef.Desc.(types.StructDesc)
|
||||
assert.True(ok)
|
||||
assert.Len(desc.Fields, 3)
|
||||
assert.Equal("A", desc.Fields[0].Name)
|
||||
assert.Equal("B", desc.Fields[1].Name)
|
||||
assert.Equal("C", desc.Fields[2].Name)
|
||||
|
||||
assert.True(l.Get(0).(types.Struct).Get("A").Equals(types.NewString("a")))
|
||||
assert.True(l.Get(1).(types.Struct).Get("A").Equals(types.NewString("b")))
|
||||
|
||||
assert.True(l.Get(0).(types.Struct).Get("B").Equals(types.Int8(1)))
|
||||
assert.True(l.Get(1).(types.Struct).Get("B").Equals(types.Int8(2)))
|
||||
|
||||
assert.True(l.Get(0).(types.Struct).Get("C").Equals(types.Bool(true)))
|
||||
assert.True(l.Get(1).(types.Struct).Get("C").Equals(types.Bool(false)))
|
||||
}
|
||||
|
||||
func testTrailingHelper(t *testing.T, dataString string) {
|
||||
assert := assert.New(t)
|
||||
cs := chunks.NewMemoryStore()
|
||||
|
||||
r := NewCSVReader(bytes.NewBufferString(dataString), ',')
|
||||
|
||||
headers := []string{"A", "B"}
|
||||
kinds := KindSlice{types.StringKind, types.StringKind}
|
||||
l, typeRef, typeDef := Read(r, "test", headers, kinds, cs)
|
||||
|
||||
assert.Equal(uint64(3), l.Len())
|
||||
|
||||
assert.True(typeRef.IsUnresolved())
|
||||
|
||||
desc, ok := typeDef.Desc.(types.StructDesc)
|
||||
assert.True(ok)
|
||||
assert.Len(desc.Fields, 2)
|
||||
assert.Equal("A", desc.Fields[0].Name)
|
||||
assert.Equal("B", desc.Fields[1].Name)
|
||||
}
|
||||
|
||||
func TestReadTrailingHole(t *testing.T) {
|
||||
dataString := `a,b,
|
||||
d,e,
|
||||
g,h,
|
||||
`
|
||||
testTrailingHelper(t, dataString)
|
||||
}
|
||||
|
||||
func TestReadTrailingHoles(t *testing.T) {
|
||||
dataString := `a,b,,
|
||||
d,e
|
||||
g,h
|
||||
`
|
||||
testTrailingHelper(t, dataString)
|
||||
}
|
||||
|
||||
func TestReadTrailingValues(t *testing.T) {
|
||||
dataString := `a,b
|
||||
d,e,f
|
||||
g,h,i,j
|
||||
`
|
||||
testTrailingHelper(t, dataString)
|
||||
}
|
||||
|
||||
func TestReadParseError(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
cs := chunks.NewMemoryStore()
|
||||
|
||||
dataString := `a,"b`
|
||||
r := NewCSVReader(bytes.NewBufferString(dataString), ',')
|
||||
|
||||
headers := []string{"A", "B"}
|
||||
kinds := KindSlice{types.StringKind, types.StringKind}
|
||||
func() {
|
||||
defer func() {
|
||||
r := recover()
|
||||
assert.NotNil(r)
|
||||
_, ok := r.(*csv.ParseError)
|
||||
assert.True(ok, "Should be a ParseError")
|
||||
}()
|
||||
Read(r, "test", headers, kinds, cs)
|
||||
}()
|
||||
}
|
||||
@@ -18,10 +18,11 @@ func newSchemaOptions(fieldCount int) schemaOptions {
|
||||
return options
|
||||
}
|
||||
|
||||
func (so schemaOptions) Test(values []string) {
|
||||
d.Chk.True(len(so) == len(values))
|
||||
func (so schemaOptions) Test(fields []string) {
|
||||
for i, t := range so {
|
||||
t.Test(values[i])
|
||||
if i < len(fields) {
|
||||
t.Test(fields[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user