Switch csv.Write() to use IterAll()

The parallelism in csv.Write() seemed like overkill since
we had to build the entire CSV in memory in order to take
advantage of it. The code is simpler now and uses far less
memory, though it is likely to be slower for large datasets.
This commit is contained in:
Chris Masone
2016-02-10 16:45:56 -08:00
parent 3331fe4006
commit 82dfd418bb
2 changed files with 10 additions and 15 deletions

View File

@@ -12,7 +12,6 @@ import (
)
var (
p = flag.Int("p", 512, "parallelism")
dsFlags = dataset.NewFlags()
// Actually the delimiter is a rune, which can be multiple bytes long.
// https://blog.golang.org/strings
@@ -45,7 +44,7 @@ func main() {
err = d.Try(func() {
nomsList, structDesc := csv.ValueToListAndElemDesc(ds.Head().Value(), ds.Store())
csv.Write(nomsList, structDesc, comma, *p, os.Stdout)
csv.Write(nomsList, structDesc, comma, os.Stdout)
})
if err != nil {
fmt.Println("Failed to export dataset as CSV:")

View File

@@ -29,28 +29,24 @@ func ValueToListAndElemDesc(v types.Value, cs chunks.ChunkSource) (types.List, t
}
// Write takes a types.List l of structs (described by sd) and writes it to output as comma-delimited values.
func Write(l types.List, sd types.StructDesc, comma rune, concurrency int, output io.Writer) {
func Write(l types.List, sd types.StructDesc, comma rune, output io.Writer) {
d.Exp.Equal(types.StructKind, sd.Kind(), "Did not find Struct: %s", sd.Describe())
fieldNames := getFieldNamesFromStruct(sd)
csvWriter := csv.NewWriter(output)
csvWriter.Comma = comma
records := make([][]string, l.Len()+1)
records[0] = fieldNames // Write header
l.IterAllP(concurrency, func(v types.Value, index uint64) {
for _, f := range fieldNames {
records[index+1] = append(
records[index+1],
fmt.Sprintf("%s", v.(types.Struct).Get(f)),
)
d.Exp.NoError(csvWriter.Write(fieldNames), "Failed to write header %v", fieldNames)
record := make([]string, len(fieldNames))
l.IterAll(func(v types.Value, index uint64) {
for i, f := range fieldNames {
record[i] = fmt.Sprintf("%s", v.(types.Struct).Get(f))
}
d.Exp.NoError(csvWriter.Write(record), "Failed to write record %v", record)
})
csvWriter.WriteAll(records)
err := csvWriter.Error()
d.Exp.Equal(nil, err, "error flushing csv:", err)
csvWriter.Flush()
d.Exp.NoError(csvWriter.Error(), "error flushing csv")
}
func getFieldNamesFromStruct(structDesc types.StructDesc) (fieldNames []string) {