From 82dfd418bbfaf51383bdde66f8acec0fc7d51945 Mon Sep 17 00:00:00 2001 From: Chris Masone Date: Wed, 10 Feb 2016 16:45:56 -0800 Subject: [PATCH] Switch csv.Write() to use IterAll() The parallelism in csv.Write() seemed like overkill since we had to build the entire CSV in memory in order to take advantage of it. The code is now simpler and uses far less memory, though it is likely to be slower for large data. --- clients/csv/exporter/exporter.go | 3 +-- clients/csv/write.go | 22 +++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/clients/csv/exporter/exporter.go b/clients/csv/exporter/exporter.go index 666968ef4c..483ba778a5 100644 --- a/clients/csv/exporter/exporter.go +++ b/clients/csv/exporter/exporter.go @@ -12,7 +12,6 @@ import ( ) var ( - p = flag.Int("p", 512, "parallelism") dsFlags = dataset.NewFlags() // Actually the delimiter uses runes, which can be multiple characters long. // https://blog.golang.org/strings @@ -45,7 +44,7 @@ func main() { err = d.Try(func() { nomsList, structDesc := csv.ValueToListAndElemDesc(ds.Head().Value(), ds.Store()) - csv.Write(nomsList, structDesc, comma, *p, os.Stdout) + csv.Write(nomsList, structDesc, comma, os.Stdout) }) if err != nil { fmt.Println("Failed to export dataset as CSV:") diff --git a/clients/csv/write.go b/clients/csv/write.go index 95b380488c..6b4c8ba0be 100644 --- a/clients/csv/write.go +++ b/clients/csv/write.go @@ -29,28 +29,24 @@ func ValueToListAndElemDesc(v types.Value, cs chunks.ChunkSource) (types.List, t } // Write takes a types.List l of structs (described by sd) and writes it to output as comma-delineated values. 
-func Write(l types.List, sd types.StructDesc, comma rune, concurrency int, output io.Writer) { +func Write(l types.List, sd types.StructDesc, comma rune, output io.Writer) { d.Exp.Equal(types.StructKind, sd.Kind(), "Did not find Struct: %s", sd.Describe()) fieldNames := getFieldNamesFromStruct(sd) csvWriter := csv.NewWriter(output) csvWriter.Comma = comma - records := make([][]string, l.Len()+1) - records[0] = fieldNames // Write header - - l.IterAllP(concurrency, func(v types.Value, index uint64) { - for _, f := range fieldNames { - records[index+1] = append( - records[index+1], - fmt.Sprintf("%s", v.(types.Struct).Get(f)), - ) + d.Exp.NoError(csvWriter.Write(fieldNames), "Failed to write header %v", fieldNames) + record := make([]string, len(fieldNames)) + l.IterAll(func(v types.Value, index uint64) { + for i, f := range fieldNames { + record[i] = fmt.Sprintf("%s", v.(types.Struct).Get(f)) } + d.Exp.NoError(csvWriter.Write(record), "Failed to write record %v", record) }) - csvWriter.WriteAll(records) - err := csvWriter.Error() - d.Exp.Equal(nil, err, "error flushing csv:", err) + csvWriter.Flush() + d.Exp.NoError(csvWriter.Error(), "error flushing csv") } func getFieldNamesFromStruct(structDesc types.StructDesc) (fieldNames []string) {