Fixing code formatting errors in the csv reader

This commit is contained in:
Kyle Derkacz
2016-03-11 18:32:13 -08:00
parent 506d0013ea
commit d7b55cfbc5
2 changed files with 72 additions and 91 deletions

View File

@@ -1,50 +1,38 @@
/*
Package csv changes Classic Mac (CR) line endings to Linux (LF) line
endings in files. This is useful when handling CSV files generated by the Mac
version of Microsoft Excel as documented in this issue:
https://github.com/golang/go/issues/7802.
*/
package csv
import (
"encoding/csv"
"io"
"bufio"
"bufio"
"encoding/csv"
"io"
)
var (
	rByte byte = 13 // the byte that corresponds to the '\r' rune.
	nByte byte = 10 // the byte that corresponds to the '\n' rune.
)

// reader wraps a bufio.Reader and rewrites bare CR line endings to LF as data
// is read through it.
type reader struct {
	r *bufio.Reader
}

// SafeCSVReader creates a new io.Reader that wraps r to convert Classic Mac
// (CR) line endings to Linux (LF) line endings.
func SafeCSVReader(r *bufio.Reader) *reader {
	return &reader{r: r}
}

// Read reads from the wrapped reader into p and replaces every CR that is not
// immediately followed by a LF with a LF. CRLF pairs are left untouched, even
// when the CR and the LF are delivered by two separate reads. A CR that is the
// very last byte of the input is converted as well.
//
// It returns the number of bytes placed in p and any error reported by the
// wrapped reader. Only the n bytes actually read are inspected or modified.
func (r reader) Read(p []byte) (n int, err error) {
	n, err = r.r.Read(p)
	for i := 0; i < n; i++ {
		if p[i] != rByte {
			continue
		}
		if i+1 < n {
			// The following byte is already in this chunk, so decide locally.
			if p[i+1] != nByte {
				p[i] = nByte
			}
			continue
		}
		// The CR is the last byte of this chunk, so its successor has not been
		// read yet. Peek only when the read succeeded; after a failed read the
		// CR is treated as the end of the data.
		if err == nil {
			next, peekErr := r.r.Peek(1)
			if len(next) > 0 && next[0] == nByte {
				continue // CRLF split across two reads: keep the CR.
			}
			if peekErr != nil {
				// Peek reports the wrapped reader's pending error; surface it
				// together with the bytes already read rather than dropping it.
				err = peekErr
			}
		}
		p[i] = nByte
	}
	return n, err
}
// NewCSVReader returns a new csv.Reader that splits on comma and asserts that all rows contain the same number of fields as the first.
func NewCSVReader(res io.Reader, comma rune) *csv.Reader {
bufRes := bufio.NewReader(res)
r := csv.NewReader(SafeCSVReader(bufRes))
r.Comma = comma
r.FieldsPerRecord = -1 // Let first row determine the number of fields.
return r
}
bufRes := bufio.NewReader(res)
r := csv.NewReader(reader{r: bufRes})
r.Comma = comma
r.FieldsPerRecord = -1 // Don't enforce number of fields.
return r
}

View File

@@ -1,86 +1,79 @@
package csv
import (
"testing"
"bytes"
"encoding/csv"
// "fmt"
"strings"
"bufio"
"bytes"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
func TestCR(t *testing.T) {
testFile := bytes.NewBufferString("a,b,c\r1,2,3\r").Bytes()
testFile := []byte("a,b,c\r1,2,3\r")
delimiter, err := StringToRune(",")
r := csv.NewReader(SafeCSVReader(bufio.NewReader(bytes.NewReader(testFile))))
lines, err := r.ReadAll()
r := NewCSVReader(bytes.NewReader(testFile), delimiter)
lines, err := r.ReadAll()
if err != nil {
t.Errorf("An error occurred while reading the data: %v", err)
}
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
assert.NoError(t, err, "An error occurred while reading the data: %v", err)
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
}
func TestLF(t *testing.T) {
testFile := bytes.NewBufferString("a,b,c\n1,2,3\n").Bytes()
testFile := []byte("a,b,c\n1,2,3\n")
delimiter, err := StringToRune(",")
r := csv.NewReader(SafeCSVReader(bufio.NewReader(bytes.NewReader(testFile))))
lines, err := r.ReadAll()
r := NewCSVReader(bytes.NewReader(testFile), delimiter)
lines, err := r.ReadAll()
if err != nil {
t.Errorf("An error occurred while reading the data: %v", err)
}
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
assert.NoError(t, err, "An error occurred while reading the data: %v", err)
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
}
func TestCRLF(t *testing.T) {
testFile := bytes.NewBufferString("a,b,c\r\n1,2,3\r\n").Bytes()
testFile := []byte("a,b,c\r\n1,2,3\r\n")
delimiter, err := StringToRune(",")
r := csv.NewReader(SafeCSVReader(bufio.NewReader(bytes.NewReader(testFile))))
lines, err := r.ReadAll()
r := NewCSVReader(bytes.NewReader(testFile), delimiter)
lines, err := r.ReadAll()
if err != nil {
t.Errorf("An error occurred while reading the data: %v", err)
}
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
assert.NoError(t, err, "An error occurred while reading the data: %v", err)
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
}
func TestCRInQuote(t *testing.T) {
testFile := bytes.NewBufferString("a,\"foo,\rbar\",c\r1,\"2\r\n2\",3\r").Bytes()
testFile := []byte("a,\"foo,\rbar\",c\r1,\"2\r\n2\",3\r")
delimiter, err := StringToRune(",")
r := csv.NewReader(SafeCSVReader(bufio.NewReader(bytes.NewReader(testFile))))
lines, err := r.ReadAll()
r := NewCSVReader(bytes.NewReader(testFile), delimiter)
lines, err := r.ReadAll()
if err != nil {
t.Errorf("An error occurred while reading the data: %v", err)
}
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
if strings.Contains(lines[1][1], "\n\n") {
t.Error("The CRLF was converted to a LFLF")
}
assert.NoError(t, err, "An error occurred while reading the data: %v", err)
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
if strings.Contains(lines[1][1], "\n\n") {
t.Error("The CRLF was converted to a LFLF")
}
}
func TestCRLFEndOfBufferLength(t *testing.T) {
testFile := bytes.NewBuffer(make([]byte, 4096 * 2, 4096 * 2)).Bytes()
testFile[4095] = 13 // \r byte
testFile[4096] = 10 // \n byte
testFile := make([]byte, 4096*2, 4096*2)
testFile[4095] = 13 // \r byte
testFile[4096] = 10 // \n byte
delimiter, err := StringToRune(",")
r := csv.NewReader(SafeCSVReader(bufio.NewReader(bytes.NewReader(testFile))))
lines, err := r.ReadAll()
r := NewCSVReader(bytes.NewReader(testFile), delimiter)
lines, err := r.ReadAll()
if err != nil {
t.Errorf("An error occurred while reading the data: %v", err)
}
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
assert.NoError(t, err, "An error occurred while reading the data: %v", err)
if len(lines) != 2 {
t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines))
}
}