Home | History | Annotate | Download | only in csv
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package csv
      6 
      7 import (
      8 	"reflect"
      9 	"strings"
     10 	"testing"
     11 )
     12 
     13 var readTests = []struct { // readTests is the table of cases that TestRead feeds through a Reader.
     14 	Name               string // label prefixed to every failure message
     15 	Input              string // CSV text handed to NewReader
     16 	Output             [][]string // records ReadAll must return; only compared when Error is empty
     17 	UseFieldsPerRecord bool // false (default) means the Reader's FieldsPerRecord is forced to -1 and the FieldsPerRecord field below is ignored
     18 
     19 	// These fields are copied into the Reader (Comma only if non-zero, FieldsPerRecord only if UseFieldsPerRecord is set)
     20 	Comma            rune
     21 	Comment          rune
     22 	FieldsPerRecord  int
     23 	LazyQuotes       bool
     24 	TrailingComma    bool
     25 	TrimLeadingSpace bool
     26 
     27 	Error  string // substring expected in err.Error(); empty means no error is expected
     28 	Line   int // Expected error line if != 0 (compared against ParseError.Line)
     29 	Column int // Expected error column if Line != 0 (compared against ParseError.Column)
     30 }{
     31 	{
     32 		Name:   "Simple",
     33 		Input:  "a,b,c\n",
     34 		Output: [][]string{{"a", "b", "c"}},
     35 	},
     36 	{
     37 		Name:   "CRLF",
     38 		Input:  "a,b\r\nc,d\r\n",
     39 		Output: [][]string{{"a", "b"}, {"c", "d"}},
     40 	},
     41 	{
     42 		Name:   "BareCR", // a \r not followed by \n is expected to stay in the field data
     43 		Input:  "a,b\rc,d\r\n",
     44 		Output: [][]string{{"a", "b\rc", "d"}},
     45 	},
     46 	{
     47 		Name:               "RFC4180test",
     48 		UseFieldsPerRecord: true, // FieldsPerRecord itself is left at its zero value here
     49 		Input: `#field1,field2,field3
     50 "aaa","bb
     51 b","ccc"
     52 "a,a","b""bb","ccc"
     53 zzz,yyy,xxx
     54 `,
     55 		Output: [][]string{
     56 			{"#field1", "field2", "field3"},
     57 			{"aaa", "bb\nb", "ccc"},
     58 			{"a,a", `b"bb`, "ccc"},
     59 			{"zzz", "yyy", "xxx"},
     60 		},
     61 	},
     62 	{
     63 		Name:   "NoEOLTest",
     64 		Input:  "a,b,c",
     65 		Output: [][]string{{"a", "b", "c"}},
     66 	},
     67 	{
     68 		Name:   "Semicolon",
     69 		Comma:  ';',
     70 		Input:  "a;b;c\n",
     71 		Output: [][]string{{"a", "b", "c"}},
     72 	},
     73 	{
     74 		Name: "MultiLine",
     75 		Input: `"two
     76 line","one line","three
     77 line
     78 field"`,
     79 		Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
     80 	},
     81 	{
     82 		Name:  "BlankLine", // empty lines are expected to produce no records
     83 		Input: "a,b,c\n\nd,e,f\n\n",
     84 		Output: [][]string{
     85 			{"a", "b", "c"},
     86 			{"d", "e", "f"},
     87 		},
     88 	},
     89 	{
     90 		Name:               "BlankLineFieldCount", // same input as BlankLine, with UseFieldsPerRecord set
     91 		Input:              "a,b,c\n\nd,e,f\n\n",
     92 		UseFieldsPerRecord: true,
     93 		Output: [][]string{
     94 			{"a", "b", "c"},
     95 			{"d", "e", "f"},
     96 		},
     97 	},
     98 	{
     99 		Name:             "TrimSpace",
    100 		Input:            " a,  b,   c\n",
    101 		TrimLeadingSpace: true,
    102 		Output:           [][]string{{"a", "b", "c"}},
    103 	},
    104 	{
    105 		Name:   "LeadingSpace", // same input as TrimSpace; without TrimLeadingSpace the spaces are expected back
    106 		Input:  " a,  b,   c\n",
    107 		Output: [][]string{{" a", "  b", "   c"}},
    108 	},
    109 	{
    110 		Name:    "Comment",
    111 		Comment: '#',
    112 		Input:   "#1,2,3\na,b,c\n#comment",
    113 		Output:  [][]string{{"a", "b", "c"}},
    114 	},
    115 	{
    116 		Name:   "NoComment", // Comment is unset, so the '#' line is expected back as data
    117 		Input:  "#1,2,3\na,b,c",
    118 		Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
    119 	},
    120 	{
    121 		Name:       "LazyQuotes",
    122 		LazyQuotes: true,
    123 		Input:      `a "word","1"2",a","b`,
    124 		Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
    125 	},
    126 	{
    127 		Name:       "BareQuotes",
    128 		LazyQuotes: true,
    129 		Input:      `a "word","1"2",a"`,
    130 		Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
    131 	},
    132 	{
    133 		Name:       "BareDoubleQuotes",
    134 		LazyQuotes: true,
    135 		Input:      `a""b,c`,
    136 		Output:     [][]string{{`a""b`, `c`}},
    137 	},
    138 	{
    139 		Name:  "BadDoubleQuotes", // same input as BareDoubleQuotes, but LazyQuotes is off so an error is expected
    140 		Input: `a""b,c`,
    141 		Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
    142 	},
    143 	{
    144 		Name:             "TrimQuote",
    145 		Input:            ` "a"," b",c`,
    146 		TrimLeadingSpace: true,
    147 		Output:           [][]string{{"a", " b", "c"}},
    148 	},
    149 	{
    150 		Name:  "BadBareQuote",
    151 		Input: `a "word","b"`,
    152 		Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
    153 	},
    154 	{
    155 		Name:  "BadTrailingQuote",
    156 		Input: `"a word",b"`,
    157 		Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
    158 	},
    159 	{
    160 		Name:  "ExtraneousQuote",
    161 		Input: `"a "word","b"`,
    162 		Error: `extraneous " in field`, Line: 1, Column: 3,
    163 	},
    164 	{
    165 		Name:               "BadFieldCount",
    166 		UseFieldsPerRecord: true,
    167 		Input:              "a,b,c\nd,e",
    168 		Error:              "wrong number of fields", Line: 2,
    169 	},
    170 	{
    171 		Name:               "BadFieldCount1",
    172 		UseFieldsPerRecord: true,
    173 		FieldsPerRecord:    2,
    174 		Input:              `a,b,c`,
    175 		Error:              "wrong number of fields", Line: 1,
    176 	},
    177 	{
    178 		Name:   "FieldCount", // same input as BadFieldCount; UseFieldsPerRecord is off, so no error is expected
    179 		Input:  "a,b,c\nd,e",
    180 		Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
    181 	},
    182 	{
    183 		Name:   "TrailingCommaEOF",
    184 		Input:  "a,b,c,",
    185 		Output: [][]string{{"a", "b", "c", ""}},
    186 	},
    187 	{
    188 		Name:   "TrailingCommaEOL",
    189 		Input:  "a,b,c,\n",
    190 		Output: [][]string{{"a", "b", "c", ""}},
    191 	},
    192 	{
    193 		Name:             "TrailingCommaSpaceEOF",
    194 		TrimLeadingSpace: true,
    195 		Input:            "a,b,c, ",
    196 		Output:           [][]string{{"a", "b", "c", ""}},
    197 	},
    198 	{
    199 		Name:             "TrailingCommaSpaceEOL",
    200 		TrimLeadingSpace: true,
    201 		Input:            "a,b,c, \n",
    202 		Output:           [][]string{{"a", "b", "c", ""}},
    203 	},
    204 	{
    205 		Name:             "TrailingCommaLine3",
    206 		TrimLeadingSpace: true,
    207 		Input:            "a,b,c\nd,e,f\ng,hi,",
    208 		Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
    209 	},
    210 	{
    211 		Name:   "NotTrailingComma3", // same input as TrailingCommaSpaceEOL; without TrimLeadingSpace the last field is expected to be " "
    212 		Input:  "a,b,c, \n",
    213 		Output: [][]string{{"a", "b", "c", " "}},
    214 	},
    215 	{
    216 		Name:          "CommaFieldTest",
    217 		TrailingComma: true,
    218 		Input: `x,y,z,w
    219 x,y,z,
    220 x,y,,
    221 x,,,
    222 ,,,
    223 "x","y","z","w"
    224 "x","y","z",""
    225 "x","y","",""
    226 "x","","",""
    227 "","","",""
    228 `,
    229 		Output: [][]string{
    230 			{"x", "y", "z", "w"},
    231 			{"x", "y", "z", ""},
    232 			{"x", "y", "", ""},
    233 			{"x", "", "", ""},
    234 			{"", "", "", ""},
    235 			{"x", "y", "z", "w"},
    236 			{"x", "y", "z", ""},
    237 			{"x", "y", "", ""},
    238 			{"x", "", "", ""},
    239 			{"", "", "", ""},
    240 		},
    241 	},
    242 	{
    243 		Name:             "TrailingCommaIneffective1", // differs from TrailingCommaIneffective2 only in TrailingComma; the expected output is the same
    244 		TrailingComma:    true,
    245 		TrimLeadingSpace: true,
    246 		Input:            "a,b,\nc,d,e",
    247 		Output: [][]string{
    248 			{"a", "b", ""},
    249 			{"c", "d", "e"},
    250 		},
    251 	},
    252 	{
    253 		Name:             "TrailingCommaIneffective2", // differs from TrailingCommaIneffective1 only in TrailingComma; the expected output is the same
    254 		TrailingComma:    false,
    255 		TrimLeadingSpace: true,
    256 		Input:            "a,b,\nc,d,e",
    257 		Output: [][]string{
    258 			{"a", "b", ""},
    259 			{"c", "d", "e"},
    260 		},
    261 	},
    262 }
    263 
    264 func TestRead(t *testing.T) {
    265 	for _, tt := range readTests {
    266 		r := NewReader(strings.NewReader(tt.Input))
    267 		r.Comment = tt.Comment
    268 		if tt.UseFieldsPerRecord {
    269 			r.FieldsPerRecord = tt.FieldsPerRecord
    270 		} else {
    271 			r.FieldsPerRecord = -1
    272 		}
    273 		r.LazyQuotes = tt.LazyQuotes
    274 		r.TrailingComma = tt.TrailingComma
    275 		r.TrimLeadingSpace = tt.TrimLeadingSpace
    276 		if tt.Comma != 0 {
    277 			r.Comma = tt.Comma
    278 		}
    279 		out, err := r.ReadAll()
    280 		perr, _ := err.(*ParseError)
    281 		if tt.Error != "" {
    282 			if err == nil || !strings.Contains(err.Error(), tt.Error) {
    283 				t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
    284 			} else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
    285 				t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
    286 			}
    287 		} else if err != nil {
    288 			t.Errorf("%s: unexpected error %v", tt.Name, err)
    289 		} else if !reflect.DeepEqual(out, tt.Output) {
    290 			t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
    291 		}
    292 	}
    293 }
    294 
    295 func BenchmarkRead(b *testing.B) {
    296 	data := `x,y,z,w
    297 x,y,z,
    298 x,y,,
    299 x,,,
    300 ,,,
    301 "x","y","z","w"
    302 "x","y","z",""
    303 "x","y","",""
    304 "x","","",""
    305 "","","",""
    306 `
    307 
    308 	for i := 0; i < b.N; i++ {
    309 		_, err := NewReader(strings.NewReader(data)).ReadAll()
    310 
    311 		if err != nil {
    312 			b.Fatalf("could not read data: %s", err)
    313 		}
    314 	}
    315 }
    316