Home | History | Annotate | Download | only in bufio
      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package bufio_test
      6 
      7 import (
      8 	. "bufio"
      9 	"bytes"
     10 	"errors"
     11 	"io"
     12 	"strings"
     13 	"testing"
     14 	"unicode"
     15 	"unicode/utf8"
     16 )
     17 
// smallMaxTokenSize is the token size limit that the tests in this file
// hand to Scanner.MaxTokenSize.
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
     19 
     20 // Test white space table matches the Unicode definition.
     21 func TestSpace(t *testing.T) {
     22 	for r := rune(0); r <= utf8.MaxRune; r++ {
     23 		if IsSpace(r) != unicode.IsSpace(r) {
     24 			t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r))
     25 		}
     26 	}
     27 }
     28 
     29 var scanTests = []string{
     30 	"",
     31 	"a",
     32 	"",
     33 	"",
     34 	"\x81",   // UTF-8 error
     35 	"\uFFFD", // correctly encoded RuneError
     36 	"abcdefgh",
     37 	"abc def\n\t\tgh    ",
     38 	"abc\x81\uFFFD\x82abc",
     39 }
     40 
     41 func TestScanByte(t *testing.T) {
     42 	for n, test := range scanTests {
     43 		buf := strings.NewReader(test)
     44 		s := NewScanner(buf)
     45 		s.Split(ScanBytes)
     46 		var i int
     47 		for i = 0; s.Scan(); i++ {
     48 			if b := s.Bytes(); len(b) != 1 || b[0] != test[i] {
     49 				t.Errorf("#%d: %d: expected %q got %q", n, i, test, b)
     50 			}
     51 		}
     52 		if i != len(test) {
     53 			t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i)
     54 		}
     55 		err := s.Err()
     56 		if err != nil {
     57 			t.Errorf("#%d: %v", n, err)
     58 		}
     59 	}
     60 }
     61 
     62 // Test that the rune splitter returns same sequence of runes (not bytes) as for range string.
     63 func TestScanRune(t *testing.T) {
     64 	for n, test := range scanTests {
     65 		buf := strings.NewReader(test)
     66 		s := NewScanner(buf)
     67 		s.Split(ScanRunes)
     68 		var i, runeCount int
     69 		var expect rune
     70 		// Use a string range loop to validate the sequence of runes.
     71 		for i, expect = range string(test) {
     72 			if !s.Scan() {
     73 				break
     74 			}
     75 			runeCount++
     76 			got, _ := utf8.DecodeRune(s.Bytes())
     77 			if got != expect {
     78 				t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got)
     79 			}
     80 		}
     81 		if s.Scan() {
     82 			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
     83 		}
     84 		testRuneCount := utf8.RuneCountInString(test)
     85 		if runeCount != testRuneCount {
     86 			t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount)
     87 		}
     88 		err := s.Err()
     89 		if err != nil {
     90 			t.Errorf("#%d: %v", n, err)
     91 		}
     92 	}
     93 }
     94 
// wordScanTests holds the inputs for TestScanWords: empty and
// whitespace-only strings, single words, and words separated or surrounded
// by a variety of space characters.
var wordScanTests = []string{
	"",
	" ",
	"\n",
	"a",
	" a ",
	"abc def",
	" abc def ",
	" abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n",
}
    105 
    106 // Test that the word splitter returns the same data as strings.Fields.
    107 func TestScanWords(t *testing.T) {
    108 	for n, test := range wordScanTests {
    109 		buf := strings.NewReader(test)
    110 		s := NewScanner(buf)
    111 		s.Split(ScanWords)
    112 		words := strings.Fields(test)
    113 		var wordCount int
    114 		for wordCount = 0; wordCount < len(words); wordCount++ {
    115 			if !s.Scan() {
    116 				break
    117 			}
    118 			got := s.Text()
    119 			if got != words[wordCount] {
    120 				t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got)
    121 			}
    122 		}
    123 		if s.Scan() {
    124 			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
    125 		}
    126 		if wordCount != len(words) {
    127 			t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount)
    128 		}
    129 		err := s.Err()
    130 		if err != nil {
    131 			t.Errorf("#%d: %v", n, err)
    132 		}
    133 	}
    134 }
    135 
    136 // slowReader is a reader that returns only a few bytes at a time, to test the incremental
    137 // reads in Scanner.Scan.
    138 type slowReader struct {
    139 	max int
    140 	buf io.Reader
    141 }
    142 
    143 func (sr *slowReader) Read(p []byte) (n int, err error) {
    144 	if len(p) > sr.max {
    145 		p = p[0:sr.max]
    146 	}
    147 	return sr.buf.Read(p)
    148 }
    149 
    150 // genLine writes to buf a predictable but non-trivial line of text of length
    151 // n, including the terminal newline and an occasional carriage return.
    152 // If addNewline is false, the \r and \n are not emitted.
    153 func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
    154 	buf.Reset()
    155 	doCR := lineNum%5 == 0
    156 	if doCR {
    157 		n--
    158 	}
    159 	for i := 0; i < n-1; i++ { // Stop early for \n.
    160 		c := 'a' + byte(lineNum+i)
    161 		if c == '\n' || c == '\r' { // Don't confuse us.
    162 			c = 'N'
    163 		}
    164 		buf.WriteByte(c)
    165 	}
    166 	if addNewline {
    167 		if doCR {
    168 			buf.WriteByte('\r')
    169 		}
    170 		buf.WriteByte('\n')
    171 	}
    172 	return
    173 }
    174 
    175 // Test the line splitter, including some carriage returns but no long lines.
    176 func TestScanLongLines(t *testing.T) {
    177 	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
    178 	tmp := new(bytes.Buffer)
    179 	buf := new(bytes.Buffer)
    180 	lineNum := 0
    181 	j := 0
    182 	for i := 0; i < 2*smallMaxTokenSize; i++ {
    183 		genLine(tmp, lineNum, j, true)
    184 		if j < smallMaxTokenSize {
    185 			j++
    186 		} else {
    187 			j--
    188 		}
    189 		buf.Write(tmp.Bytes())
    190 		lineNum++
    191 	}
    192 	s := NewScanner(&slowReader{1, buf})
    193 	s.Split(ScanLines)
    194 	s.MaxTokenSize(smallMaxTokenSize)
    195 	j = 0
    196 	for lineNum := 0; s.Scan(); lineNum++ {
    197 		genLine(tmp, lineNum, j, false)
    198 		if j < smallMaxTokenSize {
    199 			j++
    200 		} else {
    201 			j--
    202 		}
    203 		line := tmp.String() // We use the string-valued token here, for variety.
    204 		if s.Text() != line {
    205 			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line)
    206 		}
    207 	}
    208 	err := s.Err()
    209 	if err != nil {
    210 		t.Fatal(err)
    211 	}
    212 }
    213 
    214 // Test that the line splitter errors out on a long line.
    215 func TestScanLineTooLong(t *testing.T) {
    216 	const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
    217 	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
    218 	tmp := new(bytes.Buffer)
    219 	buf := new(bytes.Buffer)
    220 	lineNum := 0
    221 	j := 0
    222 	for i := 0; i < 2*smallMaxTokenSize; i++ {
    223 		genLine(tmp, lineNum, j, true)
    224 		j++
    225 		buf.Write(tmp.Bytes())
    226 		lineNum++
    227 	}
    228 	s := NewScanner(&slowReader{3, buf})
    229 	s.Split(ScanLines)
    230 	s.MaxTokenSize(smallMaxTokenSize)
    231 	j = 0
    232 	for lineNum := 0; s.Scan(); lineNum++ {
    233 		genLine(tmp, lineNum, j, false)
    234 		if j < smallMaxTokenSize {
    235 			j++
    236 		} else {
    237 			j--
    238 		}
    239 		line := tmp.Bytes()
    240 		if !bytes.Equal(s.Bytes(), line) {
    241 			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
    242 		}
    243 	}
    244 	err := s.Err()
    245 	if err != ErrTooLong {
    246 		t.Fatalf("expected ErrTooLong; got %s", err)
    247 	}
    248 }
    249 
    250 // Test that the line splitter handles a final line without a newline.
    251 func testNoNewline(text string, lines []string, t *testing.T) {
    252 	buf := strings.NewReader(text)
    253 	s := NewScanner(&slowReader{7, buf})
    254 	s.Split(ScanLines)
    255 	for lineNum := 0; s.Scan(); lineNum++ {
    256 		line := lines[lineNum]
    257 		if s.Text() != line {
    258 			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
    259 		}
    260 	}
    261 	err := s.Err()
    262 	if err != nil {
    263 		t.Fatal(err)
    264 	}
    265 }
    266 
// noNewlineLines holds a two-line input whose final line has no newline.
// NOTE(review): nothing in the visible part of this file references it;
// confirm whether it is still needed.
var noNewlineLines = []string{
	"abcdefghijklmn\nopqrstuvwxyz",
}
    270 
    271 // Test that the line splitter handles a final line without a newline.
    272 func TestScanLineNoNewline(t *testing.T) {
    273 	const text = "abcdefghijklmn\nopqrstuvwxyz"
    274 	lines := []string{
    275 		"abcdefghijklmn",
    276 		"opqrstuvwxyz",
    277 	}
    278 	testNoNewline(text, lines, t)
    279 }
    280 
    281 // Test that the line splitter handles a final line with a carriage return but no newline.
    282 func TestScanLineReturnButNoNewline(t *testing.T) {
    283 	const text = "abcdefghijklmn\nopqrstuvwxyz\r"
    284 	lines := []string{
    285 		"abcdefghijklmn",
    286 		"opqrstuvwxyz",
    287 	}
    288 	testNoNewline(text, lines, t)
    289 }
    290 
    291 // Test that the line splitter handles a final empty line.
    292 func TestScanLineEmptyFinalLine(t *testing.T) {
    293 	const text = "abcdefghijklmn\nopqrstuvwxyz\n\n"
    294 	lines := []string{
    295 		"abcdefghijklmn",
    296 		"opqrstuvwxyz",
    297 		"",
    298 	}
    299 	testNoNewline(text, lines, t)
    300 }
    301 
    302 // Test that the line splitter handles a final empty line with a carriage return but no newline.
    303 func TestScanLineEmptyFinalLineWithCR(t *testing.T) {
    304 	const text = "abcdefghijklmn\nopqrstuvwxyz\n\r"
    305 	lines := []string{
    306 		"abcdefghijklmn",
    307 		"opqrstuvwxyz",
    308 		"",
    309 	}
    310 	testNoNewline(text, lines, t)
    311 }
    312 
// testError is the sentinel error that the custom split functions in these
// tests return, so the tests can check that the Scanner reports it back.
var testError = errors.New("testError")
    314 
    315 // Test the correct error is returned when the split function errors out.
    316 func TestSplitError(t *testing.T) {
    317 	// Create a split function that delivers a little data, then a predictable error.
    318 	numSplits := 0
    319 	const okCount = 7
    320 	errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
    321 		if atEOF {
    322 			panic("didn't get enough data")
    323 		}
    324 		if numSplits >= okCount {
    325 			return 0, nil, testError
    326 		}
    327 		numSplits++
    328 		return 1, data[0:1], nil
    329 	}
    330 	// Read the data.
    331 	const text = "abcdefghijklmnopqrstuvwxyz"
    332 	buf := strings.NewReader(text)
    333 	s := NewScanner(&slowReader{1, buf})
    334 	s.Split(errorSplit)
    335 	var i int
    336 	for i = 0; s.Scan(); i++ {
    337 		if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
    338 			t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
    339 		}
    340 	}
    341 	// Check correct termination location and error.
    342 	if i != okCount {
    343 		t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
    344 	}
    345 	err := s.Err()
    346 	if err != testError {
    347 		t.Fatalf("expected %q got %v", testError, err)
    348 	}
    349 }
    350 
    351 // Test that an EOF is overridden by a user-generated scan error.
    352 func TestErrAtEOF(t *testing.T) {
    353 	s := NewScanner(strings.NewReader("1 2 33"))
    354 	// This spitter will fail on last entry, after s.err==EOF.
    355 	split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
    356 		advance, token, err = ScanWords(data, atEOF)
    357 		if len(token) > 1 {
    358 			if s.ErrOrEOF() != io.EOF {
    359 				t.Fatal("not testing EOF")
    360 			}
    361 			err = testError
    362 		}
    363 		return
    364 	}
    365 	s.Split(split)
    366 	for s.Scan() {
    367 	}
    368 	if s.Err() != testError {
    369 		t.Fatal("wrong error:", s.Err())
    370 	}
    371 }
    372 
    373 // Test for issue 5268.
    374 type alwaysError struct{}
    375 
    376 func (alwaysError) Read(p []byte) (int, error) {
    377 	return 0, io.ErrUnexpectedEOF
    378 }
    379 
    380 func TestNonEOFWithEmptyRead(t *testing.T) {
    381 	scanner := NewScanner(alwaysError{})
    382 	for scanner.Scan() {
    383 		t.Fatal("read should fail")
    384 	}
    385 	err := scanner.Err()
    386 	if err != io.ErrUnexpectedEOF {
    387 		t.Errorf("unexpected error: %v", err)
    388 	}
    389 }
    390 
    391 // Test that Scan finishes if we have endless empty reads.
    392 type endlessZeros struct{}
    393 
    394 func (endlessZeros) Read(p []byte) (int, error) {
    395 	return 0, nil
    396 }
    397 
    398 func TestBadReader(t *testing.T) {
    399 	scanner := NewScanner(endlessZeros{})
    400 	for scanner.Scan() {
    401 		t.Fatal("read should fail")
    402 	}
    403 	err := scanner.Err()
    404 	if err != io.ErrNoProgress {
    405 		t.Errorf("unexpected error: %v", err)
    406 	}
    407 }
    408 
    409 func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
    410 	const word = "ipsum"
    411 	s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
    412 	scanner := NewScanner(strings.NewReader(s))
    413 	scanner.MaxTokenSize(smallMaxTokenSize)
    414 	scanner.Split(ScanWords)
    415 	if !scanner.Scan() {
    416 		t.Fatalf("scan failed: %v", scanner.Err())
    417 	}
    418 	if token := scanner.Text(); token != word {
    419 		t.Fatalf("unexpected token: %v", token)
    420 	}
    421 }
    422 
    423 // Test that empty tokens, including at end of line or end of file, are found by the scanner.
    424 // Issue 8672: Could miss final empty token.
    425 
    426 func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
    427 	for i := 0; i < len(data); i++ {
    428 		if data[i] == ',' {
    429 			return i + 1, data[:i], nil
    430 		}
    431 	}
    432 	if !atEOF {
    433 		return 0, nil, nil
    434 	}
    435 	return 0, data, nil
    436 }
    437 
    438 func TestEmptyTokens(t *testing.T) {
    439 	s := NewScanner(strings.NewReader("1,2,3,"))
    440 	values := []string{"1", "2", "3", ""}
    441 	s.Split(commaSplit)
    442 	var i int
    443 	for i = 0; i < len(values); i++ {
    444 		if !s.Scan() {
    445 			break
    446 		}
    447 		if s.Text() != values[i] {
    448 			t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
    449 		}
    450 	}
    451 	if i != len(values) {
    452 		t.Errorf("got %d fields, expected %d", i, len(values))
    453 	}
    454 	if err := s.Err(); err != nil {
    455 		t.Fatal(err)
    456 	}
    457 }
    458 
    459 func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
    460 	if len(data) > 0 {
    461 		return 1, data[:1], nil
    462 	}
    463 	return 0, data, nil
    464 }
    465 
    466 func TestDontLoopForever(t *testing.T) {
    467 	s := NewScanner(strings.NewReader("abc"))
    468 	s.Split(loopAtEOFSplit)
    469 	// Expect a panic
    470 	defer func() {
    471 		err := recover()
    472 		if err == nil {
    473 			t.Fatal("should have panicked")
    474 		}
    475 		if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") {
    476 			panic(err)
    477 		}
    478 	}()
    479 	for count := 0; s.Scan(); count++ {
    480 		if count > 1000 {
    481 			t.Fatal("looping")
    482 		}
    483 	}
    484 	if s.Err() != nil {
    485 		t.Fatal("after scan:", s.Err())
    486 	}
    487 }
    488 
    489 func TestBlankLines(t *testing.T) {
    490 	s := NewScanner(strings.NewReader(strings.Repeat("\n", 1000)))
    491 	for count := 0; s.Scan(); count++ {
    492 		if count > 2000 {
    493 			t.Fatal("looping")
    494 		}
    495 	}
    496 	if s.Err() != nil {
    497 		t.Fatal("after scan:", s.Err())
    498 	}
    499 }
    500 
    501 type countdown int
    502 
    503 func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
    504 	if *c > 0 {
    505 		*c--
    506 		return 1, data[:1], nil
    507 	}
    508 	return 0, nil, nil
    509 }
    510 
    511 // Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
    512 func TestEmptyLinesOK(t *testing.T) {
    513 	c := countdown(10000)
    514 	s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000)))
    515 	s.Split(c.split)
    516 	for s.Scan() {
    517 	}
    518 	if s.Err() != nil {
    519 		t.Fatal("after scan:", s.Err())
    520 	}
    521 	if c != 0 {
    522 		t.Fatalf("stopped with %d left to process", c)
    523 	}
    524 }
    525