Home | History | Annotate | Download | only in bufio
      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package bufio_test
      6 
      7 import (
      8 	. "bufio"
      9 	"bytes"
     10 	"errors"
     11 	"io"
     12 	"strings"
     13 	"testing"
     14 	"unicode"
     15 	"unicode/utf8"
     16 )
     17 
// smallMaxTokenSize is the token-size limit that the tests in this file pass
// to the scanner's MaxTokenSize hook.
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
     19 
     20 // Test white space table matches the Unicode definition.
     21 func TestSpace(t *testing.T) {
     22 	for r := rune(0); r <= utf8.MaxRune; r++ {
     23 		if IsSpace(r) != unicode.IsSpace(r) {
     24 			t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r))
     25 		}
     26 	}
     27 }
     28 
     29 var scanTests = []string{
     30 	"",
     31 	"a",
     32 	"",
     33 	"",
     34 	"\x81",   // UTF-8 error
     35 	"\uFFFD", // correctly encoded RuneError
     36 	"abcdefgh",
     37 	"abc def\n\t\tgh    ",
     38 	"abc\x81\uFFFD\x82abc",
     39 }
     40 
     41 func TestScanByte(t *testing.T) {
     42 	for n, test := range scanTests {
     43 		buf := strings.NewReader(test)
     44 		s := NewScanner(buf)
     45 		s.Split(ScanBytes)
     46 		var i int
     47 		for i = 0; s.Scan(); i++ {
     48 			if b := s.Bytes(); len(b) != 1 || b[0] != test[i] {
     49 				t.Errorf("#%d: %d: expected %q got %q", n, i, test, b)
     50 			}
     51 		}
     52 		if i != len(test) {
     53 			t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i)
     54 		}
     55 		err := s.Err()
     56 		if err != nil {
     57 			t.Errorf("#%d: %v", n, err)
     58 		}
     59 	}
     60 }
     61 
     62 // Test that the rune splitter returns same sequence of runes (not bytes) as for range string.
     63 func TestScanRune(t *testing.T) {
     64 	for n, test := range scanTests {
     65 		buf := strings.NewReader(test)
     66 		s := NewScanner(buf)
     67 		s.Split(ScanRunes)
     68 		var i, runeCount int
     69 		var expect rune
     70 		// Use a string range loop to validate the sequence of runes.
     71 		for i, expect = range string(test) {
     72 			if !s.Scan() {
     73 				break
     74 			}
     75 			runeCount++
     76 			got, _ := utf8.DecodeRune(s.Bytes())
     77 			if got != expect {
     78 				t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got)
     79 			}
     80 		}
     81 		if s.Scan() {
     82 			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
     83 		}
     84 		testRuneCount := utf8.RuneCountInString(test)
     85 		if runeCount != testRuneCount {
     86 			t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount)
     87 		}
     88 		err := s.Err()
     89 		if err != nil {
     90 			t.Errorf("#%d: %v", n, err)
     91 		}
     92 	}
     93 }
     94 
// wordScanTests holds the inputs for TestScanWords: empty and
// whitespace-only strings, single words with and without surrounding spaces,
// and a line that separates words with several different space characters
// (\t, \n, \r, \f, \v, U+0085 and U+00A0).
var wordScanTests = []string{
	"",
	" ",
	"\n",
	"a",
	" a ",
	"abc def",
	" abc def ",
	" abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n",
}
    105 
    106 // Test that the word splitter returns the same data as strings.Fields.
    107 func TestScanWords(t *testing.T) {
    108 	for n, test := range wordScanTests {
    109 		buf := strings.NewReader(test)
    110 		s := NewScanner(buf)
    111 		s.Split(ScanWords)
    112 		words := strings.Fields(test)
    113 		var wordCount int
    114 		for wordCount = 0; wordCount < len(words); wordCount++ {
    115 			if !s.Scan() {
    116 				break
    117 			}
    118 			got := s.Text()
    119 			if got != words[wordCount] {
    120 				t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got)
    121 			}
    122 		}
    123 		if s.Scan() {
    124 			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
    125 		}
    126 		if wordCount != len(words) {
    127 			t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount)
    128 		}
    129 		err := s.Err()
    130 		if err != nil {
    131 			t.Errorf("#%d: %v", n, err)
    132 		}
    133 	}
    134 }
    135 
    136 // slowReader is a reader that returns only a few bytes at a time, to test the incremental
    137 // reads in Scanner.Scan.
    138 type slowReader struct {
    139 	max int
    140 	buf io.Reader
    141 }
    142 
    143 func (sr *slowReader) Read(p []byte) (n int, err error) {
    144 	if len(p) > sr.max {
    145 		p = p[0:sr.max]
    146 	}
    147 	return sr.buf.Read(p)
    148 }
    149 
    150 // genLine writes to buf a predictable but non-trivial line of text of length
    151 // n, including the terminal newline and an occasional carriage return.
    152 // If addNewline is false, the \r and \n are not emitted.
    153 func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
    154 	buf.Reset()
    155 	doCR := lineNum%5 == 0
    156 	if doCR {
    157 		n--
    158 	}
    159 	for i := 0; i < n-1; i++ { // Stop early for \n.
    160 		c := 'a' + byte(lineNum+i)
    161 		if c == '\n' || c == '\r' { // Don't confuse us.
    162 			c = 'N'
    163 		}
    164 		buf.WriteByte(c)
    165 	}
    166 	if addNewline {
    167 		if doCR {
    168 			buf.WriteByte('\r')
    169 		}
    170 		buf.WriteByte('\n')
    171 	}
    172 }
    173 
    174 // Test the line splitter, including some carriage returns but no long lines.
    175 func TestScanLongLines(t *testing.T) {
    176 	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
    177 	tmp := new(bytes.Buffer)
    178 	buf := new(bytes.Buffer)
    179 	lineNum := 0
    180 	j := 0
    181 	for i := 0; i < 2*smallMaxTokenSize; i++ {
    182 		genLine(tmp, lineNum, j, true)
    183 		if j < smallMaxTokenSize {
    184 			j++
    185 		} else {
    186 			j--
    187 		}
    188 		buf.Write(tmp.Bytes())
    189 		lineNum++
    190 	}
    191 	s := NewScanner(&slowReader{1, buf})
    192 	s.Split(ScanLines)
    193 	s.MaxTokenSize(smallMaxTokenSize)
    194 	j = 0
    195 	for lineNum := 0; s.Scan(); lineNum++ {
    196 		genLine(tmp, lineNum, j, false)
    197 		if j < smallMaxTokenSize {
    198 			j++
    199 		} else {
    200 			j--
    201 		}
    202 		line := tmp.String() // We use the string-valued token here, for variety.
    203 		if s.Text() != line {
    204 			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line)
    205 		}
    206 	}
    207 	err := s.Err()
    208 	if err != nil {
    209 		t.Fatal(err)
    210 	}
    211 }
    212 
    213 // Test that the line splitter errors out on a long line.
    214 func TestScanLineTooLong(t *testing.T) {
    215 	const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
    216 	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
    217 	tmp := new(bytes.Buffer)
    218 	buf := new(bytes.Buffer)
    219 	lineNum := 0
    220 	j := 0
    221 	for i := 0; i < 2*smallMaxTokenSize; i++ {
    222 		genLine(tmp, lineNum, j, true)
    223 		j++
    224 		buf.Write(tmp.Bytes())
    225 		lineNum++
    226 	}
    227 	s := NewScanner(&slowReader{3, buf})
    228 	s.Split(ScanLines)
    229 	s.MaxTokenSize(smallMaxTokenSize)
    230 	j = 0
    231 	for lineNum := 0; s.Scan(); lineNum++ {
    232 		genLine(tmp, lineNum, j, false)
    233 		if j < smallMaxTokenSize {
    234 			j++
    235 		} else {
    236 			j--
    237 		}
    238 		line := tmp.Bytes()
    239 		if !bytes.Equal(s.Bytes(), line) {
    240 			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
    241 		}
    242 	}
    243 	err := s.Err()
    244 	if err != ErrTooLong {
    245 		t.Fatalf("expected ErrTooLong; got %s", err)
    246 	}
    247 }
    248 
    249 // Test that the line splitter handles a final line without a newline.
    250 func testNoNewline(text string, lines []string, t *testing.T) {
    251 	buf := strings.NewReader(text)
    252 	s := NewScanner(&slowReader{7, buf})
    253 	s.Split(ScanLines)
    254 	for lineNum := 0; s.Scan(); lineNum++ {
    255 		line := lines[lineNum]
    256 		if s.Text() != line {
    257 			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
    258 		}
    259 	}
    260 	err := s.Err()
    261 	if err != nil {
    262 		t.Fatal(err)
    263 	}
    264 }
    265 
    266 // Test that the line splitter handles a final line without a newline.
    267 func TestScanLineNoNewline(t *testing.T) {
    268 	const text = "abcdefghijklmn\nopqrstuvwxyz"
    269 	lines := []string{
    270 		"abcdefghijklmn",
    271 		"opqrstuvwxyz",
    272 	}
    273 	testNoNewline(text, lines, t)
    274 }
    275 
    276 // Test that the line splitter handles a final line with a carriage return but no newline.
    277 func TestScanLineReturnButNoNewline(t *testing.T) {
    278 	const text = "abcdefghijklmn\nopqrstuvwxyz\r"
    279 	lines := []string{
    280 		"abcdefghijklmn",
    281 		"opqrstuvwxyz",
    282 	}
    283 	testNoNewline(text, lines, t)
    284 }
    285 
    286 // Test that the line splitter handles a final empty line.
    287 func TestScanLineEmptyFinalLine(t *testing.T) {
    288 	const text = "abcdefghijklmn\nopqrstuvwxyz\n\n"
    289 	lines := []string{
    290 		"abcdefghijklmn",
    291 		"opqrstuvwxyz",
    292 		"",
    293 	}
    294 	testNoNewline(text, lines, t)
    295 }
    296 
    297 // Test that the line splitter handles a final empty line with a carriage return but no newline.
    298 func TestScanLineEmptyFinalLineWithCR(t *testing.T) {
    299 	const text = "abcdefghijklmn\nopqrstuvwxyz\n\r"
    300 	lines := []string{
    301 		"abcdefghijklmn",
    302 		"opqrstuvwxyz",
    303 		"",
    304 	}
    305 	testNoNewline(text, lines, t)
    306 }
    307 
// testError is the sentinel error that the custom split functions in
// TestSplitError and TestErrAtEOF return, and that those tests expect to get
// back from Scanner.Err.
var testError = errors.New("testError")
    309 
    310 // Test the correct error is returned when the split function errors out.
    311 func TestSplitError(t *testing.T) {
    312 	// Create a split function that delivers a little data, then a predictable error.
    313 	numSplits := 0
    314 	const okCount = 7
    315 	errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
    316 		if atEOF {
    317 			panic("didn't get enough data")
    318 		}
    319 		if numSplits >= okCount {
    320 			return 0, nil, testError
    321 		}
    322 		numSplits++
    323 		return 1, data[0:1], nil
    324 	}
    325 	// Read the data.
    326 	const text = "abcdefghijklmnopqrstuvwxyz"
    327 	buf := strings.NewReader(text)
    328 	s := NewScanner(&slowReader{1, buf})
    329 	s.Split(errorSplit)
    330 	var i int
    331 	for i = 0; s.Scan(); i++ {
    332 		if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
    333 			t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
    334 		}
    335 	}
    336 	// Check correct termination location and error.
    337 	if i != okCount {
    338 		t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
    339 	}
    340 	err := s.Err()
    341 	if err != testError {
    342 		t.Fatalf("expected %q got %v", testError, err)
    343 	}
    344 }
    345 
    346 // Test that an EOF is overridden by a user-generated scan error.
    347 func TestErrAtEOF(t *testing.T) {
    348 	s := NewScanner(strings.NewReader("1 2 33"))
    349 	// This splitter will fail on last entry, after s.err==EOF.
    350 	split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
    351 		advance, token, err = ScanWords(data, atEOF)
    352 		if len(token) > 1 {
    353 			if s.ErrOrEOF() != io.EOF {
    354 				t.Fatal("not testing EOF")
    355 			}
    356 			err = testError
    357 		}
    358 		return
    359 	}
    360 	s.Split(split)
    361 	for s.Scan() {
    362 	}
    363 	if s.Err() != testError {
    364 		t.Fatal("wrong error:", s.Err())
    365 	}
    366 }
    367 
    368 // Test for issue 5268.
    369 type alwaysError struct{}
    370 
    371 func (alwaysError) Read(p []byte) (int, error) {
    372 	return 0, io.ErrUnexpectedEOF
    373 }
    374 
    375 func TestNonEOFWithEmptyRead(t *testing.T) {
    376 	scanner := NewScanner(alwaysError{})
    377 	for scanner.Scan() {
    378 		t.Fatal("read should fail")
    379 	}
    380 	err := scanner.Err()
    381 	if err != io.ErrUnexpectedEOF {
    382 		t.Errorf("unexpected error: %v", err)
    383 	}
    384 }
    385 
    386 // Test that Scan finishes if we have endless empty reads.
    387 type endlessZeros struct{}
    388 
    389 func (endlessZeros) Read(p []byte) (int, error) {
    390 	return 0, nil
    391 }
    392 
    393 func TestBadReader(t *testing.T) {
    394 	scanner := NewScanner(endlessZeros{})
    395 	for scanner.Scan() {
    396 		t.Fatal("read should fail")
    397 	}
    398 	err := scanner.Err()
    399 	if err != io.ErrNoProgress {
    400 		t.Errorf("unexpected error: %v", err)
    401 	}
    402 }
    403 
    404 func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
    405 	const word = "ipsum"
    406 	s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
    407 	scanner := NewScanner(strings.NewReader(s))
    408 	scanner.MaxTokenSize(smallMaxTokenSize)
    409 	scanner.Split(ScanWords)
    410 	if !scanner.Scan() {
    411 		t.Fatalf("scan failed: %v", scanner.Err())
    412 	}
    413 	if token := scanner.Text(); token != word {
    414 		t.Fatalf("unexpected token: %v", token)
    415 	}
    416 }
    417 
    418 // Test that empty tokens, including at end of line or end of file, are found by the scanner.
    419 // Issue 8672: Could miss final empty token.
    420 
    421 func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
    422 	for i := 0; i < len(data); i++ {
    423 		if data[i] == ',' {
    424 			return i + 1, data[:i], nil
    425 		}
    426 	}
    427 	return 0, data, ErrFinalToken
    428 }
    429 
    430 func testEmptyTokens(t *testing.T, text string, values []string) {
    431 	s := NewScanner(strings.NewReader(text))
    432 	s.Split(commaSplit)
    433 	var i int
    434 	for i = 0; s.Scan(); i++ {
    435 		if i >= len(values) {
    436 			t.Fatalf("got %d fields, expected %d", i+1, len(values))
    437 		}
    438 		if s.Text() != values[i] {
    439 			t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
    440 		}
    441 	}
    442 	if i != len(values) {
    443 		t.Fatalf("got %d fields, expected %d", i, len(values))
    444 	}
    445 	if err := s.Err(); err != nil {
    446 		t.Fatal(err)
    447 	}
    448 }
    449 
    450 func TestEmptyTokens(t *testing.T) {
    451 	testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""})
    452 }
    453 
    454 func TestWithNoEmptyTokens(t *testing.T) {
    455 	testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"})
    456 }
    457 
    458 func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
    459 	if len(data) > 0 {
    460 		return 1, data[:1], nil
    461 	}
    462 	return 0, data, nil
    463 }
    464 
    465 func TestDontLoopForever(t *testing.T) {
    466 	s := NewScanner(strings.NewReader("abc"))
    467 	s.Split(loopAtEOFSplit)
    468 	// Expect a panic
    469 	defer func() {
    470 		err := recover()
    471 		if err == nil {
    472 			t.Fatal("should have panicked")
    473 		}
    474 		if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") {
    475 			panic(err)
    476 		}
    477 	}()
    478 	for count := 0; s.Scan(); count++ {
    479 		if count > 1000 {
    480 			t.Fatal("looping")
    481 		}
    482 	}
    483 	if s.Err() != nil {
    484 		t.Fatal("after scan:", s.Err())
    485 	}
    486 }
    487 
    488 func TestBlankLines(t *testing.T) {
    489 	s := NewScanner(strings.NewReader(strings.Repeat("\n", 1000)))
    490 	for count := 0; s.Scan(); count++ {
    491 		if count > 2000 {
    492 			t.Fatal("looping")
    493 		}
    494 	}
    495 	if s.Err() != nil {
    496 		t.Fatal("after scan:", s.Err())
    497 	}
    498 }
    499 
    500 type countdown int
    501 
    502 func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
    503 	if *c > 0 {
    504 		*c--
    505 		return 1, data[:1], nil
    506 	}
    507 	return 0, nil, nil
    508 }
    509 
    510 // Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
    511 func TestEmptyLinesOK(t *testing.T) {
    512 	c := countdown(10000)
    513 	s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000)))
    514 	s.Split(c.split)
    515 	for s.Scan() {
    516 	}
    517 	if s.Err() != nil {
    518 		t.Fatal("after scan:", s.Err())
    519 	}
    520 	if c != 0 {
    521 		t.Fatalf("stopped with %d left to process", c)
    522 	}
    523 }
    524 
    525 // Make sure we can read a huge token if a big enough buffer is provided.
    526 func TestHugeBuffer(t *testing.T) {
    527 	text := strings.Repeat("x", 2*MaxScanTokenSize)
    528 	s := NewScanner(strings.NewReader(text + "\n"))
    529 	s.Buffer(make([]byte, 100), 3*MaxScanTokenSize)
    530 	for s.Scan() {
    531 		token := s.Text()
    532 		if token != text {
    533 			t.Errorf("scan got incorrect token of length %d", len(token))
    534 		}
    535 	}
    536 	if s.Err() != nil {
    537 		t.Fatal("after scan:", s.Err())
    538 	}
    539 }
    540