Home | History | Annotate | Download | only in xml
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package xml
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	"io"
     11 	"reflect"
     12 	"strings"
     13 	"testing"
     14 	"unicode/utf8"
     15 )
     16 
     17 const testInput = `
     18 <?xml version="1.0" encoding="UTF-8"?>
     19 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     20   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
     21 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
     22 	"\r\n\t" + `  >
     23   <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;</hello>
     24   <query>&; &is-it;</query>
     25   <goodbye />
     26   <outer foo:attr="value" xmlns:tag="ns4">
     27     <inner/>
     28   </outer>
     29   <tag:name>
     30     <![CDATA[Some text here.]]>
     31   </tag:name>
     32 </body><!-- missing final newline -->`
     33 
     34 var testEntity = map[string]string{"": "What", "is-it": "is it?"}
     35 
     36 var rawTokens = []Token{
     37 	CharData("\n"),
     38 	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
     39 	CharData("\n"),
     40 	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     41   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
     42 	CharData("\n"),
     43 	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
     44 	CharData("\n  "),
     45 	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
     46 	CharData("World <>'\" "),
     47 	EndElement{Name{"", "hello"}},
     48 	CharData("\n  "),
     49 	StartElement{Name{"", "query"}, []Attr{}},
     50 	CharData("What is it?"),
     51 	EndElement{Name{"", "query"}},
     52 	CharData("\n  "),
     53 	StartElement{Name{"", "goodbye"}, []Attr{}},
     54 	EndElement{Name{"", "goodbye"}},
     55 	CharData("\n  "),
     56 	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
     57 	CharData("\n    "),
     58 	StartElement{Name{"", "inner"}, []Attr{}},
     59 	EndElement{Name{"", "inner"}},
     60 	CharData("\n  "),
     61 	EndElement{Name{"", "outer"}},
     62 	CharData("\n  "),
     63 	StartElement{Name{"tag", "name"}, []Attr{}},
     64 	CharData("\n    "),
     65 	CharData("Some text here."),
     66 	CharData("\n  "),
     67 	EndElement{Name{"tag", "name"}},
     68 	CharData("\n"),
     69 	EndElement{Name{"", "body"}},
     70 	Comment(" missing final newline "),
     71 }
     72 
     73 var cookedTokens = []Token{
     74 	CharData("\n"),
     75 	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
     76 	CharData("\n"),
     77 	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     78   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
     79 	CharData("\n"),
     80 	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
     81 	CharData("\n  "),
     82 	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
     83 	CharData("World <>'\" "),
     84 	EndElement{Name{"ns2", "hello"}},
     85 	CharData("\n  "),
     86 	StartElement{Name{"ns2", "query"}, []Attr{}},
     87 	CharData("What is it?"),
     88 	EndElement{Name{"ns2", "query"}},
     89 	CharData("\n  "),
     90 	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
     91 	EndElement{Name{"ns2", "goodbye"}},
     92 	CharData("\n  "),
     93 	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
     94 	CharData("\n    "),
     95 	StartElement{Name{"ns2", "inner"}, []Attr{}},
     96 	EndElement{Name{"ns2", "inner"}},
     97 	CharData("\n  "),
     98 	EndElement{Name{"ns2", "outer"}},
     99 	CharData("\n  "),
    100 	StartElement{Name{"ns3", "name"}, []Attr{}},
    101 	CharData("\n    "),
    102 	CharData("Some text here."),
    103 	CharData("\n  "),
    104 	EndElement{Name{"ns3", "name"}},
    105 	CharData("\n"),
    106 	EndElement{Name{"ns2", "body"}},
    107 	Comment(" missing final newline "),
    108 }
    109 
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase". It is the input for
// TestRawTokenAltEncoding and TestRawTokenAltEncodingNoConverter.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
    113 
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding once a downCaser is installed as the charset
// reader: the declaration is reported as written, while the element name
// and its text are expected in lower case.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
    122 
// xmlInput lists malformed documents. TestSyntax tokenizes each one with
// Token until an error occurs and requires that error to be a *SyntaxError.
// The first group consists of inputs that stop in the middle of a
// construct; the second group contains other malformed constructs.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
    169 
    170 func TestRawToken(t *testing.T) {
    171 	d := NewDecoder(strings.NewReader(testInput))
    172 	d.Entity = testEntity
    173 	testRawToken(t, d, testInput, rawTokens)
    174 }
    175 
// nonStrictInput is the document tokenized by TestNonStrictRawToken with
// Strict disabled. Each <tag> element holds an ampersand sequence that
// nonStrictTokens expects to come back unchanged as literal text.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
    186 
// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput: every element's character data equals the text written in
// the input, ampersand sequence included.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
    222 
    223 func TestNonStrictRawToken(t *testing.T) {
    224 	d := NewDecoder(strings.NewReader(nonStrictInput))
    225 	d.Strict = false
    226 	testRawToken(t, d, nonStrictInput, nonStrictTokens)
    227 }
    228 
    229 type downCaser struct {
    230 	t *testing.T
    231 	r io.ByteReader
    232 }
    233 
    234 func (d *downCaser) ReadByte() (c byte, err error) {
    235 	c, err = d.r.ReadByte()
    236 	if c >= 'A' && c <= 'Z' {
    237 		c += 'a' - 'A'
    238 	}
    239 	return
    240 }
    241 
    242 func (d *downCaser) Read(p []byte) (int, error) {
    243 	d.t.Fatalf("unexpected Read call on downCaser reader")
    244 	panic("unreachable")
    245 }
    246 
    247 func TestRawTokenAltEncoding(t *testing.T) {
    248 	d := NewDecoder(strings.NewReader(testInputAltEncoding))
    249 	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
    250 		if charset != "x-testing-uppercase" {
    251 			t.Fatalf("unexpected charset %q", charset)
    252 		}
    253 		return &downCaser{t, input.(io.ByteReader)}, nil
    254 	}
    255 	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
    256 }
    257 
    258 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
    259 	d := NewDecoder(strings.NewReader(testInputAltEncoding))
    260 	token, err := d.RawToken()
    261 	if token == nil {
    262 		t.Fatalf("expected a token on first RawToken call")
    263 	}
    264 	if err != nil {
    265 		t.Fatal(err)
    266 	}
    267 	token, err = d.RawToken()
    268 	if token != nil {
    269 		t.Errorf("expected a nil token; got %#v", token)
    270 	}
    271 	if err == nil {
    272 		t.Fatalf("expected an error on second RawToken call")
    273 	}
    274 	const encoding = "x-testing-uppercase"
    275 	if !strings.Contains(err.Error(), encoding) {
    276 		t.Errorf("expected error to contain %q; got error: %v",
    277 			encoding, err)
    278 	}
    279 }
    280 
    281 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
    282 	lastEnd := int64(0)
    283 	for i, want := range rawTokens {
    284 		start := d.InputOffset()
    285 		have, err := d.RawToken()
    286 		end := d.InputOffset()
    287 		if err != nil {
    288 			t.Fatalf("token %d: unexpected error: %s", i, err)
    289 		}
    290 		if !reflect.DeepEqual(have, want) {
    291 			var shave, swant string
    292 			if _, ok := have.(CharData); ok {
    293 				shave = fmt.Sprintf("CharData(%q)", have)
    294 			} else {
    295 				shave = fmt.Sprintf("%#v", have)
    296 			}
    297 			if _, ok := want.(CharData); ok {
    298 				swant = fmt.Sprintf("CharData(%q)", want)
    299 			} else {
    300 				swant = fmt.Sprintf("%#v", want)
    301 			}
    302 			t.Errorf("token %d = %s, want %s", i, shave, swant)
    303 		}
    304 
    305 		// Check that InputOffset returned actual token.
    306 		switch {
    307 		case start < lastEnd:
    308 			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
    309 		case start >= end:
    310 			// Special case: EndElement can be synthesized.
    311 			if start == end && end == lastEnd {
    312 				break
    313 			}
    314 			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
    315 		case end > int64(len(raw)):
    316 			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
    317 		default:
    318 			text := raw[start:end]
    319 			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
    320 				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
    321 			}
    322 		}
    323 		lastEnd = end
    324 	}
    325 }
    326 
    327 // Ensure that directives (specifically !DOCTYPE) include the complete
    328 // text of any nested directives, noting that < and > do not change
    329 // nesting depth if they are in single or double quotes.
    330 
// nestedDirectivesInput holds DOCTYPE directives that each nest an ENTITY
// directive. The entity values place '<' and '>' inside single and double
// quotes, including one quote character inside the other kind of quotes.
// It is the input for TestNestedDirectives.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
    340 
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: one Directive per line, holding the text
// between "<!" and the final ">", separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
    358 
    359 func TestNestedDirectives(t *testing.T) {
    360 	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
    361 
    362 	for i, want := range nestedDirectivesTokens {
    363 		have, err := d.Token()
    364 		if err != nil {
    365 			t.Fatalf("token %d: unexpected error: %s", i, err)
    366 		}
    367 		if !reflect.DeepEqual(have, want) {
    368 			t.Errorf("token %d = %#v want %#v", i, have, want)
    369 		}
    370 	}
    371 }
    372 
    373 func TestToken(t *testing.T) {
    374 	d := NewDecoder(strings.NewReader(testInput))
    375 	d.Entity = testEntity
    376 
    377 	for i, want := range cookedTokens {
    378 		have, err := d.Token()
    379 		if err != nil {
    380 			t.Fatalf("token %d: unexpected error: %s", i, err)
    381 		}
    382 		if !reflect.DeepEqual(have, want) {
    383 			t.Errorf("token %d = %#v want %#v", i, have, want)
    384 		}
    385 	}
    386 }
    387 
    388 func TestSyntax(t *testing.T) {
    389 	for i := range xmlInput {
    390 		d := NewDecoder(strings.NewReader(xmlInput[i]))
    391 		var err error
    392 		for _, err = d.Token(); err == nil; _, err = d.Token() {
    393 		}
    394 		if _, ok := err.(*SyntaxError); !ok {
    395 			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
    396 		}
    397 	}
    398 }
    399 
    400 type allScalars struct {
    401 	True1     bool
    402 	True2     bool
    403 	False1    bool
    404 	False2    bool
    405 	Int       int
    406 	Int8      int8
    407 	Int16     int16
    408 	Int32     int32
    409 	Int64     int64
    410 	Uint      int
    411 	Uint8     uint8
    412 	Uint16    uint16
    413 	Uint32    uint32
    414 	Uint64    uint64
    415 	Uintptr   uintptr
    416 	Float32   float32
    417 	Float64   float64
    418 	String    string
    419 	PtrString *string
    420 }
    421 
// all is the value TestAllScalars expects Unmarshal to produce from
// testScalarsInput. PtrString points at the package-level variable sixteen.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}
    443 
// sixteen is a variable rather than a constant so that all.PtrString can
// take its address.
var sixteen = "16"
    445 
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value. It has one child element per field, plus a <Float>
// element for which allScalars declares no field.
// NOTE(review): <Float> presumably checks that unmatched elements are
// skipped; confirm against the decoder.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
    468 
    469 func TestAllScalars(t *testing.T) {
    470 	var a allScalars
    471 	err := Unmarshal([]byte(testScalarsInput), &a)
    472 
    473 	if err != nil {
    474 		t.Fatal(err)
    475 	}
    476 	if !reflect.DeepEqual(a, all) {
    477 		t.Errorf("have %+v want %+v", a, all)
    478 	}
    479 }
    480 
// item is the target struct for TestIssue569. Its single field has an
// underscore in its name, which is what the test input's element is named.
type item struct {
	Field_a string
}
    484 
    485 func TestIssue569(t *testing.T) {
    486 	data := `<item><Field_a>abcd</Field_a></item>`
    487 	var i item
    488 	err := Unmarshal([]byte(data), &i)
    489 
    490 	if err != nil || i.Field_a != "abcd" {
    491 		t.Fatal("Expecting abcd")
    492 	}
    493 }
    494 
    495 func TestUnquotedAttrs(t *testing.T) {
    496 	data := "<tag attr=azAZ09:-_\t>"
    497 	d := NewDecoder(strings.NewReader(data))
    498 	d.Strict = false
    499 	token, err := d.Token()
    500 	if _, ok := err.(*SyntaxError); ok {
    501 		t.Errorf("Unexpected error: %v", err)
    502 	}
    503 	if token.(StartElement).Name.Local != "tag" {
    504 		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
    505 	}
    506 	attr := token.(StartElement).Attr[0]
    507 	if attr.Value != "azAZ09:-_" {
    508 		t.Errorf("Unexpected attribute value: %v", attr.Value)
    509 	}
    510 	if attr.Name.Local != "attr" {
    511 		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
    512 	}
    513 }
    514 
    515 func TestValuelessAttrs(t *testing.T) {
    516 	tests := [][3]string{
    517 		{"<p nowrap>", "p", "nowrap"},
    518 		{"<p nowrap >", "p", "nowrap"},
    519 		{"<input checked/>", "input", "checked"},
    520 		{"<input checked />", "input", "checked"},
    521 	}
    522 	for _, test := range tests {
    523 		d := NewDecoder(strings.NewReader(test[0]))
    524 		d.Strict = false
    525 		token, err := d.Token()
    526 		if _, ok := err.(*SyntaxError); ok {
    527 			t.Errorf("Unexpected error: %v", err)
    528 		}
    529 		if token.(StartElement).Name.Local != test[1] {
    530 			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
    531 		}
    532 		attr := token.(StartElement).Attr[0]
    533 		if attr.Value != test[2] {
    534 			t.Errorf("Unexpected attribute value: %v", attr.Value)
    535 		}
    536 		if attr.Name.Local != test[2] {
    537 			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
    538 		}
    539 	}
    540 }
    541 
    542 func TestCopyTokenCharData(t *testing.T) {
    543 	data := []byte("same data")
    544 	var tok1 Token = CharData(data)
    545 	tok2 := CopyToken(tok1)
    546 	if !reflect.DeepEqual(tok1, tok2) {
    547 		t.Error("CopyToken(CharData) != CharData")
    548 	}
    549 	data[1] = 'o'
    550 	if reflect.DeepEqual(tok1, tok2) {
    551 		t.Error("CopyToken(CharData) uses same buffer.")
    552 	}
    553 }
    554 
    555 func TestCopyTokenStartElement(t *testing.T) {
    556 	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
    557 	var tok1 Token = elt
    558 	tok2 := CopyToken(tok1)
    559 	if tok1.(StartElement).Attr[0].Value != "en" {
    560 		t.Error("CopyToken overwrote Attr[0]")
    561 	}
    562 	if !reflect.DeepEqual(tok1, tok2) {
    563 		t.Error("CopyToken(StartElement) != StartElement")
    564 	}
    565 	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
    566 	if reflect.DeepEqual(tok1, tok2) {
    567 		t.Error("CopyToken(CharData) uses same buffer.")
    568 	}
    569 }
    570 
    571 func TestSyntaxErrorLineNum(t *testing.T) {
    572 	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
    573 	d := NewDecoder(strings.NewReader(testInput))
    574 	var err error
    575 	for _, err = d.Token(); err == nil; _, err = d.Token() {
    576 	}
    577 	synerr, ok := err.(*SyntaxError)
    578 	if !ok {
    579 		t.Error("Expected SyntaxError.")
    580 	}
    581 	if synerr.Line != 3 {
    582 		t.Error("SyntaxError didn't have correct line number.")
    583 	}
    584 }
    585 
    586 func TestTrailingRawToken(t *testing.T) {
    587 	input := `<FOO></FOO>  `
    588 	d := NewDecoder(strings.NewReader(input))
    589 	var err error
    590 	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
    591 	}
    592 	if err != io.EOF {
    593 		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
    594 	}
    595 }
    596 
    597 func TestTrailingToken(t *testing.T) {
    598 	input := `<FOO></FOO>  `
    599 	d := NewDecoder(strings.NewReader(input))
    600 	var err error
    601 	for _, err = d.Token(); err == nil; _, err = d.Token() {
    602 	}
    603 	if err != io.EOF {
    604 		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
    605 	}
    606 }
    607 
    608 func TestEntityInsideCDATA(t *testing.T) {
    609 	input := `<test><![CDATA[ &val=foo ]]></test>`
    610 	d := NewDecoder(strings.NewReader(input))
    611 	var err error
    612 	for _, err = d.Token(); err == nil; _, err = d.Token() {
    613 	}
    614 	if err != io.EOF {
    615 		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
    616 	}
    617 }
    618 
// characterTests pairs documents containing disallowed characters or
// malformed entity references with the SyntaxError message that
// TestDisallowedCharacters expects for each.
var characterTests = []struct {
	in  string // document to tokenize
	err string // expected SyntaxError.Msg
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
    633 
    634 func TestDisallowedCharacters(t *testing.T) {
    635 
    636 	for i, tt := range characterTests {
    637 		d := NewDecoder(strings.NewReader(tt.in))
    638 		var err error
    639 
    640 		for err == nil {
    641 			_, err = d.Token()
    642 		}
    643 		synerr, ok := err.(*SyntaxError)
    644 		if !ok {
    645 			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
    646 		}
    647 		if synerr.Msg != tt.err {
    648 			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
    649 		}
    650 	}
    651 }
    652 
// procInstTests lists processing-instruction contents together with the
// values TestProcInstEncoding expects procInst to return for them.
var procInstTests = []struct {
	input  string    // text passed to procInst
	expect [2]string // wanted results for "version" and "encoding", in that order
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
    663 
    664 func TestProcInstEncoding(t *testing.T) {
    665 	for _, test := range procInstTests {
    666 		if got := procInst("version", test.input); got != test.expect[0] {
    667 			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
    668 		}
    669 		if got := procInst("encoding", test.input); got != test.expect[1] {
    670 			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
    671 		}
    672 	}
    673 }
    674 
    675 // Ensure that directives with comments include the complete
    676 // text of any nested directives.
    677 
// directivesWithCommentsInput holds DOCTYPE directives with comments placed
// before and after a nested ENTITY directive, plus a line mixing comments
// with the comment-like sequences "<!->" and "<!>". It is the input for
// TestDirectivesWithComments.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
    683 
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput. The
// expected Directive text omits the comments; in the third directive the
// spaces that separated them remain, and "<!->" and "<!>" are kept.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
    693 
    694 func TestDirectivesWithComments(t *testing.T) {
    695 	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
    696 
    697 	for i, want := range directivesWithCommentsTokens {
    698 		have, err := d.Token()
    699 		if err != nil {
    700 			t.Fatalf("token %d: unexpected error: %s", i, err)
    701 		}
    702 		if !reflect.DeepEqual(have, want) {
    703 			t.Errorf("token %d = %#v want %#v", i, have, want)
    704 		}
    705 	}
    706 }
    707 
    708 // Writer whose Write method always returns an error.
    709 type errWriter struct{}
    710 
    711 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
    712 
    713 func TestEscapeTextIOErrors(t *testing.T) {
    714 	expectErr := "unwritable"
    715 	err := EscapeText(errWriter{}, []byte{'A'})
    716 
    717 	if err == nil || err.Error() != expectErr {
    718 		t.Errorf("have %v, want %v", err, expectErr)
    719 	}
    720 }
    721 
    722 func TestEscapeTextInvalidChar(t *testing.T) {
    723 	input := []byte("A \x00 terminated string.")
    724 	expected := "A \uFFFD terminated string."
    725 
    726 	buff := new(bytes.Buffer)
    727 	if err := EscapeText(buff, input); err != nil {
    728 		t.Fatalf("have %v, want nil", err)
    729 	}
    730 	text := buff.String()
    731 
    732 	if text != expected {
    733 		t.Errorf("have %v, want %v", text, expected)
    734 	}
    735 }
    736 
    737 func TestIssue5880(t *testing.T) {
    738 	type T []byte
    739 	data, err := Marshal(T{192, 168, 0, 1})
    740 	if err != nil {
    741 		t.Errorf("Marshal error: %v", err)
    742 	}
    743 	if !utf8.Valid(data) {
    744 		t.Errorf("Marshal generated invalid UTF-8: %x", data)
    745 	}
    746 }
    747 
    748 func TestIssue11405(t *testing.T) {
    749 	testCases := []string{
    750 		"<root>",
    751 		"<root><foo>",
    752 		"<root><foo></foo>",
    753 	}
    754 	for _, tc := range testCases {
    755 		d := NewDecoder(strings.NewReader(tc))
    756 		var err error
    757 		for {
    758 			_, err = d.Token()
    759 			if err != nil {
    760 				break
    761 			}
    762 		}
    763 		if _, ok := err.(*SyntaxError); !ok {
    764 			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
    765 		}
    766 	}
    767 }
    768 
    769 func TestIssue12417(t *testing.T) {
    770 	testCases := []struct {
    771 		s  string
    772 		ok bool
    773 	}{
    774 		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
    775 		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
    776 		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
    777 		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
    778 	}
    779 	for _, tc := range testCases {
    780 		d := NewDecoder(strings.NewReader(tc.s))
    781 		var err error
    782 		for {
    783 			_, err = d.Token()
    784 			if err != nil {
    785 				if err == io.EOF {
    786 					err = nil
    787 				}
    788 				break
    789 			}
    790 		}
    791 		if err != nil && tc.ok {
    792 			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
    793 			continue
    794 		}
    795 		if err == nil && !tc.ok {
    796 			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
    797 		}
    798 	}
    799 }
    800