Home | History | Annotate | Download | only in IntrospectiveTests
      1 #include "catch.hpp"
      2 #include "internal/catch_xmlwriter.h"
      3 
      4 #include <sstream>
      5 
      6 inline std::string encode( std::string const& str, Catch::XmlEncode::ForWhat forWhat = Catch::XmlEncode::ForTextNodes ) {
      7     std::ostringstream oss;
      8     oss << Catch::XmlEncode( str, forWhat );
      9     return oss.str();
     10 }
     11 
     12 TEST_CASE( "XmlEncode", "[XML]" ) {
            // Pins the text produced by encode() for ASCII inputs: plain text,
            // the XML special characters, quotes, and control characters.
            // Every section uses the default mode unless it passes ForAttributes.

            // Text without special characters is expected back unchanged.
     13     SECTION( "normal string" ) {
     14         REQUIRE( encode( "normal string" ) == "normal string" );
     15     }
            // Empty input is expected to produce empty output.
     16     SECTION( "empty string" ) {
     17         REQUIRE( encode( "" ) == "" );
     18     }
            // '&' is expected as the entity &amp;.
     19     SECTION( "string with ampersand" ) {
     20         REQUIRE( encode( "smith & jones" ) == "smith &amp; jones" );
     21     }
            // '<' is expected as the entity &lt;.
     22     SECTION( "string with less-than" ) {
     23         REQUIRE( encode( "smith < jones" ) == "smith &lt; jones" );
     24     }
            // A lone '>' is expected to stay literal; only the '>' that closes
            // a "]]>" run is expected as &gt;.
     25     SECTION( "string with greater-than" ) {
     26         REQUIRE( encode( "smith > jones" ) == "smith > jones" );
     27         REQUIRE( encode( "smith ]]> jones" ) == "smith ]]&gt; jones" );
     28     }
            // Default mode: both quote kinds are expected unchanged.
            // ForAttributes: double quotes are expected as &quot; while the
            // apostrophe is still expected unchanged.
     29     SECTION( "string with quotes" ) {
     30         std::string stringWithQuotes = "don't \"quote\" me on that";
     31         REQUIRE( encode( stringWithQuotes ) == stringWithQuotes );
     32         REQUIRE( encode( stringWithQuotes, Catch::XmlEncode::ForAttributes ) == "don't &quot;quote&quot; me on that" );
     33     }
            // Byte 0x01 is expected as the four visible characters \x01
            // (the expected literal contains an escaped backslash).
     34     SECTION( "string with control char (1)" ) {
     35         REQUIRE( encode( "[\x01]" ) == "[\\x01]" );
     36     }
            // Byte 0x7F is expected to be written the same way, as \x7F.
     37     SECTION( "string with control char (x7F)" ) {
     38         REQUIRE( encode( "[\x7F]" ) == "[\\x7F]" );
     39     }
     40 }
     41 
     42 // Thanks to Peter Bindels (dascandy) for some of the tests
     43 TEST_CASE("XmlEncode: UTF-8", "[XML][UTF-8]") {
     44     SECTION("Valid utf-8 strings") {
     45         CHECK(encode(u8"Here be ") == u8"Here be ");
     46         CHECK(encode(u8"") == u8"");
     47 
     48         CHECK(encode("\xDF\xBF")         == "\xDF\xBF"); // 0x7FF
     49         CHECK(encode("\xE0\xA0\x80")     == "\xE0\xA0\x80"); // 0x800
     50         CHECK(encode("\xED\x9F\xBF")     == "\xED\x9F\xBF"); // 0xD7FF
     51         CHECK(encode("\xEE\x80\x80")     == "\xEE\x80\x80"); // 0xE000
     52         CHECK(encode("\xEF\xBF\xBF")     == "\xEF\xBF\xBF"); // 0xFFFF
     53         CHECK(encode("\xF0\x90\x80\x80") == "\xF0\x90\x80\x80"); // 0x10000
     54         CHECK(encode("\xF4\x8F\xBF\xBF") == "\xF4\x8F\xBF\xBF"); // 0x10FFFF
     55     }
     56     SECTION("Invalid utf-8 strings") {
     57         SECTION("Various broken strings") {
     58             CHECK(encode("Here \xFF be ") == u8"Here \\xFF be ");
     59             CHECK(encode("\xFF") == "\\xFF");
     60             CHECK(encode("\xC5\xC5\xA0") == u8"\\xC5");
     61             CHECK(encode("\xF4\x90\x80\x80") == u8"\\xF4\\x90\\x80\\x80"); // 0x110000 -- out of unicode range
     62         }
     63 
     64         SECTION("Overlong encodings") {
     65             CHECK(encode("\xC0\x80") == u8"\\xC0\\x80"); // \0
     66             CHECK(encode("\xF0\x80\x80\x80") == u8"\\xF0\\x80\\x80\\x80"); // Super-over-long \0
     67             CHECK(encode("\xC1\xBF") == u8"\\xC1\\xBF"); // ASCII char as UTF-8 (0x7F)
     68             CHECK(encode("\xE0\x9F\xBF") == u8"\\xE0\\x9F\\xBF"); // 0x7FF
     69             CHECK(encode("\xF0\x8F\xBF\xBF") == u8"\\xF0\\x8F\\xBF\\xBF"); // 0xFFFF
     70         }
     71 
     72         // Note that we actually don't modify surrogate pairs, as we do not do strict checking
     73         SECTION("Surrogate pairs") {
     74             CHECK(encode("\xED\xA0\x80") == "\xED\xA0\x80"); // Invalid surrogate half 0xD800
     75             CHECK(encode("\xED\xAF\xBF") == "\xED\xAF\xBF"); // Invalid surrogate half 0xDBFF
     76             CHECK(encode("\xED\xB0\x80") == "\xED\xB0\x80"); // Invalid surrogate half 0xDC00
     77             CHECK(encode("\xED\xBF\xBF") == "\xED\xBF\xBF"); // Invalid surrogate half 0xDFFF
     78         }
     79 
     80         SECTION("Invalid start byte") {
     81             CHECK(encode("\x80") == u8"\\x80");
     82             CHECK(encode("\x81") == u8"\\x81");
     83             CHECK(encode("\xBC") == u8"\\xBC");
     84             CHECK(encode("\xBF") == u8"\\xBF");
     85             // Out of range
     86             CHECK(encode("\xF5\x80\x80\x80") == u8"\\xF5\\x80\\x80\\x80");
     87             CHECK(encode("\xF6\x80\x80\x80") == u8"\\xF6\\x80\\x80\\x80");
     88             CHECK(encode("\xF7\x80\x80\x80") == u8"\\xF7\\x80\\x80\\x80");
     89         }
     90 
     91         SECTION("Missing continuation byte(s)") {
     92             // Missing first continuation byte
     93             CHECK(encode("\xDE") == u8"\\xDE");
     94             CHECK(encode("\xDF") == u8"\\xDF");
     95             CHECK(encode("\xE0") == u8"\\xE0");
     96             CHECK(encode("\xEF") == u8"\\xEF");
     97             CHECK(encode("\xF0") == u8"\\xF0");
     98             CHECK(encode("\xF4") == u8"\\xF4");
     99 
    100             // Missing second continuation byte
    101             CHECK(encode("\xE0\x80") == u8"\\xE0\\x80");
    102             CHECK(encode("\xE0\xBF") == u8"\\xE0\\xBF");
    103             CHECK(encode("\xE1\x80") == u8"\\xE1\\x80");
    104             CHECK(encode("\xF0\x80") == u8"\\xF0\\x80");
    105             CHECK(encode("\xF4\x80") == u8"\\xF4\\x80");
    106 
    107             // Missing third continuation byte
    108             CHECK(encode("\xF0\x80\x80") == u8"\\xF0\\x80\\x80");
    109             CHECK(encode("\xF4\x80\x80") == u8"\\xF4\\x80\\x80");
    110         }
    111     }
    112 }
    113