Home | History | Annotate | Download | only in tests
      1 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Some UTF character sequences in this file were taken from
      6 // https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
      7 
      8 #include <gtest/gtest.h>
      9 #include <stdio.h>
     10 
     11 extern "C" {
     12 #include "cras_utf8.h"
     13 }
     14 
     15 namespace {
     16 
     17 TEST(UTF8, ValidStress) {
     18   size_t pos;
     19 
     20   EXPECT_EQ(1, valid_utf8_string("The greek word 'kosme': "
     21                                  "\xce\xba\xe1\xbd\xb9\xcf\x83\xce"
     22                                  "\xbc\xce\xb5", &pos));
     23   EXPECT_EQ(35, pos);
     24 
     25   EXPECT_EQ(1, valid_utf8_string("Playback", &pos));
     26   EXPECT_EQ(8, pos);
     27 
     28   EXPECT_EQ(1, valid_utf8_string("The Euro sign: \xe2\x82\xac", &pos));
     29   EXPECT_EQ(18, pos);
     30 
     31   /* First possible sequence of a certain length. */
     32   EXPECT_EQ(1, valid_utf8_string("\x01", &pos));
     33   EXPECT_EQ(1, pos);
     34   EXPECT_EQ(1, valid_utf8_string("\xc2\x80", &pos));
     35   EXPECT_EQ(2, pos);
     36   EXPECT_EQ(1, valid_utf8_string("\xe0\xa0\x80", &pos));
     37   EXPECT_EQ(3, pos);
     38   EXPECT_EQ(1, valid_utf8_string("\xe1\x80\x80", &pos));
     39   EXPECT_EQ(3, pos);
     40   EXPECT_EQ(1, valid_utf8_string("\xf0\x90\x80\x80", &pos));
     41   EXPECT_EQ(4, pos);
     42   EXPECT_EQ(1, valid_utf8_string("\xf1\x80\x80\x80", &pos));
     43   EXPECT_EQ(4, pos);
     44 
     45   /* Last possible sequence of a certain length. */
     46   EXPECT_EQ(1, valid_utf8_string("\x7f", &pos));
     47   EXPECT_EQ(1, pos);
     48   EXPECT_EQ(1, valid_utf8_string("\xdf\xbf", &pos));
     49   EXPECT_EQ(2, pos);
     50   EXPECT_EQ(1, valid_utf8_string("\xef\xbf\xbf", &pos));
     51   EXPECT_EQ(3, pos);
     52   EXPECT_EQ(1, valid_utf8_string("\xf4\x8f\xbf\xbf", &pos));
     53   EXPECT_EQ(4, pos);
     54 
     55   /* Other boundary conditions. */
     56   EXPECT_EQ(1, valid_utf8_string("\xed\x9f\xbf", &pos));
     57   EXPECT_EQ(3, pos);
     58   EXPECT_EQ(1, valid_utf8_string("\xee\x80\x80", &pos));
     59   EXPECT_EQ(3, pos);
     60   EXPECT_EQ(1, valid_utf8_string("\xef\xbf\xbd", &pos));
     61   EXPECT_EQ(3, pos);
     62   EXPECT_EQ(1, valid_utf8_string("\xf0\xbf\xbf\xbf", &pos));
     63   EXPECT_EQ(4, pos);
     64 
     65   /* BOM sequence. */
     66   EXPECT_EQ(1, valid_utf8_string("\xef\xbb\xbf", &pos));
     67   EXPECT_EQ(3, pos);
     68 
     69   /* Valid UTF-8 that shouldn't appear in text; chose to allow
     70    * these characters anyway. */
     71   EXPECT_EQ(1, valid_utf8_string("U+FFFE: \xef\xbf\xbe", &pos));
     72   EXPECT_EQ(11, pos);
     73   EXPECT_EQ(1, valid_utf8_string("U+FDD0: \xef\xb7\x90", &pos));
     74   EXPECT_EQ(11, pos);
     75   EXPECT_EQ(1, valid_utf8_string("\xf0\x9f\xbf\xbe", &pos));
     76   EXPECT_EQ(4, pos);
     77 }
     78 
     79 TEST(UTF8, InvalidStress) {
     80   size_t pos;
     81 
     82   /* Malformed continuation bytes. */
     83   EXPECT_EQ(0, valid_utf8_string("\x80", &pos));
     84   EXPECT_EQ(0, pos);
     85   EXPECT_EQ(0, valid_utf8_string("\xbf", &pos));
     86   EXPECT_EQ(0, pos);
     87   EXPECT_EQ(0, valid_utf8_string("\x80\xbf", &pos));
     88   EXPECT_EQ(0, pos);
     89   EXPECT_EQ(0, valid_utf8_string("\xc2\x80\xbf", &pos));
     90   EXPECT_EQ(2, pos);
     91 
     92   /* Lonely start characters. */
     93   EXPECT_EQ(0, valid_utf8_string("\xc2 \xc3 \xc4 ", &pos));
     94   EXPECT_EQ(1, pos);
     95 
     96   /* Out of range cases. */
     97   EXPECT_EQ(0, valid_utf8_string("\xf4\x90\xbf\xbf", &pos));
     98   EXPECT_EQ(1, pos);
     99   EXPECT_EQ(0, valid_utf8_string(" \xf5\x80", &pos));
    100   EXPECT_EQ(1, pos);
    101   EXPECT_EQ(0, valid_utf8_string(" \xe0\x80\x80", &pos));
    102   EXPECT_EQ(2, pos);
    103   EXPECT_EQ(0, valid_utf8_string("\xf4\x80\x80\xcf", &pos));
    104   EXPECT_EQ(3, pos);
    105 
    106   /* Stop in mid-sequence. */
    107   EXPECT_EQ(0, valid_utf8_string("\xf4\x80", &pos));
    108   EXPECT_EQ(2, pos);
    109 
    110   /* Bad characters. */
    111   EXPECT_EQ(0, valid_utf8_string("\xff", &pos));
    112   EXPECT_EQ(0, pos);
    113   EXPECT_EQ(0, valid_utf8_string("\xfe", &pos));
    114   EXPECT_EQ(0, pos);
    115 
    116   /* Overlong representations of ASCII characters. */
    117   EXPECT_EQ(0, valid_utf8_string("This represents the / character with too"
    118                                  "many bytes: \xe0\x80\xaf", &pos));
    119   EXPECT_EQ(53, pos);
    120   EXPECT_EQ(0, valid_utf8_string("This represents the / character with too"
    121                                  "many bytes: \xf0\x80\x80\xaf", &pos));
    122   EXPECT_EQ(53, pos);
    123 
    124   /* Should not be interpreted as the ASCII NUL character. */
    125   EXPECT_EQ(0, valid_utf8_string("This represents the NUL character with too"
    126                                  "many bytes: \xe0\x80\x80", &pos));
    127   EXPECT_EQ(55, pos);
    128   EXPECT_EQ(0, valid_utf8_string("This represents the NUL character with too"
    129                                  "many bytes: \xf0\x80\x80\x80", &pos));
    130   EXPECT_EQ(55, pos);
    131 
    132   /* Single UTF-16 surrogates. */
    133   EXPECT_EQ(0, valid_utf8_string("\xed\xa0\x80", &pos));
    134   EXPECT_EQ(1, pos);
    135   EXPECT_EQ(0, valid_utf8_string("\xed\xad\xbf", &pos));
    136   EXPECT_EQ(1, pos);
    137   EXPECT_EQ(0, valid_utf8_string("\xed\xae\x80", &pos));
    138   EXPECT_EQ(1, pos);
    139   EXPECT_EQ(0, valid_utf8_string("\xed\xaf\xbf", &pos));
    140   EXPECT_EQ(1, pos);
    141   EXPECT_EQ(0, valid_utf8_string("\xed\xb0\x80", &pos));
    142   EXPECT_EQ(1, pos);
    143   EXPECT_EQ(0, valid_utf8_string("\xed\xbe\x80", &pos));
    144   EXPECT_EQ(1, pos);
    145   EXPECT_EQ(0, valid_utf8_string("\xed\xbf\xbf", &pos));
    146   EXPECT_EQ(1, pos);
    147 }
    148 
    149 }  //  namespace
    150 
    151 int main(int argc, char **argv) {
    152   ::testing::InitGoogleTest(&argc, argv);
    153   return RUN_ALL_TESTS();
    154 }
    155