Home | History | Annotate | Download | only in Support
      1 //===- unittests/Support/UnicodeTest.cpp - Unicode.h tests ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "llvm/Support/Unicode.h"
     11 #include "gtest/gtest.h"
     12 
     13 namespace llvm {
     14 namespace sys {
     15 namespace unicode {
     16 namespace {
     17 
     18 TEST(Unicode, columnWidthUTF8) {
     19   EXPECT_EQ(0, columnWidthUTF8(""));
     20   EXPECT_EQ(1, columnWidthUTF8(" "));
     21   EXPECT_EQ(1, columnWidthUTF8("a"));
     22   EXPECT_EQ(1, columnWidthUTF8("~"));
     23 
     24   EXPECT_EQ(6, columnWidthUTF8("abcdef"));
     25 
     26   EXPECT_EQ(-1, columnWidthUTF8("\x01"));
     27   EXPECT_EQ(-1, columnWidthUTF8("aaaaaaaaaa\x01"));
     28   EXPECT_EQ(-1, columnWidthUTF8("\342\200\213")); // 200B ZERO WIDTH SPACE
     29 
     30   // 00AD SOFT HYPHEN is displayed on most terminals as a space or a dash. Some
     31   // text editors display it only when a line is broken at it, some use it as a
     32   // line-break hint, but don't display. We choose terminal-oriented
     33   // interpretation.
     34   EXPECT_EQ(1, columnWidthUTF8("\302\255"));
     35 
     36   EXPECT_EQ(0, columnWidthUTF8("\314\200"));     // 0300 COMBINING GRAVE ACCENT
     37   EXPECT_EQ(1, columnWidthUTF8("\340\270\201")); // 0E01 THAI CHARACTER KO KAI
     38   EXPECT_EQ(2, columnWidthUTF8("\344\270\200")); // CJK UNIFIED IDEOGRAPH-4E00
     39 
     40   EXPECT_EQ(4, columnWidthUTF8("\344\270\200\344\270\200"));
     41   EXPECT_EQ(3, columnWidthUTF8("q\344\270\200"));
     42   EXPECT_EQ(3, columnWidthUTF8("\314\200\340\270\201\344\270\200"));
     43 
     44   // Invalid UTF-8 strings, columnWidthUTF8 should error out.
     45   EXPECT_EQ(-2, columnWidthUTF8("\344"));
     46   EXPECT_EQ(-2, columnWidthUTF8("\344\270"));
     47   EXPECT_EQ(-2, columnWidthUTF8("\344\270\033"));
     48   EXPECT_EQ(-2, columnWidthUTF8("\344\270\300"));
     49   EXPECT_EQ(-2, columnWidthUTF8("\377\366\355"));
     50 
     51   EXPECT_EQ(-2, columnWidthUTF8("qwer\344"));
     52   EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270"));
     53   EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270\033"));
     54   EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270\300"));
     55   EXPECT_EQ(-2, columnWidthUTF8("qwer\377\366\355"));
     56 
     57   // UTF-8 sequences longer than 4 bytes correspond to unallocated Unicode
     58   // characters.
     59   EXPECT_EQ(-2, columnWidthUTF8("\370\200\200\200\200"));     // U+200000
     60   EXPECT_EQ(-2, columnWidthUTF8("\374\200\200\200\200\200")); // U+4000000
     61 }
     62 
     63 TEST(Unicode, isPrintable) {
     64   EXPECT_FALSE(isPrintable(0)); // <control-0000>-<control-001F>
     65   EXPECT_FALSE(isPrintable(0x01));
     66   EXPECT_FALSE(isPrintable(0x1F));
     67   EXPECT_TRUE(isPrintable(' '));
     68   EXPECT_TRUE(isPrintable('A'));
     69   EXPECT_TRUE(isPrintable('~'));
     70   EXPECT_FALSE(isPrintable(0x7F)); // <control-007F>..<control-009F>
     71   EXPECT_FALSE(isPrintable(0x90));
     72   EXPECT_FALSE(isPrintable(0x9F));
     73 
     74   EXPECT_TRUE(isPrintable(0xAC));
     75   EXPECT_TRUE(isPrintable(0xAD)); // SOFT HYPHEN is displayed on most terminals
     76                                   // as either a space or a dash.
     77   EXPECT_TRUE(isPrintable(0xAE));
     78 
     79   EXPECT_TRUE(isPrintable(0x0377));  // GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
     80   EXPECT_FALSE(isPrintable(0x0378)); // <reserved-0378>..<reserved-0379>
     81 
     82   EXPECT_FALSE(isPrintable(0x0600)); // ARABIC NUMBER SIGN
     83 
     84   EXPECT_FALSE(isPrintable(0x1FFFF)); // <reserved-1F774>..<noncharacter-1FFFF>
     85   EXPECT_TRUE(isPrintable(0x20000));  // CJK UNIFIED IDEOGRAPH-20000
     86 
     87   EXPECT_FALSE(isPrintable(0x10FFFF)); // noncharacter
     88 }
     89 
     90 } // namespace
     91 } // namespace unicode
     92 } // namespace sys
     93 } // namespace llvm
     94