Home | History | Annotate | Download | only in icu
      1 /*
      2  *  Copyright (C) 2006 George Staikos <staikos (at) kde.org>
      3  *  Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com>
      4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      5  *
      6  *  This library is free software; you can redistribute it and/or
      7  *  modify it under the terms of the GNU Library General Public
      8  *  License as published by the Free Software Foundation; either
      9  *  version 2 of the License, or (at your option) any later version.
     10  *
     11  *  This library is distributed in the hope that it will be useful,
     12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  *  Library General Public License for more details.
     15  *
     16  *  You should have received a copy of the GNU Library General Public License
     17  *  along with this library; see the file COPYING.LIB.  If not, write to
     18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  *  Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef WTF_UNICODE_ICU_H
     24 #define WTF_UNICODE_ICU_H
     25 
     26 #include <stdlib.h>
     27 #include <unicode/uchar.h>
     28 #include <unicode/ustring.h>
     29 #include <unicode/utf16.h>
     30 
     31 namespace WTF {
     32 namespace Unicode {
     33 
     34 enum Direction {
     35     LeftToRight = U_LEFT_TO_RIGHT,
     36     RightToLeft = U_RIGHT_TO_LEFT,
     37     EuropeanNumber = U_EUROPEAN_NUMBER,
     38     EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
     39     EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
     40     ArabicNumber = U_ARABIC_NUMBER,
     41     CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
     42     BlockSeparator = U_BLOCK_SEPARATOR,
     43     SegmentSeparator = U_SEGMENT_SEPARATOR,
     44     WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
     45     OtherNeutral = U_OTHER_NEUTRAL,
     46     LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
     47     LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
     48     RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
     49     RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
     50     RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
     51     PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
     52     NonSpacingMark = U_DIR_NON_SPACING_MARK,
     53     BoundaryNeutral = U_BOUNDARY_NEUTRAL
     54 };
     55 
     56 enum DecompositionType {
     57     DecompositionNone = U_DT_NONE,
     58     DecompositionCanonical = U_DT_CANONICAL,
     59     DecompositionCompat = U_DT_COMPAT,
     60     DecompositionCircle = U_DT_CIRCLE,
     61     DecompositionFinal = U_DT_FINAL,
     62     DecompositionFont = U_DT_FONT,
     63     DecompositionFraction = U_DT_FRACTION,
     64     DecompositionInitial = U_DT_INITIAL,
     65     DecompositionIsolated = U_DT_ISOLATED,
     66     DecompositionMedial = U_DT_MEDIAL,
     67     DecompositionNarrow = U_DT_NARROW,
     68     DecompositionNoBreak = U_DT_NOBREAK,
     69     DecompositionSmall = U_DT_SMALL,
     70     DecompositionSquare = U_DT_SQUARE,
     71     DecompositionSub = U_DT_SUB,
     72     DecompositionSuper = U_DT_SUPER,
     73     DecompositionVertical = U_DT_VERTICAL,
     74     DecompositionWide = U_DT_WIDE,
     75 };
     76 
     77 enum CharCategory {
     78     NoCategory =  0,
     79     Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
     80     Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
     81     Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
     82     Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
     83     Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
     84     Letter_Other = U_MASK(U_OTHER_LETTER),
     85 
     86     Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
     87     Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
     88     Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
     89 
     90     Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
     91     Number_Letter = U_MASK(U_LETTER_NUMBER),
     92     Number_Other = U_MASK(U_OTHER_NUMBER),
     93 
     94     Separator_Space = U_MASK(U_SPACE_SEPARATOR),
     95     Separator_Line = U_MASK(U_LINE_SEPARATOR),
     96     Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
     97 
     98     Other_Control = U_MASK(U_CONTROL_CHAR),
     99     Other_Format = U_MASK(U_FORMAT_CHAR),
    100     Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
    101     Other_Surrogate = U_MASK(U_SURROGATE),
    102 
    103     Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
    104     Punctuation_Open = U_MASK(U_START_PUNCTUATION),
    105     Punctuation_Close = U_MASK(U_END_PUNCTUATION),
    106     Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
    107     Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
    108 
    109     Symbol_Math = U_MASK(U_MATH_SYMBOL),
    110     Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
    111     Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
    112     Symbol_Other = U_MASK(U_OTHER_SYMBOL),
    113 
    114     Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
    115     Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
    116 };
    117 
    118 inline UChar32 foldCase(UChar32 c)
    119 {
    120     return u_foldCase(c, U_FOLD_CASE_DEFAULT);
    121 }
    122 
    123 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
    124 {
    125     UErrorCode status = U_ZERO_ERROR;
    126     int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
    127     *error = !U_SUCCESS(status);
    128     return realLength;
    129 }
    130 
    131 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
    132 {
    133     UErrorCode status = U_ZERO_ERROR;
    134     int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
    135     *error = !!U_FAILURE(status);
    136     return realLength;
    137 }
    138 
    139 inline UChar32 toLower(UChar32 c)
    140 {
    141     return u_tolower(c);
    142 }
    143 
    144 inline UChar32 toUpper(UChar32 c)
    145 {
    146     return u_toupper(c);
    147 }
    148 
    149 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
    150 {
    151     UErrorCode status = U_ZERO_ERROR;
    152     int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
    153     *error = !!U_FAILURE(status);
    154     return realLength;
    155 }
    156 
    157 inline UChar32 toTitleCase(UChar32 c)
    158 {
    159     return u_totitle(c);
    160 }
    161 
    162 inline bool isArabicChar(UChar32 c)
    163 {
    164       return ublock_getCode(c) == UBLOCK_ARABIC;
    165 }
    166 
    167 inline bool isAlphanumeric(UChar32 c)
    168 {
    169     return u_isalnum(c);
    170 }
    171 
    172 inline bool isSeparatorSpace(UChar32 c)
    173 {
    174     return u_charType(c) == U_SPACE_SEPARATOR;
    175 }
    176 
    177 inline bool isPrintableChar(UChar32 c)
    178 {
    179     return !!u_isprint(c);
    180 }
    181 
    182 inline bool isPunct(UChar32 c)
    183 {
    184     return !!u_ispunct(c);
    185 }
    186 
    187 inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
    188 {
    189     return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT;
    190 }
    191 
    192 inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
    193 {
    194     int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
    195     return prop == U_LB_COMPLEX_CONTEXT || prop == U_LB_IDEOGRAPHIC;
    196 }
    197 
    198 inline UChar32 mirroredChar(UChar32 c)
    199 {
    200     return u_charMirror(c);
    201 }
    202 
    203 inline CharCategory category(UChar32 c)
    204 {
    205     return static_cast<CharCategory>(U_GET_GC_MASK(c));
    206 }
    207 
    208 inline Direction direction(UChar32 c)
    209 {
    210     return static_cast<Direction>(u_charDirection(c));
    211 }
    212 
    213 inline bool isLower(UChar32 c)
    214 {
    215     return !!u_islower(c);
    216 }
    217 
    218 inline uint8_t combiningClass(UChar32 c)
    219 {
    220     return u_getCombiningClass(c);
    221 }
    222 
    223 inline DecompositionType decompositionType(UChar32 c)
    224 {
    225     return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
    226 }
    227 
    228 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
    229 {
    230     return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
    231 }
    232 
    233 } }
    234 
    235 #endif // WTF_UNICODE_ICU_H
    236