1 /* 2 * Copyright (C) 2006 George Staikos <staikos (at) kde.org> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com> 4 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef WTF_UNICODE_ICU_H 24 #define WTF_UNICODE_ICU_H 25 26 #include <unicode/brkiter.h> 27 #include <unicode/rbbi.h> 28 #include <unicode/uchar.h> 29 #include <unicode/uscript.h> 30 #include <unicode/ustring.h> 31 #include <unicode/utf16.h> 32 33 namespace WTF { 34 35 namespace Unicode { 36 37 enum Direction { 38 LeftToRight = U_LEFT_TO_RIGHT, 39 RightToLeft = U_RIGHT_TO_LEFT, 40 EuropeanNumber = U_EUROPEAN_NUMBER, 41 EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR, 42 EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR, 43 ArabicNumber = U_ARABIC_NUMBER, 44 CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR, 45 BlockSeparator = U_BLOCK_SEPARATOR, 46 SegmentSeparator = U_SEGMENT_SEPARATOR, 47 WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL, 48 OtherNeutral = U_OTHER_NEUTRAL, 49 LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING, 50 LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE, 51 RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC, 52 RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING, 53 RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE, 54 PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT, 55 NonSpacingMark = U_DIR_NON_SPACING_MARK, 56 BoundaryNeutral = U_BOUNDARY_NEUTRAL 57 }; 58 59 enum DecompositionType { 60 DecompositionNone = U_DT_NONE, 61 DecompositionCanonical = U_DT_CANONICAL, 62 DecompositionCompat = U_DT_COMPAT, 63 DecompositionCircle = U_DT_CIRCLE, 64 DecompositionFinal = U_DT_FINAL, 65 DecompositionFont = U_DT_FONT, 66 DecompositionFraction = U_DT_FRACTION, 67 DecompositionInitial = U_DT_INITIAL, 68 DecompositionIsolated = U_DT_ISOLATED, 69 DecompositionMedial = U_DT_MEDIAL, 70 DecompositionNarrow = U_DT_NARROW, 71 DecompositionNoBreak = U_DT_NOBREAK, 72 DecompositionSmall = U_DT_SMALL, 73 DecompositionSquare = U_DT_SQUARE, 74 DecompositionSub = U_DT_SUB, 75 DecompositionSuper = U_DT_SUPER, 76 DecompositionVertical = U_DT_VERTICAL, 77 DecompositionWide = U_DT_WIDE, 78 }; 79 80 enum CharCategory { 81 NoCategory = 0, 82 Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES), 83 Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER), 84 Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER), 85 Letter_Titlecase = U_MASK(U_TITLECASE_LETTER), 86 Letter_Modifier = U_MASK(U_MODIFIER_LETTER), 87 Letter_Other = U_MASK(U_OTHER_LETTER), 88 89 Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK), 90 Mark_Enclosing = U_MASK(U_ENCLOSING_MARK), 91 Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK), 92 93 Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER), 94 Number_Letter = U_MASK(U_LETTER_NUMBER), 95 Number_Other = U_MASK(U_OTHER_NUMBER), 96 97 Separator_Space = U_MASK(U_SPACE_SEPARATOR), 98 Separator_Line = U_MASK(U_LINE_SEPARATOR), 99 Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR), 100 101 Other_Control = U_MASK(U_CONTROL_CHAR), 102 Other_Format = U_MASK(U_FORMAT_CHAR), 103 Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR), 104 Other_Surrogate = U_MASK(U_SURROGATE), 105 106 Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION), 107 Punctuation_Open = U_MASK(U_START_PUNCTUATION), 108 Punctuation_Close = U_MASK(U_END_PUNCTUATION), 109 Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION), 110 Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION), 111 112 Symbol_Math = U_MASK(U_MATH_SYMBOL), 113 Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL), 114 Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL), 115 Symbol_Other = U_MASK(U_OTHER_SYMBOL), 116 117 Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION), 118 Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION) 119 }; 120 121 inline UChar32 foldCase(UChar32 c) 122 { 123 return u_foldCase(c, U_FOLD_CASE_DEFAULT); 124 } 125 126 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 127 { 128 UErrorCode status = U_ZERO_ERROR; 129 int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status); 130 *error = !U_SUCCESS(status); 131 return realLength; 132 } 133 134 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 135 { 136 UErrorCode status = U_ZERO_ERROR; 137 int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status); 138 *error = !!U_FAILURE(status); 139 return realLength; 140 } 141 142 inline UChar32 toLower(UChar32 c) 143 { 144 return u_tolower(c); 145 } 146 147 inline UChar32 toUpper(UChar32 c) 148 { 149 return u_toupper(c); 150 } 151 152 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 153 { 154 UErrorCode status = U_ZERO_ERROR; 155 int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status); 156 *error = !!U_FAILURE(status); 157 return realLength; 158 } 159 160 inline UChar32 toTitleCase(UChar32 c) 161 { 162 return u_totitle(c); 163 } 164 165 inline bool isArabicChar(UChar32 c) 166 { 167 return ublock_getCode(c) == UBLOCK_ARABIC; 168 } 169 170 inline bool isAlphanumeric(UChar32 c) 171 { 172 return u_isalnum(c); 173 } 174 175 inline bool isSeparatorSpace(UChar32 c) 176 { 177 return u_charType(c) == U_SPACE_SEPARATOR; 178 } 179 180 inline bool isPrintableChar(UChar32 c) 181 { 182 return !!u_isprint(c); 183 } 184 185 inline bool isPunct(UChar32 c) 186 { 187 return !!u_ispunct(c); 188 } 189 190 inline bool hasLineBreakingPropertyComplexContext(UChar32 c) 191 { 192 return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT; 193 } 194 195 inline UChar32 mirroredChar(UChar32 c) 196 { 197 return u_charMirror(c); 198 } 199 200 inline CharCategory category(UChar32 c) 201 { 202 return static_cast<CharCategory>(U_GET_GC_MASK(c)); 203 } 204 205 inline Direction direction(UChar32 c) 206 { 207 return static_cast<Direction>(u_charDirection(c)); 208 } 209 210 inline bool isLower(UChar32 c) 211 { 212 return !!u_islower(c); 213 } 214 215 inline uint8_t combiningClass(UChar32 c) 216 { 217 return u_getCombiningClass(c); 218 } 219 220 inline DecompositionType decompositionType(UChar32 c) 221 { 222 return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE)); 223 } 224 225 inline int umemcasecmp(const UChar* a, const UChar* b, int len) 226 { 227 return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT); 228 } 229 230 } // namespace Unicode 231 232 } // namespace WTF 233 234 #endif // WTF_UNICODE_ICU_H 235