1 /* 2 * Copyright (C) 2006 George Staikos <staikos (at) kde.org> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com> 4 * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. 5 * Copyright (C) 2007-2009 Torch Mobile, Inc. 6 * Copyright (C) 2010 Company 100, Inc. 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25 #ifndef UnicodeBrew_h 26 #define UnicodeBrew_h 27 28 #include "UnicodeFromICU.h" 29 #include "UnicodeMacrosFromICU.h" 30 31 namespace WTF { 32 namespace Unicode { 33 34 enum Direction { 35 LeftToRight = ICU::U_LEFT_TO_RIGHT, 36 RightToLeft = ICU::U_RIGHT_TO_LEFT, 37 EuropeanNumber = ICU::U_EUROPEAN_NUMBER, 38 EuropeanNumberSeparator = ICU::U_EUROPEAN_NUMBER_SEPARATOR, 39 EuropeanNumberTerminator = ICU::U_EUROPEAN_NUMBER_TERMINATOR, 40 ArabicNumber = ICU::U_ARABIC_NUMBER, 41 CommonNumberSeparator = ICU::U_COMMON_NUMBER_SEPARATOR, 42 BlockSeparator = ICU::U_BLOCK_SEPARATOR, 43 SegmentSeparator = ICU::U_SEGMENT_SEPARATOR, 44 WhiteSpaceNeutral = ICU::U_WHITE_SPACE_NEUTRAL, 45 OtherNeutral = ICU::U_OTHER_NEUTRAL, 46 LeftToRightEmbedding = ICU::U_LEFT_TO_RIGHT_EMBEDDING, 47 LeftToRightOverride = ICU::U_LEFT_TO_RIGHT_OVERRIDE, 48 RightToLeftArabic = ICU::U_RIGHT_TO_LEFT_ARABIC, 49 RightToLeftEmbedding = ICU::U_RIGHT_TO_LEFT_EMBEDDING, 50 RightToLeftOverride = ICU::U_RIGHT_TO_LEFT_OVERRIDE, 51 PopDirectionalFormat = ICU::U_POP_DIRECTIONAL_FORMAT, 52 NonSpacingMark = ICU::U_DIR_NON_SPACING_MARK, 53 BoundaryNeutral = ICU::U_BOUNDARY_NEUTRAL 54 }; 55 56 enum DecompositionType { 57 DecompositionNone = ICU::U_DT_NONE, 58 DecompositionCanonical = ICU::U_DT_CANONICAL, 59 DecompositionCompat = ICU::U_DT_COMPAT, 60 DecompositionCircle = ICU::U_DT_CIRCLE, 61 DecompositionFinal = ICU::U_DT_FINAL, 62 DecompositionFont = ICU::U_DT_FONT, 63 DecompositionFraction = ICU::U_DT_FRACTION, 64 DecompositionInitial = ICU::U_DT_INITIAL, 65 DecompositionIsolated = ICU::U_DT_ISOLATED, 66 DecompositionMedial = ICU::U_DT_MEDIAL, 67 DecompositionNarrow = ICU::U_DT_NARROW, 68 DecompositionNoBreak = ICU::U_DT_NOBREAK, 69 DecompositionSmall = ICU::U_DT_SMALL, 70 DecompositionSquare = ICU::U_DT_SQUARE, 71 DecompositionSub = ICU::U_DT_SUB, 72 DecompositionSuper = ICU::U_DT_SUPER, 73 DecompositionVertical = ICU::U_DT_VERTICAL, 74 DecompositionWide = ICU::U_DT_WIDE, 75 }; 76 77 enum CharCategory { 78 NoCategory = 0, 79 Other_NotAssigned = TO_MASK(ICU::U_GENERAL_OTHER_TYPES), 80 Letter_Uppercase = TO_MASK(ICU::U_UPPERCASE_LETTER), 81 Letter_Lowercase = TO_MASK(ICU::U_LOWERCASE_LETTER), 82 Letter_Titlecase = TO_MASK(ICU::U_TITLECASE_LETTER), 83 Letter_Modifier = TO_MASK(ICU::U_MODIFIER_LETTER), 84 Letter_Other = TO_MASK(ICU::U_OTHER_LETTER), 85 86 Mark_NonSpacing = TO_MASK(ICU::U_NON_SPACING_MARK), 87 Mark_Enclosing = TO_MASK(ICU::U_ENCLOSING_MARK), 88 Mark_SpacingCombining = TO_MASK(ICU::U_COMBINING_SPACING_MARK), 89 90 Number_DecimalDigit = TO_MASK(ICU::U_DECIMAL_DIGIT_NUMBER), 91 Number_Letter = TO_MASK(ICU::U_LETTER_NUMBER), 92 Number_Other = TO_MASK(ICU::U_OTHER_NUMBER), 93 94 Separator_Space = TO_MASK(ICU::U_SPACE_SEPARATOR), 95 Separator_Line = TO_MASK(ICU::U_LINE_SEPARATOR), 96 Separator_Paragraph = TO_MASK(ICU::U_PARAGRAPH_SEPARATOR), 97 98 Other_Control = TO_MASK(ICU::U_CONTROL_CHAR), 99 Other_Format = TO_MASK(ICU::U_FORMAT_CHAR), 100 Other_PrivateUse = TO_MASK(ICU::U_PRIVATE_USE_CHAR), 101 Other_Surrogate = TO_MASK(ICU::U_SURROGATE), 102 103 Punctuation_Dash = TO_MASK(ICU::U_DASH_PUNCTUATION), 104 Punctuation_Open = TO_MASK(ICU::U_START_PUNCTUATION), 105 Punctuation_Close = TO_MASK(ICU::U_END_PUNCTUATION), 106 Punctuation_Connector = TO_MASK(ICU::U_CONNECTOR_PUNCTUATION), 107 Punctuation_Other = TO_MASK(ICU::U_OTHER_PUNCTUATION), 108 109 Symbol_Math = TO_MASK(ICU::U_MATH_SYMBOL), 110 Symbol_Currency = TO_MASK(ICU::U_CURRENCY_SYMBOL), 111 Symbol_Modifier = TO_MASK(ICU::U_MODIFIER_SYMBOL), 112 Symbol_Other = TO_MASK(ICU::U_OTHER_SYMBOL), 113 114 Punctuation_InitialQuote = TO_MASK(ICU::U_INITIAL_PUNCTUATION), 115 Punctuation_FinalQuote = TO_MASK(ICU::U_FINAL_PUNCTUATION) 116 }; 117 118 UChar foldCase(UChar); 119 120 int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); 121 122 int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); 123 124 UChar toUpper(UChar); 125 UChar toLower(UChar); 126 127 bool isUpper(UChar); 128 129 int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); 130 131 UChar toTitleCase(UChar); 132 133 inline bool isArabicChar(UChar32 c) 134 { 135 return c >= 0x0600 && c <= 0x06FF; 136 } 137 138 bool isAlphanumeric(UChar); 139 140 CharCategory category(unsigned int); 141 142 inline bool isSeparatorSpace(UChar c) 143 { 144 return category(c) == Separator_Space; 145 } 146 147 bool isPrintableChar(UChar); 148 149 bool isDigit(UChar); 150 151 bool isPunct(UChar); 152 153 inline bool hasLineBreakingPropertyComplexContext(UChar32) 154 { 155 // FIXME: implement! 156 return false; 157 } 158 159 inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) 160 { 161 // FIXME 162 return false; 163 } 164 165 UChar mirroredChar(UChar32); 166 167 Direction direction(UChar32); 168 169 bool isLower(UChar); 170 171 int digitValue(UChar); 172 173 unsigned char combiningClass(UChar32); 174 175 DecompositionType decompositionType(UChar32); 176 177 inline int umemcasecmp(const UChar* a, const UChar* b, int len) 178 { 179 for (int i = 0; i < len; ++i) { 180 UChar c1 = foldCase(a[i]); 181 UChar c2 = foldCase(b[i]); 182 if (c1 != c2) 183 return c1 - c2; 184 } 185 return 0; 186 } 187 188 bool isSpace(UChar); 189 bool isLetter(UChar); 190 191 } // namespace Unicode 192 } // namespace WTF 193 194 #endif 195