1 /* 2 * Copyright (C) 2006 George Staikos <staikos (at) kde.org> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com> 4 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef WTF_UNICODE_QT4_H 24 #define WTF_UNICODE_QT4_H 25 26 #include "UnicodeMacrosFromICU.h" 27 28 #include <QChar> 29 #include <QString> 30 31 #include <config.h> 32 33 #include <stdint.h> 34 #if USE(QT_ICU_TEXT_BREAKING) 35 #include <unicode/ubrk.h> 36 #endif 37 38 QT_BEGIN_NAMESPACE 39 namespace QUnicodeTables { 40 struct Properties { 41 ushort category : 8; 42 ushort line_break_class : 8; 43 ushort direction : 8; 44 ushort combiningClass :8; 45 ushort joining : 2; 46 signed short digitValue : 6; /* 5 needed */ 47 ushort unicodeVersion : 4; 48 ushort lowerCaseSpecial : 1; 49 ushort upperCaseSpecial : 1; 50 ushort titleCaseSpecial : 1; 51 ushort caseFoldSpecial : 1; /* currently unused */ 52 signed short mirrorDiff : 16; 53 signed short lowerCaseDiff : 16; 54 signed short upperCaseDiff : 16; 55 signed short titleCaseDiff : 16; 56 signed short caseFoldDiff : 16; 57 }; 58 Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4); 59 Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2); 60 } 61 QT_END_NAMESPACE 62 63 // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h 64 #if defined(Q_OS_WIN) || COMPILER(WINSCW) || (COMPILER(RVCT) && !OS(LINUX)) 65 typedef wchar_t UChar; 66 #else 67 typedef uint16_t UChar; 68 #endif 69 70 #if !USE(QT_ICU_TEXT_BREAKING) 71 typedef uint32_t UChar32; 72 #endif 73 74 namespace WTF { 75 namespace Unicode { 76 77 enum Direction { 78 LeftToRight = QChar::DirL, 79 RightToLeft = QChar::DirR, 80 EuropeanNumber = QChar::DirEN, 81 EuropeanNumberSeparator = QChar::DirES, 82 EuropeanNumberTerminator = QChar::DirET, 83 ArabicNumber = QChar::DirAN, 84 CommonNumberSeparator = QChar::DirCS, 85 BlockSeparator = QChar::DirB, 86 SegmentSeparator = QChar::DirS, 87 WhiteSpaceNeutral = QChar::DirWS, 88 OtherNeutral = QChar::DirON, 89 LeftToRightEmbedding = QChar::DirLRE, 90 LeftToRightOverride = QChar::DirLRO, 91 RightToLeftArabic = QChar::DirAL, 92 RightToLeftEmbedding = QChar::DirRLE, 93 RightToLeftOverride = QChar::DirRLO, 94 PopDirectionalFormat = QChar::DirPDF, 95 NonSpacingMark = QChar::DirNSM, 96 BoundaryNeutral = QChar::DirBN 97 }; 98 99 enum DecompositionType { 100 DecompositionNone = QChar::NoDecomposition, 101 DecompositionCanonical = QChar::Canonical, 102 DecompositionCompat = QChar::Compat, 103 DecompositionCircle = QChar::Circle, 104 DecompositionFinal = QChar::Final, 105 DecompositionFont = QChar::Font, 106 DecompositionFraction = QChar::Fraction, 107 DecompositionInitial = QChar::Initial, 108 DecompositionIsolated = QChar::Isolated, 109 DecompositionMedial = QChar::Medial, 110 DecompositionNarrow = QChar::Narrow, 111 DecompositionNoBreak = QChar::NoBreak, 112 DecompositionSmall = QChar::Small, 113 DecompositionSquare = QChar::Square, 114 DecompositionSub = QChar::Sub, 115 DecompositionSuper = QChar::Super, 116 DecompositionVertical = QChar::Vertical, 117 DecompositionWide = QChar::Wide 118 }; 119 120 enum CharCategory { 121 NoCategory = 0, 122 Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), 123 Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), 124 Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), 125 Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), 126 Number_Letter = U_MASK(QChar::Number_Letter), 127 Number_Other = U_MASK(QChar::Number_Other), 128 Separator_Space = U_MASK(QChar::Separator_Space), 129 Separator_Line = U_MASK(QChar::Separator_Line), 130 Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), 131 Other_Control = U_MASK(QChar::Other_Control), 132 Other_Format = U_MASK(QChar::Other_Format), 133 Other_Surrogate = U_MASK(QChar::Other_Surrogate), 134 Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), 135 Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), 136 Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), 137 Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), 138 Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), 139 Letter_Modifier = U_MASK(QChar::Letter_Modifier), 140 Letter_Other = U_MASK(QChar::Letter_Other), 141 Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), 142 Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), 143 Punctuation_Open = U_MASK(QChar::Punctuation_Open), 144 Punctuation_Close = U_MASK(QChar::Punctuation_Close), 145 Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), 146 Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), 147 Punctuation_Other = U_MASK(QChar::Punctuation_Other), 148 Symbol_Math = U_MASK(QChar::Symbol_Math), 149 Symbol_Currency = U_MASK(QChar::Symbol_Currency), 150 Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), 151 Symbol_Other = U_MASK(QChar::Symbol_Other) 152 }; 153 154 155 // FIXME: handle surrogates correctly in all methods 156 157 inline UChar32 toLower(UChar32 ch) 158 { 159 return QChar::toLower(uint32_t(ch)); 160 } 161 162 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 163 { 164 const UChar *e = src + srcLength; 165 const UChar *s = src; 166 UChar *r = result; 167 uint rindex = 0; 168 169 // this avoids one out of bounds check in the loop 170 if (s < e && QChar(*s).isLowSurrogate()) { 171 if (r) 172 r[rindex] = *s++; 173 ++rindex; 174 } 175 176 int needed = 0; 177 while (s < e && (rindex < uint(resultLength) || !r)) { 178 uint c = *s; 179 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) 180 c = QChar::surrogateToUcs4(*(s - 1), c); 181 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); 182 if (prop->lowerCaseSpecial) { 183 QString qstring; 184 if (c < 0x10000) { 185 qstring += QChar(c); 186 } else { 187 qstring += QChar(*(s-1)); 188 qstring += QChar(*s); 189 } 190 qstring = qstring.toLower(); 191 for (int i = 0; i < qstring.length(); ++i) { 192 if (rindex >= uint(resultLength)) { 193 needed += qstring.length() - i; 194 break; 195 } 196 if (r) 197 r[rindex] = qstring.at(i).unicode(); 198 ++rindex; 199 } 200 } else { 201 if (r) 202 r[rindex] = *s + prop->lowerCaseDiff; 203 ++rindex; 204 } 205 ++s; 206 } 207 if (s < e) 208 needed += e - s; 209 *error = (needed != 0); 210 if (rindex < uint(resultLength)) 211 r[rindex] = 0; 212 return rindex + needed; 213 } 214 215 inline UChar32 toUpper(UChar32 c) 216 { 217 return QChar::toUpper(uint32_t(c)); 218 } 219 220 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 221 { 222 const UChar *e = src + srcLength; 223 const UChar *s = src; 224 UChar *r = result; 225 int rindex = 0; 226 227 // this avoids one out of bounds check in the loop 228 if (s < e && QChar(*s).isLowSurrogate()) { 229 if (r) 230 r[rindex] = *s++; 231 ++rindex; 232 } 233 234 int needed = 0; 235 while (s < e && (rindex < resultLength || !r)) { 236 uint c = *s; 237 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) 238 c = QChar::surrogateToUcs4(*(s - 1), c); 239 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); 240 if (prop->upperCaseSpecial) { 241 QString qstring; 242 if (c < 0x10000) { 243 qstring += QChar(c); 244 } else { 245 qstring += QChar(*(s-1)); 246 qstring += QChar(*s); 247 } 248 qstring = qstring.toUpper(); 249 for (int i = 0; i < qstring.length(); ++i) { 250 if (rindex >= resultLength) { 251 needed += qstring.length() - i; 252 break; 253 } 254 if (r) 255 r[rindex] = qstring.at(i).unicode(); 256 ++rindex; 257 } 258 } else { 259 if (r) 260 r[rindex] = *s + prop->upperCaseDiff; 261 ++rindex; 262 } 263 ++s; 264 } 265 if (s < e) 266 needed += e - s; 267 *error = (needed != 0); 268 if (rindex < resultLength) 269 r[rindex] = 0; 270 return rindex + needed; 271 } 272 273 inline int toTitleCase(UChar32 c) 274 { 275 return QChar::toTitleCase(uint32_t(c)); 276 } 277 278 inline UChar32 foldCase(UChar32 c) 279 { 280 return QChar::toCaseFolded(uint32_t(c)); 281 } 282 283 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 284 { 285 // FIXME: handle special casing. Easiest with some low level API in Qt 286 *error = false; 287 if (resultLength < srcLength) { 288 *error = true; 289 return srcLength; 290 } 291 for (int i = 0; i < srcLength; ++i) 292 result[i] = QChar::toCaseFolded(ushort(src[i])); 293 return srcLength; 294 } 295 296 inline bool isArabicChar(UChar32 c) 297 { 298 return c >= 0x0600 && c <= 0x06FF; 299 } 300 301 inline bool isPrintableChar(UChar32 c) 302 { 303 const uint test = U_MASK(QChar::Other_Control) | 304 U_MASK(QChar::Other_NotAssigned); 305 return !(U_MASK(QChar::category(uint32_t(c))) & test); 306 } 307 308 inline bool isSeparatorSpace(UChar32 c) 309 { 310 return QChar::category(uint32_t(c)) == QChar::Separator_Space; 311 } 312 313 inline bool isPunct(UChar32 c) 314 { 315 const uint test = U_MASK(QChar::Punctuation_Connector) | 316 U_MASK(QChar::Punctuation_Dash) | 317 U_MASK(QChar::Punctuation_Open) | 318 U_MASK(QChar::Punctuation_Close) | 319 U_MASK(QChar::Punctuation_InitialQuote) | 320 U_MASK(QChar::Punctuation_FinalQuote) | 321 U_MASK(QChar::Punctuation_Other); 322 return U_MASK(QChar::category(uint32_t(c))) & test; 323 } 324 325 inline bool isLower(UChar32 c) 326 { 327 return QChar::category(uint32_t(c)) == QChar::Letter_Lowercase; 328 } 329 330 inline bool hasLineBreakingPropertyComplexContext(UChar32) 331 { 332 // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context). 333 return false; 334 } 335 336 inline UChar32 mirroredChar(UChar32 c) 337 { 338 return QChar::mirroredChar(uint32_t(c)); 339 } 340 341 inline uint8_t combiningClass(UChar32 c) 342 { 343 return QChar::combiningClass(uint32_t(c)); 344 } 345 346 inline DecompositionType decompositionType(UChar32 c) 347 { 348 return (DecompositionType)QChar::decompositionTag(c); 349 } 350 351 inline int umemcasecmp(const UChar* a, const UChar* b, int len) 352 { 353 // handle surrogates correctly 354 for (int i = 0; i < len; ++i) { 355 uint c1 = QChar::toCaseFolded(ushort(a[i])); 356 uint c2 = QChar::toCaseFolded(ushort(b[i])); 357 if (c1 != c2) 358 return c1 - c2; 359 } 360 return 0; 361 } 362 363 inline Direction direction(UChar32 c) 364 { 365 return (Direction)QChar::direction(uint32_t(c)); 366 } 367 368 inline CharCategory category(UChar32 c) 369 { 370 return (CharCategory) U_MASK(QChar::category(uint32_t(c))); 371 } 372 373 } } 374 375 #endif // WTF_UNICODE_QT4_H 376