1 /* 2 * Copyright (C) 2006 George Staikos <staikos (at) kde.org> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com> 4 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef WTF_UNICODE_QT4_H 24 #define WTF_UNICODE_QT4_H 25 26 #include <QChar> 27 #include <QString> 28 29 #include <config.h> 30 31 #include <stdint.h> 32 33 QT_BEGIN_NAMESPACE 34 namespace QUnicodeTables { 35 struct Properties { 36 ushort category : 8; 37 ushort line_break_class : 8; 38 ushort direction : 8; 39 ushort combiningClass :8; 40 ushort joining : 2; 41 signed short digitValue : 6; /* 5 needed */ 42 ushort unicodeVersion : 4; 43 ushort lowerCaseSpecial : 1; 44 ushort upperCaseSpecial : 1; 45 ushort titleCaseSpecial : 1; 46 ushort caseFoldSpecial : 1; /* currently unused */ 47 signed short mirrorDiff : 16; 48 signed short lowerCaseDiff : 16; 49 signed short upperCaseDiff : 16; 50 signed short titleCaseDiff : 16; 51 signed short caseFoldDiff : 16; 52 }; 53 Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4); 54 Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2); 55 } 56 QT_END_NAMESPACE 57 58 // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h 59 #if defined(Q_OS_WIN) || COMPILER(WINSCW) || COMPILER(RVCT) 60 typedef wchar_t UChar; 61 #else 62 typedef uint16_t UChar; 63 #endif 64 typedef uint32_t UChar32; 65 66 // some defines from ICU 67 68 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) 69 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) 70 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 71 #define U16_GET_SUPPLEMENTARY(lead, trail) \ 72 (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) 73 74 #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 75 #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 76 77 #define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) 78 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) 79 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) 80 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) 81 82 #define U16_NEXT(s, i, length, c) { \ 83 (c)=(s)[(i)++]; \ 84 if(U16_IS_LEAD(c)) { \ 85 uint16_t __c2; \ 86 if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 87 ++(i); \ 88 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 89 } \ 90 } \ 91 } 92 93 #define U16_PREV(s, start, i, c) { \ 94 (c)=(s)[--(i)]; \ 95 if(U16_IS_TRAIL(c)) { \ 96 uint16_t __c2; \ 97 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 98 --(i); \ 99 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 100 } \ 101 } \ 102 } 103 104 #define U_MASK(x) ((uint32_t)1<<(x)) 105 106 namespace WTF { 107 namespace Unicode { 108 109 enum Direction { 110 LeftToRight = QChar::DirL, 111 RightToLeft = QChar::DirR, 112 EuropeanNumber = QChar::DirEN, 113 EuropeanNumberSeparator = QChar::DirES, 114 EuropeanNumberTerminator = QChar::DirET, 115 ArabicNumber = QChar::DirAN, 116 CommonNumberSeparator = QChar::DirCS, 117 BlockSeparator = QChar::DirB, 118 SegmentSeparator = QChar::DirS, 119 WhiteSpaceNeutral = QChar::DirWS, 120 OtherNeutral = QChar::DirON, 121 LeftToRightEmbedding = QChar::DirLRE, 122 LeftToRightOverride = QChar::DirLRO, 123 RightToLeftArabic = QChar::DirAL, 124 RightToLeftEmbedding = QChar::DirRLE, 125 RightToLeftOverride = QChar::DirRLO, 126 PopDirectionalFormat = QChar::DirPDF, 127 NonSpacingMark = QChar::DirNSM, 128 BoundaryNeutral = QChar::DirBN 129 }; 130 131 enum DecompositionType { 132 DecompositionNone = QChar::NoDecomposition, 133 DecompositionCanonical = QChar::Canonical, 134 DecompositionCompat = QChar::Compat, 135 DecompositionCircle = QChar::Circle, 136 DecompositionFinal = QChar::Final, 137 DecompositionFont = QChar::Font, 138 DecompositionFraction = QChar::Fraction, 139 DecompositionInitial = QChar::Initial, 140 DecompositionIsolated = QChar::Isolated, 141 DecompositionMedial = QChar::Medial, 142 DecompositionNarrow = QChar::Narrow, 143 DecompositionNoBreak = QChar::NoBreak, 144 DecompositionSmall = QChar::Small, 145 DecompositionSquare = QChar::Square, 146 DecompositionSub = QChar::Sub, 147 DecompositionSuper = QChar::Super, 148 DecompositionVertical = QChar::Vertical, 149 DecompositionWide = QChar::Wide 150 }; 151 152 enum CharCategory { 153 NoCategory = 0, 154 Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), 155 Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), 156 Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), 157 Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), 158 Number_Letter = U_MASK(QChar::Number_Letter), 159 Number_Other = U_MASK(QChar::Number_Other), 160 Separator_Space = U_MASK(QChar::Separator_Space), 161 Separator_Line = U_MASK(QChar::Separator_Line), 162 Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), 163 Other_Control = U_MASK(QChar::Other_Control), 164 Other_Format = U_MASK(QChar::Other_Format), 165 Other_Surrogate = U_MASK(QChar::Other_Surrogate), 166 Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), 167 Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), 168 Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), 169 Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), 170 Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), 171 Letter_Modifier = U_MASK(QChar::Letter_Modifier), 172 Letter_Other = U_MASK(QChar::Letter_Other), 173 Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), 174 Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), 175 Punctuation_Open = U_MASK(QChar::Punctuation_Open), 176 Punctuation_Close = U_MASK(QChar::Punctuation_Close), 177 Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), 178 Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), 179 Punctuation_Other = U_MASK(QChar::Punctuation_Other), 180 Symbol_Math = U_MASK(QChar::Symbol_Math), 181 Symbol_Currency = U_MASK(QChar::Symbol_Currency), 182 Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), 183 Symbol_Other = U_MASK(QChar::Symbol_Other) 184 }; 185 186 187 // FIXME: handle surrogates correctly in all methods 188 189 inline UChar32 toLower(UChar32 ch) 190 { 191 return QChar::toLower(ch); 192 } 193 194 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 195 { 196 const UChar *e = src + srcLength; 197 const UChar *s = src; 198 UChar *r = result; 199 uint rindex = 0; 200 201 // this avoids one out of bounds check in the loop 202 if (s < e && QChar(*s).isLowSurrogate()) { 203 if (r) 204 r[rindex] = *s++; 205 ++rindex; 206 } 207 208 int needed = 0; 209 while (s < e && (rindex < uint(resultLength) || !r)) { 210 uint c = *s; 211 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) 212 c = QChar::surrogateToUcs4(*(s - 1), c); 213 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); 214 if (prop->lowerCaseSpecial) { 215 QString qstring; 216 if (c < 0x10000) { 217 qstring += QChar(c); 218 } else { 219 qstring += QChar(*(s-1)); 220 qstring += QChar(*s); 221 } 222 qstring = qstring.toLower(); 223 for (int i = 0; i < qstring.length(); ++i) { 224 if (rindex >= uint(resultLength)) { 225 needed += qstring.length() - i; 226 break; 227 } 228 if (r) 229 r[rindex] = qstring.at(i).unicode(); 230 ++rindex; 231 } 232 } else { 233 if (r) 234 r[rindex] = *s + prop->lowerCaseDiff; 235 ++rindex; 236 } 237 ++s; 238 } 239 if (s < e) 240 needed += e - s; 241 *error = (needed != 0); 242 if (rindex < uint(resultLength)) 243 r[rindex] = 0; 244 return rindex + needed; 245 } 246 247 inline UChar32 toUpper(UChar32 ch) 248 { 249 return QChar::toUpper(ch); 250 } 251 252 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 253 { 254 const UChar *e = src + srcLength; 255 const UChar *s = src; 256 UChar *r = result; 257 int rindex = 0; 258 259 // this avoids one out of bounds check in the loop 260 if (s < e && QChar(*s).isLowSurrogate()) { 261 if (r) 262 r[rindex] = *s++; 263 ++rindex; 264 } 265 266 int needed = 0; 267 while (s < e && (rindex < resultLength || !r)) { 268 uint c = *s; 269 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) 270 c = QChar::surrogateToUcs4(*(s - 1), c); 271 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); 272 if (prop->upperCaseSpecial) { 273 QString qstring; 274 if (c < 0x10000) { 275 qstring += QChar(c); 276 } else { 277 qstring += QChar(*(s-1)); 278 qstring += QChar(*s); 279 } 280 qstring = qstring.toUpper(); 281 for (int i = 0; i < qstring.length(); ++i) { 282 if (rindex >= resultLength) { 283 needed += qstring.length() - i; 284 break; 285 } 286 if (r) 287 r[rindex] = qstring.at(i).unicode(); 288 ++rindex; 289 } 290 } else { 291 if (r) 292 r[rindex] = *s + prop->upperCaseDiff; 293 ++rindex; 294 } 295 ++s; 296 } 297 if (s < e) 298 needed += e - s; 299 *error = (needed != 0); 300 if (rindex < resultLength) 301 r[rindex] = 0; 302 return rindex + needed; 303 } 304 305 inline int toTitleCase(UChar32 c) 306 { 307 return QChar::toTitleCase(c); 308 } 309 310 inline UChar32 foldCase(UChar32 c) 311 { 312 return QChar::toCaseFolded(c); 313 } 314 315 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 316 { 317 // FIXME: handle special casing. Easiest with some low level API in Qt 318 *error = false; 319 if (resultLength < srcLength) { 320 *error = true; 321 return srcLength; 322 } 323 for (int i = 0; i < srcLength; ++i) 324 result[i] = QChar::toCaseFolded(ushort(src[i])); 325 return srcLength; 326 } 327 328 inline bool isArabicChar(UChar32 c) 329 { 330 return c >= 0x0600 && c <= 0x06FF; 331 } 332 333 inline bool isPrintableChar(UChar32 c) 334 { 335 const uint test = U_MASK(QChar::Other_Control) | 336 U_MASK(QChar::Other_NotAssigned); 337 return !(U_MASK(QChar::category(c)) & test); 338 } 339 340 inline bool isSeparatorSpace(UChar32 c) 341 { 342 return QChar::category(c) == QChar::Separator_Space; 343 } 344 345 inline bool isPunct(UChar32 c) 346 { 347 const uint test = U_MASK(QChar::Punctuation_Connector) | 348 U_MASK(QChar::Punctuation_Dash) | 349 U_MASK(QChar::Punctuation_Open) | 350 U_MASK(QChar::Punctuation_Close) | 351 U_MASK(QChar::Punctuation_InitialQuote) | 352 U_MASK(QChar::Punctuation_FinalQuote) | 353 U_MASK(QChar::Punctuation_Other); 354 return U_MASK(QChar::category(c)) & test; 355 } 356 357 inline bool isLower(UChar32 c) 358 { 359 return QChar::category(c) == QChar::Letter_Lowercase; 360 } 361 362 inline bool hasLineBreakingPropertyComplexContext(UChar32) 363 { 364 // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context). 365 return false; 366 } 367 368 inline UChar32 mirroredChar(UChar32 c) 369 { 370 return QChar::mirroredChar(c); 371 } 372 373 inline uint8_t combiningClass(UChar32 c) 374 { 375 return QChar::combiningClass(c); 376 } 377 378 inline DecompositionType decompositionType(UChar32 c) 379 { 380 return (DecompositionType)QChar::decompositionTag(c); 381 } 382 383 inline int umemcasecmp(const UChar* a, const UChar* b, int len) 384 { 385 // handle surrogates correctly 386 for (int i = 0; i < len; ++i) { 387 uint c1 = QChar::toCaseFolded(ushort(a[i])); 388 uint c2 = QChar::toCaseFolded(ushort(b[i])); 389 if (c1 != c2) 390 return c1 - c2; 391 } 392 return 0; 393 } 394 395 inline Direction direction(UChar32 c) 396 { 397 return (Direction)QChar::direction(c); 398 } 399 400 inline CharCategory category(UChar32 c) 401 { 402 return (CharCategory) U_MASK(QChar::category(c)); 403 } 404 405 } } 406 407 #endif // WTF_UNICODE_QT4_H 408