Home | History | Annotate | Download | only in qt4
      1 /*
      2  *  Copyright (C) 2006 George Staikos <staikos (at) kde.org>
      3  *  Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com>
      4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      5  *
      6  *  This library is free software; you can redistribute it and/or
      7  *  modify it under the terms of the GNU Library General Public
      8  *  License as published by the Free Software Foundation; either
      9  *  version 2 of the License, or (at your option) any later version.
     10  *
     11  *  This library is distributed in the hope that it will be useful,
     12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  *  Library General Public License for more details.
     15  *
     16  *  You should have received a copy of the GNU Library General Public License
     17  *  along with this library; see the file COPYING.LIB.  If not, write to
     18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  *  Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef WTF_UNICODE_QT4_H
     24 #define WTF_UNICODE_QT4_H
     25 
     26 #include <QChar>
     27 #include <QString>
     28 
     29 #include <config.h>
     30 
     31 #include <stdint.h>
     32 
     33 QT_BEGIN_NAMESPACE
     34 namespace QUnicodeTables {
     35     struct Properties {
     36         ushort category : 8;
     37         ushort line_break_class : 8;
     38         ushort direction : 8;
     39         ushort combiningClass :8;
     40         ushort joining : 2;
     41         signed short digitValue : 6; /* 5 needed */
     42         ushort unicodeVersion : 4;
     43         ushort lowerCaseSpecial : 1;
     44         ushort upperCaseSpecial : 1;
     45         ushort titleCaseSpecial : 1;
     46         ushort caseFoldSpecial : 1; /* currently unused */
     47         signed short mirrorDiff : 16;
     48         signed short lowerCaseDiff : 16;
     49         signed short upperCaseDiff : 16;
     50         signed short titleCaseDiff : 16;
     51         signed short caseFoldDiff : 16;
     52     };
     53     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
     54     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
     55 }
     56 QT_END_NAMESPACE
     57 
     58 // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h
     59 #if defined(Q_OS_WIN) || COMPILER(WINSCW) || COMPILER(RVCT)
     60 typedef wchar_t UChar;
     61 #else
     62 typedef uint16_t UChar;
     63 #endif
     64 typedef uint32_t UChar32;
     65 
     66 // some defines from ICU
     67 
     68 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
     69 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
     70 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
     71 #define U16_GET_SUPPLEMENTARY(lead, trail) \
     72     (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
     73 
     74 #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
     75 #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
     76 
     77 #define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
     78 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
     79 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
     80 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
     81 
     82 #define U16_NEXT(s, i, length, c) { \
     83     (c)=(s)[(i)++]; \
     84     if(U16_IS_LEAD(c)) { \
     85         uint16_t __c2; \
     86         if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
     87             ++(i); \
     88             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
     89         } \
     90     } \
     91 }
     92 
     93 #define U16_PREV(s, start, i, c) { \
     94     (c)=(s)[--(i)]; \
     95     if(U16_IS_TRAIL(c)) { \
     96         uint16_t __c2; \
     97         if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
     98             --(i); \
     99             (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    100         } \
    101     } \
    102 }
    103 
    104 #define U_MASK(x) ((uint32_t)1<<(x))
    105 
    106 namespace WTF {
    107 namespace Unicode {
    108 
    109 enum Direction {
    110     LeftToRight = QChar::DirL,
    111     RightToLeft = QChar::DirR,
    112     EuropeanNumber = QChar::DirEN,
    113     EuropeanNumberSeparator = QChar::DirES,
    114     EuropeanNumberTerminator = QChar::DirET,
    115     ArabicNumber = QChar::DirAN,
    116     CommonNumberSeparator = QChar::DirCS,
    117     BlockSeparator = QChar::DirB,
    118     SegmentSeparator = QChar::DirS,
    119     WhiteSpaceNeutral = QChar::DirWS,
    120     OtherNeutral = QChar::DirON,
    121     LeftToRightEmbedding = QChar::DirLRE,
    122     LeftToRightOverride = QChar::DirLRO,
    123     RightToLeftArabic = QChar::DirAL,
    124     RightToLeftEmbedding = QChar::DirRLE,
    125     RightToLeftOverride = QChar::DirRLO,
    126     PopDirectionalFormat = QChar::DirPDF,
    127     NonSpacingMark = QChar::DirNSM,
    128     BoundaryNeutral = QChar::DirBN
    129 };
    130 
    131 enum DecompositionType {
    132     DecompositionNone = QChar::NoDecomposition,
    133     DecompositionCanonical = QChar::Canonical,
    134     DecompositionCompat = QChar::Compat,
    135     DecompositionCircle = QChar::Circle,
    136     DecompositionFinal = QChar::Final,
    137     DecompositionFont = QChar::Font,
    138     DecompositionFraction = QChar::Fraction,
    139     DecompositionInitial = QChar::Initial,
    140     DecompositionIsolated = QChar::Isolated,
    141     DecompositionMedial = QChar::Medial,
    142     DecompositionNarrow = QChar::Narrow,
    143     DecompositionNoBreak = QChar::NoBreak,
    144     DecompositionSmall = QChar::Small,
    145     DecompositionSquare = QChar::Square,
    146     DecompositionSub = QChar::Sub,
    147     DecompositionSuper = QChar::Super,
    148     DecompositionVertical = QChar::Vertical,
    149     DecompositionWide = QChar::Wide
    150 };
    151 
    152 enum CharCategory {
    153     NoCategory = 0,
    154     Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
    155     Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
    156     Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
    157     Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
    158     Number_Letter = U_MASK(QChar::Number_Letter),
    159     Number_Other = U_MASK(QChar::Number_Other),
    160     Separator_Space = U_MASK(QChar::Separator_Space),
    161     Separator_Line = U_MASK(QChar::Separator_Line),
    162     Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
    163     Other_Control = U_MASK(QChar::Other_Control),
    164     Other_Format = U_MASK(QChar::Other_Format),
    165     Other_Surrogate = U_MASK(QChar::Other_Surrogate),
    166     Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
    167     Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
    168     Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
    169     Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
    170     Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
    171     Letter_Modifier = U_MASK(QChar::Letter_Modifier),
    172     Letter_Other = U_MASK(QChar::Letter_Other),
    173     Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
    174     Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
    175     Punctuation_Open = U_MASK(QChar::Punctuation_Open),
    176     Punctuation_Close = U_MASK(QChar::Punctuation_Close),
    177     Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
    178     Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
    179     Punctuation_Other = U_MASK(QChar::Punctuation_Other),
    180     Symbol_Math = U_MASK(QChar::Symbol_Math),
    181     Symbol_Currency = U_MASK(QChar::Symbol_Currency),
    182     Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
    183     Symbol_Other = U_MASK(QChar::Symbol_Other)
    184 };
    185 
    186 
    187 // FIXME: handle surrogates correctly in all methods
    188 
    189 inline UChar32 toLower(UChar32 ch)
    190 {
    191     return QChar::toLower(ch);
    192 }
    193 
    194 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
    195 {
    196     const UChar *e = src + srcLength;
    197     const UChar *s = src;
    198     UChar *r = result;
    199     uint rindex = 0;
    200 
    201     // this avoids one out of bounds check in the loop
    202     if (s < e && QChar(*s).isLowSurrogate()) {
    203         if (r)
    204             r[rindex] = *s++;
    205         ++rindex;
    206     }
    207 
    208     int needed = 0;
    209     while (s < e && (rindex < uint(resultLength) || !r)) {
    210         uint c = *s;
    211         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
    212             c = QChar::surrogateToUcs4(*(s - 1), c);
    213         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
    214         if (prop->lowerCaseSpecial) {
    215             QString qstring;
    216             if (c < 0x10000) {
    217                 qstring += QChar(c);
    218             } else {
    219                 qstring += QChar(*(s-1));
    220                 qstring += QChar(*s);
    221             }
    222             qstring = qstring.toLower();
    223             for (int i = 0; i < qstring.length(); ++i) {
    224                 if (rindex >= uint(resultLength)) {
    225                     needed += qstring.length() - i;
    226                     break;
    227                 }
    228                 if (r)
    229                     r[rindex] = qstring.at(i).unicode();
    230                 ++rindex;
    231             }
    232         } else {
    233             if (r)
    234                 r[rindex] = *s + prop->lowerCaseDiff;
    235             ++rindex;
    236         }
    237         ++s;
    238     }
    239     if (s < e)
    240         needed += e - s;
    241     *error = (needed != 0);
    242     if (rindex < uint(resultLength))
    243         r[rindex] = 0;
    244     return rindex + needed;
    245 }
    246 
    247 inline UChar32 toUpper(UChar32 ch)
    248 {
    249     return QChar::toUpper(ch);
    250 }
    251 
    252 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
    253 {
    254     const UChar *e = src + srcLength;
    255     const UChar *s = src;
    256     UChar *r = result;
    257     int rindex = 0;
    258 
    259     // this avoids one out of bounds check in the loop
    260     if (s < e && QChar(*s).isLowSurrogate()) {
    261         if (r)
    262             r[rindex] = *s++;
    263         ++rindex;
    264     }
    265 
    266     int needed = 0;
    267     while (s < e && (rindex < resultLength || !r)) {
    268         uint c = *s;
    269         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
    270             c = QChar::surrogateToUcs4(*(s - 1), c);
    271         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
    272         if (prop->upperCaseSpecial) {
    273             QString qstring;
    274             if (c < 0x10000) {
    275                 qstring += QChar(c);
    276             } else {
    277                 qstring += QChar(*(s-1));
    278                 qstring += QChar(*s);
    279             }
    280             qstring = qstring.toUpper();
    281             for (int i = 0; i < qstring.length(); ++i) {
    282                 if (rindex >= resultLength) {
    283                     needed += qstring.length() - i;
    284                     break;
    285                 }
    286                 if (r)
    287                     r[rindex] = qstring.at(i).unicode();
    288                 ++rindex;
    289             }
    290         } else {
    291             if (r)
    292                 r[rindex] = *s + prop->upperCaseDiff;
    293             ++rindex;
    294         }
    295         ++s;
    296     }
    297     if (s < e)
    298         needed += e - s;
    299     *error = (needed != 0);
    300     if (rindex < resultLength)
    301         r[rindex] = 0;
    302     return rindex + needed;
    303 }
    304 
    305 inline int toTitleCase(UChar32 c)
    306 {
    307     return QChar::toTitleCase(c);
    308 }
    309 
    310 inline UChar32 foldCase(UChar32 c)
    311 {
    312     return QChar::toCaseFolded(c);
    313 }
    314 
    315 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
    316 {
    317     // FIXME: handle special casing. Easiest with some low level API in Qt
    318     *error = false;
    319     if (resultLength < srcLength) {
    320         *error = true;
    321         return srcLength;
    322     }
    323     for (int i = 0; i < srcLength; ++i)
    324         result[i] = QChar::toCaseFolded(ushort(src[i]));
    325     return srcLength;
    326 }
    327 
    328 inline bool isArabicChar(UChar32 c)
    329 {
    330     return c >= 0x0600 && c <= 0x06FF;
    331 }
    332 
    333 inline bool isPrintableChar(UChar32 c)
    334 {
    335     const uint test = U_MASK(QChar::Other_Control) |
    336                       U_MASK(QChar::Other_NotAssigned);
    337     return !(U_MASK(QChar::category(c)) & test);
    338 }
    339 
    340 inline bool isSeparatorSpace(UChar32 c)
    341 {
    342     return QChar::category(c) == QChar::Separator_Space;
    343 }
    344 
    345 inline bool isPunct(UChar32 c)
    346 {
    347     const uint test = U_MASK(QChar::Punctuation_Connector) |
    348                       U_MASK(QChar::Punctuation_Dash) |
    349                       U_MASK(QChar::Punctuation_Open) |
    350                       U_MASK(QChar::Punctuation_Close) |
    351                       U_MASK(QChar::Punctuation_InitialQuote) |
    352                       U_MASK(QChar::Punctuation_FinalQuote) |
    353                       U_MASK(QChar::Punctuation_Other);
    354     return U_MASK(QChar::category(c)) & test;
    355 }
    356 
    357 inline bool isLower(UChar32 c)
    358 {
    359     return QChar::category(c) == QChar::Letter_Lowercase;
    360 }
    361 
    362 inline bool hasLineBreakingPropertyComplexContext(UChar32)
    363 {
    364     // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
    365     return false;
    366 }
    367 
    368 inline UChar32 mirroredChar(UChar32 c)
    369 {
    370     return QChar::mirroredChar(c);
    371 }
    372 
    373 inline uint8_t combiningClass(UChar32 c)
    374 {
    375     return QChar::combiningClass(c);
    376 }
    377 
    378 inline DecompositionType decompositionType(UChar32 c)
    379 {
    380     return (DecompositionType)QChar::decompositionTag(c);
    381 }
    382 
    383 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
    384 {
    385     // handle surrogates correctly
    386     for (int i = 0; i < len; ++i) {
    387         uint c1 = QChar::toCaseFolded(ushort(a[i]));
    388         uint c2 = QChar::toCaseFolded(ushort(b[i]));
    389         if (c1 != c2)
    390             return c1 - c2;
    391     }
    392     return 0;
    393 }
    394 
    395 inline Direction direction(UChar32 c)
    396 {
    397     return (Direction)QChar::direction(c);
    398 }
    399 
    400 inline CharCategory category(UChar32 c)
    401 {
    402     return (CharCategory) U_MASK(QChar::category(c));
    403 }
    404 
    405 } }
    406 
    407 #endif // WTF_UNICODE_QT4_H
    408