Home | History | Annotate | Download | only in qt4
      1 /*
      2  *  Copyright (C) 2006 George Staikos <staikos (at) kde.org>
      3  *  Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com>
      4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      5  *
      6  *  This library is free software; you can redistribute it and/or
      7  *  modify it under the terms of the GNU Library General Public
      8  *  License as published by the Free Software Foundation; either
      9  *  version 2 of the License, or (at your option) any later version.
     10  *
     11  *  This library is distributed in the hope that it will be useful,
     12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  *  Library General Public License for more details.
     15  *
     16  *  You should have received a copy of the GNU Library General Public License
     17  *  along with this library; see the file COPYING.LIB.  If not, write to
     18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  *  Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef WTF_UNICODE_QT4_H
     24 #define WTF_UNICODE_QT4_H
     25 
     26 #include "UnicodeMacrosFromICU.h"
     27 
     28 #include <QChar>
     29 #include <QString>
     30 
     31 #include <config.h>
     32 
     33 #include <stdint.h>
     34 #if USE(QT_ICU_TEXT_BREAKING)
     35 #include <unicode/ubrk.h>
     36 #endif
     37 
     38 QT_BEGIN_NAMESPACE
        // Declarations for Qt's per-character property table. Only declarations
        // live here: the properties() lookups are marked Q_CORE_EXPORT and are
        // defined outside this file. The buffer-based toLower()/toUpper() further
        // down in this header are the users of these declarations.
        // NOTE(review): presumably this struct has to match, field for field, the
        // definition compiled into the Qt library being linked - confirm whenever
        // the Qt version changes.
     39 namespace QUnicodeTables {
     40     struct Properties {
     41         ushort category : 8;
     42         ushort line_break_class : 8;
     43         ushort direction : 8;
     44         ushort combiningClass :8;
     45         ushort joining : 2;
     46         signed short digitValue : 6; /* 5 needed */
     47         ushort unicodeVersion : 4;
                // When set, the buffer-based toLower() in this header converts the
                // character through QString::toLower() instead of adding lowerCaseDiff.
     48         ushort lowerCaseSpecial : 1;
                // Same role as lowerCaseSpecial, for the buffer-based toUpper().
     49         ushort upperCaseSpecial : 1;
     50         ushort titleCaseSpecial : 1;
     51         ushort caseFoldSpecial : 1; /* currently unused */
     52         signed short mirrorDiff : 16;
                // Signed offset that the buffer-based toLower() adds to a code unit
                // when lowerCaseSpecial is not set.
     53         signed short lowerCaseDiff : 16;
                // Signed offset that the buffer-based toUpper() adds to a code unit
                // when upperCaseSpecial is not set.
     54         signed short upperCaseDiff : 16;
     55         signed short titleCaseDiff : 16;
     56         signed short caseFoldDiff : 16;
     57     };
            // Property lookup, overloaded on a uint (ucs4) or a ushort (ucs2) argument.
     58     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
     59     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
     60 }
     61 QT_END_NAMESPACE
     62 
     63 // Ugly hack to make UChar compatible with JSChar in API/JSStringRef.h:
        // UChar is wchar_t on the toolchains selected by the condition below and
        // uint16_t everywhere else.
        // NOTE(review): presumably JSChar is declared with the same platform split -
        // confirm against API/JSStringRef.h before touching the condition.
     64 #if defined(Q_OS_WIN) || COMPILER(WINSCW) || (COMPILER(RVCT) && !OS(LINUX))
     65 typedef wchar_t UChar;
     66 #else
     67 typedef uint16_t UChar;
     68 #endif
     69 
        // UChar32 is only declared here when ICU text breaking is not in use.
        // NOTE(review): presumably <unicode/ubrk.h>, included near the top of this
        // header in the ICU configuration, supplies UChar32 in that case - confirm.
     70 #if !USE(QT_ICU_TEXT_BREAKING)
     71 typedef uint32_t UChar32;
     72 #endif
     73 
     74 namespace WTF {
     75 namespace Unicode {
     76 
     77 enum Direction {
            // Every enumerator takes the value of the matching QChar::Dir* constant.
            // direction() in this header relies on that: it converts the result of
            // QChar::direction() to Direction with a plain cast, so the two sets of
            // values must stay in step.
     78     LeftToRight = QChar::DirL,
     79     RightToLeft = QChar::DirR,
     80     EuropeanNumber = QChar::DirEN,
     81     EuropeanNumberSeparator = QChar::DirES,
     82     EuropeanNumberTerminator = QChar::DirET,
     83     ArabicNumber = QChar::DirAN,
     84     CommonNumberSeparator = QChar::DirCS,
     85     BlockSeparator = QChar::DirB,
     86     SegmentSeparator = QChar::DirS,
     87     WhiteSpaceNeutral = QChar::DirWS,
     88     OtherNeutral = QChar::DirON,
     89     LeftToRightEmbedding = QChar::DirLRE,
     90     LeftToRightOverride = QChar::DirLRO,
     91     RightToLeftArabic = QChar::DirAL,
     92     RightToLeftEmbedding = QChar::DirRLE,
     93     RightToLeftOverride = QChar::DirRLO,
     94     PopDirectionalFormat = QChar::DirPDF,
     95     NonSpacingMark = QChar::DirNSM,
     96     BoundaryNeutral = QChar::DirBN
     97 };
     98 
     99 enum DecompositionType {
            // Every enumerator takes the value of the matching QChar decomposition
            // constant. decompositionType() in this header converts the result of
            // QChar::decompositionTag() to DecompositionType with a plain cast, so
            // the two sets of values must stay in step.
    100     DecompositionNone = QChar::NoDecomposition,
    101     DecompositionCanonical = QChar::Canonical,
    102     DecompositionCompat = QChar::Compat,
    103     DecompositionCircle = QChar::Circle,
    104     DecompositionFinal = QChar::Final,
    105     DecompositionFont = QChar::Font,
    106     DecompositionFraction = QChar::Fraction,
    107     DecompositionInitial = QChar::Initial,
    108     DecompositionIsolated = QChar::Isolated,
    109     DecompositionMedial = QChar::Medial,
    110     DecompositionNarrow = QChar::Narrow,
    111     DecompositionNoBreak = QChar::NoBreak,
    112     DecompositionSmall = QChar::Small,
    113     DecompositionSquare = QChar::Square,
    114     DecompositionSub = QChar::Sub,
    115     DecompositionSuper = QChar::Super,
    116     DecompositionVertical = QChar::Vertical,
    117     DecompositionWide = QChar::Wide
    118 };
    119 
    120 enum CharCategory {
            // Apart from NoCategory (0), every enumerator is U_MASK() applied to the
            // matching QChar category constant; category() in this header produces
            // its result the same way, by applying U_MASK() to QChar::category().
            // NOTE(review): U_MASK is not defined in this file (presumably it comes
            // from UnicodeMacrosFromICU.h and yields a one-bit mask, which is how
            // isPunct()/isPrintableChar() use it) - confirm.
    121     NoCategory = 0,
    122     Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
    123     Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
    124     Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
    125     Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
    126     Number_Letter = U_MASK(QChar::Number_Letter),
    127     Number_Other = U_MASK(QChar::Number_Other),
    128     Separator_Space = U_MASK(QChar::Separator_Space),
    129     Separator_Line = U_MASK(QChar::Separator_Line),
    130     Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
    131     Other_Control = U_MASK(QChar::Other_Control),
    132     Other_Format = U_MASK(QChar::Other_Format),
    133     Other_Surrogate = U_MASK(QChar::Other_Surrogate),
    134     Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
    135     Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
    136     Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
    137     Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
    138     Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
    139     Letter_Modifier = U_MASK(QChar::Letter_Modifier),
    140     Letter_Other = U_MASK(QChar::Letter_Other),
    141     Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
    142     Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
    143     Punctuation_Open = U_MASK(QChar::Punctuation_Open),
    144     Punctuation_Close = U_MASK(QChar::Punctuation_Close),
    145     Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
    146     Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
    147     Punctuation_Other = U_MASK(QChar::Punctuation_Other),
    148     Symbol_Math = U_MASK(QChar::Symbol_Math),
    149     Symbol_Currency = U_MASK(QChar::Symbol_Currency),
    150     Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
    151     Symbol_Other = U_MASK(QChar::Symbol_Other)
    152 };
    153 
    154 
    155 // FIXME: handle surrogates correctly in all methods
    156 
    157 inline UChar32 toLower(UChar32 ch)
    158 {
    159     return QChar::toLower(uint32_t(ch));
    160 }
    161 
    162 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
    163 {
    164     const UChar *e = src + srcLength;
    165     const UChar *s = src;
    166     UChar *r = result;
    167     uint rindex = 0;
    168 
    169     // this avoids one out of bounds check in the loop
    170     if (s < e && QChar(*s).isLowSurrogate()) {
    171         if (r)
    172             r[rindex] = *s++;
    173         ++rindex;
    174     }
    175 
    176     int needed = 0;
    177     while (s < e && (rindex < uint(resultLength) || !r)) {
    178         uint c = *s;
    179         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
    180             c = QChar::surrogateToUcs4(*(s - 1), c);
    181         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
    182         if (prop->lowerCaseSpecial) {
    183             QString qstring;
    184             if (c < 0x10000) {
    185                 qstring += QChar(c);
    186             } else {
    187                 qstring += QChar(*(s-1));
    188                 qstring += QChar(*s);
    189             }
    190             qstring = qstring.toLower();
    191             for (int i = 0; i < qstring.length(); ++i) {
    192                 if (rindex >= uint(resultLength)) {
    193                     needed += qstring.length() - i;
    194                     break;
    195                 }
    196                 if (r)
    197                     r[rindex] = qstring.at(i).unicode();
    198                 ++rindex;
    199             }
    200         } else {
    201             if (r)
    202                 r[rindex] = *s + prop->lowerCaseDiff;
    203             ++rindex;
    204         }
    205         ++s;
    206     }
    207     if (s < e)
    208         needed += e - s;
    209     *error = (needed != 0);
    210     if (rindex < uint(resultLength))
    211         r[rindex] = 0;
    212     return rindex + needed;
    213 }
    214 
    215 inline UChar32 toUpper(UChar32 c)
    216 {
    217     return QChar::toUpper(uint32_t(c));
    218 }
    219 
    220 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
    221 {
    222     const UChar *e = src + srcLength;
    223     const UChar *s = src;
    224     UChar *r = result;
    225     int rindex = 0;
    226 
    227     // this avoids one out of bounds check in the loop
    228     if (s < e && QChar(*s).isLowSurrogate()) {
    229         if (r)
    230             r[rindex] = *s++;
    231         ++rindex;
    232     }
    233 
    234     int needed = 0;
    235     while (s < e && (rindex < resultLength || !r)) {
    236         uint c = *s;
    237         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
    238             c = QChar::surrogateToUcs4(*(s - 1), c);
    239         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
    240         if (prop->upperCaseSpecial) {
    241             QString qstring;
    242             if (c < 0x10000) {
    243                 qstring += QChar(c);
    244             } else {
    245                 qstring += QChar(*(s-1));
    246                 qstring += QChar(*s);
    247             }
    248             qstring = qstring.toUpper();
    249             for (int i = 0; i < qstring.length(); ++i) {
    250                 if (rindex >= resultLength) {
    251                     needed += qstring.length() - i;
    252                     break;
    253                 }
    254                 if (r)
    255                     r[rindex] = qstring.at(i).unicode();
    256                 ++rindex;
    257             }
    258         } else {
    259             if (r)
    260                 r[rindex] = *s + prop->upperCaseDiff;
    261             ++rindex;
    262         }
    263         ++s;
    264     }
    265     if (s < e)
    266         needed += e - s;
    267     *error = (needed != 0);
    268     if (rindex < resultLength)
    269         r[rindex] = 0;
    270     return rindex + needed;
    271 }
    272 
    273 inline int toTitleCase(UChar32 c)
    274 {
    275     return QChar::toTitleCase(uint32_t(c));
    276 }
    277 
    278 inline UChar32 foldCase(UChar32 c)
    279 {
    280     return QChar::toCaseFolded(uint32_t(c));
    281 }
    282 
    283 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
    284 {
    285     // FIXME: handle special casing. Easiest with some low level API in Qt
    286     *error = false;
    287     if (resultLength < srcLength) {
    288         *error = true;
    289         return srcLength;
    290     }
    291     for (int i = 0; i < srcLength; ++i)
    292         result[i] = QChar::toCaseFolded(ushort(src[i]));
    293     return srcLength;
    294 }
    295 
    296 inline bool isArabicChar(UChar32 c)
    297 {
    298     return c >= 0x0600 && c <= 0x06FF;
    299 }
    300 
    301 inline bool isPrintableChar(UChar32 c)
    302 {
    303     const uint test = U_MASK(QChar::Other_Control) |
    304                       U_MASK(QChar::Other_NotAssigned);
    305     return !(U_MASK(QChar::category(uint32_t(c))) & test);
    306 }
    307 
    308 inline bool isSeparatorSpace(UChar32 c)
    309 {
    310     return QChar::category(uint32_t(c)) == QChar::Separator_Space;
    311 }
    312 
    313 inline bool isPunct(UChar32 c)
    314 {
    315     const uint test = U_MASK(QChar::Punctuation_Connector) |
    316                       U_MASK(QChar::Punctuation_Dash) |
    317                       U_MASK(QChar::Punctuation_Open) |
    318                       U_MASK(QChar::Punctuation_Close) |
    319                       U_MASK(QChar::Punctuation_InitialQuote) |
    320                       U_MASK(QChar::Punctuation_FinalQuote) |
    321                       U_MASK(QChar::Punctuation_Other);
    322     return U_MASK(QChar::category(uint32_t(c))) & test;
    323 }
    324 
    325 inline bool isLower(UChar32 c)
    326 {
    327     return QChar::category(uint32_t(c)) == QChar::Letter_Lowercase;
    328 }
    329 
    329 inline bool hasLineBreakingPropertyComplexContext(UChar32)
    330 {
            // Stub: the argument is unnamed and ignored, and false is returned for
            // every character.
    331     // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
    332     return false;
    333 }
    335 
    336 inline UChar32 mirroredChar(UChar32 c)
    337 {
    338     return QChar::mirroredChar(uint32_t(c));
    339 }
    340 
    341 inline uint8_t combiningClass(UChar32 c)
    342 {
    343     return QChar::combiningClass(uint32_t(c));
    344 }
    345 
    346 inline DecompositionType decompositionType(UChar32 c)
    347 {
    348     return (DecompositionType)QChar::decompositionTag(c);
    349 }
    350 
    351 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
    352 {
    353     // handle surrogates correctly
    354     for (int i = 0; i < len; ++i) {
    355         uint c1 = QChar::toCaseFolded(ushort(a[i]));
    356         uint c2 = QChar::toCaseFolded(ushort(b[i]));
    357         if (c1 != c2)
    358             return c1 - c2;
    359     }
    360     return 0;
    361 }
    362 
    363 inline Direction direction(UChar32 c)
    364 {
    365     return (Direction)QChar::direction(uint32_t(c));
    366 }
    367 
    368 inline CharCategory category(UChar32 c)
    369 {
    370     return (CharCategory) U_MASK(QChar::category(uint32_t(c)));
    371 }
    372 
    373 } }
    374 
    375 #endif // WTF_UNICODE_QT4_H
    376