Home | History | Annotate | Download | only in wince
      1 /*
      2  *  Copyright (C) 2006 George Staikos <staikos (at) kde.org>
      3  *  Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com>
      4  *  Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
      5  *  Copyright (C) 2007-2009 Torch Mobile, Inc.
      6  *
      7  *  This library is free software; you can redistribute it and/or
      8  *  modify it under the terms of the GNU Library General Public
      9  *  License as published by the Free Software Foundation; either
     10  *  version 2 of the License, or (at your option) any later version.
     11  *
     12  *  This library is distributed in the hope that it will be useful,
     13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  *  Library General Public License for more details.
     16  *
     17  *  You should have received a copy of the GNU Library General Public License
     18  *  along with this library; see the file COPYING.LIB.  If not, write to
     19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     20  *  Boston, MA 02110-1301, USA.
     21  *
     22  */
     23 
     24 #ifndef UNICODE_WINCE_H
     25 #define UNICODE_WINCE_H
     26 
     27 #include "ce_unicode.h"
     28 
// Turns a bit index x into a single-bit mask (1 shifted left by x).
// Used further down to build the CharCategory enumerators.
#define TO_MASK(x) (1 << (x))
     30 
// Some defines from ICU that are needed in one or two places.

// True when c is in 0xd800..0xdbff (all bits above the low ten must match 0xd800).
#define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800)
// True when c is in 0xdc00..0xdfff.
#define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00)
// Bias removed by U16_GET_SUPPLEMENTARY; the expression evaluates to 0x35fdc00.
#define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000)
// Combines a lead and a trail code unit into one UChar32 value:
// (lead << 10) + trail - U16_SURROGATE_OFFSET. The arguments are not validated.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET)

// Lead code unit for a supplementary value; 0xd7c0 is 0xd800 - (0x10000 >> 10).
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)
// Trail code unit for a supplementary value: its low ten bits OR-ed into 0xdc00.
#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)

// True when c is in 0xd800..0xdfff.
#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)
// Alias of U_IS_SURROGATE.
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Tests only bit 0x400, so the result is meaningful only for a c that is
// already known to be a surrogate: true means lead, false means trail.
#define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0)
     45 
// Forward read of one code point from the code-unit array s.
// Stores s[i] in c and post-increments i. If that unit is a lead surrogate,
// i is still below length, and s[i] is a trail surrogate, i is incremented
// once more and c is replaced by the combined supplementary value.
// An unpaired lead surrogate is left in c as is.
// The macro expands to a brace block and evaluates s, i and c more than once,
// so the arguments should be free of side effects.
#define U16_NEXT(s, i, length, c) { \
    (c)=(s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if ((i) < (length) && U16_IS_TRAIL(__c2 = (s)[(i)])) { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
}
     56 
// Backward read of one code point from the code-unit array s.
// Pre-decrements i and stores s[i] in c. If that unit is a trail surrogate,
// i is still above start, and s[i - 1] is a lead surrogate, i is decremented
// once more and c is replaced by the combined supplementary value.
// An unpaired trail surrogate is left in c as is.
// The macro expands to a brace block and evaluates s, i and c more than once,
// so the arguments should be free of side effects.
#define U16_PREV(s, start, i, c) { \
    (c)=(s)[--(i)]; \
    if (U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if ((i) > (start) && U16_IS_LEAD(__c2 = (s)[(i) - 1])) { \
            --(i); \
            (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
}
     67 
// True when c is not in the surrogate range 0xd800..0xdfff, i.e. the code unit
// stands for a code point by itself. Simply the negation of U_IS_SURROGATE.
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
     69 
     70 namespace WTF {
     71 
     72     namespace Unicode {
     73 
        // Character direction classes returned by direction().
        // Every enumerator takes its numeric value straight from the matching
        // UnicodeCE::U_* constant, so values of the two enums are interchangeable.
        enum Direction {
            LeftToRight = UnicodeCE::U_LEFT_TO_RIGHT,
            RightToLeft = UnicodeCE::U_RIGHT_TO_LEFT,
            EuropeanNumber = UnicodeCE::U_EUROPEAN_NUMBER,
            EuropeanNumberSeparator = UnicodeCE::U_EUROPEAN_NUMBER_SEPARATOR,
            EuropeanNumberTerminator = UnicodeCE::U_EUROPEAN_NUMBER_TERMINATOR,
            ArabicNumber = UnicodeCE::U_ARABIC_NUMBER,
            CommonNumberSeparator = UnicodeCE::U_COMMON_NUMBER_SEPARATOR,
            BlockSeparator = UnicodeCE::U_BLOCK_SEPARATOR,
            SegmentSeparator = UnicodeCE::U_SEGMENT_SEPARATOR,
            WhiteSpaceNeutral = UnicodeCE::U_WHITE_SPACE_NEUTRAL,
            OtherNeutral = UnicodeCE::U_OTHER_NEUTRAL,
            LeftToRightEmbedding = UnicodeCE::U_LEFT_TO_RIGHT_EMBEDDING,
            LeftToRightOverride = UnicodeCE::U_LEFT_TO_RIGHT_OVERRIDE,
            RightToLeftArabic = UnicodeCE::U_RIGHT_TO_LEFT_ARABIC,
            RightToLeftEmbedding = UnicodeCE::U_RIGHT_TO_LEFT_EMBEDDING,
            RightToLeftOverride = UnicodeCE::U_RIGHT_TO_LEFT_OVERRIDE,
            PopDirectionalFormat = UnicodeCE::U_POP_DIRECTIONAL_FORMAT,
            NonSpacingMark = UnicodeCE::U_DIR_NON_SPACING_MARK,
            BoundaryNeutral = UnicodeCE::U_BOUNDARY_NEUTRAL
        };
     95 
     96         enum DecompositionType {
     97           DecompositionNone = UnicodeCE::U_DT_NONE,
     98           DecompositionCanonical = UnicodeCE::U_DT_CANONICAL,
     99           DecompositionCompat = UnicodeCE::U_DT_COMPAT,
    100           DecompositionCircle = UnicodeCE::U_DT_CIRCLE,
    101           DecompositionFinal = UnicodeCE::U_DT_FINAL,
    102           DecompositionFont = UnicodeCE::U_DT_FONT,
    103           DecompositionFraction = UnicodeCE::U_DT_FRACTION,
    104           DecompositionInitial = UnicodeCE::U_DT_INITIAL,
    105           DecompositionIsolated = UnicodeCE::U_DT_ISOLATED,
    106           DecompositionMedial = UnicodeCE::U_DT_MEDIAL,
    107           DecompositionNarrow = UnicodeCE::U_DT_NARROW,
    108           DecompositionNoBreak = UnicodeCE::U_DT_NOBREAK,
    109           DecompositionSmall = UnicodeCE::U_DT_SMALL,
    110           DecompositionSquare = UnicodeCE::U_DT_SQUARE,
    111           DecompositionSub = UnicodeCE::U_DT_SUB,
    112           DecompositionSuper = UnicodeCE::U_DT_SUPER,
    113           DecompositionVertical = UnicodeCE::U_DT_VERTICAL,
    114           DecompositionWide = UnicodeCE::U_DT_WIDE,
    115         };
    116 
        // Character category values returned by category().
        // NoCategory is 0; every other enumerator is TO_MASK() of a UnicodeCE
        // category constant, i.e. 1 shifted left by that constant, so each
        // category occupies its own bit.
        enum CharCategory {
          NoCategory =  0,
          Other_NotAssigned = TO_MASK(UnicodeCE::U_GENERAL_OTHER_TYPES),
          Letter_Uppercase = TO_MASK(UnicodeCE::U_UPPERCASE_LETTER),
          Letter_Lowercase = TO_MASK(UnicodeCE::U_LOWERCASE_LETTER),
          Letter_Titlecase = TO_MASK(UnicodeCE::U_TITLECASE_LETTER),
          Letter_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_LETTER),
          Letter_Other = TO_MASK(UnicodeCE::U_OTHER_LETTER),

          Mark_NonSpacing = TO_MASK(UnicodeCE::U_NON_SPACING_MARK),
          Mark_Enclosing = TO_MASK(UnicodeCE::U_ENCLOSING_MARK),
          Mark_SpacingCombining = TO_MASK(UnicodeCE::U_COMBINING_SPACING_MARK),

          Number_DecimalDigit = TO_MASK(UnicodeCE::U_DECIMAL_DIGIT_NUMBER),
          Number_Letter = TO_MASK(UnicodeCE::U_LETTER_NUMBER),
          Number_Other = TO_MASK(UnicodeCE::U_OTHER_NUMBER),

          Separator_Space = TO_MASK(UnicodeCE::U_SPACE_SEPARATOR),
          Separator_Line = TO_MASK(UnicodeCE::U_LINE_SEPARATOR),
          Separator_Paragraph = TO_MASK(UnicodeCE::U_PARAGRAPH_SEPARATOR),

          Other_Control = TO_MASK(UnicodeCE::U_CONTROL_CHAR),
          Other_Format = TO_MASK(UnicodeCE::U_FORMAT_CHAR),
          Other_PrivateUse = TO_MASK(UnicodeCE::U_PRIVATE_USE_CHAR),
          Other_Surrogate = TO_MASK(UnicodeCE::U_SURROGATE),

          Punctuation_Dash = TO_MASK(UnicodeCE::U_DASH_PUNCTUATION),
          Punctuation_Open = TO_MASK(UnicodeCE::U_START_PUNCTUATION),
          Punctuation_Close = TO_MASK(UnicodeCE::U_END_PUNCTUATION),
          Punctuation_Connector = TO_MASK(UnicodeCE::U_CONNECTOR_PUNCTUATION),
          Punctuation_Other = TO_MASK(UnicodeCE::U_OTHER_PUNCTUATION),

          Symbol_Math = TO_MASK(UnicodeCE::U_MATH_SYMBOL),
          Symbol_Currency = TO_MASK(UnicodeCE::U_CURRENCY_SYMBOL),
          Symbol_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_SYMBOL),
          Symbol_Other = TO_MASK(UnicodeCE::U_OTHER_SYMBOL),

          Punctuation_InitialQuote = TO_MASK(UnicodeCE::U_INITIAL_PUNCTUATION),
          Punctuation_FinalQuote = TO_MASK(UnicodeCE::U_FINAL_PUNCTUATION)
        };
    157 
        // Looks up the CharCategory of a character value.
        // Declared here without a body.
        CharCategory category(unsigned int);

        // Character classification predicates, each taking a single wchar_t.
        // Declared here without bodies, so the exact character sets they accept
        // cannot be read off this header.
        bool isSpace(wchar_t);
        bool isLetter(wchar_t);
        bool isPrintableChar(wchar_t);
        bool isUpper(wchar_t);
        bool isLower(wchar_t);
        bool isPunct(wchar_t);
        bool isDigit(wchar_t);
    167         inline bool isSeparatorSpace(wchar_t c) { return category(c) == Separator_Space; }
    168         inline bool isHighSurrogate(wchar_t c) { return (c & 0xfc00) == 0xd800; }
    169         inline bool isLowSurrogate(wchar_t c) { return (c & 0xfc00) == 0xdc00; }
    170 
        // Single-character case mappings: each takes and returns one wchar_t.
        // Declared here without bodies.
        wchar_t toLower(wchar_t);
        wchar_t toUpper(wchar_t);
        wchar_t foldCase(wchar_t);
        wchar_t toTitleCase(wchar_t);
        // Buffer case mappings: read sourceLength units from source and write
        // into result, which holds resultLength units; isError is an output flag.
        // Presumably the int return is the resulting length; confirm against
        // the implementation, which is not part of this header.
        int toLower(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError);
        int toUpper(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError);
        // NOTE(review): result is declared as UChar* here but as wchar_t* in the
        // two overloads above; verify that UChar and wchar_t are the same type
        // on this platform and that this matches the definition.
        int foldCase(UChar* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError);
    178 
        // Numeric value of a digit character. Declared here without a body, so
        // the value returned for non-digits cannot be read off this header.
        int digitValue(wchar_t);

        // Per-code-point property lookups, declared here without bodies.
        // Note that mirroredChar takes a UChar32 but returns a wchar_t.
        wchar_t mirroredChar(UChar32);
        unsigned char combiningClass(UChar32);
        DecompositionType decompositionType(UChar32);
        Direction direction(UChar32);
    185         inline bool isArabicChar(UChar32)
    186         {
    187             return false; // FIXME: implement!
    188         }
    189 
        // Stub: ignores its argument and always returns false.
        // The real line-breaking property lookup is not implemented yet (see FIXME).
        inline bool hasLineBreakingPropertyComplexContext(UChar32)
        {
            return false; // FIXME: implement!
        }
    194 
    195         inline int umemcasecmp(const wchar_t* a, const wchar_t* b, int len)
    196         {
    197             for (int i = 0; i < len; ++i) {
    198                 wchar_t c1 = foldCase(a[i]);
    199                 wchar_t c2 = foldCase(b[i]);
    200                 if (c1 != c2)
    201                     return c1 - c2;
    202             }
    203             return 0;
    204         }
    205 
    206         inline UChar32 surrogateToUcs4(wchar_t high, wchar_t low)
    207         {
    208             return (UChar32(high) << 10) + low - 0x35fdc00;
    209         }
    210 
    211     }   // namespace Unicode
    212 
    213 }   // namespace WTF
    214 
    215 #endif
    216 // vim: ts=2 sw=2 et
    217