Home | History | Annotate | Download | only in glib
      1 /*
      2  *  Copyright (C) 2006 George Staikos <staikos (at) kde.org>
      3  *  Copyright (C) 2006 Alexey Proskuryakov <ap (at) nypop.com>
      4  *  Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
      5  *  Copyright (C) 2008 Jürg Billeter <j (at) bitron.ch>
      6  *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches (at) access-company.com>
      7  *
      8  *  This library is free software; you can redistribute it and/or
      9  *  modify it under the terms of the GNU Library General Public
     10  *  License as published by the Free Software Foundation; either
     11  *  version 2 of the License, or (at your option) any later version.
     12  *
     13  *  This library is distributed in the hope that it will be useful,
     14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  *  Library General Public License for more details.
     17  *
     18  *  You should have received a copy of the GNU Library General Public License
     19  *  along with this library; see the file COPYING.LIB.  If not, write to
     20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  *  Boston, MA 02110-1301, USA.
     22  *
     23  */
     24 
     25 #ifndef UnicodeGLib_h
     26 #define UnicodeGLib_h
     27 
     28 #include "UnicodeMacrosFromICU.h"
     29 #include "GOwnPtr.h"
     30 
     31 #include <glib.h>
     32 #include <pango/pango.h>
     33 #include <stdint.h>
     34 #include <stdlib.h>
     35 #include <string.h>
     36 
     37 typedef uint16_t UChar;
     38 typedef int32_t UChar32;
     39 
     40 namespace WTF {
     41 namespace Unicode {
     42 
     43 enum Direction {
     44     LeftToRight,
     45     RightToLeft,
     46     EuropeanNumber,
     47     EuropeanNumberSeparator,
     48     EuropeanNumberTerminator,
     49     ArabicNumber,
     50     CommonNumberSeparator,
     51     BlockSeparator,
     52     SegmentSeparator,
     53     WhiteSpaceNeutral,
     54     OtherNeutral,
     55     LeftToRightEmbedding,
     56     LeftToRightOverride,
     57     RightToLeftArabic,
     58     RightToLeftEmbedding,
     59     RightToLeftOverride,
     60     PopDirectionalFormat,
     61     NonSpacingMark,
     62     BoundaryNeutral
     63 };
     64 
     65 enum DecompositionType {
     66     DecompositionNone,
     67     DecompositionCanonical,
     68     DecompositionCompat,
     69     DecompositionCircle,
     70     DecompositionFinal,
     71     DecompositionFont,
     72     DecompositionFraction,
     73     DecompositionInitial,
     74     DecompositionIsolated,
     75     DecompositionMedial,
     76     DecompositionNarrow,
     77     DecompositionNoBreak,
     78     DecompositionSmall,
     79     DecompositionSquare,
     80     DecompositionSub,
     81     DecompositionSuper,
     82     DecompositionVertical,
     83     DecompositionWide,
     84 };
     85 
     86 enum CharCategory {
     87     NoCategory =  0,
     88     Other_NotAssigned = U_MASK(G_UNICODE_UNASSIGNED),
     89     Letter_Uppercase = U_MASK(G_UNICODE_UPPERCASE_LETTER),
     90     Letter_Lowercase = U_MASK(G_UNICODE_LOWERCASE_LETTER),
     91     Letter_Titlecase = U_MASK(G_UNICODE_TITLECASE_LETTER),
     92     Letter_Modifier = U_MASK(G_UNICODE_MODIFIER_LETTER),
     93     Letter_Other = U_MASK(G_UNICODE_OTHER_LETTER),
     94 
     95     Mark_NonSpacing = U_MASK(G_UNICODE_NON_SPACING_MARK),
     96     Mark_Enclosing = U_MASK(G_UNICODE_ENCLOSING_MARK),
     97     Mark_SpacingCombining = U_MASK(G_UNICODE_COMBINING_MARK),
     98 
     99     Number_DecimalDigit = U_MASK(G_UNICODE_DECIMAL_NUMBER),
    100     Number_Letter = U_MASK(G_UNICODE_LETTER_NUMBER),
    101     Number_Other = U_MASK(G_UNICODE_OTHER_NUMBER),
    102 
    103     Separator_Space = U_MASK(G_UNICODE_SPACE_SEPARATOR),
    104     Separator_Line = U_MASK(G_UNICODE_LINE_SEPARATOR),
    105     Separator_Paragraph = U_MASK(G_UNICODE_PARAGRAPH_SEPARATOR),
    106 
    107     Other_Control = U_MASK(G_UNICODE_CONTROL),
    108     Other_Format = U_MASK(G_UNICODE_FORMAT),
    109     Other_PrivateUse = U_MASK(G_UNICODE_PRIVATE_USE),
    110     Other_Surrogate = U_MASK(G_UNICODE_SURROGATE),
    111 
    112     Punctuation_Dash = U_MASK(G_UNICODE_DASH_PUNCTUATION),
    113     Punctuation_Open = U_MASK(G_UNICODE_OPEN_PUNCTUATION),
    114     Punctuation_Close = U_MASK(G_UNICODE_CLOSE_PUNCTUATION),
    115     Punctuation_Connector = U_MASK(G_UNICODE_CONNECT_PUNCTUATION),
    116     Punctuation_Other = U_MASK(G_UNICODE_OTHER_PUNCTUATION),
    117 
    118     Symbol_Math = U_MASK(G_UNICODE_MATH_SYMBOL),
    119     Symbol_Currency = U_MASK(G_UNICODE_CURRENCY_SYMBOL),
    120     Symbol_Modifier = U_MASK(G_UNICODE_MODIFIER_SYMBOL),
    121     Symbol_Other = U_MASK(G_UNICODE_OTHER_SYMBOL),
    122 
    123     Punctuation_InitialQuote = U_MASK(G_UNICODE_INITIAL_PUNCTUATION),
    124     Punctuation_FinalQuote = U_MASK(G_UNICODE_FINAL_PUNCTUATION)
    125 };
    126 
    127 UChar32 foldCase(UChar32);
    128 
    129 int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
    130 
    131 int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
    132 
    133 inline UChar32 toLower(UChar32 c)
    134 {
    135     return g_unichar_tolower(c);
    136 }
    137 
    138 inline UChar32 toUpper(UChar32 c)
    139 {
    140     return g_unichar_toupper(c);
    141 }
    142 
    143 int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
    144 
    145 inline UChar32 toTitleCase(UChar32 c)
    146 {
    147     return g_unichar_totitle(c);
    148 }
    149 
    150 inline bool isArabicChar(UChar32 c)
    151 {
    152     return c >= 0x0600 && c <= 0x06FF;
    153 }
    154 
    155 inline bool isAlphanumeric(UChar32 c)
    156 {
    157     return g_unichar_isalnum(c);
    158 }
    159 
    160 inline bool isFormatChar(UChar32 c)
    161 {
    162     return g_unichar_type(c) == G_UNICODE_FORMAT;
    163 }
    164 
    165 inline bool isSeparatorSpace(UChar32 c)
    166 {
    167     return g_unichar_type(c) == G_UNICODE_SPACE_SEPARATOR;
    168 }
    169 
    170 inline bool isPrintableChar(UChar32 c)
    171 {
    172     return g_unichar_isprint(c);
    173 }
    174 
    175 inline bool isDigit(UChar32 c)
    176 {
    177     return g_unichar_isdigit(c);
    178 }
    179 
    180 inline bool isPunct(UChar32 c)
    181 {
    182     return g_unichar_ispunct(c);
    183 }
    184 
    185 inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
    186 {
    187     // FIXME
    188     return false;
    189 }
    190 
    191 inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
    192 {
    193     // FIXME
    194     return false;
    195 }
    196 
    197 inline UChar32 mirroredChar(UChar32 c)
    198 {
    199     gunichar mirror = 0;
    200     g_unichar_get_mirror_char(c, &mirror);
    201     return mirror;
    202 }
    203 
    204 inline CharCategory category(UChar32 c)
    205 {
    206     if (c > 0xffff)
    207         return NoCategory;
    208 
    209     return (CharCategory) U_MASK(g_unichar_type(c));
    210 }
    211 
    212 Direction direction(UChar32);
    213 
    214 inline bool isLower(UChar32 c)
    215 {
    216     return g_unichar_islower(c);
    217 }
    218 
    219 inline int digitValue(UChar32 c)
    220 {
    221     return g_unichar_digit_value(c);
    222 }
    223 
    224 inline uint8_t combiningClass(UChar32 c)
    225 {
    226     // FIXME
    227     // return g_unichar_combining_class(c);
    228     return 0;
    229 }
    230 
    231 inline DecompositionType decompositionType(UChar32 c)
    232 {
    233     // FIXME
    234     return DecompositionNone;
    235 }
    236 
    237 int umemcasecmp(const UChar*, const UChar*, int len);
    238 
    239 }
    240 }
    241 
    242 #endif
    243 
    244