Home | History | Annotate | Download | only in Objects
      1 /*
      2    Unicode character type helpers.
      3 
      4    Written by Marc-Andre Lemburg (mal (at) lemburg.com).
      5    Modified for Python 2.0 by Fredrik Lundh (fredrik (at) pythonware.com)
      6 
      7    Copyright (c) Corporation for National Research Initiatives.
      8 
      9 */
     10 
     11 #include "Python.h"
     12 #include "unicodeobject.h"
     13 
     14 #define ALPHA_MASK 0x01
     15 #define DECIMAL_MASK 0x02
     16 #define DIGIT_MASK 0x04
     17 #define LOWER_MASK 0x08
     18 #define LINEBREAK_MASK 0x10
     19 #define SPACE_MASK 0x20
     20 #define TITLE_MASK 0x40
     21 #define UPPER_MASK 0x80
     22 #define NODELTA_MASK 0x100
     23 #define NUMERIC_MASK 0x200
     24 
     25 typedef struct {
     26     const Py_UNICODE upper;
     27     const Py_UNICODE lower;
     28     const Py_UNICODE title;
     29     const unsigned char decimal;
     30     const unsigned char digit;
     31     const unsigned short flags;
     32 } _PyUnicode_TypeRecord;
     33 
     34 #include "unicodetype_db.h"
     35 
     36 static const _PyUnicode_TypeRecord *
     37 gettyperecord(Py_UNICODE code)
     38 {
     39     int index;
     40 
     41 #ifdef Py_UNICODE_WIDE
     42     if (code >= 0x110000)
     43         index = 0;
     44     else
     45 #endif
     46     {
     47         index = index1[(code>>SHIFT)];
     48         index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
     49     }
     50 
     51     return &_PyUnicode_TypeRecords[index];
     52 }
     53 
     54 /* Returns the titlecase Unicode characters corresponding to ch or just
     55    ch if no titlecase mapping is known. */
     56 
     57 Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
     58 {
     59     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     60     int delta = ctype->title;
     61 
     62     if (ctype->flags & NODELTA_MASK)
     63 	return delta;
     64 
     65     if (delta >= 32768)
     66 	    delta -= 65536;
     67 
     68     return ch + delta;
     69 }
     70 
     71 /* Returns 1 for Unicode characters having the category 'Lt', 0
     72    otherwise. */
     73 
     74 int _PyUnicode_IsTitlecase(Py_UNICODE ch)
     75 {
     76     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     77 
     78     return (ctype->flags & TITLE_MASK) != 0;
     79 }
     80 
     81 /* Returns the integer decimal (0-9) for Unicode characters having
     82    this property, -1 otherwise. */
     83 
     84 int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
     85 {
     86     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     87 
     88     return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
     89 }
     90 
     91 int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
     92 {
     93     if (_PyUnicode_ToDecimalDigit(ch) < 0)
     94 	return 0;
     95     return 1;
     96 }
     97 
     98 /* Returns the integer digit (0-9) for Unicode characters having
     99    this property, -1 otherwise. */
    100 
    101 int _PyUnicode_ToDigit(Py_UNICODE ch)
    102 {
    103     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
    104 
    105     return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
    106 }
    107 
    108 int _PyUnicode_IsDigit(Py_UNICODE ch)
    109 {
    110     if (_PyUnicode_ToDigit(ch) < 0)
    111 	return 0;
    112     return 1;
    113 }
    114 
    115 /* Returns the numeric value as double for Unicode characters having
    116    this property, -1.0 otherwise. */
    117 
    118 int _PyUnicode_IsNumeric(Py_UNICODE ch)
    119 {
    120     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
    121 
    122     return (ctype->flags & NUMERIC_MASK) != 0;
    123 }
    124 
    125 #ifndef WANT_WCTYPE_FUNCTIONS
    126 
    127 /* Returns 1 for Unicode characters having the category 'Ll', 0
    128    otherwise. */
    129 
    130 int _PyUnicode_IsLowercase(Py_UNICODE ch)
    131 {
    132     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
    133 
    134     return (ctype->flags & LOWER_MASK) != 0;
    135 }
    136 
    137 /* Returns 1 for Unicode characters having the category 'Lu', 0
    138    otherwise. */
    139 
    140 int _PyUnicode_IsUppercase(Py_UNICODE ch)
    141 {
    142     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
    143 
    144     return (ctype->flags & UPPER_MASK) != 0;
    145 }
    146 
    147 /* Returns the uppercase Unicode characters corresponding to ch or just
    148    ch if no uppercase mapping is known. */
    149 
    150 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
    151 {
    152     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
    153     int delta = ctype->upper;
    154     if (ctype->flags & NODELTA_MASK)
    155 	return delta;
    156     if (delta >= 32768)
    157 	    delta -= 65536;
    158     return ch + delta;
    159 }
    160 
    161 /* Returns the lowercase Unicode characters corresponding to ch or just
    162    ch if no lowercase mapping is known. */
    163 
    164 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
    165 {
    166     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
    167     int delta = ctype->lower;
    168     if (ctype->flags & NODELTA_MASK)
    169 	return delta;
    170     if (delta >= 32768)
    171 	    delta -= 65536;
    172     return ch + delta;
    173 }
    174 
    175 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
    176    'Lo' or 'Lm',  0 otherwise. */
    177 
    178 int _PyUnicode_IsAlpha(Py_UNICODE ch)
    179 {
    180     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
    181 
    182     return (ctype->flags & ALPHA_MASK) != 0;
    183 }
    184 
    185 #else
    186 
    187 /* Export the interfaces using the wchar_t type for portability
    188    reasons:  */
    189 
    190 int _PyUnicode_IsLowercase(Py_UNICODE ch)
    191 {
    192     return iswlower(ch);
    193 }
    194 
    195 int _PyUnicode_IsUppercase(Py_UNICODE ch)
    196 {
    197     return iswupper(ch);
    198 }
    199 
    200 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
    201 {
    202     return towlower(ch);
    203 }
    204 
    205 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
    206 {
    207     return towupper(ch);
    208 }
    209 
    210 int _PyUnicode_IsAlpha(Py_UNICODE ch)
    211 {
    212     return iswalpha(ch);
    213 }
    214 
    215 #endif
    216