Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_CHAR_UTILS_H
     18 #define LATINIME_CHAR_UTILS_H
     19 
     20 #include <cctype>
     21 
     22 #include "defines.h"
     23 
     24 namespace latinime {
     25 
     26 inline static bool isAsciiUpper(int c) {
     27     // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
     28     // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
     29     return (c >= 'A' && c <= 'Z');
     30 }
     31 
     32 inline static int toAsciiLower(int c) {
     33     return c - 'A' + 'a';
     34 }
     35 
     36 inline static bool isAscii(int c) {
     37     return isascii(c) != 0;
     38 }
     39 
// Only declared in this header; the definition is not visible here.
// toLowerCase(...) calls it for code points that are not ASCII, after narrowing them to
// unsigned short, and widens the result back to int.
unsigned short latin_tolower(const unsigned short c);
     41 
     42 /**
     43  * Table mapping most combined Latin, Greek, and Cyrillic characters
     44  * to their base characters.  If c is in range, BASE_CHARS[c] == c
     45  * if c is not a combined character, or the base character if it
     46  * is combined.
     47  */
     48 static const int BASE_CHARS_SIZE = 0x0500;
     49 extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
     50 
     51 inline static int toBaseCodePoint(int c) {
     52     if (c < BASE_CHARS_SIZE) {
     53         return static_cast<int>(BASE_CHARS[c]);
     54     }
     55     return c;
     56 }
     57 
     58 AK_FORCE_INLINE static int toLowerCase(const int c) {
     59     if (isAsciiUpper(c)) {
     60         return toAsciiLower(c);
     61     }
     62     if (isAscii(c)) {
     63         return c;
     64     }
     65     return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
     66 }
     67 
     68 AK_FORCE_INLINE static int toBaseLowerCase(const int c) {
     69     return toLowerCase(toBaseCodePoint(c));
     70 }
     71 
     72 inline static bool isIntentionalOmissionCodePoint(const int codePoint) {
     73     // TODO: Do not hardcode here
     74     return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
     75 }
     76 
     77 inline static int getCodePointCount(const int arraySize, const int *const codePoints) {
     78     int size = 0;
     79     for (; size < arraySize; ++size) {
     80         if (codePoints[size] == '\0') {
     81             break;
     82         }
     83     }
     84     return size;
     85 }
     86 
     87 } // namespace latinime
     88 #endif // LATINIME_CHAR_UTILS_H
     89