Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_CHAR_UTILS_H
     18 #define LATINIME_CHAR_UTILS_H
     19 
     20 #include <cctype>
     21 
     22 #include "defines.h"
     23 
     24 namespace latinime {
     25 
     26 class CharUtils {
     27  public:
     28     static AK_FORCE_INLINE bool isAsciiUpper(int c) {
     29         // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
     30         // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
     31         return (c >= 'A' && c <= 'Z');
     32     }
     33 
     34     static AK_FORCE_INLINE int toAsciiLower(int c) {
     35         return c - 'A' + 'a';
     36     }
     37 
     38     static AK_FORCE_INLINE bool isAscii(int c) {
     39         return isascii(c) != 0;
     40     }
     41 
     42     static AK_FORCE_INLINE int toLowerCase(const int c) {
     43         if (isAsciiUpper(c)) {
     44             return toAsciiLower(c);
     45         }
     46         if (isAscii(c)) {
     47             return c;
     48         }
     49         return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
     50     }
     51 
     52     static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
     53         return toLowerCase(toBaseCodePoint(c));
     54     }
     55 
     56     static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(const int codePoint) {
     57         // TODO: Do not hardcode here
     58         return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
     59     }
     60 
     61     static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) {
     62         int size = 0;
     63         for (; size < arraySize; ++size) {
     64             if (codePoints[size] == '\0') {
     65                 break;
     66             }
     67         }
     68         return size;
     69     }
     70 
     71     static AK_FORCE_INLINE int toBaseCodePoint(int c) {
     72         if (c < BASE_CHARS_SIZE) {
     73             return static_cast<int>(BASE_CHARS[c]);
     74         }
     75         return c;
     76     }
     77 
     78     static AK_FORCE_INLINE int getSpaceCount(const int *const codePointBuffer, const int length) {
     79         int spaceCount = 0;
     80         for (int i = 0; i < length; ++i) {
     81             if (codePointBuffer[i] == KEYCODE_SPACE) {
     82                 ++spaceCount;
     83             }
     84         }
     85         return spaceCount;
     86     }
     87 
     88     static unsigned short latin_tolower(const unsigned short c);
     89 
     90  private:
     91     DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
     92 
     93     /**
     94      * Table mapping most combined Latin, Greek, and Cyrillic characters
     95      * to their base characters.  If c is in range, BASE_CHARS[c] == c
     96      * if c is not a combined character, or the base character if it
     97      * is combined.
     98      */
     99     static const int BASE_CHARS_SIZE = 0x0500;
    100     static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
    101 };
    102 } // namespace latinime
    103 #endif // LATINIME_CHAR_UTILS_H
    104