1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_CHAR_UTILS_H 18 #define LATINIME_CHAR_UTILS_H 19 20 #include <cctype> 21 22 #include "defines.h" 23 24 namespace latinime { 25 26 inline static bool isAsciiUpper(int c) { 27 // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to 28 // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). 29 return (c >= 'A' && c <= 'Z'); 30 } 31 32 inline static int toAsciiLower(int c) { 33 return c - 'A' + 'a'; 34 } 35 36 inline static bool isAscii(int c) { 37 return isascii(c) != 0; 38 } 39 40 unsigned short latin_tolower(const unsigned short c); 41 42 /** 43 * Table mapping most combined Latin, Greek, and Cyrillic characters 44 * to their base characters. If c is in range, BASE_CHARS[c] == c 45 * if c is not a combined character, or the base character if it 46 * is combined. 47 */ 48 static const int BASE_CHARS_SIZE = 0x0500; 49 extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; 50 51 inline static int toBaseCodePoint(int c) { 52 if (c < BASE_CHARS_SIZE) { 53 return static_cast<int>(BASE_CHARS[c]); 54 } 55 return c; 56 } 57 58 AK_FORCE_INLINE static int toLowerCase(const int c) { 59 if (isAsciiUpper(c)) { 60 return toAsciiLower(c); 61 } 62 if (isAscii(c)) { 63 return c; 64 } 65 return static_cast<int>(latin_tolower(static_cast<unsigned short>(c))); 66 } 67 68 AK_FORCE_INLINE static int toBaseLowerCase(const int c) { 69 return toLowerCase(toBaseCodePoint(c)); 70 } 71 72 inline static bool isIntentionalOmissionCodePoint(const int codePoint) { 73 // TODO: Do not hardcode here 74 return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS; 75 } 76 77 inline static int getCodePointCount(const int arraySize, const int *const codePoints) { 78 int size = 0; 79 for (; size < arraySize; ++size) { 80 if (codePoints[size] == '\0') { 81 break; 82 } 83 } 84 return size; 85 } 86 87 } // namespace latinime 88 #endif // LATINIME_CHAR_UTILS_H 89