1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_CHAR_UTILS_H 18 #define LATINIME_CHAR_UTILS_H 19 20 #include <cctype> 21 22 #include "defines.h" 23 24 namespace latinime { 25 26 class CharUtils { 27 public: 28 static AK_FORCE_INLINE bool isAsciiUpper(int c) { 29 // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to 30 // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). 31 return (c >= 'A' && c <= 'Z'); 32 } 33 34 static AK_FORCE_INLINE int toAsciiLower(int c) { 35 return c - 'A' + 'a'; 36 } 37 38 static AK_FORCE_INLINE bool isAscii(int c) { 39 return isascii(c) != 0; 40 } 41 42 static AK_FORCE_INLINE int toLowerCase(const int c) { 43 if (isAsciiUpper(c)) { 44 return toAsciiLower(c); 45 } 46 if (isAscii(c)) { 47 return c; 48 } 49 return static_cast<int>(latin_tolower(static_cast<unsigned short>(c))); 50 } 51 52 static AK_FORCE_INLINE int toBaseLowerCase(const int c) { 53 return toLowerCase(toBaseCodePoint(c)); 54 } 55 56 static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(const int codePoint) { 57 // TODO: Do not hardcode here 58 return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS; 59 } 60 61 static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) { 62 int size = 0; 63 for (; size < arraySize; ++size) { 64 if (codePoints[size] == '\0') { 65 break; 66 } 67 } 68 return size; 69 } 70 71 static AK_FORCE_INLINE int toBaseCodePoint(int c) { 72 if (c < BASE_CHARS_SIZE) { 73 return static_cast<int>(BASE_CHARS[c]); 74 } 75 return c; 76 } 77 78 static AK_FORCE_INLINE int getSpaceCount(const int *const codePointBuffer, const int length) { 79 int spaceCount = 0; 80 for (int i = 0; i < length; ++i) { 81 if (codePointBuffer[i] == KEYCODE_SPACE) { 82 ++spaceCount; 83 } 84 } 85 return spaceCount; 86 } 87 88 static unsigned short latin_tolower(const unsigned short c); 89 90 private: 91 DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); 92 93 /** 94 * Table mapping most combined Latin, Greek, and Cyrillic characters 95 * to their base characters. If c is in range, BASE_CHARS[c] == c 96 * if c is not a combined character, or the base character if it 97 * is combined. 98 */ 99 static const int BASE_CHARS_SIZE = 0x0500; 100 static const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; 101 }; 102 } // namespace latinime 103 #endif // LATINIME_CHAR_UTILS_H 104