1 /* 2 * Copyright (C) 2010, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_DEFINES_H 18 #define LATINIME_DEFINES_H 19 20 #include <stdint.h> 21 22 #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) 23 #include <android/log.h> 24 #ifndef LOG_TAG 25 #define LOG_TAG "LatinIME: " 26 #endif 27 #define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) 28 #define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) 29 30 #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ 31 dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) 32 #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) 33 #define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0) 34 // TODO: INTS_TO_CHARS 35 #define SHORTS_TO_CHARS(input, length, output) do { \ 36 shortArrayToCharArray(input, length, output); } while (0) 37 38 static inline void dumpWordInfo(const unsigned short *word, const int length, 39 const int rank, const int frequency) { 40 static char charBuf[50]; 41 int i = 0; 42 for (; i < length; ++i) { 43 const unsigned short c = word[i]; 44 if (c == 0) { 45 break; 46 } 47 // static_cast only for debugging 48 charBuf[i] = static_cast<char>(c); 49 } 50 charBuf[i] = 0; 51 if (i > 1) { 52 AKLOGI("%2d [ %s ] (%d)", rank, charBuf, frequency); 53 } 54 } 55 56 static inline void 
dumpResult( 57 const unsigned short *outWords, const int *frequencies, const int maxWordCounts, 58 const int maxWordLength) { 59 AKLOGI("--- DUMP RESULT ---------"); 60 for (int i = 0; i < maxWordCounts; ++i) { 61 dumpWordInfo(&outWords[i * maxWordLength], maxWordLength, i, frequencies[i]); 62 } 63 AKLOGI("-------------------------"); 64 } 65 66 static inline void dumpWord(const unsigned short *word, const int length) { 67 static char charBuf[50]; 68 int i = 0; 69 for (; i < length; ++i) { 70 const unsigned short c = word[i]; 71 if (c == 0) { 72 break; 73 } 74 // static_cast only for debugging 75 charBuf[i] = static_cast<char>(c); 76 } 77 charBuf[i] = 0; 78 if (i > 1) { 79 AKLOGI("[ %s ]", charBuf); 80 } 81 } 82 83 static inline void dumpWordInt(const int *word, const int length) { 84 static char charBuf[50]; 85 86 for (int i = 0; i < length; ++i) { 87 charBuf[i] = word[i]; 88 } 89 charBuf[length] = 0; 90 AKLOGI("i[ %s ]", charBuf); 91 } 92 93 // TODO: Change this to intArrayToCharArray 94 static inline void shortArrayToCharArray( 95 const unsigned short *input, const int length, char *output) { 96 int i = 0; 97 for (;i < length; ++i) { 98 const unsigned short c = input[i]; 99 if (c == 0) { 100 break; 101 } 102 // static_cast only for debugging 103 output[i] = static_cast<char>(c); 104 } 105 output[i] = 0; 106 } 107 108 #ifndef __ANDROID__ 109 #include <cassert> 110 #include <execinfo.h> 111 #include <stdlib.h> 112 113 #define ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0) 114 #define SHOW_STACK_TRACE do { showStackTrace(); } while (0) 115 116 static inline void showStackTrace() { 117 void *callstack[128]; 118 int i, frames = backtrace(callstack, 128); 119 char **strs = backtrace_symbols(callstack, frames); 120 for (i = 0; i < frames; ++i) { 121 if (i == 0) { 122 AKLOGI("=== Trace ==="); 123 continue; 124 } 125 AKLOGI("%s", strs[i]); 126 } 127 free(strs); 128 } 129 #else 130 #include <cassert> 131 #define ASSERT(success) 
assert(success) 132 #define SHOW_STACK_TRACE 133 #endif 134 135 #else 136 #define AKLOGE(fmt, ...) 137 #define AKLOGI(fmt, ...) 138 #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) 139 #define DUMP_WORD(word, length) 140 #define DUMP_WORD_INT(word, length) 141 #define ASSERT(success) 142 #define SHOW_STACK_TRACE 143 // TODO: INTS_TO_CHARS 144 #define SHORTS_TO_CHARS(input, length, output) 145 #endif 146 147 #ifdef FLAG_DO_PROFILE 148 // Profiler 149 #include <time.h> 150 151 #define PROF_BUF_SIZE 100 152 static float profile_buf[PROF_BUF_SIZE]; 153 static float profile_old[PROF_BUF_SIZE]; 154 static unsigned int profile_counter[PROF_BUF_SIZE]; 155 156 #define PROF_RESET prof_reset() 157 #define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id] 158 #define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0) 159 #define PROF_START(prof_buf_id) do { \ 160 PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0) 161 #define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0) 162 #define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]) 163 #define PROF_CLOCKOUT(prof_buf_id) \ 164 AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id])) 165 #define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0) 166 167 static inline void prof_reset(void) { 168 for (int i = 0; i < PROF_BUF_SIZE; ++i) { 169 profile_buf[i] = 0; 170 profile_old[i] = 0; 171 profile_counter[i] = 0; 172 } 173 } 174 175 static inline void prof_out(void) { 176 if (profile_counter[PROF_BUF_SIZE - 1] != 1) { 177 AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); 178 } 179 AKLOGI("Total time is %6.3f ms.", 180 profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC)); 181 float all = 0; 182 for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { 183 all += profile_buf[i]; 184 } 185 if (all == 0) all = 1; 186 for (int i = 0; i < 
PROF_BUF_SIZE - 1; ++i) { 187 if (profile_buf[i]) { 188 AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", 189 i, (profile_buf[i] * 100 / all), 190 profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC), 191 profile_counter[i]); 192 } 193 } 194 } 195 196 #else // FLAG_DO_PROFILE 197 #define PROF_BUF_SIZE 0 198 #define PROF_RESET 199 #define PROF_COUNT(prof_buf_id) 200 #define PROF_OPEN 201 #define PROF_START(prof_buf_id) 202 #define PROF_CLOSE 203 #define PROF_END(prof_buf_id) 204 #define PROF_CLOCK_OUT(prof_buf_id) 205 #define PROF_CLOCKOUT(prof_buf_id) 206 #define PROF_OUTALL 207 208 #endif // FLAG_DO_PROFILE 209 210 #ifdef FLAG_DBG 211 #define DEBUG_DICT true 212 #define DEBUG_DICT_FULL false 213 #define DEBUG_EDIT_DISTANCE false 214 #define DEBUG_SHOW_FOUND_WORD false 215 #define DEBUG_NODE DEBUG_DICT_FULL 216 #define DEBUG_TRACE DEBUG_DICT_FULL 217 #define DEBUG_PROXIMITY_INFO false 218 #define DEBUG_PROXIMITY_CHARS false 219 #define DEBUG_CORRECTION false 220 #define DEBUG_CORRECTION_FREQ false 221 #define DEBUG_WORDS_PRIORITY_QUEUE false 222 223 #ifdef FLAG_FULL_DBG 224 #define DEBUG_GEO_FULL true 225 #else 226 #define DEBUG_GEO_FULL false 227 #endif 228 229 #else // FLAG_DBG 230 231 #define DEBUG_DICT false 232 #define DEBUG_DICT_FULL false 233 #define DEBUG_EDIT_DISTANCE false 234 #define DEBUG_SHOW_FOUND_WORD false 235 #define DEBUG_NODE false 236 #define DEBUG_TRACE false 237 #define DEBUG_PROXIMITY_INFO false 238 #define DEBUG_PROXIMITY_CHARS false 239 #define DEBUG_CORRECTION false 240 #define DEBUG_CORRECTION_FREQ false 241 #define DEBUG_WORDS_PRIORITY_QUEUE false 242 243 #define DEBUG_GEO_FULL false 244 245 #endif // FLAG_DBG 246 247 #ifndef U_SHORT_MAX 248 #define U_SHORT_MAX 65535 // ((1 << 16) - 1) 249 #endif 250 #ifndef S_INT_MAX 251 #define S_INT_MAX 2147483647 // ((1 << 31) - 1) 252 #endif 253 254 // Define this to use mmap() for dictionary loading. Undefine to use malloc() instead of mmap(). 
// We measured and compared performance of both, and found mmap() is fairly good in terms of
// loading time, and acceptable even for several initial lookups which involve page faults.
#define USE_MMAP_FOR_DICTIONARY

// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
#define ADDRESS_MASK 0x3FFFFF

// The bit that decides if an address follows in the next 22 bits
#define FLAG_ADDRESS_MASK 0x40
// The bit that decides if this is a terminal node for a word. The node could still have children,
// if the word has other endings.
#define FLAG_TERMINAL_MASK 0x80

// Bit masks for bigram entries.
// NOTE(review): FLAG_BIGRAM_READ and FLAG_BIGRAM_CONTINUED share the value 0x80; their exact
// meaning is defined by the dictionary format, which is not visible in this header.
#define FLAG_BIGRAM_READ 0x80
#define FLAG_BIGRAM_CHILDEXIST 0x40
#define FLAG_BIGRAM_CONTINUED 0x80
#define FLAG_BIGRAM_FREQ 0x7F

#define DICTIONARY_VERSION_MIN 200

// Negative marker values; each is parenthesized so it can be used safely inside expressions.
#define NOT_VALID_WORD (-99)
#define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE (-1)
#define NOT_A_COORDINATE (-1)
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2)
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)

#define KEYCODE_SPACE ' '

#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true

#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
#define SUGGEST_MULTIPLE_WORDS true

// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58
#define WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE 50
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
#define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 70
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 148
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER 3
#define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
#define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
#define TWO_WORDS_CORRECTION_DEMOTION_BASE 80
#define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1
#define ZERO_DISTANCE_PROMOTION_RATE 110
// The two radii below are float literals, unlike the integer rates above.
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f
#define MAX_FREQ 255
#define MAX_BIGRAM_FREQ 15

// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
#define MAX_WORD_LENGTH_INTERNAL 48

// This must be the same as ProximityInfo#MAX_PROXIMITY_CHARS_SIZE, currently it's 16.
#define MAX_PROXIMITY_CHARS_SIZE_INTERNAL 16

// This must be equal to ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE in KeyDetector.java
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2

// Assuming locale strings such as en_US, sr-Latn etc.
#define MAX_LOCALE_STRING_LENGTH 10

// Word limit for sub queues used in WordsPriorityQueuePool.  Sub queues are temporary queues used
// for better performance.
// Holds up to 1 candidate for each word
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10
#define SUB_QUEUE_MIN_WORD_LENGTH 4
// TODO: Extend this limitation
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 5
// TODO: Remove this limitation
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH 12
// TODO: Remove this limitation
#define MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT 45
#define MULTIPLE_WORDS_DEMOTION_RATE 80
#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6

// Note: these two thresholds are double literals (no 'f' suffix).
#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35
#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185
/* heuristic... This should be changed if we change the unit of the frequency. */
// Integer arithmetic: with MAX_FREQ defined as 255 this evaluates to 255 * 58 / 100 = 147.
#define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100)

#define MAX_DEPTH_MULTIPLIER 3

#define FIRST_WORD_INDEX 0

#define MAX_SPACES_INTERNAL 16

// Max Distance between point to key
#define MAX_POINT_TO_KEY_LENGTH 10000000

// The max number of the keys in one keyboard layout
#define MAX_KEY_COUNT_IN_A_KEYBOARD 64

// TODO: Reduce this constant if possible; check the maximum number of digraphs in the same
// word in the dictionary for languages with digraphs, like German and French
#define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5

#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3

// TODO: Remove
#define MAX_POINTER_COUNT_FOR_G 2

// Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of
// bits we can test.
// At the moment 100 is the maximum number of bigrams for a word with the current
// dictionaries, so n = 100. 1024 buckets give us m = 1024.
// With 1 hash function, our false positive rate is about 9.3%, which should be enough for
// our uses since we are only using this to increase average performance. For the record,
// k = 2 gives 3.1% and k = 3 gives 1.6%. With k = 1, making m = 2048 gives 4.8%,
// and m = 4096 gives 2.4%.
#define BIGRAM_FILTER_BYTE_SIZE 128
// Must not exceed BIGRAM_FILTER_BYTE_SIZE * 8 (the number of bits in the filter), and is
// preferably prime. 1021 is the largest prime under 128 * 8.
#define BIGRAM_FILTER_MODULO 1021
#if BIGRAM_FILTER_BYTE_SIZE * 8 < BIGRAM_FILTER_MODULO
#error "BIGRAM_FILTER_MODULO is larger than BIGRAM_FILTER_BYTE_SIZE * 8"
#endif

// Returns the smaller of a and b; returns b when the two compare equal.
template<typename T> inline T min(T a, T b) { return a < b ? a : b; }
// Returns the larger of a and b; returns b when the two compare equal.
template<typename T> inline T max(T a, T b) { return a > b ? a : b; }

// The ratio of neutral area radius to sweet spot radius.
#define NEUTRAL_AREA_RADIUS_RATIO 1.3f

// DEBUG
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)

// Declares (without defining) the copy constructor and copy assignment operator of TypeName.
// The expansion ends without a semicolon, so the invocation must be followed by one.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);               \
  void operator=(const TypeName&)

// Additionally declares (without defining) the default constructor of TypeName.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName();                                    \
  DISALLOW_COPY_AND_ASSIGN(TypeName)

// Used as a return value for character comparison
typedef enum {
    // Same char, possibly with different case or accent
    EQUIVALENT_CHAR,
    // It is a char located nearby on the keyboard
    NEAR_PROXIMITY_CHAR,
    // It is an unrelated char
    UNRELATED_CHAR,
    // Additional proximity char which can differ by language.
    ADDITIONAL_PROXIMITY_CHAR
} ProximityType;
#endif // LATINIME_DEFINES_H