1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_JNI_DATA_UTILS_H 18 #define LATINIME_JNI_DATA_UTILS_H 19 20 #include <vector> 21 22 #include "defines.h" 23 #include "jni.h" 24 #include "suggest/core/session/prev_words_info.h" 25 #include "suggest/core/policy/dictionary_header_structure_policy.h" 26 #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" 27 #include "utils/char_utils.h" 28 29 namespace latinime { 30 31 class JniDataUtils { 32 public: 33 static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) { 34 if (!array) { 35 outVector->clear(); 36 return; 37 } 38 const jsize arrayLength = env->GetArrayLength(array); 39 outVector->resize(arrayLength); 40 env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data()); 41 } 42 43 static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env, 44 jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) { 45 DictionaryHeaderStructurePolicy::AttributeMap attributeMap; 46 const int keyCount = env->GetArrayLength(attributeKeyStringArray); 47 for (int i = 0; i < keyCount; i++) { 48 jstring keyString = static_cast<jstring>( 49 env->GetObjectArrayElement(attributeKeyStringArray, i)); 50 const jsize keyUtf8Length = env->GetStringUTFLength(keyString); 51 char keyChars[keyUtf8Length + 1]; 52 env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); 53 keyChars[keyUtf8Length] = '\0'; 54 DictionaryHeaderStructurePolicy::AttributeMap::key_type key; 55 HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); 56 57 jstring valueString = static_cast<jstring>( 58 env->GetObjectArrayElement(attributeValueStringArray, i)); 59 const jsize valueUtf8Length = env->GetStringUTFLength(valueString); 60 char valueChars[valueUtf8Length + 1]; 61 env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); 62 valueChars[valueUtf8Length] = '\0'; 63 DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value; 64 HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); 65 attributeMap[key] = value; 66 } 67 return attributeMap; 68 } 69 70 static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, 71 const int maxLength, const int *const codePoints, const int codePointCount, 72 const bool needsNullTermination) { 73 const int codePointBufSize = std::min(maxLength, codePointCount); 74 int outputCodePonts[codePointBufSize]; 75 int outputCodePointCount = 0; 76 for (int i = 0; i < codePointBufSize; ++i) { 77 const int codePoint = codePoints[i]; 78 int codePointToOutput = codePoint; 79 if (!CharUtils::isInUnicodeSpace(codePoint)) { 80 if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) { 81 // Just skip Beginning-of-Sentence marker. 82 continue; 83 } 84 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; 85 } else if (codePoint >= 0x01 && codePoint <= 0x1F) { 86 // Control code. 87 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; 88 } 89 outputCodePonts[outputCodePointCount++] = codePointToOutput; 90 } 91 env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, 92 outputCodePonts); 93 if (needsNullTermination && outputCodePointCount < maxLength) { 94 env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount, 95 1 /* len */, &CODE_POINT_NULL); 96 } 97 } 98 99 static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays, 100 jbooleanArray isBeginningOfSentenceArray) { 101 int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; 102 int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 103 bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 104 jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays); 105 for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) { 106 prevWordCodePointCount[i] = 0; 107 isBeginningOfSentence[i] = false; 108 if (prevWordsCount <= static_cast<int>(i)) { 109 continue; 110 } 111 jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i); 112 if (!prevWord) { 113 continue; 114 } 115 jsize prevWordLength = env->GetArrayLength(prevWord); 116 if (prevWordLength > MAX_WORD_LENGTH) { 117 continue; 118 } 119 env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]); 120 prevWordCodePointCount[i] = prevWordLength; 121 jboolean isBeginningOfSentenceBoolean = JNI_FALSE; 122 env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */, 123 &isBeginningOfSentenceBoolean); 124 isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; 125 } 126 return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, 127 MAX_PREV_WORD_COUNT_FOR_N_GRAM); 128 } 129 130 static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, 131 const jboolean value) { 132 env->SetBooleanArrayRegion(array, index, 1 /* len */, &value); 133 } 134 135 static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) { 136 env->SetIntArrayRegion(array, index, 1 /* len */, &value); 137 } 138 139 static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index, 140 const float value) { 141 env->SetFloatArrayRegion(array, index, 1 /* len */, &value); 142 } 143 144 private: 145 DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); 146 147 static const int CODE_POINT_REPLACEMENT_CHARACTER; 148 static const int CODE_POINT_NULL; 149 }; 150 } // namespace latinime 151 #endif // LATINIME_JNI_DATA_UTILS_H 152