1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_JNI_DATA_UTILS_H 18 #define LATINIME_JNI_DATA_UTILS_H 19 20 #include <vector> 21 22 #include "defines.h" 23 #include "dictionary/header/header_read_write_utils.h" 24 #include "dictionary/interface/dictionary_header_structure_policy.h" 25 #include "dictionary/property/ngram_context.h" 26 #include "dictionary/property/word_property.h" 27 #include "jni.h" 28 #include "utils/char_utils.h" 29 30 namespace latinime { 31 32 class JniDataUtils { 33 public: 34 static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) { 35 if (!array) { 36 outVector->clear(); 37 return; 38 } 39 const jsize arrayLength = env->GetArrayLength(array); 40 outVector->resize(arrayLength); 41 env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data()); 42 } 43 44 static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env, 45 jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) { 46 DictionaryHeaderStructurePolicy::AttributeMap attributeMap; 47 const int keyCount = env->GetArrayLength(attributeKeyStringArray); 48 for (int i = 0; i < keyCount; i++) { 49 jstring keyString = static_cast<jstring>( 50 env->GetObjectArrayElement(attributeKeyStringArray, i)); 51 const jsize keyUtf8Length = env->GetStringUTFLength(keyString); 52 char keyChars[keyUtf8Length + 1]; 53 env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); 54 env->DeleteLocalRef(keyString); 55 keyChars[keyUtf8Length] = '\0'; 56 DictionaryHeaderStructurePolicy::AttributeMap::key_type key; 57 HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); 58 59 jstring valueString = static_cast<jstring>( 60 env->GetObjectArrayElement(attributeValueStringArray, i)); 61 const jsize valueUtf8Length = env->GetStringUTFLength(valueString); 62 char valueChars[valueUtf8Length + 1]; 63 env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); 64 env->DeleteLocalRef(valueString); 65 valueChars[valueUtf8Length] = '\0'; 66 DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value; 67 HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); 68 attributeMap[key] = value; 69 } 70 return attributeMap; 71 } 72 73 static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, 74 const int maxLength, const int *const codePoints, const int codePointCount, 75 const bool needsNullTermination) { 76 const int codePointBufSize = std::min(maxLength, codePointCount); 77 int outputCodePonts[codePointBufSize]; 78 int outputCodePointCount = 0; 79 for (int i = 0; i < codePointBufSize; ++i) { 80 const int codePoint = codePoints[i]; 81 int codePointToOutput = codePoint; 82 if (!CharUtils::isInUnicodeSpace(codePoint)) { 83 if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) { 84 // Just skip Beginning-of-Sentence marker. 85 continue; 86 } 87 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; 88 } else if (codePoint >= 0x01 && codePoint <= 0x1F) { 89 // Control code. 90 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; 91 } 92 outputCodePonts[outputCodePointCount++] = codePointToOutput; 93 } 94 env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, 95 outputCodePonts); 96 if (needsNullTermination && outputCodePointCount < maxLength) { 97 env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount, 98 1 /* len */, &CODE_POINT_NULL); 99 } 100 } 101 102 static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays, 103 jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) { 104 int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; 105 int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 106 bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 107 for (size_t i = 0; i < prevWordCount; ++i) { 108 prevWordCodePointCount[i] = 0; 109 isBeginningOfSentence[i] = false; 110 jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i); 111 if (!prevWord) { 112 continue; 113 } 114 jsize prevWordLength = env->GetArrayLength(prevWord); 115 if (prevWordLength > MAX_WORD_LENGTH) { 116 continue; 117 } 118 env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]); 119 env->DeleteLocalRef(prevWord); 120 prevWordCodePointCount[i] = prevWordLength; 121 jboolean isBeginningOfSentenceBoolean = JNI_FALSE; 122 env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */, 123 &isBeginningOfSentenceBoolean); 124 isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; 125 } 126 return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, 127 prevWordCount); 128 } 129 130 static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, 131 const jboolean value) { 132 env->SetBooleanArrayRegion(array, index, 1 /* len */, &value); 133 } 134 135 static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) { 136 env->SetIntArrayRegion(array, index, 1 /* len */, &value); 137 } 138 139 static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index, 140 const float value) { 141 env->SetFloatArrayRegion(array, index, 1 /* len */, &value); 142 } 143 144 static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty, 145 jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, 146 jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, 147 jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets, 148 jobject outShortcutProbabilities); 149 150 private: 151 DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); 152 153 static const int CODE_POINT_REPLACEMENT_CHARACTER; 154 static const int CODE_POINT_NULL; 155 }; 156 } // namespace latinime 157 #endif // LATINIME_JNI_DATA_UTILS_H 158