1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "utils/jni_data_utils.h" 18 19 #include "utils/int_array_view.h" 20 21 namespace latinime { 22 23 const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD; 24 const int JniDataUtils::CODE_POINT_NULL = 0; 25 26 /* static */ void JniDataUtils::outputWordProperty(JNIEnv *const env, 27 const WordProperty &wordProperty, jintArray outCodePoints, jbooleanArray outFlags, 28 jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, 29 jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, 30 jobject outNgramProbabilities, jobject outShortcutTargets, 31 jobject outShortcutProbabilities) { 32 const CodePointArrayView codePoints = wordProperty.getCodePoints(); 33 JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, 34 MAX_WORD_LENGTH /* maxLength */, codePoints.data(), codePoints.size(), 35 false /* needsNullTermination */); 36 const UnigramProperty &unigramProperty = wordProperty.getUnigramProperty(); 37 const std::vector<NgramProperty> &ngrams = wordProperty.getNgramProperties(); 38 jboolean flags[] = {unigramProperty.isNotAWord(), unigramProperty.isPossiblyOffensive(), 39 !ngrams.empty(), unigramProperty.hasShortcuts(), 40 unigramProperty.representsBeginningOfSentence()}; 41 env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); 42 const HistoricalInfo &historicalInfo = unigramProperty.getHistoricalInfo(); 43 int probabilityInfo[] = {unigramProperty.getProbability(), historicalInfo.getTimestamp(), 44 historicalInfo.getLevel(), historicalInfo.getCount()}; 45 env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo), 46 probabilityInfo); 47 48 jclass integerClass = env->FindClass("java/lang/Integer"); 49 jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V"); 50 jclass arrayListClass = env->FindClass("java/util/ArrayList"); 51 jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); 52 53 // Output ngrams. 54 jclass intArrayClass = env->FindClass("[I"); 55 for (const auto &ngramProperty : ngrams) { 56 const NgramContext *const ngramContext = ngramProperty.getNgramContext(); 57 jobjectArray prevWordWordCodePointsArray = env->NewObjectArray( 58 ngramContext->getPrevWordCount(), intArrayClass, nullptr); 59 jbooleanArray prevWordIsBeginningOfSentenceArray = 60 env->NewBooleanArray(ngramContext->getPrevWordCount()); 61 for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) { 62 const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1); 63 jintArray prevWordCodePoints = env->NewIntArray(codePoints.size()); 64 JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */, 65 codePoints.size(), codePoints.data(), codePoints.size(), 66 false /* needsNullTermination */); 67 env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints); 68 env->DeleteLocalRef(prevWordCodePoints); 69 JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i, 70 ngramContext->isNthPrevWordBeginningOfSentence(i + 1)); 71 } 72 env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray); 73 env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId, 74 prevWordIsBeginningOfSentenceArray); 75 env->DeleteLocalRef(prevWordWordCodePointsArray); 76 env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray); 77 78 const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints(); 79 jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size()); 80 JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */, 81 targetWordCodePoints->size(), targetWordCodePoints->data(), 82 targetWordCodePoints->size(), false /* needsNullTermination */); 83 env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray); 84 env->DeleteLocalRef(targetWordCodePointArray); 85 86 const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo(); 87 int bigramProbabilityInfo[] = {ngramProperty.getProbability(), 88 ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(), 89 ngramHistoricalInfo.getCount()}; 90 jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); 91 env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, 92 NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); 93 env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray); 94 env->DeleteLocalRef(bigramProbabilityInfoArray); 95 } 96 97 // Output shortcuts. 98 for (const auto &shortcut : unigramProperty.getShortcuts()) { 99 const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints(); 100 jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size()); 101 JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */, 102 targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(), 103 false /* needsNullTermination */); 104 env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); 105 env->DeleteLocalRef(shortcutTargetCodePointArray); 106 jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, 107 shortcut.getProbability()); 108 env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability); 109 env->DeleteLocalRef(integerProbability); 110 } 111 env->DeleteLocalRef(integerClass); 112 env->DeleteLocalRef(arrayListClass); 113 } 114 115 } // namespace latinime 116