Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "utils/jni_data_utils.h"
     18 
     19 #include "utils/int_array_view.h"
     20 
     21 namespace latinime {
     22 
     23 const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD;
     24 const int JniDataUtils::CODE_POINT_NULL = 0;
     25 
     26 /* static */ void JniDataUtils::outputWordProperty(JNIEnv *const env,
     27         const WordProperty &wordProperty, jintArray outCodePoints, jbooleanArray outFlags,
     28         jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
     29         jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
     30         jobject outNgramProbabilities, jobject outShortcutTargets,
     31         jobject outShortcutProbabilities) {
     32     const CodePointArrayView codePoints = wordProperty.getCodePoints();
     33     JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
     34             MAX_WORD_LENGTH /* maxLength */, codePoints.data(), codePoints.size(),
     35             false /* needsNullTermination */);
     36     const UnigramProperty &unigramProperty = wordProperty.getUnigramProperty();
     37     const std::vector<NgramProperty> &ngrams = wordProperty.getNgramProperties();
     38     jboolean flags[] = {unigramProperty.isNotAWord(), unigramProperty.isPossiblyOffensive(),
     39             !ngrams.empty(), unigramProperty.hasShortcuts(),
     40             unigramProperty.representsBeginningOfSentence()};
     41     env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
     42     const HistoricalInfo &historicalInfo = unigramProperty.getHistoricalInfo();
     43     int probabilityInfo[] = {unigramProperty.getProbability(), historicalInfo.getTimestamp(),
     44             historicalInfo.getLevel(), historicalInfo.getCount()};
     45     env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
     46             probabilityInfo);
     47 
     48     jclass integerClass = env->FindClass("java/lang/Integer");
     49     jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
     50     jclass arrayListClass = env->FindClass("java/util/ArrayList");
     51     jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
     52 
     53     // Output ngrams.
     54     jclass intArrayClass = env->FindClass("[I");
     55     for (const auto &ngramProperty : ngrams) {
     56         const NgramContext *const ngramContext = ngramProperty.getNgramContext();
     57         jobjectArray prevWordWordCodePointsArray = env->NewObjectArray(
     58                 ngramContext->getPrevWordCount(), intArrayClass, nullptr);
     59         jbooleanArray prevWordIsBeginningOfSentenceArray =
     60                 env->NewBooleanArray(ngramContext->getPrevWordCount());
     61         for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) {
     62             const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1);
     63             jintArray prevWordCodePoints = env->NewIntArray(codePoints.size());
     64             JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */,
     65                     codePoints.size(), codePoints.data(), codePoints.size(),
     66                     false /* needsNullTermination */);
     67             env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints);
     68             env->DeleteLocalRef(prevWordCodePoints);
     69             JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i,
     70                     ngramContext->isNthPrevWordBeginningOfSentence(i + 1));
     71         }
     72         env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray);
     73         env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId,
     74                 prevWordIsBeginningOfSentenceArray);
     75         env->DeleteLocalRef(prevWordWordCodePointsArray);
     76         env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray);
     77 
     78         const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints();
     79         jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size());
     80         JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */,
     81                 targetWordCodePoints->size(), targetWordCodePoints->data(),
     82                 targetWordCodePoints->size(), false /* needsNullTermination */);
     83         env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray);
     84         env->DeleteLocalRef(targetWordCodePointArray);
     85 
     86         const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
     87         int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
     88                 ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
     89                 ngramHistoricalInfo.getCount()};
     90         jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
     91         env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
     92                 NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
     93         env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray);
     94         env->DeleteLocalRef(bigramProbabilityInfoArray);
     95     }
     96 
     97     // Output shortcuts.
     98     for (const auto &shortcut : unigramProperty.getShortcuts()) {
     99         const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints();
    100         jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
    101         JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */,
    102                 targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(),
    103                 false /* needsNullTermination */);
    104         env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
    105         env->DeleteLocalRef(shortcutTargetCodePointArray);
    106         jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
    107                 shortcut.getProbability());
    108         env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
    109         env->DeleteLocalRef(integerProbability);
    110     }
    111     env->DeleteLocalRef(integerClass);
    112     env->DeleteLocalRef(arrayListClass);
    113 }
    114 
    115 } // namespace latinime
    116