Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_JNI_DATA_UTILS_H
     18 #define LATINIME_JNI_DATA_UTILS_H
     19 
#include <algorithm>
#include <vector>

#include "defines.h"
#include "dictionary/header/header_read_write_utils.h"
#include "dictionary/interface/dictionary_header_structure_policy.h"
#include "dictionary/property/ngram_context.h"
#include "dictionary/property/word_property.h"
#include "jni.h"
#include "utils/char_utils.h"
     29 
     30 namespace latinime {
     31 
     32 class JniDataUtils {
     33  public:
     34     static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
     35         if (!array) {
     36             outVector->clear();
     37             return;
     38         }
     39         const jsize arrayLength = env->GetArrayLength(array);
     40         outVector->resize(arrayLength);
     41         env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
     42     }
     43 
     44     static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env,
     45             jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) {
     46         DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
     47         const int keyCount = env->GetArrayLength(attributeKeyStringArray);
     48         for (int i = 0; i < keyCount; i++) {
     49             jstring keyString = static_cast<jstring>(
     50                     env->GetObjectArrayElement(attributeKeyStringArray, i));
     51             const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
     52             char keyChars[keyUtf8Length + 1];
     53             env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
     54             env->DeleteLocalRef(keyString);
     55             keyChars[keyUtf8Length] = '\0';
     56             DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
     57             HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
     58 
     59             jstring valueString = static_cast<jstring>(
     60                     env->GetObjectArrayElement(attributeValueStringArray, i));
     61             const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
     62             char valueChars[valueUtf8Length + 1];
     63             env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
     64             env->DeleteLocalRef(valueString);
     65             valueChars[valueUtf8Length] = '\0';
     66             DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
     67             HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
     68             attributeMap[key] = value;
     69         }
     70         return attributeMap;
     71     }
     72 
     73     static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
     74             const int maxLength, const int *const codePoints, const int codePointCount,
     75             const bool needsNullTermination) {
     76         const int codePointBufSize = std::min(maxLength, codePointCount);
     77         int outputCodePonts[codePointBufSize];
     78         int outputCodePointCount = 0;
     79         for (int i = 0; i < codePointBufSize; ++i) {
     80             const int codePoint = codePoints[i];
     81             int codePointToOutput = codePoint;
     82             if (!CharUtils::isInUnicodeSpace(codePoint)) {
     83                 if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
     84                     // Just skip Beginning-of-Sentence marker.
     85                     continue;
     86                 }
     87                 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
     88             } else if (codePoint >= 0x01 && codePoint <= 0x1F) {
     89                 // Control code.
     90                 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
     91             }
     92             outputCodePonts[outputCodePointCount++] = codePointToOutput;
     93         }
     94         env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
     95                 outputCodePonts);
     96         if (needsNullTermination && outputCodePointCount < maxLength) {
     97             env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
     98                     1 /* len */, &CODE_POINT_NULL);
     99         }
    100     }
    101 
    102     static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
    103             jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
    104         int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
    105         int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    106         bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    107         for (size_t i = 0; i < prevWordCount; ++i) {
    108             prevWordCodePointCount[i] = 0;
    109             isBeginningOfSentence[i] = false;
    110             jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
    111             if (!prevWord) {
    112                 continue;
    113             }
    114             jsize prevWordLength = env->GetArrayLength(prevWord);
    115             if (prevWordLength > MAX_WORD_LENGTH) {
    116                 continue;
    117             }
    118             env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
    119             env->DeleteLocalRef(prevWord);
    120             prevWordCodePointCount[i] = prevWordLength;
    121             jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
    122             env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
    123                     &isBeginningOfSentenceBoolean);
    124             isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
    125         }
    126         return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
    127                 prevWordCount);
    128     }
    129 
    130     static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
    131             const jboolean value) {
    132         env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
    133     }
    134 
    135     static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
    136         env->SetIntArrayRegion(array, index, 1 /* len */, &value);
    137     }
    138 
    139     static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index,
    140             const float value) {
    141         env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
    142     }
    143 
    144     static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty,
    145             jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo,
    146             jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
    147             jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
    148             jobject outShortcutProbabilities);
    149 
    150  private:
    151     DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
    152 
    153     static const int CODE_POINT_REPLACEMENT_CHARACTER;
    154     static const int CODE_POINT_NULL;
    155 };
    156 } // namespace latinime
    157 #endif // LATINIME_JNI_DATA_UTILS_H
    158