Home | History | Annotate | Download | only in v4
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
     18 
#include <cerrno>
#include <cstring>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
     25 
     26 #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
     27 #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
     28 #include "suggest/policyimpl/dictionary/utils/file_utils.h"
     29 #include "utils/byte_array_view.h"
     30 
     31 namespace latinime {
     32 
     33 /* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
     34         const char *const dictPath, MmappedBuffer::MmappedBufferPtr &&headerBuffer,
     35         const FormatUtils::FORMAT_VERSION formatVersion) {
     36     if (!headerBuffer) {
     37         ASSERT(false);
     38         AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
     39         return Ver4DictBuffersPtr(nullptr);
     40     }
     41     // TODO: take only dictDirPath, and open both header and trie files in the constructor below
     42     const bool isUpdatable = headerBuffer->isUpdatable();
     43     MmappedBuffer::MmappedBufferPtr bodyBuffer = MmappedBuffer::openBuffer(dictPath,
     44             Ver4DictConstants::BODY_FILE_EXTENSION, isUpdatable);
     45     if (!bodyBuffer) {
     46         return Ver4DictBuffersPtr(nullptr);
     47     }
     48     std::vector<uint8_t *> buffers;
     49     std::vector<int> bufferSizes;
     50     const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
     51     int position = 0;
     52     while (position < static_cast<int>(buffer.size())) {
     53         const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
     54                 buffer.data(), &position);
     55         const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
     56         buffers.push_back(subBuffer.data());
     57         bufferSizes.push_back(subBuffer.size());
     58         position += bufferSize;
     59         if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
     60             AKLOGE("The dict body file is corrupted.");
     61             return Ver4DictBuffersPtr(nullptr);
     62         }
     63     }
     64     if (buffers.size() != Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE) {
     65         AKLOGE("The dict body file is corrupted.");
     66         return Ver4DictBuffersPtr(nullptr);
     67     }
     68     return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
     69             formatVersion, buffers, bufferSizes));
     70 }
     71 
     72 bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
     73         const BufferWithExtendableBuffer *const headerBuffer) const {
     74     // Create temporary directory.
     75     const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
     76             DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
     77     char tmpDirPath[tmpDirPathBufSize];
     78     FileUtils::getFilePathWithSuffix(dictDirPath,
     79             DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
     80             tmpDirPath);
     81     if (FileUtils::existsDir(tmpDirPath)) {
     82         if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
     83             AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
     84             ASSERT(false);
     85             return false;
     86         }
     87     }
     88     umask(S_IWGRP | S_IWOTH);
     89     if (mkdir(tmpDirPath, S_IRWXU) == -1) {
     90         AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
     91         return false;
     92     }
     93     // Get dictionary base path.
     94     const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
     95     char dictName[dictNameBufSize];
     96     FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
     97     const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
     98     char dictPath[dictPathBufSize];
     99     FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
    100 
    101     // Write header file.
    102     if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
    103             Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
    104         AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
    105                 Ver4DictConstants::HEADER_FILE_EXTENSION);
    106         return false;
    107     }
    108 
    109     // Write body file.
    110     const int bodyFilePathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictPath,
    111             Ver4DictConstants::BODY_FILE_EXTENSION);
    112     char bodyFilePath[bodyFilePathBufSize];
    113     FileUtils::getFilePathWithSuffix(dictPath, Ver4DictConstants::BODY_FILE_EXTENSION,
    114             bodyFilePathBufSize, bodyFilePath);
    115 
    116     const int fd = open(bodyFilePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
    117     if (fd == -1) {
    118         AKLOGE("File %s cannot be opened. errno: %d", bodyFilePath, errno);
    119         ASSERT(false);
    120         return false;
    121     }
    122     FILE *const file = fdopen(fd, "wb");
    123     if (!file) {
    124         AKLOGE("fdopen failed for the file %s. errno: %d", bodyFilePath, errno);
    125         ASSERT(false);
    126         return false;
    127     }
    128 
    129     if (!flushDictBuffers(file)) {
    130         fclose(file);
    131         return false;
    132     }
    133     fclose(file);
    134     // Remove existing dictionary.
    135     if (!FileUtils::removeDirAndFiles(dictDirPath)) {
    136         AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
    137         ASSERT(false);
    138         return false;
    139     }
    140     // Rename temporary directory.
    141     if (rename(tmpDirPath, dictDirPath) != 0) {
    142         AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
    143         ASSERT(false);
    144         return false;
    145     }
    146     return true;
    147 }
    148 
    149 bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
    150     // Write trie.
    151     if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) {
    152         AKLOGE("Trie cannot be written.");
    153         return false;
    154     }
    155     // Write terminal position lookup table.
    156     if (!mTerminalPositionLookupTable.flushToFile(file)) {
    157         AKLOGE("Terminal position lookup table cannot be written.");
    158         return false;
    159     }
    160     // Write language model content.
    161     if (!mLanguageModelDictContent.save(file)) {
    162         AKLOGE("Language model dict content cannot be written.");
    163         return false;
    164     }
    165     // Write bigram dict content.
    166     if (!mBigramDictContent.flushToFile(file)) {
    167         AKLOGE("Bigram dict content cannot be written.");
    168         return false;
    169     }
    170     // Write shortcut dict content.
    171     if (!mShortcutDictContent.flushToFile(file)) {
    172         AKLOGE("Shortcut dict content cannot be written.");
    173         return false;
    174     }
    175     return true;
    176 }
    177 
    178 Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
    179         MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
    180         const FormatUtils::FORMAT_VERSION formatVersion,
    181         const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
    182         : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
    183           mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
    184           mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
    185                   BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
    186           mExpandableTrieBuffer(
    187                   ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
    188                           contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
    189                   BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
    190           mTerminalPositionLookupTable(
    191                   contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
    192                   contentBufferSizes[
    193                           Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
    194           mLanguageModelDictContent(
    195                   ReadWriteByteArrayView(
    196                           contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
    197                           contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
    198                   mHeaderPolicy.hasHistoricalInfoOfWords()),
    199           mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
    200                   &contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
    201                   mHeaderPolicy.hasHistoricalInfoOfWords()),
    202           mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
    203                   &contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
    204           mIsUpdatable(mDictBuffer->isUpdatable()) {}
    205 
    206 Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
    207         : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
    208           mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
    209           mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
    210           mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
    211           mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
    212           mIsUpdatable(true) {}
    213 
    214 } // namespace latinime
    215