1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "dictionary/header/header_read_write_utils.h" 18 19 #include <cctype> 20 #include <cstdio> 21 #include <memory> 22 #include <vector> 23 24 #include "defines.h" 25 #include "dictionary/utils/buffer_with_extendable_buffer.h" 26 #include "dictionary/utils/byte_array_utils.h" 27 28 namespace latinime { 29 30 // Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator. 31 // As such, this is the maximum number of characters will be needed to represent an int as a 32 // string, including the terminator; this is used as the size of a string buffer large enough to 33 // hold any value that is intended to fit in an integer, e.g. in the code that reads the header 34 // of the binary dictionary where a {key,value} string pair scheme is used. 35 const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11; 36 37 const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; 38 const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048; 39 40 const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4; 41 const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; 42 const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2; 43 const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4; 44 const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable"; 45 46 const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; 47 48 typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; 49 50 /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { 51 // See the format of the header in the comment in 52 // BinaryDictionaryFormatUtils::detectFormatVersion() 53 return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE 54 + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE); 55 } 56 57 /* static */ HeaderReadWriteUtils::DictionaryFlags 58 HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) { 59 return ByteArrayUtils::readUint16(dictBuf, 60 HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); 61 } 62 63 /* static */ HeaderReadWriteUtils::DictionaryFlags 64 HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( 65 const AttributeMap *const attributeMap) { 66 return NO_FLAGS; 67 } 68 69 /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, 70 AttributeMap *const headerAttributes) { 71 const int headerSize = getHeaderSize(dictBuf); 72 int pos = getHeaderOptionsPosition(); 73 if (pos == NOT_A_DICT_POS) { 74 // The header doesn't have header options. 75 return; 76 } 77 int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH]; 78 std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]); 79 while (pos < headerSize) { 80 // The values in the header don't use the code point table for their encoding. 81 const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, 82 MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos); 83 std::vector<int> key; 84 key.insert(key.end(), keyBuffer, keyBuffer + keyLength); 85 const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, 86 MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos); 87 std::vector<int> value; 88 value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength); 89 headerAttributes->insert(AttributeMap::value_type(key, value)); 90 } 91 } 92 93 /* static */ const int *HeaderReadWriteUtils::readCodePointTable( 94 AttributeMap *const headerAttributes) { 95 AttributeMap::key_type keyVector; 96 insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector); 97 AttributeMap::const_iterator it = headerAttributes->find(keyVector); 98 if (it == headerAttributes->end()) { 99 return nullptr; 100 } 101 return it->second.data(); 102 } 103 104 /* static */ bool HeaderReadWriteUtils::writeDictionaryVersion( 105 BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version, 106 int *const writingPos) { 107 if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE, 108 writingPos)) { 109 return false; 110 } 111 switch (version) { 112 case FormatUtils::VERSION_2: 113 case FormatUtils::VERSION_201: 114 case FormatUtils::VERSION_202: 115 // None of the static dictionaries (v2x) support writing 116 return false; 117 case FormatUtils::VERSION_4_ONLY_FOR_TESTING: 118 case FormatUtils::VERSION_402: 119 case FormatUtils::VERSION_403: 120 return buffer->writeUintAndAdvancePosition(version /* data */, 121 HEADER_DICTIONARY_VERSION_SIZE, writingPos); 122 default: 123 return false; 124 } 125 } 126 127 /* static */ bool HeaderReadWriteUtils::writeDictionaryFlags( 128 BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags, 129 int *const writingPos) { 130 return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos); 131 } 132 133 /* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize( 134 BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) { 135 return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos); 136 } 137 138 /* static */ bool HeaderReadWriteUtils::writeHeaderAttributes( 139 BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, 140 int *const writingPos) { 141 for (AttributeMap::const_iterator it = headerAttributes->begin(); 142 it != headerAttributes->end(); ++it) { 143 if (it->first.empty() || it->second.empty()) { 144 continue; 145 } 146 // Write a key. 147 if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(), 148 true /* writesTerminator */, writingPos)) { 149 return false; 150 } 151 // Write a value. 152 if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(), 153 true /* writesTerminator */, writingPos)) { 154 return false; 155 } 156 } 157 return true; 158 } 159 160 /* static */ void HeaderReadWriteUtils::setCodePointVectorAttribute( 161 AttributeMap *const headerAttributes, const char *const key, 162 const std::vector<int> &value) { 163 AttributeMap::key_type keyVector; 164 insertCharactersIntoVector(key, &keyVector); 165 (*headerAttributes)[keyVector] = value; 166 } 167 168 /* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes, 169 const char *const key, const bool value) { 170 setIntAttribute(headerAttributes, key, value ? 1 : 0); 171 } 172 173 /* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes, 174 const char *const key, const int value) { 175 AttributeMap::key_type keyVector; 176 insertCharactersIntoVector(key, &keyVector); 177 setIntAttributeInner(headerAttributes, &keyVector, value); 178 } 179 180 /* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes, 181 const AttributeMap::key_type *const key, const int value) { 182 AttributeMap::mapped_type valueVector; 183 char charBuf[LARGEST_INT_DIGIT_COUNT]; 184 snprintf(charBuf, sizeof(charBuf), "%d", value); 185 insertCharactersIntoVector(charBuf, &valueVector); 186 (*headerAttributes)[*key] = valueVector; 187 } 188 189 /* static */ const std::vector<int> HeaderReadWriteUtils::readCodePointVectorAttributeValue( 190 const AttributeMap *const headerAttributes, const char *const key) { 191 AttributeMap::key_type keyVector; 192 insertCharactersIntoVector(key, &keyVector); 193 AttributeMap::const_iterator it = headerAttributes->find(keyVector); 194 if (it == headerAttributes->end()) { 195 return std::vector<int>(); 196 } else { 197 return it->second; 198 } 199 } 200 201 /* static */ bool HeaderReadWriteUtils::readBoolAttributeValue( 202 const AttributeMap *const headerAttributes, const char *const key, 203 const bool defaultValue) { 204 const int intDefaultValue = defaultValue ? 1 : 0; 205 const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue); 206 return intValue != 0; 207 } 208 209 /* static */ int HeaderReadWriteUtils::readIntAttributeValue( 210 const AttributeMap *const headerAttributes, const char *const key, 211 const int defaultValue) { 212 AttributeMap::key_type keyVector; 213 insertCharactersIntoVector(key, &keyVector); 214 return readIntAttributeValueInner(headerAttributes, &keyVector, defaultValue); 215 } 216 217 /* static */ int HeaderReadWriteUtils::readIntAttributeValueInner( 218 const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, 219 const int defaultValue) { 220 AttributeMap::const_iterator it = headerAttributes->find(*key); 221 if (it != headerAttributes->end()) { 222 int value = 0; 223 bool isNegative = false; 224 for (size_t i = 0; i < it->second.size(); ++i) { 225 if (i == 0 && it->second.at(i) == '-') { 226 isNegative = true; 227 } else { 228 if (!isdigit(it->second.at(i))) { 229 // If not a number. 230 return defaultValue; 231 } 232 value *= 10; 233 value += it->second.at(i) - '0'; 234 } 235 } 236 return isNegative ? -value : value; 237 } 238 return defaultValue; 239 } 240 241 /* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters, 242 std::vector<int> *const vector) { 243 for (int i = 0; characters[i]; ++i) { 244 vector->push_back(characters[i]); 245 } 246 } 247 248 } // namespace latinime 249