1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_BYTE_ARRAY_UTILS_H 18 #define LATINIME_BYTE_ARRAY_UTILS_H 19 20 #include <cstdint> 21 22 #include "defines.h" 23 24 namespace latinime { 25 26 /** 27 * Utility methods for reading byte arrays. 28 */ 29 class ByteArrayUtils { 30 public: 31 /** 32 * Integer writing 33 * 34 * Each method write a corresponding size integer in a big endian manner. 35 */ 36 static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer, 37 const uint32_t data, const int size, int *const pos) { 38 // size must be in 1 to 4. 39 ASSERT(size >= 1 && size <= 4); 40 switch (size) { 41 case 1: 42 ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos); 43 return; 44 case 2: 45 ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos); 46 return; 47 case 3: 48 ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos); 49 return; 50 case 4: 51 ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos); 52 return; 53 default: 54 break; 55 } 56 } 57 58 /** 59 * Integer reading 60 * 61 * Each method read a corresponding size integer in a big endian manner. 62 */ 63 static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) { 64 return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16) 65 ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3]; 66 } 67 68 static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) { 69 return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2]; 70 } 71 72 static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) { 73 return (buffer[pos] << 8) ^ buffer[pos + 1]; 74 } 75 76 static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) { 77 return buffer[pos]; 78 } 79 80 static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition( 81 const uint8_t *const buffer, int *const pos) { 82 const uint32_t value = readUint32(buffer, *pos); 83 *pos += 4; 84 return value; 85 } 86 87 static AK_FORCE_INLINE int readSint24AndAdvancePosition( 88 const uint8_t *const buffer, int *const pos) { 89 const uint8_t value = readUint8(buffer, *pos); 90 if (value < 0x80) { 91 return readUint24AndAdvancePosition(buffer, pos); 92 } else { 93 (*pos)++; 94 return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos)); 95 } 96 } 97 98 static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition( 99 const uint8_t *const buffer, int *const pos) { 100 const uint32_t value = readUint24(buffer, *pos); 101 *pos += 3; 102 return value; 103 } 104 105 static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition( 106 const uint8_t *const buffer, int *const pos) { 107 const uint16_t value = readUint16(buffer, *pos); 108 *pos += 2; 109 return value; 110 } 111 112 static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition( 113 const uint8_t *const buffer, int *const pos) { 114 return buffer[(*pos)++]; 115 } 116 117 static AK_FORCE_INLINE int readUint(const uint8_t *const buffer, 118 const int size, const int pos) { 119 // size must be in 1 to 4. 120 ASSERT(size >= 1 && size <= 4); 121 switch (size) { 122 case 1: 123 return ByteArrayUtils::readUint8(buffer, pos); 124 case 2: 125 return ByteArrayUtils::readUint16(buffer, pos); 126 case 3: 127 return ByteArrayUtils::readUint24(buffer, pos); 128 case 4: 129 return ByteArrayUtils::readUint32(buffer, pos); 130 default: 131 return 0; 132 } 133 } 134 135 /** 136 * Code Point Reading 137 * 138 * 1 byte = bbbbbbbb match 139 * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte 140 * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because 141 * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with 142 * 00011111 would be outside unicode. 143 * else: iso-latin-1 code 144 * This allows for the whole unicode range to be encoded, including chars outside of 145 * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control 146 * characters which should never happen anyway (and still work, but take 3 bytes). 147 */ 148 static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) { 149 int p = pos; 150 return readCodePointAndAdvancePosition(buffer, &p); 151 } 152 153 static AK_FORCE_INLINE int readCodePointAndAdvancePosition( 154 const uint8_t *const buffer, int *const pos) { 155 const uint8_t firstByte = readUint8(buffer, *pos); 156 if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) { 157 if (firstByte == CHARACTER_ARRAY_TERMINATOR) { 158 *pos += 1; 159 return NOT_A_CODE_POINT; 160 } else { 161 return readUint24AndAdvancePosition(buffer, pos); 162 } 163 } else { 164 *pos += 1; 165 return firstByte; 166 } 167 } 168 169 /** 170 * String (array of code points) Reading 171 * 172 * Reads code points until the terminator is found. 173 */ 174 // Returns the length of the string. 175 static int readStringAndAdvancePosition(const uint8_t *const buffer, 176 const int maxLength, int *const outBuffer, int *const pos) { 177 int length = 0; 178 int codePoint = readCodePointAndAdvancePosition(buffer, pos); 179 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { 180 outBuffer[length++] = codePoint; 181 codePoint = readCodePointAndAdvancePosition(buffer, pos); 182 } 183 return length; 184 } 185 186 // Advances the position and returns the length of the string. 187 static int advancePositionToBehindString( 188 const uint8_t *const buffer, const int maxLength, int *const pos) { 189 int length = 0; 190 int codePoint = readCodePointAndAdvancePosition(buffer, pos); 191 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { 192 codePoint = readCodePointAndAdvancePosition(buffer, pos); 193 length++; 194 } 195 return length; 196 } 197 198 /** 199 * String (array of code points) Writing 200 */ 201 static void writeCodePointsAndAdvancePosition(uint8_t *const buffer, 202 const int *const codePoints, const int codePointCount, const bool writesTerminator, 203 int *const pos) { 204 for (int i = 0; i < codePointCount; ++i) { 205 const int codePoint = codePoints[i]; 206 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { 207 break; 208 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE 209 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { 210 // three bytes character. 211 writeUint24AndAdvancePosition(buffer, codePoint, pos); 212 } else { 213 // one byte character. 214 writeUint8AndAdvancePosition(buffer, codePoint, pos); 215 } 216 } 217 if (writesTerminator) { 218 writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos); 219 } 220 } 221 222 static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints, 223 const int codePointCount, const bool writesTerminator) { 224 int byteCount = 0; 225 for (int i = 0; i < codePointCount; ++i) { 226 const int codePoint = codePoints[i]; 227 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { 228 break; 229 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE 230 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { 231 // three bytes character. 232 byteCount += 3; 233 } else { 234 // one byte character. 235 byteCount += 1; 236 } 237 } 238 if (writesTerminator) { 239 // The terminator is one byte. 240 byteCount += 1; 241 } 242 return byteCount; 243 } 244 245 private: 246 DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); 247 248 static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE; 249 static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE; 250 static const uint8_t CHARACTER_ARRAY_TERMINATOR; 251 252 static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer, 253 const uint32_t data, int *const pos) { 254 buffer[(*pos)++] = (data >> 24) & 0xFF; 255 buffer[(*pos)++] = (data >> 16) & 0xFF; 256 buffer[(*pos)++] = (data >> 8) & 0xFF; 257 buffer[(*pos)++] = data & 0xFF; 258 } 259 260 static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer, 261 const uint32_t data, int *const pos) { 262 buffer[(*pos)++] = (data >> 16) & 0xFF; 263 buffer[(*pos)++] = (data >> 8) & 0xFF; 264 buffer[(*pos)++] = data & 0xFF; 265 } 266 267 static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer, 268 const uint16_t data, int *const pos) { 269 buffer[(*pos)++] = (data >> 8) & 0xFF; 270 buffer[(*pos)++] = data & 0xFF; 271 } 272 273 static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer, 274 const uint8_t data, int *const pos) { 275 buffer[(*pos)++] = data & 0xFF; 276 } 277 }; 278 } // namespace latinime 279 #endif /* LATINIME_BYTE_ARRAY_UTILS_H */ 280