1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_BYTE_ARRAY_UTILS_H 18 #define LATINIME_BYTE_ARRAY_UTILS_H 19 20 #include <stdint.h> 21 22 #include "defines.h" 23 24 namespace latinime { 25 26 /** 27 * Utility methods for reading byte arrays. 28 */ 29 class ByteArrayUtils { 30 public: 31 /** 32 * Integer writing 33 * 34 * Each method write a corresponding size integer in a big endian manner. 35 */ 36 static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer, 37 const uint32_t data, const int size, int *const pos) { 38 // size must be in 1 to 4. 39 ASSERT(size >= 1 && size <= 4); 40 switch (size) { 41 case 1: 42 ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos); 43 return; 44 case 2: 45 ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos); 46 return; 47 case 3: 48 ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos); 49 return; 50 case 4: 51 ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos); 52 return; 53 default: 54 break; 55 } 56 } 57 58 /** 59 * Integer reading 60 * 61 * Each method read a corresponding size integer in a big endian manner. 62 */ 63 static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) { 64 return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16) 65 ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3]; 66 } 67 68 static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) { 69 return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2]; 70 } 71 72 static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) { 73 return (buffer[pos] << 8) ^ buffer[pos + 1]; 74 } 75 76 static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) { 77 return buffer[pos]; 78 } 79 80 static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition( 81 const uint8_t *const buffer, int *const pos) { 82 const uint32_t value = readUint32(buffer, *pos); 83 *pos += 4; 84 return value; 85 } 86 87 static AK_FORCE_INLINE int readSint24AndAdvancePosition( 88 const uint8_t *const buffer, int *const pos) { 89 const uint8_t value = readUint8(buffer, *pos); 90 if (value < 0x80) { 91 return readUint24AndAdvancePosition(buffer, pos); 92 } else { 93 (*pos)++; 94 return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos)); 95 } 96 } 97 98 static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition( 99 const uint8_t *const buffer, int *const pos) { 100 const uint32_t value = readUint24(buffer, *pos); 101 *pos += 3; 102 return value; 103 } 104 105 static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition( 106 const uint8_t *const buffer, int *const pos) { 107 const uint16_t value = readUint16(buffer, *pos); 108 *pos += 2; 109 return value; 110 } 111 112 static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition( 113 const uint8_t *const buffer, int *const pos) { 114 return buffer[(*pos)++]; 115 } 116 117 /** 118 * Code Point Reading 119 * 120 * 1 byte = bbbbbbbb match 121 * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte 122 * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because 123 * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with 124 * 00011111 would be outside unicode. 125 * else: iso-latin-1 code 126 * This allows for the whole unicode range to be encoded, including chars outside of 127 * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control 128 * characters which should never happen anyway (and still work, but take 3 bytes). 129 */ 130 static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) { 131 int p = pos; 132 return readCodePointAndAdvancePosition(buffer, &p); 133 } 134 135 static AK_FORCE_INLINE int readCodePointAndAdvancePosition( 136 const uint8_t *const buffer, int *const pos) { 137 const uint8_t firstByte = readUint8(buffer, *pos); 138 if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) { 139 if (firstByte == CHARACTER_ARRAY_TERMINATOR) { 140 *pos += 1; 141 return NOT_A_CODE_POINT; 142 } else { 143 return readUint24AndAdvancePosition(buffer, pos); 144 } 145 } else { 146 *pos += 1; 147 return firstByte; 148 } 149 } 150 151 /** 152 * String (array of code points) Reading 153 * 154 * Reads code points until the terminator is found. 155 */ 156 // Returns the length of the string. 157 static int readStringAndAdvancePosition(const uint8_t *const buffer, 158 const int maxLength, int *const outBuffer, int *const pos) { 159 int length = 0; 160 int codePoint = readCodePointAndAdvancePosition(buffer, pos); 161 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { 162 outBuffer[length++] = codePoint; 163 codePoint = readCodePointAndAdvancePosition(buffer, pos); 164 } 165 return length; 166 } 167 168 // Advances the position and returns the length of the string. 169 static int advancePositionToBehindString( 170 const uint8_t *const buffer, const int maxLength, int *const pos) { 171 int length = 0; 172 int codePoint = readCodePointAndAdvancePosition(buffer, pos); 173 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { 174 codePoint = readCodePointAndAdvancePosition(buffer, pos); 175 length++; 176 } 177 return length; 178 } 179 180 /** 181 * String (array of code points) Writing 182 */ 183 static void writeCodePointsAndAdvancePosition(uint8_t *const buffer, 184 const int *const codePoints, const int codePointCount, const bool writesTerminator, 185 int *const pos) { 186 for (int i = 0; i < codePointCount; ++i) { 187 const int codePoint = codePoints[i]; 188 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { 189 break; 190 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE 191 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { 192 // three bytes character. 193 writeUint24AndAdvancePosition(buffer, codePoint, pos); 194 } else { 195 // one byte character. 196 writeUint8AndAdvancePosition(buffer, codePoint, pos); 197 } 198 } 199 if (writesTerminator) { 200 writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos); 201 } 202 } 203 204 static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints, 205 const int codePointCount, const bool writesTerminator) { 206 int byteCount = 0; 207 for (int i = 0; i < codePointCount; ++i) { 208 const int codePoint = codePoints[i]; 209 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { 210 break; 211 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE 212 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { 213 // three bytes character. 214 byteCount += 3; 215 } else { 216 // one byte character. 217 byteCount += 1; 218 } 219 } 220 if (writesTerminator) { 221 // The terminator is one byte. 222 byteCount += 1; 223 } 224 return byteCount; 225 } 226 227 private: 228 DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); 229 230 static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE; 231 static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE; 232 static const uint8_t CHARACTER_ARRAY_TERMINATOR; 233 234 static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer, 235 const uint32_t data, int *const pos) { 236 buffer[(*pos)++] = (data >> 24) & 0xFF; 237 buffer[(*pos)++] = (data >> 16) & 0xFF; 238 buffer[(*pos)++] = (data >> 8) & 0xFF; 239 buffer[(*pos)++] = data & 0xFF; 240 } 241 242 static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer, 243 const uint32_t data, int *const pos) { 244 buffer[(*pos)++] = (data >> 16) & 0xFF; 245 buffer[(*pos)++] = (data >> 8) & 0xFF; 246 buffer[(*pos)++] = data & 0xFF; 247 } 248 249 static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer, 250 const uint16_t data, int *const pos) { 251 buffer[(*pos)++] = (data >> 8) & 0xFF; 252 buffer[(*pos)++] = data & 0xFF; 253 } 254 255 static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer, 256 const uint8_t data, int *const pos) { 257 buffer[(*pos)++] = data & 0xFF; 258 } 259 }; 260 } // namespace latinime 261 #endif /* LATINIME_BYTE_ARRAY_UTILS_H */ 262