1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 // Original code is licensed as follows: 7 /* 8 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part 9 * 10 * Licensed under the Apache License, Version 2.0 (the "License"); 11 * you may not use this file except in compliance with the License. 12 * You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h" 24 25 #include "fxbarcode/BC_UtilCodingConvert.h" 26 #include "fxbarcode/pdf417/BC_PDF417Compaction.h" 27 #include "fxbarcode/utils.h" 28 #include "third_party/bigint/BigIntegerLibrary.hh" 29 30 #define SUBMODE_ALPHA 0 31 #define SUBMODE_LOWER 1 32 #define SUBMODE_MIXED 2 33 34 int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0; 35 int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1; 36 int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2; 37 int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3; 38 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900; 39 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901; 40 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902; 41 int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913; 42 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924; 43 uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = { 44 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58, 45 35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0, 32, 0, 0, 0}; 46 uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = { 47 59, 60, 62, 64, 91, 92, 93, 95, 96, 126, 33, 13, 9, 44, 58, 48 10, 45, 46, 36, 47, 34, 124, 42, 40, 41, 63, 123, 125, 39, 0}; 49 int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0}; 50 int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0}; 51 52 void CBC_PDF417HighLevelEncoder::Initialize() { 53 Inverse(); 54 } 55 56 void CBC_PDF417HighLevelEncoder::Finalize() {} 57 58 WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(WideString wideMsg, 59 Compaction compaction, 60 int32_t& e) { 61 ByteString bytes; 62 CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes); 63 WideString msg; 64 int32_t len = bytes.GetLength(); 65 for (int32_t i = 0; i < len; i++) { 66 wchar_t ch = (wchar_t)(bytes[i] & 0xff); 67 if (ch == '?' && bytes[i] != '?') { 68 e = BCExceptionCharactersOutsideISO88591Encoding; 69 return WideString(); 70 } 71 msg += ch; 72 } 73 std::vector<uint8_t> byteArr(bytes.begin(), bytes.end()); 74 WideString sb; 75 len = msg.GetLength(); 76 int32_t p = 0; 77 int32_t textSubMode = SUBMODE_ALPHA; 78 if (compaction == TEXT) { 79 encodeText(msg, p, len, sb, textSubMode); 80 } else if (compaction == BYTES) { 81 encodeBinary(&byteArr, p, byteArr.size(), BYTE_COMPACTION, sb); 82 } else if (compaction == NUMERIC) { 83 sb += (wchar_t)LATCH_TO_NUMERIC; 84 encodeNumeric(msg, p, len, sb); 85 } else { 86 int32_t encodingMode = LATCH_TO_TEXT; 87 while (p < len) { 88 int32_t n = determineConsecutiveDigitCount(msg, p); 89 if (n >= 13) { 90 sb += (wchar_t)LATCH_TO_NUMERIC; 91 encodingMode = NUMERIC_COMPACTION; 92 textSubMode = SUBMODE_ALPHA; 93 encodeNumeric(msg, p, n, sb); 94 p += n; 95 } else { 96 int32_t t = determineConsecutiveTextCount(msg, p); 97 if (t >= 5 || n == len) { 98 if (encodingMode != TEXT_COMPACTION) { 99 sb += (wchar_t)LATCH_TO_TEXT; 100 encodingMode = TEXT_COMPACTION; 101 textSubMode = SUBMODE_ALPHA; 102 } 103 textSubMode = encodeText(msg, p, t, sb, textSubMode); 104 p += t; 105 } else { 106 int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e); 107 if (e != BCExceptionNO) 108 return L" "; 109 if (b == 0) { 110 b = 1; 111 } 112 if (b == 1 && encodingMode == TEXT_COMPACTION) { 113 encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb); 114 } else { 115 encodeBinary(&byteArr, p, b, encodingMode, sb); 116 encodingMode = BYTE_COMPACTION; 117 textSubMode = SUBMODE_ALPHA; 118 } 119 p += b; 120 } 121 } 122 } 123 } 124 return sb; 125 } 126 127 void CBC_PDF417HighLevelEncoder::Inverse() { 128 for (size_t l = 0; l < FX_ArraySize(MIXED); ++l) 129 MIXED[l] = -1; 130 131 for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) { 132 uint8_t b = TEXT_MIXED_RAW[i]; 133 if (b != 0) 134 MIXED[b] = i; 135 } 136 137 for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l) 138 PUNCTUATION[l] = -1; 139 140 for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) { 141 uint8_t b = TEXT_PUNCTUATION_RAW[i]; 142 if (b != 0) 143 PUNCTUATION[b] = i; 144 } 145 } 146 147 int32_t CBC_PDF417HighLevelEncoder::encodeText(WideString msg, 148 int32_t startpos, 149 int32_t count, 150 WideString& sb, 151 int32_t initialSubmode) { 152 WideString tmp; 153 int32_t submode = initialSubmode; 154 int32_t idx = 0; 155 while (true) { 156 wchar_t ch = msg[startpos + idx]; 157 switch (submode) { 158 case SUBMODE_ALPHA: 159 if (isAlphaUpper(ch)) { 160 if (ch == ' ') 161 tmp += (wchar_t)26; 162 else 163 tmp += (wchar_t)(ch - 65); 164 break; 165 } 166 if (isAlphaLower(ch)) { 167 submode = SUBMODE_LOWER; 168 tmp += (wchar_t)27; 169 continue; 170 } 171 if (isMixed(ch)) { 172 submode = SUBMODE_MIXED; 173 tmp += (wchar_t)28; 174 continue; 175 } 176 tmp += (wchar_t)29; 177 tmp += PUNCTUATION[ch]; 178 break; 179 case SUBMODE_LOWER: 180 if (isAlphaLower(ch)) { 181 if (ch == ' ') { 182 tmp += (wchar_t)26; 183 } else { 184 tmp += (wchar_t)(ch - 97); 185 } 186 break; 187 } 188 if (isAlphaUpper(ch)) { 189 tmp += (wchar_t)27; 190 tmp += (wchar_t)(ch - 65); 191 break; 192 } 193 if (isMixed(ch)) { 194 submode = SUBMODE_MIXED; 195 tmp += (wchar_t)28; 196 continue; 197 } 198 199 tmp += (wchar_t)29; 200 tmp += PUNCTUATION[ch]; 201 break; 202 case SUBMODE_MIXED: 203 if (isMixed(ch)) { 204 tmp += MIXED[ch]; 205 break; 206 } 207 if (isAlphaUpper(ch)) { 208 submode = SUBMODE_ALPHA; 209 tmp += (wchar_t)28; 210 continue; 211 } 212 if (isAlphaLower(ch)) { 213 submode = SUBMODE_LOWER; 214 tmp += (wchar_t)27; 215 continue; 216 } 217 if (startpos + idx + 1 < count) { 218 wchar_t next = msg[startpos + idx + 1]; 219 if (isPunctuation(next)) { 220 submode = SUBMODE_PUNCTUATION; 221 tmp += (wchar_t)25; 222 continue; 223 } 224 } 225 tmp += (wchar_t)29; 226 tmp += PUNCTUATION[ch]; 227 break; 228 default: 229 if (isPunctuation(ch)) { 230 tmp += PUNCTUATION[ch]; 231 break; 232 } 233 submode = SUBMODE_ALPHA; 234 tmp += (wchar_t)29; 235 continue; 236 } 237 idx++; 238 if (idx >= count) { 239 break; 240 } 241 } 242 wchar_t h = 0; 243 int32_t len = tmp.GetLength(); 244 for (int32_t i = 0; i < len; i++) { 245 bool odd = (i % 2) != 0; 246 if (odd) { 247 h = (wchar_t)((h * 30) + tmp[i]); 248 sb += h; 249 } else { 250 h = tmp[i]; 251 } 252 } 253 if ((len % 2) != 0) { 254 sb += (wchar_t)((h * 30) + 29); 255 } 256 return submode; 257 } 258 void CBC_PDF417HighLevelEncoder::encodeBinary(std::vector<uint8_t>* bytes, 259 int32_t startpos, 260 int32_t count, 261 int32_t startmode, 262 WideString& sb) { 263 if (count == 1 && startmode == TEXT_COMPACTION) { 264 sb += (wchar_t)SHIFT_TO_BYTE; 265 } 266 int32_t idx = startpos; 267 int32_t i = 0; 268 if (count >= 6) { 269 sb += (wchar_t)LATCH_TO_BYTE; 270 wchar_t chars[5]; 271 while ((startpos + count - idx) >= 6) { 272 int64_t t = 0; 273 for (i = 0; i < 6; i++) { 274 t <<= 8; 275 t += (*bytes)[idx + i] & 0xff; 276 } 277 for (i = 0; i < 5; i++) { 278 chars[i] = (wchar_t)(t % 900); 279 t /= 900; 280 } 281 for (i = 4; i >= 0; i--) { 282 sb += (chars[i]); 283 } 284 idx += 6; 285 } 286 } 287 if (idx < startpos + count) { 288 sb += (wchar_t)LATCH_TO_BYTE_PADDED; 289 } 290 for (i = idx; i < startpos + count; i++) { 291 int32_t ch = (*bytes)[i] & 0xff; 292 sb += (wchar_t)ch; 293 } 294 } 295 void CBC_PDF417HighLevelEncoder::encodeNumeric(WideString msg, 296 int32_t startpos, 297 int32_t count, 298 WideString& sb) { 299 int32_t idx = 0; 300 BigInteger num900 = 900; 301 while (idx < count) { 302 WideString tmp; 303 int32_t len = 44 < count - idx ? 44 : count - idx; 304 ByteString part = 305 ((wchar_t)'1' + msg.Mid(startpos + idx, len)).UTF8Encode(); 306 BigInteger bigint = stringToBigInteger(part.c_str()); 307 do { 308 int32_t c = (bigint % num900).toInt(); 309 tmp += (wchar_t)(c); 310 bigint = bigint / num900; 311 } while (!bigint.isZero()); 312 for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) { 313 sb += tmp[i]; 314 } 315 idx += len; 316 } 317 } 318 bool CBC_PDF417HighLevelEncoder::isDigit(wchar_t ch) { 319 return ch >= '0' && ch <= '9'; 320 } 321 bool CBC_PDF417HighLevelEncoder::isAlphaUpper(wchar_t ch) { 322 return ch == ' ' || (ch >= 'A' && ch <= 'Z'); 323 } 324 bool CBC_PDF417HighLevelEncoder::isAlphaLower(wchar_t ch) { 325 return ch == ' ' || (ch >= 'a' && ch <= 'z'); 326 } 327 bool CBC_PDF417HighLevelEncoder::isMixed(wchar_t ch) { 328 return MIXED[ch] != -1; 329 } 330 bool CBC_PDF417HighLevelEncoder::isPunctuation(wchar_t ch) { 331 return PUNCTUATION[ch] != -1; 332 } 333 bool CBC_PDF417HighLevelEncoder::isText(wchar_t ch) { 334 return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126); 335 } 336 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount( 337 WideString msg, 338 int32_t startpos) { 339 int32_t count = 0; 340 int32_t len = msg.GetLength(); 341 int32_t idx = startpos; 342 if (idx < len) { 343 wchar_t ch = msg[idx]; 344 while (isDigit(ch) && idx < len) { 345 count++; 346 idx++; 347 if (idx < len) { 348 ch = msg[idx]; 349 } 350 } 351 } 352 return count; 353 } 354 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount( 355 WideString msg, 356 int32_t startpos) { 357 int32_t len = msg.GetLength(); 358 int32_t idx = startpos; 359 while (idx < len) { 360 wchar_t ch = msg[idx]; 361 int32_t numericCount = 0; 362 while (numericCount < 13 && isDigit(ch) && idx < len) { 363 numericCount++; 364 idx++; 365 if (idx < len) { 366 ch = msg[idx]; 367 } 368 } 369 if (numericCount >= 13) { 370 return idx - startpos - numericCount; 371 } 372 if (numericCount > 0) { 373 continue; 374 } 375 ch = msg[idx]; 376 if (!isText(ch)) { 377 break; 378 } 379 idx++; 380 } 381 return idx - startpos; 382 } 383 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount( 384 WideString msg, 385 std::vector<uint8_t>* bytes, 386 int32_t startpos, 387 int32_t& e) { 388 int32_t len = msg.GetLength(); 389 int32_t idx = startpos; 390 while (idx < len) { 391 wchar_t ch = msg[idx]; 392 int32_t numericCount = 0; 393 while (numericCount < 13 && isDigit(ch)) { 394 numericCount++; 395 int32_t i = idx + numericCount; 396 if (i >= len) { 397 break; 398 } 399 ch = msg[i]; 400 } 401 if (numericCount >= 13) { 402 return idx - startpos; 403 } 404 int32_t textCount = 0; 405 while (textCount < 5 && isText(ch)) { 406 textCount++; 407 int32_t i = idx + textCount; 408 if (i >= len) { 409 break; 410 } 411 ch = msg[i]; 412 } 413 if (textCount >= 5) { 414 return idx - startpos; 415 } 416 ch = msg[idx]; 417 if ((*bytes)[idx] == 63 && ch != '?') { 418 e = BCExceptionNonEncodableCharacterDetected; 419 return -1; 420 } 421 idx++; 422 } 423 return idx - startpos; 424 } 425