Home | History | Annotate | Download | only in pdf417
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 // Original code is licensed as follows:
      7 /*
      8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
      9  *
     10  * Licensed under the Apache License, Version 2.0 (the "License");
     11  * you may not use this file except in compliance with the License.
     12  * You may obtain a copy of the License at
     13  *
     14  * http://www.apache.org/licenses/LICENSE-2.0
     15  *
     16  * Unless required by applicable law or agreed to in writing, software
     17  * distributed under the License is distributed on an "AS IS" BASIS,
     18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     19  * See the License for the specific language governing permissions and
     20  * limitations under the License.
     21  */
     22 
     23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
     24 
     25 #include "fxbarcode/BC_UtilCodingConvert.h"
     26 #include "fxbarcode/pdf417/BC_PDF417Compaction.h"
     27 #include "fxbarcode/utils.h"
     28 #include "third_party/bigint/BigIntegerLibrary.hh"
     29 
     30 #define SUBMODE_ALPHA 0
     31 #define SUBMODE_LOWER 1
     32 #define SUBMODE_MIXED 2
     33 
     34 int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
     35 int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
     36 int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
     37 int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
     38 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
     39 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
     40 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
     41 int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
     42 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
     43 uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
     44     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
     45     35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0,  32, 0, 0,  0};
     46 uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
     47     59, 60, 62, 64, 91, 92, 93,  95, 96, 126, 33, 13,  9,   44, 58,
     48     10, 45, 46, 36, 47, 34, 124, 42, 40, 41,  63, 123, 125, 39, 0};
     49 int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
     50 int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
     51 
     52 void CBC_PDF417HighLevelEncoder::Initialize() {
     53   Inverse();
     54 }
     55 
     56 void CBC_PDF417HighLevelEncoder::Finalize() {}
     57 
     58 WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(WideString wideMsg,
     59                                                        Compaction compaction,
     60                                                        int32_t& e) {
     61   ByteString bytes;
     62   CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
     63   WideString msg;
     64   int32_t len = bytes.GetLength();
     65   for (int32_t i = 0; i < len; i++) {
     66     wchar_t ch = (wchar_t)(bytes[i] & 0xff);
     67     if (ch == '?' && bytes[i] != '?') {
     68       e = BCExceptionCharactersOutsideISO88591Encoding;
     69       return WideString();
     70     }
     71     msg += ch;
     72   }
     73   std::vector<uint8_t> byteArr(bytes.begin(), bytes.end());
     74   WideString sb;
     75   len = msg.GetLength();
     76   int32_t p = 0;
     77   int32_t textSubMode = SUBMODE_ALPHA;
     78   if (compaction == TEXT) {
     79     encodeText(msg, p, len, sb, textSubMode);
     80   } else if (compaction == BYTES) {
     81     encodeBinary(&byteArr, p, byteArr.size(), BYTE_COMPACTION, sb);
     82   } else if (compaction == NUMERIC) {
     83     sb += (wchar_t)LATCH_TO_NUMERIC;
     84     encodeNumeric(msg, p, len, sb);
     85   } else {
     86     int32_t encodingMode = LATCH_TO_TEXT;
     87     while (p < len) {
     88       int32_t n = determineConsecutiveDigitCount(msg, p);
     89       if (n >= 13) {
     90         sb += (wchar_t)LATCH_TO_NUMERIC;
     91         encodingMode = NUMERIC_COMPACTION;
     92         textSubMode = SUBMODE_ALPHA;
     93         encodeNumeric(msg, p, n, sb);
     94         p += n;
     95       } else {
     96         int32_t t = determineConsecutiveTextCount(msg, p);
     97         if (t >= 5 || n == len) {
     98           if (encodingMode != TEXT_COMPACTION) {
     99             sb += (wchar_t)LATCH_TO_TEXT;
    100             encodingMode = TEXT_COMPACTION;
    101             textSubMode = SUBMODE_ALPHA;
    102           }
    103           textSubMode = encodeText(msg, p, t, sb, textSubMode);
    104           p += t;
    105         } else {
    106           int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
    107           if (e != BCExceptionNO)
    108             return L" ";
    109           if (b == 0) {
    110             b = 1;
    111           }
    112           if (b == 1 && encodingMode == TEXT_COMPACTION) {
    113             encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
    114           } else {
    115             encodeBinary(&byteArr, p, b, encodingMode, sb);
    116             encodingMode = BYTE_COMPACTION;
    117             textSubMode = SUBMODE_ALPHA;
    118           }
    119           p += b;
    120         }
    121       }
    122     }
    123   }
    124   return sb;
    125 }
    126 
    127 void CBC_PDF417HighLevelEncoder::Inverse() {
    128   for (size_t l = 0; l < FX_ArraySize(MIXED); ++l)
    129     MIXED[l] = -1;
    130 
    131   for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) {
    132     uint8_t b = TEXT_MIXED_RAW[i];
    133     if (b != 0)
    134       MIXED[b] = i;
    135   }
    136 
    137   for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l)
    138     PUNCTUATION[l] = -1;
    139 
    140   for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) {
    141     uint8_t b = TEXT_PUNCTUATION_RAW[i];
    142     if (b != 0)
    143       PUNCTUATION[b] = i;
    144   }
    145 }
    146 
    147 int32_t CBC_PDF417HighLevelEncoder::encodeText(WideString msg,
    148                                                int32_t startpos,
    149                                                int32_t count,
    150                                                WideString& sb,
    151                                                int32_t initialSubmode) {
    152   WideString tmp;
    153   int32_t submode = initialSubmode;
    154   int32_t idx = 0;
    155   while (true) {
    156     wchar_t ch = msg[startpos + idx];
    157     switch (submode) {
    158       case SUBMODE_ALPHA:
    159         if (isAlphaUpper(ch)) {
    160           if (ch == ' ')
    161             tmp += (wchar_t)26;
    162           else
    163             tmp += (wchar_t)(ch - 65);
    164           break;
    165         }
    166         if (isAlphaLower(ch)) {
    167           submode = SUBMODE_LOWER;
    168           tmp += (wchar_t)27;
    169           continue;
    170         }
    171         if (isMixed(ch)) {
    172           submode = SUBMODE_MIXED;
    173           tmp += (wchar_t)28;
    174           continue;
    175         }
    176         tmp += (wchar_t)29;
    177         tmp += PUNCTUATION[ch];
    178         break;
    179       case SUBMODE_LOWER:
    180         if (isAlphaLower(ch)) {
    181           if (ch == ' ') {
    182             tmp += (wchar_t)26;
    183           } else {
    184             tmp += (wchar_t)(ch - 97);
    185           }
    186           break;
    187         }
    188         if (isAlphaUpper(ch)) {
    189           tmp += (wchar_t)27;
    190           tmp += (wchar_t)(ch - 65);
    191           break;
    192         }
    193         if (isMixed(ch)) {
    194           submode = SUBMODE_MIXED;
    195           tmp += (wchar_t)28;
    196           continue;
    197         }
    198 
    199         tmp += (wchar_t)29;
    200         tmp += PUNCTUATION[ch];
    201         break;
    202       case SUBMODE_MIXED:
    203         if (isMixed(ch)) {
    204           tmp += MIXED[ch];
    205           break;
    206         }
    207         if (isAlphaUpper(ch)) {
    208           submode = SUBMODE_ALPHA;
    209           tmp += (wchar_t)28;
    210           continue;
    211         }
    212         if (isAlphaLower(ch)) {
    213           submode = SUBMODE_LOWER;
    214           tmp += (wchar_t)27;
    215           continue;
    216         }
    217         if (startpos + idx + 1 < count) {
    218           wchar_t next = msg[startpos + idx + 1];
    219           if (isPunctuation(next)) {
    220             submode = SUBMODE_PUNCTUATION;
    221             tmp += (wchar_t)25;
    222             continue;
    223           }
    224         }
    225         tmp += (wchar_t)29;
    226         tmp += PUNCTUATION[ch];
    227         break;
    228       default:
    229         if (isPunctuation(ch)) {
    230           tmp += PUNCTUATION[ch];
    231           break;
    232         }
    233         submode = SUBMODE_ALPHA;
    234         tmp += (wchar_t)29;
    235         continue;
    236     }
    237     idx++;
    238     if (idx >= count) {
    239       break;
    240     }
    241   }
    242   wchar_t h = 0;
    243   int32_t len = tmp.GetLength();
    244   for (int32_t i = 0; i < len; i++) {
    245     bool odd = (i % 2) != 0;
    246     if (odd) {
    247       h = (wchar_t)((h * 30) + tmp[i]);
    248       sb += h;
    249     } else {
    250       h = tmp[i];
    251     }
    252   }
    253   if ((len % 2) != 0) {
    254     sb += (wchar_t)((h * 30) + 29);
    255   }
    256   return submode;
    257 }
    258 void CBC_PDF417HighLevelEncoder::encodeBinary(std::vector<uint8_t>* bytes,
    259                                               int32_t startpos,
    260                                               int32_t count,
    261                                               int32_t startmode,
    262                                               WideString& sb) {
    263   if (count == 1 && startmode == TEXT_COMPACTION) {
    264     sb += (wchar_t)SHIFT_TO_BYTE;
    265   }
    266   int32_t idx = startpos;
    267   int32_t i = 0;
    268   if (count >= 6) {
    269     sb += (wchar_t)LATCH_TO_BYTE;
    270     wchar_t chars[5];
    271     while ((startpos + count - idx) >= 6) {
    272       int64_t t = 0;
    273       for (i = 0; i < 6; i++) {
    274         t <<= 8;
    275         t += (*bytes)[idx + i] & 0xff;
    276       }
    277       for (i = 0; i < 5; i++) {
    278         chars[i] = (wchar_t)(t % 900);
    279         t /= 900;
    280       }
    281       for (i = 4; i >= 0; i--) {
    282         sb += (chars[i]);
    283       }
    284       idx += 6;
    285     }
    286   }
    287   if (idx < startpos + count) {
    288     sb += (wchar_t)LATCH_TO_BYTE_PADDED;
    289   }
    290   for (i = idx; i < startpos + count; i++) {
    291     int32_t ch = (*bytes)[i] & 0xff;
    292     sb += (wchar_t)ch;
    293   }
    294 }
    295 void CBC_PDF417HighLevelEncoder::encodeNumeric(WideString msg,
    296                                                int32_t startpos,
    297                                                int32_t count,
    298                                                WideString& sb) {
    299   int32_t idx = 0;
    300   BigInteger num900 = 900;
    301   while (idx < count) {
    302     WideString tmp;
    303     int32_t len = 44 < count - idx ? 44 : count - idx;
    304     ByteString part =
    305         ((wchar_t)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
    306     BigInteger bigint = stringToBigInteger(part.c_str());
    307     do {
    308       int32_t c = (bigint % num900).toInt();
    309       tmp += (wchar_t)(c);
    310       bigint = bigint / num900;
    311     } while (!bigint.isZero());
    312     for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
    313       sb += tmp[i];
    314     }
    315     idx += len;
    316   }
    317 }
    318 bool CBC_PDF417HighLevelEncoder::isDigit(wchar_t ch) {
    319   return ch >= '0' && ch <= '9';
    320 }
    321 bool CBC_PDF417HighLevelEncoder::isAlphaUpper(wchar_t ch) {
    322   return ch == ' ' || (ch >= 'A' && ch <= 'Z');
    323 }
    324 bool CBC_PDF417HighLevelEncoder::isAlphaLower(wchar_t ch) {
    325   return ch == ' ' || (ch >= 'a' && ch <= 'z');
    326 }
    327 bool CBC_PDF417HighLevelEncoder::isMixed(wchar_t ch) {
    328   return MIXED[ch] != -1;
    329 }
    330 bool CBC_PDF417HighLevelEncoder::isPunctuation(wchar_t ch) {
    331   return PUNCTUATION[ch] != -1;
    332 }
    333 bool CBC_PDF417HighLevelEncoder::isText(wchar_t ch) {
    334   return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
    335 }
    336 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
    337     WideString msg,
    338     int32_t startpos) {
    339   int32_t count = 0;
    340   int32_t len = msg.GetLength();
    341   int32_t idx = startpos;
    342   if (idx < len) {
    343     wchar_t ch = msg[idx];
    344     while (isDigit(ch) && idx < len) {
    345       count++;
    346       idx++;
    347       if (idx < len) {
    348         ch = msg[idx];
    349       }
    350     }
    351   }
    352   return count;
    353 }
    354 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
    355     WideString msg,
    356     int32_t startpos) {
    357   int32_t len = msg.GetLength();
    358   int32_t idx = startpos;
    359   while (idx < len) {
    360     wchar_t ch = msg[idx];
    361     int32_t numericCount = 0;
    362     while (numericCount < 13 && isDigit(ch) && idx < len) {
    363       numericCount++;
    364       idx++;
    365       if (idx < len) {
    366         ch = msg[idx];
    367       }
    368     }
    369     if (numericCount >= 13) {
    370       return idx - startpos - numericCount;
    371     }
    372     if (numericCount > 0) {
    373       continue;
    374     }
    375     ch = msg[idx];
    376     if (!isText(ch)) {
    377       break;
    378     }
    379     idx++;
    380   }
    381   return idx - startpos;
    382 }
    383 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
    384     WideString msg,
    385     std::vector<uint8_t>* bytes,
    386     int32_t startpos,
    387     int32_t& e) {
    388   int32_t len = msg.GetLength();
    389   int32_t idx = startpos;
    390   while (idx < len) {
    391     wchar_t ch = msg[idx];
    392     int32_t numericCount = 0;
    393     while (numericCount < 13 && isDigit(ch)) {
    394       numericCount++;
    395       int32_t i = idx + numericCount;
    396       if (i >= len) {
    397         break;
    398       }
    399       ch = msg[i];
    400     }
    401     if (numericCount >= 13) {
    402       return idx - startpos;
    403     }
    404     int32_t textCount = 0;
    405     while (textCount < 5 && isText(ch)) {
    406       textCount++;
    407       int32_t i = idx + textCount;
    408       if (i >= len) {
    409         break;
    410       }
    411       ch = msg[i];
    412     }
    413     if (textCount >= 5) {
    414       return idx - startpos;
    415     }
    416     ch = msg[idx];
    417     if ((*bytes)[idx] == 63 && ch != '?') {
    418       e = BCExceptionNonEncodableCharacterDetected;
    419       return -1;
    420     }
    421     idx++;
    422   }
    423   return idx - startpos;
    424 }
    425