Home | History | Annotate | Download | only in pdf417
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 // Original code is licensed as follows:
      7 /*
      8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
      9  *
     10  * Licensed under the Apache License, Version 2.0 (the "License");
     11  * you may not use this file except in compliance with the License.
     12  * You may obtain a copy of the License at
     13  *
     14  * http://www.apache.org/licenses/LICENSE-2.0
     15  *
     16  * Unless required by applicable law or agreed to in writing, software
     17  * distributed under the License is distributed on an "AS IS" BASIS,
     18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     19  * See the License for the specific language governing permissions and
     20  * limitations under the License.
     21  */
     22 
     23 #include "xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
     24 
     25 #include "third_party/bigint/BigIntegerLibrary.hh"
     26 #include "xfa/fxbarcode/BC_UtilCodingConvert.h"
     27 #include "xfa/fxbarcode/pdf417/BC_PDF417Compaction.h"
     28 #include "xfa/fxbarcode/utils.h"
     29 
     // Text sub-mode identifiers used as the |submode| state in encodeText().
     // A fourth sub-mode, SUBMODE_PUNCTUATION, is a static class member rather
     // than a macro.
     30 #define SUBMODE_ALPHA 0
     31 #define SUBMODE_LOWER 1
     32 #define SUBMODE_MIXED 2
     33 
     // Compaction-mode identifiers; encodeHighLevel() tracks the current mode
     // with these and passes them to encodeBinary() as |startmode|.
     34 int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
     35 int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
     36 int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
     // Fourth text sub-mode value (see the SUBMODE_* macros for the others).
     37 int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
     // Mode-switch codewords; the encoder appends these values directly to the
     // output string (cast to FX_WCHAR).
     38 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
     39 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
     40 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
     41 int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
     42 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
     // Forward table for the Mixed sub-mode: index = sub-mode value, element =
     // byte value of the character. Inverse() skips entries equal to 0.
     43 uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
     44     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
     45     35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0,  32, 0, 0,  0};
     // Forward table for the Punctuation sub-mode, same layout as
     // TEXT_MIXED_RAW.
     46 uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
     47     59, 60, 62, 64, 91, 92, 93,  95, 96, 126, 33, 13,  9,   44, 58,
     48     10, 45, 46, 36, 47, 34, 124, 42, 40, 41,  63, 123, 125, 39, 0};
     // Reverse tables filled in by Inverse(): indexed by character code
     // (0..127), holding the sub-mode value or -1 when the character has no
     // entry. Until Inverse() runs they are all zero.
     49 int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
     50 int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
     51 
     52 void CBC_PDF417HighLevelEncoder::Initialize() {
     53   Inverse();
     54 }
     55 
     56 void CBC_PDF417HighLevelEncoder::Finalize() {}
     57 
     58 CFX_WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(
     59     CFX_WideString wideMsg,
     60     Compaction compaction,
     61     int32_t& e) {
     62   CFX_ByteString bytes;
     63   CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
     64   CFX_WideString msg;
     65   int32_t len = bytes.GetLength();
     66   for (int32_t i = 0; i < len; i++) {
     67     FX_WCHAR ch = (FX_WCHAR)(bytes.GetAt(i) & 0xff);
     68     if (ch == '?' && bytes.GetAt(i) != '?') {
     69       e = BCExceptionCharactersOutsideISO88591Encoding;
     70       return CFX_WideString();
     71     }
     72     msg += ch;
     73   }
     74   CFX_ArrayTemplate<uint8_t> byteArr;
     75   for (int32_t k = 0; k < bytes.GetLength(); k++) {
     76     byteArr.Add(bytes.GetAt(k));
     77   }
     78   CFX_WideString sb;
     79   len = msg.GetLength();
     80   int32_t p = 0;
     81   int32_t textSubMode = SUBMODE_ALPHA;
     82   if (compaction == TEXT) {
     83     encodeText(msg, p, len, sb, textSubMode);
     84   } else if (compaction == BYTES) {
     85     encodeBinary(&byteArr, p, byteArr.GetSize(), BYTE_COMPACTION, sb);
     86   } else if (compaction == NUMERIC) {
     87     sb += (FX_WCHAR)LATCH_TO_NUMERIC;
     88     encodeNumeric(msg, p, len, sb);
     89   } else {
     90     int32_t encodingMode = LATCH_TO_TEXT;
     91     while (p < len) {
     92       int32_t n = determineConsecutiveDigitCount(msg, p);
     93       if (n >= 13) {
     94         sb += (FX_WCHAR)LATCH_TO_NUMERIC;
     95         encodingMode = NUMERIC_COMPACTION;
     96         textSubMode = SUBMODE_ALPHA;
     97         encodeNumeric(msg, p, n, sb);
     98         p += n;
     99       } else {
    100         int32_t t = determineConsecutiveTextCount(msg, p);
    101         if (t >= 5 || n == len) {
    102           if (encodingMode != TEXT_COMPACTION) {
    103             sb += (FX_WCHAR)LATCH_TO_TEXT;
    104             encodingMode = TEXT_COMPACTION;
    105             textSubMode = SUBMODE_ALPHA;
    106           }
    107           textSubMode = encodeText(msg, p, t, sb, textSubMode);
    108           p += t;
    109         } else {
    110           int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
    111           if (e != BCExceptionNO)
    112             return L" ";
    113           if (b == 0) {
    114             b = 1;
    115           }
    116           if (b == 1 && encodingMode == TEXT_COMPACTION) {
    117             encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
    118           } else {
    119             encodeBinary(&byteArr, p, b, encodingMode, sb);
    120             encodingMode = BYTE_COMPACTION;
    121             textSubMode = SUBMODE_ALPHA;
    122           }
    123           p += b;
    124         }
    125       }
    126     }
    127   }
    128   return sb;
    129 }
    130 
    131 void CBC_PDF417HighLevelEncoder::Inverse() {
    132   for (size_t l = 0; l < FX_ArraySize(MIXED); ++l)
    133     MIXED[l] = -1;
    134 
    135   for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) {
    136     uint8_t b = TEXT_MIXED_RAW[i];
    137     if (b != 0)
    138       MIXED[b] = i;
    139   }
    140 
    141   for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l)
    142     PUNCTUATION[l] = -1;
    143 
    144   for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) {
    145     uint8_t b = TEXT_PUNCTUATION_RAW[i];
    146     if (b != 0)
    147       PUNCTUATION[b] = i;
    148   }
    149 }
    150 
    151 int32_t CBC_PDF417HighLevelEncoder::encodeText(CFX_WideString msg,
    152                                                int32_t startpos,
    153                                                int32_t count,
    154                                                CFX_WideString& sb,
    155                                                int32_t initialSubmode) {
    156   CFX_WideString tmp;
    157   int32_t submode = initialSubmode;
    158   int32_t idx = 0;
    159   while (true) {
    160     FX_WCHAR ch = msg.GetAt(startpos + idx);
    161     switch (submode) {
    162       case SUBMODE_ALPHA:
    163         if (isAlphaUpper(ch)) {
    164           if (ch == ' ') {
    165             tmp += (FX_WCHAR)26;
    166           } else {
    167             tmp += (FX_WCHAR)(ch - 65);
    168           }
    169         } else {
    170           if (isAlphaLower(ch)) {
    171             submode = SUBMODE_LOWER;
    172             tmp += (FX_WCHAR)27;
    173             continue;
    174           } else if (isMixed(ch)) {
    175             submode = SUBMODE_MIXED;
    176             tmp += (FX_WCHAR)28;
    177             continue;
    178           } else {
    179             tmp += (FX_WCHAR)29;
    180             tmp += PUNCTUATION[ch];
    181             break;
    182           }
    183         }
    184         break;
    185       case SUBMODE_LOWER:
    186         if (isAlphaLower(ch)) {
    187           if (ch == ' ') {
    188             tmp += (FX_WCHAR)26;
    189           } else {
    190             tmp += (FX_WCHAR)(ch - 97);
    191           }
    192         } else {
    193           if (isAlphaUpper(ch)) {
    194             tmp += (FX_WCHAR)27;
    195             tmp += (FX_WCHAR)(ch - 65);
    196             break;
    197           } else if (isMixed(ch)) {
    198             submode = SUBMODE_MIXED;
    199             tmp += (FX_WCHAR)28;
    200             continue;
    201           } else {
    202             tmp += (FX_WCHAR)29;
    203             tmp += PUNCTUATION[ch];
    204             break;
    205           }
    206         }
    207         break;
    208       case SUBMODE_MIXED:
    209         if (isMixed(ch)) {
    210           tmp += MIXED[ch];
    211         } else {
    212           if (isAlphaUpper(ch)) {
    213             submode = SUBMODE_ALPHA;
    214             tmp += (FX_WCHAR)28;
    215             continue;
    216           } else if (isAlphaLower(ch)) {
    217             submode = SUBMODE_LOWER;
    218             tmp += (FX_WCHAR)27;
    219             continue;
    220           } else {
    221             if (startpos + idx + 1 < count) {
    222               FX_WCHAR next = msg.GetAt(startpos + idx + 1);
    223               if (isPunctuation(next)) {
    224                 submode = SUBMODE_PUNCTUATION;
    225                 tmp += (FX_WCHAR)25;
    226                 continue;
    227               }
    228             }
    229             tmp += (FX_WCHAR)29;
    230             tmp += PUNCTUATION[ch];
    231           }
    232         }
    233         break;
    234       default:
    235         if (isPunctuation(ch)) {
    236           tmp += PUNCTUATION[ch];
    237         } else {
    238           submode = SUBMODE_ALPHA;
    239           tmp += (FX_WCHAR)29;
    240           continue;
    241         }
    242     }
    243     idx++;
    244     if (idx >= count) {
    245       break;
    246     }
    247   }
    248   FX_WCHAR h = 0;
    249   int32_t len = tmp.GetLength();
    250   for (int32_t i = 0; i < len; i++) {
    251     bool odd = (i % 2) != 0;
    252     if (odd) {
    253       h = (FX_WCHAR)((h * 30) + tmp.GetAt(i));
    254       sb += h;
    255     } else {
    256       h = tmp.GetAt(i);
    257     }
    258   }
    259   if ((len % 2) != 0) {
    260     sb += (FX_WCHAR)((h * 30) + 29);
    261   }
    262   return submode;
    263 }
    264 void CBC_PDF417HighLevelEncoder::encodeBinary(CFX_ArrayTemplate<uint8_t>* bytes,
    265                                               int32_t startpos,
    266                                               int32_t count,
    267                                               int32_t startmode,
    268                                               CFX_WideString& sb) {
    269   if (count == 1 && startmode == TEXT_COMPACTION) {
    270     sb += (FX_WCHAR)SHIFT_TO_BYTE;
    271   }
    272   int32_t idx = startpos;
    273   int32_t i = 0;
    274   if (count >= 6) {
    275     sb += (FX_WCHAR)LATCH_TO_BYTE;
    276     FX_WCHAR chars[5];
    277     while ((startpos + count - idx) >= 6) {
    278       int64_t t = 0;
    279       for (i = 0; i < 6; i++) {
    280         t <<= 8;
    281         t += bytes->GetAt(idx + i) & 0xff;
    282       }
    283       for (i = 0; i < 5; i++) {
    284         chars[i] = (FX_WCHAR)(t % 900);
    285         t /= 900;
    286       }
    287       for (i = 4; i >= 0; i--) {
    288         sb += (chars[i]);
    289       }
    290       idx += 6;
    291     }
    292   }
    293   if (idx < startpos + count) {
    294     sb += (FX_WCHAR)LATCH_TO_BYTE_PADDED;
    295   }
    296   for (i = idx; i < startpos + count; i++) {
    297     int32_t ch = bytes->GetAt(i) & 0xff;
    298     sb += (FX_WCHAR)ch;
    299   }
    300 }
    301 void CBC_PDF417HighLevelEncoder::encodeNumeric(CFX_WideString msg,
    302                                                int32_t startpos,
    303                                                int32_t count,
    304                                                CFX_WideString& sb) {
    305   int32_t idx = 0;
    306   BigInteger num900 = 900;
    307   while (idx < count) {
    308     CFX_WideString tmp;
    309     int32_t len = 44 < count - idx ? 44 : count - idx;
    310     CFX_ByteString part =
    311         ((FX_WCHAR)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
    312     BigInteger bigint = stringToBigInteger(part.c_str());
    313     do {
    314       int32_t c = (bigint % num900).toInt();
    315       tmp += (FX_WCHAR)(c);
    316       bigint = bigint / num900;
    317     } while (!bigint.isZero());
    318     for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
    319       sb += tmp.GetAt(i);
    320     }
    321     idx += len;
    322   }
    323 }
    324 bool CBC_PDF417HighLevelEncoder::isDigit(FX_WCHAR ch) {
    325   return ch >= '0' && ch <= '9';
    326 }
    327 bool CBC_PDF417HighLevelEncoder::isAlphaUpper(FX_WCHAR ch) {
    328   return ch == ' ' || (ch >= 'A' && ch <= 'Z');
    329 }
    330 bool CBC_PDF417HighLevelEncoder::isAlphaLower(FX_WCHAR ch) {
    331   return ch == ' ' || (ch >= 'a' && ch <= 'z');
    332 }
    333 bool CBC_PDF417HighLevelEncoder::isMixed(FX_WCHAR ch) {
    334   return MIXED[ch] != -1;
    335 }
    336 bool CBC_PDF417HighLevelEncoder::isPunctuation(FX_WCHAR ch) {
    337   return PUNCTUATION[ch] != -1;
    338 }
    339 bool CBC_PDF417HighLevelEncoder::isText(FX_WCHAR ch) {
    340   return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
    341 }
    342 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
    343     CFX_WideString msg,
    344     int32_t startpos) {
    345   int32_t count = 0;
    346   int32_t len = msg.GetLength();
    347   int32_t idx = startpos;
    348   if (idx < len) {
    349     FX_WCHAR ch = msg.GetAt(idx);
    350     while (isDigit(ch) && idx < len) {
    351       count++;
    352       idx++;
    353       if (idx < len) {
    354         ch = msg.GetAt(idx);
    355       }
    356     }
    357   }
    358   return count;
    359 }
    360 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
    361     CFX_WideString msg,
    362     int32_t startpos) {
    363   int32_t len = msg.GetLength();
    364   int32_t idx = startpos;
    365   while (idx < len) {
    366     FX_WCHAR ch = msg.GetAt(idx);
    367     int32_t numericCount = 0;
    368     while (numericCount < 13 && isDigit(ch) && idx < len) {
    369       numericCount++;
    370       idx++;
    371       if (idx < len) {
    372         ch = msg.GetAt(idx);
    373       }
    374     }
    375     if (numericCount >= 13) {
    376       return idx - startpos - numericCount;
    377     }
    378     if (numericCount > 0) {
    379       continue;
    380     }
    381     ch = msg.GetAt(idx);
    382     if (!isText(ch)) {
    383       break;
    384     }
    385     idx++;
    386   }
    387   return idx - startpos;
    388 }
    389 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
    390     CFX_WideString msg,
    391     CFX_ArrayTemplate<uint8_t>* bytes,
    392     int32_t startpos,
    393     int32_t& e) {
    394   int32_t len = msg.GetLength();
    395   int32_t idx = startpos;
    396   while (idx < len) {
    397     FX_WCHAR ch = msg.GetAt(idx);
    398     int32_t numericCount = 0;
    399     while (numericCount < 13 && isDigit(ch)) {
    400       numericCount++;
    401       int32_t i = idx + numericCount;
    402       if (i >= len) {
    403         break;
    404       }
    405       ch = msg.GetAt(i);
    406     }
    407     if (numericCount >= 13) {
    408       return idx - startpos;
    409     }
    410     int32_t textCount = 0;
    411     while (textCount < 5 && isText(ch)) {
    412       textCount++;
    413       int32_t i = idx + textCount;
    414       if (i >= len) {
    415         break;
    416       }
    417       ch = msg.GetAt(i);
    418     }
    419     if (textCount >= 5) {
    420       return idx - startpos;
    421     }
    422     ch = msg.GetAt(idx);
    423     if (bytes->GetAt(idx) == 63 && ch != '?') {
    424       e = BCExceptionNonEncodableCharacterDetected;
    425       return -1;
    426     }
    427     idx++;
    428   }
    429   return idx - startpos;
    430 }
    431