Home | History | Annotate | Download | only in pdf417
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 // Original code is licensed as follows:
      7 /*
      8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
      9  *
     10  * Licensed under the Apache License, Version 2.0 (the "License");
     11  * you may not use this file except in compliance with the License.
     12  * You may obtain a copy of the License at
     13  *
     14  * http://www.apache.org/licenses/LICENSE-2.0
     15  *
     16  * Unless required by applicable law or agreed to in writing, software
     17  * distributed under the License is distributed on an "AS IS" BASIS,
     18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     19  * See the License for the specific language governing permissions and
     20  * limitations under the License.
     21  */
     22 
     23 #include "BC_PDF417HighLevelEncoder.h"
     24 
     25 #include "xfa/src/fxbarcode/BC_UtilCodingConvert.h"
     26 #include "xfa/src/fxbarcode/barcode.h"
     27 #include "BC_PDF417Compaction.h"
     28 #include "third_party/bigint/BigIntegerLibrary.hh"
     29 
     30 #define SUBMODE_ALPHA 0
     31 #define SUBMODE_LOWER 1
     32 #define SUBMODE_MIXED 2
     33 int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
     34 int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
     35 int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
     36 int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
     37 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
     38 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
     39 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
     40 int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
     41 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
     42 uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
     43     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
     44     35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0,  32, 0, 0,  0};
     45 uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
     46     59, 60, 62, 64, 91, 92, 93,  95, 96, 126, 33, 13,  9,   44, 58,
     47     10, 45, 46, 36, 47, 34, 124, 42, 40, 41,  63, 123, 125, 39, 0};
     48 int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
     49 int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
     50 void CBC_PDF417HighLevelEncoder::Initialize() {
     51   Inverse();
     52 }
     53 void CBC_PDF417HighLevelEncoder::Finalize() {}
     54 CFX_WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(
     55     CFX_WideString wideMsg,
     56     Compaction compaction,
     57     int32_t& e) {
     58   CFX_ByteString bytes;
     59   CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
     60   CFX_WideString msg;
     61   int32_t len = bytes.GetLength();
     62   for (int32_t i = 0; i < len; i++) {
     63     FX_WCHAR ch = (FX_WCHAR)(bytes.GetAt(i) & 0xff);
     64     if (ch == '?' && bytes.GetAt(i) != '?') {
     65       e = BCExceptionCharactersOutsideISO88591Encoding;
     66       return (FX_WCHAR*)"";
     67     }
     68     msg += ch;
     69   }
     70   CFX_ByteArray byteArr;
     71   for (int32_t k = 0; k < bytes.GetLength(); k++) {
     72     byteArr.Add(bytes.GetAt(k));
     73   }
     74   CFX_WideString sb;
     75   len = msg.GetLength();
     76   int32_t p = 0;
     77   int32_t textSubMode = SUBMODE_ALPHA;
     78   if (compaction == TEXT) {
     79     encodeText(msg, p, len, sb, textSubMode);
     80   } else if (compaction == BYTES) {
     81     encodeBinary(&byteArr, p, byteArr.GetSize(), BYTE_COMPACTION, sb);
     82   } else if (compaction == NUMERIC) {
     83     sb += (FX_WCHAR)LATCH_TO_NUMERIC;
     84     encodeNumeric(msg, p, len, sb);
     85   } else {
     86     int32_t encodingMode = LATCH_TO_TEXT;
     87     while (p < len) {
     88       int32_t n = determineConsecutiveDigitCount(msg, p);
     89       if (n >= 13) {
     90         sb += (FX_WCHAR)LATCH_TO_NUMERIC;
     91         encodingMode = NUMERIC_COMPACTION;
     92         textSubMode = SUBMODE_ALPHA;
     93         encodeNumeric(msg, p, n, sb);
     94         p += n;
     95       } else {
     96         int32_t t = determineConsecutiveTextCount(msg, p);
     97         if (t >= 5 || n == len) {
     98           if (encodingMode != TEXT_COMPACTION) {
     99             sb += (FX_WCHAR)LATCH_TO_TEXT;
    100             encodingMode = TEXT_COMPACTION;
    101             textSubMode = SUBMODE_ALPHA;
    102           }
    103           textSubMode = encodeText(msg, p, t, sb, textSubMode);
    104           p += t;
    105         } else {
    106           int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
    107           BC_EXCEPTION_CHECK_ReturnValue(e, (FX_WCHAR)' ');
    108           if (b == 0) {
    109             b = 1;
    110           }
    111           if (b == 1 && encodingMode == TEXT_COMPACTION) {
    112             encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
    113           } else {
    114             encodeBinary(&byteArr, p, b, encodingMode, sb);
    115             encodingMode = BYTE_COMPACTION;
    116             textSubMode = SUBMODE_ALPHA;
    117           }
    118           p += b;
    119         }
    120       }
    121     }
    122   }
    123   return sb;
    124 }
    125 void CBC_PDF417HighLevelEncoder::Inverse() {
    126   uint8_t i = 0;
    127   int32_t l = 0;
    128   for (l = 0; l < sizeof(MIXED) / sizeof(MIXED[0]); l++) {
    129     MIXED[l] = -1;
    130   }
    131   for (i = 0; i < sizeof(TEXT_MIXED_RAW) / sizeof(TEXT_MIXED_RAW[0]); i++) {
    132     uint8_t b = TEXT_MIXED_RAW[i];
    133     if (b > 0) {
    134       MIXED[b] = i;
    135     }
    136   }
    137   for (l = 0; l < sizeof(PUNCTUATION) / sizeof(PUNCTUATION[0]); l++) {
    138     PUNCTUATION[l] = -1;
    139   }
    140   for (i = 0;
    141        i < sizeof(TEXT_PUNCTUATION_RAW) / sizeof(TEXT_PUNCTUATION_RAW[0]);
    142        i++) {
    143     uint8_t b = TEXT_PUNCTUATION_RAW[i];
    144     if (b > 0) {
    145       PUNCTUATION[b] = i;
    146     }
    147   }
    148 }
    149 int32_t CBC_PDF417HighLevelEncoder::encodeText(CFX_WideString msg,
    150                                                int32_t startpos,
    151                                                int32_t count,
    152                                                CFX_WideString& sb,
    153                                                int32_t initialSubmode) {
    154   CFX_WideString tmp;
    155   int32_t submode = initialSubmode;
    156   int32_t idx = 0;
    157   while (TRUE) {
    158     FX_WCHAR ch = msg.GetAt(startpos + idx);
    159     switch (submode) {
    160       case SUBMODE_ALPHA:
    161         if (isAlphaUpper(ch)) {
    162           if (ch == ' ') {
    163             tmp += (FX_WCHAR)26;
    164           } else {
    165             tmp += (FX_WCHAR)(ch - 65);
    166           }
    167         } else {
    168           if (isAlphaLower(ch)) {
    169             submode = SUBMODE_LOWER;
    170             tmp += (FX_WCHAR)27;
    171             continue;
    172           } else if (isMixed(ch)) {
    173             submode = SUBMODE_MIXED;
    174             tmp += (FX_WCHAR)28;
    175             continue;
    176           } else {
    177             tmp += (FX_WCHAR)29;
    178             tmp += PUNCTUATION[ch];
    179             break;
    180           }
    181         }
    182         break;
    183       case SUBMODE_LOWER:
    184         if (isAlphaLower(ch)) {
    185           if (ch == ' ') {
    186             tmp += (FX_WCHAR)26;
    187           } else {
    188             tmp += (FX_WCHAR)(ch - 97);
    189           }
    190         } else {
    191           if (isAlphaUpper(ch)) {
    192             tmp += (FX_WCHAR)27;
    193             tmp += (FX_WCHAR)(ch - 65);
    194             break;
    195           } else if (isMixed(ch)) {
    196             submode = SUBMODE_MIXED;
    197             tmp += (FX_WCHAR)28;
    198             continue;
    199           } else {
    200             tmp += (FX_WCHAR)29;
    201             tmp += PUNCTUATION[ch];
    202             break;
    203           }
    204         }
    205         break;
    206       case SUBMODE_MIXED:
    207         if (isMixed(ch)) {
    208           tmp += MIXED[ch];
    209         } else {
    210           if (isAlphaUpper(ch)) {
    211             submode = SUBMODE_ALPHA;
    212             tmp += (FX_WCHAR)28;
    213             continue;
    214           } else if (isAlphaLower(ch)) {
    215             submode = SUBMODE_LOWER;
    216             tmp += (FX_WCHAR)27;
    217             continue;
    218           } else {
    219             if (startpos + idx + 1 < count) {
    220               FX_WCHAR next = msg.GetAt(startpos + idx + 1);
    221               if (isPunctuation(next)) {
    222                 submode = SUBMODE_PUNCTUATION;
    223                 tmp += (FX_WCHAR)25;
    224                 continue;
    225               }
    226             }
    227             tmp += (FX_WCHAR)29;
    228             tmp += PUNCTUATION[ch];
    229           }
    230         }
    231         break;
    232       default:
    233         if (isPunctuation(ch)) {
    234           tmp += PUNCTUATION[ch];
    235         } else {
    236           submode = SUBMODE_ALPHA;
    237           tmp += (FX_WCHAR)29;
    238           continue;
    239         }
    240     }
    241     idx++;
    242     if (idx >= count) {
    243       break;
    244     }
    245   }
    246   FX_WCHAR h = 0;
    247   int32_t len = tmp.GetLength();
    248   for (int32_t i = 0; i < len; i++) {
    249     FX_BOOL odd = (i % 2) != 0;
    250     if (odd) {
    251       h = (FX_WCHAR)((h * 30) + tmp.GetAt(i));
    252       sb += h;
    253     } else {
    254       h = tmp.GetAt(i);
    255     }
    256   }
    257   if ((len % 2) != 0) {
    258     sb += (FX_WCHAR)((h * 30) + 29);
    259   }
    260   return submode;
    261 }
    262 void CBC_PDF417HighLevelEncoder::encodeBinary(CFX_ByteArray* bytes,
    263                                               int32_t startpos,
    264                                               int32_t count,
    265                                               int32_t startmode,
    266                                               CFX_WideString& sb) {
    267   if (count == 1 && startmode == TEXT_COMPACTION) {
    268     sb += (FX_WCHAR)SHIFT_TO_BYTE;
    269   }
    270   int32_t idx = startpos;
    271   int32_t i = 0;
    272   if (count >= 6) {
    273     sb += (FX_WCHAR)LATCH_TO_BYTE;
    274     FX_WCHAR chars[5];
    275     while ((startpos + count - idx) >= 6) {
    276       int64_t t = 0;
    277       for (i = 0; i < 6; i++) {
    278         t <<= 8;
    279         t += bytes->GetAt(idx + i) & 0xff;
    280       }
    281       for (i = 0; i < 5; i++) {
    282         chars[i] = (FX_WCHAR)(t % 900);
    283         t /= 900;
    284       }
    285       for (i = 4; i >= 0; i--) {
    286         sb += (chars[i]);
    287       }
    288       idx += 6;
    289     }
    290   }
    291   if (idx < startpos + count) {
    292     sb += (FX_WCHAR)LATCH_TO_BYTE_PADDED;
    293   }
    294   for (i = idx; i < startpos + count; i++) {
    295     int32_t ch = bytes->GetAt(i) & 0xff;
    296     sb += (FX_WCHAR)ch;
    297   }
    298 }
    299 void CBC_PDF417HighLevelEncoder::encodeNumeric(CFX_WideString msg,
    300                                                int32_t startpos,
    301                                                int32_t count,
    302                                                CFX_WideString& sb) {
    303   int32_t idx = 0;
    304   BigInteger num900 = 900;
    305   while (idx < count) {
    306     CFX_WideString tmp;
    307     int32_t len = 44 < count - idx ? 44 : count - idx;
    308     CFX_ByteString part =
    309         ((FX_WCHAR)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
    310     BigInteger bigint = stringToBigInteger(part.c_str());
    311     do {
    312       int32_t c = (bigint % num900).toInt();
    313       tmp += (FX_WCHAR)(c);
    314       bigint = bigint / num900;
    315     } while (!bigint.isZero());
    316     for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
    317       sb += tmp.GetAt(i);
    318     }
    319     idx += len;
    320   }
    321 }
    322 FX_BOOL CBC_PDF417HighLevelEncoder::isDigit(FX_WCHAR ch) {
    323   return ch >= '0' && ch <= '9';
    324 }
    325 FX_BOOL CBC_PDF417HighLevelEncoder::isAlphaUpper(FX_WCHAR ch) {
    326   return ch == ' ' || (ch >= 'A' && ch <= 'Z');
    327 }
    328 FX_BOOL CBC_PDF417HighLevelEncoder::isAlphaLower(FX_WCHAR ch) {
    329   return ch == ' ' || (ch >= 'a' && ch <= 'z');
    330 }
    331 FX_BOOL CBC_PDF417HighLevelEncoder::isMixed(FX_WCHAR ch) {
    332   return MIXED[ch] != -1;
    333 }
    334 FX_BOOL CBC_PDF417HighLevelEncoder::isPunctuation(FX_WCHAR ch) {
    335   return PUNCTUATION[ch] != -1;
    336 }
    337 FX_BOOL CBC_PDF417HighLevelEncoder::isText(FX_WCHAR ch) {
    338   return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
    339 }
    340 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
    341     CFX_WideString msg,
    342     int32_t startpos) {
    343   int32_t count = 0;
    344   int32_t len = msg.GetLength();
    345   int32_t idx = startpos;
    346   if (idx < len) {
    347     FX_WCHAR ch = msg.GetAt(idx);
    348     while (isDigit(ch) && idx < len) {
    349       count++;
    350       idx++;
    351       if (idx < len) {
    352         ch = msg.GetAt(idx);
    353       }
    354     }
    355   }
    356   return count;
    357 }
    358 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
    359     CFX_WideString msg,
    360     int32_t startpos) {
    361   int32_t len = msg.GetLength();
    362   int32_t idx = startpos;
    363   while (idx < len) {
    364     FX_WCHAR ch = msg.GetAt(idx);
    365     int32_t numericCount = 0;
    366     while (numericCount < 13 && isDigit(ch) && idx < len) {
    367       numericCount++;
    368       idx++;
    369       if (idx < len) {
    370         ch = msg.GetAt(idx);
    371       }
    372     }
    373     if (numericCount >= 13) {
    374       return idx - startpos - numericCount;
    375     }
    376     if (numericCount > 0) {
    377       continue;
    378     }
    379     ch = msg.GetAt(idx);
    380     if (!isText(ch)) {
    381       break;
    382     }
    383     idx++;
    384   }
    385   return idx - startpos;
    386 }
    387 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
    388     CFX_WideString msg,
    389     CFX_ByteArray* bytes,
    390     int32_t startpos,
    391     int32_t& e) {
    392   int32_t len = msg.GetLength();
    393   int32_t idx = startpos;
    394   while (idx < len) {
    395     FX_WCHAR ch = msg.GetAt(idx);
    396     int32_t numericCount = 0;
    397     while (numericCount < 13 && isDigit(ch)) {
    398       numericCount++;
    399       int32_t i = idx + numericCount;
    400       if (i >= len) {
    401         break;
    402       }
    403       ch = msg.GetAt(i);
    404     }
    405     if (numericCount >= 13) {
    406       return idx - startpos;
    407     }
    408     int32_t textCount = 0;
    409     while (textCount < 5 && isText(ch)) {
    410       textCount++;
    411       int32_t i = idx + textCount;
    412       if (i >= len) {
    413         break;
    414       }
    415       ch = msg.GetAt(i);
    416     }
    417     if (textCount >= 5) {
    418       return idx - startpos;
    419     }
    420     ch = msg.GetAt(idx);
    421     if (bytes->GetAt(idx) == 63 && ch != '?') {
    422       e = BCExceptionNonEncodableCharacterDetected;
    423       return -1;
    424     }
    425     idx++;
    426   }
    427   return idx - startpos;
    428 }
    429