Home | History | Annotate | Download | only in datamatrix
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 // Original code is licensed as follows:
      7 /*
      8  * Copyright 2006-2007 Jeremias Maerki.
      9  *
     10  * Licensed under the Apache License, Version 2.0 (the "License");
     11  * you may not use this file except in compliance with the License.
     12  * You may obtain a copy of the License at
     13  *
     14  *      http://www.apache.org/licenses/LICENSE-2.0
     15  *
     16  * Unless required by applicable law or agreed to in writing, software
     17  * distributed under the License is distributed on an "AS IS" BASIS,
     18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     19  * See the License for the specific language governing permissions and
     20  * limitations under the License.
     21  */
     22 
     23 #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h"
     24 
     25 #include <limits>
     26 #include <memory>
     27 #include <vector>
     28 
     29 #include "fxbarcode/BC_UtilCodingConvert.h"
     30 #include "fxbarcode/common/BC_CommonBitMatrix.h"
     31 #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h"
     32 #include "fxbarcode/datamatrix/BC_Base256Encoder.h"
     33 #include "fxbarcode/datamatrix/BC_C40Encoder.h"
     34 #include "fxbarcode/datamatrix/BC_EdifactEncoder.h"
     35 #include "fxbarcode/datamatrix/BC_Encoder.h"
     36 #include "fxbarcode/datamatrix/BC_EncoderContext.h"
     37 #include "fxbarcode/datamatrix/BC_SymbolInfo.h"
     38 #include "fxbarcode/datamatrix/BC_TextEncoder.h"
     39 #include "fxbarcode/datamatrix/BC_X12Encoder.h"
     40 #include "fxbarcode/utils.h"
     41 #include "third_party/base/ptr_util.h"
     42 
     43 const wchar_t CBC_HighLevelEncoder::LATCH_TO_C40 = 230;
     44 const wchar_t CBC_HighLevelEncoder::LATCH_TO_BASE256 = 231;
     45 const wchar_t CBC_HighLevelEncoder::UPPER_SHIFT = 235;
     46 const wchar_t CBC_HighLevelEncoder::LATCH_TO_ANSIX12 = 238;
     47 const wchar_t CBC_HighLevelEncoder::LATCH_TO_TEXT = 239;
     48 const wchar_t CBC_HighLevelEncoder::LATCH_TO_EDIFACT = 240;
     49 const wchar_t CBC_HighLevelEncoder::C40_UNLATCH = 254;
     50 const wchar_t CBC_HighLevelEncoder::X12_UNLATCH = 254;
     51 const wchar_t CBC_HighLevelEncoder::PAD = 129;
     52 const wchar_t CBC_HighLevelEncoder::MACRO_05 = 236;
     53 const wchar_t CBC_HighLevelEncoder::MACRO_06 = 237;
     54 const wchar_t CBC_HighLevelEncoder::MACRO_05_HEADER[] = L"[)>05";
     55 const wchar_t CBC_HighLevelEncoder::MACRO_06_HEADER[] = L"[)>06";
     56 const wchar_t CBC_HighLevelEncoder::MACRO_TRAILER = 0x0004;
     57 
     58 CBC_HighLevelEncoder::CBC_HighLevelEncoder() {}
     59 CBC_HighLevelEncoder::~CBC_HighLevelEncoder() {}
     60 
     61 std::vector<uint8_t>& CBC_HighLevelEncoder::getBytesForMessage(WideString msg) {
     62   ByteString bytestr;
     63   CBC_UtilCodingConvert::UnicodeToUTF8(msg, bytestr);
     64   m_bytearray.insert(m_bytearray.end(), bytestr.begin(), bytestr.end());
     65   return m_bytearray;
     66 }
     67 
     68 // static
     69 WideString CBC_HighLevelEncoder::encodeHighLevel(WideString msg,
     70                                                  WideString ecLevel,
     71                                                  bool allowRectangular,
     72                                                  int32_t& e) {
     73   CBC_EncoderContext context(msg, ecLevel, e);
     74   if (e != BCExceptionNO)
     75     return WideString();
     76 
     77   context.setAllowRectangular(allowRectangular);
     78   if ((msg.Left(6) == MACRO_05_HEADER) && (msg.Last() == MACRO_TRAILER)) {
     79     context.writeCodeword(MACRO_05);
     80     context.setSkipAtEnd(2);
     81     context.m_pos += 6;
     82   } else if ((msg.Left(6) == MACRO_06_HEADER) &&
     83              (msg.Last() == MACRO_TRAILER)) {
     84     context.writeCodeword(MACRO_06);
     85     context.setSkipAtEnd(2);
     86     context.m_pos += 6;
     87   }
     88 
     89   std::vector<std::unique_ptr<CBC_Encoder>> encoders;
     90   encoders.push_back(pdfium::MakeUnique<CBC_ASCIIEncoder>());
     91   encoders.push_back(pdfium::MakeUnique<CBC_C40Encoder>());
     92   encoders.push_back(pdfium::MakeUnique<CBC_TextEncoder>());
     93   encoders.push_back(pdfium::MakeUnique<CBC_X12Encoder>());
     94   encoders.push_back(pdfium::MakeUnique<CBC_EdifactEncoder>());
     95   encoders.push_back(pdfium::MakeUnique<CBC_Base256Encoder>());
     96   int32_t encodingMode = ASCII_ENCODATION;
     97   while (context.hasMoreCharacters()) {
     98     encoders[encodingMode]->Encode(context, e);
     99     if (e != BCExceptionNO)
    100       return L"";
    101 
    102     if (context.m_newEncoding >= 0) {
    103       encodingMode = context.m_newEncoding;
    104       context.resetEncoderSignal();
    105     }
    106   }
    107   int32_t len = context.m_codewords.GetLength();
    108   context.updateSymbolInfo(e);
    109   if (e != BCExceptionNO)
    110     return L"";
    111 
    112   int32_t capacity = context.m_symbolInfo->dataCapacity();
    113   if (len < capacity) {
    114     if (encodingMode != ASCII_ENCODATION &&
    115         encodingMode != BASE256_ENCODATION) {
    116       context.writeCodeword(0x00fe);
    117     }
    118   }
    119   WideString codewords = context.m_codewords;
    120   if (pdfium::base::checked_cast<int32_t>(codewords.GetLength()) < capacity) {
    121     codewords += PAD;
    122   }
    123   while (pdfium::base::checked_cast<int32_t>(codewords.GetLength()) <
    124          capacity) {
    125     codewords += (randomize253State(
    126         PAD, pdfium::base::checked_cast<int32_t>(codewords.GetLength()) + 1));
    127   }
    128   return codewords;
    129 }
    130 int32_t CBC_HighLevelEncoder::lookAheadTest(WideString msg,
    131                                             int32_t startpos,
    132                                             int32_t currentMode) {
    133   if (startpos >= pdfium::base::checked_cast<int32_t>(msg.GetLength())) {
    134     return currentMode;
    135   }
    136   std::vector<float> charCounts;
    137   if (currentMode == ASCII_ENCODATION) {
    138     charCounts.push_back(0);
    139     charCounts.push_back(1);
    140     charCounts.push_back(1);
    141     charCounts.push_back(1);
    142     charCounts.push_back(1);
    143     charCounts.push_back(1.25f);
    144   } else {
    145     charCounts.push_back(1);
    146     charCounts.push_back(2);
    147     charCounts.push_back(2);
    148     charCounts.push_back(2);
    149     charCounts.push_back(2);
    150     charCounts.push_back(2.25f);
    151     charCounts[currentMode] = 0;
    152   }
    153   int32_t charsProcessed = 0;
    154   while (true) {
    155     if ((startpos + charsProcessed) ==
    156         pdfium::base::checked_cast<int32_t>(msg.GetLength())) {
    157       int32_t min = std::numeric_limits<int32_t>::max();
    158       std::vector<uint8_t> mins(6);
    159       std::vector<int32_t> intCharCounts(6);
    160       min = findMinimums(charCounts, intCharCounts, min, mins);
    161       int32_t minCount = getMinimumCount(mins);
    162       if (intCharCounts[ASCII_ENCODATION] == min) {
    163         return ASCII_ENCODATION;
    164       }
    165       if (minCount == 1 && mins[BASE256_ENCODATION] > 0) {
    166         return BASE256_ENCODATION;
    167       }
    168       if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
    169         return EDIFACT_ENCODATION;
    170       }
    171       if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
    172         return TEXT_ENCODATION;
    173       }
    174       if (minCount == 1 && mins[X12_ENCODATION] > 0) {
    175         return X12_ENCODATION;
    176       }
    177       return C40_ENCODATION;
    178     }
    179     wchar_t c = msg[startpos + charsProcessed];
    180     charsProcessed++;
    181     if (isDigit(c)) {
    182       charCounts[ASCII_ENCODATION] += 0.5;
    183     } else if (isExtendedASCII(c)) {
    184       charCounts[ASCII_ENCODATION] = (float)ceil(charCounts[ASCII_ENCODATION]);
    185       charCounts[ASCII_ENCODATION] += 2;
    186     } else {
    187       charCounts[ASCII_ENCODATION] = (float)ceil(charCounts[ASCII_ENCODATION]);
    188       charCounts[ASCII_ENCODATION]++;
    189     }
    190     if (isNativeC40(c)) {
    191       charCounts[C40_ENCODATION] += 2.0f / 3.0f;
    192     } else if (isExtendedASCII(c)) {
    193       charCounts[C40_ENCODATION] += 8.0f / 3.0f;
    194     } else {
    195       charCounts[C40_ENCODATION] += 4.0f / 3.0f;
    196     }
    197     if (isNativeText(c)) {
    198       charCounts[TEXT_ENCODATION] += 2.0f / 3.0f;
    199     } else if (isExtendedASCII(c)) {
    200       charCounts[TEXT_ENCODATION] += 8.0f / 3.0f;
    201     } else {
    202       charCounts[TEXT_ENCODATION] += 4.0f / 3.0f;
    203     }
    204     if (isNativeX12(c)) {
    205       charCounts[X12_ENCODATION] += 2.0f / 3.0f;
    206     } else if (isExtendedASCII(c)) {
    207       charCounts[X12_ENCODATION] += 13.0f / 3.0f;
    208     } else {
    209       charCounts[X12_ENCODATION] += 10.0f / 3.0f;
    210     }
    211     if (isNativeEDIFACT(c)) {
    212       charCounts[EDIFACT_ENCODATION] += 3.0f / 4.0f;
    213     } else if (isExtendedASCII(c)) {
    214       charCounts[EDIFACT_ENCODATION] += 17.0f / 4.0f;
    215     } else {
    216       charCounts[EDIFACT_ENCODATION] += 13.0f / 4.0f;
    217     }
    218     charCounts[BASE256_ENCODATION]++;
    219     if (charsProcessed >= 4) {
    220       std::vector<int32_t> intCharCounts(6);
    221       std::vector<uint8_t> mins(6);
    222       findMinimums(charCounts, intCharCounts,
    223                    std::numeric_limits<int32_t>::max(), mins);
    224       int32_t minCount = getMinimumCount(mins);
    225       if (intCharCounts[ASCII_ENCODATION] < intCharCounts[BASE256_ENCODATION] &&
    226           intCharCounts[ASCII_ENCODATION] < intCharCounts[C40_ENCODATION] &&
    227           intCharCounts[ASCII_ENCODATION] < intCharCounts[TEXT_ENCODATION] &&
    228           intCharCounts[ASCII_ENCODATION] < intCharCounts[X12_ENCODATION] &&
    229           intCharCounts[ASCII_ENCODATION] < intCharCounts[EDIFACT_ENCODATION]) {
    230         return ASCII_ENCODATION;
    231       }
    232       if (intCharCounts[BASE256_ENCODATION] < intCharCounts[ASCII_ENCODATION] ||
    233           (mins[C40_ENCODATION] + mins[TEXT_ENCODATION] + mins[X12_ENCODATION] +
    234            mins[EDIFACT_ENCODATION]) == 0) {
    235         return BASE256_ENCODATION;
    236       }
    237       if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
    238         return EDIFACT_ENCODATION;
    239       }
    240       if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
    241         return TEXT_ENCODATION;
    242       }
    243       if (minCount == 1 && mins[X12_ENCODATION] > 0) {
    244         return X12_ENCODATION;
    245       }
    246       if (intCharCounts[C40_ENCODATION] + 1 < intCharCounts[ASCII_ENCODATION] &&
    247           intCharCounts[C40_ENCODATION] + 1 <
    248               intCharCounts[BASE256_ENCODATION] &&
    249           intCharCounts[C40_ENCODATION] + 1 <
    250               intCharCounts[EDIFACT_ENCODATION] &&
    251           intCharCounts[C40_ENCODATION] + 1 < intCharCounts[TEXT_ENCODATION]) {
    252         if (intCharCounts[C40_ENCODATION] < intCharCounts[X12_ENCODATION]) {
    253           return C40_ENCODATION;
    254         }
    255         if (intCharCounts[C40_ENCODATION] == intCharCounts[X12_ENCODATION]) {
    256           int32_t p = startpos + charsProcessed + 1;
    257           int32_t checked_length =
    258               pdfium::base::checked_cast<int32_t>(msg.GetLength());
    259           while (p < checked_length) {
    260             wchar_t tc = msg[p];
    261             if (isX12TermSep(tc)) {
    262               return X12_ENCODATION;
    263             }
    264             if (!isNativeX12(tc)) {
    265               break;
    266             }
    267             p++;
    268           }
    269           return C40_ENCODATION;
    270         }
    271       }
    272     }
    273   }
    274 }
    275 bool CBC_HighLevelEncoder::isDigit(wchar_t ch) {
    276   return ch >= '0' && ch <= '9';
    277 }
    278 bool CBC_HighLevelEncoder::isExtendedASCII(wchar_t ch) {
    279   return ch >= 128 && ch <= 255;
    280 }
    281 int32_t CBC_HighLevelEncoder::determineConsecutiveDigitCount(WideString msg,
    282                                                              int32_t startpos) {
    283   int32_t count = 0;
    284   int32_t len = msg.GetLength();
    285   int32_t idx = startpos;
    286   if (idx < len) {
    287     wchar_t ch = msg[idx];
    288     while (isDigit(ch) && idx < len) {
    289       count++;
    290       idx++;
    291       if (idx < len) {
    292         ch = msg[idx];
    293       }
    294     }
    295   }
    296   return count;
    297 }
    298 
    299 wchar_t CBC_HighLevelEncoder::randomize253State(wchar_t ch,
    300                                                 int32_t codewordPosition) {
    301   int32_t pseudoRandom = ((149 * codewordPosition) % 253) + 1;
    302   int32_t tempVariable = ch + pseudoRandom;
    303   return tempVariable <= 254 ? (wchar_t)tempVariable
    304                              : (wchar_t)(tempVariable - 254);
    305 }
    306 int32_t CBC_HighLevelEncoder::findMinimums(std::vector<float>& charCounts,
    307                                            std::vector<int32_t>& intCharCounts,
    308                                            int32_t min,
    309                                            std::vector<uint8_t>& mins) {
    310   for (size_t l = 0; l < mins.size(); l++)
    311     mins[l] = 0;
    312 
    313   for (size_t i = 0; i < 6; i++) {
    314     intCharCounts[i] = static_cast<int32_t>(ceil(charCounts[i]));
    315     int32_t current = intCharCounts[i];
    316     if (min > current) {
    317       min = current;
    318       for (size_t j = 0; j < mins.size(); j++)
    319         mins[j] = 0;
    320     }
    321     if (min == current)
    322       mins[i]++;
    323   }
    324   return min;
    325 }
    326 int32_t CBC_HighLevelEncoder::getMinimumCount(std::vector<uint8_t>& mins) {
    327   int32_t minCount = 0;
    328   for (int32_t i = 0; i < 6; i++) {
    329     minCount += mins[i];
    330   }
    331   return minCount;
    332 }
    333 bool CBC_HighLevelEncoder::isNativeC40(wchar_t ch) {
    334   return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
    335 }
    336 bool CBC_HighLevelEncoder::isNativeText(wchar_t ch) {
    337   return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z');
    338 }
    339 bool CBC_HighLevelEncoder::isNativeX12(wchar_t ch) {
    340   return isX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
    341          (ch >= 'A' && ch <= 'Z');
    342 }
    343 bool CBC_HighLevelEncoder::isX12TermSep(wchar_t ch) {
    344   return (ch == '\r') || (ch == '*') || (ch == '>');
    345 }
    346 bool CBC_HighLevelEncoder::isNativeEDIFACT(wchar_t ch) {
    347   return ch >= ' ' && ch <= '^';
    348 }
    349