1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 // Original code is licensed as follows: 7 /* 8 * Copyright 2006-2007 Jeremias Maerki. 9 * 10 * Licensed under the Apache License, Version 2.0 (the "License"); 11 * you may not use this file except in compliance with the License. 12 * You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 23 #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h" 24 25 #include <limits> 26 #include <memory> 27 #include <vector> 28 29 #include "fxbarcode/BC_UtilCodingConvert.h" 30 #include "fxbarcode/common/BC_CommonBitMatrix.h" 31 #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h" 32 #include "fxbarcode/datamatrix/BC_Base256Encoder.h" 33 #include "fxbarcode/datamatrix/BC_C40Encoder.h" 34 #include "fxbarcode/datamatrix/BC_EdifactEncoder.h" 35 #include "fxbarcode/datamatrix/BC_Encoder.h" 36 #include "fxbarcode/datamatrix/BC_EncoderContext.h" 37 #include "fxbarcode/datamatrix/BC_SymbolInfo.h" 38 #include "fxbarcode/datamatrix/BC_TextEncoder.h" 39 #include "fxbarcode/datamatrix/BC_X12Encoder.h" 40 #include "fxbarcode/utils.h" 41 #include "third_party/base/ptr_util.h" 42 43 const wchar_t CBC_HighLevelEncoder::LATCH_TO_C40 = 230; 44 const wchar_t CBC_HighLevelEncoder::LATCH_TO_BASE256 = 231; 45 const wchar_t CBC_HighLevelEncoder::UPPER_SHIFT = 235; 46 const wchar_t CBC_HighLevelEncoder::LATCH_TO_ANSIX12 = 238; 47 const wchar_t CBC_HighLevelEncoder::LATCH_TO_TEXT = 239; 48 const wchar_t CBC_HighLevelEncoder::LATCH_TO_EDIFACT = 240; 49 const wchar_t CBC_HighLevelEncoder::C40_UNLATCH = 254; 50 const wchar_t CBC_HighLevelEncoder::X12_UNLATCH = 254; 51 const wchar_t CBC_HighLevelEncoder::PAD = 129; 52 const wchar_t CBC_HighLevelEncoder::MACRO_05 = 236; 53 const wchar_t CBC_HighLevelEncoder::MACRO_06 = 237; 54 const wchar_t CBC_HighLevelEncoder::MACRO_05_HEADER[] = L"[)>05"; 55 const wchar_t CBC_HighLevelEncoder::MACRO_06_HEADER[] = L"[)>06"; 56 const wchar_t CBC_HighLevelEncoder::MACRO_TRAILER = 0x0004; 57 58 CBC_HighLevelEncoder::CBC_HighLevelEncoder() {} 59 CBC_HighLevelEncoder::~CBC_HighLevelEncoder() {} 60 61 std::vector<uint8_t>& CBC_HighLevelEncoder::getBytesForMessage(WideString msg) { 62 ByteString bytestr; 63 CBC_UtilCodingConvert::UnicodeToUTF8(msg, bytestr); 64 m_bytearray.insert(m_bytearray.end(), bytestr.begin(), bytestr.end()); 65 return m_bytearray; 66 } 67 68 // static 69 WideString CBC_HighLevelEncoder::encodeHighLevel(WideString msg, 70 WideString ecLevel, 71 bool allowRectangular, 72 int32_t& e) { 73 CBC_EncoderContext context(msg, ecLevel, e); 74 if (e != BCExceptionNO) 75 return WideString(); 76 77 context.setAllowRectangular(allowRectangular); 78 if ((msg.Left(6) == MACRO_05_HEADER) && (msg.Last() == MACRO_TRAILER)) { 79 context.writeCodeword(MACRO_05); 80 context.setSkipAtEnd(2); 81 context.m_pos += 6; 82 } else if ((msg.Left(6) == MACRO_06_HEADER) && 83 (msg.Last() == MACRO_TRAILER)) { 84 context.writeCodeword(MACRO_06); 85 context.setSkipAtEnd(2); 86 context.m_pos += 6; 87 } 88 89 std::vector<std::unique_ptr<CBC_Encoder>> encoders; 90 encoders.push_back(pdfium::MakeUnique<CBC_ASCIIEncoder>()); 91 encoders.push_back(pdfium::MakeUnique<CBC_C40Encoder>()); 92 encoders.push_back(pdfium::MakeUnique<CBC_TextEncoder>()); 93 encoders.push_back(pdfium::MakeUnique<CBC_X12Encoder>()); 94 encoders.push_back(pdfium::MakeUnique<CBC_EdifactEncoder>()); 95 encoders.push_back(pdfium::MakeUnique<CBC_Base256Encoder>()); 96 int32_t encodingMode = ASCII_ENCODATION; 97 while (context.hasMoreCharacters()) { 98 encoders[encodingMode]->Encode(context, e); 99 if (e != BCExceptionNO) 100 return L""; 101 102 if (context.m_newEncoding >= 0) { 103 encodingMode = context.m_newEncoding; 104 context.resetEncoderSignal(); 105 } 106 } 107 int32_t len = context.m_codewords.GetLength(); 108 context.updateSymbolInfo(e); 109 if (e != BCExceptionNO) 110 return L""; 111 112 int32_t capacity = context.m_symbolInfo->dataCapacity(); 113 if (len < capacity) { 114 if (encodingMode != ASCII_ENCODATION && 115 encodingMode != BASE256_ENCODATION) { 116 context.writeCodeword(0x00fe); 117 } 118 } 119 WideString codewords = context.m_codewords; 120 if (pdfium::base::checked_cast<int32_t>(codewords.GetLength()) < capacity) { 121 codewords += PAD; 122 } 123 while (pdfium::base::checked_cast<int32_t>(codewords.GetLength()) < 124 capacity) { 125 codewords += (randomize253State( 126 PAD, pdfium::base::checked_cast<int32_t>(codewords.GetLength()) + 1)); 127 } 128 return codewords; 129 } 130 int32_t CBC_HighLevelEncoder::lookAheadTest(WideString msg, 131 int32_t startpos, 132 int32_t currentMode) { 133 if (startpos >= pdfium::base::checked_cast<int32_t>(msg.GetLength())) { 134 return currentMode; 135 } 136 std::vector<float> charCounts; 137 if (currentMode == ASCII_ENCODATION) { 138 charCounts.push_back(0); 139 charCounts.push_back(1); 140 charCounts.push_back(1); 141 charCounts.push_back(1); 142 charCounts.push_back(1); 143 charCounts.push_back(1.25f); 144 } else { 145 charCounts.push_back(1); 146 charCounts.push_back(2); 147 charCounts.push_back(2); 148 charCounts.push_back(2); 149 charCounts.push_back(2); 150 charCounts.push_back(2.25f); 151 charCounts[currentMode] = 0; 152 } 153 int32_t charsProcessed = 0; 154 while (true) { 155 if ((startpos + charsProcessed) == 156 pdfium::base::checked_cast<int32_t>(msg.GetLength())) { 157 int32_t min = std::numeric_limits<int32_t>::max(); 158 std::vector<uint8_t> mins(6); 159 std::vector<int32_t> intCharCounts(6); 160 min = findMinimums(charCounts, intCharCounts, min, mins); 161 int32_t minCount = getMinimumCount(mins); 162 if (intCharCounts[ASCII_ENCODATION] == min) { 163 return ASCII_ENCODATION; 164 } 165 if (minCount == 1 && mins[BASE256_ENCODATION] > 0) { 166 return BASE256_ENCODATION; 167 } 168 if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) { 169 return EDIFACT_ENCODATION; 170 } 171 if (minCount == 1 && mins[TEXT_ENCODATION] > 0) { 172 return TEXT_ENCODATION; 173 } 174 if (minCount == 1 && mins[X12_ENCODATION] > 0) { 175 return X12_ENCODATION; 176 } 177 return C40_ENCODATION; 178 } 179 wchar_t c = msg[startpos + charsProcessed]; 180 charsProcessed++; 181 if (isDigit(c)) { 182 charCounts[ASCII_ENCODATION] += 0.5; 183 } else if (isExtendedASCII(c)) { 184 charCounts[ASCII_ENCODATION] = (float)ceil(charCounts[ASCII_ENCODATION]); 185 charCounts[ASCII_ENCODATION] += 2; 186 } else { 187 charCounts[ASCII_ENCODATION] = (float)ceil(charCounts[ASCII_ENCODATION]); 188 charCounts[ASCII_ENCODATION]++; 189 } 190 if (isNativeC40(c)) { 191 charCounts[C40_ENCODATION] += 2.0f / 3.0f; 192 } else if (isExtendedASCII(c)) { 193 charCounts[C40_ENCODATION] += 8.0f / 3.0f; 194 } else { 195 charCounts[C40_ENCODATION] += 4.0f / 3.0f; 196 } 197 if (isNativeText(c)) { 198 charCounts[TEXT_ENCODATION] += 2.0f / 3.0f; 199 } else if (isExtendedASCII(c)) { 200 charCounts[TEXT_ENCODATION] += 8.0f / 3.0f; 201 } else { 202 charCounts[TEXT_ENCODATION] += 4.0f / 3.0f; 203 } 204 if (isNativeX12(c)) { 205 charCounts[X12_ENCODATION] += 2.0f / 3.0f; 206 } else if (isExtendedASCII(c)) { 207 charCounts[X12_ENCODATION] += 13.0f / 3.0f; 208 } else { 209 charCounts[X12_ENCODATION] += 10.0f / 3.0f; 210 } 211 if (isNativeEDIFACT(c)) { 212 charCounts[EDIFACT_ENCODATION] += 3.0f / 4.0f; 213 } else if (isExtendedASCII(c)) { 214 charCounts[EDIFACT_ENCODATION] += 17.0f / 4.0f; 215 } else { 216 charCounts[EDIFACT_ENCODATION] += 13.0f / 4.0f; 217 } 218 charCounts[BASE256_ENCODATION]++; 219 if (charsProcessed >= 4) { 220 std::vector<int32_t> intCharCounts(6); 221 std::vector<uint8_t> mins(6); 222 findMinimums(charCounts, intCharCounts, 223 std::numeric_limits<int32_t>::max(), mins); 224 int32_t minCount = getMinimumCount(mins); 225 if (intCharCounts[ASCII_ENCODATION] < intCharCounts[BASE256_ENCODATION] && 226 intCharCounts[ASCII_ENCODATION] < intCharCounts[C40_ENCODATION] && 227 intCharCounts[ASCII_ENCODATION] < intCharCounts[TEXT_ENCODATION] && 228 intCharCounts[ASCII_ENCODATION] < intCharCounts[X12_ENCODATION] && 229 intCharCounts[ASCII_ENCODATION] < intCharCounts[EDIFACT_ENCODATION]) { 230 return ASCII_ENCODATION; 231 } 232 if (intCharCounts[BASE256_ENCODATION] < intCharCounts[ASCII_ENCODATION] || 233 (mins[C40_ENCODATION] + mins[TEXT_ENCODATION] + mins[X12_ENCODATION] + 234 mins[EDIFACT_ENCODATION]) == 0) { 235 return BASE256_ENCODATION; 236 } 237 if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) { 238 return EDIFACT_ENCODATION; 239 } 240 if (minCount == 1 && mins[TEXT_ENCODATION] > 0) { 241 return TEXT_ENCODATION; 242 } 243 if (minCount == 1 && mins[X12_ENCODATION] > 0) { 244 return X12_ENCODATION; 245 } 246 if (intCharCounts[C40_ENCODATION] + 1 < intCharCounts[ASCII_ENCODATION] && 247 intCharCounts[C40_ENCODATION] + 1 < 248 intCharCounts[BASE256_ENCODATION] && 249 intCharCounts[C40_ENCODATION] + 1 < 250 intCharCounts[EDIFACT_ENCODATION] && 251 intCharCounts[C40_ENCODATION] + 1 < intCharCounts[TEXT_ENCODATION]) { 252 if (intCharCounts[C40_ENCODATION] < intCharCounts[X12_ENCODATION]) { 253 return C40_ENCODATION; 254 } 255 if (intCharCounts[C40_ENCODATION] == intCharCounts[X12_ENCODATION]) { 256 int32_t p = startpos + charsProcessed + 1; 257 int32_t checked_length = 258 pdfium::base::checked_cast<int32_t>(msg.GetLength()); 259 while (p < checked_length) { 260 wchar_t tc = msg[p]; 261 if (isX12TermSep(tc)) { 262 return X12_ENCODATION; 263 } 264 if (!isNativeX12(tc)) { 265 break; 266 } 267 p++; 268 } 269 return C40_ENCODATION; 270 } 271 } 272 } 273 } 274 } 275 bool CBC_HighLevelEncoder::isDigit(wchar_t ch) { 276 return ch >= '0' && ch <= '9'; 277 } 278 bool CBC_HighLevelEncoder::isExtendedASCII(wchar_t ch) { 279 return ch >= 128 && ch <= 255; 280 } 281 int32_t CBC_HighLevelEncoder::determineConsecutiveDigitCount(WideString msg, 282 int32_t startpos) { 283 int32_t count = 0; 284 int32_t len = msg.GetLength(); 285 int32_t idx = startpos; 286 if (idx < len) { 287 wchar_t ch = msg[idx]; 288 while (isDigit(ch) && idx < len) { 289 count++; 290 idx++; 291 if (idx < len) { 292 ch = msg[idx]; 293 } 294 } 295 } 296 return count; 297 } 298 299 wchar_t CBC_HighLevelEncoder::randomize253State(wchar_t ch, 300 int32_t codewordPosition) { 301 int32_t pseudoRandom = ((149 * codewordPosition) % 253) + 1; 302 int32_t tempVariable = ch + pseudoRandom; 303 return tempVariable <= 254 ? (wchar_t)tempVariable 304 : (wchar_t)(tempVariable - 254); 305 } 306 int32_t CBC_HighLevelEncoder::findMinimums(std::vector<float>& charCounts, 307 std::vector<int32_t>& intCharCounts, 308 int32_t min, 309 std::vector<uint8_t>& mins) { 310 for (size_t l = 0; l < mins.size(); l++) 311 mins[l] = 0; 312 313 for (size_t i = 0; i < 6; i++) { 314 intCharCounts[i] = static_cast<int32_t>(ceil(charCounts[i])); 315 int32_t current = intCharCounts[i]; 316 if (min > current) { 317 min = current; 318 for (size_t j = 0; j < mins.size(); j++) 319 mins[j] = 0; 320 } 321 if (min == current) 322 mins[i]++; 323 } 324 return min; 325 } 326 int32_t CBC_HighLevelEncoder::getMinimumCount(std::vector<uint8_t>& mins) { 327 int32_t minCount = 0; 328 for (int32_t i = 0; i < 6; i++) { 329 minCount += mins[i]; 330 } 331 return minCount; 332 } 333 bool CBC_HighLevelEncoder::isNativeC40(wchar_t ch) { 334 return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z'); 335 } 336 bool CBC_HighLevelEncoder::isNativeText(wchar_t ch) { 337 return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z'); 338 } 339 bool CBC_HighLevelEncoder::isNativeX12(wchar_t ch) { 340 return isX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') || 341 (ch >= 'A' && ch <= 'Z'); 342 } 343 bool CBC_HighLevelEncoder::isX12TermSep(wchar_t ch) { 344 return (ch == '\r') || (ch == '*') || (ch == '>'); 345 } 346 bool CBC_HighLevelEncoder::isNativeEDIFACT(wchar_t ch) { 347 return ch >= ' ' && ch <= '^'; 348 } 349