1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/font/cpdf_cmapparser.h" 8 9 #include <vector> 10 11 #include "core/fpdfapi/cmaps/cmap_int.h" 12 #include "core/fpdfapi/cpdf_modulemgr.h" 13 #include "core/fpdfapi/page/cpdf_pagemodule.h" 14 #include "core/fpdfapi/parser/cpdf_array.h" 15 #include "core/fpdfapi/parser/cpdf_dictionary.h" 16 #include "core/fpdfapi/parser/cpdf_simple_parser.h" 17 #include "core/fxcrt/fx_extension.h" 18 #include "core/fxge/fx_freetype.h" 19 #include "third_party/base/logging.h" 20 21 namespace { 22 23 const char* const g_CharsetNames[CIDSET_NUM_SETS] = {nullptr, "GB1", "CNS1", 24 "Japan1", "Korea1", "UCS"}; 25 26 CIDSet CIDSetFromSizeT(size_t index) { 27 if (index >= CIDSET_NUM_SETS) { 28 NOTREACHED(); 29 return CIDSET_UNKNOWN; 30 } 31 return static_cast<CIDSet>(index); 32 } 33 34 ByteStringView CMap_GetString(const ByteStringView& word) { 35 if (word.GetLength() <= 2) 36 return ByteStringView(); 37 return word.Right(word.GetLength() - 2); 38 } 39 40 } // namespace 41 42 CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap) 43 : m_pCMap(pCMap), m_Status(0), m_CodeSeq(0) {} 44 45 CPDF_CMapParser::~CPDF_CMapParser() {} 46 47 void CPDF_CMapParser::ParseWord(const ByteStringView& word) { 48 if (word.IsEmpty()) { 49 return; 50 } 51 if (word == "begincidchar") { 52 m_Status = 1; 53 m_CodeSeq = 0; 54 } else if (word == "begincidrange") { 55 m_Status = 2; 56 m_CodeSeq = 0; 57 } else if (word == "endcidrange" || word == "endcidchar") { 58 m_Status = 0; 59 } else if (word == "/WMode") { 60 m_Status = 6; 61 } else if (word == "/Registry") { 62 m_Status = 3; 63 } else if (word == "/Ordering") { 64 m_Status = 4; 65 } else if (word == "/Supplement") { 66 m_Status = 5; 67 } else if (word == "begincodespacerange") { 68 m_Status = 7; 69 m_CodeSeq = 0; 70 } else if (word == "usecmap") { 71 } else if (m_Status == 1 || m_Status == 2) { 72 m_CodePoints[m_CodeSeq] = GetCode(word); 73 m_CodeSeq++; 74 uint32_t StartCode, EndCode; 75 uint16_t StartCID; 76 if (m_Status == 1) { 77 if (m_CodeSeq < 2) { 78 return; 79 } 80 EndCode = StartCode = m_CodePoints[0]; 81 StartCID = (uint16_t)m_CodePoints[1]; 82 } else { 83 if (m_CodeSeq < 3) { 84 return; 85 } 86 StartCode = m_CodePoints[0]; 87 EndCode = m_CodePoints[1]; 88 StartCID = (uint16_t)m_CodePoints[2]; 89 } 90 if (EndCode < 0x10000) { 91 for (uint32_t code = StartCode; code <= EndCode; code++) { 92 m_pCMap->SetDirectCharcodeToCIDTable( 93 code, static_cast<uint16_t>(StartCID + code - StartCode)); 94 } 95 } else { 96 m_AdditionalCharcodeToCIDMappings.push_back( 97 {StartCode, EndCode, StartCID}); 98 } 99 m_CodeSeq = 0; 100 } else if (m_Status == 3) { 101 m_Status = 0; 102 } else if (m_Status == 4) { 103 m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word))); 104 m_Status = 0; 105 } else if (m_Status == 5) { 106 m_Status = 0; 107 } else if (m_Status == 6) { 108 m_pCMap->SetVertical(GetCode(word) != 0); 109 m_Status = 0; 110 } else if (m_Status == 7) { 111 if (word == "endcodespacerange") { 112 size_t nSegs = m_CodeRanges.size(); 113 if (nSegs == 1) { 114 m_pCMap->SetCodingScheme((m_CodeRanges[0].m_CharSize == 2) 115 ? CPDF_CMap::TwoBytes 116 : CPDF_CMap::OneByte); 117 } else if (nSegs > 1) { 118 m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes); 119 m_pCMap->SetMixedFourByteLeadingRanges(m_CodeRanges); 120 } 121 m_Status = 0; 122 } else { 123 if (word.GetLength() == 0 || word[0] != '<') { 124 return; 125 } 126 if (m_CodeSeq % 2) { 127 CPDF_CMap::CodeRange range; 128 if (GetCodeRange(range, m_LastWord.AsStringView(), word)) 129 m_CodeRanges.push_back(range); 130 } 131 m_CodeSeq++; 132 } 133 } 134 m_LastWord = word; 135 } 136 137 uint32_t CPDF_CMapParser::GetCode(const ByteStringView& word) const { 138 if (word.IsEmpty()) 139 return 0; 140 141 pdfium::base::CheckedNumeric<uint32_t> num = 0; 142 if (word[0] == '<') { 143 for (size_t i = 1; i < word.GetLength() && std::isxdigit(word[i]); ++i) { 144 num = num * 16 + FXSYS_HexCharToInt(word[i]); 145 if (!num.IsValid()) 146 return 0; 147 } 148 return num.ValueOrDie(); 149 } 150 151 for (size_t i = 0; i < word.GetLength() && std::isdigit(word[i]); ++i) { 152 num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i])); 153 if (!num.IsValid()) 154 return 0; 155 } 156 return num.ValueOrDie(); 157 } 158 159 bool CPDF_CMapParser::GetCodeRange(CPDF_CMap::CodeRange& range, 160 const ByteStringView& first, 161 const ByteStringView& second) const { 162 if (first.GetLength() == 0 || first[0] != '<') 163 return false; 164 165 size_t i; 166 for (i = 1; i < first.GetLength(); ++i) { 167 if (first[i] == '>') { 168 break; 169 } 170 } 171 range.m_CharSize = (i - 1) / 2; 172 if (range.m_CharSize > 4) 173 return false; 174 175 for (i = 0; i < range.m_CharSize; ++i) { 176 uint8_t digit1 = first[i * 2 + 1]; 177 uint8_t digit2 = first[i * 2 + 2]; 178 range.m_Lower[i] = 179 FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2); 180 } 181 182 size_t size = second.GetLength(); 183 for (i = 0; i < range.m_CharSize; ++i) { 184 uint8_t digit1 = (i * 2 + 1 < size) ? second[i * 2 + 1] : '0'; 185 uint8_t digit2 = (i * 2 + 2 < size) ? second[i * 2 + 2] : '0'; 186 range.m_Upper[i] = 187 FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2); 188 } 189 return true; 190 } 191 192 // static 193 CIDSet CPDF_CMapParser::CharsetFromOrdering(const ByteStringView& ordering) { 194 for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) { 195 if (ordering == g_CharsetNames[charset]) 196 return CIDSetFromSizeT(charset); 197 } 198 return CIDSET_UNKNOWN; 199 } 200