Home | History | Annotate | Download | only in font
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/font/cpdf_cmapparser.h"
      8 
      9 #include <vector>
     10 
     11 #include "core/fpdfapi/cmaps/cmap_int.h"
     12 #include "core/fpdfapi/cpdf_modulemgr.h"
     13 #include "core/fpdfapi/page/cpdf_pagemodule.h"
     14 #include "core/fpdfapi/parser/cpdf_array.h"
     15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     16 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
     17 #include "core/fxcrt/fx_extension.h"
     18 #include "core/fxge/fx_freetype.h"
     19 #include "third_party/base/logging.h"
     20 
     21 namespace {
     22 
     23 const char* const g_CharsetNames[CIDSET_NUM_SETS] = {nullptr,  "GB1",    "CNS1",
     24                                                      "Japan1", "Korea1", "UCS"};
     25 
     26 CIDSet CIDSetFromSizeT(size_t index) {
     27   if (index >= CIDSET_NUM_SETS) {
     28     NOTREACHED();
     29     return CIDSET_UNKNOWN;
     30   }
     31   return static_cast<CIDSet>(index);
     32 }
     33 
     34 ByteStringView CMap_GetString(const ByteStringView& word) {
     35   if (word.GetLength() <= 2)
     36     return ByteStringView();
     37   return word.Right(word.GetLength() - 2);
     38 }
     39 
     40 }  // namespace
     41 
     42 CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap)
     43     : m_pCMap(pCMap), m_Status(0), m_CodeSeq(0) {}
     44 
     45 CPDF_CMapParser::~CPDF_CMapParser() {}
     46 
     47 void CPDF_CMapParser::ParseWord(const ByteStringView& word) {
     48   if (word.IsEmpty()) {
     49     return;
     50   }
     51   if (word == "begincidchar") {
     52     m_Status = 1;
     53     m_CodeSeq = 0;
     54   } else if (word == "begincidrange") {
     55     m_Status = 2;
     56     m_CodeSeq = 0;
     57   } else if (word == "endcidrange" || word == "endcidchar") {
     58     m_Status = 0;
     59   } else if (word == "/WMode") {
     60     m_Status = 6;
     61   } else if (word == "/Registry") {
     62     m_Status = 3;
     63   } else if (word == "/Ordering") {
     64     m_Status = 4;
     65   } else if (word == "/Supplement") {
     66     m_Status = 5;
     67   } else if (word == "begincodespacerange") {
     68     m_Status = 7;
     69     m_CodeSeq = 0;
     70   } else if (word == "usecmap") {
     71   } else if (m_Status == 1 || m_Status == 2) {
     72     m_CodePoints[m_CodeSeq] = GetCode(word);
     73     m_CodeSeq++;
     74     uint32_t StartCode, EndCode;
     75     uint16_t StartCID;
     76     if (m_Status == 1) {
     77       if (m_CodeSeq < 2) {
     78         return;
     79       }
     80       EndCode = StartCode = m_CodePoints[0];
     81       StartCID = (uint16_t)m_CodePoints[1];
     82     } else {
     83       if (m_CodeSeq < 3) {
     84         return;
     85       }
     86       StartCode = m_CodePoints[0];
     87       EndCode = m_CodePoints[1];
     88       StartCID = (uint16_t)m_CodePoints[2];
     89     }
     90     if (EndCode < 0x10000) {
     91       for (uint32_t code = StartCode; code <= EndCode; code++) {
     92         m_pCMap->SetDirectCharcodeToCIDTable(
     93             code, static_cast<uint16_t>(StartCID + code - StartCode));
     94       }
     95     } else {
     96       m_AdditionalCharcodeToCIDMappings.push_back(
     97           {StartCode, EndCode, StartCID});
     98     }
     99     m_CodeSeq = 0;
    100   } else if (m_Status == 3) {
    101     m_Status = 0;
    102   } else if (m_Status == 4) {
    103     m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
    104     m_Status = 0;
    105   } else if (m_Status == 5) {
    106     m_Status = 0;
    107   } else if (m_Status == 6) {
    108     m_pCMap->SetVertical(GetCode(word) != 0);
    109     m_Status = 0;
    110   } else if (m_Status == 7) {
    111     if (word == "endcodespacerange") {
    112       size_t nSegs = m_CodeRanges.size();
    113       if (nSegs == 1) {
    114         m_pCMap->SetCodingScheme((m_CodeRanges[0].m_CharSize == 2)
    115                                      ? CPDF_CMap::TwoBytes
    116                                      : CPDF_CMap::OneByte);
    117       } else if (nSegs > 1) {
    118         m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
    119         m_pCMap->SetMixedFourByteLeadingRanges(m_CodeRanges);
    120       }
    121       m_Status = 0;
    122     } else {
    123       if (word.GetLength() == 0 || word[0] != '<') {
    124         return;
    125       }
    126       if (m_CodeSeq % 2) {
    127         CPDF_CMap::CodeRange range;
    128         if (GetCodeRange(range, m_LastWord.AsStringView(), word))
    129           m_CodeRanges.push_back(range);
    130       }
    131       m_CodeSeq++;
    132     }
    133   }
    134   m_LastWord = word;
    135 }
    136 
    137 uint32_t CPDF_CMapParser::GetCode(const ByteStringView& word) const {
    138   if (word.IsEmpty())
    139     return 0;
    140 
    141   pdfium::base::CheckedNumeric<uint32_t> num = 0;
    142   if (word[0] == '<') {
    143     for (size_t i = 1; i < word.GetLength() && std::isxdigit(word[i]); ++i) {
    144       num = num * 16 + FXSYS_HexCharToInt(word[i]);
    145       if (!num.IsValid())
    146         return 0;
    147     }
    148     return num.ValueOrDie();
    149   }
    150 
    151   for (size_t i = 0; i < word.GetLength() && std::isdigit(word[i]); ++i) {
    152     num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i]));
    153     if (!num.IsValid())
    154       return 0;
    155   }
    156   return num.ValueOrDie();
    157 }
    158 
    159 bool CPDF_CMapParser::GetCodeRange(CPDF_CMap::CodeRange& range,
    160                                    const ByteStringView& first,
    161                                    const ByteStringView& second) const {
    162   if (first.GetLength() == 0 || first[0] != '<')
    163     return false;
    164 
    165   size_t i;
    166   for (i = 1; i < first.GetLength(); ++i) {
    167     if (first[i] == '>') {
    168       break;
    169     }
    170   }
    171   range.m_CharSize = (i - 1) / 2;
    172   if (range.m_CharSize > 4)
    173     return false;
    174 
    175   for (i = 0; i < range.m_CharSize; ++i) {
    176     uint8_t digit1 = first[i * 2 + 1];
    177     uint8_t digit2 = first[i * 2 + 2];
    178     range.m_Lower[i] =
    179         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
    180   }
    181 
    182   size_t size = second.GetLength();
    183   for (i = 0; i < range.m_CharSize; ++i) {
    184     uint8_t digit1 = (i * 2 + 1 < size) ? second[i * 2 + 1] : '0';
    185     uint8_t digit2 = (i * 2 + 2 < size) ? second[i * 2 + 2] : '0';
    186     range.m_Upper[i] =
    187         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
    188   }
    189   return true;
    190 }
    191 
    192 // static
    193 CIDSet CPDF_CMapParser::CharsetFromOrdering(const ByteStringView& ordering) {
    194   for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) {
    195     if (ordering == g_CharsetNames[charset])
    196       return CIDSetFromSizeT(charset);
    197   }
    198   return CIDSET_UNKNOWN;
    199 }
    200