Home | History | Annotate | Download | only in font
      1 // Copyright 2017 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
      8 
      9 #include "core/fpdfapi/cpdf_modulemgr.h"
     10 #include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
     11 #include "core/fpdfapi/page/cpdf_pagemodule.h"
     12 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
     13 #include "core/fxcrt/fx_extension.h"
     14 #include "core/fxcrt/fx_safe_types.h"
     15 #include "third_party/base/numerics/safe_conversions.h"
     16 
     17 WideString CPDF_ToUnicodeMap::Lookup(uint32_t charcode) const {
     18   auto it = m_Map.find(charcode);
     19   if (it != m_Map.end()) {
     20     uint32_t value = it->second;
     21     wchar_t unicode = (wchar_t)(value & 0xffff);
     22     if (unicode != 0xffff) {
     23       return unicode;
     24     }
     25     const wchar_t* buf = m_MultiCharBuf.GetBuffer();
     26     uint32_t buf_len = m_MultiCharBuf.GetLength();
     27     if (!buf || buf_len == 0) {
     28       return WideString();
     29     }
     30     uint32_t index = value >> 16;
     31     if (index >= buf_len) {
     32       return WideString();
     33     }
     34     uint32_t len = buf[index];
     35     if (index + len < index || index + len >= buf_len) {
     36       return WideString();
     37     }
     38     return WideString(buf + index + 1, len);
     39   }
     40   if (m_pBaseMap) {
     41     return m_pBaseMap->UnicodeFromCID((uint16_t)charcode);
     42   }
     43   return WideString();
     44 }
     45 
     46 uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const {
     47   for (const auto& pair : m_Map) {
     48     if (pair.second == static_cast<uint32_t>(unicode))
     49       return pair.first;
     50   }
     51   return 0;
     52 }
     53 
     54 // Static.
     55 uint32_t CPDF_ToUnicodeMap::StringToCode(const ByteStringView& str) {
     56   int len = str.GetLength();
     57   if (len == 0)
     58     return 0;
     59 
     60   uint32_t result = 0;
     61   if (str[0] == '<') {
     62     for (int i = 1; i < len && std::isxdigit(str[i]); ++i)
     63       result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i));
     64     return result;
     65   }
     66 
     67   for (int i = 0; i < len && std::isdigit(str[i]); ++i)
     68     result = result * 10 + FXSYS_DecimalCharToInt(str.CharAt(i));
     69 
     70   return result;
     71 }
     72 
     73 static WideString StringDataAdd(WideString str) {
     74   WideString ret;
     75   int len = str.GetLength();
     76   wchar_t value = 1;
     77   for (int i = len - 1; i >= 0; --i) {
     78     wchar_t ch = str[i] + value;
     79     if (ch < str[i]) {
     80       ret.InsertAtFront(0);
     81     } else {
     82       ret.InsertAtFront(ch);
     83       value = 0;
     84     }
     85   }
     86   if (value)
     87     ret.InsertAtFront(value);
     88   return ret;
     89 }
     90 
     91 // Static.
     92 WideString CPDF_ToUnicodeMap::StringToWideString(const ByteStringView& str) {
     93   int len = str.GetLength();
     94   if (len == 0)
     95     return WideString();
     96 
     97   WideString result;
     98   if (str[0] == '<') {
     99     int byte_pos = 0;
    100     wchar_t ch = 0;
    101     for (int i = 1; i < len && std::isxdigit(str[i]); ++i) {
    102       ch = ch * 16 + FXSYS_HexCharToInt(str[i]);
    103       byte_pos++;
    104       if (byte_pos == 4) {
    105         result += ch;
    106         byte_pos = 0;
    107         ch = 0;
    108       }
    109     }
    110     return result;
    111   }
    112   return result;
    113 }
    114 
    115 CPDF_ToUnicodeMap::CPDF_ToUnicodeMap() : m_pBaseMap(nullptr) {}
    116 
    117 CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() {}
    118 
    119 uint32_t CPDF_ToUnicodeMap::GetUnicode() {
    120   FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength();
    121   uni = uni * 0x10000 + 0xffff;
    122   return uni.ValueOrDefault(0);
    123 }
    124 
    125 void CPDF_ToUnicodeMap::Load(CPDF_Stream* pStream) {
    126   CIDSet cid_set = CIDSET_UNKNOWN;
    127   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
    128   pAcc->LoadAllDataFiltered();
    129   CPDF_SimpleParser parser(pAcc->GetData(), pAcc->GetSize());
    130   while (1) {
    131     ByteStringView word = parser.GetWord();
    132     if (word.IsEmpty()) {
    133       break;
    134     }
    135     if (word == "beginbfchar") {
    136       while (1) {
    137         word = parser.GetWord();
    138         if (word.IsEmpty() || word == "endbfchar") {
    139           break;
    140         }
    141         uint32_t srccode = StringToCode(word);
    142         word = parser.GetWord();
    143         WideString destcode = StringToWideString(word);
    144         int len = destcode.GetLength();
    145         if (len == 0) {
    146           continue;
    147         }
    148         if (len == 1) {
    149           m_Map[srccode] = destcode[0];
    150         } else {
    151           m_Map[srccode] = GetUnicode();
    152           m_MultiCharBuf.AppendChar(destcode.GetLength());
    153           m_MultiCharBuf << destcode;
    154         }
    155       }
    156     } else if (word == "beginbfrange") {
    157       while (1) {
    158         ByteString low, high;
    159         low = parser.GetWord();
    160         if (low.IsEmpty() || low == "endbfrange") {
    161           break;
    162         }
    163         high = parser.GetWord();
    164         uint32_t lowcode = StringToCode(low.AsStringView());
    165         uint32_t highcode =
    166             (lowcode & 0xffffff00) | (StringToCode(high.AsStringView()) & 0xff);
    167         if (highcode == (uint32_t)-1) {
    168           break;
    169         }
    170         ByteString start(parser.GetWord());
    171         if (start == "[") {
    172           for (uint32_t code = lowcode; code <= highcode; code++) {
    173             ByteString dest(parser.GetWord());
    174             WideString destcode = StringToWideString(dest.AsStringView());
    175             int len = destcode.GetLength();
    176             if (len == 0) {
    177               continue;
    178             }
    179             if (len == 1) {
    180               m_Map[code] = destcode[0];
    181             } else {
    182               m_Map[code] = GetUnicode();
    183               m_MultiCharBuf.AppendChar(destcode.GetLength());
    184               m_MultiCharBuf << destcode;
    185             }
    186           }
    187           parser.GetWord();
    188         } else {
    189           WideString destcode = StringToWideString(start.AsStringView());
    190           int len = destcode.GetLength();
    191           uint32_t value = 0;
    192           if (len == 1) {
    193             value = StringToCode(start.AsStringView());
    194             for (uint32_t code = lowcode; code <= highcode; code++) {
    195               m_Map[code] = value++;
    196             }
    197           } else {
    198             for (uint32_t code = lowcode; code <= highcode; code++) {
    199               WideString retcode;
    200               if (code == lowcode) {
    201                 retcode = destcode;
    202               } else {
    203                 retcode = StringDataAdd(destcode);
    204               }
    205               m_Map[code] = GetUnicode();
    206               m_MultiCharBuf.AppendChar(retcode.GetLength());
    207               m_MultiCharBuf << retcode;
    208               destcode = retcode;
    209             }
    210           }
    211         }
    212       }
    213     } else if (word == "/Adobe-Korea1-UCS2") {
    214       cid_set = CIDSET_KOREA1;
    215     } else if (word == "/Adobe-Japan1-UCS2") {
    216       cid_set = CIDSET_JAPAN1;
    217     } else if (word == "/Adobe-CNS1-UCS2") {
    218       cid_set = CIDSET_CNS1;
    219     } else if (word == "/Adobe-GB1-UCS2") {
    220       cid_set = CIDSET_GB1;
    221     }
    222   }
    223   if (cid_set) {
    224     m_pBaseMap = CPDF_ModuleMgr::Get()
    225                      ->GetPageModule()
    226                      ->GetFontGlobals()
    227                      ->GetCMapManager()
    228                      ->GetCID2UnicodeMap(cid_set, false);
    229   } else {
    230     m_pBaseMap = nullptr;
    231   }
    232 }
    233