Home | History | Annotate | Download | only in pwl
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "fpdfsdk/pwl/cpwl_font_map.h"
      8 
      9 #include <utility>
     10 
     11 #include "core/fpdfapi/cpdf_modulemgr.h"
     12 #include "core/fpdfapi/font/cpdf_font.h"
     13 #include "core/fpdfapi/font/cpdf_fontencoding.h"
     14 #include "core/fpdfapi/parser/cpdf_document.h"
     15 #include "core/fpdfapi/parser/cpdf_parser.h"
     16 #include "core/fpdfdoc/ipvt_fontmap.h"
     17 #include "core/fxcrt/fx_codepage.h"
     18 #include "fpdfsdk/pwl/cpwl_wnd.h"
     19 #include "third_party/base/ptr_util.h"
     20 #include "third_party/base/stl_util.h"
     21 
     22 namespace {
     23 
     24 const char kDefaultFontName[] = "Helvetica";
     25 
     26 const char* const g_sDEStandardFontName[] = {"Courier",
     27                                              "Courier-Bold",
     28                                              "Courier-BoldOblique",
     29                                              "Courier-Oblique",
     30                                              "Helvetica",
     31                                              "Helvetica-Bold",
     32                                              "Helvetica-BoldOblique",
     33                                              "Helvetica-Oblique",
     34                                              "Times-Roman",
     35                                              "Times-Bold",
     36                                              "Times-Italic",
     37                                              "Times-BoldItalic",
     38                                              "Symbol",
     39                                              "ZapfDingbats"};
     40 
     41 }  // namespace
     42 
     43 CPWL_FontMap::CPWL_FontMap(CFX_SystemHandler* pSystemHandler)
     44     : m_pSystemHandler(pSystemHandler) {
     45   ASSERT(m_pSystemHandler);
     46 }
     47 
     48 CPWL_FontMap::~CPWL_FontMap() {
     49   Empty();
     50 }
     51 
     52 CPDF_Document* CPWL_FontMap::GetDocument() {
     53   if (!m_pPDFDoc) {
     54     if (CPDF_ModuleMgr::Get()) {
     55       m_pPDFDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
     56       m_pPDFDoc->CreateNewDoc();
     57     }
     58   }
     59   return m_pPDFDoc.get();
     60 }
     61 
     62 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
     63   if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex])
     64     return m_Data[nFontIndex]->pFont;
     65 
     66   return nullptr;
     67 }
     68 
     69 ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
     70   if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex])
     71     return m_Data[nFontIndex]->sFontName;
     72 
     73   return ByteString();
     74 }
     75 
     76 bool CPWL_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) {
     77   return pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex] &&
     78          CharCodeFromUnicode(nFontIndex, word) >= 0;
     79 }
     80 
     81 int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word,
     82                                        int32_t nCharset,
     83                                        int32_t nFontIndex) {
     84   if (nFontIndex > 0) {
     85     if (KnowWord(nFontIndex, word))
     86       return nFontIndex;
     87   } else {
     88     if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
     89       if (nCharset == FX_CHARSET_Default ||
     90           pData->nCharset == FX_CHARSET_Symbol || nCharset == pData->nCharset) {
     91         if (KnowWord(0, word))
     92           return 0;
     93       }
     94     }
     95   }
     96 
     97   int32_t nNewFontIndex =
     98       GetFontIndex(GetNativeFontName(nCharset), nCharset, true);
     99   if (nNewFontIndex >= 0) {
    100     if (KnowWord(nNewFontIndex, word))
    101       return nNewFontIndex;
    102   }
    103   nNewFontIndex = GetFontIndex("Arial Unicode MS", FX_CHARSET_Default, false);
    104   if (nNewFontIndex >= 0) {
    105     if (KnowWord(nNewFontIndex, word))
    106       return nNewFontIndex;
    107   }
    108   return -1;
    109 }
    110 
    111 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) {
    112   if (!pdfium::IndexInBounds(m_Data, nFontIndex))
    113     return -1;
    114 
    115   CPWL_FontMap_Data* pData = m_Data[nFontIndex].get();
    116   if (!pData || !pData->pFont)
    117     return -1;
    118 
    119   if (pData->pFont->IsUnicodeCompatible())
    120     return pData->pFont->CharCodeFromUnicode(word);
    121 
    122   return word < 0xFF ? word : -1;
    123 }
    124 
    125 ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
    126   for (const auto& pData : m_NativeFont) {
    127     if (pData && pData->nCharset == nCharset)
    128       return pData->sFontName;
    129   }
    130 
    131   ByteString sNew = GetNativeFont(nCharset);
    132   if (sNew.IsEmpty())
    133     return ByteString();
    134 
    135   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Native>();
    136   pNewData->nCharset = nCharset;
    137   pNewData->sFontName = sNew;
    138   m_NativeFont.push_back(std::move(pNewData));
    139   return sNew;
    140 }
    141 
    142 void CPWL_FontMap::Empty() {
    143   m_Data.clear();
    144   m_NativeFont.clear();
    145 }
    146 
    147 void CPWL_FontMap::Initialize() {
    148   GetFontIndex(kDefaultFontName, FX_CHARSET_ANSI, false);
    149 }
    150 
    151 bool CPWL_FontMap::IsStandardFont(const ByteString& sFontName) {
    152   for (const char* name : g_sDEStandardFontName) {
    153     if (sFontName == name)
    154       return true;
    155   }
    156 
    157   return false;
    158 }
    159 
    160 int32_t CPWL_FontMap::FindFont(const ByteString& sFontName, int32_t nCharset) {
    161   int32_t i = 0;
    162   for (const auto& pData : m_Data) {
    163     if (pData &&
    164         (nCharset == FX_CHARSET_Default || nCharset == pData->nCharset) &&
    165         (sFontName.IsEmpty() || pData->sFontName == sFontName)) {
    166       return i;
    167     }
    168     ++i;
    169   }
    170   return -1;
    171 }
    172 
    173 int32_t CPWL_FontMap::GetFontIndex(const ByteString& sFontName,
    174                                    int32_t nCharset,
    175                                    bool bFind) {
    176   int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
    177   if (nFontIndex >= 0)
    178     return nFontIndex;
    179 
    180   ByteString sAlias;
    181   CPDF_Font* pFont = bFind ? FindFontSameCharset(&sAlias, nCharset) : nullptr;
    182   if (!pFont) {
    183     ByteString sTemp = sFontName;
    184     pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
    185     sAlias = EncodeFontAlias(sTemp, nCharset);
    186   }
    187   AddedFont(pFont, sAlias);
    188   return AddFontData(pFont, sAlias, nCharset);
    189 }
    190 
    191 CPDF_Font* CPWL_FontMap::FindFontSameCharset(ByteString* sFontAlias,
    192                                              int32_t nCharset) {
    193   return nullptr;
    194 }
    195 
    196 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
    197                                   const ByteString& sFontAlias,
    198                                   int32_t nCharset) {
    199   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Data>();
    200   pNewData->pFont = pFont;
    201   pNewData->sFontName = sFontAlias;
    202   pNewData->nCharset = nCharset;
    203   m_Data.push_back(std::move(pNewData));
    204   return pdfium::CollectionSize<int32_t>(m_Data) - 1;
    205 }
    206 
    207 void CPWL_FontMap::AddedFont(CPDF_Font* pFont, const ByteString& sFontAlias) {}
    208 
    209 ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
    210   if (nCharset == FX_CHARSET_Default)
    211     nCharset = GetNativeCharset();
    212 
    213   ByteString sFontName = GetDefaultFontByCharset(nCharset);
    214   if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName))
    215     return ByteString();
    216 
    217   return sFontName;
    218 }
    219 
    220 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
    221                                            ByteString& sFontName,
    222                                            uint8_t nCharset) {
    223   if (IsStandardFont(sFontName))
    224     return AddStandardFont(pDoc, sFontName);
    225 
    226   return AddSystemFont(pDoc, sFontName, nCharset);
    227 }
    228 
    229 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
    230                                          ByteString& sFontName) {
    231   if (!pDoc)
    232     return nullptr;
    233 
    234   CPDF_Font* pFont = nullptr;
    235 
    236   if (sFontName == "ZapfDingbats") {
    237     pFont = pDoc->AddStandardFont(sFontName.c_str(), nullptr);
    238   } else {
    239     CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
    240     pFont = pDoc->AddStandardFont(sFontName.c_str(), &fe);
    241   }
    242 
    243   return pFont;
    244 }
    245 
    246 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
    247                                        ByteString& sFontName,
    248                                        uint8_t nCharset) {
    249   if (!pDoc)
    250     return nullptr;
    251 
    252   if (sFontName.IsEmpty())
    253     sFontName = GetNativeFont(nCharset);
    254   if (nCharset == FX_CHARSET_Default)
    255     nCharset = GetNativeCharset();
    256 
    257   return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
    258                                                       nCharset);
    259 }
    260 
    261 ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName,
    262                                          int32_t nCharset) {
    263   return EncodeFontAlias(sFontName) + ByteString::Format("_%02X", nCharset);
    264 }
    265 
    266 ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName) {
    267   ByteString sRet = sFontName;
    268   sRet.Remove(' ');
    269   return sRet;
    270 }
    271 
    272 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
    273   return pdfium::IndexInBounds(m_Data, nIndex) ? m_Data[nIndex].get() : nullptr;
    274 }
    275 
    276 int32_t CPWL_FontMap::GetNativeCharset() {
    277   uint8_t nCharset = FX_CHARSET_ANSI;
    278   int32_t iCodePage = FXSYS_GetACP();
    279   switch (iCodePage) {
    280     case FX_CODEPAGE_ShiftJIS:
    281       nCharset = FX_CHARSET_ShiftJIS;
    282       break;
    283     case FX_CODEPAGE_ChineseSimplified:
    284       nCharset = FX_CHARSET_ChineseSimplified;
    285       break;
    286     case FX_CODEPAGE_ChineseTraditional:
    287       nCharset = FX_CHARSET_ChineseTraditional;
    288       break;
    289     case FX_CODEPAGE_MSWin_WesternEuropean:
    290       nCharset = FX_CHARSET_ANSI;
    291       break;
    292     case FX_CODEPAGE_MSDOS_Thai:
    293       nCharset = FX_CHARSET_Thai;
    294       break;
    295     case FX_CODEPAGE_Hangul:
    296       nCharset = FX_CHARSET_Hangul;
    297       break;
    298     case FX_CODEPAGE_UTF16LE:
    299       nCharset = FX_CHARSET_ANSI;
    300       break;
    301     case FX_CODEPAGE_MSWin_EasternEuropean:
    302       nCharset = FX_CHARSET_MSWin_EasternEuropean;
    303       break;
    304     case FX_CODEPAGE_MSWin_Cyrillic:
    305       nCharset = FX_CHARSET_MSWin_Cyrillic;
    306       break;
    307     case FX_CODEPAGE_MSWin_Greek:
    308       nCharset = FX_CHARSET_MSWin_Greek;
    309       break;
    310     case FX_CODEPAGE_MSWin_Turkish:
    311       nCharset = FX_CHARSET_MSWin_Turkish;
    312       break;
    313     case FX_CODEPAGE_MSWin_Hebrew:
    314       nCharset = FX_CHARSET_MSWin_Hebrew;
    315       break;
    316     case FX_CODEPAGE_MSWin_Arabic:
    317       nCharset = FX_CHARSET_MSWin_Arabic;
    318       break;
    319     case FX_CODEPAGE_MSWin_Baltic:
    320       nCharset = FX_CHARSET_MSWin_Baltic;
    321       break;
    322     case FX_CODEPAGE_MSWin_Vietnamese:
    323       nCharset = FX_CHARSET_MSWin_Vietnamese;
    324       break;
    325     case FX_CODEPAGE_Johab:
    326       nCharset = FX_CHARSET_Johab;
    327       break;
    328   }
    329   return nCharset;
    330 }
    331 
    332 const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
    333     {FX_CHARSET_ANSI, "Helvetica"},
    334     {FX_CHARSET_ChineseSimplified, "SimSun"},
    335     {FX_CHARSET_ChineseTraditional, "MingLiU"},
    336     {FX_CHARSET_ShiftJIS, "MS Gothic"},
    337     {FX_CHARSET_Hangul, "Batang"},
    338     {FX_CHARSET_MSWin_Cyrillic, "Arial"},
    339 #if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
    340     {FX_CHARSET_MSWin_EasternEuropean, "Arial"},
    341 #else
    342     {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"},
    343 #endif
    344     {FX_CHARSET_MSWin_Arabic, "Arial"},
    345     {-1, nullptr}};
    346 
    347 ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
    348   int i = 0;
    349   while (defaultTTFMap[i].charset != -1) {
    350     if (nCharset == defaultTTFMap[i].charset)
    351       return defaultTTFMap[i].fontname;
    352     ++i;
    353   }
    354   return "";
    355 }
    356 
    357 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
    358   // to avoid CJK Font to show ASCII
    359   if (word < 0x7F)
    360     return FX_CHARSET_ANSI;
    361   // follow the old charset
    362   if (nOldCharset != FX_CHARSET_Default)
    363     return nOldCharset;
    364 
    365   // find new charset
    366   if ((word >= 0x4E00 && word <= 0x9FA5) ||
    367       (word >= 0xE7C7 && word <= 0xE7F3) ||
    368       (word >= 0x3000 && word <= 0x303F) ||
    369       (word >= 0x2000 && word <= 0x206F)) {
    370     return FX_CHARSET_ChineseSimplified;
    371   }
    372 
    373   if (((word >= 0x3040) && (word <= 0x309F)) ||
    374       ((word >= 0x30A0) && (word <= 0x30FF)) ||
    375       ((word >= 0x31F0) && (word <= 0x31FF)) ||
    376       ((word >= 0xFF00) && (word <= 0xFFEF))) {
    377     return FX_CHARSET_ShiftJIS;
    378   }
    379 
    380   if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
    381       ((word >= 0x1100) && (word <= 0x11FF)) ||
    382       ((word >= 0x3130) && (word <= 0x318F))) {
    383     return FX_CHARSET_Hangul;
    384   }
    385 
    386   if (word >= 0x0E00 && word <= 0x0E7F)
    387     return FX_CHARSET_Thai;
    388 
    389   if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
    390     return FX_CHARSET_MSWin_Greek;
    391 
    392   if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
    393     return FX_CHARSET_MSWin_Arabic;
    394 
    395   if (word >= 0x0590 && word <= 0x05FF)
    396     return FX_CHARSET_MSWin_Hebrew;
    397 
    398   if (word >= 0x0400 && word <= 0x04FF)
    399     return FX_CHARSET_MSWin_Cyrillic;
    400 
    401   if (word >= 0x0100 && word <= 0x024F)
    402     return FX_CHARSET_MSWin_EasternEuropean;
    403 
    404   if (word >= 0x1E00 && word <= 0x1EFF)
    405     return FX_CHARSET_MSWin_Vietnamese;
    406 
    407   return FX_CHARSET_ANSI;
    408 }
    409