Home | History | Annotate | Download | only in pdfwindow
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "fpdfsdk/pdfwindow/PWL_FontMap.h"
      8 
      9 #include <utility>
     10 
     11 #include "core/fpdfapi/cpdf_modulemgr.h"
     12 #include "core/fpdfapi/font/cpdf_font.h"
     13 #include "core/fpdfapi/font/cpdf_fontencoding.h"
     14 #include "core/fpdfapi/parser/cpdf_document.h"
     15 #include "core/fpdfapi/parser/cpdf_parser.h"
     16 #include "core/fpdfdoc/ipvt_fontmap.h"
     17 #include "fpdfsdk/pdfwindow/PWL_Wnd.h"
     18 #include "third_party/base/ptr_util.h"
     19 #include "third_party/base/stl_util.h"
     20 
     21 namespace {
     22 
     23 const char kDefaultFontName[] = "Helvetica";
     24 
     25 const char* const g_sDEStandardFontName[] = {"Courier",
     26                                              "Courier-Bold",
     27                                              "Courier-BoldOblique",
     28                                              "Courier-Oblique",
     29                                              "Helvetica",
     30                                              "Helvetica-Bold",
     31                                              "Helvetica-BoldOblique",
     32                                              "Helvetica-Oblique",
     33                                              "Times-Roman",
     34                                              "Times-Bold",
     35                                              "Times-Italic",
     36                                              "Times-BoldItalic",
     37                                              "Symbol",
     38                                              "ZapfDingbats"};
     39 
     40 }  // namespace
     41 
     42 CPWL_FontMap::CPWL_FontMap(CFX_SystemHandler* pSystemHandler)
     43     : m_pSystemHandler(pSystemHandler) {
     44   ASSERT(m_pSystemHandler);
     45 }
     46 
     47 CPWL_FontMap::~CPWL_FontMap() {
     48   Empty();
     49 }
     50 
     51 CPDF_Document* CPWL_FontMap::GetDocument() {
     52   if (!m_pPDFDoc) {
     53     if (CPDF_ModuleMgr::Get()) {
     54       m_pPDFDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
     55       m_pPDFDoc->CreateNewDoc();
     56     }
     57   }
     58 
     59   return m_pPDFDoc.get();
     60 }
     61 
     62 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
     63   if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) {
     64     if (m_Data[nFontIndex])
     65       return m_Data[nFontIndex]->pFont;
     66   }
     67   return nullptr;
     68 }
     69 
     70 CFX_ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
     71   if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) {
     72     if (m_Data[nFontIndex])
     73       return m_Data[nFontIndex]->sFontName;
     74   }
     75   return CFX_ByteString();
     76 }
     77 
     78 bool CPWL_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) {
     79   if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) {
     80     if (m_Data[nFontIndex])
     81       return CharCodeFromUnicode(nFontIndex, word) >= 0;
     82   }
     83   return false;
     84 }
     85 
     86 int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word,
     87                                        int32_t nCharset,
     88                                        int32_t nFontIndex) {
     89   if (nFontIndex > 0) {
     90     if (KnowWord(nFontIndex, word))
     91       return nFontIndex;
     92   } else {
     93     if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
     94       if (nCharset == FXFONT_DEFAULT_CHARSET ||
     95           pData->nCharset == FXFONT_SYMBOL_CHARSET ||
     96           nCharset == pData->nCharset) {
     97         if (KnowWord(0, word))
     98           return 0;
     99       }
    100     }
    101   }
    102 
    103   int32_t nNewFontIndex =
    104       GetFontIndex(GetNativeFontName(nCharset), nCharset, true);
    105   if (nNewFontIndex >= 0) {
    106     if (KnowWord(nNewFontIndex, word))
    107       return nNewFontIndex;
    108   }
    109   nNewFontIndex =
    110       GetFontIndex("Arial Unicode MS", FXFONT_DEFAULT_CHARSET, false);
    111   if (nNewFontIndex >= 0) {
    112     if (KnowWord(nNewFontIndex, word))
    113       return nNewFontIndex;
    114   }
    115   return -1;
    116 }
    117 
    118 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) {
    119   if (nFontIndex < 0 || nFontIndex >= pdfium::CollectionSize<int32_t>(m_Data))
    120     return -1;
    121 
    122   CPWL_FontMap_Data* pData = m_Data[nFontIndex].get();
    123   if (!pData || !pData->pFont)
    124     return -1;
    125 
    126   if (pData->pFont->IsUnicodeCompatible())
    127     return pData->pFont->CharCodeFromUnicode(word);
    128 
    129   return word < 0xFF ? word : -1;
    130 }
    131 
    132 CFX_ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
    133   for (const auto& pData : m_NativeFont) {
    134     if (pData && pData->nCharset == nCharset)
    135       return pData->sFontName;
    136   }
    137 
    138   CFX_ByteString sNew = GetNativeFont(nCharset);
    139   if (sNew.IsEmpty())
    140     return CFX_ByteString();
    141 
    142   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Native>();
    143   pNewData->nCharset = nCharset;
    144   pNewData->sFontName = sNew;
    145   m_NativeFont.push_back(std::move(pNewData));
    146   return sNew;
    147 }
    148 
    149 void CPWL_FontMap::Empty() {
    150   m_Data.clear();
    151   m_NativeFont.clear();
    152 }
    153 
    154 void CPWL_FontMap::Initialize() {
    155   GetFontIndex(kDefaultFontName, FXFONT_ANSI_CHARSET, false);
    156 }
    157 
    158 bool CPWL_FontMap::IsStandardFont(const CFX_ByteString& sFontName) {
    159   for (size_t i = 0; i < FX_ArraySize(g_sDEStandardFontName); ++i) {
    160     if (sFontName == g_sDEStandardFontName[i])
    161       return true;
    162   }
    163 
    164   return false;
    165 }
    166 
    167 int32_t CPWL_FontMap::FindFont(const CFX_ByteString& sFontName,
    168                                int32_t nCharset) {
    169   int32_t i = 0;
    170   for (const auto& pData : m_Data) {
    171     if (pData &&
    172         (nCharset == FXFONT_DEFAULT_CHARSET || nCharset == pData->nCharset) &&
    173         (sFontName.IsEmpty() || pData->sFontName == sFontName)) {
    174       return i;
    175     }
    176     ++i;
    177   }
    178   return -1;
    179 }
    180 
    181 int32_t CPWL_FontMap::GetFontIndex(const CFX_ByteString& sFontName,
    182                                    int32_t nCharset,
    183                                    bool bFind) {
    184   int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
    185   if (nFontIndex >= 0)
    186     return nFontIndex;
    187 
    188   CFX_ByteString sAlias;
    189   CPDF_Font* pFont = nullptr;
    190   if (bFind)
    191     pFont = FindFontSameCharset(sAlias, nCharset);
    192 
    193   if (!pFont) {
    194     CFX_ByteString sTemp = sFontName;
    195     pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
    196     sAlias = EncodeFontAlias(sTemp, nCharset);
    197   }
    198   AddedFont(pFont, sAlias);
    199   return AddFontData(pFont, sAlias, nCharset);
    200 }
    201 
    202 CPDF_Font* CPWL_FontMap::FindFontSameCharset(CFX_ByteString& sFontAlias,
    203                                              int32_t nCharset) {
    204   return nullptr;
    205 }
    206 
    207 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
    208                                   const CFX_ByteString& sFontAlias,
    209                                   int32_t nCharset) {
    210   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Data>();
    211   pNewData->pFont = pFont;
    212   pNewData->sFontName = sFontAlias;
    213   pNewData->nCharset = nCharset;
    214   m_Data.push_back(std::move(pNewData));
    215   return pdfium::CollectionSize<int32_t>(m_Data) - 1;
    216 }
    217 
    218 void CPWL_FontMap::AddedFont(CPDF_Font* pFont,
    219                              const CFX_ByteString& sFontAlias) {}
    220 
    221 CFX_ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
    222   if (nCharset == FXFONT_DEFAULT_CHARSET)
    223     nCharset = GetNativeCharset();
    224 
    225   CFX_ByteString sFontName = GetDefaultFontByCharset(nCharset);
    226   if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName))
    227     return CFX_ByteString();
    228 
    229   return sFontName;
    230 }
    231 
    232 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
    233                                            CFX_ByteString& sFontName,
    234                                            uint8_t nCharset) {
    235   if (IsStandardFont(sFontName))
    236     return AddStandardFont(pDoc, sFontName);
    237 
    238   return AddSystemFont(pDoc, sFontName, nCharset);
    239 }
    240 
    241 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
    242                                          CFX_ByteString& sFontName) {
    243   if (!pDoc)
    244     return nullptr;
    245 
    246   CPDF_Font* pFont = nullptr;
    247 
    248   if (sFontName == "ZapfDingbats") {
    249     pFont = pDoc->AddStandardFont(sFontName.c_str(), nullptr);
    250   } else {
    251     CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
    252     pFont = pDoc->AddStandardFont(sFontName.c_str(), &fe);
    253   }
    254 
    255   return pFont;
    256 }
    257 
    258 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
    259                                        CFX_ByteString& sFontName,
    260                                        uint8_t nCharset) {
    261   if (!pDoc)
    262     return nullptr;
    263 
    264   if (sFontName.IsEmpty())
    265     sFontName = GetNativeFont(nCharset);
    266   if (nCharset == FXFONT_DEFAULT_CHARSET)
    267     nCharset = GetNativeCharset();
    268 
    269   return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
    270                                                       nCharset);
    271 }
    272 
    273 CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName,
    274                                              int32_t nCharset) {
    275   CFX_ByteString sPostfix;
    276   sPostfix.Format("_%02X", nCharset);
    277   return EncodeFontAlias(sFontName) + sPostfix;
    278 }
    279 
    280 CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName) {
    281   CFX_ByteString sRet = sFontName;
    282   sRet.Remove(' ');
    283   return sRet;
    284 }
    285 
    286 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
    287   if (nIndex < 0 || nIndex >= pdfium::CollectionSize<int32_t>(m_Data))
    288     return nullptr;
    289 
    290   return m_Data[nIndex].get();
    291 }
    292 
    293 int32_t CPWL_FontMap::GetNativeCharset() {
    294   uint8_t nCharset = FXFONT_ANSI_CHARSET;
    295   int32_t iCodePage = FXSYS_GetACP();
    296   switch (iCodePage) {
    297     case 932:  // Japan
    298       nCharset = FXFONT_SHIFTJIS_CHARSET;
    299       break;
    300     case 936:  // Chinese (PRC, Singapore)
    301       nCharset = FXFONT_GB2312_CHARSET;
    302       break;
    303     case 950:  // Chinese (Taiwan; Hong Kong SAR, PRC)
    304       nCharset = FXFONT_GB2312_CHARSET;
    305       break;
    306     case 1252:  // Windows 3.1 Latin 1 (US, Western Europe)
    307       nCharset = FXFONT_ANSI_CHARSET;
    308       break;
    309     case 874:  // Thai
    310       nCharset = FXFONT_THAI_CHARSET;
    311       break;
    312     case 949:  // Korean
    313       nCharset = FXFONT_HANGUL_CHARSET;
    314       break;
    315     case 1200:  // Unicode (BMP of ISO 10646)
    316       nCharset = FXFONT_ANSI_CHARSET;
    317       break;
    318     case 1250:  // Windows 3.1 Eastern European
    319       nCharset = FXFONT_EASTEUROPE_CHARSET;
    320       break;
    321     case 1251:  // Windows 3.1 Cyrillic
    322       nCharset = FXFONT_RUSSIAN_CHARSET;
    323       break;
    324     case 1253:  // Windows 3.1 Greek
    325       nCharset = FXFONT_GREEK_CHARSET;
    326       break;
    327     case 1254:  // Windows 3.1 Turkish
    328       nCharset = FXFONT_TURKISH_CHARSET;
    329       break;
    330     case 1255:  // Hebrew
    331       nCharset = FXFONT_HEBREW_CHARSET;
    332       break;
    333     case 1256:  // Arabic
    334       nCharset = FXFONT_ARABIC_CHARSET;
    335       break;
    336     case 1257:  // Baltic
    337       nCharset = FXFONT_BALTIC_CHARSET;
    338       break;
    339     case 1258:  // Vietnamese
    340       nCharset = FXFONT_VIETNAMESE_CHARSET;
    341       break;
    342     case 1361:  // Korean(Johab)
    343       nCharset = FXFONT_JOHAB_CHARSET;
    344       break;
    345   }
    346   return nCharset;
    347 }
    348 
    349 const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
    350     {FXFONT_ANSI_CHARSET, "Helvetica"},
    351     {FXFONT_GB2312_CHARSET, "SimSun"},
    352     {FXFONT_CHINESEBIG5_CHARSET, "MingLiU"},
    353     {FXFONT_SHIFTJIS_CHARSET, "MS Gothic"},
    354     {FXFONT_HANGUL_CHARSET, "Batang"},
    355     {FXFONT_RUSSIAN_CHARSET, "Arial"},
    356 #if _FXM_PLATFORM_ == _FXM_PLATFORM_LINUX_ || \
    357     _FXM_PLATFORM_ == _FXM_PLATFORM_APPLE_
    358     {FXFONT_EASTEUROPE_CHARSET, "Arial"},
    359 #else
    360     {FXFONT_EASTEUROPE_CHARSET, "Tahoma"},
    361 #endif
    362     {FXFONT_ARABIC_CHARSET, "Arial"},
    363     {-1, nullptr}};
    364 
    365 CFX_ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
    366   int i = 0;
    367   while (defaultTTFMap[i].charset != -1) {
    368     if (nCharset == defaultTTFMap[i].charset)
    369       return defaultTTFMap[i].fontname;
    370     ++i;
    371   }
    372   return "";
    373 }
    374 
    375 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
    376   // to avoid CJK Font to show ASCII
    377   if (word < 0x7F)
    378     return FXFONT_ANSI_CHARSET;
    379   // follow the old charset
    380   if (nOldCharset != FXFONT_DEFAULT_CHARSET)
    381     return nOldCharset;
    382 
    383   // find new charset
    384   if ((word >= 0x4E00 && word <= 0x9FA5) ||
    385       (word >= 0xE7C7 && word <= 0xE7F3) ||
    386       (word >= 0x3000 && word <= 0x303F) ||
    387       (word >= 0x2000 && word <= 0x206F)) {
    388     return FXFONT_GB2312_CHARSET;
    389   }
    390 
    391   if (((word >= 0x3040) && (word <= 0x309F)) ||
    392       ((word >= 0x30A0) && (word <= 0x30FF)) ||
    393       ((word >= 0x31F0) && (word <= 0x31FF)) ||
    394       ((word >= 0xFF00) && (word <= 0xFFEF))) {
    395     return FXFONT_SHIFTJIS_CHARSET;
    396   }
    397 
    398   if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
    399       ((word >= 0x1100) && (word <= 0x11FF)) ||
    400       ((word >= 0x3130) && (word <= 0x318F))) {
    401     return FXFONT_HANGUL_CHARSET;
    402   }
    403 
    404   if (word >= 0x0E00 && word <= 0x0E7F)
    405     return FXFONT_THAI_CHARSET;
    406 
    407   if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
    408     return FXFONT_GREEK_CHARSET;
    409 
    410   if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
    411     return FXFONT_ARABIC_CHARSET;
    412 
    413   if (word >= 0x0590 && word <= 0x05FF)
    414     return FXFONT_HEBREW_CHARSET;
    415 
    416   if (word >= 0x0400 && word <= 0x04FF)
    417     return FXFONT_RUSSIAN_CHARSET;
    418 
    419   if (word >= 0x0100 && word <= 0x024F)
    420     return FXFONT_EASTEUROPE_CHARSET;
    421 
    422   if (word >= 0x1E00 && word <= 0x1EFF)
    423     return FXFONT_VIETNAMESE_CHARSET;
    424 
    425   return FXFONT_ANSI_CHARSET;
    426 }
    427