Home | History | Annotate | Download | only in pdfwindow
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "fpdfsdk/include/pdfwindow/PWL_FontMap.h"
      8 
      9 #include "core/include/fpdfapi/fpdf_module.h"
     10 #include "fpdfsdk/include/pdfwindow/PWL_Wnd.h"
     11 
     namespace {

     // Font requested by CPWL_FontMap::Initialize().
     const char kDefaultFontName[] = "Helvetica";

     // Names that CPWL_FontMap::IsStandardFont() accepts, grouped by family.
     const char* const g_sDEStandardFontName[] = {
         // Courier family
         "Courier", "Courier-Bold", "Courier-BoldOblique", "Courier-Oblique",
         // Helvetica family
         "Helvetica", "Helvetica-Bold", "Helvetica-BoldOblique",
         "Helvetica-Oblique",
         // Times family
         "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic",
         // Symbolic fonts
         "Symbol", "ZapfDingbats"};

     }  // namespace
     32 
     33 CPWL_FontMap::CPWL_FontMap(IFX_SystemHandler* pSystemHandler)
     34     : m_pPDFDoc(NULL), m_pSystemHandler(pSystemHandler) {
     35   ASSERT(m_pSystemHandler);
     36 }
     37 
     38 CPWL_FontMap::~CPWL_FontMap() {
     39   delete m_pPDFDoc;
     40   m_pPDFDoc = NULL;
     41 
     42   Empty();
     43 }
     44 
     45 void CPWL_FontMap::SetSystemHandler(IFX_SystemHandler* pSystemHandler) {
     46   m_pSystemHandler = pSystemHandler;
     47 }
     48 
     49 CPDF_Document* CPWL_FontMap::GetDocument() {
     50   if (!m_pPDFDoc) {
     51     if (CPDF_ModuleMgr::Get()) {
     52       m_pPDFDoc = new CPDF_Document;
     53       m_pPDFDoc->CreateNewDoc();
     54     }
     55   }
     56 
     57   return m_pPDFDoc;
     58 }
     59 
     60 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
     61   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
     62     if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
     63       return pData->pFont;
     64     }
     65   }
     66 
     67   return NULL;
     68 }
     69 
     70 CFX_ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
     71   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
     72     if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
     73       return pData->sFontName;
     74     }
     75   }
     76 
     77   return "";
     78 }
     79 
     80 FX_BOOL CPWL_FontMap::KnowWord(int32_t nFontIndex, FX_WORD word) {
     81   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
     82     if (m_aData.GetAt(nFontIndex)) {
     83       return CharCodeFromUnicode(nFontIndex, word) >= 0;
     84     }
     85   }
     86 
     87   return FALSE;
     88 }
     89 
     90 int32_t CPWL_FontMap::GetWordFontIndex(FX_WORD word,
     91                                        int32_t nCharset,
     92                                        int32_t nFontIndex) {
     93   if (nFontIndex > 0) {
     94     if (KnowWord(nFontIndex, word))
     95       return nFontIndex;
     96   } else {
     97     if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
     98       if (nCharset == DEFAULT_CHARSET || pData->nCharset == SYMBOL_CHARSET ||
     99           nCharset == pData->nCharset) {
    100         if (KnowWord(0, word))
    101           return 0;
    102       }
    103     }
    104   }
    105 
    106   int32_t nNewFontIndex =
    107       GetFontIndex(GetNativeFontName(nCharset), nCharset, TRUE);
    108   if (nNewFontIndex >= 0) {
    109     if (KnowWord(nNewFontIndex, word))
    110       return nNewFontIndex;
    111   }
    112   nNewFontIndex = GetFontIndex("Arial Unicode MS", DEFAULT_CHARSET, FALSE);
    113   if (nNewFontIndex >= 0) {
    114     if (KnowWord(nNewFontIndex, word))
    115       return nNewFontIndex;
    116   }
    117   return -1;
    118 }
    119 
    120 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, FX_WORD word) {
    121   if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
    122     if (pData->pFont) {
    123       if (pData->pFont->IsUnicodeCompatible()) {
    124         int nCharCode = pData->pFont->CharCodeFromUnicode(word);
    125         pData->pFont->GlyphFromCharCode(nCharCode);
    126         return nCharCode;
    127       }
    128       if (word < 0xFF)
    129         return word;
    130     }
    131   }
    132   return -1;
    133 }
    134 
    135 CFX_ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
    136   // searching native font is slow, so we must save time
    137   for (int32_t i = 0, sz = m_aNativeFont.GetSize(); i < sz; i++) {
    138     if (CPWL_FontMap_Native* pData = m_aNativeFont.GetAt(i)) {
    139       if (pData->nCharset == nCharset)
    140         return pData->sFontName;
    141     }
    142   }
    143 
    144   CFX_ByteString sNew = GetNativeFont(nCharset);
    145 
    146   if (!sNew.IsEmpty()) {
    147     CPWL_FontMap_Native* pNewData = new CPWL_FontMap_Native;
    148     pNewData->nCharset = nCharset;
    149     pNewData->sFontName = sNew;
    150 
    151     m_aNativeFont.Add(pNewData);
    152   }
    153 
    154   return sNew;
    155 }
    156 
    157 void CPWL_FontMap::Empty() {
    158   {
    159     for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++)
    160       delete m_aData.GetAt(i);
    161 
    162     m_aData.RemoveAll();
    163   }
    164   {
    165     for (int32_t i = 0, sz = m_aNativeFont.GetSize(); i < sz; i++)
    166       delete m_aNativeFont.GetAt(i);
    167 
    168     m_aNativeFont.RemoveAll();
    169   }
    170 }
    171 
    172 void CPWL_FontMap::Initialize() {
    173   GetFontIndex(kDefaultFontName, ANSI_CHARSET, FALSE);
    174 }
    175 
    176 FX_BOOL CPWL_FontMap::IsStandardFont(const CFX_ByteString& sFontName) {
    177   for (int32_t i = 0; i < FX_ArraySize(g_sDEStandardFontName); ++i) {
    178     if (sFontName == g_sDEStandardFontName[i])
    179       return TRUE;
    180   }
    181 
    182   return FALSE;
    183 }
    184 
    185 int32_t CPWL_FontMap::FindFont(const CFX_ByteString& sFontName,
    186                                int32_t nCharset) {
    187   for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++) {
    188     if (CPWL_FontMap_Data* pData = m_aData.GetAt(i)) {
    189       if (nCharset == DEFAULT_CHARSET || nCharset == pData->nCharset) {
    190         if (sFontName.IsEmpty() || pData->sFontName == sFontName)
    191           return i;
    192       }
    193     }
    194   }
    195 
    196   return -1;
    197 }
    198 
    199 int32_t CPWL_FontMap::GetFontIndex(const CFX_ByteString& sFontName,
    200                                    int32_t nCharset,
    201                                    FX_BOOL bFind) {
    202   int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
    203   if (nFontIndex >= 0)
    204     return nFontIndex;
    205 
    206   CFX_ByteString sAlias;
    207   CPDF_Font* pFont = NULL;
    208   if (bFind)
    209     pFont = FindFontSameCharset(sAlias, nCharset);
    210 
    211   if (!pFont) {
    212     CFX_ByteString sTemp = sFontName;
    213     pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
    214     sAlias = EncodeFontAlias(sTemp, nCharset);
    215   }
    216   AddedFont(pFont, sAlias);
    217   return AddFontData(pFont, sAlias, nCharset);
    218 }
    219 
    220 int32_t CPWL_FontMap::GetPWLFontIndex(FX_WORD word, int32_t nCharset) {
    221   int32_t nFind = -1;
    222 
    223   for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++) {
    224     if (CPWL_FontMap_Data* pData = m_aData.GetAt(i)) {
    225       if (pData->nCharset == nCharset) {
    226         nFind = i;
    227         break;
    228       }
    229     }
    230   }
    231 
    232   CPDF_Font* pNewFont = GetPDFFont(nFind);
    233 
    234   if (!pNewFont)
    235     return -1;
    236 
    237   CFX_ByteString sAlias = EncodeFontAlias("Arial_Chrome", nCharset);
    238   AddedFont(pNewFont, sAlias);
    239 
    240   return AddFontData(pNewFont, sAlias, nCharset);
    241 }
    242 
    243 CPDF_Font* CPWL_FontMap::FindFontSameCharset(CFX_ByteString& sFontAlias,
    244                                              int32_t nCharset) {
    245   return NULL;
    246 }
    247 
    248 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
    249                                   const CFX_ByteString& sFontAlias,
    250                                   int32_t nCharset) {
    251   CPWL_FontMap_Data* pNewData = new CPWL_FontMap_Data;
    252   pNewData->pFont = pFont;
    253   pNewData->sFontName = sFontAlias;
    254   pNewData->nCharset = nCharset;
    255 
    256   m_aData.Add(pNewData);
    257 
    258   return m_aData.GetSize() - 1;
    259 }
    260 
    261 void CPWL_FontMap::AddedFont(CPDF_Font* pFont,
    262                              const CFX_ByteString& sFontAlias) {}
    263 
    264 CFX_ByteString CPWL_FontMap::GetFontName(int32_t nFontIndex) {
    265   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
    266     if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
    267       return pData->sFontName;
    268     }
    269   }
    270 
    271   return "";
    272 }
    273 
    274 CFX_ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
    275   if (nCharset == DEFAULT_CHARSET)
    276     nCharset = GetNativeCharset();
    277 
    278   CFX_ByteString sFontName = GetDefaultFontByCharset(nCharset);
    279   if (m_pSystemHandler) {
    280     if (m_pSystemHandler->FindNativeTrueTypeFont(nCharset, sFontName))
    281       return sFontName;
    282 
    283     sFontName = m_pSystemHandler->GetNativeTrueTypeFont(nCharset);
    284   }
    285   return sFontName;
    286 }
    287 
    288 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
    289                                            CFX_ByteString& sFontName,
    290                                            uint8_t nCharset) {
    291   if (IsStandardFont(sFontName))
    292     return AddStandardFont(pDoc, sFontName);
    293 
    294   return AddSystemFont(pDoc, sFontName, nCharset);
    295 }
    296 
    297 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
    298                                          CFX_ByteString& sFontName) {
    299   if (!pDoc)
    300     return NULL;
    301 
    302   CPDF_Font* pFont = NULL;
    303 
    304   if (sFontName == "ZapfDingbats") {
    305     pFont = pDoc->AddStandardFont(sFontName, NULL);
    306   } else {
    307     CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
    308     pFont = pDoc->AddStandardFont(sFontName, &fe);
    309   }
    310 
    311   return pFont;
    312 }
    313 
    314 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
    315                                        CFX_ByteString& sFontName,
    316                                        uint8_t nCharset) {
    317   if (!pDoc)
    318     return NULL;
    319 
    320   if (sFontName.IsEmpty())
    321     sFontName = GetNativeFont(nCharset);
    322   if (nCharset == DEFAULT_CHARSET)
    323     nCharset = GetNativeCharset();
    324 
    325   if (m_pSystemHandler)
    326     return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
    327                                                         nCharset);
    328 
    329   return NULL;
    330 }
    331 
    332 CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName,
    333                                              int32_t nCharset) {
    334   CFX_ByteString sPostfix;
    335   sPostfix.Format("_%02X", nCharset);
    336   return EncodeFontAlias(sFontName) + sPostfix;
    337 }
    338 
    339 CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName) {
    340   CFX_ByteString sRet = sFontName;
    341   sRet.Remove(' ');
    342   return sRet;
    343 }
    344 
    345 int32_t CPWL_FontMap::GetFontMapCount() const {
    346   return m_aData.GetSize();
    347 }
    348 
    349 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
    350   if (nIndex >= 0 && nIndex < m_aData.GetSize()) {
    351     return m_aData.GetAt(nIndex);
    352   }
    353 
    354   return NULL;
    355 }
    356 
    357 int32_t CPWL_FontMap::GetNativeCharset() {
    358   uint8_t nCharset = ANSI_CHARSET;
    359   int32_t iCodePage = FXSYS_GetACP();
    360   switch (iCodePage) {
    361     case 932:  // Japan
    362       nCharset = SHIFTJIS_CHARSET;
    363       break;
    364     case 936:  // Chinese (PRC, Singapore)
    365       nCharset = GB2312_CHARSET;
    366       break;
    367     case 950:  // Chinese (Taiwan; Hong Kong SAR, PRC)
    368       nCharset = GB2312_CHARSET;
    369       break;
    370     case 1252:  // Windows 3.1 Latin 1 (US, Western Europe)
    371       nCharset = ANSI_CHARSET;
    372       break;
    373     case 874:  // Thai
    374       nCharset = THAI_CHARSET;
    375       break;
    376     case 949:  // Korean
    377       nCharset = HANGUL_CHARSET;
    378       break;
    379     case 1200:  // Unicode (BMP of ISO 10646)
    380       nCharset = ANSI_CHARSET;
    381       break;
    382     case 1250:  // Windows 3.1 Eastern European
    383       nCharset = EASTEUROPE_CHARSET;
    384       break;
    385     case 1251:  // Windows 3.1 Cyrillic
    386       nCharset = RUSSIAN_CHARSET;
    387       break;
    388     case 1253:  // Windows 3.1 Greek
    389       nCharset = GREEK_CHARSET;
    390       break;
    391     case 1254:  // Windows 3.1 Turkish
    392       nCharset = TURKISH_CHARSET;
    393       break;
    394     case 1255:  // Hebrew
    395       nCharset = HEBREW_CHARSET;
    396       break;
    397     case 1256:  // Arabic
    398       nCharset = ARABIC_CHARSET;
    399       break;
    400     case 1257:  // Baltic
    401       nCharset = BALTIC_CHARSET;
    402       break;
    403     case 1258:  // Vietnamese
    404       nCharset = VIETNAMESE_CHARSET;
    405       break;
    406     case 1361:  // Korean(Johab)
    407       nCharset = JOHAB_CHARSET;
    408       break;
    409   }
    410   return nCharset;
    411 }
    412 
    413 const CPWL_FontMap::CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
    414     {ANSI_CHARSET, "Helvetica"},      {GB2312_CHARSET, "SimSun"},
    415     {CHINESEBIG5_CHARSET, "MingLiU"}, {SHIFTJIS_CHARSET, "MS Gothic"},
    416     {HANGUL_CHARSET, "Batang"},       {RUSSIAN_CHARSET, "Arial"},
    417 #if _FXM_PLATFORM_ == _FXM_PLATFORM_LINUX_ || \
    418     _FXM_PLATFORM_ == _FXM_PLATFORM_APPLE_
    419     {EASTEUROPE_CHARSET, "Arial"},
    420 #else
    421     {EASTEUROPE_CHARSET, "Tahoma"},
    422 #endif
    423     {ARABIC_CHARSET, "Arial"},        {-1, NULL}};
    424 
    425 CFX_ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
    426   int i = 0;
    427   while (defaultTTFMap[i].charset != -1) {
    428     if (nCharset == defaultTTFMap[i].charset)
    429       return defaultTTFMap[i].fontname;
    430     ++i;
    431   }
    432   return "";
    433 }
    434 
    435 int32_t CPWL_FontMap::CharSetFromUnicode(FX_WORD word, int32_t nOldCharset) {
    436   if (m_pSystemHandler && (-1 != m_pSystemHandler->GetCharSet()))
    437     return m_pSystemHandler->GetCharSet();
    438   // to avoid CJK Font to show ASCII
    439   if (word < 0x7F)
    440     return ANSI_CHARSET;
    441   // follow the old charset
    442   if (nOldCharset != DEFAULT_CHARSET)
    443     return nOldCharset;
    444 
    445   // find new charset
    446   if ((word >= 0x4E00 && word <= 0x9FA5) ||
    447       (word >= 0xE7C7 && word <= 0xE7F3) ||
    448       (word >= 0x3000 && word <= 0x303F) ||
    449       (word >= 0x2000 && word <= 0x206F)) {
    450     return GB2312_CHARSET;
    451   }
    452 
    453   if (((word >= 0x3040) && (word <= 0x309F)) ||
    454       ((word >= 0x30A0) && (word <= 0x30FF)) ||
    455       ((word >= 0x31F0) && (word <= 0x31FF)) ||
    456       ((word >= 0xFF00) && (word <= 0xFFEF))) {
    457     return SHIFTJIS_CHARSET;
    458   }
    459 
    460   if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
    461       ((word >= 0x1100) && (word <= 0x11FF)) ||
    462       ((word >= 0x3130) && (word <= 0x318F))) {
    463     return HANGUL_CHARSET;
    464   }
    465 
    466   if (word >= 0x0E00 && word <= 0x0E7F)
    467     return THAI_CHARSET;
    468 
    469   if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
    470     return GREEK_CHARSET;
    471 
    472   if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
    473     return ARABIC_CHARSET;
    474 
    475   if (word >= 0x0590 && word <= 0x05FF)
    476     return HEBREW_CHARSET;
    477 
    478   if (word >= 0x0400 && word <= 0x04FF)
    479     return RUSSIAN_CHARSET;
    480 
    481   if (word >= 0x0100 && word <= 0x024F)
    482     return EASTEUROPE_CHARSET;
    483 
    484   if (word >= 0x1E00 && word <= 0x1EFF)
    485     return VIETNAMESE_CHARSET;
    486 
    487   return ANSI_CHARSET;
    488 }
    489 
    490 CPWL_DocFontMap::CPWL_DocFontMap(IFX_SystemHandler* pSystemHandler,
    491                                  CPDF_Document* pAttachedDoc)
    492     : CPWL_FontMap(pSystemHandler), m_pAttachedDoc(pAttachedDoc) {}
    493 
    494 CPWL_DocFontMap::~CPWL_DocFontMap() {}
    495 
    496 CPDF_Document* CPWL_DocFontMap::GetDocument() {
    497   return m_pAttachedDoc;
    498 }
    499