1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "fpdfsdk/pdfwindow/PWL_FontMap.h" 8 9 #include <utility> 10 11 #include "core/fpdfapi/cpdf_modulemgr.h" 12 #include "core/fpdfapi/font/cpdf_font.h" 13 #include "core/fpdfapi/font/cpdf_fontencoding.h" 14 #include "core/fpdfapi/parser/cpdf_document.h" 15 #include "core/fpdfapi/parser/cpdf_parser.h" 16 #include "core/fpdfdoc/ipvt_fontmap.h" 17 #include "fpdfsdk/pdfwindow/PWL_Wnd.h" 18 #include "third_party/base/ptr_util.h" 19 #include "third_party/base/stl_util.h" 20 21 namespace { 22 23 const char kDefaultFontName[] = "Helvetica"; 24 25 const char* const g_sDEStandardFontName[] = {"Courier", 26 "Courier-Bold", 27 "Courier-BoldOblique", 28 "Courier-Oblique", 29 "Helvetica", 30 "Helvetica-Bold", 31 "Helvetica-BoldOblique", 32 "Helvetica-Oblique", 33 "Times-Roman", 34 "Times-Bold", 35 "Times-Italic", 36 "Times-BoldItalic", 37 "Symbol", 38 "ZapfDingbats"}; 39 40 } // namespace 41 42 CPWL_FontMap::CPWL_FontMap(CFX_SystemHandler* pSystemHandler) 43 : m_pSystemHandler(pSystemHandler) { 44 ASSERT(m_pSystemHandler); 45 } 46 47 CPWL_FontMap::~CPWL_FontMap() { 48 Empty(); 49 } 50 51 CPDF_Document* CPWL_FontMap::GetDocument() { 52 if (!m_pPDFDoc) { 53 if (CPDF_ModuleMgr::Get()) { 54 m_pPDFDoc = pdfium::MakeUnique<CPDF_Document>(nullptr); 55 m_pPDFDoc->CreateNewDoc(); 56 } 57 } 58 59 return m_pPDFDoc.get(); 60 } 61 62 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) { 63 if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) { 64 if (m_Data[nFontIndex]) 65 return m_Data[nFontIndex]->pFont; 66 } 67 return nullptr; 68 } 69 70 CFX_ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) { 71 if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) { 72 if (m_Data[nFontIndex]) 73 return m_Data[nFontIndex]->sFontName; 74 } 75 return CFX_ByteString(); 76 } 77 78 bool CPWL_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) { 79 if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) { 80 if (m_Data[nFontIndex]) 81 return CharCodeFromUnicode(nFontIndex, word) >= 0; 82 } 83 return false; 84 } 85 86 int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word, 87 int32_t nCharset, 88 int32_t nFontIndex) { 89 if (nFontIndex > 0) { 90 if (KnowWord(nFontIndex, word)) 91 return nFontIndex; 92 } else { 93 if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) { 94 if (nCharset == FXFONT_DEFAULT_CHARSET || 95 pData->nCharset == FXFONT_SYMBOL_CHARSET || 96 nCharset == pData->nCharset) { 97 if (KnowWord(0, word)) 98 return 0; 99 } 100 } 101 } 102 103 int32_t nNewFontIndex = 104 GetFontIndex(GetNativeFontName(nCharset), nCharset, true); 105 if (nNewFontIndex >= 0) { 106 if (KnowWord(nNewFontIndex, word)) 107 return nNewFontIndex; 108 } 109 nNewFontIndex = 110 GetFontIndex("Arial Unicode MS", FXFONT_DEFAULT_CHARSET, false); 111 if (nNewFontIndex >= 0) { 112 if (KnowWord(nNewFontIndex, word)) 113 return nNewFontIndex; 114 } 115 return -1; 116 } 117 118 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) { 119 if (nFontIndex < 0 || nFontIndex >= pdfium::CollectionSize<int32_t>(m_Data)) 120 return -1; 121 122 CPWL_FontMap_Data* pData = m_Data[nFontIndex].get(); 123 if (!pData || !pData->pFont) 124 return -1; 125 126 if (pData->pFont->IsUnicodeCompatible()) 127 return pData->pFont->CharCodeFromUnicode(word); 128 129 return word < 0xFF ? word : -1; 130 } 131 132 CFX_ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) { 133 for (const auto& pData : m_NativeFont) { 134 if (pData && pData->nCharset == nCharset) 135 return pData->sFontName; 136 } 137 138 CFX_ByteString sNew = GetNativeFont(nCharset); 139 if (sNew.IsEmpty()) 140 return CFX_ByteString(); 141 142 auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Native>(); 143 pNewData->nCharset = nCharset; 144 pNewData->sFontName = sNew; 145 m_NativeFont.push_back(std::move(pNewData)); 146 return sNew; 147 } 148 149 void CPWL_FontMap::Empty() { 150 m_Data.clear(); 151 m_NativeFont.clear(); 152 } 153 154 void CPWL_FontMap::Initialize() { 155 GetFontIndex(kDefaultFontName, FXFONT_ANSI_CHARSET, false); 156 } 157 158 bool CPWL_FontMap::IsStandardFont(const CFX_ByteString& sFontName) { 159 for (size_t i = 0; i < FX_ArraySize(g_sDEStandardFontName); ++i) { 160 if (sFontName == g_sDEStandardFontName[i]) 161 return true; 162 } 163 164 return false; 165 } 166 167 int32_t CPWL_FontMap::FindFont(const CFX_ByteString& sFontName, 168 int32_t nCharset) { 169 int32_t i = 0; 170 for (const auto& pData : m_Data) { 171 if (pData && 172 (nCharset == FXFONT_DEFAULT_CHARSET || nCharset == pData->nCharset) && 173 (sFontName.IsEmpty() || pData->sFontName == sFontName)) { 174 return i; 175 } 176 ++i; 177 } 178 return -1; 179 } 180 181 int32_t CPWL_FontMap::GetFontIndex(const CFX_ByteString& sFontName, 182 int32_t nCharset, 183 bool bFind) { 184 int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset); 185 if (nFontIndex >= 0) 186 return nFontIndex; 187 188 CFX_ByteString sAlias; 189 CPDF_Font* pFont = nullptr; 190 if (bFind) 191 pFont = FindFontSameCharset(sAlias, nCharset); 192 193 if (!pFont) { 194 CFX_ByteString sTemp = sFontName; 195 pFont = AddFontToDocument(GetDocument(), sTemp, nCharset); 196 sAlias = EncodeFontAlias(sTemp, nCharset); 197 } 198 AddedFont(pFont, sAlias); 199 return AddFontData(pFont, sAlias, nCharset); 200 } 201 202 CPDF_Font* CPWL_FontMap::FindFontSameCharset(CFX_ByteString& sFontAlias, 203 int32_t nCharset) { 204 return nullptr; 205 } 206 207 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont, 208 const CFX_ByteString& sFontAlias, 209 int32_t nCharset) { 210 auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Data>(); 211 pNewData->pFont = pFont; 212 pNewData->sFontName = sFontAlias; 213 pNewData->nCharset = nCharset; 214 m_Data.push_back(std::move(pNewData)); 215 return pdfium::CollectionSize<int32_t>(m_Data) - 1; 216 } 217 218 void CPWL_FontMap::AddedFont(CPDF_Font* pFont, 219 const CFX_ByteString& sFontAlias) {} 220 221 CFX_ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) { 222 if (nCharset == FXFONT_DEFAULT_CHARSET) 223 nCharset = GetNativeCharset(); 224 225 CFX_ByteString sFontName = GetDefaultFontByCharset(nCharset); 226 if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName)) 227 return CFX_ByteString(); 228 229 return sFontName; 230 } 231 232 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc, 233 CFX_ByteString& sFontName, 234 uint8_t nCharset) { 235 if (IsStandardFont(sFontName)) 236 return AddStandardFont(pDoc, sFontName); 237 238 return AddSystemFont(pDoc, sFontName, nCharset); 239 } 240 241 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc, 242 CFX_ByteString& sFontName) { 243 if (!pDoc) 244 return nullptr; 245 246 CPDF_Font* pFont = nullptr; 247 248 if (sFontName == "ZapfDingbats") { 249 pFont = pDoc->AddStandardFont(sFontName.c_str(), nullptr); 250 } else { 251 CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI); 252 pFont = pDoc->AddStandardFont(sFontName.c_str(), &fe); 253 } 254 255 return pFont; 256 } 257 258 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc, 259 CFX_ByteString& sFontName, 260 uint8_t nCharset) { 261 if (!pDoc) 262 return nullptr; 263 264 if (sFontName.IsEmpty()) 265 sFontName = GetNativeFont(nCharset); 266 if (nCharset == FXFONT_DEFAULT_CHARSET) 267 nCharset = GetNativeCharset(); 268 269 return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName, 270 nCharset); 271 } 272 273 CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName, 274 int32_t nCharset) { 275 CFX_ByteString sPostfix; 276 sPostfix.Format("_%02X", nCharset); 277 return EncodeFontAlias(sFontName) + sPostfix; 278 } 279 280 CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName) { 281 CFX_ByteString sRet = sFontName; 282 sRet.Remove(' '); 283 return sRet; 284 } 285 286 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const { 287 if (nIndex < 0 || nIndex >= pdfium::CollectionSize<int32_t>(m_Data)) 288 return nullptr; 289 290 return m_Data[nIndex].get(); 291 } 292 293 int32_t CPWL_FontMap::GetNativeCharset() { 294 uint8_t nCharset = FXFONT_ANSI_CHARSET; 295 int32_t iCodePage = FXSYS_GetACP(); 296 switch (iCodePage) { 297 case 932: // Japan 298 nCharset = FXFONT_SHIFTJIS_CHARSET; 299 break; 300 case 936: // Chinese (PRC, Singapore) 301 nCharset = FXFONT_GB2312_CHARSET; 302 break; 303 case 950: // Chinese (Taiwan; Hong Kong SAR, PRC) 304 nCharset = FXFONT_GB2312_CHARSET; 305 break; 306 case 1252: // Windows 3.1 Latin 1 (US, Western Europe) 307 nCharset = FXFONT_ANSI_CHARSET; 308 break; 309 case 874: // Thai 310 nCharset = FXFONT_THAI_CHARSET; 311 break; 312 case 949: // Korean 313 nCharset = FXFONT_HANGUL_CHARSET; 314 break; 315 case 1200: // Unicode (BMP of ISO 10646) 316 nCharset = FXFONT_ANSI_CHARSET; 317 break; 318 case 1250: // Windows 3.1 Eastern European 319 nCharset = FXFONT_EASTEUROPE_CHARSET; 320 break; 321 case 1251: // Windows 3.1 Cyrillic 322 nCharset = FXFONT_RUSSIAN_CHARSET; 323 break; 324 case 1253: // Windows 3.1 Greek 325 nCharset = FXFONT_GREEK_CHARSET; 326 break; 327 case 1254: // Windows 3.1 Turkish 328 nCharset = FXFONT_TURKISH_CHARSET; 329 break; 330 case 1255: // Hebrew 331 nCharset = FXFONT_HEBREW_CHARSET; 332 break; 333 case 1256: // Arabic 334 nCharset = FXFONT_ARABIC_CHARSET; 335 break; 336 case 1257: // Baltic 337 nCharset = FXFONT_BALTIC_CHARSET; 338 break; 339 case 1258: // Vietnamese 340 nCharset = FXFONT_VIETNAMESE_CHARSET; 341 break; 342 case 1361: // Korean(Johab) 343 nCharset = FXFONT_JOHAB_CHARSET; 344 break; 345 } 346 return nCharset; 347 } 348 349 const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = { 350 {FXFONT_ANSI_CHARSET, "Helvetica"}, 351 {FXFONT_GB2312_CHARSET, "SimSun"}, 352 {FXFONT_CHINESEBIG5_CHARSET, "MingLiU"}, 353 {FXFONT_SHIFTJIS_CHARSET, "MS Gothic"}, 354 {FXFONT_HANGUL_CHARSET, "Batang"}, 355 {FXFONT_RUSSIAN_CHARSET, "Arial"}, 356 #if _FXM_PLATFORM_ == _FXM_PLATFORM_LINUX_ || \ 357 _FXM_PLATFORM_ == _FXM_PLATFORM_APPLE_ 358 {FXFONT_EASTEUROPE_CHARSET, "Arial"}, 359 #else 360 {FXFONT_EASTEUROPE_CHARSET, "Tahoma"}, 361 #endif 362 {FXFONT_ARABIC_CHARSET, "Arial"}, 363 {-1, nullptr}}; 364 365 CFX_ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) { 366 int i = 0; 367 while (defaultTTFMap[i].charset != -1) { 368 if (nCharset == defaultTTFMap[i].charset) 369 return defaultTTFMap[i].fontname; 370 ++i; 371 } 372 return ""; 373 } 374 375 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) { 376 // to avoid CJK Font to show ASCII 377 if (word < 0x7F) 378 return FXFONT_ANSI_CHARSET; 379 // follow the old charset 380 if (nOldCharset != FXFONT_DEFAULT_CHARSET) 381 return nOldCharset; 382 383 // find new charset 384 if ((word >= 0x4E00 && word <= 0x9FA5) || 385 (word >= 0xE7C7 && word <= 0xE7F3) || 386 (word >= 0x3000 && word <= 0x303F) || 387 (word >= 0x2000 && word <= 0x206F)) { 388 return FXFONT_GB2312_CHARSET; 389 } 390 391 if (((word >= 0x3040) && (word <= 0x309F)) || 392 ((word >= 0x30A0) && (word <= 0x30FF)) || 393 ((word >= 0x31F0) && (word <= 0x31FF)) || 394 ((word >= 0xFF00) && (word <= 0xFFEF))) { 395 return FXFONT_SHIFTJIS_CHARSET; 396 } 397 398 if (((word >= 0xAC00) && (word <= 0xD7AF)) || 399 ((word >= 0x1100) && (word <= 0x11FF)) || 400 ((word >= 0x3130) && (word <= 0x318F))) { 401 return FXFONT_HANGUL_CHARSET; 402 } 403 404 if (word >= 0x0E00 && word <= 0x0E7F) 405 return FXFONT_THAI_CHARSET; 406 407 if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF)) 408 return FXFONT_GREEK_CHARSET; 409 410 if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC)) 411 return FXFONT_ARABIC_CHARSET; 412 413 if (word >= 0x0590 && word <= 0x05FF) 414 return FXFONT_HEBREW_CHARSET; 415 416 if (word >= 0x0400 && word <= 0x04FF) 417 return FXFONT_RUSSIAN_CHARSET; 418 419 if (word >= 0x0100 && word <= 0x024F) 420 return FXFONT_EASTEUROPE_CHARSET; 421 422 if (word >= 0x1E00 && word <= 0x1EFF) 423 return FXFONT_VIETNAMESE_CHARSET; 424 425 return FXFONT_ANSI_CHARSET; 426 } 427