1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "fpdfsdk/pwl/cpwl_font_map.h" 8 9 #include <utility> 10 11 #include "core/fpdfapi/cpdf_modulemgr.h" 12 #include "core/fpdfapi/font/cpdf_font.h" 13 #include "core/fpdfapi/font/cpdf_fontencoding.h" 14 #include "core/fpdfapi/parser/cpdf_document.h" 15 #include "core/fpdfapi/parser/cpdf_parser.h" 16 #include "core/fpdfdoc/ipvt_fontmap.h" 17 #include "core/fxcrt/fx_codepage.h" 18 #include "fpdfsdk/pwl/cpwl_wnd.h" 19 #include "third_party/base/ptr_util.h" 20 #include "third_party/base/stl_util.h" 21 22 namespace { 23 24 const char kDefaultFontName[] = "Helvetica"; 25 26 const char* const g_sDEStandardFontName[] = {"Courier", 27 "Courier-Bold", 28 "Courier-BoldOblique", 29 "Courier-Oblique", 30 "Helvetica", 31 "Helvetica-Bold", 32 "Helvetica-BoldOblique", 33 "Helvetica-Oblique", 34 "Times-Roman", 35 "Times-Bold", 36 "Times-Italic", 37 "Times-BoldItalic", 38 "Symbol", 39 "ZapfDingbats"}; 40 41 } // namespace 42 43 CPWL_FontMap::CPWL_FontMap(CFX_SystemHandler* pSystemHandler) 44 : m_pSystemHandler(pSystemHandler) { 45 ASSERT(m_pSystemHandler); 46 } 47 48 CPWL_FontMap::~CPWL_FontMap() { 49 Empty(); 50 } 51 52 CPDF_Document* CPWL_FontMap::GetDocument() { 53 if (!m_pPDFDoc) { 54 if (CPDF_ModuleMgr::Get()) { 55 m_pPDFDoc = pdfium::MakeUnique<CPDF_Document>(nullptr); 56 m_pPDFDoc->CreateNewDoc(); 57 } 58 } 59 return m_pPDFDoc.get(); 60 } 61 62 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) { 63 if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex]) 64 return m_Data[nFontIndex]->pFont; 65 66 return nullptr; 67 } 68 69 ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) { 70 if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex]) 71 return m_Data[nFontIndex]->sFontName; 72 73 return ByteString(); 74 } 75 76 bool CPWL_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) { 77 return pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex] && 78 CharCodeFromUnicode(nFontIndex, word) >= 0; 79 } 80 81 int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word, 82 int32_t nCharset, 83 int32_t nFontIndex) { 84 if (nFontIndex > 0) { 85 if (KnowWord(nFontIndex, word)) 86 return nFontIndex; 87 } else { 88 if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) { 89 if (nCharset == FX_CHARSET_Default || 90 pData->nCharset == FX_CHARSET_Symbol || nCharset == pData->nCharset) { 91 if (KnowWord(0, word)) 92 return 0; 93 } 94 } 95 } 96 97 int32_t nNewFontIndex = 98 GetFontIndex(GetNativeFontName(nCharset), nCharset, true); 99 if (nNewFontIndex >= 0) { 100 if (KnowWord(nNewFontIndex, word)) 101 return nNewFontIndex; 102 } 103 nNewFontIndex = GetFontIndex("Arial Unicode MS", FX_CHARSET_Default, false); 104 if (nNewFontIndex >= 0) { 105 if (KnowWord(nNewFontIndex, word)) 106 return nNewFontIndex; 107 } 108 return -1; 109 } 110 111 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) { 112 if (!pdfium::IndexInBounds(m_Data, nFontIndex)) 113 return -1; 114 115 CPWL_FontMap_Data* pData = m_Data[nFontIndex].get(); 116 if (!pData || !pData->pFont) 117 return -1; 118 119 if (pData->pFont->IsUnicodeCompatible()) 120 return pData->pFont->CharCodeFromUnicode(word); 121 122 return word < 0xFF ? word : -1; 123 } 124 125 ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) { 126 for (const auto& pData : m_NativeFont) { 127 if (pData && pData->nCharset == nCharset) 128 return pData->sFontName; 129 } 130 131 ByteString sNew = GetNativeFont(nCharset); 132 if (sNew.IsEmpty()) 133 return ByteString(); 134 135 auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Native>(); 136 pNewData->nCharset = nCharset; 137 pNewData->sFontName = sNew; 138 m_NativeFont.push_back(std::move(pNewData)); 139 return sNew; 140 } 141 142 void CPWL_FontMap::Empty() { 143 m_Data.clear(); 144 m_NativeFont.clear(); 145 } 146 147 void CPWL_FontMap::Initialize() { 148 GetFontIndex(kDefaultFontName, FX_CHARSET_ANSI, false); 149 } 150 151 bool CPWL_FontMap::IsStandardFont(const ByteString& sFontName) { 152 for (const char* name : g_sDEStandardFontName) { 153 if (sFontName == name) 154 return true; 155 } 156 157 return false; 158 } 159 160 int32_t CPWL_FontMap::FindFont(const ByteString& sFontName, int32_t nCharset) { 161 int32_t i = 0; 162 for (const auto& pData : m_Data) { 163 if (pData && 164 (nCharset == FX_CHARSET_Default || nCharset == pData->nCharset) && 165 (sFontName.IsEmpty() || pData->sFontName == sFontName)) { 166 return i; 167 } 168 ++i; 169 } 170 return -1; 171 } 172 173 int32_t CPWL_FontMap::GetFontIndex(const ByteString& sFontName, 174 int32_t nCharset, 175 bool bFind) { 176 int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset); 177 if (nFontIndex >= 0) 178 return nFontIndex; 179 180 ByteString sAlias; 181 CPDF_Font* pFont = bFind ? FindFontSameCharset(&sAlias, nCharset) : nullptr; 182 if (!pFont) { 183 ByteString sTemp = sFontName; 184 pFont = AddFontToDocument(GetDocument(), sTemp, nCharset); 185 sAlias = EncodeFontAlias(sTemp, nCharset); 186 } 187 AddedFont(pFont, sAlias); 188 return AddFontData(pFont, sAlias, nCharset); 189 } 190 191 CPDF_Font* CPWL_FontMap::FindFontSameCharset(ByteString* sFontAlias, 192 int32_t nCharset) { 193 return nullptr; 194 } 195 196 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont, 197 const ByteString& sFontAlias, 198 int32_t nCharset) { 199 auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Data>(); 200 pNewData->pFont = pFont; 201 pNewData->sFontName = sFontAlias; 202 pNewData->nCharset = nCharset; 203 m_Data.push_back(std::move(pNewData)); 204 return pdfium::CollectionSize<int32_t>(m_Data) - 1; 205 } 206 207 void CPWL_FontMap::AddedFont(CPDF_Font* pFont, const ByteString& sFontAlias) {} 208 209 ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) { 210 if (nCharset == FX_CHARSET_Default) 211 nCharset = GetNativeCharset(); 212 213 ByteString sFontName = GetDefaultFontByCharset(nCharset); 214 if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName)) 215 return ByteString(); 216 217 return sFontName; 218 } 219 220 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc, 221 ByteString& sFontName, 222 uint8_t nCharset) { 223 if (IsStandardFont(sFontName)) 224 return AddStandardFont(pDoc, sFontName); 225 226 return AddSystemFont(pDoc, sFontName, nCharset); 227 } 228 229 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc, 230 ByteString& sFontName) { 231 if (!pDoc) 232 return nullptr; 233 234 CPDF_Font* pFont = nullptr; 235 236 if (sFontName == "ZapfDingbats") { 237 pFont = pDoc->AddStandardFont(sFontName.c_str(), nullptr); 238 } else { 239 CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI); 240 pFont = pDoc->AddStandardFont(sFontName.c_str(), &fe); 241 } 242 243 return pFont; 244 } 245 246 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc, 247 ByteString& sFontName, 248 uint8_t nCharset) { 249 if (!pDoc) 250 return nullptr; 251 252 if (sFontName.IsEmpty()) 253 sFontName = GetNativeFont(nCharset); 254 if (nCharset == FX_CHARSET_Default) 255 nCharset = GetNativeCharset(); 256 257 return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName, 258 nCharset); 259 } 260 261 ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName, 262 int32_t nCharset) { 263 return EncodeFontAlias(sFontName) + ByteString::Format("_%02X", nCharset); 264 } 265 266 ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName) { 267 ByteString sRet = sFontName; 268 sRet.Remove(' '); 269 return sRet; 270 } 271 272 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const { 273 return pdfium::IndexInBounds(m_Data, nIndex) ? m_Data[nIndex].get() : nullptr; 274 } 275 276 int32_t CPWL_FontMap::GetNativeCharset() { 277 uint8_t nCharset = FX_CHARSET_ANSI; 278 int32_t iCodePage = FXSYS_GetACP(); 279 switch (iCodePage) { 280 case FX_CODEPAGE_ShiftJIS: 281 nCharset = FX_CHARSET_ShiftJIS; 282 break; 283 case FX_CODEPAGE_ChineseSimplified: 284 nCharset = FX_CHARSET_ChineseSimplified; 285 break; 286 case FX_CODEPAGE_ChineseTraditional: 287 nCharset = FX_CHARSET_ChineseTraditional; 288 break; 289 case FX_CODEPAGE_MSWin_WesternEuropean: 290 nCharset = FX_CHARSET_ANSI; 291 break; 292 case FX_CODEPAGE_MSDOS_Thai: 293 nCharset = FX_CHARSET_Thai; 294 break; 295 case FX_CODEPAGE_Hangul: 296 nCharset = FX_CHARSET_Hangul; 297 break; 298 case FX_CODEPAGE_UTF16LE: 299 nCharset = FX_CHARSET_ANSI; 300 break; 301 case FX_CODEPAGE_MSWin_EasternEuropean: 302 nCharset = FX_CHARSET_MSWin_EasternEuropean; 303 break; 304 case FX_CODEPAGE_MSWin_Cyrillic: 305 nCharset = FX_CHARSET_MSWin_Cyrillic; 306 break; 307 case FX_CODEPAGE_MSWin_Greek: 308 nCharset = FX_CHARSET_MSWin_Greek; 309 break; 310 case FX_CODEPAGE_MSWin_Turkish: 311 nCharset = FX_CHARSET_MSWin_Turkish; 312 break; 313 case FX_CODEPAGE_MSWin_Hebrew: 314 nCharset = FX_CHARSET_MSWin_Hebrew; 315 break; 316 case FX_CODEPAGE_MSWin_Arabic: 317 nCharset = FX_CHARSET_MSWin_Arabic; 318 break; 319 case FX_CODEPAGE_MSWin_Baltic: 320 nCharset = FX_CHARSET_MSWin_Baltic; 321 break; 322 case FX_CODEPAGE_MSWin_Vietnamese: 323 nCharset = FX_CHARSET_MSWin_Vietnamese; 324 break; 325 case FX_CODEPAGE_Johab: 326 nCharset = FX_CHARSET_Johab; 327 break; 328 } 329 return nCharset; 330 } 331 332 const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = { 333 {FX_CHARSET_ANSI, "Helvetica"}, 334 {FX_CHARSET_ChineseSimplified, "SimSun"}, 335 {FX_CHARSET_ChineseTraditional, "MingLiU"}, 336 {FX_CHARSET_ShiftJIS, "MS Gothic"}, 337 {FX_CHARSET_Hangul, "Batang"}, 338 {FX_CHARSET_MSWin_Cyrillic, "Arial"}, 339 #if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ 340 {FX_CHARSET_MSWin_EasternEuropean, "Arial"}, 341 #else 342 {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"}, 343 #endif 344 {FX_CHARSET_MSWin_Arabic, "Arial"}, 345 {-1, nullptr}}; 346 347 ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) { 348 int i = 0; 349 while (defaultTTFMap[i].charset != -1) { 350 if (nCharset == defaultTTFMap[i].charset) 351 return defaultTTFMap[i].fontname; 352 ++i; 353 } 354 return ""; 355 } 356 357 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) { 358 // to avoid CJK Font to show ASCII 359 if (word < 0x7F) 360 return FX_CHARSET_ANSI; 361 // follow the old charset 362 if (nOldCharset != FX_CHARSET_Default) 363 return nOldCharset; 364 365 // find new charset 366 if ((word >= 0x4E00 && word <= 0x9FA5) || 367 (word >= 0xE7C7 && word <= 0xE7F3) || 368 (word >= 0x3000 && word <= 0x303F) || 369 (word >= 0x2000 && word <= 0x206F)) { 370 return FX_CHARSET_ChineseSimplified; 371 } 372 373 if (((word >= 0x3040) && (word <= 0x309F)) || 374 ((word >= 0x30A0) && (word <= 0x30FF)) || 375 ((word >= 0x31F0) && (word <= 0x31FF)) || 376 ((word >= 0xFF00) && (word <= 0xFFEF))) { 377 return FX_CHARSET_ShiftJIS; 378 } 379 380 if (((word >= 0xAC00) && (word <= 0xD7AF)) || 381 ((word >= 0x1100) && (word <= 0x11FF)) || 382 ((word >= 0x3130) && (word <= 0x318F))) { 383 return FX_CHARSET_Hangul; 384 } 385 386 if (word >= 0x0E00 && word <= 0x0E7F) 387 return FX_CHARSET_Thai; 388 389 if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF)) 390 return FX_CHARSET_MSWin_Greek; 391 392 if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC)) 393 return FX_CHARSET_MSWin_Arabic; 394 395 if (word >= 0x0590 && word <= 0x05FF) 396 return FX_CHARSET_MSWin_Hebrew; 397 398 if (word >= 0x0400 && word <= 0x04FF) 399 return FX_CHARSET_MSWin_Cyrillic; 400 401 if (word >= 0x0100 && word <= 0x024F) 402 return FX_CHARSET_MSWin_EasternEuropean; 403 404 if (word >= 0x1E00 && word <= 0x1EFF) 405 return FX_CHARSET_MSWin_Vietnamese; 406 407 return FX_CHARSET_ANSI; 408 } 409