1 // Copyright 2017 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <map> 6 #include <memory> 7 #include <utility> 8 #include <vector> 9 10 #include "core/fpdfapi/cpdf_modulemgr.h" 11 #include "core/fpdfapi/font/cpdf_font.h" 12 #include "core/fpdfapi/font/cpdf_type1font.h" 13 #include "core/fpdfapi/page/cpdf_docpagedata.h" 14 #include "core/fpdfapi/page/cpdf_textobject.h" 15 #include "core/fpdfapi/parser/cpdf_array.h" 16 #include "core/fpdfapi/parser/cpdf_dictionary.h" 17 #include "core/fpdfapi/parser/cpdf_document.h" 18 #include "core/fpdfapi/parser/cpdf_name.h" 19 #include "core/fpdfapi/parser/cpdf_number.h" 20 #include "core/fpdfapi/parser/cpdf_reference.h" 21 #include "core/fpdfapi/parser/cpdf_stream.h" 22 #include "core/fxcrt/fx_extension.h" 23 #include "core/fxge/cfx_fontmgr.h" 24 #include "core/fxge/fx_font.h" 25 #include "fpdfsdk/fsdk_define.h" 26 #include "public/fpdf_edit.h" 27 28 namespace { 29 30 CPDF_Dictionary* LoadFontDesc(CPDF_Document* pDoc, 31 const ByteString& font_name, 32 CFX_Font* pFont, 33 const uint8_t* data, 34 uint32_t size, 35 int font_type) { 36 CPDF_Dictionary* fontDesc = pDoc->NewIndirect<CPDF_Dictionary>(); 37 fontDesc->SetNewFor<CPDF_Name>("Type", "FontDescriptor"); 38 fontDesc->SetNewFor<CPDF_Name>("FontName", font_name); 39 int flags = 0; 40 if (FXFT_Is_Face_fixedwidth(pFont->GetFace())) 41 flags |= FXFONT_FIXED_PITCH; 42 if (font_name.Contains("Serif")) 43 flags |= FXFONT_SERIF; 44 if (FXFT_Is_Face_Italic(pFont->GetFace())) 45 flags |= FXFONT_ITALIC; 46 if (FXFT_Is_Face_Bold(pFont->GetFace())) 47 flags |= FXFONT_BOLD; 48 49 // TODO(npm): How do I know if a font is symbolic, script, allcap, smallcap 50 flags |= FXFONT_NONSYMBOLIC; 51 52 fontDesc->SetNewFor<CPDF_Number>("Flags", flags); 53 FX_RECT bbox; 54 pFont->GetBBox(bbox); 55 auto pBBox = pdfium::MakeUnique<CPDF_Array>(); 56 pBBox->AddNew<CPDF_Number>(bbox.left); 57 pBBox->AddNew<CPDF_Number>(bbox.top); 58 pBBox->AddNew<CPDF_Number>(bbox.right); 59 pBBox->AddNew<CPDF_Number>(bbox.bottom); 60 fontDesc->SetFor("FontBBox", std::move(pBBox)); 61 62 // TODO(npm): calculate italic angle correctly 63 fontDesc->SetNewFor<CPDF_Number>("ItalicAngle", pFont->IsItalic() ? -12 : 0); 64 65 fontDesc->SetNewFor<CPDF_Number>("Ascent", pFont->GetAscent()); 66 fontDesc->SetNewFor<CPDF_Number>("Descent", pFont->GetDescent()); 67 68 // TODO(npm): calculate the capheight, stemV correctly 69 fontDesc->SetNewFor<CPDF_Number>("CapHeight", pFont->GetAscent()); 70 fontDesc->SetNewFor<CPDF_Number>("StemV", pFont->IsBold() ? 120 : 70); 71 72 CPDF_Stream* pStream = pDoc->NewIndirect<CPDF_Stream>(); 73 pStream->SetData(data, size); 74 // TODO(npm): Lengths for Type1 fonts. 75 if (font_type == FPDF_FONT_TRUETYPE) { 76 pStream->GetDict()->SetNewFor<CPDF_Number>("Length1", 77 static_cast<int>(size)); 78 } 79 ByteString fontFile = font_type == FPDF_FONT_TYPE1 ? "FontFile" : "FontFile2"; 80 fontDesc->SetNewFor<CPDF_Reference>(fontFile, pDoc, pStream->GetObjNum()); 81 return fontDesc; 82 } 83 84 const char ToUnicodeStart[] = 85 "/CIDInit /ProcSet findresource begin\n" 86 "12 dict begin\n" 87 "begincmap\n" 88 "/CIDSystemInfo\n" 89 "<</Registry (Adobe)\n" 90 "/Ordering (Identity)\n" 91 "/Supplement 0\n" 92 ">> def\n" 93 "/CMapName /Adobe-Identity-H def\n" 94 "CMapType 2 def\n" 95 "1 begincodespacerange\n" 96 "<0000> <FFFFF>\n" 97 "endcodespacerange\n"; 98 99 const char ToUnicodeEnd[] = 100 "endcmap\n" 101 "CMapName currentdict /CMap defineresource pop\n" 102 "end\n" 103 "end\n"; 104 105 void AddCharcode(std::ostringstream* pBuffer, uint32_t number) { 106 ASSERT(number <= 0xFFFF); 107 *pBuffer << "<"; 108 char ans[4]; 109 FXSYS_IntToFourHexChars(number, ans); 110 for (size_t i = 0; i < 4; ++i) 111 *pBuffer << ans[i]; 112 *pBuffer << ">"; 113 } 114 115 // PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in 116 // UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description 117 void AddUnicode(std::ostringstream* pBuffer, uint32_t unicode) { 118 if (unicode >= 0xD800 && unicode <= 0xDFFF) 119 unicode = 0; 120 121 char ans[8]; 122 *pBuffer << "<"; 123 size_t numChars = FXSYS_ToUTF16BE(unicode, ans); 124 for (size_t i = 0; i < numChars; ++i) 125 *pBuffer << ans[i]; 126 *pBuffer << ">"; 127 } 128 129 // Loads the charcode to unicode mapping into a stream 130 CPDF_Stream* LoadUnicode(CPDF_Document* pDoc, 131 const std::map<uint32_t, uint32_t>& to_unicode) { 132 // A map charcode->unicode 133 std::map<uint32_t, uint32_t> char_to_uni; 134 // A map <char_start, char_end> to vector v of unicode characters of size (end 135 // - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec 136 // 1.7 Section 5.9.2 says that only the last byte of the unicode may change. 137 std::map<std::pair<uint32_t, uint32_t>, std::vector<uint32_t>> 138 map_range_vector; 139 // A map <start, end> -> unicode 140 // This abbreviates: start->unicode, start+1->unicode+1, etc. 141 // PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may 142 // change. 143 std::map<std::pair<uint32_t, uint32_t>, uint32_t> map_range; 144 145 // Calculate the maps 146 for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) { 147 uint32_t firstCharcode = iter->first; 148 uint32_t firstUnicode = iter->second; 149 if (std::next(iter) == to_unicode.end() || 150 firstCharcode + 1 != std::next(iter)->first) { 151 char_to_uni[firstCharcode] = firstUnicode; 152 continue; 153 } 154 ++iter; 155 uint32_t curCharcode = iter->first; 156 uint32_t curUnicode = iter->second; 157 if (curCharcode % 256 == 0) { 158 char_to_uni[firstCharcode] = firstUnicode; 159 char_to_uni[curCharcode] = curUnicode; 160 continue; 161 } 162 const size_t maxExtra = 255 - (curCharcode % 256); 163 auto next_it = std::next(iter); 164 if (firstUnicode + 1 != curUnicode) { 165 // Consecutive charcodes mapping to non-consecutive unicodes 166 std::vector<uint32_t> unicodes; 167 unicodes.push_back(firstUnicode); 168 unicodes.push_back(curUnicode); 169 for (size_t i = 0; i < maxExtra; ++i) { 170 if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first) 171 break; 172 ++iter; 173 ++curCharcode; 174 unicodes.push_back(iter->second); 175 next_it = std::next(iter); 176 } 177 ASSERT(iter->first - firstCharcode + 1 == unicodes.size()); 178 map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes; 179 continue; 180 } 181 // Consecutive charcodes mapping to consecutive unicodes 182 for (size_t i = 0; i < maxExtra; ++i) { 183 if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first || 184 curUnicode + 1 != next_it->second) { 185 break; 186 } 187 ++iter; 188 ++curCharcode; 189 ++curUnicode; 190 next_it = std::next(iter); 191 } 192 map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode; 193 } 194 std::ostringstream buffer; 195 buffer << ToUnicodeStart; 196 // Add maps to buffer 197 buffer << static_cast<uint32_t>(char_to_uni.size()) << " beginbfchar\n"; 198 for (const auto& iter : char_to_uni) { 199 AddCharcode(&buffer, iter.first); 200 buffer << " "; 201 AddUnicode(&buffer, iter.second); 202 buffer << "\n"; 203 } 204 buffer << "endbfchar\n" 205 << static_cast<uint32_t>(map_range_vector.size() + map_range.size()) 206 << " beginbfrange\n"; 207 for (const auto& iter : map_range_vector) { 208 const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first; 209 AddCharcode(&buffer, charcodeRange.first); 210 buffer << " "; 211 AddCharcode(&buffer, charcodeRange.second); 212 buffer << " ["; 213 const std::vector<uint32_t>& unicodes = iter.second; 214 for (size_t i = 0; i < unicodes.size(); ++i) { 215 uint32_t uni = unicodes[i]; 216 AddUnicode(&buffer, uni); 217 if (i != unicodes.size() - 1) 218 buffer << " "; 219 } 220 buffer << "]\n"; 221 } 222 for (const auto& iter : map_range) { 223 const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first; 224 AddCharcode(&buffer, charcodeRange.first); 225 buffer << " "; 226 AddCharcode(&buffer, charcodeRange.second); 227 buffer << " "; 228 AddUnicode(&buffer, iter.second); 229 buffer << "\n"; 230 } 231 buffer << "endbfrange\n"; 232 buffer << ToUnicodeEnd; 233 // TODO(npm): Encrypt / Compress? 234 CPDF_Stream* stream = pDoc->NewIndirect<CPDF_Stream>(); 235 stream->SetData(&buffer); 236 return stream; 237 } 238 239 const uint32_t kMaxSimpleFontChar = 0xFF; 240 241 void* LoadSimpleFont(CPDF_Document* pDoc, 242 std::unique_ptr<CFX_Font> pFont, 243 const uint8_t* data, 244 uint32_t size, 245 int font_type) { 246 CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>(); 247 fontDict->SetNewFor<CPDF_Name>("Type", "Font"); 248 fontDict->SetNewFor<CPDF_Name>( 249 "Subtype", font_type == FPDF_FONT_TYPE1 ? "Type1" : "TrueType"); 250 ByteString name = pFont->GetFaceName(); 251 if (name.IsEmpty()) 252 name = "Unnamed"; 253 fontDict->SetNewFor<CPDF_Name>("BaseFont", name); 254 255 uint32_t glyphIndex; 256 uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex); 257 if (currentChar > kMaxSimpleFontChar || glyphIndex == 0) 258 return nullptr; 259 fontDict->SetNewFor<CPDF_Number>("FirstChar", static_cast<int>(currentChar)); 260 CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>(); 261 while (true) { 262 widthsArray->AddNew<CPDF_Number>(pFont->GetGlyphWidth(glyphIndex)); 263 uint32_t nextChar = 264 FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex); 265 // Simple fonts have 1-byte charcodes only. 266 if (nextChar > kMaxSimpleFontChar || glyphIndex == 0) 267 break; 268 for (uint32_t i = currentChar + 1; i < nextChar; i++) 269 widthsArray->AddNew<CPDF_Number>(0); 270 currentChar = nextChar; 271 } 272 fontDict->SetNewFor<CPDF_Number>("LastChar", static_cast<int>(currentChar)); 273 fontDict->SetNewFor<CPDF_Reference>("Widths", pDoc, widthsArray->GetObjNum()); 274 CPDF_Dictionary* fontDesc = 275 LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type); 276 277 fontDict->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc, 278 fontDesc->GetObjNum()); 279 return pDoc->LoadFont(fontDict); 280 } 281 282 const uint32_t kMaxUnicode = 0x10FFFF; 283 284 void* LoadCompositeFont(CPDF_Document* pDoc, 285 std::unique_ptr<CFX_Font> pFont, 286 const uint8_t* data, 287 uint32_t size, 288 int font_type) { 289 CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>(); 290 fontDict->SetNewFor<CPDF_Name>("Type", "Font"); 291 fontDict->SetNewFor<CPDF_Name>("Subtype", "Type0"); 292 // TODO(npm): Get the correct encoding, if it's not identity. 293 ByteString encoding = "Identity-H"; 294 fontDict->SetNewFor<CPDF_Name>("Encoding", encoding); 295 ByteString name = pFont->GetFaceName(); 296 if (name.IsEmpty()) 297 name = "Unnamed"; 298 fontDict->SetNewFor<CPDF_Name>( 299 "BaseFont", font_type == FPDF_FONT_TYPE1 ? name + "-" + encoding : name); 300 301 CPDF_Dictionary* pCIDFont = pDoc->NewIndirect<CPDF_Dictionary>(); 302 pCIDFont->SetNewFor<CPDF_Name>("Type", "Font"); 303 pCIDFont->SetNewFor<CPDF_Name>("Subtype", font_type == FPDF_FONT_TYPE1 304 ? "CIDFontType0" 305 : "CIDFontType2"); 306 pCIDFont->SetNewFor<CPDF_Name>("BaseFont", name); 307 308 // TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the 309 // CIDSystemInfo 310 CPDF_Dictionary* pCIDSystemInfo = pDoc->NewIndirect<CPDF_Dictionary>(); 311 pCIDSystemInfo->SetNewFor<CPDF_Name>("Registry", "Adobe"); 312 pCIDSystemInfo->SetNewFor<CPDF_Name>("Ordering", "Identity"); 313 pCIDSystemInfo->SetNewFor<CPDF_Number>("Supplement", 0); 314 pCIDFont->SetNewFor<CPDF_Reference>("CIDSystemInfo", pDoc, 315 pCIDSystemInfo->GetObjNum()); 316 317 CPDF_Dictionary* fontDesc = 318 LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type); 319 pCIDFont->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc, 320 fontDesc->GetObjNum()); 321 322 uint32_t glyphIndex; 323 uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex); 324 // If it doesn't have a single char, just fail 325 if (glyphIndex == 0 || currentChar > kMaxUnicode) 326 return nullptr; 327 328 std::map<uint32_t, uint32_t> to_unicode; 329 std::map<uint32_t, uint32_t> widths; 330 while (true) { 331 if (currentChar > kMaxUnicode) 332 break; 333 334 widths[glyphIndex] = pFont->GetGlyphWidth(glyphIndex); 335 to_unicode[glyphIndex] = currentChar; 336 currentChar = 337 FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex); 338 if (glyphIndex == 0) 339 break; 340 } 341 CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>(); 342 for (auto it = widths.begin(); it != widths.end(); ++it) { 343 int ch = it->first; 344 int w = it->second; 345 if (std::next(it) == widths.end()) { 346 // Only one char left, use format c [w] 347 auto oneW = pdfium::MakeUnique<CPDF_Array>(); 348 oneW->AddNew<CPDF_Number>(w); 349 widthsArray->AddNew<CPDF_Number>(ch); 350 widthsArray->Add(std::move(oneW)); 351 break; 352 } 353 ++it; 354 int next_ch = it->first; 355 int next_w = it->second; 356 if (next_ch == ch + 1 && next_w == w) { 357 // The array can have a group c_first c_last w: all CIDs in the range from 358 // c_first to c_last will have width w 359 widthsArray->AddNew<CPDF_Number>(ch); 360 ch = next_ch; 361 while (true) { 362 auto next_it = std::next(it); 363 if (next_it == widths.end() || next_it->first != it->first + 1 || 364 next_it->second != it->second) { 365 break; 366 } 367 ++it; 368 ch = it->first; 369 } 370 widthsArray->AddNew<CPDF_Number>(ch); 371 widthsArray->AddNew<CPDF_Number>(w); 372 continue; 373 } 374 // Otherwise we can have a group of the form c [w1 w2 ...]: c has width 375 // w1, c+1 has width w2, etc. 376 widthsArray->AddNew<CPDF_Number>(ch); 377 auto curWidthArray = pdfium::MakeUnique<CPDF_Array>(); 378 curWidthArray->AddNew<CPDF_Number>(w); 379 curWidthArray->AddNew<CPDF_Number>(next_w); 380 while (true) { 381 auto next_it = std::next(it); 382 if (next_it == widths.end() || next_it->first != it->first + 1) 383 break; 384 ++it; 385 curWidthArray->AddNew<CPDF_Number>(static_cast<int>(it->second)); 386 } 387 widthsArray->Add(std::move(curWidthArray)); 388 } 389 pCIDFont->SetNewFor<CPDF_Reference>("W", pDoc, widthsArray->GetObjNum()); 390 // TODO(npm): Support vertical writing 391 392 auto pDescendant = pdfium::MakeUnique<CPDF_Array>(); 393 pDescendant->AddNew<CPDF_Reference>(pDoc, pCIDFont->GetObjNum()); 394 fontDict->SetFor("DescendantFonts", std::move(pDescendant)); 395 CPDF_Stream* toUnicodeStream = LoadUnicode(pDoc, to_unicode); 396 fontDict->SetNewFor<CPDF_Reference>("ToUnicode", pDoc, 397 toUnicodeStream->GetObjNum()); 398 return pDoc->LoadFont(fontDict); 399 } 400 401 } // namespace 402 403 FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV 404 FPDFPageObj_NewTextObj(FPDF_DOCUMENT document, 405 FPDF_BYTESTRING font, 406 float font_size) { 407 CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); 408 if (!pDoc) 409 return nullptr; 410 411 CPDF_Font* pFont = CPDF_Font::GetStockFont(pDoc, ByteStringView(font)); 412 if (!pFont) 413 return nullptr; 414 415 auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>(); 416 pTextObj->m_TextState.SetFont(pFont); 417 pTextObj->m_TextState.SetFontSize(font_size); 418 pTextObj->DefaultStates(); 419 return pTextObj.release(); // Caller takes ownership. 420 } 421 422 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV 423 FPDFText_SetText(FPDF_PAGEOBJECT text_object, FPDF_WIDESTRING text) { 424 auto* pTextObj = static_cast<CPDF_TextObject*>(text_object); 425 if (!pTextObj) 426 return false; 427 428 size_t len = WideString::WStringLength(text); 429 WideString encodedText = WideString::FromUTF16LE(text, len); 430 ByteString byteText; 431 for (wchar_t wc : encodedText) { 432 pTextObj->GetFont()->AppendChar( 433 &byteText, pTextObj->GetFont()->CharCodeFromUnicode(wc)); 434 } 435 pTextObj->SetText(byteText); 436 return true; 437 } 438 439 FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document, 440 const uint8_t* data, 441 uint32_t size, 442 int font_type, 443 FPDF_BOOL cid) { 444 CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); 445 if (!pDoc || !data || size == 0 || 446 (font_type != FPDF_FONT_TYPE1 && font_type != FPDF_FONT_TRUETYPE)) { 447 return nullptr; 448 } 449 450 auto pFont = pdfium::MakeUnique<CFX_Font>(); 451 452 // TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we 453 // are allowing giving any font that can be loaded on freetype and setting it 454 // as any font type. 455 if (!pFont->LoadEmbedded(data, size)) 456 return nullptr; 457 458 return cid ? LoadCompositeFont(pDoc, std::move(pFont), data, size, font_type) 459 : LoadSimpleFont(pDoc, std::move(pFont), data, size, font_type); 460 } 461 462 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV 463 FPDFText_SetFillColor(FPDF_PAGEOBJECT text_object, 464 unsigned int R, 465 unsigned int G, 466 unsigned int B, 467 unsigned int A) { 468 return FPDFPageObj_SetFillColor(text_object, R, G, B, A); 469 } 470 471 FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) { 472 CPDF_Font* pFont = static_cast<CPDF_Font*>(font); 473 if (!pFont) 474 return; 475 476 CPDF_Document* pDoc = pFont->GetDocument(); 477 if (!pDoc) 478 return; 479 480 CPDF_DocPageData* pPageData = pDoc->GetPageData(); 481 if (!pPageData->IsForceClear()) 482 pPageData->ReleaseFont(pFont->GetFontDict()); 483 } 484 485 FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV 486 FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document, 487 FPDF_FONT font, 488 float font_size) { 489 CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); 490 CPDF_Font* pFont = static_cast<CPDF_Font*>(font); 491 if (!pDoc || !pFont) 492 return nullptr; 493 494 auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>(); 495 pTextObj->m_TextState.SetFont(pDoc->LoadFont(pFont->GetFontDict())); 496 pTextObj->m_TextState.SetFontSize(font_size); 497 pTextObj->DefaultStates(); 498 return pTextObj.release(); 499 } 500