1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfdoc/ctypeset.h" 8 9 #include <algorithm> 10 11 #include "core/fpdfdoc/cline.h" 12 #include "core/fpdfdoc/cpdf_variabletext.h" 13 #include "core/fpdfdoc/cpvt_wordinfo.h" 14 #include "core/fpdfdoc/csection.h" 15 #include "third_party/base/stl_util.h" 16 17 namespace { 18 19 const uint8_t special_chars[128] = { 20 0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 21 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 22 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00, 23 0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28, 24 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08, 25 0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 26 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 27 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, 28 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 29 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 30 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, 31 }; 32 33 bool IsLatin(uint16_t word) { 34 if (word <= 0x007F) 35 return !!(special_chars[word] & 0x01); 36 37 return ((word >= 0x00C0 && word <= 0x00FF) || 38 (word >= 0x0100 && word <= 0x024F) || 39 (word >= 0x1E00 && word <= 0x1EFF) || 40 (word >= 0x2C60 && word <= 0x2C7F) || 41 (word >= 0xA720 && word <= 0xA7FF) || 42 (word >= 0xFF21 && word <= 0xFF3A) || 43 (word >= 0xFF41 && word <= 0xFF5A)); 44 } 45 46 bool IsDigit(uint32_t word) { 47 return word >= 0x0030 && word <= 0x0039; 48 } 49 50 bool IsCJK(uint32_t word) { 51 if ((word >= 0x1100 && word <= 0x11FF) || 52 (word >= 0x2E80 && word <= 0x2FFF) || 53 (word >= 0x3040 && word <= 0x9FBF) || 54 (word >= 0xAC00 && word <= 0xD7AF) || 55 (word >= 0xF900 && word <= 0xFAFF) || 56 (word >= 0xFE30 && word <= 0xFE4F) || 57 (word >= 0x20000 && word <= 0x2A6DF) || 58 (word >= 0x2F800 && word <= 0x2FA1F)) { 59 return true; 60 } 61 if (word >= 0x3000 && word <= 0x303F) { 62 return ( 63 word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 || 64 word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 || 65 word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 || 66 word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035); 67 } 68 return word >= 0xFF66 && word <= 0xFF9D; 69 } 70 71 bool IsPunctuation(uint32_t word) { 72 if (word <= 0x007F) 73 return !!(special_chars[word] & 0x08); 74 75 if (word >= 0x0080 && word <= 0x00FF) { 76 return (word == 0x0082 || word == 0x0084 || word == 0x0085 || 77 word == 0x0091 || word == 0x0092 || word == 0x0093 || 78 word <= 0x0094 || word == 0x0096 || word == 0x00B4 || 79 word == 0x00B8); 80 } 81 82 if (word >= 0x2000 && word <= 0x206F) { 83 return ( 84 word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 || 85 word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B || 86 word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F || 87 word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 || 88 word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D || 89 word == 0x203E || word == 0x2044); 90 } 91 92 if (word >= 0x3000 && word <= 0x303F) { 93 return ( 94 word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 || 95 word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C || 96 word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 || 97 word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 || 98 word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A || 99 word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F); 100 } 101 102 if (word >= 0xFE50 && word <= 0xFE6F) 103 return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63; 104 105 if (word >= 0xFF00 && word <= 0xFFEF) { 106 return ( 107 word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 || 108 word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F || 109 word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B || 110 word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C || 111 word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 || 112 word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F); 113 } 114 115 return false; 116 } 117 118 bool IsConnectiveSymbol(uint32_t word) { 119 return word <= 0x007F && (special_chars[word] & 0x20); 120 } 121 122 bool IsOpenStylePunctuation(uint32_t word) { 123 if (word <= 0x007F) 124 return !!(special_chars[word] & 0x04); 125 126 return (word == 0x300A || word == 0x300C || word == 0x300E || 127 word == 0x3010 || word == 0x3014 || word == 0x3016 || 128 word == 0x3018 || word == 0x301A || word == 0xFF08 || 129 word == 0xFF3B || word == 0xFF5B || word == 0xFF62); 130 } 131 132 bool IsCurrencySymbol(uint16_t word) { 133 return (word == 0x0024 || word == 0x0080 || word == 0x00A2 || 134 word == 0x00A3 || word == 0x00A4 || word == 0x00A5 || 135 (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 || 136 word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 || 137 word == 0xFFE5 || word == 0xFFE6); 138 } 139 140 bool IsPrefixSymbol(uint16_t word) { 141 return IsCurrencySymbol(word) || word == 0x2116; 142 } 143 144 bool IsSpace(uint16_t word) { 145 return word == 0x0020 || word == 0x3000; 146 } 147 148 bool NeedDivision(uint16_t prevWord, uint16_t curWord) { 149 if ((IsLatin(prevWord) || IsDigit(prevWord)) && 150 (IsLatin(curWord) || IsDigit(curWord))) { 151 return false; 152 } 153 if (IsSpace(curWord) || IsPunctuation(curWord)) { 154 return false; 155 } 156 if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) { 157 return false; 158 } 159 if (IsSpace(prevWord) || IsPunctuation(prevWord)) { 160 return true; 161 } 162 if (IsPrefixSymbol(prevWord)) { 163 return false; 164 } 165 if (IsPrefixSymbol(curWord) || IsCJK(curWord)) { 166 return true; 167 } 168 if (IsCJK(prevWord)) { 169 return true; 170 } 171 return false; 172 } 173 174 } // namespace 175 176 CTypeset::CTypeset(CSection* pSection) 177 : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f), 178 m_pVT(pSection->m_pVT), 179 m_pSection(pSection) {} 180 181 CTypeset::~CTypeset() {} 182 183 CPVT_FloatRect CTypeset::CharArray() { 184 m_rcRet = CPVT_FloatRect(0, 0, 0, 0); 185 if (m_pSection->m_LineArray.empty()) 186 return m_rcRet; 187 188 float fNodeWidth = m_pVT->GetPlateWidth() / 189 (m_pVT->GetCharArray() <= 0 ? 1 : m_pVT->GetCharArray()); 190 float fLineAscent = 191 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize()); 192 float fLineDescent = 193 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize()); 194 float x = 0.0f; 195 float y = m_pVT->GetLineLeading() + fLineAscent; 196 int32_t nStart = 0; 197 CLine* pLine = m_pSection->m_LineArray.front().get(); 198 switch (m_pVT->GetAlignment()) { 199 case 0: 200 pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF; 201 break; 202 case 1: 203 nStart = (m_pVT->GetCharArray() - 204 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray)) / 205 2; 206 pLine->m_LineInfo.fLineX = 207 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF; 208 break; 209 case 2: 210 nStart = m_pVT->GetCharArray() - 211 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray); 212 pLine->m_LineInfo.fLineX = 213 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF; 214 break; 215 } 216 for (int32_t w = 0, 217 sz = pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray); 218 w < sz; w++) { 219 if (w >= m_pVT->GetCharArray()) 220 break; 221 222 float fNextWidth = 0; 223 if (pdfium::IndexInBounds(m_pSection->m_WordArray, w + 1)) { 224 CPVT_WordInfo* pNextWord = m_pSection->m_WordArray[w + 1].get(); 225 pNextWord->fWordTail = 0; 226 fNextWidth = m_pVT->GetWordWidth(*pNextWord); 227 } 228 CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get(); 229 pWord->fWordTail = 0; 230 float fWordWidth = m_pVT->GetWordWidth(*pWord); 231 float fWordAscent = m_pVT->GetWordAscent(*pWord); 232 float fWordDescent = m_pVT->GetWordDescent(*pWord); 233 x = (float)(fNodeWidth * (w + nStart + 0.5) - 234 fWordWidth * VARIABLETEXT_HALF); 235 pWord->fWordX = x; 236 pWord->fWordY = y; 237 if (w == 0) { 238 pLine->m_LineInfo.fLineX = x; 239 } 240 if (w != pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1) { 241 pWord->fWordTail = 242 (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0 243 ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF 244 : 0); 245 } else { 246 pWord->fWordTail = 0; 247 } 248 x += fWordWidth; 249 fLineAscent = std::max(fLineAscent, fWordAscent); 250 fLineDescent = std::min(fLineDescent, fWordDescent); 251 } 252 pLine->m_LineInfo.nBeginWordIndex = 0; 253 pLine->m_LineInfo.nEndWordIndex = 254 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1; 255 pLine->m_LineInfo.fLineY = y; 256 pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX; 257 pLine->m_LineInfo.fLineAscent = fLineAscent; 258 pLine->m_LineInfo.fLineDescent = fLineDescent; 259 m_rcRet = CPVT_FloatRect(0, 0, x, y - fLineDescent); 260 return m_rcRet; 261 } 262 263 CFX_SizeF CTypeset::GetEditSize(float fFontSize) { 264 ASSERT(m_pSection); 265 ASSERT(m_pVT); 266 SplitLines(false, fFontSize); 267 return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height()); 268 } 269 270 CPVT_FloatRect CTypeset::Typeset() { 271 ASSERT(m_pVT); 272 m_pSection->m_LineArray.clear(); 273 SplitLines(true, 0.0f); 274 OutputLines(); 275 return m_rcRet; 276 } 277 278 void CTypeset::SplitLines(bool bTypeset, float fFontSize) { 279 ASSERT(m_pVT); 280 ASSERT(m_pSection); 281 int32_t nLineHead = 0; 282 int32_t nLineTail = 0; 283 float fMaxX = 0.0f, fMaxY = 0.0f; 284 float fLineWidth = 0.0f, fBackupLineWidth = 0.0f; 285 float fLineAscent = 0.0f, fBackupLineAscent = 0.0f; 286 float fLineDescent = 0.0f, fBackupLineDescent = 0.0f; 287 int32_t nWordStartPos = 0; 288 bool bFullWord = false; 289 int32_t nLineFullWordIndex = 0; 290 int32_t nCharIndex = 0; 291 CPVT_LineInfo line; 292 float fWordWidth = 0; 293 float fTypesetWidth = 294 std::max(m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(), 0.0f); 295 int32_t nTotalWords = 296 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray); 297 bool bOpened = false; 298 if (nTotalWords > 0) { 299 int32_t i = 0; 300 while (i < nTotalWords) { 301 CPVT_WordInfo* pWord = m_pSection->m_WordArray[i].get(); 302 CPVT_WordInfo* pOldWord = pWord; 303 if (i > 0) { 304 pOldWord = m_pSection->m_WordArray[i - 1].get(); 305 } 306 if (pWord) { 307 if (bTypeset) { 308 fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord)); 309 fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord)); 310 fWordWidth = m_pVT->GetWordWidth(*pWord); 311 } else { 312 fLineAscent = 313 std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize)); 314 fLineDescent = 315 std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize)); 316 fWordWidth = m_pVT->GetWordWidth( 317 pWord->nFontIndex, pWord->Word, m_pVT->GetSubWord(), 318 m_pVT->GetCharSpace(), m_pVT->GetHorzScale(), fFontSize, 319 pWord->fWordTail); 320 } 321 if (!bOpened) { 322 if (IsOpenStylePunctuation(pWord->Word)) { 323 bOpened = true; 324 bFullWord = true; 325 } else if (pOldWord) { 326 if (NeedDivision(pOldWord->Word, pWord->Word)) { 327 bFullWord = true; 328 } 329 } 330 } else { 331 if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) { 332 bOpened = false; 333 } 334 } 335 if (bFullWord) { 336 bFullWord = false; 337 if (nCharIndex > 0) { 338 nLineFullWordIndex++; 339 } 340 nWordStartPos = i; 341 fBackupLineWidth = fLineWidth; 342 fBackupLineAscent = fLineAscent; 343 fBackupLineDescent = fLineDescent; 344 } 345 nCharIndex++; 346 } 347 if (m_pVT->IsAutoReturn() && fTypesetWidth > 0 && 348 fLineWidth + fWordWidth > fTypesetWidth) { 349 if (nLineFullWordIndex > 0) { 350 i = nWordStartPos; 351 fLineWidth = fBackupLineWidth; 352 fLineAscent = fBackupLineAscent; 353 fLineDescent = fBackupLineDescent; 354 } 355 if (nCharIndex == 1) { 356 fLineWidth = fWordWidth; 357 i++; 358 } 359 nLineTail = i - 1; 360 if (bTypeset) { 361 line.nBeginWordIndex = nLineHead; 362 line.nEndWordIndex = nLineTail; 363 line.nTotalWord = nLineTail - nLineHead + 1; 364 line.fLineWidth = fLineWidth; 365 line.fLineAscent = fLineAscent; 366 line.fLineDescent = fLineDescent; 367 m_pSection->AddLine(line); 368 } 369 fMaxY += (fLineAscent + m_pVT->GetLineLeading()); 370 fMaxY -= fLineDescent; 371 fMaxX = std::max(fLineWidth, fMaxX); 372 nLineHead = i; 373 fLineWidth = 0.0f; 374 fLineAscent = 0.0f; 375 fLineDescent = 0.0f; 376 nCharIndex = 0; 377 nLineFullWordIndex = 0; 378 bFullWord = false; 379 } else { 380 fLineWidth += fWordWidth; 381 i++; 382 } 383 } 384 if (nLineHead <= nTotalWords - 1) { 385 nLineTail = nTotalWords - 1; 386 if (bTypeset) { 387 line.nBeginWordIndex = nLineHead; 388 line.nEndWordIndex = nLineTail; 389 line.nTotalWord = nLineTail - nLineHead + 1; 390 line.fLineWidth = fLineWidth; 391 line.fLineAscent = fLineAscent; 392 line.fLineDescent = fLineDescent; 393 m_pSection->AddLine(line); 394 } 395 fMaxY += (fLineAscent + m_pVT->GetLineLeading()); 396 fMaxY -= fLineDescent; 397 fMaxX = std::max(fLineWidth, fMaxX); 398 } 399 } else { 400 if (bTypeset) { 401 fLineAscent = m_pVT->GetLineAscent(); 402 fLineDescent = m_pVT->GetLineDescent(); 403 } else { 404 fLineAscent = 405 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize); 406 fLineDescent = 407 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize); 408 } 409 if (bTypeset) { 410 line.nBeginWordIndex = -1; 411 line.nEndWordIndex = -1; 412 line.nTotalWord = 0; 413 line.fLineWidth = 0; 414 line.fLineAscent = fLineAscent; 415 line.fLineDescent = fLineDescent; 416 m_pSection->AddLine(line); 417 } 418 fMaxY += m_pVT->GetLineLeading() + fLineAscent - fLineDescent; 419 } 420 m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY); 421 } 422 423 void CTypeset::OutputLines() { 424 ASSERT(m_pVT); 425 ASSERT(m_pSection); 426 float fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f; 427 float fPosX = 0.0f, fPosY = 0.0f; 428 float fLineIndent = m_pVT->GetLineIndent(); 429 float fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f); 430 switch (m_pVT->GetAlignment()) { 431 default: 432 case 0: 433 fMinX = 0.0f; 434 break; 435 case 1: 436 fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF; 437 break; 438 case 2: 439 fMinX = fTypesetWidth - m_rcRet.Width(); 440 break; 441 } 442 fMaxX = fMinX + m_rcRet.Width(); 443 fMinY = 0.0f; 444 fMaxY = m_rcRet.Height(); 445 int32_t nTotalLines = 446 pdfium::CollectionSize<int32_t>(m_pSection->m_LineArray); 447 if (nTotalLines > 0) { 448 for (int32_t l = 0; l < nTotalLines; l++) { 449 CLine* pLine = m_pSection->m_LineArray[l].get(); 450 switch (m_pVT->GetAlignment()) { 451 default: 452 case 0: 453 fPosX = 0; 454 break; 455 case 1: 456 fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) * 457 VARIABLETEXT_HALF; 458 break; 459 case 2: 460 fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth; 461 break; 462 } 463 fPosX += fLineIndent; 464 fPosY += m_pVT->GetLineLeading(); 465 fPosY += pLine->m_LineInfo.fLineAscent; 466 pLine->m_LineInfo.fLineX = fPosX - fMinX; 467 pLine->m_LineInfo.fLineY = fPosY - fMinY; 468 for (int32_t w = pLine->m_LineInfo.nBeginWordIndex; 469 w <= pLine->m_LineInfo.nEndWordIndex; w++) { 470 if (pdfium::IndexInBounds(m_pSection->m_WordArray, w)) { 471 CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get(); 472 pWord->fWordX = fPosX - fMinX; 473 pWord->fWordY = fPosY - fMinY; 474 475 fPosX += m_pVT->GetWordWidth(*pWord); 476 } 477 } 478 fPosY -= pLine->m_LineInfo.fLineDescent; 479 } 480 } 481 m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY); 482 } 483