1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfdoc/ctypeset.h" 8 9 #include <algorithm> 10 11 #include "core/fpdfdoc/cline.h" 12 #include "core/fpdfdoc/cpvt_wordinfo.h" 13 #include "core/fpdfdoc/csection.h" 14 15 namespace { 16 17 const uint8_t special_chars[128] = { 18 0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 19 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 20 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00, 21 0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28, 22 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08, 23 0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 24 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 25 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, 26 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 27 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 28 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, 29 }; 30 31 bool IsLatin(uint16_t word) { 32 if (word <= 0x007F) 33 return !!(special_chars[word] & 0x01); 34 35 return ((word >= 0x00C0 && word <= 0x00FF) || 36 (word >= 0x0100 && word <= 0x024F) || 37 (word >= 0x1E00 && word <= 0x1EFF) || 38 (word >= 0x2C60 && word <= 0x2C7F) || 39 (word >= 0xA720 && word <= 0xA7FF) || 40 (word >= 0xFF21 && word <= 0xFF3A) || 41 (word >= 0xFF41 && word <= 0xFF5A)); 42 } 43 44 bool IsDigit(uint32_t word) { 45 return word >= 0x0030 && word <= 0x0039; 46 } 47 48 bool IsCJK(uint32_t word) { 49 if ((word >= 0x1100 && word <= 0x11FF) || 50 (word >= 0x2E80 && word <= 0x2FFF) || 51 (word >= 0x3040 && word <= 0x9FBF) || 52 (word >= 0xAC00 && word <= 0xD7AF) || 53 (word >= 0xF900 && word <= 0xFAFF) || 54 (word >= 0xFE30 && word <= 0xFE4F) || 55 (word >= 0x20000 && word <= 0x2A6DF) || 56 (word >= 0x2F800 && word <= 0x2FA1F)) { 57 return true; 58 } 59 if (word >= 0x3000 && word <= 0x303F) { 60 return ( 61 word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 || 62 word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 || 63 word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 || 64 word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035); 65 } 66 return word >= 0xFF66 && word <= 0xFF9D; 67 } 68 69 bool IsPunctuation(uint32_t word) { 70 if (word <= 0x007F) 71 return !!(special_chars[word] & 0x08); 72 73 if (word >= 0x0080 && word <= 0x00FF) { 74 return (word == 0x0082 || word == 0x0084 || word == 0x0085 || 75 word == 0x0091 || word == 0x0092 || word == 0x0093 || 76 word <= 0x0094 || word == 0x0096 || word == 0x00B4 || 77 word == 0x00B8); 78 } 79 80 if (word >= 0x2000 && word <= 0x206F) { 81 return ( 82 word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 || 83 word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B || 84 word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F || 85 word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 || 86 word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D || 87 word == 0x203E || word == 0x2044); 88 } 89 90 if (word >= 0x3000 && word <= 0x303F) { 91 return ( 92 word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 || 93 word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C || 94 word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 || 95 word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 || 96 word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A || 97 word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F); 98 } 99 100 if (word >= 0xFE50 && word <= 0xFE6F) 101 return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63; 102 103 if (word >= 0xFF00 && word <= 0xFFEF) { 104 return ( 105 word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 || 106 word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F || 107 word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B || 108 word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C || 109 word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 || 110 word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F); 111 } 112 113 return false; 114 } 115 116 bool IsConnectiveSymbol(uint32_t word) { 117 return word <= 0x007F && (special_chars[word] & 0x20); 118 } 119 120 bool IsOpenStylePunctuation(uint32_t word) { 121 if (word <= 0x007F) 122 return !!(special_chars[word] & 0x04); 123 124 return (word == 0x300A || word == 0x300C || word == 0x300E || 125 word == 0x3010 || word == 0x3014 || word == 0x3016 || 126 word == 0x3018 || word == 0x301A || word == 0xFF08 || 127 word == 0xFF3B || word == 0xFF5B || word == 0xFF62); 128 } 129 130 bool IsCurrencySymbol(uint16_t word) { 131 return (word == 0x0024 || word == 0x0080 || word == 0x00A2 || 132 word == 0x00A3 || word == 0x00A4 || word == 0x00A5 || 133 (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 || 134 word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 || 135 word == 0xFFE5 || word == 0xFFE6); 136 } 137 138 bool IsPrefixSymbol(uint16_t word) { 139 return IsCurrencySymbol(word) || word == 0x2116; 140 } 141 142 bool IsSpace(uint16_t word) { 143 return word == 0x0020 || word == 0x3000; 144 } 145 146 bool NeedDivision(uint16_t prevWord, uint16_t curWord) { 147 if ((IsLatin(prevWord) || IsDigit(prevWord)) && 148 (IsLatin(curWord) || IsDigit(curWord))) { 149 return false; 150 } 151 if (IsSpace(curWord) || IsPunctuation(curWord)) { 152 return false; 153 } 154 if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) { 155 return false; 156 } 157 if (IsSpace(prevWord) || IsPunctuation(prevWord)) { 158 return true; 159 } 160 if (IsPrefixSymbol(prevWord)) { 161 return false; 162 } 163 if (IsPrefixSymbol(curWord) || IsCJK(curWord)) { 164 return true; 165 } 166 if (IsCJK(prevWord)) { 167 return true; 168 } 169 return false; 170 } 171 172 } // namespace 173 174 CTypeset::CTypeset(CSection* pSection) 175 : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f), 176 m_pVT(pSection->m_pVT), 177 m_pSection(pSection) {} 178 179 CTypeset::~CTypeset() {} 180 181 CPVT_FloatRect CTypeset::CharArray() { 182 ASSERT(m_pSection); 183 FX_FLOAT fLineAscent = 184 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize()); 185 FX_FLOAT fLineDescent = 186 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize()); 187 m_rcRet.Default(); 188 FX_FLOAT x = 0.0f, y = 0.0f; 189 FX_FLOAT fNextWidth; 190 int32_t nStart = 0; 191 FX_FLOAT fNodeWidth = m_pVT->GetPlateWidth() / 192 (m_pVT->m_nCharArray <= 0 ? 1 : m_pVT->m_nCharArray); 193 if (CLine* pLine = m_pSection->m_LineArray.GetAt(0)) { 194 x = 0.0f; 195 y += m_pVT->GetLineLeading(m_pSection->m_SecInfo); 196 y += fLineAscent; 197 nStart = 0; 198 switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) { 199 case 0: 200 pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF; 201 break; 202 case 1: 203 nStart = (m_pVT->m_nCharArray - m_pSection->m_WordArray.GetSize()) / 2; 204 pLine->m_LineInfo.fLineX = 205 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF; 206 break; 207 case 2: 208 nStart = m_pVT->m_nCharArray - m_pSection->m_WordArray.GetSize(); 209 pLine->m_LineInfo.fLineX = 210 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF; 211 break; 212 } 213 for (int32_t w = 0, sz = m_pSection->m_WordArray.GetSize(); w < sz; w++) { 214 if (w >= m_pVT->m_nCharArray) { 215 break; 216 } 217 fNextWidth = 0; 218 if (CPVT_WordInfo* pNextWord = m_pSection->m_WordArray.GetAt(w + 1)) { 219 pNextWord->fWordTail = 0; 220 fNextWidth = m_pVT->GetWordWidth(*pNextWord); 221 } 222 if (CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(w)) { 223 pWord->fWordTail = 0; 224 FX_FLOAT fWordWidth = m_pVT->GetWordWidth(*pWord); 225 FX_FLOAT fWordAscent = m_pVT->GetWordAscent(*pWord); 226 FX_FLOAT fWordDescent = m_pVT->GetWordDescent(*pWord); 227 x = (FX_FLOAT)(fNodeWidth * (w + nStart + 0.5) - 228 fWordWidth * VARIABLETEXT_HALF); 229 pWord->fWordX = x; 230 pWord->fWordY = y; 231 if (w == 0) { 232 pLine->m_LineInfo.fLineX = x; 233 } 234 if (w != m_pSection->m_WordArray.GetSize() - 1) { 235 pWord->fWordTail = 236 (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0 237 ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF 238 : 0); 239 } else { 240 pWord->fWordTail = 0; 241 } 242 x += fWordWidth; 243 fLineAscent = std::max(fLineAscent, fWordAscent); 244 fLineDescent = std::min(fLineDescent, fWordDescent); 245 } 246 } 247 pLine->m_LineInfo.nBeginWordIndex = 0; 248 pLine->m_LineInfo.nEndWordIndex = m_pSection->m_WordArray.GetSize() - 1; 249 pLine->m_LineInfo.fLineY = y; 250 pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX; 251 pLine->m_LineInfo.fLineAscent = fLineAscent; 252 pLine->m_LineInfo.fLineDescent = fLineDescent; 253 y -= fLineDescent; 254 } 255 return m_rcRet = CPVT_FloatRect(0, 0, x, y); 256 } 257 258 CFX_SizeF CTypeset::GetEditSize(FX_FLOAT fFontSize) { 259 ASSERT(m_pSection); 260 ASSERT(m_pVT); 261 SplitLines(false, fFontSize); 262 return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height()); 263 } 264 265 CPVT_FloatRect CTypeset::Typeset() { 266 ASSERT(m_pVT); 267 m_pSection->m_LineArray.Empty(); 268 SplitLines(true, 0.0f); 269 m_pSection->m_LineArray.Clear(); 270 OutputLines(); 271 return m_rcRet; 272 } 273 274 void CTypeset::SplitLines(bool bTypeset, FX_FLOAT fFontSize) { 275 ASSERT(m_pVT); 276 ASSERT(m_pSection); 277 int32_t nLineHead = 0; 278 int32_t nLineTail = 0; 279 FX_FLOAT fMaxX = 0.0f, fMaxY = 0.0f; 280 FX_FLOAT fLineWidth = 0.0f, fBackupLineWidth = 0.0f; 281 FX_FLOAT fLineAscent = 0.0f, fBackupLineAscent = 0.0f; 282 FX_FLOAT fLineDescent = 0.0f, fBackupLineDescent = 0.0f; 283 int32_t nWordStartPos = 0; 284 bool bFullWord = false; 285 int32_t nLineFullWordIndex = 0; 286 int32_t nCharIndex = 0; 287 CPVT_LineInfo line; 288 FX_FLOAT fWordWidth = 0; 289 FX_FLOAT fTypesetWidth = std::max( 290 m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(m_pSection->m_SecInfo), 291 0.0f); 292 int32_t nTotalWords = m_pSection->m_WordArray.GetSize(); 293 bool bOpened = false; 294 if (nTotalWords > 0) { 295 int32_t i = 0; 296 while (i < nTotalWords) { 297 CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(i); 298 CPVT_WordInfo* pOldWord = pWord; 299 if (i > 0) { 300 pOldWord = m_pSection->m_WordArray.GetAt(i - 1); 301 } 302 if (pWord) { 303 if (bTypeset) { 304 fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord)); 305 fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord)); 306 fWordWidth = m_pVT->GetWordWidth(*pWord); 307 } else { 308 fLineAscent = 309 std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize)); 310 fLineDescent = 311 std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize)); 312 fWordWidth = m_pVT->GetWordWidth( 313 pWord->nFontIndex, pWord->Word, m_pVT->m_wSubWord, 314 m_pVT->m_fCharSpace, m_pVT->m_nHorzScale, fFontSize, 315 pWord->fWordTail); 316 } 317 if (!bOpened) { 318 if (IsOpenStylePunctuation(pWord->Word)) { 319 bOpened = true; 320 bFullWord = true; 321 } else if (pOldWord) { 322 if (NeedDivision(pOldWord->Word, pWord->Word)) { 323 bFullWord = true; 324 } 325 } 326 } else { 327 if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) { 328 bOpened = false; 329 } 330 } 331 if (bFullWord) { 332 bFullWord = false; 333 if (nCharIndex > 0) { 334 nLineFullWordIndex++; 335 } 336 nWordStartPos = i; 337 fBackupLineWidth = fLineWidth; 338 fBackupLineAscent = fLineAscent; 339 fBackupLineDescent = fLineDescent; 340 } 341 nCharIndex++; 342 } 343 if (m_pVT->m_bLimitWidth && fTypesetWidth > 0 && 344 fLineWidth + fWordWidth > fTypesetWidth) { 345 if (nLineFullWordIndex > 0) { 346 i = nWordStartPos; 347 fLineWidth = fBackupLineWidth; 348 fLineAscent = fBackupLineAscent; 349 fLineDescent = fBackupLineDescent; 350 } 351 if (nCharIndex == 1) { 352 fLineWidth = fWordWidth; 353 i++; 354 } 355 nLineTail = i - 1; 356 if (bTypeset) { 357 line.nBeginWordIndex = nLineHead; 358 line.nEndWordIndex = nLineTail; 359 line.nTotalWord = nLineTail - nLineHead + 1; 360 line.fLineWidth = fLineWidth; 361 line.fLineAscent = fLineAscent; 362 line.fLineDescent = fLineDescent; 363 m_pSection->AddLine(line); 364 } 365 fMaxY += (fLineAscent + m_pVT->GetLineLeading(m_pSection->m_SecInfo)); 366 fMaxY -= fLineDescent; 367 fMaxX = std::max(fLineWidth, fMaxX); 368 nLineHead = i; 369 fLineWidth = 0.0f; 370 fLineAscent = 0.0f; 371 fLineDescent = 0.0f; 372 nCharIndex = 0; 373 nLineFullWordIndex = 0; 374 bFullWord = false; 375 } else { 376 fLineWidth += fWordWidth; 377 i++; 378 } 379 } 380 if (nLineHead <= nTotalWords - 1) { 381 nLineTail = nTotalWords - 1; 382 if (bTypeset) { 383 line.nBeginWordIndex = nLineHead; 384 line.nEndWordIndex = nLineTail; 385 line.nTotalWord = nLineTail - nLineHead + 1; 386 line.fLineWidth = fLineWidth; 387 line.fLineAscent = fLineAscent; 388 line.fLineDescent = fLineDescent; 389 m_pSection->AddLine(line); 390 } 391 fMaxY += (fLineAscent + m_pVT->GetLineLeading(m_pSection->m_SecInfo)); 392 fMaxY -= fLineDescent; 393 fMaxX = std::max(fLineWidth, fMaxX); 394 } 395 } else { 396 if (bTypeset) { 397 fLineAscent = m_pVT->GetLineAscent(m_pSection->m_SecInfo); 398 fLineDescent = m_pVT->GetLineDescent(m_pSection->m_SecInfo); 399 } else { 400 fLineAscent = 401 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize); 402 fLineDescent = 403 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize); 404 } 405 if (bTypeset) { 406 line.nBeginWordIndex = -1; 407 line.nEndWordIndex = -1; 408 line.nTotalWord = 0; 409 line.fLineWidth = 0; 410 line.fLineAscent = fLineAscent; 411 line.fLineDescent = fLineDescent; 412 m_pSection->AddLine(line); 413 } 414 fMaxY += m_pVT->GetLineLeading(m_pSection->m_SecInfo) + fLineAscent - 415 fLineDescent; 416 } 417 m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY); 418 } 419 420 void CTypeset::OutputLines() { 421 ASSERT(m_pVT); 422 ASSERT(m_pSection); 423 FX_FLOAT fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f; 424 FX_FLOAT fPosX = 0.0f, fPosY = 0.0f; 425 FX_FLOAT fLineIndent = m_pVT->GetLineIndent(m_pSection->m_SecInfo); 426 FX_FLOAT fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f); 427 switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) { 428 default: 429 case 0: 430 fMinX = 0.0f; 431 break; 432 case 1: 433 fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF; 434 break; 435 case 2: 436 fMinX = fTypesetWidth - m_rcRet.Width(); 437 break; 438 } 439 fMaxX = fMinX + m_rcRet.Width(); 440 fMinY = 0.0f; 441 fMaxY = m_rcRet.Height(); 442 int32_t nTotalLines = m_pSection->m_LineArray.GetSize(); 443 if (nTotalLines > 0) { 444 m_pSection->m_SecInfo.nTotalLine = nTotalLines; 445 for (int32_t l = 0; l < nTotalLines; l++) { 446 if (CLine* pLine = m_pSection->m_LineArray.GetAt(l)) { 447 switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) { 448 default: 449 case 0: 450 fPosX = 0; 451 break; 452 case 1: 453 fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) * 454 VARIABLETEXT_HALF; 455 break; 456 case 2: 457 fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth; 458 break; 459 } 460 fPosX += fLineIndent; 461 fPosY += m_pVT->GetLineLeading(m_pSection->m_SecInfo); 462 fPosY += pLine->m_LineInfo.fLineAscent; 463 pLine->m_LineInfo.fLineX = fPosX - fMinX; 464 pLine->m_LineInfo.fLineY = fPosY - fMinY; 465 for (int32_t w = pLine->m_LineInfo.nBeginWordIndex; 466 w <= pLine->m_LineInfo.nEndWordIndex; w++) { 467 if (CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(w)) { 468 pWord->fWordX = fPosX - fMinX; 469 if (pWord->pWordProps) { 470 switch (pWord->pWordProps->nScriptType) { 471 default: 472 case CPDF_VariableText::ScriptType::Normal: 473 pWord->fWordY = fPosY - fMinY; 474 break; 475 case CPDF_VariableText::ScriptType::Super: 476 pWord->fWordY = fPosY - m_pVT->GetWordAscent(*pWord) - fMinY; 477 break; 478 case CPDF_VariableText::ScriptType::Sub: 479 pWord->fWordY = fPosY - m_pVT->GetWordDescent(*pWord) - fMinY; 480 break; 481 } 482 } else { 483 pWord->fWordY = fPosY - fMinY; 484 } 485 fPosX += m_pVT->GetWordWidth(*pWord); 486 } 487 } 488 fPosY -= pLine->m_LineInfo.fLineDescent; 489 } 490 } 491 } 492 m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY); 493 } 494