Home | History | Annotate | Download | only in fpdfdoc
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfdoc/ctypeset.h"
      8 
      9 #include <algorithm>
     10 
     11 #include "core/fpdfdoc/cline.h"
     12 #include "core/fpdfdoc/cpdf_variabletext.h"
     13 #include "core/fpdfdoc/cpvt_wordinfo.h"
     14 #include "core/fpdfdoc/csection.h"
     15 #include "third_party/base/stl_util.h"
     16 
     17 namespace {
     18 
     19 const uint8_t special_chars[128] = {
     20     0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
     21     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     22     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00,
     23     0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28,
     24     0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08,
     25     0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
     26     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
     27     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
     28     0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
     29     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
     30     0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
     31 };
     32 
     33 bool IsLatin(uint16_t word) {
     34   if (word <= 0x007F)
     35     return !!(special_chars[word] & 0x01);
     36 
     37   return ((word >= 0x00C0 && word <= 0x00FF) ||
     38           (word >= 0x0100 && word <= 0x024F) ||
     39           (word >= 0x1E00 && word <= 0x1EFF) ||
     40           (word >= 0x2C60 && word <= 0x2C7F) ||
     41           (word >= 0xA720 && word <= 0xA7FF) ||
     42           (word >= 0xFF21 && word <= 0xFF3A) ||
     43           (word >= 0xFF41 && word <= 0xFF5A));
     44 }
     45 
     46 bool IsDigit(uint32_t word) {
     47   return word >= 0x0030 && word <= 0x0039;
     48 }
     49 
     50 bool IsCJK(uint32_t word) {
     51   if ((word >= 0x1100 && word <= 0x11FF) ||
     52       (word >= 0x2E80 && word <= 0x2FFF) ||
     53       (word >= 0x3040 && word <= 0x9FBF) ||
     54       (word >= 0xAC00 && word <= 0xD7AF) ||
     55       (word >= 0xF900 && word <= 0xFAFF) ||
     56       (word >= 0xFE30 && word <= 0xFE4F) ||
     57       (word >= 0x20000 && word <= 0x2A6DF) ||
     58       (word >= 0x2F800 && word <= 0x2FA1F)) {
     59     return true;
     60   }
     61   if (word >= 0x3000 && word <= 0x303F) {
     62     return (
     63         word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 ||
     64         word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 ||
     65         word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 ||
     66         word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035);
     67   }
     68   return word >= 0xFF66 && word <= 0xFF9D;
     69 }
     70 
     71 bool IsPunctuation(uint32_t word) {
     72   if (word <= 0x007F)
     73     return !!(special_chars[word] & 0x08);
     74 
     75   if (word >= 0x0080 && word <= 0x00FF) {
     76     return (word == 0x0082 || word == 0x0084 || word == 0x0085 ||
     77             word == 0x0091 || word == 0x0092 || word == 0x0093 ||
     78             word <= 0x0094 || word == 0x0096 || word == 0x00B4 ||
     79             word == 0x00B8);
     80   }
     81 
     82   if (word >= 0x2000 && word <= 0x206F) {
     83     return (
     84         word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 ||
     85         word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B ||
     86         word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F ||
     87         word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 ||
     88         word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D ||
     89         word == 0x203E || word == 0x2044);
     90   }
     91 
     92   if (word >= 0x3000 && word <= 0x303F) {
     93     return (
     94         word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 ||
     95         word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C ||
     96         word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 ||
     97         word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 ||
     98         word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A ||
     99         word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F);
    100   }
    101 
    102   if (word >= 0xFE50 && word <= 0xFE6F)
    103     return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63;
    104 
    105   if (word >= 0xFF00 && word <= 0xFFEF) {
    106     return (
    107         word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 ||
    108         word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F ||
    109         word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B ||
    110         word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C ||
    111         word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 ||
    112         word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F);
    113   }
    114 
    115   return false;
    116 }
    117 
    118 bool IsConnectiveSymbol(uint32_t word) {
    119   return word <= 0x007F && (special_chars[word] & 0x20);
    120 }
    121 
    122 bool IsOpenStylePunctuation(uint32_t word) {
    123   if (word <= 0x007F)
    124     return !!(special_chars[word] & 0x04);
    125 
    126   return (word == 0x300A || word == 0x300C || word == 0x300E ||
    127           word == 0x3010 || word == 0x3014 || word == 0x3016 ||
    128           word == 0x3018 || word == 0x301A || word == 0xFF08 ||
    129           word == 0xFF3B || word == 0xFF5B || word == 0xFF62);
    130 }
    131 
    132 bool IsCurrencySymbol(uint16_t word) {
    133   return (word == 0x0024 || word == 0x0080 || word == 0x00A2 ||
    134           word == 0x00A3 || word == 0x00A4 || word == 0x00A5 ||
    135           (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 ||
    136           word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 ||
    137           word == 0xFFE5 || word == 0xFFE6);
    138 }
    139 
    140 bool IsPrefixSymbol(uint16_t word) {
    141   return IsCurrencySymbol(word) || word == 0x2116;
    142 }
    143 
    144 bool IsSpace(uint16_t word) {
    145   return word == 0x0020 || word == 0x3000;
    146 }
    147 
    148 bool NeedDivision(uint16_t prevWord, uint16_t curWord) {
    149   if ((IsLatin(prevWord) || IsDigit(prevWord)) &&
    150       (IsLatin(curWord) || IsDigit(curWord))) {
    151     return false;
    152   }
    153   if (IsSpace(curWord) || IsPunctuation(curWord)) {
    154     return false;
    155   }
    156   if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) {
    157     return false;
    158   }
    159   if (IsSpace(prevWord) || IsPunctuation(prevWord)) {
    160     return true;
    161   }
    162   if (IsPrefixSymbol(prevWord)) {
    163     return false;
    164   }
    165   if (IsPrefixSymbol(curWord) || IsCJK(curWord)) {
    166     return true;
    167   }
    168   if (IsCJK(prevWord)) {
    169     return true;
    170   }
    171   return false;
    172 }
    173 
    174 }  // namespace
    175 
    176 CTypeset::CTypeset(CSection* pSection)
    177     : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f),
    178       m_pVT(pSection->m_pVT),
    179       m_pSection(pSection) {}
    180 
    181 CTypeset::~CTypeset() {}
    182 
    183 CPVT_FloatRect CTypeset::CharArray() {
    184   m_rcRet = CPVT_FloatRect(0, 0, 0, 0);
    185   if (m_pSection->m_LineArray.empty())
    186     return m_rcRet;
    187 
    188   float fNodeWidth = m_pVT->GetPlateWidth() /
    189                      (m_pVT->GetCharArray() <= 0 ? 1 : m_pVT->GetCharArray());
    190   float fLineAscent =
    191       m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
    192   float fLineDescent =
    193       m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
    194   float x = 0.0f;
    195   float y = m_pVT->GetLineLeading() + fLineAscent;
    196   int32_t nStart = 0;
    197   CLine* pLine = m_pSection->m_LineArray.front().get();
    198   switch (m_pVT->GetAlignment()) {
    199     case 0:
    200       pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF;
    201       break;
    202     case 1:
    203       nStart = (m_pVT->GetCharArray() -
    204                 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray)) /
    205                2;
    206       pLine->m_LineInfo.fLineX =
    207           fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
    208       break;
    209     case 2:
    210       nStart = m_pVT->GetCharArray() -
    211                pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
    212       pLine->m_LineInfo.fLineX =
    213           fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
    214       break;
    215   }
    216   for (int32_t w = 0,
    217                sz = pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
    218        w < sz; w++) {
    219     if (w >= m_pVT->GetCharArray())
    220       break;
    221 
    222     float fNextWidth = 0;
    223     if (pdfium::IndexInBounds(m_pSection->m_WordArray, w + 1)) {
    224       CPVT_WordInfo* pNextWord = m_pSection->m_WordArray[w + 1].get();
    225       pNextWord->fWordTail = 0;
    226       fNextWidth = m_pVT->GetWordWidth(*pNextWord);
    227     }
    228     CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
    229     pWord->fWordTail = 0;
    230     float fWordWidth = m_pVT->GetWordWidth(*pWord);
    231     float fWordAscent = m_pVT->GetWordAscent(*pWord);
    232     float fWordDescent = m_pVT->GetWordDescent(*pWord);
    233     x = (float)(fNodeWidth * (w + nStart + 0.5) -
    234                 fWordWidth * VARIABLETEXT_HALF);
    235     pWord->fWordX = x;
    236     pWord->fWordY = y;
    237     if (w == 0) {
    238       pLine->m_LineInfo.fLineX = x;
    239     }
    240     if (w != pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1) {
    241       pWord->fWordTail =
    242           (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0
    243                ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF
    244                : 0);
    245     } else {
    246       pWord->fWordTail = 0;
    247     }
    248     x += fWordWidth;
    249     fLineAscent = std::max(fLineAscent, fWordAscent);
    250     fLineDescent = std::min(fLineDescent, fWordDescent);
    251   }
    252   pLine->m_LineInfo.nBeginWordIndex = 0;
    253   pLine->m_LineInfo.nEndWordIndex =
    254       pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1;
    255   pLine->m_LineInfo.fLineY = y;
    256   pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX;
    257   pLine->m_LineInfo.fLineAscent = fLineAscent;
    258   pLine->m_LineInfo.fLineDescent = fLineDescent;
    259   m_rcRet = CPVT_FloatRect(0, 0, x, y - fLineDescent);
    260   return m_rcRet;
    261 }
    262 
    263 CFX_SizeF CTypeset::GetEditSize(float fFontSize) {
    264   ASSERT(m_pSection);
    265   ASSERT(m_pVT);
    266   SplitLines(false, fFontSize);
    267   return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height());
    268 }
    269 
    270 CPVT_FloatRect CTypeset::Typeset() {
    271   ASSERT(m_pVT);
    272   m_pSection->m_LineArray.clear();
    273   SplitLines(true, 0.0f);
    274   OutputLines();
    275   return m_rcRet;
    276 }
    277 
// Breaks the section's word array into lines and computes the overall extent.
//
// |bTypeset|  - when true, word metrics are taken from the overloads that
//               receive only the word, and every finished line is appended to
//               the section with AddLine(). When false, metrics are computed
//               for |fFontSize| and no line is added (measure-only pass).
// |fFontSize| - font size used for the metrics when |bTypeset| is false.
//
// On return |m_rcRet| is (0, 0, widest line width, sum of line heights),
// where each line contributes leading + ascent - descent.
void CTypeset::SplitLines(bool bTypeset, float fFontSize) {
  ASSERT(m_pVT);
  ASSERT(m_pSection);
  // Index range [nLineHead, nLineTail] of the words on the current line.
  int32_t nLineHead = 0;
  int32_t nLineTail = 0;
  float fMaxX = 0.0f, fMaxY = 0.0f;
  // Running metrics of the current line, plus the snapshot taken at the most
  // recent break opportunity so the line can be rolled back to it.
  float fLineWidth = 0.0f, fBackupLineWidth = 0.0f;
  float fLineAscent = 0.0f, fBackupLineAscent = 0.0f;
  float fLineDescent = 0.0f, fBackupLineDescent = 0.0f;
  // Index of the word at the most recent break opportunity.
  int32_t nWordStartPos = 0;
  // Set when the current word starts a new breakable unit.
  bool bFullWord = false;
  // Number of break opportunities seen on this line after its first word.
  int32_t nLineFullWordIndex = 0;
  // Number of words consumed on the current line so far.
  int32_t nCharIndex = 0;
  CPVT_LineInfo line;
  float fWordWidth = 0;
  // Width available for text; zero disables wrapping below.
  float fTypesetWidth =
      std::max(m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(), 0.0f);
  int32_t nTotalWords =
      pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
  // True while directly following open-style punctuation; no break
  // opportunity is recorded until a word that is neither a space nor
  // open-style punctuation has been seen.
  bool bOpened = false;
  if (nTotalWords > 0) {
    int32_t i = 0;
    while (i < nTotalWords) {
      CPVT_WordInfo* pWord = m_pSection->m_WordArray[i].get();
      // For the very first word the "previous" word is the word itself.
      CPVT_WordInfo* pOldWord = pWord;
      if (i > 0) {
        pOldWord = m_pSection->m_WordArray[i - 1].get();
      }
      if (pWord) {
        // Fold this word's metrics into the line and fetch its width.
        if (bTypeset) {
          fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord));
          fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord));
          fWordWidth = m_pVT->GetWordWidth(*pWord);
        } else {
          fLineAscent =
              std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize));
          fLineDescent =
              std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize));
          fWordWidth = m_pVT->GetWordWidth(
              pWord->nFontIndex, pWord->Word, m_pVT->GetSubWord(),
              m_pVT->GetCharSpace(), m_pVT->GetHorzScale(), fFontSize,
              pWord->fWordTail);
        }
        // Decide whether a break opportunity lies in front of this word.
        if (!bOpened) {
          if (IsOpenStylePunctuation(pWord->Word)) {
            bOpened = true;
            bFullWord = true;
          } else if (pOldWord) {
            if (NeedDivision(pOldWord->Word, pWord->Word)) {
              bFullWord = true;
            }
          }
        } else {
          if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) {
            bOpened = false;
          }
        }
        if (bFullWord) {
          bFullWord = false;
          // A break in front of the line's first word is not counted.
          if (nCharIndex > 0) {
            nLineFullWordIndex++;
          }
          // Remember where to roll back to if the line overflows later.
          nWordStartPos = i;
          fBackupLineWidth = fLineWidth;
          fBackupLineAscent = fLineAscent;
          fBackupLineDescent = fLineDescent;
        }
        nCharIndex++;
      }
      // Overflow: with auto-return enabled, adding this word would exceed the
      // available width, so the current line is closed.
      if (m_pVT->IsAutoReturn() && fTypesetWidth > 0 &&
          fLineWidth + fWordWidth > fTypesetWidth) {
        // Prefer breaking at the last recorded opportunity: rewind to it and
        // restore the metrics captured there.
        if (nLineFullWordIndex > 0) {
          i = nWordStartPos;
          fLineWidth = fBackupLineWidth;
          fLineAscent = fBackupLineAscent;
          fLineDescent = fBackupLineDescent;
        }
        // A word that overflows while alone on the line is kept there anyway,
        // so the loop always advances.
        if (nCharIndex == 1) {
          fLineWidth = fWordWidth;
          i++;
        }
        nLineTail = i - 1;
        if (bTypeset) {
          line.nBeginWordIndex = nLineHead;
          line.nEndWordIndex = nLineTail;
          line.nTotalWord = nLineTail - nLineHead + 1;
          line.fLineWidth = fLineWidth;
          line.fLineAscent = fLineAscent;
          line.fLineDescent = fLineDescent;
          m_pSection->AddLine(line);
        }
        fMaxY += (fLineAscent + m_pVT->GetLineLeading());
        fMaxY -= fLineDescent;
        fMaxX = std::max(fLineWidth, fMaxX);
        // Start a fresh line at word |i|.
        nLineHead = i;
        fLineWidth = 0.0f;
        fLineAscent = 0.0f;
        fLineDescent = 0.0f;
        nCharIndex = 0;
        nLineFullWordIndex = 0;
        bFullWord = false;
      } else {
        // The word fits: extend the line and move on.
        fLineWidth += fWordWidth;
        i++;
      }
    }
    // Flush the words left over after the last break as the final line.
    if (nLineHead <= nTotalWords - 1) {
      nLineTail = nTotalWords - 1;
      if (bTypeset) {
        line.nBeginWordIndex = nLineHead;
        line.nEndWordIndex = nLineTail;
        line.nTotalWord = nLineTail - nLineHead + 1;
        line.fLineWidth = fLineWidth;
        line.fLineAscent = fLineAscent;
        line.fLineDescent = fLineDescent;
        m_pSection->AddLine(line);
      }
      fMaxY += (fLineAscent + m_pVT->GetLineLeading());
      fMaxY -= fLineDescent;
      fMaxX = std::max(fLineWidth, fMaxX);
    }
  } else {
    // Section without words: it still occupies one line whose height comes
    // from the line/default-font metrics.
    if (bTypeset) {
      fLineAscent = m_pVT->GetLineAscent();
      fLineDescent = m_pVT->GetLineDescent();
    } else {
      fLineAscent =
          m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize);
      fLineDescent =
          m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize);
    }
    if (bTypeset) {
      // Word indices of -1 mark the line as containing no words.
      line.nBeginWordIndex = -1;
      line.nEndWordIndex = -1;
      line.nTotalWord = 0;
      line.fLineWidth = 0;
      line.fLineAscent = fLineAscent;
      line.fLineDescent = fLineDescent;
      m_pSection->AddLine(line);
    }
    fMaxY += m_pVT->GetLineLeading() + fLineAscent - fLineDescent;
  }
  m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY);
}
    422 
    423 void CTypeset::OutputLines() {
    424   ASSERT(m_pVT);
    425   ASSERT(m_pSection);
    426   float fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f;
    427   float fPosX = 0.0f, fPosY = 0.0f;
    428   float fLineIndent = m_pVT->GetLineIndent();
    429   float fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f);
    430   switch (m_pVT->GetAlignment()) {
    431     default:
    432     case 0:
    433       fMinX = 0.0f;
    434       break;
    435     case 1:
    436       fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF;
    437       break;
    438     case 2:
    439       fMinX = fTypesetWidth - m_rcRet.Width();
    440       break;
    441   }
    442   fMaxX = fMinX + m_rcRet.Width();
    443   fMinY = 0.0f;
    444   fMaxY = m_rcRet.Height();
    445   int32_t nTotalLines =
    446       pdfium::CollectionSize<int32_t>(m_pSection->m_LineArray);
    447   if (nTotalLines > 0) {
    448     for (int32_t l = 0; l < nTotalLines; l++) {
    449       CLine* pLine = m_pSection->m_LineArray[l].get();
    450       switch (m_pVT->GetAlignment()) {
    451         default:
    452         case 0:
    453           fPosX = 0;
    454           break;
    455         case 1:
    456           fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) *
    457                   VARIABLETEXT_HALF;
    458           break;
    459         case 2:
    460           fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth;
    461           break;
    462       }
    463       fPosX += fLineIndent;
    464       fPosY += m_pVT->GetLineLeading();
    465       fPosY += pLine->m_LineInfo.fLineAscent;
    466       pLine->m_LineInfo.fLineX = fPosX - fMinX;
    467       pLine->m_LineInfo.fLineY = fPosY - fMinY;
    468       for (int32_t w = pLine->m_LineInfo.nBeginWordIndex;
    469            w <= pLine->m_LineInfo.nEndWordIndex; w++) {
    470         if (pdfium::IndexInBounds(m_pSection->m_WordArray, w)) {
    471           CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
    472           pWord->fWordX = fPosX - fMinX;
    473           pWord->fWordY = fPosY - fMinY;
    474 
    475           fPosX += m_pVT->GetWordWidth(*pWord);
    476         }
    477       }
    478       fPosY -= pLine->m_LineInfo.fLineDescent;
    479     }
    480   }
    481   m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY);
    482 }
    483