Home | History | Annotate | Download | only in fpdftext
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "../../include/fpdfapi/fpdf_resource.h"
      8 #include "../../include/fpdfapi/fpdf_pageobj.h"
      9 #include "../../include/fpdftext/fpdf_text.h"
     10 #include "../../include/fpdfapi/fpdf_page.h"
     11 #include "../../include/fpdfapi/fpdf_module.h"
     12 #include <ctype.h>
     13 #include "text_int.h"
     14 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar)
     15 {
     16     if(curChar < 255 ) {
     17         return FALSE;
     18     }
     19     if ( (curChar >= 0x0600 && curChar <= 0x06FF)
     20             || (curChar >= 0xFE70 && curChar <= 0xFEFF)
     21             || (curChar >= 0xFB50 && curChar <= 0xFDFF)
     22             || (curChar >= 0x0400 && curChar <= 0x04FF)
     23             || (curChar >= 0x0500 && curChar <= 0x052F)
     24             || (curChar >= 0xA640 && curChar <= 0xA69F)
     25             || (curChar >= 0x2DE0 && curChar <= 0x2DFF)
     26             || curChar == 8467
     27             || (curChar >= 0x2000 && curChar <= 0x206F)) {
     28         return FALSE;
     29     }
     30     return TRUE;
     31 }
     32 CPDFText_ParseOptions::CPDFText_ParseOptions()
     33     : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE)
     34 {
     35 }
     36 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions)
     37 {
     38     CPDF_TextPage* pTextPageEx = FX_NEW CPDF_TextPage(pPage, ParserOptions);
     39     return pTextPageEx;
     40 }
     41 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags)
     42 {
     43     CPDF_TextPage* pTextPage = FX_NEW CPDF_TextPage(pPage, flags);
     44     return	pTextPage;
     45 }
     46 IPDF_TextPage*	IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, int flags)
     47 {
     48     CPDF_TextPage* pTextPage = FX_NEW CPDF_TextPage(pObjs, flags);
     49     return	pTextPage;
     50 }
     51 IPDF_TextPageFind*	IPDF_TextPageFind::CreatePageFind(const IPDF_TextPage* pTextPage)
     52 {
     53     if (!pTextPage) {
     54         return NULL;
     55     }
     56     return FX_NEW CPDF_TextPageFind(pTextPage);
     57 }
     58 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract()
     59 {
     60     return FX_NEW CPDF_LinkExtract();
     61 }
     62 #define  TEXT_BLANK_CHAR		L' '
     63 #define  TEXT_LINEFEED_CHAR		L'\n'
     64 #define	 TEXT_RETURN_CHAR		L'\r'
     65 #define  TEXT_EMPTY				L""
     66 #define  TEXT_BLANK				L" "
     67 #define  TEXT_RETURN_LINEFEED	L"\r\n"
     68 #define  TEXT_LINEFEED			L"\n"
     69 #define	 TEXT_CHARRATIO_GAPDELTA	0.070
     70 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
     71     : m_pPreTextObj(NULL),
     72       m_IsParsered(FALSE),
     73       m_charList(512),
     74       m_TempCharList(50),
     75       m_TextlineDir(-1),
     76       m_CurlineRect(0, 0, 0, 0)
     77 {
     78     m_pPage = pPage;
     79     m_parserflag = flags;
     80     m_TextBuf.EstimateSize(0, 10240);
     81     pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int) pPage->GetPageWidth(), (int)pPage->GetPageHeight(), 0);
     82 }
     83 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions)
     84     : m_pPreTextObj(NULL)
     85     , m_IsParsered(FALSE)
     86     , m_charList(512)
     87     , m_TempCharList(50)
     88     , m_TextlineDir(-1)
     89     , m_CurlineRect(0, 0, 0, 0)
     90     , m_ParseOptions(ParserOptions)
     91 {
     92     m_pPage = pPage;
     93     m_parserflag = 0;
     94     m_TextBuf.EstimateSize(0, 10240);
     95     pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int) pPage->GetPageWidth(), (int)pPage->GetPageHeight(), 0);
     96 }
     97 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags)
     98     : m_pPreTextObj(NULL),
     99       m_IsParsered(FALSE),
    100       m_charList(512),
    101       m_TempCharList(50),
    102       m_TextlineDir(-1),
    103       m_CurlineRect(0, 0, 0, 0)
    104 {
    105     m_pPage = pPage;
    106     m_parserflag = flags;
    107     m_TextBuf.EstimateSize(0, 10240);
    108     CFX_FloatRect pageRect = pPage->CalcBoundingBox();
    109     m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top);
    110 }
    111 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize)
    112 {
    113     m_ParseOptions.m_bNormalizeObjs = bNormalize;
    114 }
    115 FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo)
    116 {
    117     if(!pCharInfo) {
    118         return FALSE;
    119     }
    120     switch(pCharInfo->m_Unicode) {
    121         case 0x2:
    122         case 0x3:
    123         case 0x93:
    124         case 0x94:
    125         case 0x96:
    126         case 0x97:
    127         case 0x98:
    128         case 0xfffe:
    129             if(pCharInfo->m_Flag == FPDFTEXT_CHAR_HYPHEN) {
    130                 return FALSE;
    131             } else {
    132                 return TRUE;
    133             }
    134         default:
    135             return FALSE;
    136     }
    137 }
    138 FX_BOOL CPDF_TextPage::ParseTextPage()
    139 {
    140     if (!m_pPage) {
    141         m_IsParsered = FALSE;
    142         return FALSE;
    143     }
    144     m_IsParsered = FALSE;
    145     m_TextBuf.Clear();
    146     m_charList.RemoveAll();
    147     m_pPreTextObj = NULL;
    148     ProcessObject();
    149     m_IsParsered = TRUE;
    150     if(!m_ParseOptions.m_bGetCharCodeOnly) {
    151         m_CharIndex.RemoveAll();
    152         int nCount = m_charList.GetSize();
    153         if(nCount) {
    154             m_CharIndex.Add(0);
    155         }
    156         for(int i = 0; i < nCount; i++) {
    157             int indexSize = m_CharIndex.GetSize();
    158             FX_BOOL bNormal = FALSE;
    159             PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
    160             if(charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
    161                 bNormal = TRUE;
    162             }
    163 #ifdef FOXIT_CHROME_BUILD
    164             else if(charinfo.m_Unicode == 0 || IsControlChar(&charinfo))
    165 #else
    166             else if(charinfo.m_Unicode == 0)
    167 #endif
    168                 bNormal = FALSE;
    169             else {
    170                 bNormal = TRUE;
    171             }
    172             if(bNormal) {
    173                 if(indexSize % 2) {
    174                     m_CharIndex.Add(1);
    175                 } else {
    176                     if(indexSize <= 0) {
    177                         continue;
    178                     }
    179                     m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1);
    180                 }
    181             } else {
    182                 if(indexSize % 2) {
    183                     if(indexSize <= 0) {
    184                         continue;
    185                     }
    186                     m_CharIndex.SetAt(indexSize - 1, i + 1);
    187                 } else {
    188                     m_CharIndex.Add(i + 1);
    189                 }
    190             }
    191         }
    192         int indexSize = m_CharIndex.GetSize();
    193         if(indexSize % 2) {
    194             m_CharIndex.RemoveAt(indexSize - 1);
    195         }
    196     }
    197     return TRUE;
    198 }
    199 int	CPDF_TextPage::CountChars() const
    200 {
    201     if(m_ParseOptions.m_bGetCharCodeOnly) {
    202         return m_TextBuf.GetSize();
    203     }
    204     return m_charList.GetSize();
    205 }
    206 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const
    207 {
    208     int indexSize = m_CharIndex.GetSize();
    209     int count = 0;
    210     for(int i = 0; i < indexSize; i += 2) {
    211         count += m_CharIndex.GetAt(i + 1);
    212         if(count > TextIndex) {
    213             return 	TextIndex - count + m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i);
    214         }
    215     }
    216     return -1;
    217 }
    218 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const
    219 {
    220     int indexSize = m_CharIndex.GetSize();
    221     int count = 0;
    222     for(int i = 0; i < indexSize; i += 2) {
    223         count += m_CharIndex.GetAt(i + 1);
    224         if(m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i) > CharIndex) {
    225             if(CharIndex - m_CharIndex.GetAt(i) < 0) {
    226                 return -1;
    227             }
    228             return 	CharIndex - m_CharIndex.GetAt(i) + count - m_CharIndex.GetAt(i + 1);
    229         }
    230     }
    231     return -1;
    232 }
    233 void CPDF_TextPage::GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const
    234 {
    235     if(m_ParseOptions.m_bGetCharCodeOnly) {
    236         return;
    237     }
    238     if(start < 0 || nCount == 0) {
    239         return;
    240     }
    241     if (!m_IsParsered)	{
    242         return;
    243     }
    244     PAGECHAR_INFO		info_curchar;
    245     CPDF_TextObject*	pCurObj = NULL;
    246     CFX_FloatRect		rect;
    247     int					curPos = start;
    248     FX_BOOL				flagNewRect = TRUE;
    249     if (nCount + start > m_charList.GetSize() || nCount == -1) {
    250         nCount = m_charList.GetSize() - start;
    251     }
    252     while (nCount--) {
    253         info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(curPos++);
    254         if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
    255             continue;
    256         }
    257         if(info_curchar.m_CharBox.Width() < 0.01 || info_curchar.m_CharBox.Height() < 0.01) {
    258             continue;
    259         }
    260         if(!pCurObj) {
    261             pCurObj = info_curchar.m_pTextObj;
    262         }
    263         if (pCurObj != info_curchar.m_pTextObj) {
    264             rectArray.Add(rect);
    265             pCurObj = info_curchar.m_pTextObj;
    266             flagNewRect = TRUE;
    267         }
    268         if (flagNewRect) {
    269             FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY;
    270             CFX_AffineMatrix matrix, matrix_reverse;
    271             info_curchar.m_pTextObj->GetTextMatrix(&matrix);
    272             matrix.Concat(info_curchar.m_Matrix);
    273             matrix_reverse.SetReverse(matrix);
    274             matrix_reverse.Transform(orgX, orgY);
    275             rect.left = info_curchar.m_CharBox.left;
    276             rect.right = info_curchar.m_CharBox.right;
    277             if (pCurObj->GetFont()->GetTypeDescent()) {
    278                 rect.bottom = orgY + pCurObj->GetFont()->GetTypeDescent() * pCurObj->GetFontSize() / 1000;
    279                 FX_FLOAT xPosTemp = orgX;
    280                 matrix.Transform(xPosTemp, rect.bottom);
    281             } else {
    282                 rect.bottom = info_curchar.m_CharBox.bottom;
    283             }
    284             if (pCurObj->GetFont()->GetTypeAscent()) {
    285                 rect.top = orgY + pCurObj->GetFont()->GetTypeAscent() * pCurObj->GetFontSize() / 1000;
    286                 FX_FLOAT xPosTemp = orgX + GetCharWidth(info_curchar.m_CharCode, pCurObj->GetFont()) * pCurObj->GetFontSize() / 1000;
    287                 matrix.Transform(xPosTemp, rect.top);
    288             } else {
    289                 rect.top = info_curchar.m_CharBox.top;
    290             }
    291             flagNewRect = FALSE;
    292             rect = info_curchar.m_CharBox;
    293             rect.Normalize();
    294         } else {
    295             info_curchar.m_CharBox.Normalize();
    296             if (rect.left > info_curchar.m_CharBox.left) {
    297                 rect.left = info_curchar.m_CharBox.left;
    298             }
    299             if (rect.right < info_curchar.m_CharBox.right) {
    300                 rect.right = info_curchar.m_CharBox.right;
    301             }
    302             if ( rect.top < info_curchar.m_CharBox.top) {
    303                 rect.top = info_curchar.m_CharBox.top;
    304             }
    305             if (rect.bottom > info_curchar.m_CharBox.bottom) {
    306                 rect.bottom = info_curchar.m_CharBox.bottom;
    307             }
    308         }
    309     }
    310     rectArray.Add(rect);
    311     return;
    312 }
    313 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point , FX_FLOAT xTorelance, FX_FLOAT yTorelance) const
    314 {
    315     if(m_ParseOptions.m_bGetCharCodeOnly) {
    316         return -3;
    317     }
    318     if (!m_IsParsered)	{
    319         return	-3;
    320     }
    321     FX_FLOAT distance = 0;
    322     int pos = 0;
    323     int NearPos = -1;
    324     double xdif = 5000, ydif = 5000;
    325     while(pos < m_charList.GetSize()) {
    326         PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos));
    327         CFX_FloatRect charrect = charinfo.m_CharBox;
    328         if (charrect.Contains(point.x, point.y)) {
    329             break;
    330         }
    331         if (xTorelance > 0 || yTorelance > 0) {
    332             CFX_FloatRect charRectExt;
    333             charrect.Normalize();
    334             charRectExt.left = charrect.left - xTorelance / 2;
    335             charRectExt.right = charrect.right + xTorelance / 2;
    336             charRectExt.top = charrect.top + yTorelance / 2;
    337             charRectExt.bottom = charrect.bottom - yTorelance / 2;
    338             if (charRectExt.Contains(point.x, point.y)) {
    339                 double curXdif, curYdif;
    340                 curXdif = FXSYS_fabs(point.x - charrect.left) < FXSYS_fabs(point.x - charrect.right) ? FXSYS_fabs(point.x - charrect.left) : FXSYS_fabs(point.x - charrect.right);
    341                 curYdif = FXSYS_fabs(point.y - charrect.bottom) < FXSYS_fabs(point.y - charrect.top	) ? FXSYS_fabs(point.y - charrect.bottom) : FXSYS_fabs(point.y - charrect.top);
    342                 if (curYdif + curXdif < xdif + ydif) {
    343                     ydif = curYdif;
    344                     xdif = curXdif;
    345                     NearPos = pos;
    346                 }
    347             }
    348         }
    349         ++pos;
    350     }
    351     if (pos >= m_charList.GetSize()) {
    352         pos = NearPos;
    353     }
    354     return pos;
    355 }
    356 CFX_WideString CPDF_TextPage::GetTextByRect(CFX_FloatRect rect) const
    357 {
    358     CFX_WideString strText;
    359     if(m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) {
    360         return strText;
    361     }
    362     int nCount = m_charList.GetSize();
    363     int pos = 0;
    364     FX_FLOAT posy = 0;
    365     FX_BOOL IsContainPreChar = FALSE;
    366     FX_BOOL	ISAddLineFeed = FALSE;
    367     while (pos < nCount) {
    368         PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
    369         if (IsRectIntersect(rect, charinfo.m_CharBox)) {
    370             if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && ISAddLineFeed) {
    371                 posy = charinfo.m_OriginY;
    372                 if (strText.GetLength() > 0) {
    373                     strText += L"\r\n";
    374                 }
    375             }
    376             IsContainPreChar = TRUE;
    377             ISAddLineFeed = FALSE;
    378             if (charinfo.m_Unicode) {
    379                 strText += charinfo.m_Unicode;
    380             }
    381         } else if (charinfo.m_Unicode == 32) {
    382             if (IsContainPreChar && charinfo.m_Unicode) {
    383                 strText += charinfo.m_Unicode;
    384                 IsContainPreChar = FALSE;
    385                 ISAddLineFeed = FALSE;
    386             }
    387         } else {
    388             IsContainPreChar = FALSE;
    389             ISAddLineFeed = TRUE;
    390         }
    391     }
    392     return strText;
    393 }
    394 void CPDF_TextPage::GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const
    395 {
    396     if(m_ParseOptions.m_bGetCharCodeOnly) {
    397         return;
    398     }
    399     if (!m_IsParsered)	{
    400         return;
    401     }
    402     CFX_FloatRect		curRect;
    403     FX_BOOL				flagNewRect = TRUE;
    404     CPDF_TextObject*	pCurObj = NULL;
    405     int nCount = m_charList.GetSize();
    406     int pos = 0;
    407     while (pos < nCount) {
    408         PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
    409         if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
    410             continue;
    411         }
    412         if(pos == 494) {
    413             int a = 0;
    414         }
    415         if (IsRectIntersect(rect, info_curchar.m_CharBox)) {
    416             if(!pCurObj) {
    417                 pCurObj = info_curchar.m_pTextObj;
    418             }
    419             if (pCurObj != info_curchar.m_pTextObj) {
    420                 resRectArray.Add(curRect);
    421                 pCurObj = info_curchar.m_pTextObj;
    422                 flagNewRect = TRUE;
    423             }
    424             if (flagNewRect) {
    425                 curRect = info_curchar.m_CharBox;
    426                 flagNewRect = FALSE;
    427                 curRect.Normalize();
    428             } else {
    429                 info_curchar.m_CharBox.Normalize();
    430                 if (curRect.left > info_curchar.m_CharBox.left) {
    431                     curRect.left = info_curchar.m_CharBox.left;
    432                 }
    433                 if (curRect.right < info_curchar.m_CharBox.right) {
    434                     curRect.right = info_curchar.m_CharBox.right;
    435                 }
    436                 if ( curRect.top < info_curchar.m_CharBox.top) {
    437                     curRect.top = info_curchar.m_CharBox.top;
    438                 }
    439                 if (curRect.bottom > info_curchar.m_CharBox.bottom) {
    440                     curRect.bottom = info_curchar.m_CharBox.bottom;
    441                 }
    442             }
    443         }
    444     }
    445     resRectArray.Add(curRect);
    446     return;
    447 }
    448 int	CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const
    449 {
    450     if(m_ParseOptions.m_bGetCharCodeOnly) {
    451         return -3;
    452     }
    453     CPDF_Point point(x, y);
    454     return GetIndexAtPos(point, xTorelance, yTorelance);
    455 }
    456 int CPDF_TextPage::GetOrderByDirection(int order, int direction) const
    457 {
    458     if(m_ParseOptions.m_bGetCharCodeOnly) {
    459         return -3;
    460     }
    461     if (!m_IsParsered) {
    462         return -3;
    463     }
    464     if (direction == FPDFTEXT_RIGHT || direction == FPDFTEXT_LEFT) {
    465         order += direction;
    466         while(order >= 0 && order < m_charList.GetSize()) {
    467             PAGECHAR_INFO cinfo = *(PAGECHAR_INFO*)m_charList.GetAt(order);
    468             if (cinfo.m_Flag != FPDFTEXT_CHAR_GENERATED) {
    469                 break;
    470             } else {
    471                 if (cinfo.m_Unicode == TEXT_LINEFEED_CHAR || cinfo.m_Unicode == TEXT_RETURN_CHAR) {
    472                     order += direction;
    473                 } else {
    474                     break;
    475                 }
    476             }
    477         }
    478         if (order >= m_charList.GetSize()) {
    479             order = -2;
    480         }
    481         return order;
    482     }
    483     PAGECHAR_INFO charinfo;
    484     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(order);
    485     CPDF_Point curPos(charinfo.m_OriginX, charinfo.m_OriginY);
    486     FX_FLOAT difPosY = 0.0, minXdif = 1000;
    487     int	minIndex = -2;
    488     int index = order;
    489     FX_FLOAT height = charinfo.m_CharBox.Height();
    490     if (direction == FPDFTEXT_UP) {
    491         minIndex = -1;
    492         while (1) {
    493             if (--index < 0)	{
    494                 return -1;
    495             }
    496             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    497             if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) {
    498                 difPosY = charinfo.m_OriginY;
    499                 minIndex = index;
    500                 break;
    501             }
    502         }
    503         FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x;
    504         minXdif = PreXdif;
    505         if (PreXdif == 0)	{
    506             return index;
    507         }
    508         FX_FLOAT curXdif = 0;
    509         while (--index >= 0) {
    510             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    511             if (difPosY != charinfo.m_OriginY) {
    512                 break;
    513             }
    514             curXdif = charinfo.m_OriginX - curPos.x;
    515             if (curXdif == 0) {
    516                 return index;
    517             }
    518             int signflag = 0;
    519             if (curXdif > 0) {
    520                 signflag = 1;
    521             } else {
    522                 signflag = -1;
    523             }
    524             if (signflag * PreXdif < 0) {
    525                 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) {
    526                     return index + 1;
    527                 } else {
    528                     return index;
    529                 }
    530             }
    531             if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) {
    532                 minIndex = index;
    533                 minXdif = curXdif;
    534             }
    535             PreXdif = curXdif;
    536             if (difPosY != charinfo.m_OriginY) {
    537                 break;
    538             }
    539         }
    540         return minIndex;
    541     } else if(FPDFTEXT_DOWN) {
    542         minIndex = -2;
    543         while (1) {
    544             if (++index > m_charList.GetSize() - 1)	{
    545                 return minIndex;
    546             }
    547             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    548             if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) {
    549                 difPosY = charinfo.m_OriginY;
    550                 minIndex = index;
    551                 break;
    552             }
    553         }
    554         FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x;
    555         minXdif = PreXdif;
    556         if (PreXdif == 0)	{
    557             return index;
    558         }
    559         FX_FLOAT curXdif = 0;
    560         while (++index < m_charList.GetSize()) {
    561             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    562             if (difPosY != charinfo.m_OriginY) {
    563                 break;
    564             }
    565             curXdif = charinfo.m_OriginX - curPos.x;
    566             if (curXdif == 0) {
    567                 return index;
    568             }
    569             int signflag = 0;
    570             if (curXdif > 0) {
    571                 signflag = 1;
    572             } else {
    573                 signflag = -1;
    574             }
    575             if (signflag * PreXdif < 0) {
    576                 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) {
    577                     return index - 1;
    578                 } else {
    579                     return index;
    580                 }
    581             }
    582             if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) {
    583                 minXdif = curXdif;
    584                 minIndex = index;
    585             }
    586             PreXdif = curXdif;
    587         }
    588         return minIndex;
    589     }
    590 }
    591 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const
    592 {
    593     if(m_ParseOptions.m_bGetCharCodeOnly) {
    594         return;
    595     }
    596     if (!m_IsParsered)	{
    597         return;
    598     }
    599     if (index < 0 || index >= m_charList.GetSize())	{
    600         return;
    601     }
    602     PAGECHAR_INFO charinfo;
    603     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    604     info.m_Charcode = charinfo.m_CharCode;
    605     info.m_OriginX = charinfo.m_OriginX;
    606     info.m_OriginY = charinfo.m_OriginY;
    607     info.m_Unicode = charinfo.m_Unicode;
    608     info.m_Flag = charinfo.m_Flag;
    609     info.m_CharBox = charinfo.m_CharBox;
    610     info.m_pTextObj = charinfo.m_pTextObj;
    611     if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) {
    612         info.m_FontSize = charinfo.m_pTextObj->GetFontSize();
    613     }
    614     info.m_Matrix.Copy(charinfo.m_Matrix);
    615     return;
    616 }
    617 void CPDF_TextPage::CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const
    618 {
    619     PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start);
    620     PAGECHAR_INFO charinfo2 = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1);
    621     if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) {
    622         return;
    623     }
    624     if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) {
    625         PAGECHAR_INFO charinfo1 = charinfo;
    626         int startIndex = start;
    627         while(FPDFTEXT_CHAR_PIECE == charinfo1.m_Flag && charinfo1.m_Index == charinfo.m_Index) {
    628             startIndex--;
    629             if (startIndex < 0)	{
    630                 break;
    631             }
    632             charinfo1 = *(PAGECHAR_INFO*)m_charList.GetAt(startIndex);
    633         }
    634         startIndex++;
    635         start = startIndex;
    636     }
    637     if (FPDFTEXT_CHAR_PIECE == charinfo2.m_Flag) {
    638         PAGECHAR_INFO charinfo3 = charinfo2;
    639         int endIndex = start + nCount - 1;
    640         while(FPDFTEXT_CHAR_PIECE == charinfo3.m_Flag && charinfo3.m_Index == charinfo2.m_Index) {
    641             endIndex++;
    642             if (endIndex >= m_charList.GetSize())	{
    643                 break;
    644             }
    645             charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex);
    646         }
    647         endIndex--;
    648         nCount = endIndex - start + 1;
    649     }
    650 }
    651 CFX_WideString CPDF_TextPage::GetPageText(int start , int nCount) const
    652 {
    653     if (!m_IsParsered || nCount == 0) {
    654         return L"";
    655     }
    656     if (start < 0) {
    657         start = 0;
    658     }
    659     if	(nCount == -1) {
    660         nCount = m_charList.GetSize() - start;
    661         return m_TextBuf.GetWideString().Mid(start, m_TextBuf.GetWideString().GetLength());
    662     }
    663     if(nCount <= 0 || m_charList.GetSize() <= 0) {
    664         return L"";
    665     }
    666     if(nCount + start > m_charList.GetSize() - 1) {
    667         nCount = m_charList.GetSize() - start;
    668     }
    669     if (nCount <= 0) {
    670         return L"";
    671     }
    672     CheckMarkedContentObject(start, nCount);
    673     int startindex = 0;
    674     PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start);
    675     int startOffset = 0;
    676     while(charinfo.m_Index == -1) {
    677         startOffset++;
    678         if (startOffset > nCount || start + startOffset >= m_charList.GetSize())	{
    679             return L"";
    680         }
    681         charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + startOffset);
    682     }
    683     startindex = charinfo.m_Index;
    684     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1);
    685     int nCountOffset = 0;
    686     while (charinfo.m_Index == -1) {
    687         nCountOffset++;
    688         if (nCountOffset >= nCount) {
    689             return L"";
    690         }
    691         charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1);
    692     }
    693     nCount = start + nCount - nCountOffset - startindex;
    694     if(nCount <= 0) {
    695         return L"";
    696     }
    697     return m_TextBuf.GetWideString().Mid(startindex, nCount);
    698 }
    699 int CPDF_TextPage::CountRects(int start, int nCount)
    700 {
    701     if(m_ParseOptions.m_bGetCharCodeOnly) {
    702         return -1;
    703     }
    704     if (!m_IsParsered)	{
    705         return -1;
    706     }
    707     if (start < 0) {
    708         return -1;
    709     }
    710     if (nCount == -1 || nCount + start > m_charList.GetSize() ) {
    711         nCount = m_charList.GetSize() - start;
    712     }
    713     m_SelRects.RemoveAll();
    714     GetRectArray(start, nCount, m_SelRects);
    715     return m_SelRects.GetSize();
    716 }
    717 void CPDF_TextPage::GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const
    718 {
    719     if(m_ParseOptions.m_bGetCharCodeOnly) {
    720         return ;
    721     }
    722     if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) {
    723         return;
    724     }
    725     left = m_SelRects.GetAt(rectIndex).left;
    726     top = m_SelRects.GetAt(rectIndex).top;
    727     right = m_SelRects.GetAt(rectIndex).right;
    728     bottom = m_SelRects.GetAt(rectIndex).bottom;
    729 }
    730 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate)
    731 {
    732     if(m_ParseOptions.m_bGetCharCodeOnly) {
    733         return FALSE;
    734     }
    735     if(end == start) {
    736         return FALSE;
    737     }
    738     FX_FLOAT dx, dy;
    739     FPDF_CHAR_INFO info1, info2;
    740     GetCharInfo(start, info1);
    741     GetCharInfo(end, info2);
    742     while(info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) {
    743         end--;
    744         if(end <= start) {
    745             return FALSE;
    746         }
    747         GetCharInfo(end, info2);
    748     }
    749     dx = (info2.m_OriginX - info1.m_OriginX);
    750     dy = (info2.m_OriginY - info1.m_OriginY);
    751     if(dx == 0) {
    752         if(dy > 0) {
    753             Rotate = 90;
    754         } else if (dy < 0) {
    755             Rotate = 270;
    756         } else {
    757             Rotate = 0;
    758         }
    759     } else {
    760         float a = FXSYS_atan2(dy, dx);
    761         Rotate = (int)(a * 180 / FX_PI + 0.5);
    762     }
    763     if(Rotate < 0) {
    764         Rotate = -Rotate;
    765     } else if(Rotate > 0) {
    766         Rotate = 360 - Rotate;
    767     }
    768     return TRUE;
    769 }
    770 FX_BOOL	CPDF_TextPage::GetBaselineRotate(CFX_FloatRect rect , int& Rotate)
    771 {
    772     if(m_ParseOptions.m_bGetCharCodeOnly) {
    773         return FALSE;
    774     }
    775     int start, end, count, n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, TRUE);
    776     if(n < 1) {
    777         return FALSE;
    778     }
    779     if(n > 1) {
    780         GetBoundedSegment(n - 1, start, count);
    781         end = start + count - 1;
    782         GetBoundedSegment(0, start, count);
    783     } else {
    784         GetBoundedSegment(0, start, count);
    785         end = start + count - 1;
    786     }
    787     return GetBaselineRotate(start, end, Rotate);
    788 }
    789 FX_BOOL	CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate)
    790 {
    791     if(m_ParseOptions.m_bGetCharCodeOnly) {
    792         return FALSE;
    793     }
    794     if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) {
    795         return FALSE;
    796     }
    797     CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
    798     return GetBaselineRotate(rect , Rotate);
    799 }
    800 int	CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains )
    801 {
    802     if(m_ParseOptions.m_bGetCharCodeOnly) {
    803         return -1;
    804     }
    805     m_Segment.RemoveAll();
    806     if (!m_IsParsered)	{
    807         return -1;
    808     }
    809     CFX_FloatRect rect(left, bottom, right, top);
    810     rect.Normalize();
    811     int nCount = m_charList.GetSize();
    812     int pos = 0;
    813     FPDF_SEGMENT	segment;
    814     segment.m_Start = 0;
    815     segment.m_nCount = 0;
    816     FX_BOOL		segmentStatus = 0;
    817     FX_BOOL		IsContainPreChar = FALSE;
    818     while (pos < nCount) {
    819         if(pos == 493) {
    820             int a = 0;
    821         }
    822         PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos);
    823         if(bContains && rect.Contains(charinfo.m_CharBox)) {
    824             if (segmentStatus == 0 || segmentStatus == 2) {
    825                 segment.m_Start = pos;
    826                 segment.m_nCount = 1;
    827                 segmentStatus = 1;
    828             } else if (segmentStatus == 1) {
    829                 segment.m_nCount++;
    830             }
    831             IsContainPreChar = TRUE;
    832         } else if (!bContains && (IsRectIntersect(rect, charinfo.m_CharBox) || rect.Contains(charinfo.m_OriginX, charinfo.m_OriginY))) {
    833             if (segmentStatus == 0 || segmentStatus == 2) {
    834                 segment.m_Start = pos;
    835                 segment.m_nCount = 1;
    836                 segmentStatus = 1;
    837             } else if (segmentStatus == 1) {
    838                 segment.m_nCount++;
    839             }
    840             IsContainPreChar = TRUE;
    841         } else if (charinfo.m_Unicode == 32) {
    842             if (IsContainPreChar == TRUE) {
    843                 if (segmentStatus == 0 || segmentStatus == 2) {
    844                     segment.m_Start = pos;
    845                     segment.m_nCount = 1;
    846                     segmentStatus = 1;
    847                 } else if (segmentStatus == 1) {
    848                     segment.m_nCount++;
    849                 }
    850                 IsContainPreChar = FALSE;
    851             } else {
    852                 if (segmentStatus == 1) {
    853                     segmentStatus = 2;
    854                     m_Segment.Add(segment);
    855                     segment.m_Start = 0;
    856                     segment.m_nCount = 0;
    857                 }
    858             }
    859         } else {
    860             if (segmentStatus == 1) {
    861                 segmentStatus = 2;
    862                 m_Segment.Add(segment);
    863                 segment.m_Start = 0;
    864                 segment.m_nCount = 0;
    865             }
    866             IsContainPreChar = FALSE;
    867         }
    868         pos++;
    869     }
    870     if (segmentStatus == 1) {
    871         segmentStatus = 2;
    872         m_Segment.Add(segment);
    873         segment.m_Start = 0;
    874         segment.m_nCount = 0;
    875     }
    876     return m_Segment.GetSize();
    877 }
    878 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const
    879 {
    880     if(m_ParseOptions.m_bGetCharCodeOnly) {
    881         return ;
    882     }
    883     if (index < 0 || index >= m_Segment.GetSize()) {
    884         return;
    885     }
    886     start = m_Segment.GetAt(index).m_Start;
    887     count = m_Segment.GetAt(index).m_nCount;
    888 }
    889 int CPDF_TextPage::GetWordBreak(int index, int direction) const
    890 {
    891     if(m_ParseOptions.m_bGetCharCodeOnly) {
    892         return -1;
    893     }
    894     if (!m_IsParsered)	{
    895         return -1;
    896     }
    897     if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) {
    898         return -1;
    899     }
    900     if (index < 0 || index >= m_charList.GetSize()) {
    901         return -1;
    902     }
    903     PAGECHAR_INFO charinfo;
    904     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    905     if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED)	{
    906         return index;
    907     }
    908     if (!IsLetter(charinfo.m_Unicode)) {
    909         return index;
    910     }
    911     int breakPos = index;
    912     if (direction == FPDFTEXT_LEFT) {
    913         while (--breakPos > 0) {
    914             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
    915             if (!IsLetter(charinfo.m_Unicode)) {
    916                 return breakPos;
    917             }
    918         }
    919         return breakPos;
    920     } else if (direction == FPDFTEXT_RIGHT) {
    921         while (++breakPos < m_charList.GetSize()) {
    922             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
    923             if (!IsLetter(charinfo.m_Unicode)) {
    924                 return breakPos;
    925             }
    926         }
    927         return breakPos;
    928     }
    929     return breakPos;
    930 }
    931 FX_INT32 CPDF_TextPage::FindTextlineFlowDirection()
    932 {
    933     if (!m_pPage)	{
    934         return -1;
    935     }
    936     const FX_INT32 nPageWidth = (FX_INT32)((CPDF_Page*)m_pPage)->GetPageWidth();
    937     const FX_INT32 nPageHeight = (FX_INT32)((CPDF_Page*)m_pPage)->GetPageHeight();
    938     CFX_ByteArray nHorizontalMask;
    939     if (!nHorizontalMask.SetSize(nPageWidth)) {
    940         return -1;
    941     }
    942 	FX_BYTE* pDataH = nHorizontalMask.GetData();
    943     CFX_ByteArray nVerticalMask;
    944     if (!nVerticalMask.SetSize(nPageHeight)) {
    945         return -1;
    946     }
    947 	FX_BYTE* pDataV = nVerticalMask.GetData();
    948     FX_INT32 index = 0;
    949     FX_FLOAT fLineHeight = 0.0f;
    950     CPDF_PageObject* pPageObj = NULL;
    951     FX_POSITION	pos = NULL;
    952     pos = m_pPage->GetFirstObjectPosition();
    953     if(!pos) {
    954         return -1;
    955     }
    956     while(pos) {
    957         pPageObj = m_pPage->GetNextObject(pos);
    958         if(NULL == pPageObj) {
    959             continue;
    960         }
    961         if(PDFPAGE_TEXT != pPageObj->m_Type) {
    962             continue;
    963         }
    964 		FX_INT32 minH = (FX_INT32)pPageObj->m_Left < 0 ? 0 : (FX_INT32)pPageObj->m_Left;
    965 		FX_INT32 maxH = (FX_INT32)pPageObj->m_Right > nPageWidth ? nPageWidth : (FX_INT32)pPageObj->m_Right;
    966 		FX_INT32 minV = (FX_INT32)pPageObj->m_Bottom < 0 ? 0 : (FX_INT32)pPageObj->m_Bottom;
    967 		FX_INT32 maxV = (FX_INT32)pPageObj->m_Top > nPageHeight ? nPageHeight : (FX_INT32)pPageObj->m_Top;
    968 		if (minH >= maxH || minV >= maxV){
    969 			continue;
    970 		}
    971 
    972 		FXSYS_memset8(pDataH + minH, 1, maxH - minH);
    973 		FXSYS_memset8(pDataV + minV, 1, maxV - minV);
    974 
    975 		if (fLineHeight <= 0.0f) {
    976 			fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom;
    977 		}
    978 
    979 		pPageObj = NULL;
    980     }
    981     FX_INT32 nStartH = 0;
    982     FX_INT32 nEndH = 0;
    983     FX_FLOAT nSumH = 0.0f;
    984     for (index = 0; index < nPageWidth; index++)
    985         if(1 == nHorizontalMask[index]) {
    986             break;
    987         }
    988     nStartH = index;
    989     for (index = nPageWidth; index > 0; index--)
    990         if(1 == nHorizontalMask[index - 1]) {
    991             break;
    992         }
    993     nEndH = index;
    994     for (index = nStartH; index < nEndH; index++) {
    995         nSumH += nHorizontalMask[index];
    996     }
    997     nSumH /= nEndH - nStartH;
    998     FX_INT32 nStartV = 0;
    999     FX_INT32 nEndV = 0;
   1000     FX_FLOAT nSumV = 0.0f;
   1001     for (index = 0; index < nPageHeight; index++)
   1002         if(1 == nVerticalMask[index]) {
   1003             break;
   1004         }
   1005     nStartV = index;
   1006     for (index = nPageHeight; index > 0; index--)
   1007         if(1 == nVerticalMask[index - 1]) {
   1008             break;
   1009         }
   1010     nEndV = index;
   1011     for (index = nStartV; index < nEndV; index++) {
   1012         nSumV += nVerticalMask[index];
   1013     }
   1014     nSumV /= nEndV - nStartV;
   1015     if ((nEndV - nStartV) < (FX_INT32)(2 * fLineHeight)) {
   1016         return 0;
   1017     }
   1018     if ((nEndH - nStartH) < (FX_INT32)(2 * fLineHeight)) {
   1019         return 1;
   1020     }
   1021     if (nSumH > 0.8f) {
   1022         return 0;
   1023     }
   1024     if (nSumH - nSumV > 0.0f) {
   1025         return 0;
   1026     }
   1027     if (nSumV - nSumH > 0.0f) {
   1028         return 1;
   1029     }
   1030     return -1;
   1031 }
   1032 void CPDF_TextPage::ProcessObject()
   1033 {
   1034     CPDF_PageObject*	pPageObj = NULL;
   1035     if (!m_pPage)	{
   1036         return;
   1037     }
   1038     FX_POSITION	pos;
   1039     pos = m_pPage->GetFirstObjectPosition();
   1040     if (!pos)	{
   1041         return;
   1042     }
   1043     m_TextlineDir = FindTextlineFlowDirection();
   1044     int nCount = 0;
   1045     while (pos) {
   1046         pPageObj = m_pPage->GetNextObject(pos);
   1047         if(pPageObj) {
   1048             if(pPageObj->m_Type == PDFPAGE_TEXT) {
   1049                 if (nCount == 3) {
   1050                     nCount = nCount;
   1051                 }
   1052                 CFX_AffineMatrix matrix;
   1053                 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos);
   1054                 nCount++;
   1055             } else if (pPageObj->m_Type == PDFPAGE_FORM) {
   1056                 CFX_AffineMatrix formMatrix(1, 0, 0, 1, 0, 0);
   1057                 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix);
   1058             }
   1059         }
   1060         pPageObj = NULL;
   1061     }
   1062     int count = m_LineObj.GetSize();
   1063     for(int i = 0; i < count; i++) {
   1064         ProcessTextObject(m_LineObj.GetAt(i));
   1065     }
   1066     m_LineObj.RemoveAll();
   1067     CloseTempLine();
   1068 }
   1069 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, CFX_AffineMatrix formMatrix)
   1070 {
   1071     CPDF_PageObject*	pPageObj = NULL;
   1072     FX_POSITION	pos;
   1073     if (!pFormObj)	{
   1074         return;
   1075     }
   1076     pos = pFormObj->m_pForm->GetFirstObjectPosition();
   1077     if (!pos)	{
   1078         return;
   1079     }
   1080     CFX_AffineMatrix curFormMatrix;
   1081     curFormMatrix.Copy(pFormObj->m_FormMatrix);
   1082     curFormMatrix.Concat(formMatrix);
   1083     while (pos) {
   1084         pPageObj = pFormObj->m_pForm->GetNextObject(pos);
   1085         if(pPageObj) {
   1086             if(pPageObj->m_Type == PDFPAGE_TEXT) {
   1087                 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos);
   1088             } else if (pPageObj->m_Type == PDFPAGE_FORM) {
   1089                 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix);
   1090             }
   1091         }
   1092         pPageObj = NULL;
   1093     }
   1094 }
   1095 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const
   1096 {
   1097     if(charCode == -1) {
   1098         return 0;
   1099     }
   1100     int w = pFont->GetCharWidthF(charCode);
   1101     if(w == 0) {
   1102         CFX_ByteString str;
   1103         pFont->AppendChar(str, charCode);
   1104         w = pFont->GetStringWidth(str, 1);
   1105         if(w == 0) {
   1106             FX_RECT BBox;
   1107             pFont->GetCharBBox(charCode, BBox);
   1108             w = BBox.right - BBox.left;
   1109         }
   1110     }
   1111     return w;
   1112 }
   1113 void CPDF_TextPage::OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str)
   1114 {
   1115     FX_INT32 start, count;
   1116     FX_INT32 ret = pBidi->GetBidiInfo(start, count);
   1117     if(ret == 2) {
   1118         for(int i = start + count - 1; i >= start; i--) {
   1119             m_TextBuf.AppendChar(str.GetAt(i));
   1120             m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i));
   1121         }
   1122     } else {
   1123         int end = start + count ;
   1124         for(int i = start; i < end; i++) {
   1125             m_TextBuf.AppendChar(str.GetAt(i));
   1126             m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i));
   1127         }
   1128     }
   1129 }
   1130 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i)
   1131 {
   1132     PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
   1133     FX_WCHAR wChar = str.GetAt(i);
   1134 #ifdef FOXIT_CHROME_BUILD
   1135     if(!IsControlChar(&Info)) {
   1136 #else
   1137     if(wChar != 0xfffe) {
   1138 #endif
   1139         Info.m_Index = m_TextBuf.GetLength();
   1140         if (wChar >= 0xFB00 && wChar <= 0xFB06) {
   1141             FX_LPWSTR pDst = NULL;
   1142             FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
   1143             if (nCount >= 1) {
   1144                 pDst = FX_Alloc(FX_WCHAR, nCount);
   1145                 if (!pDst) {
   1146                     return;
   1147                 }
   1148                 FX_Unicode_GetNormalization(wChar, pDst);
   1149                 for (int nIndex = 0; nIndex < nCount; nIndex++) {
   1150                     PAGECHAR_INFO Info2 = Info;
   1151                     Info2.m_Unicode = pDst[nIndex];
   1152                     Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
   1153                     m_TextBuf.AppendChar(Info2.m_Unicode);
   1154                     if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1155                         m_charList.Add(Info2);
   1156                     }
   1157                 }
   1158                 FX_Free(pDst);
   1159                 return;
   1160             }
   1161         }
   1162         m_TextBuf.AppendChar(wChar);
   1163     } else {
   1164         Info.m_Index = -1;
   1165     }
   1166     if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1167         m_charList.Add(Info);
   1168     }
   1169 }
   1170 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i)
   1171 {
   1172     PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
   1173 #ifdef FOXIT_CHROME_BUILD
   1174     if(!IsControlChar(&Info)) {
   1175 #else
   1176     if(str.GetAt(i) != 0xfffe) {
   1177 #endif
   1178         Info.m_Index = m_TextBuf.GetLength();
   1179         FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE);
   1180         FX_LPWSTR pDst = NULL;
   1181         FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
   1182         if (nCount >= 1) {
   1183             pDst = FX_Alloc(FX_WCHAR, nCount);
   1184             if (!pDst) {
   1185                 return;
   1186             }
   1187             FX_Unicode_GetNormalization(wChar, pDst);
   1188             for (int nIndex = 0; nIndex < nCount; nIndex++) {
   1189                 PAGECHAR_INFO Info2 = Info;
   1190                 Info2.m_Unicode = pDst[nIndex];
   1191                 Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
   1192                 m_TextBuf.AppendChar(Info2.m_Unicode);
   1193                 if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1194                     m_charList.Add(Info2);
   1195                 }
   1196             }
   1197             FX_Free(pDst);
   1198             return;
   1199         } else {
   1200             Info.m_Unicode = wChar;
   1201         }
   1202         m_TextBuf.AppendChar(Info.m_Unicode);
   1203     } else {
   1204         Info.m_Index = -1;
   1205     }
   1206     if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1207         m_charList.Add(Info);
   1208     }
   1209 }
   1210 void CPDF_TextPage::CloseTempLine()
   1211 {
   1212     int count1 = m_TempCharList.GetSize();
   1213     if (count1 <= 0) {
   1214         return;
   1215     }
   1216     IFX_BidiChar* BidiChar = IFX_BidiChar::Create();
   1217     CFX_WideString str = m_TempTextBuf.GetWideString();
   1218     CFX_WordArray order;
   1219     FX_BOOL bR2L = FALSE;
   1220     FX_INT32 start = 0, count = 0, i = 0;
   1221     int nR2L = 0, nL2R = 0;
   1222     FX_BOOL bPrevSpace = FALSE;
   1223     for (i = 0; i < str.GetLength(); i++) {
   1224         if(str.GetAt(i) == 32) {
   1225             if(bPrevSpace) {
   1226                 m_TempTextBuf.Delete(i, 1);
   1227                 m_TempCharList.Delete(i);
   1228                 str.Delete(i);
   1229                 count1 --;
   1230                 i--;
   1231                 continue;
   1232             }
   1233             bPrevSpace = TRUE;
   1234         } else {
   1235             bPrevSpace = FALSE;
   1236         }
   1237         if(BidiChar && BidiChar->AppendChar(str.GetAt(i))) {
   1238             FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
   1239             order.Add(start);
   1240             order.Add(count);
   1241             order.Add(ret);
   1242             if(!bR2L) {
   1243                 if(ret == 2) {
   1244                     nR2L++;
   1245                 } else if (ret == 1) {
   1246                     nL2R++;
   1247                 }
   1248             }
   1249         }
   1250     }
   1251     if(BidiChar && BidiChar->EndChar()) {
   1252         FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
   1253         order.Add(start);
   1254         order.Add(count);
   1255         order.Add(ret);
   1256         if(!bR2L) {
   1257             if(ret == 2) {
   1258                 nR2L++;
   1259             } else if(ret == 1) {
   1260                 nL2R++;
   1261             }
   1262         }
   1263     }
   1264     if(nR2L > 0 && nR2L >= nL2R) {
   1265         bR2L = TRUE;
   1266     }
   1267     if(this->m_parserflag == FPDFTEXT_RLTB || bR2L) {
   1268         int count = order.GetSize();
   1269         for(int j = count - 1; j > 0; j -= 3) {
   1270             int ret = order.GetAt(j);
   1271             int start = order.GetAt(j - 2);
   1272             int count1 = order.GetAt(j - 1);
   1273             if(ret == 2 || ret == 0) {
   1274                 for(int i = start + count1 - 1; i >= start; i--) {
   1275                     AddCharInfoByRLDirection(str, i);
   1276                 }
   1277             } else {
   1278                 i = j;
   1279                 FX_BOOL bSymbol = FALSE;
   1280                 while(i > 0 && order.GetAt(i) != 2) {
   1281                     bSymbol = !order.GetAt(i);
   1282                     i -= 3;
   1283                 }
   1284                 int end = start + count1 ;
   1285                 int n = 0;
   1286                 if(bSymbol) {
   1287                     n = i + 6;
   1288                 } else {
   1289                     n = i + 3;
   1290                 }
   1291                 if(n >= j) {
   1292                     for(int m = start; m < end; m++) {
   1293                         AddCharInfoByLRDirection(str, m);
   1294                     }
   1295                 } else {
   1296                     i = j;
   1297                     j = n;
   1298                     for(; n <= i; n += 3) {
   1299                         int ret = order.GetAt(n);
   1300                         int start = order.GetAt(n - 2);
   1301                         int count1 = order.GetAt(n - 1);
   1302                         int end = start + count1 ;
   1303                         for(int m = start; m < end; m++) {
   1304                             AddCharInfoByLRDirection(str, m);
   1305                         }
   1306                     }
   1307                 }
   1308             }
   1309         }
   1310     } else {
   1311         int count = order.GetSize();
   1312         FX_BOOL bL2R = FALSE;
   1313         for(int j = 0; j < count; j += 3) {
   1314             int ret = order.GetAt(j + 2);
   1315             int start = order.GetAt(j);
   1316             int count1 = order.GetAt(j + 1);
   1317             if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) {
   1318                 int i = j + 3;
   1319                 while(bR2L && i < count) {
   1320                     if(order.GetAt(i + 2) == 1) {
   1321                         break;
   1322                     } else {
   1323                         i += 3;
   1324                     }
   1325                 }
   1326                 if(i == 3) {
   1327                     j = -3;
   1328                     bL2R = TRUE;
   1329                     continue;
   1330                 }
   1331                 int end = m_TempCharList.GetSize() - 1;
   1332                 if(i < count) {
   1333                     end = order.GetAt(i) - 1;
   1334                 }
   1335                 j = i - 3;
   1336                 for(int n = end; n >= start; n--) {
   1337                     AddCharInfoByRLDirection(str, n);
   1338                 }
   1339             } else {
   1340                 int end = start + count1 ;
   1341                 for(int i = start; i < end; i++) {
   1342                     AddCharInfoByLRDirection(str, i);
   1343                 }
   1344             }
   1345         }
   1346     }
   1347     int ntext = m_TextBuf.GetSize();
   1348     ntext = m_charList.GetSize();
   1349     order.RemoveAll();
   1350     m_TempCharList.RemoveAll();
   1351     m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength());
   1352     BidiChar->Release();
   1353 }
   1354 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject*	pTextObj, CFX_AffineMatrix formMatrix, FX_POSITION ObjPos)
   1355 {
   1356     CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, pTextObj->m_Top);
   1357     if(FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f ) {
   1358         return;
   1359     }
   1360     int count = m_LineObj.GetSize();
   1361     PDFTEXT_Obj Obj;
   1362     Obj.m_pTextObj = pTextObj;
   1363     Obj.m_formMatrix = formMatrix;
   1364     if(count == 0) {
   1365         m_LineObj.Add(Obj);
   1366         return;
   1367     }
   1368     if (IsSameAsPreTextObject(pTextObj, ObjPos)) {
   1369         return;
   1370     }
   1371     PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(count - 1);
   1372     CPDF_TextObjectItem item;
   1373     int nItem = prev_Obj.m_pTextObj->CountItems();
   1374     prev_Obj.m_pTextObj->GetItemInfo(nItem - 1, &item);
   1375     FX_FLOAT prev_width = GetCharWidth(item.m_CharCode, prev_Obj.m_pTextObj->GetFont()) * prev_Obj.m_pTextObj->GetFontSize() / 1000;
   1376     CFX_AffineMatrix prev_matrix;
   1377     prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix);
   1378     prev_width = FXSYS_fabs(prev_width);
   1379     prev_matrix.Concat(prev_Obj.m_formMatrix);
   1380     prev_width = prev_matrix.TransformDistance(prev_width);
   1381     pTextObj->GetItemInfo(0, &item);
   1382     FX_FLOAT this_width = GetCharWidth(item.m_CharCode, pTextObj->GetFont()) * pTextObj->GetFontSize() / 1000;
   1383     this_width = FXSYS_fabs(this_width);
   1384     CFX_AffineMatrix this_matrix;
   1385     pTextObj->GetTextMatrix(&this_matrix);
   1386     this_width = FXSYS_fabs(this_width);
   1387     this_matrix.Concat(formMatrix);
   1388     this_width = this_matrix.TransformDistance(this_width);
   1389     FX_FLOAT threshold = prev_width > this_width ? prev_width / 4 : this_width / 4;
   1390     FX_FLOAT prev_x = prev_Obj.m_pTextObj->GetPosX(), prev_y = prev_Obj.m_pTextObj->GetPosY();
   1391     prev_Obj.m_formMatrix.Transform(prev_x, prev_y);
   1392     m_DisplayMatrix.Transform(prev_x, prev_y);
   1393     FX_FLOAT this_x = pTextObj->GetPosX(), this_y = pTextObj->GetPosY();
   1394     formMatrix.Transform(this_x, this_y);
   1395     m_DisplayMatrix.Transform(this_x, this_y);
   1396     if (FXSYS_fabs(this_y - prev_y) > threshold * 2) {
   1397         for(int i = 0; i < count; i++) {
   1398             ProcessTextObject(m_LineObj.GetAt(i));
   1399         }
   1400         m_LineObj.RemoveAll();
   1401         m_LineObj.Add(Obj);
   1402         return;
   1403     }
   1404     int i = 0;
   1405     if(m_ParseOptions.m_bNormalizeObjs) {
   1406         for(i = count - 1; i >= 0; i--) {
   1407             PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i);
   1408             CFX_AffineMatrix prev_matrix;
   1409             prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix);
   1410             FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), Prev_y = prev_Obj.m_pTextObj->GetPosY();
   1411             prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y);
   1412             m_DisplayMatrix.Transform(Prev_x, Prev_y);
   1413             if(this_x >= Prev_x) {
   1414                 if(i == count - 1) {
   1415                     m_LineObj.Add(Obj);
   1416                 } else {
   1417                     m_LineObj.InsertAt(i + 1, Obj);
   1418                 }
   1419                 break;
   1420             }
   1421         }
   1422         if(i < 0) {
   1423             m_LineObj.InsertAt(0, Obj);
   1424         }
   1425     } else {
   1426         m_LineObj.Add(Obj);
   1427     }
   1428 }
   1429 FX_INT32 CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj)
   1430 {
   1431     CPDF_TextObject* pTextObj = Obj.m_pTextObj;
   1432     CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
   1433     if(!pMarkData) {
   1434         return FPDFTEXT_MC_PASS;
   1435     }
   1436     int nContentMark = pMarkData->CountItems();
   1437     if (nContentMark < 1) {
   1438         return FPDFTEXT_MC_PASS;
   1439     }
   1440     CFX_WideString actText;
   1441     FX_BOOL bExist = FALSE;
   1442     CPDF_Dictionary* pDict = NULL;
   1443     int n = 0;
   1444     for (n = 0; n < nContentMark; n++) {
   1445         CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
   1446         CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
   1447         pDict = (CPDF_Dictionary*)item.GetParam();
   1448         CPDF_String* temp = (CPDF_String*)pDict->GetElement(FX_BSTRC("ActualText"));
   1449         if (temp) {
   1450             bExist = TRUE;
   1451             actText = temp->GetUnicodeText();
   1452         }
   1453     }
   1454     if (!bExist) {
   1455         return FPDFTEXT_MC_PASS;
   1456     }
   1457     if (m_pPreTextObj) {
   1458         if (CPDF_ContentMarkData* pPreMarkData = (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) {
   1459             if (pPreMarkData->CountItems() == n) {
   1460                 CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1);
   1461                 if (pDict == item.GetParam()) {
   1462                     return FPDFTEXT_MC_DONE;
   1463                 }
   1464             }
   1465         }
   1466     }
   1467     CPDF_Font*	pFont = pTextObj->GetFont();
   1468     FX_STRSIZE nItems = actText.GetLength();
   1469     if (nItems < 1) {
   1470         return FPDFTEXT_MC_PASS;
   1471     }
   1472     bExist = FALSE;
   1473     for (FX_STRSIZE i = 0; i < nItems; i++) {
   1474         FX_WCHAR wChar = actText.GetAt(i);
   1475         if (-1 == pFont->CharCodeFromUnicode(wChar)) {
   1476             continue;
   1477         } else {
   1478             bExist = TRUE;
   1479             break;
   1480         }
   1481     }
   1482     if (!bExist) {
   1483         return FPDFTEXT_MC_PASS;
   1484     }
   1485     bExist = FALSE;
   1486     for (FX_STRSIZE j = 0; j < nItems; j++) {
   1487         FX_WCHAR wChar = actText.GetAt(j);
   1488         if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) {
   1489             bExist = TRUE;
   1490             break;
   1491         }
   1492     }
   1493     if (!bExist) {
   1494         return FPDFTEXT_MC_DONE;
   1495     }
   1496     return FPDFTEXT_MC_DELAY;
   1497 }
   1498 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj)
   1499 {
   1500     CPDF_TextObject* pTextObj = Obj.m_pTextObj;
   1501     CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
   1502     if(!pMarkData) {
   1503         return;
   1504     }
   1505     int nContentMark = pMarkData->CountItems();
   1506     if (nContentMark < 1) {
   1507         return;
   1508     }
   1509     CFX_WideString actText;
   1510     CPDF_Dictionary* pDict = NULL;
   1511     int n = 0;
   1512     for (n = 0; n < nContentMark; n++) {
   1513         CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
   1514         CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
   1515         pDict = (CPDF_Dictionary*)item.GetParam();
   1516         CPDF_String* temp = (CPDF_String*)pDict->GetElement(FX_BSTRC("ActualText"));
   1517         if (temp) {
   1518             actText = temp->GetUnicodeText();
   1519         }
   1520     }
   1521     FX_STRSIZE nItems = actText.GetLength();
   1522     if (nItems < 1) {
   1523         return;
   1524     }
   1525     CPDF_Font*	pFont = pTextObj->GetFont();
   1526     CFX_AffineMatrix formMatrix = Obj.m_formMatrix;
   1527     CFX_AffineMatrix matrix;
   1528     pTextObj->GetTextMatrix(&matrix);
   1529     matrix.Concat(formMatrix);
   1530     FX_FLOAT fPosX = pTextObj->GetPosX();
   1531     FX_FLOAT fPosY = pTextObj->GetPosY();
   1532     int nCharInfoIndex = m_TextBuf.GetLength();
   1533     CFX_FloatRect charBox;
   1534     charBox.top = pTextObj->m_Top;
   1535     charBox.left = pTextObj->m_Left;
   1536     charBox.right = pTextObj->m_Right;
   1537     charBox.bottom = pTextObj->m_Bottom;
   1538     for (FX_STRSIZE k = 0; k < nItems; k++) {
   1539         FX_WCHAR wChar = actText.GetAt(k);
   1540         if (wChar <= 0x80 && !isprint(wChar)) {
   1541             wChar = 0x20;
   1542         }
   1543         if (wChar >= 0xFFFD) {
   1544             continue;
   1545         }
   1546         PAGECHAR_INFO charinfo;
   1547         charinfo.m_OriginX = fPosX;
   1548         charinfo.m_OriginY = fPosY;
   1549         charinfo.m_Index = nCharInfoIndex;
   1550         charinfo.m_Unicode = wChar;
   1551         charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar);
   1552         charinfo.m_Flag = FPDFTEXT_CHAR_PIECE;
   1553         charinfo.m_pTextObj = pTextObj;
   1554         charinfo.m_CharBox.top = charBox.top;
   1555         charinfo.m_CharBox.left = charBox.left;
   1556         charinfo.m_CharBox.right = charBox.right;
   1557         charinfo.m_CharBox.bottom = charBox.bottom;
   1558         charinfo.m_Matrix.Copy(matrix);
   1559         m_TempTextBuf.AppendChar(wChar);
   1560         m_TempCharList.Add(charinfo);
   1561     }
   1562 }
   1563 void CPDF_TextPage::FindPreviousTextObject(void)
   1564 {
   1565     if (m_TempCharList.GetSize() < 1 && m_charList.GetSize() < 1) {
   1566         return;
   1567     }
   1568     PAGECHAR_INFO preChar;
   1569     if (m_TempCharList.GetSize() >= 1) {
   1570         preChar = *(PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1);
   1571     } else {
   1572         preChar = *(PAGECHAR_INFO*)m_charList.GetAt(m_charList.GetSize() - 1);
   1573     }
   1574     if (preChar.m_pTextObj) {
   1575         m_pPreTextObj = preChar.m_pTextObj;
   1576     }
   1577 }
   1578 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj)
   1579 {
   1580     CPDF_TextObject* pTextObj = Obj.m_pTextObj;
   1581     if(FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f ) {
   1582         return;
   1583     }
   1584     CFX_AffineMatrix formMatrix = Obj.m_formMatrix;
   1585     CPDF_Font*	pFont = pTextObj->GetFont();
   1586     CFX_AffineMatrix matrix;
   1587     pTextObj->GetTextMatrix(&matrix);
   1588     matrix.Concat(formMatrix);
   1589     FX_INT32 bPreMKC = PreMarkedContent(Obj);
   1590     if (FPDFTEXT_MC_DONE == bPreMKC) {
   1591         m_pPreTextObj = pTextObj;
   1592         m_perMatrix.Copy(formMatrix);
   1593         return;
   1594     }
   1595     int result = 0;
   1596     if (m_pPreTextObj) {
   1597         result = ProcessInsertObject(pTextObj, formMatrix);
   1598         if (2 == result) {
   1599             m_CurlineRect = CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
   1600         } else {
   1601             m_CurlineRect.Union(CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top));
   1602         }
   1603         PAGECHAR_INFO generateChar;
   1604         if (result == 1) {
   1605             if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) {
   1606                 if (!formMatrix.IsIdentity()) {
   1607                     generateChar.m_Matrix.Copy(formMatrix);
   1608                 }
   1609                 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
   1610                 m_TempCharList.Add(generateChar);
   1611             }
   1612         } else if(result == 2) {
   1613             CloseTempLine();
   1614             if(m_TextBuf.GetSize()) {
   1615                 if(m_ParseOptions.m_bGetCharCodeOnly) {
   1616                     m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
   1617                     m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
   1618                 } else {
   1619                     if(GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) {
   1620                         m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
   1621                         if (!formMatrix.IsIdentity()) {
   1622                             generateChar.m_Matrix.Copy(formMatrix);
   1623                         }
   1624                         m_charList.Add(generateChar);
   1625                     }
   1626                     if(GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
   1627                         m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
   1628                         if (!formMatrix.IsIdentity()) {
   1629                             generateChar.m_Matrix.Copy(formMatrix);
   1630                         }
   1631                         m_charList.Add(generateChar);
   1632                     }
   1633                 }
   1634             }
   1635         } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) {
   1636             FX_INT32 nChars = pTextObj->CountChars();
   1637             if (nChars == 1) {
   1638                 CPDF_TextObjectItem item;
   1639                 pTextObj->GetCharInfo(0, &item);
   1640                 CFX_WideString wstrItem = pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
   1641                 if(wstrItem.IsEmpty()) {
   1642                     wstrItem += (FX_WCHAR)item.m_CharCode;
   1643                 }
   1644                 FX_WCHAR curChar = wstrItem.GetAt(0);
   1645                 if (0x2D == curChar || 0xAD == curChar) {
   1646                     return;
   1647                 }
   1648             }
   1649             while (m_TempTextBuf.GetSize() > 0 && m_TempTextBuf.GetWideString().GetAt(m_TempTextBuf.GetLength() - 1) == 0x20) {
   1650                 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
   1651                 m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
   1652             }
   1653             PAGECHAR_INFO* cha = (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1);
   1654             m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
   1655 #ifdef FOXIT_CHROME_BUILD
   1656             cha->m_Unicode = 0x2;
   1657             cha->m_Flag = FPDFTEXT_CHAR_HYPHEN;
   1658             m_TempTextBuf.AppendChar(0xfffe);
   1659 #else
   1660             cha->m_Unicode = 0;
   1661             m_TempTextBuf.AppendChar(0xfffe);
   1662 #endif
   1663         }
   1664     } else {
   1665         m_CurlineRect = CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
   1666     }
   1667     if (FPDFTEXT_MC_DELAY == bPreMKC) {
   1668         ProcessMarkedContent(Obj);
   1669         m_pPreTextObj = pTextObj;
   1670         m_perMatrix.Copy(formMatrix);
   1671         return;
   1672     }
   1673     m_pPreTextObj = pTextObj;
   1674     m_perMatrix.Copy(formMatrix);
   1675     int nItems = pTextObj->CountItems();
   1676     FX_FLOAT spacing = 0;
   1677     FX_FLOAT baseSpace = 0.0;
   1678     FX_BOOL bAllChar = TRUE;
   1679     if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) {
   1680         spacing = matrix.TransformDistance(pTextObj->m_TextState.GetObject()->m_CharSpace);
   1681         baseSpace = spacing;
   1682         for (int i = 0; i < nItems; i++) {
   1683             CPDF_TextObjectItem item;
   1684             pTextObj->GetItemInfo(i, &item);
   1685             if (item.m_CharCode == (FX_DWORD) - 1) {
   1686                 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
   1687                 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000;
   1688                 if(kerning + spacing < baseSpace) {
   1689                     baseSpace = kerning + spacing;
   1690                 }
   1691                 bAllChar = FALSE;
   1692             }
   1693         }
   1694         spacing = 0;
   1695         if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) {
   1696             baseSpace = 0.0;
   1697         }
   1698     }
   1699     for (int i = 0; i < nItems; i++) {
   1700         CPDF_TextObjectItem item;
   1701         PAGECHAR_INFO charinfo;
   1702         charinfo.m_OriginX = 0;
   1703         charinfo.m_OriginY = 0;
   1704         pTextObj->GetItemInfo(i, &item);
   1705         if (item.m_CharCode == (FX_DWORD) - 1) {
   1706             CFX_WideString str = m_TempTextBuf.GetWideString();
   1707             if(str.IsEmpty()) {
   1708                 str = m_TextBuf.GetWideString();
   1709             }
   1710             if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
   1711                 continue;
   1712             }
   1713             FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
   1714             spacing = -fontsize_h * item.m_OriginX / 1000;
   1715             continue;
   1716         }
   1717         FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace;
   1718         if (charSpace > 0.001) {
   1719             spacing += matrix.TransformDistance(charSpace);
   1720         } else if(charSpace < -0.001) {
   1721             spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
   1722         }
   1723         spacing -= baseSpace;
   1724         if (spacing && i > 0) {
   1725             int last_width = 0;
   1726             FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
   1727             FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
   1728             FX_FLOAT threshold = 0;
   1729             if (space_charcode != -1) {
   1730                 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000 ;
   1731             }
   1732             if (threshold > fontsize_h / 3) {
   1733                 threshold = 0;
   1734             } else {
   1735                 threshold /= 2;
   1736             }
   1737             if (threshold == 0) {
   1738                 threshold = fontsize_h;
   1739                 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
   1740                 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width;
   1741                 int nDivide = 6;
   1742                 if (threshold < 300) {
   1743                     nDivide = 2;
   1744                 } else if (threshold < 500) {
   1745                     nDivide = 4;
   1746                 } else if (threshold < 700) {
   1747                     nDivide = 5;
   1748                 }
   1749                 threshold = threshold / nDivide;
   1750                 threshold = fontsize_h * threshold / 1000;
   1751             }
   1752             if (threshold && (spacing && spacing >= threshold) ) {
   1753                 charinfo.m_Unicode = TEXT_BLANK_CHAR;
   1754                 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
   1755                 charinfo.m_pTextObj = pTextObj;
   1756                 charinfo.m_Index = m_TextBuf.GetLength();
   1757                 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
   1758                 charinfo.m_CharCode = -1;
   1759                 charinfo.m_Matrix.Copy(formMatrix);
   1760                 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, charinfo.m_OriginY);
   1761                 charinfo.m_CharBox = CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, charinfo.m_OriginX, charinfo.m_OriginY);
   1762                 m_TempCharList.Add(charinfo);
   1763             }
   1764             if (item.m_CharCode == (FX_DWORD) - 1) {
   1765                 continue;
   1766             }
   1767         }
   1768         spacing = 0;
   1769         CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
   1770         FX_BOOL bNoUnicode = FALSE;
   1771         FX_WCHAR wChar = wstrItem.GetAt(0);
   1772         if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) {
   1773             if(wstrItem.IsEmpty()) {
   1774                 wstrItem += (FX_WCHAR)item.m_CharCode;
   1775             } else {
   1776                 wstrItem.SetAt(0, (FX_WCHAR)item.m_CharCode);
   1777             }
   1778             bNoUnicode = TRUE;
   1779         }
   1780         charinfo.m_Index = -1;
   1781         charinfo.m_CharCode = item.m_CharCode;
   1782         if(bNoUnicode) {
   1783             charinfo.m_Flag = FPDFTEXT_CHAR_UNUNICODE;
   1784         } else {
   1785             charinfo.m_Flag = FPDFTEXT_CHAR_NORMAL;
   1786         }
   1787         charinfo.m_pTextObj = pTextObj;
   1788         charinfo.m_OriginX = 0, charinfo.m_OriginY = 0;
   1789         matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, charinfo.m_OriginY);
   1790         FX_RECT rect(0, 0, 0, 0);
   1791         rect.Intersect(0, 0, 0, 0);
   1792         charinfo.m_pTextObj->GetFont()->GetCharBBox(charinfo.m_CharCode, rect);
   1793         charinfo.m_CharBox.top = rect.top * pTextObj->GetFontSize() / 1000 + item.m_OriginY;
   1794         charinfo.m_CharBox.left = rect.left * pTextObj->GetFontSize() / 1000 + item.m_OriginX;
   1795         charinfo.m_CharBox.right = rect.right * pTextObj->GetFontSize() / 1000 + item.m_OriginX;
   1796         charinfo.m_CharBox.bottom = rect.bottom * pTextObj->GetFontSize() / 1000 + item.m_OriginY;
   1797         if (fabsf(charinfo.m_CharBox.top - charinfo.m_CharBox.bottom) < 0.01f) {
   1798             charinfo.m_CharBox.top = charinfo.m_CharBox.bottom + pTextObj->GetFontSize();
   1799         }
   1800         if (fabsf(charinfo.m_CharBox.right - charinfo.m_CharBox.left) < 0.01f) {
   1801             charinfo.m_CharBox.right = charinfo.m_CharBox.left + pTextObj->GetCharWidth(charinfo.m_CharCode);
   1802         }
   1803         matrix.TransformRect(charinfo.m_CharBox);
   1804         charinfo.m_Matrix.Copy(matrix);
   1805         if (wstrItem.IsEmpty()) {
   1806             charinfo.m_Unicode = 0;
   1807             m_TempCharList.Add(charinfo);
   1808             m_TempTextBuf.AppendChar(0xfffe);
   1809             continue;
   1810         } else {
   1811             int nTotal = wstrItem.GetLength();
   1812             int n = 0;
   1813             FX_BOOL bDel = FALSE;
   1814             while (n < m_TempCharList.GetSize() && n < 7) {
   1815                 n++;
   1816                 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - n);
   1817                 if(charinfo1->m_CharCode == charinfo.m_CharCode &&
   1818                         charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont()  &&
   1819                         FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize()  &&
   1820                         FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() ) {
   1821                     bDel = TRUE;
   1822                     break;
   1823                 }
   1824             }
   1825             if(!bDel) {
   1826                 for (int nIndex = 0; nIndex < nTotal; nIndex++) {
   1827                     charinfo.m_Unicode = wstrItem.GetAt(nIndex);
   1828                     if (charinfo.m_Unicode) {
   1829                         charinfo.m_Index = m_TextBuf.GetLength();
   1830                         m_TempTextBuf.AppendChar(charinfo.m_Unicode);
   1831                     } else {
   1832                         m_TempTextBuf.AppendChar(0xfffe);
   1833                     }
   1834                     m_TempCharList.Add(charinfo);
   1835                 }
   1836             } else if(i == 0) {
   1837                 CFX_WideString str = m_TempTextBuf.GetWideString();
   1838                 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
   1839                     m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
   1840                     m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
   1841                 }
   1842             }
   1843         }
   1844     }
   1845 }
   1846 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj)
   1847 {
   1848     FX_INT32 nChars = pTextObj->CountChars();
   1849     if (nChars == 1) {
   1850         return m_TextlineDir;
   1851     }
   1852     CPDF_TextObjectItem first, last;
   1853     pTextObj->GetCharInfo(0, &first);
   1854     pTextObj->GetCharInfo(nChars - 1, &last);
   1855     CFX_Matrix textMatrix;
   1856     pTextObj->GetTextMatrix(&textMatrix);
   1857     textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY);
   1858     textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY);
   1859     FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX);
   1860     FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY);
   1861     if (dX <= 0.0001f && dY <= 0.0001f) {
   1862         return -1;
   1863     }
   1864     CFX_VectorF v;
   1865     v.Set(dX, dY);
   1866     v.Normalize();
   1867     if (v.y <= 0.0872f) {
   1868         if (v.x <= 0.0872f) {
   1869             return m_TextlineDir;
   1870         }
   1871         return 0;
   1872     } else if (v.x <= 0.0872f) {
   1873         return 1;
   1874     }
   1875     return m_TextlineDir;
   1876 }
   1877 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar)
   1878 {
   1879     CFX_WideString strCurText = m_TempTextBuf.GetWideString();
   1880     if(strCurText.GetLength() == 0) {
   1881         strCurText = m_TextBuf.GetWideString();
   1882     }
   1883     FX_STRSIZE nCount = strCurText.GetLength();
   1884     int nIndex = nCount - 1;
   1885     FX_WCHAR wcTmp = strCurText.GetAt(nIndex);
   1886     while(wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) {
   1887         wcTmp = strCurText.GetAt(--nIndex);
   1888     }
   1889     if (0x2D == wcTmp || 0xAD == wcTmp) {
   1890         if (--nIndex > 0) {
   1891             FX_WCHAR preChar = strCurText.GetAt((nIndex));
   1892             if (((preChar >= L'A' && preChar <= L'Z') || (preChar >= L'a' && preChar <= L'z'))
   1893                     && ((curChar >= L'A' && curChar <= L'Z') || (curChar >= L'a' && curChar <= L'z'))) {
   1894                 return TRUE;
   1895             }
   1896         }
   1897         int size = m_TempCharList.GetSize();
   1898         PAGECHAR_INFO preChar;
   1899         if (size) {
   1900             preChar = (PAGECHAR_INFO)m_TempCharList[size - 1];
   1901         } else {
   1902             size = m_charList.GetSize();
   1903             if(size == 0) {
   1904                 return FALSE;
   1905             }
   1906             preChar = (PAGECHAR_INFO)m_charList[size - 1];
   1907         }
   1908         if (FPDFTEXT_CHAR_PIECE == preChar.m_Flag)
   1909             if (0xAD == preChar.m_Unicode || 0x2D == preChar.m_Unicode) {
   1910                 return TRUE;
   1911             }
   1912     }
   1913     return FALSE;
   1914 }
// Decides what, if anything, separates the previous text object
// (m_pPreTextObj) from pObj.
//
// pObj:       the text object about to be processed.
// formMatrix: matrix of the form containing pObj (passed by value; used to
//             transform pObj's position and concatenated onto its text matrix).
//
// Returns:
//   0 - nothing needs to be inserted,
//   1 - the objects are far enough apart for a blank,
//   2 - pObj starts a new line,
//   3 - pObj starts a new line (or is a lone hyphen) and IsHyphen() reports a
//       hyphen at the end of the text extracted so far.
// The caller must ensure m_pPreTextObj is non-NULL.
int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, CFX_AffineMatrix formMatrix)
{
    FindPreviousTextObject();
    FX_BOOL bNewline = FALSE;
    // Writing mode: 0 = horizontal, 1 = vertical. If pObj cannot tell (-1),
    // fall back to the previous object's mode.
    int WritingMode = GetTextObjectWritingMode(pObj);
    if(WritingMode == -1) {
        WritingMode = GetTextObjectWritingMode(m_pPreTextObj);
    }
    CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top);
    CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
    // PrevItem: last item of the previous object; item: first item of pObj.
    CPDF_TextObjectItem PrevItem, item;
    int nItem = m_pPreTextObj->CountItems();
    m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem);
    pObj->GetItemInfo(0, &item);
    // curChar: Unicode of pObj's first item, or its raw char code when the
    // font provides no mapping.
    CFX_WideString wstrItem = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
    if(wstrItem.IsEmpty()) {
        wstrItem += (FX_WCHAR)item.m_CharCode;
    }
    FX_WCHAR curChar = wstrItem.GetAt(0);
    if(WritingMode == 0) {
        // Horizontal text: both boxes are reasonably tall and their vertical
        // extents do not overlap -> new line.
        if(this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) {
            FX_FLOAT top = this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top;
            FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom : prev_rect.bottom;
            if(bottom >= top) {
                if(IsHyphen(curChar)) {
                    return 3;
                }
                return 2;
            }
        }
    } else if (WritingMode == 1) {
        // Vertical text: pObj's horizontal extent does not overlap the
        // current line rectangle -> new line.
        if(this_rect.Width() > pObj->GetFontSize() * 0.1f && prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) {
            FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left : m_CurlineRect.left;
            FX_FLOAT right = this_rect.right < m_CurlineRect.right ? this_rect.right : m_CurlineRect.right;
            if(right <= left) {
                if(IsHyphen(curChar)) {
                    return 3;
                }
                return 2;
            }
        }
    }
    // Widths of the two adjoining characters, scaled by their font sizes.
    FX_FLOAT last_pos = PrevItem.m_OriginX;
    int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont());
    FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000;
    last_width = FXSYS_fabs(last_width);
    int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont());
    FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000;
    this_width = FXSYS_fabs(this_width);
    // Initial threshold: a quarter of the wider character.
    FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width / 4;
    // prev_matrix = previous object's text matrix concatenated with the form
    // matrix stored for it; prev_reverse is set from it via SetReverse
    // (presumably the inverse - confirm against CFX_Matrix).
    CFX_AffineMatrix prev_matrix, prev_reverse;
    m_pPreTextObj->GetTextMatrix(&prev_matrix);
    prev_matrix.Concat(m_perMatrix);
    prev_reverse.SetReverse(prev_matrix);
    // (x, y): pObj's position run through formMatrix and then prev_reverse,
    // so it can be compared with last_pos / last_width below.
    FX_FLOAT x = pObj->GetPosX();
    FX_FLOAT y = pObj->GetPosY();
    formMatrix.Transform(x, y);
    prev_reverse.Transform(x, y);
    if(last_width < this_width) {
        threshold = prev_reverse.TransformDistance(threshold);
    }
    // rect1: previous object's horizontal extent with pObj's vertical extent;
    // rect2: previous object's box; rect3 keeps rect1 before intersection.
    CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, m_pPreTextObj->m_Right, pObj->m_Top);
    CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
    CFX_FloatRect rect3 = rect1;
    rect1.Intersect(rect2);
    if (WritingMode == 0) {
        // New line if the vertical extents do not intersect (both taller than
        // 5), or if the vertical offset y exceeds the threshold band.
        if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5)
                || ((y > threshold * 2 || y < threshold * -3) && (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) {
            bNewline = TRUE;
            if(nItem > 1 ) {
                // Exception: for left-to-right text under an unrotated display
                // matrix (a > 0.9, d < -0.9), treat the objects as the same line
                // when either object's position lies inside the other's vertical band.
                CPDF_TextObjectItem tempItem;
                m_pPreTextObj->GetItemInfo(0, &tempItem);
                CFX_AffineMatrix m;
                m_pPreTextObj->GetTextMatrix(&m);
                if(PrevItem.m_OriginX > tempItem.m_OriginX &&
                        m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 &&
                        m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9
                        && m.b < 0.1 && m.c < 0.1 ) {
                    CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, m_pPreTextObj->m_Top);
                    if(re.Contains(pObj->GetPosX(), pObj->GetPosY())) {
                        bNewline = FALSE;
                    } else {
                        CFX_FloatRect re(0, pObj->m_Bottom, 1000, pObj->m_Top);
                        if(re.Contains(m_pPreTextObj->GetPosX(), m_pPreTextObj->GetPosY())) {
                            bNewline = FALSE;
                        }
                    }
                }
            }
        }
    }
    if(bNewline) {
        if(IsHyphen(curChar)) {
            return 3;
        }
        return 2;
    }
    // A single-character object that is itself a hyphen / soft hyphen.
    FX_INT32 nChars = pObj->CountChars();
    if (nChars == 1 && ( 0x2D == curChar || 0xAD == curChar))
        if (IsHyphen(curChar)) {
            return 3;
        }
    // preChar: last Unicode unit of the previous object's last item.
    CFX_WideString PrevStr = m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode);
    FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1);
    CFX_AffineMatrix matrix;
    pObj->GetTextMatrix(&matrix);
    matrix.Concat(formMatrix);
    // Recompute the blank threshold from the wider raw glyph width: half of it
    // for narrow glyphs (<= 400), a quarter to a sixth for wider ones.
    threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth);
    threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 :  (threshold > 800 ? threshold / 6 : threshold / 5)) : (threshold / 2);
    if(nLastWidth >= nThisWidth) {
        threshold *= FXSYS_fabs(m_pPreTextObj->GetFontSize());
    } else {
        threshold *= FXSYS_fabs(pObj->GetFontSize());
        threshold = matrix.TransformDistance(threshold);
        threshold = prev_reverse.TransformDistance(threshold);
    }
    threshold /= 1000;
    // NOTE(review): two narrow threshold windows are widened by 1.5; the
    // reason for these specific values is not evident from this code.
    if((threshold < 1.4881 && threshold > 1.4879)
            || (threshold < 1.39001 && threshold > 1.38999)) {
        threshold *= 1.5;
    }
    // Insert a blank when neither adjoining character is already a blank and
    // the gap between the end of the previous character and x is large enough.
    if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && preChar != L' ')
        if (curChar != L' ' && preChar != L' ') {
            if((x - last_pos - last_width) > threshold || (last_pos - x - last_width) > threshold) {
                return 1;
            }
            if(x < 0 && (last_pos - x - last_width) > threshold) {
                return 1;
            }
            if((x - last_pos - last_width) > this_width || (x - last_pos - this_width) > last_width ) {
                return 1;
            }
        }
    return 0;
}
   2050 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2)
   2051 {
   2052     if (!pTextObj1 || !pTextObj2) {
   2053         return FALSE;
   2054     }
   2055     CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_Right, pTextObj2->m_Top);
   2056     CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_Right, pTextObj1->m_Top);
   2057     if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && !m_ParseOptions.m_bGetCharCodeOnly) {
   2058         FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left);
   2059         int nCount = m_charList.GetSize();
   2060         if (nCount >= 2) {
   2061             PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2];
   2062             FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width();
   2063             if (dbXdif > dbSpace) {
   2064                 return FALSE;
   2065             }
   2066         }
   2067     }
   2068     if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) {
   2069         rcPreObj.Intersect(rcCurObj);
   2070         if (rcPreObj.IsEmpty()) {
   2071             return FALSE;
   2072         }
   2073         if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() / 2) {
   2074             return FALSE;
   2075         }
   2076         if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) {
   2077             return FALSE;
   2078         }
   2079     }
   2080     int nPreCount = pTextObj2->CountItems();
   2081     int nCurCount = pTextObj1->CountItems();
   2082     if (nPreCount != nCurCount) {
   2083         return FALSE;
   2084     }
   2085     CPDF_TextObjectItem itemPer, itemCur;
   2086     for (int i = 0; i < nPreCount; i++) {
   2087         pTextObj2->GetItemInfo(i, &itemPer);
   2088         pTextObj1->GetItemInfo(i, &itemCur);
   2089         if (itemCur.m_CharCode != itemPer.m_CharCode) {
   2090             return FALSE;
   2091         }
   2092     }
   2093     if(FXSYS_fabs(pTextObj1->GetPosX() - pTextObj2->GetPosX()) > GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont())*pTextObj2->GetFontSize() / 1000 * 0.9 ||
   2094             FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) >
   2095             FX_MAX(FX_MAX(rcPreObj.Height() , rcPreObj.Width()), pTextObj2->GetFontSize()) / 8) {
   2096         return FALSE;
   2097     }
   2098     return TRUE;
   2099 }
   2100 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos)
   2101 {
   2102     if (!pTextObj) {
   2103         return FALSE;
   2104     }
   2105     int i = 0;
   2106     if (!ObjPos) {
   2107         ObjPos = m_pPage->GetLastObjectPosition();
   2108     }
   2109     CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos);
   2110     while (i < 5 && ObjPos) {
   2111         pObj = m_pPage->GetPrevObject(ObjPos);
   2112         if(pObj == pTextObj) {
   2113             continue;
   2114         }
   2115         if(pObj->m_Type != PDFPAGE_TEXT) {
   2116             continue;
   2117         }
   2118         if(IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) {
   2119             return TRUE;
   2120         }
   2121         i++;
   2122     }
   2123     return FALSE;
   2124 }
   2125 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info)
   2126 {
   2127     int size = m_TempCharList.GetSize();
   2128     PAGECHAR_INFO preChar;
   2129     if (size) {
   2130         preChar = (PAGECHAR_INFO)m_TempCharList[size - 1];
   2131     } else {
   2132         size = m_charList.GetSize();
   2133         if(size == 0) {
   2134             return FALSE;
   2135         }
   2136         preChar = (PAGECHAR_INFO)m_charList[size - 1];
   2137     }
   2138     info.m_Index = m_TextBuf.GetLength();
   2139     info.m_Unicode = unicode;
   2140     info.m_pTextObj = NULL;
   2141     info.m_CharCode = -1;
   2142     info.m_Flag = FPDFTEXT_CHAR_GENERATED;
   2143     int preWidth = 0;
   2144     if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD) - 1) {
   2145         preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont());
   2146     }
   2147     FX_FLOAT fs = 0;
   2148     if(preChar.m_pTextObj) {
   2149         fs = preChar.m_pTextObj->GetFontSize();
   2150     } else {
   2151         fs = preChar.m_CharBox.Height();
   2152     }
   2153     if(!fs) {
   2154         fs = 1;
   2155     }
   2156     info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000;
   2157     info.m_OriginY = preChar.m_OriginY;
   2158     info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, info.m_OriginY);
   2159     return TRUE;
   2160 }
   2161 FX_BOOL CPDF_TextPage::IsRectIntersect(CFX_FloatRect rect1, CFX_FloatRect rect2)
   2162 {
   2163     rect1.Intersect(rect2);
   2164     if(rect1.IsEmpty()) {
   2165         return FALSE;
   2166     }
   2167     return TRUE;
   2168 }
   2169 FX_BOOL	CPDF_TextPage::IsLetter(FX_WCHAR unicode)
   2170 {
   2171     if (unicode < L'A') {
   2172         return FALSE;
   2173     }
   2174     if (unicode > L'Z' && unicode < L'a') {
   2175         return FALSE;
   2176     }
   2177     if (unicode > L'z') {
   2178         return FALSE;
   2179     }
   2180     return TRUE;
   2181 }
   2182 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
   2183     : m_IsFind(FALSE),
   2184       m_pTextPage(NULL)
   2185 {
   2186     if (!pTextPage) {
   2187         return;
   2188     }
   2189     CPDF_ModuleMgr* pPDFModule = CPDF_ModuleMgr::Get();
   2190     m_pTextPage = pTextPage;
   2191     m_strText = m_pTextPage->GetPageText();
   2192     int nCount = pTextPage->CountChars();
   2193     if(nCount) {
   2194         m_CharIndex.Add(0);
   2195     }
   2196     for(int i = 0; i < nCount; i++) {
   2197         FPDF_CHAR_INFO info;
   2198         pTextPage->GetCharInfo(i, info);
   2199         int indexSize = m_CharIndex.GetSize();
   2200         if(info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) {
   2201             if(indexSize % 2) {
   2202                 m_CharIndex.Add(1);
   2203             } else {
   2204                 if(indexSize <= 0) {
   2205                     continue;
   2206                 }
   2207                 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1);
   2208             }
   2209         } else {
   2210             if(indexSize % 2) {
   2211                 if(indexSize <= 0) {
   2212                     continue;
   2213                 }
   2214                 m_CharIndex.SetAt(indexSize - 1, i + 1);
   2215             } else {
   2216                 m_CharIndex.Add(i + 1);
   2217             }
   2218         }
   2219     }
   2220     int indexSize = m_CharIndex.GetSize();
   2221     if(indexSize % 2) {
   2222         m_CharIndex.RemoveAt(indexSize - 1);
   2223     }
   2224     m_resStart = 0;
   2225     m_resEnd = -1;
   2226 }
   2227 int CPDF_TextPageFind::GetCharIndex(int index) const
   2228 {
   2229     return m_pTextPage->CharIndexFromTextIndex(index);
   2230     int indexSize = m_CharIndex.GetSize();
   2231     int count = 0;
   2232     for(int i = 0; i < indexSize; i += 2) {
   2233         count += m_CharIndex.GetAt(i + 1);
   2234         if(count > index) {
   2235             return 	index - count + m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i);
   2236         }
   2237     }
   2238     return -1;
   2239 }
   2240 FX_BOOL	CPDF_TextPageFind::FindFirst(CFX_WideString findwhat, int flags, int startPos)
   2241 {
   2242     if (!m_pTextPage) {
   2243         return FALSE;
   2244     }
   2245     if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) {
   2246         m_strText = m_pTextPage->GetPageText();
   2247     }
   2248     m_findWhat = findwhat;
   2249     m_flags = flags;
   2250     m_bMatchCase = flags & FPDFTEXT_MATCHCASE;
   2251     if (m_strText.IsEmpty()) {
   2252         m_IsFind = FALSE;
   2253         return TRUE;
   2254     }
   2255     FX_STRSIZE len = findwhat.GetLength();
   2256     if (!m_bMatchCase) {
   2257         findwhat.MakeLower();
   2258         m_strText.MakeLower();
   2259     }
   2260     m_bMatchWholeWord = flags & FPDFTEXT_MATCHWHOLEWORD;
   2261     m_findNextStart = startPos;
   2262     if (startPos == -1) {
   2263         m_findPreStart = m_strText.GetLength() - 1;
   2264     } else {
   2265         m_findPreStart = startPos;
   2266     }
   2267     m_csFindWhatArray.RemoveAll();
   2268     int i = 0;
   2269     while(i < len) {
   2270         if(findwhat.GetAt(i) != ' ') {
   2271             break;
   2272         }
   2273         i++;
   2274     }
   2275     if(i < len) {
   2276         ExtractFindWhat(findwhat);
   2277     } else {
   2278         m_csFindWhatArray.Add(findwhat);
   2279     }
   2280     if(m_csFindWhatArray.GetSize() <= 0) {
   2281         return FALSE;
   2282     }
   2283     m_IsFind = TRUE;
   2284     m_resStart = 0;
   2285     m_resEnd = -1;
   2286     return TRUE;
   2287 }
   2288 FX_BOOL CPDF_TextPageFind::FindNext()
   2289 {
   2290     if (!m_pTextPage) {
   2291         return FALSE;
   2292     }
   2293     m_resArray.RemoveAll();
   2294     if(m_findNextStart == -1) {
   2295         return FALSE;
   2296     }
   2297     if(m_strText.IsEmpty()) {
   2298         m_IsFind = FALSE;
   2299         return m_IsFind;
   2300     }
   2301     int strLen = m_strText.GetLength();
   2302     if (m_findNextStart > strLen - 1) {
   2303         m_IsFind = FALSE;
   2304         return m_IsFind;
   2305     }
   2306     int nCount = m_csFindWhatArray.GetSize();
   2307     int nResultPos = 0;
   2308     int	nStartPos = 0;
   2309     nStartPos = m_findNextStart;
   2310     FX_BOOL bSpaceStart = FALSE;
   2311     for(int iWord = 0; iWord < nCount; iWord++) {
   2312         CFX_WideString csWord = m_csFindWhatArray[iWord];
   2313         if(csWord.IsEmpty()) {
   2314             if(iWord == nCount - 1) {
   2315                 FX_WCHAR strInsert = m_strText.GetAt(nStartPos);
   2316                 if(strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CHAR || strInsert == TEXT_RETURN_CHAR || strInsert == 160) {
   2317                     nResultPos = nStartPos + 1;
   2318                     break;
   2319                 }
   2320                 iWord = -1;
   2321             } else if(iWord == 0) {
   2322                 bSpaceStart = TRUE;
   2323             }
   2324             continue;
   2325         }
   2326         int endIndex;
   2327         nResultPos = m_strText.Find(csWord, nStartPos);
   2328         if (nResultPos == -1) {
   2329             m_IsFind = FALSE;
   2330             return m_IsFind;
   2331         }
   2332         endIndex = nResultPos + csWord.GetLength() - 1;
   2333         if(iWord == 0) {
   2334             m_resStart = nResultPos;
   2335         }
   2336         FX_BOOL bMatch = TRUE;
   2337         if(iWord != 0 && !bSpaceStart) {
   2338             int PreResEndPos = nStartPos;
   2339             int curChar = csWord.GetAt(0);
   2340             CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
   2341             int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
   2342             if(nStartPos == nResultPos && !(_IsIgnoreSpaceCharacter(lastChar) || _IsIgnoreSpaceCharacter(curChar))) {
   2343                 bMatch = FALSE;
   2344             }
   2345             for(int d = PreResEndPos; d < nResultPos; d++) {
   2346                 FX_WCHAR strInsert = m_strText.GetAt(d);
   2347                 if(strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
   2348                     bMatch = FALSE;
   2349                     break;
   2350                 }
   2351             }
   2352         } else if(bSpaceStart) {
   2353             if(nResultPos > 0) {
   2354                 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1);
   2355                 if(strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
   2356                     bMatch = FALSE;
   2357                     m_resStart = nResultPos;
   2358                 } else {
   2359                     m_resStart = nResultPos - 1;
   2360                 }
   2361             }
   2362         }
   2363         if(m_bMatchWholeWord && bMatch) {
   2364             bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex);
   2365         }
   2366         nStartPos = endIndex + 1;
   2367         if(!bMatch) {
   2368             iWord = -1;
   2369             if(bSpaceStart) {
   2370                 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();
   2371             } else {
   2372                 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
   2373             }
   2374         }
   2375     }
   2376     m_resEnd = nResultPos + m_csFindWhatArray[m_csFindWhatArray.GetSize() - 1].GetLength() - 1;
   2377     m_IsFind = TRUE;
   2378     int resStart = GetCharIndex(m_resStart);
   2379     int resEnd = GetCharIndex(m_resEnd);
   2380     m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray);
   2381     if(m_flags & FPDFTEXT_CONSECUTIVE) {
   2382         m_findNextStart = m_resStart + 1;
   2383         m_findPreStart = m_resEnd - 1;
   2384     } else {
   2385         m_findNextStart = m_resEnd + 1;
   2386         m_findPreStart = m_resStart - 1;
   2387     }
   2388     return m_IsFind;
   2389 }
   2390 FX_BOOL CPDF_TextPageFind::FindPrev()
   2391 {
   2392     if (!m_pTextPage) {
   2393         return FALSE;
   2394     }
   2395     m_resArray.RemoveAll();
   2396     if(m_strText.IsEmpty() || m_findPreStart < 0) {
   2397         m_IsFind = FALSE;
   2398         return m_IsFind;
   2399     }
   2400     CPDF_TextPageFind findEngine(m_pTextPage);
   2401     FX_BOOL ret = findEngine.FindFirst(m_findWhat, m_flags);
   2402     if(!ret) {
   2403         m_IsFind = FALSE;
   2404         return m_IsFind;
   2405     }
   2406     int	order = -1, MatchedCount = 0;
   2407     while(ret) {
   2408         ret = findEngine.FindNext();
   2409         if(ret) {
   2410             int order1 = findEngine.GetCurOrder() ;
   2411             int	MatchedCount1 = findEngine.GetMatchedCount();
   2412             if(((order1 + MatchedCount1) - 1) > m_findPreStart) {
   2413                 break;
   2414             }
   2415             order = order1;
   2416             MatchedCount = MatchedCount1;
   2417         }
   2418     }
   2419     if(order == -1) {
   2420         m_IsFind = FALSE;
   2421         return m_IsFind;
   2422     }
   2423     m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
   2424     m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
   2425     m_IsFind = TRUE;
   2426     m_pTextPage->GetRectArray(order, MatchedCount, m_resArray);
   2427     if(m_flags & FPDFTEXT_CONSECUTIVE) {
   2428         m_findNextStart = m_resStart + 1;
   2429         m_findPreStart = m_resEnd - 1;
   2430     } else {
   2431         m_findNextStart = m_resEnd + 1;
   2432         m_findPreStart = m_resStart - 1;
   2433     }
   2434     return m_IsFind;
   2435 }
   2436 void CPDF_TextPageFind::ExtractFindWhat(CFX_WideString findwhat)
   2437 {
   2438     if(findwhat.IsEmpty()) {
   2439         return ;
   2440     }
   2441     int index = 0;
   2442     while(1) {
   2443         CFX_WideString csWord = TEXT_EMPTY;
   2444         int ret = ExtractSubString(csWord, findwhat, index, TEXT_BLANK_CHAR);
   2445         if(csWord.IsEmpty()) {
   2446             if(ret) {
   2447                 m_csFindWhatArray.Add(CFX_WideString(L""));
   2448                 index++;
   2449                 continue;
   2450             } else {
   2451                 break;
   2452             }
   2453         }
   2454         int pos = 0;
   2455         FX_BOOL bLastIgnore = FALSE;
   2456         while(pos < csWord.GetLength()) {
   2457             CFX_WideString curStr = csWord.Mid(pos, 1);
   2458             FX_WCHAR curChar = csWord.GetAt(pos);
   2459             if (_IsIgnoreSpaceCharacter(curChar)) {
   2460                 if (pos > 0 && curChar == 0x2019) {
   2461                     pos++;
   2462                     continue;
   2463                 }
   2464                 if (pos > 0 ) {
   2465                     CFX_WideString preStr = csWord.Mid(0, pos);
   2466                     m_csFindWhatArray.Add(preStr);
   2467                 }
   2468                 m_csFindWhatArray.Add(curStr);
   2469                 if (pos == csWord.GetLength() - 1) {
   2470                     csWord.Empty();
   2471                     break;
   2472                 }
   2473                 csWord = csWord.Right(csWord.GetLength() - pos - 1);
   2474                 pos = 0;
   2475                 bLastIgnore = TRUE;
   2476                 continue;
   2477             } else {
   2478                 bLastIgnore = FALSE;
   2479             }
   2480             pos++;
   2481         }
   2482         if (!csWord.IsEmpty()) {
   2483             m_csFindWhatArray.Add(csWord);
   2484         }
   2485         index++;
   2486     }
   2487     return;
   2488 }
   2489 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(CFX_WideString csPageText, int startPos, int endPos)
   2490 {
   2491     int char_left = 0;
   2492     int char_right = 0;
   2493     int char_count = endPos - startPos + 1;
   2494     if(char_count < 1) {
   2495         return FALSE;
   2496     }
   2497     if (char_count == 1 && csPageText.GetAt(startPos) > 255) {
   2498         return TRUE;
   2499     }
   2500     if(startPos - 1 >= 0 ) {
   2501         char_left = csPageText.GetAt(startPos - 1);
   2502     }
   2503     if(startPos + char_count < csPageText.GetLength()) {
   2504         char_right = csPageText.GetAt(startPos + char_count);
   2505     }
   2506     if(char_left == 0x61) {
   2507         int a = 0;
   2508     }
   2509     if ((char_left > 'A' && char_left < 'a') || (char_left > 'a' && char_left < 'z') || (char_left > 0xfb00 && char_left < 0xfb06) || (char_left >= '0' && char_left <= '9') ||
   2510             (char_right > 'A' && char_right < 'a') || (char_right > 'a' && char_right < 'z') || (char_right > 0xfb00 && char_right < 0xfb06) || (char_right >= '0' && char_right <= '9')) {
   2511         return FALSE;
   2512     }
   2513     if(!(('A' > char_left || char_left > 'Z')  && ('a' > char_left || char_left > 'z')
   2514             && ('A' > char_right || char_right > 'Z')  && ('a' > char_right || char_right > 'z'))) {
   2515         return FALSE;
   2516     }
   2517     if (char_count > 0) {
   2518         if (csPageText.GetAt(startPos) >= L'0' && csPageText.GetAt(startPos) <= L'9' && char_left >= L'0' && char_left <= L'9') {
   2519             return FALSE;
   2520         }
   2521         if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' && char_right >= L'0' && char_right <= L'9') {
   2522             return FALSE;
   2523         }
   2524     }
   2525     return TRUE;
   2526 }
   2527 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString,
   2528         int iSubString, FX_WCHAR chSep)
   2529 {
   2530     if (lpszFullString == NULL) {
   2531         return FALSE;
   2532     }
   2533     while (iSubString--) {
   2534         lpszFullString = FXSYS_wcschr(lpszFullString, chSep);
   2535         if (lpszFullString == NULL) {
   2536             rString.Empty();
   2537             return FALSE;
   2538         }
   2539         lpszFullString++;
   2540         while(*lpszFullString == chSep) {
   2541             lpszFullString++;
   2542         }
   2543     }
   2544     FX_LPCWSTR lpchEnd = FXSYS_wcschr(lpszFullString, chSep);
   2545     int nLen = (lpchEnd == NULL) ?
   2546                (int)FXSYS_wcslen(lpszFullString) : (int)(lpchEnd - lpszFullString);
   2547     ASSERT(nLen >= 0);
   2548     FXSYS_memcpy32(rString.GetBuffer(nLen), lpszFullString, nLen * sizeof(FX_WCHAR));
   2549     rString.ReleaseBuffer();
   2550     return TRUE;
   2551 }
   2552 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString str)
   2553 {
   2554     CFX_WideString str2;
   2555     str2.Empty();
   2556     int nlen = str.GetLength();
   2557     for(int i = nlen - 1; i >= 0; i--) {
   2558         str2 += str.GetAt(i);
   2559     }
   2560     return str2;
   2561 }
   2562 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const
   2563 {
   2564     rects.Copy(m_resArray);
   2565 }
   2566 int	CPDF_TextPageFind::GetCurOrder() const
   2567 {
   2568     return GetCharIndex(m_resStart);
   2569 }
   2570 int	CPDF_TextPageFind::GetMatchedCount()const
   2571 {
   2572     int resStart = GetCharIndex(m_resStart);
   2573     int resEnd = GetCharIndex(m_resEnd);
   2574     return resEnd - resStart + 1;
   2575 }
   2576 CPDF_LinkExtract::CPDF_LinkExtract()
   2577     : m_pTextPage(NULL),
   2578       m_IsParserd(FALSE)
   2579 {
   2580 }
   2581 CPDF_LinkExtract::~CPDF_LinkExtract()
   2582 {
   2583     DeleteLinkList();
   2584 }
   2585 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage)
   2586 {
   2587     if (!pTextPage || !pTextPage->IsParsered()) {
   2588         return FALSE;
   2589     }
   2590     m_pTextPage = (const CPDF_TextPage*)pTextPage;
   2591     m_strPageText = m_pTextPage->GetPageText(0, -1);
   2592     DeleteLinkList();
   2593     if (m_strPageText.IsEmpty()) {
   2594         return FALSE;
   2595     }
   2596     parserLink();
   2597     m_IsParserd = TRUE;
   2598     return TRUE;
   2599 }
   2600 void CPDF_LinkExtract::DeleteLinkList()
   2601 {
   2602     while (m_LinkList.GetSize()) {
   2603         CPDF_LinkExt* linkinfo = NULL;
   2604         linkinfo = m_LinkList.GetAt(0);
   2605         m_LinkList.RemoveAt(0);
   2606         delete linkinfo;
   2607     }
   2608     m_LinkList.RemoveAll();
   2609 }
   2610 int CPDF_LinkExtract::CountLinks() const
   2611 {
   2612     if (!m_IsParserd)	{
   2613         return -1;
   2614     }
   2615     return m_LinkList.GetSize();
   2616 }
   2617 void CPDF_LinkExtract::parserLink()
   2618 {
   2619     int start = 0, pos = 0;
   2620     int TotalChar = m_pTextPage->CountChars();
   2621     while (pos < TotalChar) {
   2622         FPDF_CHAR_INFO pageChar;
   2623         m_pTextPage->GetCharInfo(pos, pageChar);
   2624         if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) {
   2625             int nCount = pos - start;
   2626             if(pos == TotalChar - 1) {
   2627                 nCount++;
   2628             }
   2629             CFX_WideString strBeCheck;
   2630             strBeCheck = m_pTextPage->GetPageText(start, nCount);
   2631             if (strBeCheck.GetLength() > 5) {
   2632                 while(strBeCheck.GetLength() > 0) {
   2633                     FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);
   2634                     if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') {
   2635                         strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1);
   2636                         nCount--;
   2637                     } else {
   2638                         break;
   2639                     }
   2640                 }
   2641                 if (nCount > 5 && (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
   2642                     if (!AppendToLinkList(start, nCount, strBeCheck)) {
   2643                         break;
   2644                     }
   2645                 }
   2646             }
   2647             start = ++pos;
   2648         } else {
   2649             pos++;
   2650         }
   2651     }
   2652 }
   2653 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck)
   2654 {
   2655     CFX_WideString str = strBeCheck;
   2656     str.MakeLower();
   2657     if (str.Find(L"http://www.") != -1) {
   2658         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
   2659         return TRUE;
   2660     } else if (str.Find(L"http://") != -1) {
   2661         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
   2662         return TRUE;
   2663     } else if (str.Find(L"https://www.") != -1) {
   2664         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
   2665         return TRUE;
   2666     } else if (str.Find(L"https://") != -1) {
   2667         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
   2668         return TRUE;
   2669     } else if (str.Find(L"www.") != -1) {
   2670         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
   2671         strBeCheck = L"http://" + strBeCheck;
   2672         return TRUE;
   2673     } else {
   2674         return FALSE;
   2675     }
   2676 }
   2677 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str)
   2678 {
   2679     str.MakeLower();
   2680     int aPos = str.Find(L'@');
   2681     if (aPos < 1) {
   2682         return FALSE;
   2683     }
   2684     if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') {
   2685         return FALSE;
   2686     }
   2687     int i;
   2688     for (i = aPos - 1; i >= 0; i--) {
   2689         FX_WCHAR ch = str.GetAt(i);
   2690         if (ch == L'_' || ch == L'.' || (ch >= L'a' && ch <= L'z') || (ch >= L'0' && ch <= L'9')) {
   2691             continue;
   2692         } else {
   2693             if (i == aPos - 1) {
   2694                 return FALSE;
   2695             }
   2696             str = str.Right(str.GetLength() - i - 1);
   2697             break;
   2698         }
   2699     }
   2700     aPos = str.Find(L'@');
   2701     if (aPos < 1) {
   2702         return FALSE;
   2703     }
   2704     CFX_WideString strtemp = L"";
   2705     for (i = 0; i < aPos; i++) {
   2706         FX_WCHAR wch = str.GetAt(i);
   2707         if (wch >= L'a' && wch <= L'z') {
   2708             break;
   2709         } else {
   2710             strtemp = str.Right(str.GetLength() - i + 1);
   2711         }
   2712     }
   2713     if (strtemp != L"") {
   2714         str = strtemp;
   2715     }
   2716     aPos = str.Find(L'@');
   2717     if (aPos < 1) {
   2718         return FALSE;
   2719     }
   2720     str.TrimRight(L'.');
   2721     strtemp = str;
   2722     int ePos = str.Find(L'.');
   2723     if (ePos == -1) {
   2724         return FALSE;
   2725     }
   2726     while (ePos != -1) {
   2727         strtemp = strtemp.Right(strtemp.GetLength() - ePos - 1);
   2728         ePos = strtemp.Find('.');
   2729     }
   2730     ePos = strtemp.GetLength();
   2731     for (i = 0; i < ePos; i++) {
   2732         FX_WCHAR wch = str.GetAt(i);
   2733         if ((wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) {
   2734             continue;
   2735         } else {
   2736             str = str.Left(str.GetLength() - ePos + i + 1);
   2737             ePos = ePos - i - 1;
   2738             break;
   2739         }
   2740     }
   2741     int nLen = str.GetLength();
   2742     for (i = aPos + 1; i < nLen - ePos; i++) {
   2743         FX_WCHAR wch = str.GetAt(i);
   2744         if (wch == L'-' || wch == L'.' || (wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) {
   2745             continue;
   2746         } else {
   2747             return FALSE;
   2748         }
   2749     }
   2750     if (str.Find(L"mailto:") == -1) {
   2751         str = L"mailto:" + str;
   2752     }
   2753     return TRUE;
   2754 }
   2755 FX_BOOL CPDF_LinkExtract::AppendToLinkList(int start, int count, CFX_WideString strUrl)
   2756 {
   2757     CPDF_LinkExt* linkInfo = NULL;
   2758     linkInfo = FX_NEW CPDF_LinkExt;
   2759     if (!linkInfo) {
   2760         return FALSE;
   2761     }
   2762     linkInfo->m_strUrl = strUrl;
   2763     linkInfo->m_Start = start;
   2764     linkInfo->m_Count = count;
   2765     m_LinkList.Add(linkInfo);
   2766     return TRUE;
   2767 }
   2768 CFX_WideString CPDF_LinkExtract::GetURL(int index) const
   2769 {
   2770     if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
   2771         return L"";
   2772     }
   2773     CPDF_LinkExt* link = NULL;
   2774     link = m_LinkList.GetAt(index);
   2775     if (!link) {
   2776         return L"";
   2777     }
   2778     return link->m_strUrl;
   2779 }
   2780 void CPDF_LinkExtract::GetBoundedSegment(int index, int& start, int& count) const
   2781 {
   2782     if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
   2783         return ;
   2784     }
   2785     CPDF_LinkExt* link = NULL;
   2786     link = m_LinkList.GetAt(index);
   2787     if (!link) {
   2788         return ;
   2789     }
   2790     start = link->m_Start;
   2791     count = link->m_Count;
   2792 }
   2793 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const
   2794 {
   2795     if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
   2796         return;
   2797     }
   2798     CPDF_LinkExt* link = NULL;
   2799     link = m_LinkList.GetAt(index);
   2800     if (!link) {
   2801         return ;
   2802     }
   2803     m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
   2804 }
   2805