Home | History | Annotate | Download | only in fpdftext
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "../../include/fpdfapi/fpdf_resource.h"
      8 #include "../../include/fpdfapi/fpdf_pageobj.h"
      9 #include "../../include/fpdftext/fpdf_text.h"
     10 #include "../../include/fpdfapi/fpdf_page.h"
     11 #include "../../include/fpdfapi/fpdf_module.h"
     12 #include <ctype.h>
     13 #include <algorithm>
     14 #include "text_int.h"
     15 
     16 namespace {
     17 
     18 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar)
     19 {
     20     if(curChar < 255 ) {
     21         return FALSE;
     22     }
     23     if ( (curChar >= 0x0600 && curChar <= 0x06FF)
     24             || (curChar >= 0xFE70 && curChar <= 0xFEFF)
     25             || (curChar >= 0xFB50 && curChar <= 0xFDFF)
     26             || (curChar >= 0x0400 && curChar <= 0x04FF)
     27             || (curChar >= 0x0500 && curChar <= 0x052F)
     28             || (curChar >= 0xA640 && curChar <= 0xA69F)
     29             || (curChar >= 0x2DE0 && curChar <= 0x2DFF)
     30             || curChar == 8467
     31             || (curChar >= 0x2000 && curChar <= 0x206F)) {
     32         return FALSE;
     33     }
     34     return TRUE;
     35 }
     36 
     37 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold)
     38 {
     39     if (threshold < 300) {
     40         return threshold / 2.0f;
     41     } else if (threshold < 500) {
     42         return threshold / 4.0f;
     43     } else if (threshold < 700) {
     44         return threshold / 5.0f;
     45     }
     46     return threshold / 6.0f;
     47 }
     48 
     49 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj,
     50                              const CFX_AffineMatrix& matrix)
     51 {
     52     FX_FLOAT baseSpace = 0.0;
     53     const int nItems = pTextObj->CountItems();
     54     if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) {
     55         FX_BOOL bAllChar = TRUE;
     56         FX_FLOAT spacing = matrix.TransformDistance(
     57             pTextObj->m_TextState.GetObject()->m_CharSpace);
     58         baseSpace = spacing;
     59         for (int i = 0; i < nItems; i++) {
     60             CPDF_TextObjectItem item;
     61             pTextObj->GetItemInfo(i, &item);
     62             if (item.m_CharCode == (FX_DWORD) - 1) {
     63                 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
     64                 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000;
     65                 baseSpace = std::min(baseSpace, kerning + spacing);
     66                 bAllChar = FALSE;
     67             }
     68         }
     69         if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) {
     70             baseSpace = 0.0;
     71         }
     72     }
     73     return baseSpace;
     74 }
     75 
     76 }  // namespace
     77 
     78 CPDFText_ParseOptions::CPDFText_ParseOptions()
     79     : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE)
     80 {
     81 }
     82 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions)
     83 {
     84     return new CPDF_TextPage(pPage, ParserOptions);
     85 }
     86 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags)
     87 {
     88     return new CPDF_TextPage(pPage, flags);
     89 }
     90 IPDF_TextPage*	IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, int flags)
     91 {
     92     return new CPDF_TextPage(pObjs, flags);
     93 }
     94 IPDF_TextPageFind*	IPDF_TextPageFind::CreatePageFind(const IPDF_TextPage* pTextPage)
     95 {
     96     if (!pTextPage) {
     97         return NULL;
     98     }
     99     return new CPDF_TextPageFind(pTextPage);
    100 }
    101 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract()
    102 {
    103     return new CPDF_LinkExtract();
    104 }
// Wide-character constants: space, line feed and carriage return.
#define  TEXT_BLANK_CHAR		L' '
#define  TEXT_LINEFEED_CHAR		L'\n'
#define	 TEXT_RETURN_CHAR		L'\r'
// Wide-string constants: empty string, single space, CR+LF and LF.
#define  TEXT_EMPTY				L""
#define  TEXT_BLANK				L" "
#define  TEXT_RETURN_LINEFEED	L"\r\n"
#define  TEXT_LINEFEED			L"\n"
// NOTE(review): presumably a ratio tolerance used when comparing character
// gaps; its use is not visible in this part of the file -- confirm.
#define	 TEXT_CHARRATIO_GAPDELTA	0.070
    113 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
    114     : m_charList(512),
    115       m_TempCharList(50),
    116       m_pPreTextObj(NULL),
    117       m_IsParsered(FALSE),
    118       m_TextlineDir(-1),
    119       m_CurlineRect(0, 0, 0, 0)
    120 {
    121     m_pPage = pPage;
    122     m_parserflag = flags;
    123     m_TextBuf.EstimateSize(0, 10240);
    124     pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int) pPage->GetPageWidth(), (int)pPage->GetPageHeight(), 0);
    125 }
    126 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions)
    127     : m_ParseOptions(ParserOptions)
    128     , m_charList(512)
    129     , m_TempCharList(50)
    130     , m_pPreTextObj(NULL)
    131     , m_IsParsered(FALSE)
    132     , m_TextlineDir(-1)
    133     , m_CurlineRect(0, 0, 0, 0)
    134 {
    135     m_pPage = pPage;
    136     m_parserflag = 0;
    137     m_TextBuf.EstimateSize(0, 10240);
    138     pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int) pPage->GetPageWidth(), (int)pPage->GetPageHeight(), 0);
    139 }
    140 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags)
    141     : m_charList(512),
    142       m_TempCharList(50),
    143       m_pPreTextObj(NULL),
    144       m_IsParsered(FALSE),
    145       m_TextlineDir(-1),
    146       m_CurlineRect(0, 0, 0, 0)
    147 {
    148     m_pPage = pPage;
    149     m_parserflag = flags;
    150     m_TextBuf.EstimateSize(0, 10240);
    151     CFX_FloatRect pageRect = pPage->CalcBoundingBox();
    152     m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top);
    153 }
    154 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize)
    155 {
    156     m_ParseOptions.m_bNormalizeObjs = bNormalize;
    157 }
    158 FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo)
    159 {
    160     if(!pCharInfo) {
    161         return FALSE;
    162     }
    163     switch(pCharInfo->m_Unicode) {
    164         case 0x2:
    165         case 0x3:
    166         case 0x93:
    167         case 0x94:
    168         case 0x96:
    169         case 0x97:
    170         case 0x98:
    171         case 0xfffe:
    172             if(pCharInfo->m_Flag == FPDFTEXT_CHAR_HYPHEN) {
    173                 return FALSE;
    174             } else {
    175                 return TRUE;
    176             }
    177         default:
    178             return FALSE;
    179     }
    180 }
    181 FX_BOOL CPDF_TextPage::ParseTextPage()
    182 {
    183     if (!m_pPage) {
    184         m_IsParsered = FALSE;
    185         return FALSE;
    186     }
    187     m_IsParsered = FALSE;
    188     m_TextBuf.Clear();
    189     m_charList.RemoveAll();
    190     m_pPreTextObj = NULL;
    191     ProcessObject();
    192     m_IsParsered = TRUE;
    193     if(!m_ParseOptions.m_bGetCharCodeOnly) {
    194         m_CharIndex.RemoveAll();
    195         int nCount = m_charList.GetSize();
    196         if(nCount) {
    197             m_CharIndex.Add(0);
    198         }
    199         for(int i = 0; i < nCount; i++) {
    200             int indexSize = m_CharIndex.GetSize();
    201             FX_BOOL bNormal = FALSE;
    202             PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
    203             if(charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
    204                 bNormal = TRUE;
    205             }
    206             else if(charinfo.m_Unicode == 0 || IsControlChar(&charinfo))
    207                 bNormal = FALSE;
    208             else {
    209                 bNormal = TRUE;
    210             }
    211             if(bNormal) {
    212                 if(indexSize % 2) {
    213                     m_CharIndex.Add(1);
    214                 } else {
    215                     if(indexSize <= 0) {
    216                         continue;
    217                     }
    218                     m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1);
    219                 }
    220             } else {
    221                 if(indexSize % 2) {
    222                     if(indexSize <= 0) {
    223                         continue;
    224                     }
    225                     m_CharIndex.SetAt(indexSize - 1, i + 1);
    226                 } else {
    227                     m_CharIndex.Add(i + 1);
    228                 }
    229             }
    230         }
    231         int indexSize = m_CharIndex.GetSize();
    232         if(indexSize % 2) {
    233             m_CharIndex.RemoveAt(indexSize - 1);
    234         }
    235     }
    236     return TRUE;
    237 }
    238 int	CPDF_TextPage::CountChars() const
    239 {
    240     if(m_ParseOptions.m_bGetCharCodeOnly) {
    241         return m_TextBuf.GetSize();
    242     }
    243     return m_charList.GetSize();
    244 }
    245 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const
    246 {
    247     int indexSize = m_CharIndex.GetSize();
    248     int count = 0;
    249     for(int i = 0; i < indexSize; i += 2) {
    250         count += m_CharIndex.GetAt(i + 1);
    251         if(count > TextIndex) {
    252             return 	TextIndex - count + m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i);
    253         }
    254     }
    255     return -1;
    256 }
    257 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const
    258 {
    259     int indexSize = m_CharIndex.GetSize();
    260     int count = 0;
    261     for(int i = 0; i < indexSize; i += 2) {
    262         count += m_CharIndex.GetAt(i + 1);
    263         if(m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i) > CharIndex) {
    264             if(CharIndex - m_CharIndex.GetAt(i) < 0) {
    265                 return -1;
    266             }
    267             return 	CharIndex - m_CharIndex.GetAt(i) + count - m_CharIndex.GetAt(i + 1);
    268         }
    269     }
    270     return -1;
    271 }
    272 void CPDF_TextPage::GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const
    273 {
    274     if(m_ParseOptions.m_bGetCharCodeOnly) {
    275         return;
    276     }
    277     if(start < 0 || nCount == 0) {
    278         return;
    279     }
    280     if (!m_IsParsered)	{
    281         return;
    282     }
    283     PAGECHAR_INFO		info_curchar;
    284     CPDF_TextObject*	pCurObj = NULL;
    285     CFX_FloatRect		rect;
    286     int					curPos = start;
    287     FX_BOOL				flagNewRect = TRUE;
    288     if (nCount + start > m_charList.GetSize() || nCount == -1) {
    289         nCount = m_charList.GetSize() - start;
    290     }
    291     while (nCount--) {
    292         info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(curPos++);
    293         if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
    294             continue;
    295         }
    296         if(info_curchar.m_CharBox.Width() < 0.01 || info_curchar.m_CharBox.Height() < 0.01) {
    297             continue;
    298         }
    299         if(!pCurObj) {
    300             pCurObj = info_curchar.m_pTextObj;
    301         }
    302         if (pCurObj != info_curchar.m_pTextObj) {
    303             rectArray.Add(rect);
    304             pCurObj = info_curchar.m_pTextObj;
    305             flagNewRect = TRUE;
    306         }
    307         if (flagNewRect) {
    308             FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY;
    309             CFX_AffineMatrix matrix, matrix_reverse;
    310             info_curchar.m_pTextObj->GetTextMatrix(&matrix);
    311             matrix.Concat(info_curchar.m_Matrix);
    312             matrix_reverse.SetReverse(matrix);
    313             matrix_reverse.Transform(orgX, orgY);
    314             rect.left = info_curchar.m_CharBox.left;
    315             rect.right = info_curchar.m_CharBox.right;
    316             if (pCurObj->GetFont()->GetTypeDescent()) {
    317                 rect.bottom = orgY + pCurObj->GetFont()->GetTypeDescent() * pCurObj->GetFontSize() / 1000;
    318                 FX_FLOAT xPosTemp = orgX;
    319                 matrix.Transform(xPosTemp, rect.bottom);
    320             } else {
    321                 rect.bottom = info_curchar.m_CharBox.bottom;
    322             }
    323             if (pCurObj->GetFont()->GetTypeAscent()) {
    324                 rect.top = orgY + pCurObj->GetFont()->GetTypeAscent() * pCurObj->GetFontSize() / 1000;
    325                 FX_FLOAT xPosTemp = orgX + GetCharWidth(info_curchar.m_CharCode, pCurObj->GetFont()) * pCurObj->GetFontSize() / 1000;
    326                 matrix.Transform(xPosTemp, rect.top);
    327             } else {
    328                 rect.top = info_curchar.m_CharBox.top;
    329             }
    330             flagNewRect = FALSE;
    331             rect = info_curchar.m_CharBox;
    332             rect.Normalize();
    333         } else {
    334             info_curchar.m_CharBox.Normalize();
    335             if (rect.left > info_curchar.m_CharBox.left) {
    336                 rect.left = info_curchar.m_CharBox.left;
    337             }
    338             if (rect.right < info_curchar.m_CharBox.right) {
    339                 rect.right = info_curchar.m_CharBox.right;
    340             }
    341             if ( rect.top < info_curchar.m_CharBox.top) {
    342                 rect.top = info_curchar.m_CharBox.top;
    343             }
    344             if (rect.bottom > info_curchar.m_CharBox.bottom) {
    345                 rect.bottom = info_curchar.m_CharBox.bottom;
    346             }
    347         }
    348     }
    349     rectArray.Add(rect);
    350     return;
    351 }
    352 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point , FX_FLOAT xTorelance, FX_FLOAT yTorelance) const
    353 {
    354     if(m_ParseOptions.m_bGetCharCodeOnly) {
    355         return -3;
    356     }
    357     if (!m_IsParsered)	{
    358         return	-3;
    359     }
    360     int pos = 0;
    361     int NearPos = -1;
    362     double xdif = 5000, ydif = 5000;
    363     while(pos < m_charList.GetSize()) {
    364         PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos));
    365         CFX_FloatRect charrect = charinfo.m_CharBox;
    366         if (charrect.Contains(point.x, point.y)) {
    367             break;
    368         }
    369         if (xTorelance > 0 || yTorelance > 0) {
    370             CFX_FloatRect charRectExt;
    371             charrect.Normalize();
    372             charRectExt.left = charrect.left - xTorelance / 2;
    373             charRectExt.right = charrect.right + xTorelance / 2;
    374             charRectExt.top = charrect.top + yTorelance / 2;
    375             charRectExt.bottom = charrect.bottom - yTorelance / 2;
    376             if (charRectExt.Contains(point.x, point.y)) {
    377                 double curXdif, curYdif;
    378                 curXdif = FXSYS_fabs(point.x - charrect.left) < FXSYS_fabs(point.x - charrect.right) ? FXSYS_fabs(point.x - charrect.left) : FXSYS_fabs(point.x - charrect.right);
    379                 curYdif = FXSYS_fabs(point.y - charrect.bottom) < FXSYS_fabs(point.y - charrect.top	) ? FXSYS_fabs(point.y - charrect.bottom) : FXSYS_fabs(point.y - charrect.top);
    380                 if (curYdif + curXdif < xdif + ydif) {
    381                     ydif = curYdif;
    382                     xdif = curXdif;
    383                     NearPos = pos;
    384                 }
    385             }
    386         }
    387         ++pos;
    388     }
    389     if (pos >= m_charList.GetSize()) {
    390         pos = NearPos;
    391     }
    392     return pos;
    393 }
    394 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const
    395 {
    396     CFX_WideString strText;
    397     if(m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) {
    398         return strText;
    399     }
    400     int nCount = m_charList.GetSize();
    401     int pos = 0;
    402     FX_FLOAT posy = 0;
    403     FX_BOOL IsContainPreChar = FALSE;
    404     FX_BOOL	ISAddLineFeed = FALSE;
    405     while (pos < nCount) {
    406         PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
    407         if (IsRectIntersect(rect, charinfo.m_CharBox)) {
    408             if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && ISAddLineFeed) {
    409                 posy = charinfo.m_OriginY;
    410                 if (strText.GetLength() > 0) {
    411                     strText += L"\r\n";
    412                 }
    413             }
    414             IsContainPreChar = TRUE;
    415             ISAddLineFeed = FALSE;
    416             if (charinfo.m_Unicode) {
    417                 strText += charinfo.m_Unicode;
    418             }
    419         } else if (charinfo.m_Unicode == 32) {
    420             if (IsContainPreChar && charinfo.m_Unicode) {
    421                 strText += charinfo.m_Unicode;
    422                 IsContainPreChar = FALSE;
    423                 ISAddLineFeed = FALSE;
    424             }
    425         } else {
    426             IsContainPreChar = FALSE;
    427             ISAddLineFeed = TRUE;
    428         }
    429     }
    430     return strText;
    431 }
    432 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const
    433 {
    434     if(m_ParseOptions.m_bGetCharCodeOnly) {
    435         return;
    436     }
    437     if (!m_IsParsered)	{
    438         return;
    439     }
    440     CFX_FloatRect		curRect;
    441     FX_BOOL				flagNewRect = TRUE;
    442     CPDF_TextObject*	pCurObj = NULL;
    443     int nCount = m_charList.GetSize();
    444     int pos = 0;
    445     while (pos < nCount) {
    446         PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
    447         if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
    448             continue;
    449         }
    450         if (IsRectIntersect(rect, info_curchar.m_CharBox)) {
    451             if(!pCurObj) {
    452                 pCurObj = info_curchar.m_pTextObj;
    453             }
    454             if (pCurObj != info_curchar.m_pTextObj) {
    455                 resRectArray.Add(curRect);
    456                 pCurObj = info_curchar.m_pTextObj;
    457                 flagNewRect = TRUE;
    458             }
    459             if (flagNewRect) {
    460                 curRect = info_curchar.m_CharBox;
    461                 flagNewRect = FALSE;
    462                 curRect.Normalize();
    463             } else {
    464                 info_curchar.m_CharBox.Normalize();
    465                 if (curRect.left > info_curchar.m_CharBox.left) {
    466                     curRect.left = info_curchar.m_CharBox.left;
    467                 }
    468                 if (curRect.right < info_curchar.m_CharBox.right) {
    469                     curRect.right = info_curchar.m_CharBox.right;
    470                 }
    471                 if ( curRect.top < info_curchar.m_CharBox.top) {
    472                     curRect.top = info_curchar.m_CharBox.top;
    473                 }
    474                 if (curRect.bottom > info_curchar.m_CharBox.bottom) {
    475                     curRect.bottom = info_curchar.m_CharBox.bottom;
    476                 }
    477             }
    478         }
    479     }
    480     resRectArray.Add(curRect);
    481     return;
    482 }
    483 int	CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const
    484 {
    485     if(m_ParseOptions.m_bGetCharCodeOnly) {
    486         return -3;
    487     }
    488     CPDF_Point point(x, y);
    489     return GetIndexAtPos(point, xTorelance, yTorelance);
    490 }
    491 int CPDF_TextPage::GetOrderByDirection(int order, int direction) const
    492 {
    493     if(m_ParseOptions.m_bGetCharCodeOnly) {
    494         return -3;
    495     }
    496     if (!m_IsParsered) {
    497         return -3;
    498     }
    499     if (direction == FPDFTEXT_RIGHT || direction == FPDFTEXT_LEFT) {
    500         order += direction;
    501         while(order >= 0 && order < m_charList.GetSize()) {
    502             PAGECHAR_INFO cinfo = *(PAGECHAR_INFO*)m_charList.GetAt(order);
    503             if (cinfo.m_Flag != FPDFTEXT_CHAR_GENERATED) {
    504                 break;
    505             } else {
    506                 if (cinfo.m_Unicode == TEXT_LINEFEED_CHAR || cinfo.m_Unicode == TEXT_RETURN_CHAR) {
    507                     order += direction;
    508                 } else {
    509                     break;
    510                 }
    511             }
    512         }
    513         if (order >= m_charList.GetSize()) {
    514             order = -2;
    515         }
    516         return order;
    517     }
    518     PAGECHAR_INFO charinfo;
    519     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(order);
    520     CPDF_Point curPos(charinfo.m_OriginX, charinfo.m_OriginY);
    521     FX_FLOAT difPosY = 0.0, minXdif = 1000;
    522     int	minIndex = -2;
    523     int index = order;
    524     FX_FLOAT height = charinfo.m_CharBox.Height();
    525     if (direction == FPDFTEXT_UP) {
    526         minIndex = -1;
    527         while (1) {
    528             if (--index < 0)	{
    529                 return -1;
    530             }
    531             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    532             if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) {
    533                 difPosY = charinfo.m_OriginY;
    534                 minIndex = index;
    535                 break;
    536             }
    537         }
    538         FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x;
    539         minXdif = PreXdif;
    540         if (PreXdif == 0)	{
    541             return index;
    542         }
    543         FX_FLOAT curXdif = 0;
    544         while (--index >= 0) {
    545             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    546             if (difPosY != charinfo.m_OriginY) {
    547                 break;
    548             }
    549             curXdif = charinfo.m_OriginX - curPos.x;
    550             if (curXdif == 0) {
    551                 return index;
    552             }
    553             int signflag = 0;
    554             if (curXdif > 0) {
    555                 signflag = 1;
    556             } else {
    557                 signflag = -1;
    558             }
    559             if (signflag * PreXdif < 0) {
    560                 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) {
    561                     return index + 1;
    562                 } else {
    563                     return index;
    564                 }
    565             }
    566             if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) {
    567                 minIndex = index;
    568                 minXdif = curXdif;
    569             }
    570             PreXdif = curXdif;
    571             if (difPosY != charinfo.m_OriginY) {
    572                 break;
    573             }
    574         }
    575         return minIndex;
    576     } else if(FPDFTEXT_DOWN) {
    577         minIndex = -2;
    578         while (1) {
    579             if (++index > m_charList.GetSize() - 1)	{
    580                 return minIndex;
    581             }
    582             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    583             if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) {
    584                 difPosY = charinfo.m_OriginY;
    585                 minIndex = index;
    586                 break;
    587             }
    588         }
    589         FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x;
    590         minXdif = PreXdif;
    591         if (PreXdif == 0)	{
    592             return index;
    593         }
    594         FX_FLOAT curXdif = 0;
    595         while (++index < m_charList.GetSize()) {
    596             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    597             if (difPosY != charinfo.m_OriginY) {
    598                 break;
    599             }
    600             curXdif = charinfo.m_OriginX - curPos.x;
    601             if (curXdif == 0) {
    602                 return index;
    603             }
    604             int signflag = 0;
    605             if (curXdif > 0) {
    606                 signflag = 1;
    607             } else {
    608                 signflag = -1;
    609             }
    610             if (signflag * PreXdif < 0) {
    611                 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) {
    612                     return index - 1;
    613                 } else {
    614                     return index;
    615                 }
    616             }
    617             if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) {
    618                 minXdif = curXdif;
    619                 minIndex = index;
    620             }
    621             PreXdif = curXdif;
    622         }
    623         return minIndex;
    624     }
    625 }
    626 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const
    627 {
    628     if(m_ParseOptions.m_bGetCharCodeOnly) {
    629         return;
    630     }
    631     if (!m_IsParsered)	{
    632         return;
    633     }
    634     if (index < 0 || index >= m_charList.GetSize())	{
    635         return;
    636     }
    637     PAGECHAR_INFO charinfo;
    638     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    639     info.m_Charcode = charinfo.m_CharCode;
    640     info.m_OriginX = charinfo.m_OriginX;
    641     info.m_OriginY = charinfo.m_OriginY;
    642     info.m_Unicode = charinfo.m_Unicode;
    643     info.m_Flag = charinfo.m_Flag;
    644     info.m_CharBox = charinfo.m_CharBox;
    645     info.m_pTextObj = charinfo.m_pTextObj;
    646     if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) {
    647         info.m_FontSize = charinfo.m_pTextObj->GetFontSize();
    648     }
    649     info.m_Matrix.Copy(charinfo.m_Matrix);
    650     return;
    651 }
    652 void CPDF_TextPage::CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const
    653 {
    654     PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start);
    655     PAGECHAR_INFO charinfo2 = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1);
    656     if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) {
    657         return;
    658     }
    659     if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) {
    660         PAGECHAR_INFO charinfo1 = charinfo;
    661         int startIndex = start;
    662         while(FPDFTEXT_CHAR_PIECE == charinfo1.m_Flag && charinfo1.m_Index == charinfo.m_Index) {
    663             startIndex--;
    664             if (startIndex < 0)	{
    665                 break;
    666             }
    667             charinfo1 = *(PAGECHAR_INFO*)m_charList.GetAt(startIndex);
    668         }
    669         startIndex++;
    670         start = startIndex;
    671     }
    672     if (FPDFTEXT_CHAR_PIECE == charinfo2.m_Flag) {
    673         PAGECHAR_INFO charinfo3 = charinfo2;
    674         int endIndex = start + nCount - 1;
    675         while(FPDFTEXT_CHAR_PIECE == charinfo3.m_Flag && charinfo3.m_Index == charinfo2.m_Index) {
    676             endIndex++;
    677             if (endIndex >= m_charList.GetSize())	{
    678                 break;
    679             }
    680             charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex);
    681         }
    682         endIndex--;
    683         nCount = endIndex - start + 1;
    684     }
    685 }
    686 CFX_WideString CPDF_TextPage::GetPageText(int start , int nCount) const
    687 {
    688     if (!m_IsParsered || nCount == 0) {
    689         return L"";
    690     }
    691     if (start < 0) {
    692         start = 0;
    693     }
    694     if	(nCount == -1) {
    695         nCount = m_charList.GetSize() - start;
    696         return m_TextBuf.GetWideString().Mid(start, m_TextBuf.GetWideString().GetLength());
    697     }
    698     if(nCount <= 0 || m_charList.GetSize() <= 0) {
    699         return L"";
    700     }
    701     if(nCount + start > m_charList.GetSize() - 1) {
    702         nCount = m_charList.GetSize() - start;
    703     }
    704     if (nCount <= 0) {
    705         return L"";
    706     }
    707     CheckMarkedContentObject(start, nCount);
    708     int startindex = 0;
    709     PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start);
    710     int startOffset = 0;
    711     while(charinfo.m_Index == -1) {
    712         startOffset++;
    713         if (startOffset > nCount || start + startOffset >= m_charList.GetSize())	{
    714             return L"";
    715         }
    716         charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + startOffset);
    717     }
    718     startindex = charinfo.m_Index;
    719     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1);
    720     int nCountOffset = 0;
    721     while (charinfo.m_Index == -1) {
    722         nCountOffset++;
    723         if (nCountOffset >= nCount) {
    724             return L"";
    725         }
    726         charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1);
    727     }
    728     nCount = start + nCount - nCountOffset - startindex;
    729     if(nCount <= 0) {
    730         return L"";
    731     }
    732     return m_TextBuf.GetWideString().Mid(startindex, nCount);
    733 }
    734 int CPDF_TextPage::CountRects(int start, int nCount)
    735 {
    736     if(m_ParseOptions.m_bGetCharCodeOnly) {
    737         return -1;
    738     }
    739     if (!m_IsParsered)	{
    740         return -1;
    741     }
    742     if (start < 0) {
    743         return -1;
    744     }
    745     if (nCount == -1 || nCount + start > m_charList.GetSize() ) {
    746         nCount = m_charList.GetSize() - start;
    747     }
    748     m_SelRects.RemoveAll();
    749     GetRectArray(start, nCount, m_SelRects);
    750     return m_SelRects.GetSize();
    751 }
    752 void CPDF_TextPage::GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const
    753 {
    754     if(m_ParseOptions.m_bGetCharCodeOnly) {
    755         return ;
    756     }
    757     if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) {
    758         return;
    759     }
    760     left = m_SelRects.GetAt(rectIndex).left;
    761     top = m_SelRects.GetAt(rectIndex).top;
    762     right = m_SelRects.GetAt(rectIndex).right;
    763     bottom = m_SelRects.GetAt(rectIndex).bottom;
    764 }
    765 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate)
    766 {
    767     if(m_ParseOptions.m_bGetCharCodeOnly) {
    768         return FALSE;
    769     }
    770     if(end == start) {
    771         return FALSE;
    772     }
    773     FX_FLOAT dx, dy;
    774     FPDF_CHAR_INFO info1, info2;
    775     GetCharInfo(start, info1);
    776     GetCharInfo(end, info2);
    777     while(info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) {
    778         end--;
    779         if(end <= start) {
    780             return FALSE;
    781         }
    782         GetCharInfo(end, info2);
    783     }
    784     dx = (info2.m_OriginX - info1.m_OriginX);
    785     dy = (info2.m_OriginY - info1.m_OriginY);
    786     if(dx == 0) {
    787         if(dy > 0) {
    788             Rotate = 90;
    789         } else if (dy < 0) {
    790             Rotate = 270;
    791         } else {
    792             Rotate = 0;
    793         }
    794     } else {
    795         float a = FXSYS_atan2(dy, dx);
    796         Rotate = (int)(a * 180 / FX_PI + 0.5);
    797     }
    798     if(Rotate < 0) {
    799         Rotate = -Rotate;
    800     } else if(Rotate > 0) {
    801         Rotate = 360 - Rotate;
    802     }
    803     return TRUE;
    804 }
    805 FX_BOOL	CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect , int& Rotate)
    806 {
    807     if(m_ParseOptions.m_bGetCharCodeOnly) {
    808         return FALSE;
    809     }
    810     int start, end, count, n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, TRUE);
    811     if(n < 1) {
    812         return FALSE;
    813     }
    814     if(n > 1) {
    815         GetBoundedSegment(n - 1, start, count);
    816         end = start + count - 1;
    817         GetBoundedSegment(0, start, count);
    818     } else {
    819         GetBoundedSegment(0, start, count);
    820         end = start + count - 1;
    821     }
    822     return GetBaselineRotate(start, end, Rotate);
    823 }
    824 FX_BOOL	CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate)
    825 {
    826     if(m_ParseOptions.m_bGetCharCodeOnly) {
    827         return FALSE;
    828     }
    829     if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) {
    830         return FALSE;
    831     }
    832     CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
    833     return GetBaselineRotate(rect , Rotate);
    834 }
    835 int	CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains )
    836 {
    837     if(m_ParseOptions.m_bGetCharCodeOnly) {
    838         return -1;
    839     }
    840     m_Segment.RemoveAll();
    841     if (!m_IsParsered)	{
    842         return -1;
    843     }
    844     CFX_FloatRect rect(left, bottom, right, top);
    845     rect.Normalize();
    846     int nCount = m_charList.GetSize();
    847     int pos = 0;
    848     FPDF_SEGMENT	segment;
    849     segment.m_Start = 0;
    850     segment.m_nCount = 0;
    851     FX_BOOL		segmentStatus = 0;
    852     FX_BOOL		IsContainPreChar = FALSE;
    853     while (pos < nCount) {
    854         PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos);
    855         if(bContains && rect.Contains(charinfo.m_CharBox)) {
    856             if (segmentStatus == 0 || segmentStatus == 2) {
    857                 segment.m_Start = pos;
    858                 segment.m_nCount = 1;
    859                 segmentStatus = 1;
    860             } else if (segmentStatus == 1) {
    861                 segment.m_nCount++;
    862             }
    863             IsContainPreChar = TRUE;
    864         } else if (!bContains && (IsRectIntersect(rect, charinfo.m_CharBox) || rect.Contains(charinfo.m_OriginX, charinfo.m_OriginY))) {
    865             if (segmentStatus == 0 || segmentStatus == 2) {
    866                 segment.m_Start = pos;
    867                 segment.m_nCount = 1;
    868                 segmentStatus = 1;
    869             } else if (segmentStatus == 1) {
    870                 segment.m_nCount++;
    871             }
    872             IsContainPreChar = TRUE;
    873         } else if (charinfo.m_Unicode == 32) {
    874             if (IsContainPreChar == TRUE) {
    875                 if (segmentStatus == 0 || segmentStatus == 2) {
    876                     segment.m_Start = pos;
    877                     segment.m_nCount = 1;
    878                     segmentStatus = 1;
    879                 } else if (segmentStatus == 1) {
    880                     segment.m_nCount++;
    881                 }
    882                 IsContainPreChar = FALSE;
    883             } else {
    884                 if (segmentStatus == 1) {
    885                     segmentStatus = 2;
    886                     m_Segment.Add(segment);
    887                     segment.m_Start = 0;
    888                     segment.m_nCount = 0;
    889                 }
    890             }
    891         } else {
    892             if (segmentStatus == 1) {
    893                 segmentStatus = 2;
    894                 m_Segment.Add(segment);
    895                 segment.m_Start = 0;
    896                 segment.m_nCount = 0;
    897             }
    898             IsContainPreChar = FALSE;
    899         }
    900         pos++;
    901     }
    902     if (segmentStatus == 1) {
    903         segmentStatus = 2;
    904         m_Segment.Add(segment);
    905         segment.m_Start = 0;
    906         segment.m_nCount = 0;
    907     }
    908     return m_Segment.GetSize();
    909 }
    910 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const
    911 {
    912     if(m_ParseOptions.m_bGetCharCodeOnly) {
    913         return ;
    914     }
    915     if (index < 0 || index >= m_Segment.GetSize()) {
    916         return;
    917     }
    918     start = m_Segment.GetAt(index).m_Start;
    919     count = m_Segment.GetAt(index).m_nCount;
    920 }
    921 int CPDF_TextPage::GetWordBreak(int index, int direction) const
    922 {
    923     if(m_ParseOptions.m_bGetCharCodeOnly) {
    924         return -1;
    925     }
    926     if (!m_IsParsered)	{
    927         return -1;
    928     }
    929     if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) {
    930         return -1;
    931     }
    932     if (index < 0 || index >= m_charList.GetSize()) {
    933         return -1;
    934     }
    935     PAGECHAR_INFO charinfo;
    936     charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
    937     if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED)	{
    938         return index;
    939     }
    940     if (!IsLetter(charinfo.m_Unicode)) {
    941         return index;
    942     }
    943     int breakPos = index;
    944     if (direction == FPDFTEXT_LEFT) {
    945         while (--breakPos > 0) {
    946             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
    947             if (!IsLetter(charinfo.m_Unicode)) {
    948                 return breakPos;
    949             }
    950         }
    951         return breakPos;
    952     } else if (direction == FPDFTEXT_RIGHT) {
    953         while (++breakPos < m_charList.GetSize()) {
    954             charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
    955             if (!IsLetter(charinfo.m_Unicode)) {
    956                 return breakPos;
    957             }
    958         }
    959         return breakPos;
    960     }
    961     return breakPos;
    962 }
    963 FX_INT32 CPDF_TextPage::FindTextlineFlowDirection()
    964 {
    965     if (!m_pPage)	{
    966         return -1;
    967     }
    968     const FX_INT32 nPageWidth = (FX_INT32)((CPDF_Page*)m_pPage)->GetPageWidth();
    969     const FX_INT32 nPageHeight = (FX_INT32)((CPDF_Page*)m_pPage)->GetPageHeight();
    970     CFX_ByteArray nHorizontalMask;
    971     if (!nHorizontalMask.SetSize(nPageWidth)) {
    972         return -1;
    973     }
    974     FX_BYTE* pDataH = nHorizontalMask.GetData();
    975     CFX_ByteArray nVerticalMask;
    976     if (!nVerticalMask.SetSize(nPageHeight)) {
    977         return -1;
    978     }
    979     FX_BYTE* pDataV = nVerticalMask.GetData();
    980     FX_INT32 index = 0;
    981     FX_FLOAT fLineHeight = 0.0f;
    982     CPDF_PageObject* pPageObj = NULL;
    983     FX_POSITION	pos = NULL;
    984     pos = m_pPage->GetFirstObjectPosition();
    985     if(!pos) {
    986         return -1;
    987     }
    988     while(pos) {
    989         pPageObj = m_pPage->GetNextObject(pos);
    990         if(NULL == pPageObj) {
    991             continue;
    992         }
    993         if(PDFPAGE_TEXT != pPageObj->m_Type) {
    994             continue;
    995         }
    996         FX_INT32 minH = (FX_INT32)pPageObj->m_Left < 0 ? 0 : (FX_INT32)pPageObj->m_Left;
    997         FX_INT32 maxH = (FX_INT32)pPageObj->m_Right > nPageWidth ? nPageWidth : (FX_INT32)pPageObj->m_Right;
    998         FX_INT32 minV = (FX_INT32)pPageObj->m_Bottom < 0 ? 0 : (FX_INT32)pPageObj->m_Bottom;
    999         FX_INT32 maxV = (FX_INT32)pPageObj->m_Top > nPageHeight ? nPageHeight : (FX_INT32)pPageObj->m_Top;
   1000         if (minH >= maxH || minV >= maxV) {
   1001             continue;
   1002         }
   1003         FXSYS_memset8(pDataH + minH, 1, maxH - minH);
   1004         FXSYS_memset8(pDataV + minV, 1, maxV - minV);
   1005         if (fLineHeight <= 0.0f) {
   1006             fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom;
   1007         }
   1008         pPageObj = NULL;
   1009     }
   1010     FX_INT32 nStartH = 0;
   1011     FX_INT32 nEndH = 0;
   1012     FX_FLOAT nSumH = 0.0f;
   1013     for (index = 0; index < nPageWidth; index++)
   1014         if(1 == nHorizontalMask[index]) {
   1015             break;
   1016         }
   1017     nStartH = index;
   1018     for (index = nPageWidth; index > 0; index--)
   1019         if(1 == nHorizontalMask[index - 1]) {
   1020             break;
   1021         }
   1022     nEndH = index;
   1023     for (index = nStartH; index < nEndH; index++) {
   1024         nSumH += nHorizontalMask[index];
   1025     }
   1026     nSumH /= nEndH - nStartH;
   1027     FX_INT32 nStartV = 0;
   1028     FX_INT32 nEndV = 0;
   1029     FX_FLOAT nSumV = 0.0f;
   1030     for (index = 0; index < nPageHeight; index++)
   1031         if(1 == nVerticalMask[index]) {
   1032             break;
   1033         }
   1034     nStartV = index;
   1035     for (index = nPageHeight; index > 0; index--)
   1036         if(1 == nVerticalMask[index - 1]) {
   1037             break;
   1038         }
   1039     nEndV = index;
   1040     for (index = nStartV; index < nEndV; index++) {
   1041         nSumV += nVerticalMask[index];
   1042     }
   1043     nSumV /= nEndV - nStartV;
   1044     if ((nEndV - nStartV) < (FX_INT32)(2 * fLineHeight)) {
   1045         return 0;
   1046     }
   1047     if ((nEndH - nStartH) < (FX_INT32)(2 * fLineHeight)) {
   1048         return 1;
   1049     }
   1050     if (nSumH > 0.8f) {
   1051         return 0;
   1052     }
   1053     if (nSumH - nSumV > 0.0f) {
   1054         return 0;
   1055     }
   1056     if (nSumV - nSumH > 0.0f) {
   1057         return 1;
   1058     }
   1059     return -1;
   1060 }
   1061 void CPDF_TextPage::ProcessObject()
   1062 {
   1063     CPDF_PageObject*	pPageObj = NULL;
   1064     if (!m_pPage)	{
   1065         return;
   1066     }
   1067     FX_POSITION	pos;
   1068     pos = m_pPage->GetFirstObjectPosition();
   1069     if (!pos)	{
   1070         return;
   1071     }
   1072     m_TextlineDir = FindTextlineFlowDirection();
   1073     int nCount = 0;
   1074     while (pos) {
   1075         pPageObj = m_pPage->GetNextObject(pos);
   1076         if(pPageObj) {
   1077             if(pPageObj->m_Type == PDFPAGE_TEXT) {
   1078                 CFX_AffineMatrix matrix;
   1079                 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos);
   1080                 nCount++;
   1081             } else if (pPageObj->m_Type == PDFPAGE_FORM) {
   1082                 CFX_AffineMatrix formMatrix(1, 0, 0, 1, 0, 0);
   1083                 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix);
   1084             }
   1085         }
   1086         pPageObj = NULL;
   1087     }
   1088     int count = m_LineObj.GetSize();
   1089     for(int i = 0; i < count; i++) {
   1090         ProcessTextObject(m_LineObj.GetAt(i));
   1091     }
   1092     m_LineObj.RemoveAll();
   1093     CloseTempLine();
   1094 }
   1095 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix)
   1096 {
   1097     CPDF_PageObject*	pPageObj = NULL;
   1098     FX_POSITION	pos;
   1099     if (!pFormObj)	{
   1100         return;
   1101     }
   1102     pos = pFormObj->m_pForm->GetFirstObjectPosition();
   1103     if (!pos)	{
   1104         return;
   1105     }
   1106     CFX_AffineMatrix curFormMatrix;
   1107     curFormMatrix.Copy(pFormObj->m_FormMatrix);
   1108     curFormMatrix.Concat(formMatrix);
   1109     while (pos) {
   1110         pPageObj = pFormObj->m_pForm->GetNextObject(pos);
   1111         if(pPageObj) {
   1112             if(pPageObj->m_Type == PDFPAGE_TEXT) {
   1113                 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos);
   1114             } else if (pPageObj->m_Type == PDFPAGE_FORM) {
   1115                 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix);
   1116             }
   1117         }
   1118         pPageObj = NULL;
   1119     }
   1120 }
   1121 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const
   1122 {
   1123     if(charCode == -1) {
   1124         return 0;
   1125     }
   1126     int w = pFont->GetCharWidthF(charCode);
   1127     if(w == 0) {
   1128         CFX_ByteString str;
   1129         pFont->AppendChar(str, charCode);
   1130         w = pFont->GetStringWidth(str, 1);
   1131         if(w == 0) {
   1132             FX_RECT BBox;
   1133             pFont->GetCharBBox(charCode, BBox);
   1134             w = BBox.right - BBox.left;
   1135         }
   1136     }
   1137     return w;
   1138 }
   1139 void CPDF_TextPage::OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str)
   1140 {
   1141     FX_INT32 start, count;
   1142     FX_INT32 ret = pBidi->GetBidiInfo(start, count);
   1143     if(ret == 2) {
   1144         for(int i = start + count - 1; i >= start; i--) {
   1145             m_TextBuf.AppendChar(str.GetAt(i));
   1146             m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i));
   1147         }
   1148     } else {
   1149         int end = start + count ;
   1150         for(int i = start; i < end; i++) {
   1151             m_TextBuf.AppendChar(str.GetAt(i));
   1152             m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i));
   1153         }
   1154     }
   1155 }
   1156 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i)
   1157 {
   1158     PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
   1159     FX_WCHAR wChar = str.GetAt(i);
   1160     if(!IsControlChar(&Info)) {
   1161         Info.m_Index = m_TextBuf.GetLength();
   1162         if (wChar >= 0xFB00 && wChar <= 0xFB06) {
   1163             FX_LPWSTR pDst = NULL;
   1164             FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
   1165             if (nCount >= 1) {
   1166                 pDst = FX_Alloc(FX_WCHAR, nCount);
   1167                 FX_Unicode_GetNormalization(wChar, pDst);
   1168                 for (int nIndex = 0; nIndex < nCount; nIndex++) {
   1169                     PAGECHAR_INFO Info2 = Info;
   1170                     Info2.m_Unicode = pDst[nIndex];
   1171                     Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
   1172                     m_TextBuf.AppendChar(Info2.m_Unicode);
   1173                     if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1174                         m_charList.Add(Info2);
   1175                     }
   1176                 }
   1177                 FX_Free(pDst);
   1178                 return;
   1179             }
   1180         }
   1181         m_TextBuf.AppendChar(wChar);
   1182     } else {
   1183         Info.m_Index = -1;
   1184     }
   1185     if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1186         m_charList.Add(Info);
   1187     }
   1188 }
   1189 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i)
   1190 {
   1191     PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
   1192     if(!IsControlChar(&Info)) {
   1193         Info.m_Index = m_TextBuf.GetLength();
   1194         FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE);
   1195         FX_LPWSTR pDst = NULL;
   1196         FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
   1197         if (nCount >= 1) {
   1198             pDst = FX_Alloc(FX_WCHAR, nCount);
   1199             FX_Unicode_GetNormalization(wChar, pDst);
   1200             for (int nIndex = 0; nIndex < nCount; nIndex++) {
   1201                 PAGECHAR_INFO Info2 = Info;
   1202                 Info2.m_Unicode = pDst[nIndex];
   1203                 Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
   1204                 m_TextBuf.AppendChar(Info2.m_Unicode);
   1205                 if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1206                     m_charList.Add(Info2);
   1207                 }
   1208             }
   1209             FX_Free(pDst);
   1210             return;
   1211         } else {
   1212             Info.m_Unicode = wChar;
   1213         }
   1214         m_TextBuf.AppendChar(Info.m_Unicode);
   1215     } else {
   1216         Info.m_Index = -1;
   1217     }
   1218     if( !m_ParseOptions.m_bGetCharCodeOnly) {
   1219         m_charList.Add(Info);
   1220     }
   1221 }
   1222 void CPDF_TextPage::CloseTempLine()
   1223 {
   1224     int count1 = m_TempCharList.GetSize();
   1225     if (count1 <= 0) {
   1226         return;
   1227     }
   1228     IFX_BidiChar* BidiChar = IFX_BidiChar::Create();
   1229     CFX_WideString str = m_TempTextBuf.GetWideString();
   1230     CFX_WordArray order;
   1231     FX_BOOL bR2L = FALSE;
   1232     FX_INT32 start = 0, count = 0;
   1233     int nR2L = 0, nL2R = 0;
   1234     FX_BOOL bPrevSpace = FALSE;
   1235     for (int i = 0; i < str.GetLength(); i++) {
   1236         if(str.GetAt(i) == 32) {
   1237             if(bPrevSpace) {
   1238                 m_TempTextBuf.Delete(i, 1);
   1239                 m_TempCharList.Delete(i);
   1240                 str.Delete(i);
   1241                 count1--;
   1242                 i--;
   1243                 continue;
   1244             }
   1245             bPrevSpace = TRUE;
   1246         } else {
   1247             bPrevSpace = FALSE;
   1248         }
   1249         if(BidiChar && BidiChar->AppendChar(str.GetAt(i))) {
   1250             FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
   1251             order.Add(start);
   1252             order.Add(count);
   1253             order.Add(ret);
   1254             if(!bR2L) {
   1255                 if(ret == 2) {
   1256                     nR2L++;
   1257                 } else if (ret == 1) {
   1258                     nL2R++;
   1259                 }
   1260             }
   1261         }
   1262     }
   1263     if(BidiChar && BidiChar->EndChar()) {
   1264         FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
   1265         order.Add(start);
   1266         order.Add(count);
   1267         order.Add(ret);
   1268         if(!bR2L) {
   1269             if(ret == 2) {
   1270                 nR2L++;
   1271             } else if(ret == 1) {
   1272                 nL2R++;
   1273             }
   1274         }
   1275     }
   1276     if(nR2L > 0 && nR2L >= nL2R) {
   1277         bR2L = TRUE;
   1278     }
   1279     if(this->m_parserflag == FPDFTEXT_RLTB || bR2L) {
   1280         int count = order.GetSize();
   1281         for(int i = count - 1; i > 0; i -= 3) {
   1282             int ret = order.GetAt(i);
   1283             int start = order.GetAt(i - 2);
   1284             int count1 = order.GetAt(i - 1);
   1285             if(ret == 2 || ret == 0) {
   1286                 for(int j = start + count1 - 1; j >= start; j--) {
   1287                     AddCharInfoByRLDirection(str, j);
   1288                 }
   1289             } else {
   1290                 int j = i;
   1291                 FX_BOOL bSymbol = FALSE;
   1292                 while(j > 0 && order.GetAt(j) != 2) {
   1293                     bSymbol = !order.GetAt(j);
   1294                     j -= 3;
   1295                 }
   1296                 int end = start + count1 ;
   1297                 int n = 0;
   1298                 if(bSymbol) {
   1299                     n = j + 6;
   1300                 } else {
   1301                     n = j + 3;
   1302                 }
   1303                 if(n >= i) {
   1304                     for(int m = start; m < end; m++) {
   1305                         AddCharInfoByLRDirection(str, m);
   1306                     }
   1307                 } else {
   1308                     j = i;
   1309                     i = n;
   1310                     for(; n <= j; n += 3) {
   1311                         int start = order.GetAt(n - 2);
   1312                         int count1 = order.GetAt(n - 1);
   1313                         int end = start + count1 ;
   1314                         for(int m = start; m < end; m++) {
   1315                             AddCharInfoByLRDirection(str, m);
   1316                         }
   1317                     }
   1318                 }
   1319             }
   1320         }
   1321     } else {
   1322         int count = order.GetSize();
   1323         FX_BOOL bL2R = FALSE;
   1324         for(int i = 0; i < count; i += 3) {
   1325             int ret = order.GetAt(i + 2);
   1326             int start = order.GetAt(i);
   1327             int count1 = order.GetAt(i + 1);
   1328             if(ret == 2 || (i == 0 && ret == 0 && !bL2R)) {
   1329                 int j = i + 3;
   1330                 while(bR2L && j < count) {
   1331                     if(order.GetAt(j + 2) == 1) {
   1332                         break;
   1333                     } else {
   1334                         j += 3;
   1335                     }
   1336                 }
   1337                 if(j == 3) {
   1338                     i = -3;
   1339                     bL2R = TRUE;
   1340                     continue;
   1341                 }
   1342                 int end = m_TempCharList.GetSize() - 1;
   1343                 if(j < count) {
   1344                     end = order.GetAt(j) - 1;
   1345                 }
   1346                 i = j - 3;
   1347                 for(int n = end; n >= start; n--) {
   1348                     AddCharInfoByRLDirection(str, n);
   1349                 }
   1350             } else {
   1351                 int end = start + count1 ;
   1352                 for(int n = start; n < end; n++) {
   1353                     AddCharInfoByLRDirection(str, n);
   1354                 }
   1355             }
   1356         }
   1357     }
   1358     order.RemoveAll();
   1359     m_TempCharList.RemoveAll();
   1360     m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength());
   1361     BidiChar->Release();
   1362 }
   1363 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject*	pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITION ObjPos)
   1364 {
   1365     CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, pTextObj->m_Top);
   1366     if(FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f ) {
   1367         return;
   1368     }
   1369     int count = m_LineObj.GetSize();
   1370     PDFTEXT_Obj Obj;
   1371     Obj.m_pTextObj = pTextObj;
   1372     Obj.m_formMatrix = formMatrix;
   1373     if(count == 0) {
   1374         m_LineObj.Add(Obj);
   1375         return;
   1376     }
   1377     if (IsSameAsPreTextObject(pTextObj, ObjPos)) {
   1378         return;
   1379     }
   1380     PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(count - 1);
   1381     CPDF_TextObjectItem item;
   1382     int nItem = prev_Obj.m_pTextObj->CountItems();
   1383     prev_Obj.m_pTextObj->GetItemInfo(nItem - 1, &item);
   1384     FX_FLOAT prev_width = GetCharWidth(item.m_CharCode, prev_Obj.m_pTextObj->GetFont()) * prev_Obj.m_pTextObj->GetFontSize() / 1000;
   1385     CFX_AffineMatrix prev_matrix;
   1386     prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix);
   1387     prev_width = FXSYS_fabs(prev_width);
   1388     prev_matrix.Concat(prev_Obj.m_formMatrix);
   1389     prev_width = prev_matrix.TransformDistance(prev_width);
   1390     pTextObj->GetItemInfo(0, &item);
   1391     FX_FLOAT this_width = GetCharWidth(item.m_CharCode, pTextObj->GetFont()) * pTextObj->GetFontSize() / 1000;
   1392     this_width = FXSYS_fabs(this_width);
   1393     CFX_AffineMatrix this_matrix;
   1394     pTextObj->GetTextMatrix(&this_matrix);
   1395     this_width = FXSYS_fabs(this_width);
   1396     this_matrix.Concat(formMatrix);
   1397     this_width = this_matrix.TransformDistance(this_width);
   1398     FX_FLOAT threshold = prev_width > this_width ? prev_width / 4 : this_width / 4;
   1399     FX_FLOAT prev_x = prev_Obj.m_pTextObj->GetPosX(), prev_y = prev_Obj.m_pTextObj->GetPosY();
   1400     prev_Obj.m_formMatrix.Transform(prev_x, prev_y);
   1401     m_DisplayMatrix.Transform(prev_x, prev_y);
   1402     FX_FLOAT this_x = pTextObj->GetPosX(), this_y = pTextObj->GetPosY();
   1403     formMatrix.Transform(this_x, this_y);
   1404     m_DisplayMatrix.Transform(this_x, this_y);
   1405     if (FXSYS_fabs(this_y - prev_y) > threshold * 2) {
   1406         for(int i = 0; i < count; i++) {
   1407             ProcessTextObject(m_LineObj.GetAt(i));
   1408         }
   1409         m_LineObj.RemoveAll();
   1410         m_LineObj.Add(Obj);
   1411         return;
   1412     }
   1413     int i = 0;
   1414     if(m_ParseOptions.m_bNormalizeObjs) {
   1415         for(i = count - 1; i >= 0; i--) {
   1416             PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i);
   1417             CFX_AffineMatrix prev_matrix;
   1418             prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix);
   1419             FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), Prev_y = prev_Obj.m_pTextObj->GetPosY();
   1420             prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y);
   1421             m_DisplayMatrix.Transform(Prev_x, Prev_y);
   1422             if(this_x >= Prev_x) {
   1423                 if(i == count - 1) {
   1424                     m_LineObj.Add(Obj);
   1425                 } else {
   1426                     m_LineObj.InsertAt(i + 1, Obj);
   1427                 }
   1428                 break;
   1429             }
   1430         }
   1431         if(i < 0) {
   1432             m_LineObj.InsertAt(0, Obj);
   1433         }
   1434     } else {
   1435         m_LineObj.Add(Obj);
   1436     }
   1437 }
   1438 FX_INT32 CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj)
   1439 {
   1440     CPDF_TextObject* pTextObj = Obj.m_pTextObj;
   1441     CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
   1442     if(!pMarkData) {
   1443         return FPDFTEXT_MC_PASS;
   1444     }
   1445     int nContentMark = pMarkData->CountItems();
   1446     if (nContentMark < 1) {
   1447         return FPDFTEXT_MC_PASS;
   1448     }
   1449     CFX_WideString actText;
   1450     FX_BOOL bExist = FALSE;
   1451     CPDF_Dictionary* pDict = NULL;
   1452     int n = 0;
   1453     for (n = 0; n < nContentMark; n++) {
   1454         CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
   1455         CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
   1456         pDict = (CPDF_Dictionary*)item.GetParam();
   1457         CPDF_String* temp = (CPDF_String*)(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : NULL);
   1458         if (temp) {
   1459             bExist = TRUE;
   1460             actText = temp->GetUnicodeText();
   1461         }
   1462     }
   1463     if (!bExist) {
   1464         return FPDFTEXT_MC_PASS;
   1465     }
   1466     if (m_pPreTextObj) {
   1467         if (CPDF_ContentMarkData* pPreMarkData = (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) {
   1468             if (pPreMarkData->CountItems() == n) {
   1469                 CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1);
   1470                 if (pDict == item.GetParam()) {
   1471                     return FPDFTEXT_MC_DONE;
   1472                 }
   1473             }
   1474         }
   1475     }
   1476     CPDF_Font*	pFont = pTextObj->GetFont();
   1477     FX_STRSIZE nItems = actText.GetLength();
   1478     if (nItems < 1) {
   1479         return FPDFTEXT_MC_PASS;
   1480     }
   1481     bExist = FALSE;
   1482     for (FX_STRSIZE i = 0; i < nItems; i++) {
   1483         FX_WCHAR wChar = actText.GetAt(i);
   1484         if (-1 == pFont->CharCodeFromUnicode(wChar)) {
   1485             continue;
   1486         } else {
   1487             bExist = TRUE;
   1488             break;
   1489         }
   1490     }
   1491     if (!bExist) {
   1492         return FPDFTEXT_MC_PASS;
   1493     }
   1494     bExist = FALSE;
   1495     for (FX_STRSIZE i = 0; i < nItems; i++) {
   1496         FX_WCHAR wChar = actText.GetAt(i);
   1497         if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) {
   1498             bExist = TRUE;
   1499             break;
   1500         }
   1501     }
   1502     if (!bExist) {
   1503         return FPDFTEXT_MC_DONE;
   1504     }
   1505     return FPDFTEXT_MC_DELAY;
   1506 }
   1507 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj)
   1508 {
   1509     CPDF_TextObject* pTextObj = Obj.m_pTextObj;
   1510     CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
   1511     if(!pMarkData) {
   1512         return;
   1513     }
   1514     int nContentMark = pMarkData->CountItems();
   1515     if (nContentMark < 1) {
   1516         return;
   1517     }
   1518     CFX_WideString actText;
   1519     CPDF_Dictionary* pDict = NULL;
   1520     int n = 0;
   1521     for (n = 0; n < nContentMark; n++) {
   1522         CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
   1523         CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
   1524         pDict = (CPDF_Dictionary*)item.GetParam();
   1525         CPDF_String* temp = (CPDF_String*)(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : NULL);
   1526         if (temp) {
   1527             actText = temp->GetUnicodeText();
   1528         }
   1529     }
   1530     FX_STRSIZE nItems = actText.GetLength();
   1531     if (nItems < 1) {
   1532         return;
   1533     }
   1534     CPDF_Font*	pFont = pTextObj->GetFont();
   1535     CFX_AffineMatrix formMatrix = Obj.m_formMatrix;
   1536     CFX_AffineMatrix matrix;
   1537     pTextObj->GetTextMatrix(&matrix);
   1538     matrix.Concat(formMatrix);
   1539     FX_FLOAT fPosX = pTextObj->GetPosX();
   1540     FX_FLOAT fPosY = pTextObj->GetPosY();
   1541     int nCharInfoIndex = m_TextBuf.GetLength();
   1542     CFX_FloatRect charBox;
   1543     charBox.top = pTextObj->m_Top;
   1544     charBox.left = pTextObj->m_Left;
   1545     charBox.right = pTextObj->m_Right;
   1546     charBox.bottom = pTextObj->m_Bottom;
   1547     for (FX_STRSIZE k = 0; k < nItems; k++) {
   1548         FX_WCHAR wChar = actText.GetAt(k);
   1549         if (wChar <= 0x80 && !isprint(wChar)) {
   1550             wChar = 0x20;
   1551         }
   1552         if (wChar >= 0xFFFD) {
   1553             continue;
   1554         }
   1555         PAGECHAR_INFO charinfo;
   1556         charinfo.m_OriginX = fPosX;
   1557         charinfo.m_OriginY = fPosY;
   1558         charinfo.m_Index = nCharInfoIndex;
   1559         charinfo.m_Unicode = wChar;
   1560         charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar);
   1561         charinfo.m_Flag = FPDFTEXT_CHAR_PIECE;
   1562         charinfo.m_pTextObj = pTextObj;
   1563         charinfo.m_CharBox.top = charBox.top;
   1564         charinfo.m_CharBox.left = charBox.left;
   1565         charinfo.m_CharBox.right = charBox.right;
   1566         charinfo.m_CharBox.bottom = charBox.bottom;
   1567         charinfo.m_Matrix.Copy(matrix);
   1568         m_TempTextBuf.AppendChar(wChar);
   1569         m_TempCharList.Add(charinfo);
   1570     }
   1571 }
   1572 void CPDF_TextPage::FindPreviousTextObject(void)
   1573 {
   1574     if (m_TempCharList.GetSize() < 1 && m_charList.GetSize() < 1) {
   1575         return;
   1576     }
   1577     PAGECHAR_INFO preChar;
   1578     if (m_TempCharList.GetSize() >= 1) {
   1579         preChar = *(PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1);
   1580     } else {
   1581         preChar = *(PAGECHAR_INFO*)m_charList.GetAt(m_charList.GetSize() - 1);
   1582     }
   1583     if (preChar.m_pTextObj) {
   1584         m_pPreTextObj = preChar.m_pTextObj;
   1585     }
   1586 }
   1587 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj)
   1588 {
   1589     CPDF_TextObject* pTextObj = Obj.m_pTextObj;
   1590     if(FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f ) {
   1591         return;
   1592     }
   1593     CFX_AffineMatrix formMatrix = Obj.m_formMatrix;
   1594     CPDF_Font*	pFont = pTextObj->GetFont();
   1595     CFX_AffineMatrix matrix;
   1596     pTextObj->GetTextMatrix(&matrix);
   1597     matrix.Concat(formMatrix);
   1598     FX_INT32 bPreMKC = PreMarkedContent(Obj);
   1599     if (FPDFTEXT_MC_DONE == bPreMKC) {
   1600         m_pPreTextObj = pTextObj;
   1601         m_perMatrix.Copy(formMatrix);
   1602         return;
   1603     }
   1604     int result = 0;
   1605     if (m_pPreTextObj) {
   1606         result = ProcessInsertObject(pTextObj, formMatrix);
   1607         if (2 == result) {
   1608             m_CurlineRect = CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
   1609         } else {
   1610             m_CurlineRect.Union(CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top));
   1611         }
   1612         PAGECHAR_INFO generateChar;
   1613         if (result == 1) {
   1614             if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) {
   1615                 if (!formMatrix.IsIdentity()) {
   1616                     generateChar.m_Matrix.Copy(formMatrix);
   1617                 }
   1618                 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
   1619                 m_TempCharList.Add(generateChar);
   1620             }
   1621         } else if(result == 2) {
   1622             CloseTempLine();
   1623             if(m_TextBuf.GetSize()) {
   1624                 if(m_ParseOptions.m_bGetCharCodeOnly) {
   1625                     m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
   1626                     m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
   1627                 } else {
   1628                     if(GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) {
   1629                         m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
   1630                         if (!formMatrix.IsIdentity()) {
   1631                             generateChar.m_Matrix.Copy(formMatrix);
   1632                         }
   1633                         m_charList.Add(generateChar);
   1634                     }
   1635                     if(GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
   1636                         m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
   1637                         if (!formMatrix.IsIdentity()) {
   1638                             generateChar.m_Matrix.Copy(formMatrix);
   1639                         }
   1640                         m_charList.Add(generateChar);
   1641                     }
   1642                 }
   1643             }
   1644         } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) {
   1645             FX_INT32 nChars = pTextObj->CountChars();
   1646             if (nChars == 1) {
   1647                 CPDF_TextObjectItem item;
   1648                 pTextObj->GetCharInfo(0, &item);
   1649                 CFX_WideString wstrItem = pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
   1650                 if(wstrItem.IsEmpty()) {
   1651                     wstrItem += (FX_WCHAR)item.m_CharCode;
   1652                 }
   1653                 FX_WCHAR curChar = wstrItem.GetAt(0);
   1654                 if (0x2D == curChar || 0xAD == curChar) {
   1655                     return;
   1656                 }
   1657             }
   1658             while (m_TempTextBuf.GetSize() > 0 && m_TempTextBuf.GetWideString().GetAt(m_TempTextBuf.GetLength() - 1) == 0x20) {
   1659                 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
   1660                 m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
   1661             }
   1662             PAGECHAR_INFO* cha = (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1);
   1663             m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
   1664             cha->m_Unicode = 0x2;
   1665             cha->m_Flag = FPDFTEXT_CHAR_HYPHEN;
   1666             m_TempTextBuf.AppendChar(0xfffe);
   1667         }
   1668     } else {
   1669         m_CurlineRect = CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
   1670     }
   1671     if (FPDFTEXT_MC_DELAY == bPreMKC) {
   1672         ProcessMarkedContent(Obj);
   1673         m_pPreTextObj = pTextObj;
   1674         m_perMatrix.Copy(formMatrix);
   1675         return;
   1676     }
   1677     m_pPreTextObj = pTextObj;
   1678     m_perMatrix.Copy(formMatrix);
   1679     int nItems = pTextObj->CountItems();
   1680     FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix);
   1681 
   1682     const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems);
   1683     const FX_BOOL bIsBidiAndMirrorInverse =
   1684         bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0;
   1685     FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength();
   1686     FX_INT32 iCharListStartAppend = m_TempCharList.GetSize();
   1687 
   1688     FX_FLOAT spacing = 0;
   1689     for (int i = 0; i < nItems; i++) {
   1690         CPDF_TextObjectItem item;
   1691         PAGECHAR_INFO charinfo;
   1692         charinfo.m_OriginX = 0;
   1693         charinfo.m_OriginY = 0;
   1694         pTextObj->GetItemInfo(i, &item);
   1695         if (item.m_CharCode == (FX_DWORD) - 1) {
   1696             CFX_WideString str = m_TempTextBuf.GetWideString();
   1697             if(str.IsEmpty()) {
   1698                 str = m_TextBuf.GetWideString();
   1699             }
   1700             if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
   1701                 continue;
   1702             }
   1703             FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
   1704             spacing = -fontsize_h * item.m_OriginX / 1000;
   1705             continue;
   1706         }
   1707         FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace;
   1708         if (charSpace > 0.001) {
   1709             spacing += matrix.TransformDistance(charSpace);
   1710         } else if(charSpace < -0.001) {
   1711             spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
   1712         }
   1713         spacing -= baseSpace;
   1714         if (spacing && i > 0) {
   1715             int last_width = 0;
   1716             FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
   1717             FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
   1718             FX_FLOAT threshold = 0;
   1719             if (space_charcode != -1) {
   1720                 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000 ;
   1721             }
   1722             if (threshold > fontsize_h / 3) {
   1723                 threshold = 0;
   1724             } else {
   1725                 threshold /= 2;
   1726             }
   1727             if (threshold == 0) {
   1728                 threshold = fontsize_h;
   1729                 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
   1730                 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width;
   1731                 threshold = _NormalizeThreshold(threshold);
   1732                 threshold = fontsize_h * threshold / 1000;
   1733             }
   1734             if (threshold && (spacing && spacing >= threshold) ) {
   1735                 charinfo.m_Unicode = TEXT_BLANK_CHAR;
   1736                 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
   1737                 charinfo.m_pTextObj = pTextObj;
   1738                 charinfo.m_Index = m_TextBuf.GetLength();
   1739                 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
   1740                 charinfo.m_CharCode = -1;
   1741                 charinfo.m_Matrix.Copy(formMatrix);
   1742                 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, charinfo.m_OriginY);
   1743                 charinfo.m_CharBox = CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, charinfo.m_OriginX, charinfo.m_OriginY);
   1744                 m_TempCharList.Add(charinfo);
   1745             }
   1746             if (item.m_CharCode == (FX_DWORD) - 1) {
   1747                 continue;
   1748             }
   1749         }
   1750         spacing = 0;
   1751         CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
   1752         FX_BOOL bNoUnicode = FALSE;
   1753         FX_WCHAR wChar = wstrItem.GetAt(0);
   1754         if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) {
   1755             if(wstrItem.IsEmpty()) {
   1756                 wstrItem += (FX_WCHAR)item.m_CharCode;
   1757             } else {
   1758                 wstrItem.SetAt(0, (FX_WCHAR)item.m_CharCode);
   1759             }
   1760             bNoUnicode = TRUE;
   1761         }
   1762         charinfo.m_Index = -1;
   1763         charinfo.m_CharCode = item.m_CharCode;
   1764         if(bNoUnicode) {
   1765             charinfo.m_Flag = FPDFTEXT_CHAR_UNUNICODE;
   1766         } else {
   1767             charinfo.m_Flag = FPDFTEXT_CHAR_NORMAL;
   1768         }
   1769         charinfo.m_pTextObj = pTextObj;
   1770         charinfo.m_OriginX = 0, charinfo.m_OriginY = 0;
   1771         matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, charinfo.m_OriginY);
   1772         FX_RECT rect(0, 0, 0, 0);
   1773         rect.Intersect(0, 0, 0, 0);
   1774         charinfo.m_pTextObj->GetFont()->GetCharBBox(charinfo.m_CharCode, rect);
   1775         charinfo.m_CharBox.top = rect.top * pTextObj->GetFontSize() / 1000 + item.m_OriginY;
   1776         charinfo.m_CharBox.left = rect.left * pTextObj->GetFontSize() / 1000 + item.m_OriginX;
   1777         charinfo.m_CharBox.right = rect.right * pTextObj->GetFontSize() / 1000 + item.m_OriginX;
   1778         charinfo.m_CharBox.bottom = rect.bottom * pTextObj->GetFontSize() / 1000 + item.m_OriginY;
   1779         if (fabsf(charinfo.m_CharBox.top - charinfo.m_CharBox.bottom) < 0.01f) {
   1780             charinfo.m_CharBox.top = charinfo.m_CharBox.bottom + pTextObj->GetFontSize();
   1781         }
   1782         if (fabsf(charinfo.m_CharBox.right - charinfo.m_CharBox.left) < 0.01f) {
   1783             charinfo.m_CharBox.right = charinfo.m_CharBox.left + pTextObj->GetCharWidth(charinfo.m_CharCode);
   1784         }
   1785         matrix.TransformRect(charinfo.m_CharBox);
   1786         charinfo.m_Matrix.Copy(matrix);
   1787         if (wstrItem.IsEmpty()) {
   1788             charinfo.m_Unicode = 0;
   1789             m_TempCharList.Add(charinfo);
   1790             m_TempTextBuf.AppendChar(0xfffe);
   1791             continue;
   1792         } else {
   1793             int nTotal = wstrItem.GetLength();
   1794             FX_BOOL bDel = FALSE;
   1795             const int count = std::min(m_TempCharList.GetSize(), 7);
   1796             FX_FLOAT threshold = charinfo.m_Matrix.TransformXDistance((FX_FLOAT)TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize());
   1797             for (int n = m_TempCharList.GetSize();
   1798                  n > m_TempCharList.GetSize() - count;
   1799                  n--) {
   1800                 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(n - 1);
   1801                 if(charinfo1->m_CharCode == charinfo.m_CharCode &&
   1802                         charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont()  &&
   1803                         FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < threshold  &&
   1804                         FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < threshold) {
   1805                     bDel = TRUE;
   1806                     break;
   1807                 }
   1808             }
   1809             if(!bDel) {
   1810                 for (int nIndex = 0; nIndex < nTotal; nIndex++) {
   1811                     charinfo.m_Unicode = wstrItem.GetAt(nIndex);
   1812                     if (charinfo.m_Unicode) {
   1813                         charinfo.m_Index = m_TextBuf.GetLength();
   1814                         m_TempTextBuf.AppendChar(charinfo.m_Unicode);
   1815                     } else {
   1816                         m_TempTextBuf.AppendChar(0xfffe);
   1817                     }
   1818                     m_TempCharList.Add(charinfo);
   1819                 }
   1820             } else if(i == 0) {
   1821                 CFX_WideString str = m_TempTextBuf.GetWideString();
   1822                 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
   1823                     m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
   1824                     m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
   1825                 }
   1826             }
   1827         }
   1828     }
   1829     if (bIsBidiAndMirrorInverse) {
   1830         SwapTempTextBuf(iCharListStartAppend, iBufStartAppend);
   1831     }
   1832 }
   1833 void CPDF_TextPage::SwapTempTextBuf(FX_INT32 iCharListStartAppend,
   1834                                     FX_INT32 iBufStartAppend)
   1835 {
   1836     FX_INT32 i, j;
   1837     i = iCharListStartAppend;
   1838     j = m_TempCharList.GetSize() - 1;
   1839     for (; i < j; i++, j--) {
   1840         std::swap(m_TempCharList[i], m_TempCharList[j]);
   1841         std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index);
   1842     }
   1843     FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer();
   1844     i = iBufStartAppend;
   1845     j = m_TempTextBuf.GetLength() - 1;
   1846     for (; i < j; i++, j--) {
   1847         std::swap(pTempBuffer[i], pTempBuffer[j]);
   1848     }
   1849 }
   1850 FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj,
   1851                                      const CPDF_Font* pFont,
   1852                                      int nItems) const
   1853 {
   1854     IFX_BidiChar* BidiChar = IFX_BidiChar::Create();
   1855     FX_INT32 nR2L = 0;
   1856     FX_INT32 nL2R = 0;
   1857     FX_INT32 start = 0, count = 0;
   1858     CPDF_TextObjectItem item;
   1859     for (FX_INT32 i = 0; i < nItems; i++) {
   1860         pTextObj->GetItemInfo(i, &item);
   1861         if (item.m_CharCode == (FX_DWORD)-1) {
   1862             continue;
   1863         }
   1864         CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
   1865         FX_WCHAR wChar = wstrItem.GetAt(0);
   1866         if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) {
   1867             wChar = (FX_WCHAR)item.m_CharCode;
   1868         }
   1869         if (!wChar) {
   1870             continue;
   1871         }
   1872         if (BidiChar && BidiChar->AppendChar(wChar)) {
   1873             FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
   1874             if (ret == 2) {
   1875                 nR2L++;
   1876             }
   1877             else if (ret == 1) {
   1878                 nL2R++;
   1879             }
   1880         }
   1881     }
   1882     if (BidiChar && BidiChar->EndChar()) {
   1883         FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
   1884         if (ret == 2) {
   1885             nR2L++;
   1886         }
   1887         else if (ret == 1) {
   1888             nL2R++;
   1889         }
   1890     }
   1891     if (BidiChar)
   1892       BidiChar->Release();
   1893     return (nR2L > 0 && nR2L >= nL2R);
   1894 }
   1895 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj)
   1896 {
   1897     FX_INT32 nChars = pTextObj->CountChars();
   1898     if (nChars == 1) {
   1899         return m_TextlineDir;
   1900     }
   1901     CPDF_TextObjectItem first, last;
   1902     pTextObj->GetCharInfo(0, &first);
   1903     pTextObj->GetCharInfo(nChars - 1, &last);
   1904     CFX_Matrix textMatrix;
   1905     pTextObj->GetTextMatrix(&textMatrix);
   1906     textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY);
   1907     textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY);
   1908     FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX);
   1909     FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY);
   1910     if (dX <= 0.0001f && dY <= 0.0001f) {
   1911         return -1;
   1912     }
   1913     CFX_VectorF v;
   1914     v.Set(dX, dY);
   1915     v.Normalize();
   1916     if (v.y <= 0.0872f) {
   1917         if (v.x <= 0.0872f) {
   1918             return m_TextlineDir;
   1919         }
   1920         return 0;
   1921     } else if (v.x <= 0.0872f) {
   1922         return 1;
   1923     }
   1924     return m_TextlineDir;
   1925 }
   1926 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar)
   1927 {
   1928     CFX_WideString strCurText = m_TempTextBuf.GetWideString();
   1929     if(strCurText.GetLength() == 0) {
   1930         strCurText = m_TextBuf.GetWideString();
   1931     }
   1932     FX_STRSIZE nCount = strCurText.GetLength();
   1933     int nIndex = nCount - 1;
   1934     FX_WCHAR wcTmp = strCurText.GetAt(nIndex);
   1935     while(wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) {
   1936         wcTmp = strCurText.GetAt(--nIndex);
   1937     }
   1938     if (0x2D == wcTmp || 0xAD == wcTmp) {
   1939         if (--nIndex > 0) {
   1940             FX_WCHAR preChar = strCurText.GetAt((nIndex));
   1941             if (((preChar >= L'A' && preChar <= L'Z') || (preChar >= L'a' && preChar <= L'z'))
   1942                     && ((curChar >= L'A' && curChar <= L'Z') || (curChar >= L'a' && curChar <= L'z'))) {
   1943                 return TRUE;
   1944             }
   1945         }
   1946         int size = m_TempCharList.GetSize();
   1947         PAGECHAR_INFO preChar;
   1948         if (size) {
   1949             preChar = (PAGECHAR_INFO)m_TempCharList[size - 1];
   1950         } else {
   1951             size = m_charList.GetSize();
   1952             if(size == 0) {
   1953                 return FALSE;
   1954             }
   1955             preChar = (PAGECHAR_INFO)m_charList[size - 1];
   1956         }
   1957         if (FPDFTEXT_CHAR_PIECE == preChar.m_Flag)
   1958             if (0xAD == preChar.m_Unicode || 0x2D == preChar.m_Unicode) {
   1959                 return TRUE;
   1960             }
   1961     }
   1962     return FALSE;
   1963 }
   1964 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix)
   1965 {
   1966     FindPreviousTextObject();
   1967     FX_BOOL bNewline = FALSE;
   1968     int WritingMode = GetTextObjectWritingMode(pObj);
   1969     if(WritingMode == -1) {
   1970         WritingMode = GetTextObjectWritingMode(m_pPreTextObj);
   1971     }
   1972     CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top);
   1973     CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
   1974     CPDF_TextObjectItem PrevItem, item;
   1975     int nItem = m_pPreTextObj->CountItems();
   1976     m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem);
   1977     pObj->GetItemInfo(0, &item);
   1978     CFX_WideString wstrItem = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
   1979     if(wstrItem.IsEmpty()) {
   1980         wstrItem += (FX_WCHAR)item.m_CharCode;
   1981     }
   1982     FX_WCHAR curChar = wstrItem.GetAt(0);
   1983     if(WritingMode == 0) {
   1984         if(this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) {
   1985             FX_FLOAT top = this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top;
   1986             FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom : prev_rect.bottom;
   1987             if(bottom >= top) {
   1988                 if(IsHyphen(curChar)) {
   1989                     return 3;
   1990                 }
   1991                 return 2;
   1992             }
   1993         }
   1994     } else if (WritingMode == 1) {
   1995         if(this_rect.Width() > pObj->GetFontSize() * 0.1f && prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) {
   1996             FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left : m_CurlineRect.left;
   1997             FX_FLOAT right = this_rect.right < m_CurlineRect.right ? this_rect.right : m_CurlineRect.right;
   1998             if(right <= left) {
   1999                 if(IsHyphen(curChar)) {
   2000                     return 3;
   2001                 }
   2002                 return 2;
   2003             }
   2004         }
   2005     }
   2006     FX_FLOAT last_pos = PrevItem.m_OriginX;
   2007     int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont());
   2008     FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000;
   2009     last_width = FXSYS_fabs(last_width);
   2010     int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont());
   2011     FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000;
   2012     this_width = FXSYS_fabs(this_width);
   2013     FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width / 4;
   2014     CFX_AffineMatrix prev_matrix, prev_reverse;
   2015     m_pPreTextObj->GetTextMatrix(&prev_matrix);
   2016     prev_matrix.Concat(m_perMatrix);
   2017     prev_reverse.SetReverse(prev_matrix);
   2018     FX_FLOAT x = pObj->GetPosX();
   2019     FX_FLOAT y = pObj->GetPosY();
   2020     formMatrix.Transform(x, y);
   2021     prev_reverse.Transform(x, y);
   2022     if(last_width < this_width) {
   2023         threshold = prev_reverse.TransformDistance(threshold);
   2024     }
   2025     CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, m_pPreTextObj->m_Right, pObj->m_Top);
   2026     CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
   2027     CFX_FloatRect rect3 = rect1;
   2028     rect1.Intersect(rect2);
   2029     if (WritingMode == 0) {
   2030         if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5)
   2031                 || ((y > threshold * 2 || y < threshold * -3) && (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) {
   2032             bNewline = TRUE;
   2033             if(nItem > 1 ) {
   2034                 CPDF_TextObjectItem tempItem;
   2035                 m_pPreTextObj->GetItemInfo(0, &tempItem);
   2036                 CFX_AffineMatrix m;
   2037                 m_pPreTextObj->GetTextMatrix(&m);
   2038                 if(PrevItem.m_OriginX > tempItem.m_OriginX &&
   2039                         m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 &&
   2040                         m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9
   2041                         && m.b < 0.1 && m.c < 0.1 ) {
   2042                     CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, m_pPreTextObj->m_Top);
   2043                     if(re.Contains(pObj->GetPosX(), pObj->GetPosY())) {
   2044                         bNewline = FALSE;
   2045                     } else {
   2046                         CFX_FloatRect re(0, pObj->m_Bottom, 1000, pObj->m_Top);
   2047                         if(re.Contains(m_pPreTextObj->GetPosX(), m_pPreTextObj->GetPosY())) {
   2048                             bNewline = FALSE;
   2049                         }
   2050                     }
   2051                 }
   2052             }
   2053         }
   2054     }
   2055     if(bNewline) {
   2056         if(IsHyphen(curChar)) {
   2057             return 3;
   2058         }
   2059         return 2;
   2060     }
   2061     FX_INT32 nChars = pObj->CountChars();
   2062     if (nChars == 1 && ( 0x2D == curChar || 0xAD == curChar))
   2063         if (IsHyphen(curChar)) {
   2064             return 3;
   2065         }
   2066     CFX_WideString PrevStr = m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode);
   2067     FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1);
   2068     CFX_AffineMatrix matrix;
   2069     pObj->GetTextMatrix(&matrix);
   2070     matrix.Concat(formMatrix);
   2071     threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth);
   2072     threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 :  (threshold > 800 ? threshold / 6 : threshold / 5)) : (threshold / 2);
   2073     if(nLastWidth >= nThisWidth) {
   2074         threshold *= FXSYS_fabs(m_pPreTextObj->GetFontSize());
   2075     } else {
   2076         threshold *= FXSYS_fabs(pObj->GetFontSize());
   2077         threshold = matrix.TransformDistance(threshold);
   2078         threshold = prev_reverse.TransformDistance(threshold);
   2079     }
   2080     threshold /= 1000;
   2081     if((threshold < 1.4881 && threshold > 1.4879)
   2082             || (threshold < 1.39001 && threshold > 1.38999)) {
   2083         threshold *= 1.5;
   2084     }
   2085     if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && preChar != L' ')
   2086         if (curChar != L' ' && preChar != L' ') {
   2087             if((x - last_pos - last_width) > threshold || (last_pos - x - last_width) > threshold) {
   2088                 return 1;
   2089             }
   2090             if(x < 0 && (last_pos - x - last_width) > threshold) {
   2091                 return 1;
   2092             }
   2093             if((x - last_pos - last_width) > this_width || (x - last_pos - this_width) > last_width ) {
   2094                 return 1;
   2095             }
   2096         }
   2097     return 0;
   2098 }
   2099 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2)
   2100 {
   2101     if (!pTextObj1 || !pTextObj2) {
   2102         return FALSE;
   2103     }
   2104     CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_Right, pTextObj2->m_Top);
   2105     CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_Right, pTextObj1->m_Top);
   2106     if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && !m_ParseOptions.m_bGetCharCodeOnly) {
   2107         FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left);
   2108         int nCount = m_charList.GetSize();
   2109         if (nCount >= 2) {
   2110             PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2];
   2111             FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width();
   2112             if (dbXdif > dbSpace) {
   2113                 return FALSE;
   2114             }
   2115         }
   2116     }
   2117     if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) {
   2118         rcPreObj.Intersect(rcCurObj);
   2119         if (rcPreObj.IsEmpty()) {
   2120             return FALSE;
   2121         }
   2122         if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() / 2) {
   2123             return FALSE;
   2124         }
   2125         if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) {
   2126             return FALSE;
   2127         }
   2128     }
   2129     int nPreCount = pTextObj2->CountItems();
   2130     int nCurCount = pTextObj1->CountItems();
   2131     if (nPreCount != nCurCount) {
   2132         return FALSE;
   2133     }
   2134     CPDF_TextObjectItem itemPer, itemCur;
   2135     for (int i = 0; i < nPreCount; i++) {
   2136         pTextObj2->GetItemInfo(i, &itemPer);
   2137         pTextObj1->GetItemInfo(i, &itemCur);
   2138         if (itemCur.m_CharCode != itemPer.m_CharCode) {
   2139             return FALSE;
   2140         }
   2141     }
   2142     if(FXSYS_fabs(pTextObj1->GetPosX() - pTextObj2->GetPosX()) > GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont())*pTextObj2->GetFontSize() / 1000 * 0.9 ||
   2143             FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) >
   2144             FX_MAX(FX_MAX(rcPreObj.Height() , rcPreObj.Width()), pTextObj2->GetFontSize()) / 8) {
   2145         return FALSE;
   2146     }
   2147     return TRUE;
   2148 }
   2149 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos)
   2150 {
   2151     if (!pTextObj) {
   2152         return FALSE;
   2153     }
   2154     int i = 0;
   2155     if (!ObjPos) {
   2156         ObjPos = m_pPage->GetLastObjectPosition();
   2157     }
   2158     CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos);
   2159     while (i < 5 && ObjPos) {
   2160         pObj = m_pPage->GetPrevObject(ObjPos);
   2161         if(pObj == pTextObj) {
   2162             continue;
   2163         }
   2164         if(pObj->m_Type != PDFPAGE_TEXT) {
   2165             continue;
   2166         }
   2167         if(IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) {
   2168             return TRUE;
   2169         }
   2170         i++;
   2171     }
   2172     return FALSE;
   2173 }
   2174 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info)
   2175 {
   2176     int size = m_TempCharList.GetSize();
   2177     PAGECHAR_INFO preChar;
   2178     if (size) {
   2179         preChar = (PAGECHAR_INFO)m_TempCharList[size - 1];
   2180     } else {
   2181         size = m_charList.GetSize();
   2182         if(size == 0) {
   2183             return FALSE;
   2184         }
   2185         preChar = (PAGECHAR_INFO)m_charList[size - 1];
   2186     }
   2187     info.m_Index = m_TextBuf.GetLength();
   2188     info.m_Unicode = unicode;
   2189     info.m_pTextObj = NULL;
   2190     info.m_CharCode = -1;
   2191     info.m_Flag = FPDFTEXT_CHAR_GENERATED;
   2192     int preWidth = 0;
   2193     if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD) - 1) {
   2194         preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont());
   2195     }
   2196     FX_FLOAT fs = 0;
   2197     if(preChar.m_pTextObj) {
   2198         fs = preChar.m_pTextObj->GetFontSize();
   2199     } else {
   2200         fs = preChar.m_CharBox.Height();
   2201     }
   2202     if(!fs) {
   2203         fs = 1;
   2204     }
   2205     info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000;
   2206     info.m_OriginY = preChar.m_OriginY;
   2207     info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, info.m_OriginY);
   2208     return TRUE;
   2209 }
   2210 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, const CFX_FloatRect& rect2)
   2211 {
   2212     CFX_FloatRect rect = rect1;
   2213     rect.Intersect(rect2);
   2214     return !rect.IsEmpty();
   2215 }
   2216 FX_BOOL	CPDF_TextPage::IsLetter(FX_WCHAR unicode)
   2217 {
   2218     if (unicode < L'A') {
   2219         return FALSE;
   2220     }
   2221     if (unicode > L'Z' && unicode < L'a') {
   2222         return FALSE;
   2223     }
   2224     if (unicode > L'z') {
   2225         return FALSE;
   2226     }
   2227     return TRUE;
   2228 }
   2229 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
   2230     : m_pTextPage(pTextPage),
   2231       m_flags(0),
   2232       m_findNextStart(-1),
   2233       m_findPreStart(-1),
   2234       m_bMatchCase(FALSE),
   2235       m_bMatchWholeWord(FALSE),
   2236       m_resStart(0),
   2237       m_resEnd(-1),
   2238       m_IsFind(FALSE)
   2239 {
   2240     m_strText = m_pTextPage->GetPageText();
   2241     int nCount = pTextPage->CountChars();
   2242     if(nCount) {
   2243         m_CharIndex.Add(0);
   2244     }
   2245     for(int i = 0; i < nCount; i++) {
   2246         FPDF_CHAR_INFO info;
   2247         pTextPage->GetCharInfo(i, info);
   2248         int indexSize = m_CharIndex.GetSize();
   2249         if(info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) {
   2250             if(indexSize % 2) {
   2251                 m_CharIndex.Add(1);
   2252             } else {
   2253                 if(indexSize <= 0) {
   2254                     continue;
   2255                 }
   2256                 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1);
   2257             }
   2258         } else {
   2259             if(indexSize % 2) {
   2260                 if(indexSize <= 0) {
   2261                     continue;
   2262                 }
   2263                 m_CharIndex.SetAt(indexSize - 1, i + 1);
   2264             } else {
   2265                 m_CharIndex.Add(i + 1);
   2266             }
   2267         }
   2268     }
   2269     int indexSize = m_CharIndex.GetSize();
   2270     if(indexSize % 2) {
   2271         m_CharIndex.RemoveAt(indexSize - 1);
   2272     }
   2273 }
   2274 int CPDF_TextPageFind::GetCharIndex(int index) const
   2275 {
   2276     return m_pTextPage->CharIndexFromTextIndex(index);
   2277     int indexSize = m_CharIndex.GetSize();
   2278     int count = 0;
   2279     for(int i = 0; i < indexSize; i += 2) {
   2280         count += m_CharIndex.GetAt(i + 1);
   2281         if(count > index) {
   2282             return 	index - count + m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i);
   2283         }
   2284     }
   2285     return -1;
   2286 }
   2287 FX_BOOL	CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, int flags, int startPos)
   2288 {
   2289     if (!m_pTextPage) {
   2290         return FALSE;
   2291     }
   2292     if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) {
   2293         m_strText = m_pTextPage->GetPageText();
   2294     }
   2295     CFX_WideString findwhatStr = findwhat;
   2296     m_findWhat = findwhatStr;
   2297     m_flags = flags;
   2298     m_bMatchCase = flags & FPDFTEXT_MATCHCASE;
   2299     if (m_strText.IsEmpty()) {
   2300         m_IsFind = FALSE;
   2301         return TRUE;
   2302     }
   2303     FX_STRSIZE len = findwhatStr.GetLength();
   2304     if (!m_bMatchCase) {
   2305         findwhatStr.MakeLower();
   2306         m_strText.MakeLower();
   2307     }
   2308     m_bMatchWholeWord = flags & FPDFTEXT_MATCHWHOLEWORD;
   2309     m_findNextStart = startPos;
   2310     if (startPos == -1) {
   2311         m_findPreStart = m_strText.GetLength() - 1;
   2312     } else {
   2313         m_findPreStart = startPos;
   2314     }
   2315     m_csFindWhatArray.RemoveAll();
   2316     int i = 0;
   2317     while(i < len) {
   2318         if(findwhatStr.GetAt(i) != ' ') {
   2319             break;
   2320         }
   2321         i++;
   2322     }
   2323     if(i < len) {
   2324         ExtractFindWhat(findwhatStr);
   2325     } else {
   2326         m_csFindWhatArray.Add(findwhatStr);
   2327     }
   2328     if(m_csFindWhatArray.GetSize() <= 0) {
   2329         return FALSE;
   2330     }
   2331     m_IsFind = TRUE;
   2332     m_resStart = 0;
   2333     m_resEnd = -1;
   2334     return TRUE;
   2335 }
   2336 FX_BOOL CPDF_TextPageFind::FindNext()
   2337 {
   2338     if (!m_pTextPage) {
   2339         return FALSE;
   2340     }
   2341     m_resArray.RemoveAll();
   2342     if(m_findNextStart == -1) {
   2343         return FALSE;
   2344     }
   2345     if(m_strText.IsEmpty()) {
   2346         m_IsFind = FALSE;
   2347         return m_IsFind;
   2348     }
   2349     int strLen = m_strText.GetLength();
   2350     if (m_findNextStart > strLen - 1) {
   2351         m_IsFind = FALSE;
   2352         return m_IsFind;
   2353     }
   2354     int nCount = m_csFindWhatArray.GetSize();
   2355     int nResultPos = 0;
   2356     int	nStartPos = 0;
   2357     nStartPos = m_findNextStart;
   2358     FX_BOOL bSpaceStart = FALSE;
   2359     for(int iWord = 0; iWord < nCount; iWord++) {
   2360         CFX_WideString csWord = m_csFindWhatArray[iWord];
   2361         if(csWord.IsEmpty()) {
   2362             if(iWord == nCount - 1) {
   2363                 FX_WCHAR strInsert = m_strText.GetAt(nStartPos);
   2364                 if(strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CHAR || strInsert == TEXT_RETURN_CHAR || strInsert == 160) {
   2365                     nResultPos = nStartPos + 1;
   2366                     break;
   2367                 }
   2368                 iWord = -1;
   2369             } else if(iWord == 0) {
   2370                 bSpaceStart = TRUE;
   2371             }
   2372             continue;
   2373         }
   2374         int endIndex;
   2375         nResultPos = m_strText.Find(csWord.c_str(), nStartPos);
   2376         if (nResultPos == -1) {
   2377             m_IsFind = FALSE;
   2378             return m_IsFind;
   2379         }
   2380         endIndex = nResultPos + csWord.GetLength() - 1;
   2381         if(iWord == 0) {
   2382             m_resStart = nResultPos;
   2383         }
   2384         FX_BOOL bMatch = TRUE;
   2385         if(iWord != 0 && !bSpaceStart) {
   2386             int PreResEndPos = nStartPos;
   2387             int curChar = csWord.GetAt(0);
   2388             CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
   2389             int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
   2390             if(nStartPos == nResultPos && !(_IsIgnoreSpaceCharacter(lastChar) || _IsIgnoreSpaceCharacter(curChar))) {
   2391                 bMatch = FALSE;
   2392             }
   2393             for(int d = PreResEndPos; d < nResultPos; d++) {
   2394                 FX_WCHAR strInsert = m_strText.GetAt(d);
   2395                 if(strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
   2396                     bMatch = FALSE;
   2397                     break;
   2398                 }
   2399             }
   2400         } else if(bSpaceStart) {
   2401             if(nResultPos > 0) {
   2402                 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1);
   2403                 if(strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
   2404                     bMatch = FALSE;
   2405                     m_resStart = nResultPos;
   2406                 } else {
   2407                     m_resStart = nResultPos - 1;
   2408                 }
   2409             }
   2410         }
   2411         if(m_bMatchWholeWord && bMatch) {
   2412             bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex);
   2413         }
   2414         nStartPos = endIndex + 1;
   2415         if(!bMatch) {
   2416             iWord = -1;
   2417             if(bSpaceStart) {
   2418                 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();
   2419             } else {
   2420                 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
   2421             }
   2422         }
   2423     }
   2424     m_resEnd = nResultPos + m_csFindWhatArray[m_csFindWhatArray.GetSize() - 1].GetLength() - 1;
   2425     m_IsFind = TRUE;
   2426     int resStart = GetCharIndex(m_resStart);
   2427     int resEnd = GetCharIndex(m_resEnd);
   2428     m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray);
   2429     if(m_flags & FPDFTEXT_CONSECUTIVE) {
   2430         m_findNextStart = m_resStart + 1;
   2431         m_findPreStart = m_resEnd - 1;
   2432     } else {
   2433         m_findNextStart = m_resEnd + 1;
   2434         m_findPreStart = m_resStart - 1;
   2435     }
   2436     return m_IsFind;
   2437 }
   2438 FX_BOOL CPDF_TextPageFind::FindPrev()
   2439 {
   2440     if (!m_pTextPage) {
   2441         return FALSE;
   2442     }
   2443     m_resArray.RemoveAll();
   2444     if(m_strText.IsEmpty() || m_findPreStart < 0) {
   2445         m_IsFind = FALSE;
   2446         return m_IsFind;
   2447     }
   2448     CPDF_TextPageFind findEngine(m_pTextPage);
   2449     FX_BOOL ret = findEngine.FindFirst(m_findWhat, m_flags);
   2450     if(!ret) {
   2451         m_IsFind = FALSE;
   2452         return m_IsFind;
   2453     }
   2454     int	order = -1, MatchedCount = 0;
   2455     while(ret) {
   2456         ret = findEngine.FindNext();
   2457         if(ret) {
   2458             int order1 = findEngine.GetCurOrder() ;
   2459             int	MatchedCount1 = findEngine.GetMatchedCount();
   2460             if(((order1 + MatchedCount1) - 1) > m_findPreStart) {
   2461                 break;
   2462             }
   2463             order = order1;
   2464             MatchedCount = MatchedCount1;
   2465         }
   2466     }
   2467     if(order == -1) {
   2468         m_IsFind = FALSE;
   2469         return m_IsFind;
   2470     }
   2471     m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
   2472     m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
   2473     m_IsFind = TRUE;
   2474     m_pTextPage->GetRectArray(order, MatchedCount, m_resArray);
   2475     if(m_flags & FPDFTEXT_CONSECUTIVE) {
   2476         m_findNextStart = m_resStart + 1;
   2477         m_findPreStart = m_resEnd - 1;
   2478     } else {
   2479         m_findNextStart = m_resEnd + 1;
   2480         m_findPreStart = m_resStart - 1;
   2481     }
   2482     return m_IsFind;
   2483 }
   2484 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat)
   2485 {
   2486     if(findwhat.IsEmpty()) {
   2487         return ;
   2488     }
   2489     int index = 0;
   2490     while(1) {
   2491         CFX_WideString csWord = TEXT_EMPTY;
   2492         int ret = ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR);
   2493         if(csWord.IsEmpty()) {
   2494             if(ret) {
   2495                 m_csFindWhatArray.Add(CFX_WideString(L""));
   2496                 index++;
   2497                 continue;
   2498             } else {
   2499                 break;
   2500             }
   2501         }
   2502         int pos = 0;
   2503         while(pos < csWord.GetLength()) {
   2504             CFX_WideString curStr = csWord.Mid(pos, 1);
   2505             FX_WCHAR curChar = csWord.GetAt(pos);
   2506             if (_IsIgnoreSpaceCharacter(curChar)) {
   2507                 if (pos > 0 && curChar == 0x2019) {
   2508                     pos++;
   2509                     continue;
   2510                 }
   2511                 if (pos > 0 ) {
   2512                     CFX_WideString preStr = csWord.Mid(0, pos);
   2513                     m_csFindWhatArray.Add(preStr);
   2514                 }
   2515                 m_csFindWhatArray.Add(curStr);
   2516                 if (pos == csWord.GetLength() - 1) {
   2517                     csWord.Empty();
   2518                     break;
   2519                 }
   2520                 csWord = csWord.Right(csWord.GetLength() - pos - 1);
   2521                 pos = 0;
   2522                 continue;
   2523             }
   2524             pos++;
   2525         }
   2526         if (!csWord.IsEmpty()) {
   2527             m_csFindWhatArray.Add(csWord);
   2528         }
   2529         index++;
   2530     }
   2531 }
   2532 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, int startPos, int endPos)
   2533 {
   2534     int char_left = 0;
   2535     int char_right = 0;
   2536     int char_count = endPos - startPos + 1;
   2537     if(char_count < 1) {
   2538         return FALSE;
   2539     }
   2540     if (char_count == 1 && csPageText.GetAt(startPos) > 255) {
   2541         return TRUE;
   2542     }
   2543     if(startPos - 1 >= 0 ) {
   2544         char_left = csPageText.GetAt(startPos - 1);
   2545     }
   2546     if(startPos + char_count < csPageText.GetLength()) {
   2547         char_right = csPageText.GetAt(startPos + char_count);
   2548     }
   2549     if ((char_left > 'A' && char_left < 'a') || (char_left > 'a' && char_left < 'z') || (char_left > 0xfb00 && char_left < 0xfb06) || (char_left >= '0' && char_left <= '9') ||
   2550             (char_right > 'A' && char_right < 'a') || (char_right > 'a' && char_right < 'z') || (char_right > 0xfb00 && char_right < 0xfb06) || (char_right >= '0' && char_right <= '9')) {
   2551         return FALSE;
   2552     }
   2553     if(!(('A' > char_left || char_left > 'Z')  && ('a' > char_left || char_left > 'z')
   2554             && ('A' > char_right || char_right > 'Z')  && ('a' > char_right || char_right > 'z'))) {
   2555         return FALSE;
   2556     }
   2557     if (char_count > 0) {
   2558         if (csPageText.GetAt(startPos) >= L'0' && csPageText.GetAt(startPos) <= L'9' && char_left >= L'0' && char_left <= L'9') {
   2559             return FALSE;
   2560         }
   2561         if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' && char_right >= L'0' && char_right <= L'9') {
   2562             return FALSE;
   2563         }
   2564     }
   2565     return TRUE;
   2566 }
   2567 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString,
   2568         int iSubString, FX_WCHAR chSep)
   2569 {
   2570     if (lpszFullString == NULL) {
   2571         return FALSE;
   2572     }
   2573     while (iSubString--) {
   2574         lpszFullString = FXSYS_wcschr(lpszFullString, chSep);
   2575         if (lpszFullString == NULL) {
   2576             rString.Empty();
   2577             return FALSE;
   2578         }
   2579         lpszFullString++;
   2580         while(*lpszFullString == chSep) {
   2581             lpszFullString++;
   2582         }
   2583     }
   2584     FX_LPCWSTR lpchEnd = FXSYS_wcschr(lpszFullString, chSep);
   2585     int nLen = (lpchEnd == NULL) ?
   2586                (int)FXSYS_wcslen(lpszFullString) : (int)(lpchEnd - lpszFullString);
   2587     ASSERT(nLen >= 0);
   2588     FXSYS_memcpy32(rString.GetBuffer(nLen), lpszFullString, nLen * sizeof(FX_WCHAR));
   2589     rString.ReleaseBuffer();
   2590     return TRUE;
   2591 }
   2592 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str)
   2593 {
   2594     CFX_WideString str2;
   2595     str2.Empty();
   2596     int nlen = str.GetLength();
   2597     for(int i = nlen - 1; i >= 0; i--) {
   2598         str2 += str.GetAt(i);
   2599     }
   2600     return str2;
   2601 }
   2602 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const
   2603 {
   2604     rects.Copy(m_resArray);
   2605 }
   2606 int	CPDF_TextPageFind::GetCurOrder() const
   2607 {
   2608     return GetCharIndex(m_resStart);
   2609 }
   2610 int	CPDF_TextPageFind::GetMatchedCount()const
   2611 {
   2612     int resStart = GetCharIndex(m_resStart);
   2613     int resEnd = GetCharIndex(m_resEnd);
   2614     return resEnd - resStart + 1;
   2615 }
   2616 CPDF_LinkExtract::CPDF_LinkExtract()
   2617     : m_pTextPage(NULL),
   2618       m_IsParserd(FALSE)
   2619 {
   2620 }
   2621 CPDF_LinkExtract::~CPDF_LinkExtract()
   2622 {
   2623     DeleteLinkList();
   2624 }
   2625 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage)
   2626 {
   2627     if (!pTextPage || !pTextPage->IsParsered()) {
   2628         return FALSE;
   2629     }
   2630     m_pTextPage = (const CPDF_TextPage*)pTextPage;
   2631     m_strPageText = m_pTextPage->GetPageText(0, -1);
   2632     DeleteLinkList();
   2633     if (m_strPageText.IsEmpty()) {
   2634         return FALSE;
   2635     }
   2636     parserLink();
   2637     m_IsParserd = TRUE;
   2638     return TRUE;
   2639 }
   2640 void CPDF_LinkExtract::DeleteLinkList()
   2641 {
   2642     while (m_LinkList.GetSize()) {
   2643         CPDF_LinkExt* linkinfo = NULL;
   2644         linkinfo = m_LinkList.GetAt(0);
   2645         m_LinkList.RemoveAt(0);
   2646         delete linkinfo;
   2647     }
   2648     m_LinkList.RemoveAll();
   2649 }
   2650 int CPDF_LinkExtract::CountLinks() const
   2651 {
   2652     if (!m_IsParserd)	{
   2653         return -1;
   2654     }
   2655     return m_LinkList.GetSize();
   2656 }
   2657 void CPDF_LinkExtract::parserLink()
   2658 {
   2659     int start = 0, pos = 0;
   2660     int TotalChar = m_pTextPage->CountChars();
   2661     while (pos < TotalChar) {
   2662         FPDF_CHAR_INFO pageChar;
   2663         m_pTextPage->GetCharInfo(pos, pageChar);
   2664         if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) {
   2665             int nCount = pos - start;
   2666             if(pos == TotalChar - 1) {
   2667                 nCount++;
   2668             }
   2669             CFX_WideString strBeCheck;
   2670             strBeCheck = m_pTextPage->GetPageText(start, nCount);
   2671             if (strBeCheck.GetLength() > 5) {
   2672                 while(strBeCheck.GetLength() > 0) {
   2673                     FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);
   2674                     if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') {
   2675                         strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1);
   2676                         nCount--;
   2677                     } else {
   2678                         break;
   2679                     }
   2680                 }
   2681                 if (nCount > 5 && (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
   2682                     if (!AppendToLinkList(start, nCount, strBeCheck)) {
   2683                         break;
   2684                     }
   2685                 }
   2686             }
   2687             start = ++pos;
   2688         } else {
   2689             pos++;
   2690         }
   2691     }
   2692 }
   2693 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck)
   2694 {
   2695     CFX_WideString str = strBeCheck;
   2696     str.MakeLower();
   2697     if (str.Find(L"http://www.") != -1) {
   2698         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
   2699         return TRUE;
   2700     } else if (str.Find(L"http://") != -1) {
   2701         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
   2702         return TRUE;
   2703     } else if (str.Find(L"https://www.") != -1) {
   2704         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
   2705         return TRUE;
   2706     } else if (str.Find(L"https://") != -1) {
   2707         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
   2708         return TRUE;
   2709     } else if (str.Find(L"www.") != -1) {
   2710         strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
   2711         strBeCheck = L"http://" + strBeCheck;
   2712         return TRUE;
   2713     } else {
   2714         return FALSE;
   2715     }
   2716 }
   2717 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str)
   2718 {
   2719     str.MakeLower();
   2720     int aPos = str.Find(L'@');
   2721     if (aPos < 1) {
   2722         return FALSE;
   2723     }
   2724     if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') {
   2725         return FALSE;
   2726     }
   2727     int i;
   2728     for (i = aPos - 1; i >= 0; i--) {
   2729         FX_WCHAR ch = str.GetAt(i);
   2730         if (ch == L'_' || ch == L'.' || (ch >= L'a' && ch <= L'z') || (ch >= L'0' && ch <= L'9')) {
   2731             continue;
   2732         } else {
   2733             if (i == aPos - 1) {
   2734                 return FALSE;
   2735             }
   2736             str = str.Right(str.GetLength() - i - 1);
   2737             break;
   2738         }
   2739     }
   2740     aPos = str.Find(L'@');
   2741     if (aPos < 1) {
   2742         return FALSE;
   2743     }
   2744     CFX_WideString strtemp = L"";
   2745     for (i = 0; i < aPos; i++) {
   2746         FX_WCHAR wch = str.GetAt(i);
   2747         if (wch >= L'a' && wch <= L'z') {
   2748             break;
   2749         } else {
   2750             strtemp = str.Right(str.GetLength() - i + 1);
   2751         }
   2752     }
   2753     if (strtemp != L"") {
   2754         str = strtemp;
   2755     }
   2756     aPos = str.Find(L'@');
   2757     if (aPos < 1) {
   2758         return FALSE;
   2759     }
   2760     str.TrimRight(L'.');
   2761     strtemp = str;
   2762     int ePos = str.Find(L'.');
   2763     if (ePos == -1) {
   2764         return FALSE;
   2765     }
   2766     while (ePos != -1) {
   2767         strtemp = strtemp.Right(strtemp.GetLength() - ePos - 1);
   2768         ePos = strtemp.Find('.');
   2769     }
   2770     ePos = strtemp.GetLength();
   2771     for (i = 0; i < ePos; i++) {
   2772         FX_WCHAR wch = str.GetAt(i);
   2773         if ((wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) {
   2774             continue;
   2775         } else {
   2776             str = str.Left(str.GetLength() - ePos + i + 1);
   2777             ePos = ePos - i - 1;
   2778             break;
   2779         }
   2780     }
   2781     int nLen = str.GetLength();
   2782     for (i = aPos + 1; i < nLen - ePos; i++) {
   2783         FX_WCHAR wch = str.GetAt(i);
   2784         if (wch == L'-' || wch == L'.' || (wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) {
   2785             continue;
   2786         } else {
   2787             return FALSE;
   2788         }
   2789     }
   2790     if (str.Find(L"mailto:") == -1) {
   2791         str = L"mailto:" + str;
   2792     }
   2793     return TRUE;
   2794 }
   2795 FX_BOOL CPDF_LinkExtract::AppendToLinkList(int start, int count, const CFX_WideString& strUrl)
   2796 {
   2797     CPDF_LinkExt* linkInfo = new CPDF_LinkExt;
   2798     linkInfo->m_strUrl = strUrl;
   2799     linkInfo->m_Start = start;
   2800     linkInfo->m_Count = count;
   2801     m_LinkList.Add(linkInfo);
   2802     return TRUE;
   2803 }
   2804 CFX_WideString CPDF_LinkExtract::GetURL(int index) const
   2805 {
   2806     if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
   2807         return L"";
   2808     }
   2809     CPDF_LinkExt* link = NULL;
   2810     link = m_LinkList.GetAt(index);
   2811     if (!link) {
   2812         return L"";
   2813     }
   2814     return link->m_strUrl;
   2815 }
   2816 void CPDF_LinkExtract::GetBoundedSegment(int index, int& start, int& count) const
   2817 {
   2818     if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
   2819         return ;
   2820     }
   2821     CPDF_LinkExt* link = NULL;
   2822     link = m_LinkList.GetAt(index);
   2823     if (!link) {
   2824         return ;
   2825     }
   2826     start = link->m_Start;
   2827     count = link->m_Count;
   2828 }
   2829 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const
   2830 {
   2831     if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
   2832         return;
   2833     }
   2834     CPDF_LinkExt* link = NULL;
   2835     link = m_LinkList.GetAt(index);
   2836     if (!link) {
   2837         return ;
   2838     }
   2839     m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
   2840 }
   2841