Home | History | Annotate | Download | only in fx_wordbreak
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "xfa/src/foxitlib.h"
      8 #include "fx_wordbreak_impl.h"
      9 #define FX_IsOdd(a) ((a)&1)
     10 FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint) {
     11   FX_DWORD dwProperty =
     12       (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1];
     13   return (FX_WordBreakProp)(FX_IsOdd(wcCodePoint) ? (dwProperty & 0x0F)
     14                                                   : (dwProperty >> 4));
     15 }
     16 CFX_CharIter::CFX_CharIter(const CFX_WideString& wsText)
     17     : m_wsText(wsText), m_nIndex(0) {
     18   FXSYS_assert(!wsText.IsEmpty());
     19 }
     20 CFX_CharIter::~CFX_CharIter() {}
     21 void CFX_CharIter::Release() {
     22   delete this;
     23 }
     24 FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev) {
     25   if (bPrev) {
     26     if (m_nIndex <= 0) {
     27       return FALSE;
     28     }
     29     m_nIndex--;
     30   } else {
     31     if (m_nIndex + 1 >= m_wsText.GetLength()) {
     32       return FALSE;
     33     }
     34     m_nIndex++;
     35   }
     36   return TRUE;
     37 }
     38 FX_WCHAR CFX_CharIter::GetChar() {
     39   return m_wsText.GetAt(m_nIndex);
     40 }
     41 void CFX_CharIter::SetAt(int32_t nIndex) {
     42   if (nIndex < 0 || nIndex >= m_wsText.GetLength()) {
     43     return;
     44   }
     45   m_nIndex = nIndex;
     46 }
     47 int32_t CFX_CharIter::GetAt() const {
     48   return m_nIndex;
     49 }
     50 FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail) const {
     51   return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0);
     52 }
     53 IFX_CharIter* CFX_CharIter::Clone() {
     54   CFX_CharIter* pIter = new CFX_CharIter(m_wsText);
     55   pIter->m_nIndex = m_nIndex;
     56   return pIter;
     57 }
     58 CFX_WordBreak::CFX_WordBreak() : m_pPreIter(NULL), m_pCurIter(NULL) {}
     59 CFX_WordBreak::~CFX_WordBreak() {
     60   if (m_pPreIter) {
     61     m_pPreIter->Release();
     62     m_pPreIter = NULL;
     63   }
     64   if (m_pCurIter) {
     65     m_pCurIter->Release();
     66     m_pCurIter = NULL;
     67   }
     68 }
     69 void CFX_WordBreak::Release() {
     70   delete this;
     71 }
     72 void CFX_WordBreak::Attach(IFX_CharIter* pIter) {
     73   FXSYS_assert(pIter);
     74   m_pCurIter = pIter;
     75 }
     76 void CFX_WordBreak::Attach(const CFX_WideString& wsText) {
     77   m_pCurIter = new CFX_CharIter(wsText);
     78 }
     79 FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev) {
     80   IFX_CharIter* pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone();
     81   if (pIter->IsEOF(!bPrev)) {
     82     return FALSE;
     83   }
     84   pIter->Next(bPrev);
     85   if (!FindNextBreakPos(pIter, bPrev, TRUE)) {
     86     pIter->Release();
     87     return FALSE;
     88   }
     89   if (bPrev) {
     90     m_pCurIter->Release();
     91     m_pCurIter = m_pPreIter;
     92     m_pCurIter->Next(TRUE);
     93     m_pPreIter = pIter;
     94   } else {
     95     m_pPreIter->Release();
     96     m_pPreIter = m_pCurIter;
     97     m_pPreIter->Next();
     98     m_pCurIter = pIter;
     99   }
    100   return TRUE;
    101 }
    102 void CFX_WordBreak::SetAt(int32_t nIndex) {
    103   if (m_pPreIter) {
    104     m_pPreIter->Release();
    105     m_pPreIter = NULL;
    106   }
    107   m_pCurIter->SetAt(nIndex);
    108   FindNextBreakPos(m_pCurIter, TRUE, FALSE);
    109   m_pPreIter = m_pCurIter;
    110   m_pCurIter = m_pPreIter->Clone();
    111   FindNextBreakPos(m_pCurIter, FALSE, FALSE);
    112 }
    113 int32_t CFX_WordBreak::GetWordPos() const {
    114   return m_pPreIter->GetAt();
    115 }
    116 int32_t CFX_WordBreak::GetWordLength() const {
    117   return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1;
    118 }
    119 void CFX_WordBreak::GetWord(CFX_WideString& wsWord) const {
    120   int32_t nWordLength = GetWordLength();
    121   if (nWordLength <= 0) {
    122     return;
    123   }
    124   FX_WCHAR* lpBuf = wsWord.GetBuffer(nWordLength);
    125   IFX_CharIter* pTempIter = m_pPreIter->Clone();
    126   int32_t i = 0;
    127   while (pTempIter->GetAt() <= m_pCurIter->GetAt()) {
    128     lpBuf[i++] = pTempIter->GetChar();
    129     FX_BOOL bEnd = pTempIter->Next();
    130     if (!bEnd) {
    131       break;
    132     }
    133   }
    134   pTempIter->Release();
    135   wsWord.ReleaseBuffer(nWordLength);
    136 }
    137 FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const {
    138   return m_pCurIter->IsEOF(bTail);
    139 }
    140 FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter* pIter,
    141                                         FX_BOOL bPrev,
    142                                         FX_BOOL bFromNext) {
    143   FX_WordBreakProp ePreType = FX_WordBreakProp_None;
    144   FX_WordBreakProp eCurType = FX_WordBreakProp_None;
    145   FX_WordBreakProp eNextType = FX_WordBreakProp_None;
    146   if (pIter->IsEOF(!bPrev)) {
    147     return TRUE;
    148   }
    149   if (!(bFromNext || pIter->IsEOF(bPrev))) {
    150     pIter->Next(!bPrev);
    151     FX_WCHAR wcTemp = pIter->GetChar();
    152     ePreType = FX_GetWordBreakProperty(wcTemp);
    153     pIter->Next(bPrev);
    154   }
    155   FX_WCHAR wcTemp = pIter->GetChar();
    156   eCurType = FX_GetWordBreakProperty(wcTemp);
    157   FX_BOOL bFirst = TRUE;
    158   do {
    159     pIter->Next(bPrev);
    160     FX_WCHAR wcTemp = pIter->GetChar();
    161     eNextType = FX_GetWordBreakProperty(wcTemp);
    162     FX_WORD wBreak =
    163         gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType));
    164     if (wBreak) {
    165       if (pIter->IsEOF(!bPrev)) {
    166         pIter->Next(!bPrev);
    167         return TRUE;
    168       }
    169       if (bFirst) {
    170         int32_t nFlags = 0;
    171         if (eCurType == FX_WordBreakProp_MidLetter) {
    172           if (eNextType == FX_WordBreakProp_ALetter) {
    173             nFlags = 1;
    174           }
    175         } else if (eCurType == FX_WordBreakProp_MidNum) {
    176           if (eNextType == FX_WordBreakProp_Numberic) {
    177             nFlags = 2;
    178           }
    179         } else if (eCurType == FX_WordBreakProp_MidNumLet) {
    180           if (eNextType == FX_WordBreakProp_ALetter) {
    181             nFlags = 1;
    182           } else if (eNextType == FX_WordBreakProp_Numberic) {
    183             nFlags = 2;
    184           }
    185         }
    186         if (nFlags > 0) {
    187           FXSYS_assert(nFlags <= 2);
    188           if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) ||
    189                 (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) {
    190             pIter->Next(!bPrev);
    191             return TRUE;
    192           }
    193           pIter->Next(bPrev);
    194           wBreak = FALSE;
    195         }
    196         bFirst = FALSE;
    197       }
    198       if (wBreak) {
    199         int32_t nFlags = 0;
    200         if (eNextType == FX_WordBreakProp_MidLetter) {
    201           if (eCurType == FX_WordBreakProp_ALetter) {
    202             nFlags = 1;
    203           }
    204         } else if (eNextType == FX_WordBreakProp_MidNum) {
    205           if (eCurType == FX_WordBreakProp_Numberic) {
    206             nFlags = 2;
    207           }
    208         } else if (eNextType == FX_WordBreakProp_MidNumLet) {
    209           if (eCurType == FX_WordBreakProp_ALetter) {
    210             nFlags = 1;
    211           } else if (eCurType == FX_WordBreakProp_Numberic) {
    212             nFlags = 2;
    213           }
    214         }
    215         if (nFlags <= 0) {
    216           pIter->Next(!bPrev);
    217           return TRUE;
    218         }
    219         FXSYS_assert(nFlags <= 2);
    220         pIter->Next(bPrev);
    221         wcTemp = pIter->GetChar();
    222         eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp);
    223         if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) ||
    224               (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) {
    225           pIter->Next(!bPrev);
    226           pIter->Next(!bPrev);
    227           return TRUE;
    228         }
    229       }
    230     }
    231     ePreType = eCurType;
    232     eCurType = eNextType;
    233     bFirst = FALSE;
    234   } while (!pIter->IsEOF(!bPrev));
    235   return TRUE;
    236 }
    237 IFX_WordBreak* FX_WordBreak_Create() {
    238   return new CFX_WordBreak;
    239 }
    240