1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "xfa/src/foxitlib.h" 8 #include "fx_wordbreak_impl.h" 9 #define FX_IsOdd(a) ((a)&1) 10 FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint) { 11 FX_DWORD dwProperty = 12 (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1]; 13 return (FX_WordBreakProp)(FX_IsOdd(wcCodePoint) ? (dwProperty & 0x0F) 14 : (dwProperty >> 4)); 15 } 16 CFX_CharIter::CFX_CharIter(const CFX_WideString& wsText) 17 : m_wsText(wsText), m_nIndex(0) { 18 FXSYS_assert(!wsText.IsEmpty()); 19 } 20 CFX_CharIter::~CFX_CharIter() {} 21 void CFX_CharIter::Release() { 22 delete this; 23 } 24 FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev) { 25 if (bPrev) { 26 if (m_nIndex <= 0) { 27 return FALSE; 28 } 29 m_nIndex--; 30 } else { 31 if (m_nIndex + 1 >= m_wsText.GetLength()) { 32 return FALSE; 33 } 34 m_nIndex++; 35 } 36 return TRUE; 37 } 38 FX_WCHAR CFX_CharIter::GetChar() { 39 return m_wsText.GetAt(m_nIndex); 40 } 41 void CFX_CharIter::SetAt(int32_t nIndex) { 42 if (nIndex < 0 || nIndex >= m_wsText.GetLength()) { 43 return; 44 } 45 m_nIndex = nIndex; 46 } 47 int32_t CFX_CharIter::GetAt() const { 48 return m_nIndex; 49 } 50 FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail) const { 51 return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0); 52 } 53 IFX_CharIter* CFX_CharIter::Clone() { 54 CFX_CharIter* pIter = new CFX_CharIter(m_wsText); 55 pIter->m_nIndex = m_nIndex; 56 return pIter; 57 } 58 CFX_WordBreak::CFX_WordBreak() : m_pPreIter(NULL), m_pCurIter(NULL) {} 59 CFX_WordBreak::~CFX_WordBreak() { 60 if (m_pPreIter) { 61 m_pPreIter->Release(); 62 m_pPreIter = NULL; 63 } 64 if (m_pCurIter) { 65 m_pCurIter->Release(); 66 m_pCurIter = NULL; 67 } 68 } 69 void CFX_WordBreak::Release() { 70 delete this; 71 } 72 void CFX_WordBreak::Attach(IFX_CharIter* pIter) { 73 FXSYS_assert(pIter); 74 m_pCurIter = pIter; 75 } 76 void CFX_WordBreak::Attach(const CFX_WideString& wsText) { 77 m_pCurIter = new CFX_CharIter(wsText); 78 } 79 FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev) { 80 IFX_CharIter* pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone(); 81 if (pIter->IsEOF(!bPrev)) { 82 return FALSE; 83 } 84 pIter->Next(bPrev); 85 if (!FindNextBreakPos(pIter, bPrev, TRUE)) { 86 pIter->Release(); 87 return FALSE; 88 } 89 if (bPrev) { 90 m_pCurIter->Release(); 91 m_pCurIter = m_pPreIter; 92 m_pCurIter->Next(TRUE); 93 m_pPreIter = pIter; 94 } else { 95 m_pPreIter->Release(); 96 m_pPreIter = m_pCurIter; 97 m_pPreIter->Next(); 98 m_pCurIter = pIter; 99 } 100 return TRUE; 101 } 102 void CFX_WordBreak::SetAt(int32_t nIndex) { 103 if (m_pPreIter) { 104 m_pPreIter->Release(); 105 m_pPreIter = NULL; 106 } 107 m_pCurIter->SetAt(nIndex); 108 FindNextBreakPos(m_pCurIter, TRUE, FALSE); 109 m_pPreIter = m_pCurIter; 110 m_pCurIter = m_pPreIter->Clone(); 111 FindNextBreakPos(m_pCurIter, FALSE, FALSE); 112 } 113 int32_t CFX_WordBreak::GetWordPos() const { 114 return m_pPreIter->GetAt(); 115 } 116 int32_t CFX_WordBreak::GetWordLength() const { 117 return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1; 118 } 119 void CFX_WordBreak::GetWord(CFX_WideString& wsWord) const { 120 int32_t nWordLength = GetWordLength(); 121 if (nWordLength <= 0) { 122 return; 123 } 124 FX_WCHAR* lpBuf = wsWord.GetBuffer(nWordLength); 125 IFX_CharIter* pTempIter = m_pPreIter->Clone(); 126 int32_t i = 0; 127 while (pTempIter->GetAt() <= m_pCurIter->GetAt()) { 128 lpBuf[i++] = pTempIter->GetChar(); 129 FX_BOOL bEnd = pTempIter->Next(); 130 if (!bEnd) { 131 break; 132 } 133 } 134 pTempIter->Release(); 135 wsWord.ReleaseBuffer(nWordLength); 136 } 137 FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const { 138 return m_pCurIter->IsEOF(bTail); 139 } 140 FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter* pIter, 141 FX_BOOL bPrev, 142 FX_BOOL bFromNext) { 143 FX_WordBreakProp ePreType = FX_WordBreakProp_None; 144 FX_WordBreakProp eCurType = FX_WordBreakProp_None; 145 FX_WordBreakProp eNextType = FX_WordBreakProp_None; 146 if (pIter->IsEOF(!bPrev)) { 147 return TRUE; 148 } 149 if (!(bFromNext || pIter->IsEOF(bPrev))) { 150 pIter->Next(!bPrev); 151 FX_WCHAR wcTemp = pIter->GetChar(); 152 ePreType = FX_GetWordBreakProperty(wcTemp); 153 pIter->Next(bPrev); 154 } 155 FX_WCHAR wcTemp = pIter->GetChar(); 156 eCurType = FX_GetWordBreakProperty(wcTemp); 157 FX_BOOL bFirst = TRUE; 158 do { 159 pIter->Next(bPrev); 160 FX_WCHAR wcTemp = pIter->GetChar(); 161 eNextType = FX_GetWordBreakProperty(wcTemp); 162 FX_WORD wBreak = 163 gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType)); 164 if (wBreak) { 165 if (pIter->IsEOF(!bPrev)) { 166 pIter->Next(!bPrev); 167 return TRUE; 168 } 169 if (bFirst) { 170 int32_t nFlags = 0; 171 if (eCurType == FX_WordBreakProp_MidLetter) { 172 if (eNextType == FX_WordBreakProp_ALetter) { 173 nFlags = 1; 174 } 175 } else if (eCurType == FX_WordBreakProp_MidNum) { 176 if (eNextType == FX_WordBreakProp_Numberic) { 177 nFlags = 2; 178 } 179 } else if (eCurType == FX_WordBreakProp_MidNumLet) { 180 if (eNextType == FX_WordBreakProp_ALetter) { 181 nFlags = 1; 182 } else if (eNextType == FX_WordBreakProp_Numberic) { 183 nFlags = 2; 184 } 185 } 186 if (nFlags > 0) { 187 FXSYS_assert(nFlags <= 2); 188 if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) || 189 (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) { 190 pIter->Next(!bPrev); 191 return TRUE; 192 } 193 pIter->Next(bPrev); 194 wBreak = FALSE; 195 } 196 bFirst = FALSE; 197 } 198 if (wBreak) { 199 int32_t nFlags = 0; 200 if (eNextType == FX_WordBreakProp_MidLetter) { 201 if (eCurType == FX_WordBreakProp_ALetter) { 202 nFlags = 1; 203 } 204 } else if (eNextType == FX_WordBreakProp_MidNum) { 205 if (eCurType == FX_WordBreakProp_Numberic) { 206 nFlags = 2; 207 } 208 } else if (eNextType == FX_WordBreakProp_MidNumLet) { 209 if (eCurType == FX_WordBreakProp_ALetter) { 210 nFlags = 1; 211 } else if (eCurType == FX_WordBreakProp_Numberic) { 212 nFlags = 2; 213 } 214 } 215 if (nFlags <= 0) { 216 pIter->Next(!bPrev); 217 return TRUE; 218 } 219 FXSYS_assert(nFlags <= 2); 220 pIter->Next(bPrev); 221 wcTemp = pIter->GetChar(); 222 eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp); 223 if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) || 224 (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) { 225 pIter->Next(!bPrev); 226 pIter->Next(!bPrev); 227 return TRUE; 228 } 229 } 230 } 231 ePreType = eCurType; 232 eCurType = eNextType; 233 bFirst = FALSE; 234 } while (!pIter->IsEOF(!bPrev)); 235 return TRUE; 236 } 237 IFX_WordBreak* FX_WordBreak_Create() { 238 return new CFX_WordBreak; 239 } 240