1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fxcrt/fx_bidi.h" 8 9 #include <algorithm> 10 11 #include "core/fxcrt/fx_unicode.h" 12 #include "third_party/base/ptr_util.h" 13 14 #ifdef PDF_ENABLE_XFA 15 #include "core/fxcrt/fx_extension.h" 16 #endif // PDF_ENABLE_XFA 17 18 namespace { 19 20 enum FX_BIDICLASS { 21 FX_BIDICLASS_ON = 0, // Other Neutral 22 FX_BIDICLASS_L = 1, // Left Letter 23 FX_BIDICLASS_R = 2, // Right Letter 24 FX_BIDICLASS_AN = 3, // Arabic Number 25 FX_BIDICLASS_EN = 4, // European Number 26 FX_BIDICLASS_AL = 5, // Arabic Letter 27 FX_BIDICLASS_NSM = 6, // Non-spacing Mark 28 FX_BIDICLASS_CS = 7, // Common Number Separator 29 FX_BIDICLASS_ES = 8, // European Separator 30 FX_BIDICLASS_ET = 9, // European Number Terminator 31 FX_BIDICLASS_BN = 10, // Boundary Neutral 32 FX_BIDICLASS_S = 11, // Segment Separator 33 FX_BIDICLASS_WS = 12, // Whitespace 34 FX_BIDICLASS_B = 13, // Paragraph Separator 35 FX_BIDICLASS_RLO = 14, // Right-to-Left Override 36 FX_BIDICLASS_RLE = 15, // Right-to-Left Embedding 37 FX_BIDICLASS_LRO = 16, // Left-to-Right Override 38 FX_BIDICLASS_LRE = 17, // Left-to-Right Embedding 39 FX_BIDICLASS_PDF = 18, // Pop Directional Format 40 FX_BIDICLASS_N = FX_BIDICLASS_ON, 41 }; 42 constexpr uint32_t FX_BIDICLASSBITS = 6; 43 constexpr uint32_t FX_BIDICLASSBITSMASK = 0x1F << FX_BIDICLASSBITS; 44 45 #ifdef PDF_ENABLE_XFA 46 47 #ifndef NDEBUG 48 constexpr int32_t kBidiMaxLevel = 61; 49 #endif // NDEBUG 50 51 enum FX_BIDIWEAKSTATE { 52 FX_BWSxa = 0, 53 FX_BWSxr, 54 FX_BWSxl, 55 FX_BWSao, 56 FX_BWSro, 57 FX_BWSlo, 58 FX_BWSrt, 59 FX_BWSlt, 60 FX_BWScn, 61 FX_BWSra, 62 FX_BWSre, 63 FX_BWSla, 64 FX_BWSle, 65 FX_BWSac, 66 FX_BWSrc, 67 FX_BWSrs, 68 FX_BWSlc, 69 FX_BWSls, 70 FX_BWSret, 71 FX_BWSlet 72 }; 73 74 enum FX_BIDIWEAKACTION { 75 FX_BWAIX = 0x100, 76 FX_BWAXX = 0x0F, 77 FX_BWAxxx = (0x0F << 4) + 0x0F, 78 FX_BWAxIx = 0x100 + FX_BWAxxx, 79 FX_BWAxxN = (0x0F << 4) + FX_BIDICLASS_ON, 80 FX_BWAxxE = (0x0F << 4) + FX_BIDICLASS_EN, 81 FX_BWAxxA = (0x0F << 4) + FX_BIDICLASS_AN, 82 FX_BWAxxR = (0x0F << 4) + FX_BIDICLASS_R, 83 FX_BWAxxL = (0x0F << 4) + FX_BIDICLASS_L, 84 FX_BWANxx = (FX_BIDICLASS_ON << 4) + 0x0F, 85 FX_BWAAxx = (FX_BIDICLASS_AN << 4) + 0x0F, 86 FX_BWAExE = (FX_BIDICLASS_EN << 4) + FX_BIDICLASS_EN, 87 FX_BWANIx = (FX_BIDICLASS_ON << 4) + 0x0F + 0x100, 88 FX_BWANxN = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_ON, 89 FX_BWANxR = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_R, 90 FX_BWANxE = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_EN, 91 FX_BWAAxA = (FX_BIDICLASS_AN << 4) + FX_BIDICLASS_AN, 92 FX_BWANxL = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_L, 93 FX_BWALxL = (FX_BIDICLASS_L << 4) + FX_BIDICLASS_L, 94 FX_BWAxIL = (0x0F << 4) + FX_BIDICLASS_L + 0x100, 95 FX_BWAAxR = (FX_BIDICLASS_AN << 4) + FX_BIDICLASS_R, 96 FX_BWALxx = (FX_BIDICLASS_L << 4) + 0x0F, 97 }; 98 99 enum FX_BIDINEUTRALSTATE { 100 FX_BNSr = 0, 101 FX_BNSl, 102 FX_BNSrn, 103 FX_BNSln, 104 FX_BNSa, 105 FX_BNSna 106 }; 107 108 enum FX_BIDINEUTRALACTION { 109 FX_BNAnL = FX_BIDICLASS_L, 110 FX_BNAEn = (FX_BIDICLASS_AN << 4), 111 FX_BNARn = (FX_BIDICLASS_R << 4), 112 FX_BNALn = (FX_BIDICLASS_L << 4), 113 FX_BNAIn = FX_BWAIX, 114 FX_BNALnL = (FX_BIDICLASS_L << 4) + FX_BIDICLASS_L, 115 }; 116 117 const int32_t gc_FX_BidiNTypes[] = { 118 FX_BIDICLASS_N, FX_BIDICLASS_L, FX_BIDICLASS_R, FX_BIDICLASS_AN, 119 FX_BIDICLASS_EN, FX_BIDICLASS_AL, FX_BIDICLASS_NSM, FX_BIDICLASS_CS, 120 FX_BIDICLASS_ES, FX_BIDICLASS_ET, FX_BIDICLASS_BN, FX_BIDICLASS_BN, 121 FX_BIDICLASS_N, FX_BIDICLASS_B, FX_BIDICLASS_RLO, FX_BIDICLASS_RLE, 122 FX_BIDICLASS_LRO, FX_BIDICLASS_LRE, FX_BIDICLASS_PDF, FX_BIDICLASS_ON, 123 }; 124 125 const int32_t gc_FX_BidiWeakStates[][10] = { 126 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSxa, 127 FX_BWSao, FX_BWSao, FX_BWSao}, 128 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSxr, 129 FX_BWSro, FX_BWSro, FX_BWSrt}, 130 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSxl, 131 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 132 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao, 133 FX_BWSao, FX_BWSao, FX_BWSao}, 134 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro, 135 FX_BWSro, FX_BWSro, FX_BWSrt}, 136 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo, 137 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 138 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSrt, 139 FX_BWSro, FX_BWSro, FX_BWSrt}, 140 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlt, 141 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 142 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWScn, 143 FX_BWSac, FX_BWSao, FX_BWSao}, 144 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSra, 145 FX_BWSrc, FX_BWSro, FX_BWSrt}, 146 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSre, 147 FX_BWSrs, FX_BWSrs, FX_BWSret}, 148 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSla, 149 FX_BWSlc, FX_BWSlo, FX_BWSlt}, 150 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSle, 151 FX_BWSls, FX_BWSls, FX_BWSlet}, 152 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao, 153 FX_BWSao, FX_BWSao, FX_BWSao}, 154 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro, 155 FX_BWSro, FX_BWSro, FX_BWSrt}, 156 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro, 157 FX_BWSro, FX_BWSro, FX_BWSrt}, 158 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo, 159 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 160 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo, 161 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 162 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSret, 163 FX_BWSro, FX_BWSro, FX_BWSret}, 164 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlet, 165 FX_BWSlo, FX_BWSlo, FX_BWSlet}, 166 }; 167 168 const int32_t gc_FX_BidiWeakActions[][10] = { 169 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR, 170 FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN}, 171 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 172 FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 173 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 174 FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 175 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR, 176 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN}, 177 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 178 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 179 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 180 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 181 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR, 182 FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx}, 183 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR, 184 FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx}, 185 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR, 186 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxxN}, 187 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 188 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx}, 189 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 190 FX_BWAxxE, FX_BWAxIx, FX_BWAxIx, FX_BWAxxE}, 191 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 192 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx}, 193 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 194 FX_BWAxxL, FX_BWAxIx, FX_BWAxIx, FX_BWAxxL}, 195 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWAAxA, FX_BWANxR, 196 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANxN}, 197 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxE, FX_BWANxR, 198 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 199 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR, 200 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 201 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxL, FX_BWANxR, 202 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 203 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR, 204 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 205 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 206 FX_BWAxxE, FX_BWAxxN, FX_BWAxxN, FX_BWAxxE}, 207 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 208 FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxxL}, 209 }; 210 211 const int32_t gc_FX_BidiNeutralStates[][5] = { 212 {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr}, 213 {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 214 {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr}, 215 {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 216 {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 217 {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 218 }; 219 const int32_t gc_FX_BidiNeutralActions[][5] = { 220 {FX_BNAIn, 0, 0, 0, 0}, 221 {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L}, 222 {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNARn}, 223 {FX_BNAIn, FX_BNALn, FX_BNAEn, FX_BNAEn, FX_BNALnL}, 224 {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L}, 225 {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNAEn}, 226 }; 227 228 const int32_t gc_FX_BidiAddLevel[][4] = { 229 {0, 1, 2, 2}, 230 {1, 0, 1, 1}, 231 }; 232 233 class CFX_BidiLine { 234 public: 235 void BidiLine(std::vector<CFX_Char>* chars, size_t iCount) { 236 ASSERT(iCount <= chars->size()); 237 if (iCount < 2) 238 return; 239 240 Classify(chars, iCount, false); 241 ResolveExplicit(chars, iCount); 242 ResolveWeak(chars, iCount); 243 ResolveNeutrals(chars, iCount); 244 ResolveImplicit(chars, iCount); 245 Classify(chars, iCount, true); 246 ResolveWhitespace(chars, iCount); 247 Reorder(chars, iCount); 248 Position(chars, iCount); 249 } 250 251 private: 252 int32_t Direction(int32_t val) { 253 return FX_IsOdd(val) ? FX_BIDICLASS_R : FX_BIDICLASS_L; 254 } 255 256 int32_t GetDeferredType(int32_t val) { return (val >> 4) & 0x0F; } 257 258 int32_t GetResolvedType(int32_t val) { return val & 0x0F; } 259 260 int32_t GetDeferredNeutrals(int32_t iAction, int32_t iLevel) { 261 iAction = (iAction >> 4) & 0xF; 262 if (iAction == (FX_BNAEn >> 4)) 263 return Direction(iLevel); 264 return iAction; 265 } 266 267 int32_t GetResolvedNeutrals(int32_t iAction) { 268 iAction &= 0xF; 269 return iAction == FX_BNAIn ? 0 : iAction; 270 } 271 272 void ReverseString(std::vector<CFX_Char>* chars, 273 size_t iStart, 274 size_t iCount) { 275 ASSERT(pdfium::IndexInBounds(*chars, iStart)); 276 ASSERT(iStart + iCount <= chars->size()); 277 278 std::reverse(chars->begin() + iStart, chars->begin() + iStart + iCount); 279 } 280 281 void SetDeferredRun(std::vector<CFX_Char>* chars, 282 bool bClass, 283 size_t iStart, 284 size_t iCount, 285 int32_t iValue) { 286 ASSERT(iStart <= chars->size()); 287 ASSERT(iStart >= iCount); 288 289 size_t iLast = iStart - iCount; 290 for (size_t i = iStart - 1; i >= iLast; --i) { 291 if (bClass) 292 (*chars)[i].m_iBidiClass = static_cast<int16_t>(iValue); 293 else 294 (*chars)[i].m_iBidiLevel = static_cast<int16_t>(iValue); 295 296 if (i == 0) 297 break; 298 } 299 } 300 301 void Classify(std::vector<CFX_Char>* chars, size_t iCount, bool bWS) { 302 if (bWS) { 303 for (size_t i = 0; i < iCount; ++i) { 304 CFX_Char& cur = (*chars)[i]; 305 cur.m_iBidiClass = 306 static_cast<int16_t>(cur.char_props() & FX_BIDICLASSBITSMASK) >> 307 FX_BIDICLASSBITS; 308 } 309 return; 310 } 311 312 for (size_t i = 0; i < iCount; ++i) { 313 CFX_Char& cur = (*chars)[i]; 314 cur.m_iBidiClass = static_cast<int16_t>( 315 gc_FX_BidiNTypes[(cur.char_props() & FX_BIDICLASSBITSMASK) >> 316 FX_BIDICLASSBITS]); 317 } 318 } 319 320 void ResolveExplicit(std::vector<CFX_Char>* chars, size_t iCount) { 321 for (size_t i = 0; i < iCount; ++i) 322 (*chars)[i].m_iBidiLevel = 0; 323 } 324 325 void ResolveWeak(std::vector<CFX_Char>* chars, size_t iCount) { 326 if (iCount <= 1) 327 return; 328 --iCount; 329 330 int32_t iLevelCur = 0; 331 int32_t iState = FX_BWSxl; 332 size_t i = 0; 333 size_t iNum = 0; 334 int32_t iClsCur; 335 int32_t iClsRun; 336 int32_t iClsNew; 337 int32_t iAction; 338 for (; i <= iCount; ++i) { 339 CFX_Char* pTC = &(*chars)[i]; 340 iClsCur = pTC->m_iBidiClass; 341 if (iClsCur == FX_BIDICLASS_BN) { 342 pTC->m_iBidiLevel = (int16_t)iLevelCur; 343 if (i == iCount && iLevelCur != 0) { 344 iClsCur = Direction(iLevelCur); 345 pTC->m_iBidiClass = (int16_t)iClsCur; 346 } else if (i < iCount) { 347 CFX_Char* pTCNext = &(*chars)[i + 1]; 348 int32_t iLevelNext, iLevelNew; 349 iClsNew = pTCNext->m_iBidiClass; 350 iLevelNext = pTCNext->m_iBidiLevel; 351 if (iClsNew != FX_BIDICLASS_BN && iLevelCur != iLevelNext) { 352 iLevelNew = std::max(iLevelNext, iLevelCur); 353 pTC->m_iBidiLevel = static_cast<int16_t>(iLevelNew); 354 iClsCur = Direction(iLevelNew); 355 pTC->m_iBidiClass = static_cast<int16_t>(iClsCur); 356 iLevelCur = iLevelNext; 357 } else { 358 if (iNum > 0) 359 ++iNum; 360 continue; 361 } 362 } else { 363 if (iNum > 0) 364 ++iNum; 365 continue; 366 } 367 } 368 369 ASSERT(iClsCur <= FX_BIDICLASS_BN); 370 iAction = gc_FX_BidiWeakActions[iState][iClsCur]; 371 iClsRun = GetDeferredType(iAction); 372 if (iClsRun != FX_BWAXX && iNum > 0) { 373 SetDeferredRun(chars, true, i, iNum, iClsRun); 374 iNum = 0; 375 } 376 iClsNew = GetResolvedType(iAction); 377 if (iClsNew != FX_BWAXX) 378 pTC->m_iBidiClass = static_cast<int16_t>(iClsNew); 379 if (FX_BWAIX & iAction) 380 ++iNum; 381 382 iState = gc_FX_BidiWeakStates[iState][iClsCur]; 383 } 384 if (iNum == 0) 385 return; 386 387 iClsCur = Direction(0); 388 iClsRun = GetDeferredType(gc_FX_BidiWeakActions[iState][iClsCur]); 389 if (iClsRun != FX_BWAXX) 390 SetDeferredRun(chars, true, i, iNum, iClsRun); 391 } 392 393 void ResolveNeutrals(std::vector<CFX_Char>* chars, size_t iCount) { 394 if (iCount <= 1) 395 return; 396 --iCount; 397 398 CFX_Char* pTC; 399 int32_t iLevel = 0; 400 int32_t iState = FX_BNSl; 401 size_t i = 0; 402 size_t iNum = 0; 403 int32_t iClsCur; 404 int32_t iClsRun; 405 int32_t iClsNew; 406 int32_t iAction; 407 for (; i <= iCount; ++i) { 408 pTC = &(*chars)[i]; 409 iClsCur = pTC->m_iBidiClass; 410 if (iClsCur == FX_BIDICLASS_BN) { 411 if (iNum) 412 ++iNum; 413 continue; 414 } 415 416 ASSERT(iClsCur < FX_BIDICLASS_AL); 417 iAction = gc_FX_BidiNeutralActions[iState][iClsCur]; 418 iClsRun = GetDeferredNeutrals(iAction, iLevel); 419 if (iClsRun != FX_BIDICLASS_N && iNum > 0) { 420 SetDeferredRun(chars, true, i, iNum, iClsRun); 421 iNum = 0; 422 } 423 424 iClsNew = GetResolvedNeutrals(iAction); 425 if (iClsNew != FX_BIDICLASS_N) 426 pTC->m_iBidiClass = (int16_t)iClsNew; 427 if (FX_BNAIn & iAction) 428 ++iNum; 429 430 iState = gc_FX_BidiNeutralStates[iState][iClsCur]; 431 iLevel = pTC->m_iBidiLevel; 432 } 433 if (iNum == 0) 434 return; 435 436 iClsCur = Direction(iLevel); 437 iClsRun = 438 GetDeferredNeutrals(gc_FX_BidiNeutralActions[iState][iClsCur], iLevel); 439 if (iClsRun != FX_BIDICLASS_N) 440 SetDeferredRun(chars, true, i, iNum, iClsRun); 441 } 442 443 void ResolveImplicit(std::vector<CFX_Char>* chars, size_t iCount) { 444 for (size_t i = 0; i < iCount; ++i) { 445 int32_t iCls = (*chars)[i].m_iBidiClass; 446 if (iCls == FX_BIDICLASS_BN) 447 continue; 448 449 ASSERT(iCls > FX_BIDICLASS_ON && iCls < FX_BIDICLASS_AL); 450 int32_t iLevel = (*chars)[i].m_iBidiLevel; 451 iLevel += gc_FX_BidiAddLevel[FX_IsOdd(iLevel)][iCls - 1]; 452 (*chars)[i].m_iBidiLevel = (int16_t)iLevel; 453 } 454 } 455 456 void ResolveWhitespace(std::vector<CFX_Char>* chars, size_t iCount) { 457 if (iCount <= 1) 458 return; 459 iCount--; 460 461 int32_t iLevel = 0; 462 size_t i = 0; 463 size_t iNum = 0; 464 for (; i <= iCount; ++i) { 465 switch ((*chars)[i].m_iBidiClass) { 466 case FX_BIDICLASS_WS: 467 ++iNum; 468 break; 469 case FX_BIDICLASS_RLE: 470 case FX_BIDICLASS_LRE: 471 case FX_BIDICLASS_LRO: 472 case FX_BIDICLASS_RLO: 473 case FX_BIDICLASS_PDF: 474 case FX_BIDICLASS_BN: 475 (*chars)[i].m_iBidiLevel = static_cast<int16_t>(iLevel); 476 ++iNum; 477 break; 478 case FX_BIDICLASS_S: 479 case FX_BIDICLASS_B: 480 if (iNum > 0) 481 SetDeferredRun(chars, false, i, iNum, 0); 482 483 (*chars)[i].m_iBidiLevel = 0; 484 iNum = 0; 485 break; 486 default: 487 iNum = 0; 488 break; 489 } 490 iLevel = (*chars)[i].m_iBidiLevel; 491 } 492 if (iNum > 0) 493 SetDeferredRun(chars, false, i, iNum, 0); 494 } 495 496 size_t ReorderLevel(std::vector<CFX_Char>* chars, 497 size_t iCount, 498 int32_t iBaseLevel, 499 size_t iStart, 500 bool bReverse) { 501 ASSERT(iBaseLevel >= 0 && iBaseLevel <= kBidiMaxLevel); 502 ASSERT(iStart < iCount); 503 504 if (iCount < 1) 505 return 0; 506 507 bReverse = bReverse || FX_IsOdd(iBaseLevel); 508 size_t i = iStart; 509 for (; i < iCount; ++i) { 510 int32_t iLevel = (*chars)[i].m_iBidiLevel; 511 if (iLevel == iBaseLevel) 512 continue; 513 if (iLevel < iBaseLevel) 514 break; 515 516 i += ReorderLevel(chars, iCount, iBaseLevel + 1, i, bReverse) - 1; 517 } 518 519 size_t iNum = i - iStart; 520 if (bReverse && iNum > 1) 521 ReverseString(chars, iStart, iNum); 522 523 return iNum; 524 } 525 526 void Reorder(std::vector<CFX_Char>* chars, size_t iCount) { 527 for (size_t i = 0; i < iCount;) 528 i += ReorderLevel(chars, iCount, 0, i, false); 529 } 530 531 void Position(std::vector<CFX_Char>* chars, size_t iCount) { 532 for (size_t i = 0; i < iCount; ++i) 533 (*chars)[(*chars)[i].m_iBidiPos].m_iBidiOrder = i; 534 } 535 }; 536 537 #endif // PDF_ENABLE_XFA 538 539 } // namespace 540 541 CFX_BidiChar::CFX_BidiChar() 542 : m_CurrentSegment({0, 0, NEUTRAL}), m_LastSegment({0, 0, NEUTRAL}) {} 543 544 bool CFX_BidiChar::AppendChar(wchar_t wch) { 545 uint32_t dwProps = FX_GetUnicodeProperties(wch); 546 int32_t iBidiCls = (dwProps & FX_BIDICLASSBITSMASK) >> FX_BIDICLASSBITS; 547 Direction direction = NEUTRAL; 548 switch (iBidiCls) { 549 case FX_BIDICLASS_L: 550 case FX_BIDICLASS_AN: 551 case FX_BIDICLASS_EN: 552 direction = LEFT; 553 break; 554 case FX_BIDICLASS_R: 555 case FX_BIDICLASS_AL: 556 direction = RIGHT; 557 break; 558 } 559 560 bool bChangeDirection = (direction != m_CurrentSegment.direction); 561 if (bChangeDirection) 562 StartNewSegment(direction); 563 564 m_CurrentSegment.count++; 565 return bChangeDirection; 566 } 567 568 bool CFX_BidiChar::EndChar() { 569 StartNewSegment(NEUTRAL); 570 return m_LastSegment.count > 0; 571 } 572 573 void CFX_BidiChar::StartNewSegment(CFX_BidiChar::Direction direction) { 574 m_LastSegment = m_CurrentSegment; 575 m_CurrentSegment.start += m_CurrentSegment.count; 576 m_CurrentSegment.count = 0; 577 m_CurrentSegment.direction = direction; 578 } 579 580 CFX_BidiString::CFX_BidiString(const WideString& str) 581 : m_Str(str), 582 m_pBidiChar(pdfium::MakeUnique<CFX_BidiChar>()), 583 m_eOverallDirection(CFX_BidiChar::LEFT) { 584 for (const auto& c : m_Str) { 585 if (m_pBidiChar->AppendChar(c)) 586 m_Order.push_back(m_pBidiChar->GetSegmentInfo()); 587 } 588 if (m_pBidiChar->EndChar()) 589 m_Order.push_back(m_pBidiChar->GetSegmentInfo()); 590 591 size_t nR2L = std::count_if(m_Order.begin(), m_Order.end(), 592 [](const CFX_BidiChar::Segment& seg) { 593 return seg.direction == CFX_BidiChar::RIGHT; 594 }); 595 596 size_t nL2R = std::count_if(m_Order.begin(), m_Order.end(), 597 [](const CFX_BidiChar::Segment& seg) { 598 return seg.direction == CFX_BidiChar::LEFT; 599 }); 600 601 if (nR2L > 0 && nR2L >= nL2R) 602 SetOverallDirectionRight(); 603 } 604 605 CFX_BidiString::~CFX_BidiString() {} 606 607 void CFX_BidiString::SetOverallDirectionRight() { 608 if (m_eOverallDirection != CFX_BidiChar::RIGHT) { 609 std::reverse(m_Order.begin(), m_Order.end()); 610 m_eOverallDirection = CFX_BidiChar::RIGHT; 611 } 612 } 613 614 #ifdef PDF_ENABLE_XFA 615 void FX_BidiLine(std::vector<CFX_Char>* chars, size_t iCount) { 616 CFX_BidiLine blt; 617 blt.BidiLine(chars, iCount); 618 } 619 #endif // PDF_ENABLE_XFA 620