1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fxcrt/fx_arabic.h" 8 9 #include <algorithm> 10 #include <vector> 11 12 #include "core/fxcrt/fx_ucd.h" 13 #include "third_party/base/stl_util.h" 14 15 namespace { 16 17 const FX_ARBFORMTABLE g_FX_ArabicFormTables[] = { 18 {0xFE81, 0xFE82, 0xFE81, 0xFE82}, {0xFE83, 0xFE84, 0xFE83, 0xFE84}, 19 {0xFE85, 0xFE86, 0xFE85, 0xFE86}, {0xFE87, 0xFE88, 0xFE87, 0xFE88}, 20 {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, {0xFE8D, 0xFE8E, 0xFE8D, 0xFE8E}, 21 {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, {0xFE93, 0xFE94, 0xFE93, 0xFE94}, 22 {0xFE95, 0xFE96, 0xFE97, 0xFE98}, {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, 23 {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, 24 {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, {0xFEA9, 0xFEAA, 0xFEA9, 0xFEAA}, 25 {0xFEAB, 0xFEAC, 0xFEAB, 0xFEAC}, {0xFEAD, 0xFEAE, 0xFEAD, 0xFEAE}, 26 {0xFEAF, 0xFEB0, 0xFEAF, 0xFEB0}, {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, 27 {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, 28 {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, 29 {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, 30 {0xFECD, 0xFECE, 0xFECF, 0xFED0}, {0x063B, 0x063B, 0x063B, 0x063B}, 31 {0x063C, 0x063C, 0x063C, 0x063C}, {0x063D, 0x063D, 0x063D, 0x063D}, 32 {0x063E, 0x063E, 0x063E, 0x063E}, {0x063F, 0x063F, 0x063F, 0x063F}, 33 {0x0640, 0x0640, 0x0640, 0x0640}, {0xFED1, 0xFED2, 0xFED3, 0xFED4}, 34 {0xFED5, 0xFED6, 0xFED7, 0xFED8}, {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, 35 {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, 36 {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, 37 {0xFEED, 0xFEEE, 0xFEED, 0xFEEE}, {0xFEEF, 0xFEF0, 0xFBFE, 0xFBFF}, 38 {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, {0x064B, 0x064B, 0x064B, 0x064B}, 39 {0x064C, 0x064C, 0x064C, 0x064C}, {0x064D, 0x064D, 0x064D, 0x064D}, 40 {0x064E, 0x064E, 0x064E, 0x064E}, {0x064F, 0x064F, 0x064F, 0x064F}, 41 {0x0650, 0x0650, 0x0650, 0x0650}, {0x0651, 0x0651, 0x0651, 0x0651}, 42 {0x0652, 0x0652, 0x0652, 0x0652}, {0x0653, 0x0653, 0x0653, 0x0653}, 43 {0x0654, 0x0654, 0x0654, 0x0654}, {0x0655, 0x0655, 0x0655, 0x0655}, 44 {0x0656, 0x0656, 0x0656, 0x0656}, {0x0657, 0x0657, 0x0657, 0x0657}, 45 {0x0658, 0x0658, 0x0658, 0x0658}, {0x0659, 0x0659, 0x0659, 0x0659}, 46 {0x065A, 0x065A, 0x065A, 0x065A}, {0x065B, 0x065B, 0x065B, 0x065B}, 47 {0x065C, 0x065C, 0x065C, 0x065C}, {0x065D, 0x065D, 0x065D, 0x065D}, 48 {0x065E, 0x065E, 0x065E, 0x065E}, {0x065F, 0x065F, 0x065F, 0x065F}, 49 {0x0660, 0x0660, 0x0660, 0x0660}, {0x0661, 0x0661, 0x0661, 0x0661}, 50 {0x0662, 0x0662, 0x0662, 0x0662}, {0x0663, 0x0663, 0x0663, 0x0663}, 51 {0x0664, 0x0664, 0x0664, 0x0664}, {0x0665, 0x0665, 0x0665, 0x0665}, 52 {0x0666, 0x0666, 0x0666, 0x0666}, {0x0667, 0x0667, 0x0667, 0x0667}, 53 {0x0668, 0x0668, 0x0668, 0x0668}, {0x0669, 0x0669, 0x0669, 0x0669}, 54 {0x066A, 0x066A, 0x066A, 0x066A}, {0x066B, 0x066B, 0x066B, 0x066B}, 55 {0x066C, 0x066C, 0x066C, 0x066C}, {0x066D, 0x066D, 0x066D, 0x066D}, 56 {0x066E, 0x066E, 0x066E, 0x066E}, {0x066F, 0x066F, 0x066F, 0x066F}, 57 {0x0670, 0x0670, 0x0670, 0x0670}, {0xFB50, 0xFB51, 0xFB50, 0xFB51}, 58 {0x0672, 0x0672, 0x0672, 0x0672}, {0x0673, 0x0673, 0x0673, 0x0673}, 59 {0x0674, 0x0674, 0x0674, 0x0674}, {0x0675, 0x0675, 0x0675, 0x0675}, 60 {0x0676, 0x0676, 0x0676, 0x0676}, {0x0677, 0x0677, 0x0677, 0x0677}, 61 {0x0678, 0x0678, 0x0678, 0x0678}, {0xFB66, 0xFB67, 0xFB68, 0xFB69}, 62 {0xFB5E, 0xFB5F, 0xFB60, 0xFB61}, {0xFB52, 0xFB53, 0xFB54, 0xFB55}, 63 {0x067C, 0x067C, 0x067C, 0x067C}, {0x067D, 0x067D, 0x067D, 0x067D}, 64 {0xFB56, 0xFB57, 0xFB58, 0xFB59}, {0xFB62, 0xFB63, 0xFB64, 0xFB65}, 65 {0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}, {0x0681, 0x0681, 0x0681, 0x0681}, 66 {0x0682, 0x0682, 0x0682, 0x0682}, {0xFB76, 0xFB77, 0xFB78, 0xFB79}, 67 {0xFB72, 0xFB73, 0xFB74, 0xFB75}, {0x0685, 0x0685, 0x0685, 0x0685}, 68 {0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}, {0xFB7E, 0xFB7F, 0xFB80, 0xFB81}, 69 {0xFB88, 0xFB89, 0xFB88, 0xFB89}, {0x0689, 0x0689, 0x0689, 0x0689}, 70 {0x068A, 0x068A, 0x068A, 0x068A}, {0x068B, 0x068B, 0x068B, 0x068B}, 71 {0xFB84, 0xFB85, 0xFB84, 0xFB85}, {0xFB82, 0xFB83, 0xFB82, 0xFB83}, 72 {0xFB86, 0xFB87, 0xFB86, 0xFB87}, {0x068F, 0x068F, 0x068F, 0x068F}, 73 {0x0690, 0x0690, 0x0690, 0x0690}, {0xFB8C, 0xFB8D, 0xFB8C, 0xFB8D}, 74 {0x0692, 0x0692, 0x0692, 0x0692}, {0x0693, 0x0693, 0x0693, 0x0693}, 75 {0x0694, 0x0694, 0x0694, 0x0694}, {0x0695, 0x0695, 0x0695, 0x0695}, 76 {0x0696, 0x0696, 0x0696, 0x0696}, {0x0697, 0x0697, 0x0697, 0x0697}, 77 {0xFB8A, 0xFB8B, 0xFB8A, 0xFB8B}, {0x0699, 0x0699, 0x0699, 0x0699}, 78 {0x069A, 0x069A, 0x069A, 0x069A}, {0x069B, 0x069B, 0x069B, 0x069B}, 79 {0x069C, 0x069C, 0x069C, 0x069C}, {0x069D, 0x069D, 0x069D, 0x069D}, 80 {0x069E, 0x069E, 0x069E, 0x069E}, {0x069F, 0x069F, 0x069F, 0x069F}, 81 {0x06A0, 0x06A0, 0x06A0, 0x06A0}, {0x06A1, 0x06A1, 0x06A1, 0x06A1}, 82 {0x06A2, 0x06A2, 0x06A2, 0x06A2}, {0x06A3, 0x06A3, 0x06A3, 0x06A3}, 83 {0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}, {0x06A5, 0x06A5, 0x06A5, 0x06A5}, 84 {0xFB6E, 0xFB6F, 0xFB70, 0xFB71}, {0x06A7, 0x06A7, 0x06A7, 0x06A7}, 85 {0x06A8, 0x06A8, 0x06A8, 0x06A8}, {0xFB8E, 0xFB8F, 0xFB90, 0xFB91}, 86 {0x06AA, 0x06AA, 0x06AA, 0x06AA}, {0x06AB, 0x06AB, 0x06AB, 0x06AB}, 87 {0x06AC, 0x06AC, 0x06AC, 0x06AC}, {0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}, 88 {0x06AE, 0x06AE, 0x06AE, 0x06AE}, {0xFB92, 0xFB93, 0xFB94, 0xFB95}, 89 {0x06B0, 0x06B0, 0x06B0, 0x06B0}, {0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}, 90 {0x06B2, 0x06B2, 0x06B2, 0x06B2}, {0xFB96, 0xFB97, 0xFB98, 0xFB99}, 91 {0x06B4, 0x06B4, 0x06B4, 0x06B4}, {0x06B5, 0x06B5, 0x06B5, 0x06B5}, 92 {0x06B6, 0x06B6, 0x06B6, 0x06B6}, {0x06B7, 0x06B7, 0x06B7, 0x06B7}, 93 {0x06B8, 0x06B8, 0x06B8, 0x06B8}, {0x06B9, 0x06B9, 0x06B9, 0x06B9}, 94 {0xFB9E, 0xFB9F, 0xFBE8, 0xFBE9}, {0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}, 95 {0x06BC, 0x06BC, 0x06BC, 0x06BC}, {0x06BD, 0x06BD, 0x06BD, 0x06BD}, 96 {0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}, {0x06BF, 0x06BF, 0x06BF, 0x06BF}, 97 {0xFBA4, 0xFBA5, 0xFBA4, 0xFBA5}, {0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}, 98 {0x06C2, 0x06C2, 0x06C2, 0x06C2}, {0x06C3, 0x06C3, 0x06C3, 0x06C3}, 99 {0x06C4, 0x06C4, 0x06C4, 0x06C4}, {0xFBE0, 0xFBE1, 0xFBE0, 0xFBE1}, 100 {0xFBD9, 0xFBDA, 0xFBD9, 0xFBDA}, {0xFBD7, 0xFBD8, 0xFBD7, 0xFBD8}, 101 {0xFBDB, 0xFBDC, 0xFBDB, 0xFBDC}, {0xFBE2, 0xFBE3, 0xFBE2, 0xFBE3}, 102 {0x06CA, 0x06CA, 0x06CA, 0x06CA}, {0xFBDE, 0xFBDF, 0xFBDE, 0xFBDF}, 103 {0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}, {0x06CD, 0x06CD, 0x06CD, 0x06CD}, 104 {0x06CE, 0x06CE, 0x06CE, 0x06CE}, {0x06CF, 0x06CF, 0x06CF, 0x06CF}, 105 {0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}, {0x06D1, 0x06D1, 0x06D1, 0x06D1}, 106 {0xFBAE, 0xFBAF, 0xFBAE, 0xFBAF}, {0xFBB0, 0xFBB1, 0xFBB0, 0xFBB1}, 107 {0x06D4, 0x06D4, 0x06D4, 0x06D4}, {0x06D5, 0x06D5, 0x06D5, 0x06D5}, 108 }; 109 110 const FX_ARAALEF gs_FX_AlefTable[] = { 111 {0x0622, 0xFEF5}, 112 {0x0623, 0xFEF7}, 113 {0x0625, 0xFEF9}, 114 {0x0627, 0xFEFB}, 115 }; 116 117 const FX_ARASHADDA gs_FX_ShaddaTable[] = { 118 {0x064C, 0xFC5E}, {0x064D, 0xFC5F}, {0x064E, 0xFC60}, 119 {0x064F, 0xFC61}, {0x0650, 0xFC62}, 120 }; 121 122 const int32_t gc_FX_BidiNTypes[] = { 123 FX_BIDICLASS_N, FX_BIDICLASS_L, FX_BIDICLASS_R, FX_BIDICLASS_AN, 124 FX_BIDICLASS_EN, FX_BIDICLASS_AL, FX_BIDICLASS_NSM, FX_BIDICLASS_CS, 125 FX_BIDICLASS_ES, FX_BIDICLASS_ET, FX_BIDICLASS_BN, FX_BIDICLASS_BN, 126 FX_BIDICLASS_N, FX_BIDICLASS_B, FX_BIDICLASS_RLO, FX_BIDICLASS_RLE, 127 FX_BIDICLASS_LRO, FX_BIDICLASS_LRE, FX_BIDICLASS_PDF, FX_BIDICLASS_ON, 128 }; 129 130 const int32_t gc_FX_BidiWeakStates[][10] = { 131 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSxa, 132 FX_BWSao, FX_BWSao, FX_BWSao}, 133 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSxr, 134 FX_BWSro, FX_BWSro, FX_BWSrt}, 135 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSxl, 136 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 137 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao, 138 FX_BWSao, FX_BWSao, FX_BWSao}, 139 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro, 140 FX_BWSro, FX_BWSro, FX_BWSrt}, 141 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo, 142 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 143 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSrt, 144 FX_BWSro, FX_BWSro, FX_BWSrt}, 145 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlt, 146 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 147 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWScn, 148 FX_BWSac, FX_BWSao, FX_BWSao}, 149 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSra, 150 FX_BWSrc, FX_BWSro, FX_BWSrt}, 151 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSre, 152 FX_BWSrs, FX_BWSrs, FX_BWSret}, 153 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSla, 154 FX_BWSlc, FX_BWSlo, FX_BWSlt}, 155 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSle, 156 FX_BWSls, FX_BWSls, FX_BWSlet}, 157 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao, 158 FX_BWSao, FX_BWSao, FX_BWSao}, 159 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro, 160 FX_BWSro, FX_BWSro, FX_BWSrt}, 161 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro, 162 FX_BWSro, FX_BWSro, FX_BWSrt}, 163 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo, 164 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 165 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo, 166 FX_BWSlo, FX_BWSlo, FX_BWSlt}, 167 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSret, 168 FX_BWSro, FX_BWSro, FX_BWSret}, 169 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlet, 170 FX_BWSlo, FX_BWSlo, FX_BWSlet}, 171 }; 172 173 const int32_t gc_FX_BidiWeakActions[][10] = { 174 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR, 175 FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN}, 176 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 177 FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 178 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 179 FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 180 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR, 181 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN}, 182 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 183 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 184 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 185 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx}, 186 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR, 187 FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx}, 188 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR, 189 FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx}, 190 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR, 191 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxxN}, 192 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 193 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx}, 194 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 195 FX_BWAxxE, FX_BWAxIx, FX_BWAxIx, FX_BWAxxE}, 196 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 197 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx}, 198 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 199 FX_BWAxxL, FX_BWAxIx, FX_BWAxIx, FX_BWAxxL}, 200 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWAAxA, FX_BWANxR, 201 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANxN}, 202 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxE, FX_BWANxR, 203 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 204 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR, 205 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 206 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxL, FX_BWANxR, 207 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 208 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR, 209 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx}, 210 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR, 211 FX_BWAxxE, FX_BWAxxN, FX_BWAxxN, FX_BWAxxE}, 212 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR, 213 FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxxL}, 214 }; 215 216 const int32_t gc_FX_BidiNeutralStates[][5] = { 217 {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr}, 218 {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 219 {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr}, 220 {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 221 {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 222 {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl}, 223 }; 224 const int32_t gc_FX_BidiNeutralActions[][5] = { 225 {FX_BNAIn, 0, 0, 0, 0}, 226 {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L}, 227 {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNARn}, 228 {FX_BNAIn, FX_BNALn, FX_BNAEn, FX_BNAEn, FX_BNALnL}, 229 {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L}, 230 {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNAEn}, 231 }; 232 233 const int32_t gc_FX_BidiAddLevel[][4] = { 234 {0, 1, 2, 2}, 235 {1, 0, 1, 1}, 236 }; 237 238 const FX_ARBFORMTABLE* ParseChar(const CFX_Char* pTC, 239 FX_WCHAR& wChar, 240 FX_CHARTYPE& eType) { 241 if (!pTC) { 242 eType = FX_CHARTYPE_Unknown; 243 wChar = 0xFEFF; 244 return nullptr; 245 } 246 eType = pTC->GetCharType(); 247 wChar = (FX_WCHAR)pTC->m_wCharCode; 248 const FX_ARBFORMTABLE* pFT = FX_GetArabicFormTable(wChar); 249 if (!pFT || eType >= FX_CHARTYPE_ArabicNormal) 250 eType = FX_CHARTYPE_Unknown; 251 252 return pFT; 253 } 254 255 } // namespace 256 257 const FX_ARBFORMTABLE* FX_GetArabicFormTable(FX_WCHAR unicode) { 258 if (unicode < 0x622 || unicode > 0x6d5) { 259 return nullptr; 260 } 261 return g_FX_ArabicFormTables + unicode - 0x622; 262 } 263 FX_WCHAR FX_GetArabicFromAlefTable(FX_WCHAR alef) { 264 static const int32_t s_iAlefCount = 265 sizeof(gs_FX_AlefTable) / sizeof(FX_ARAALEF); 266 for (int32_t iStart = 0; iStart < s_iAlefCount; iStart++) { 267 const FX_ARAALEF& v = gs_FX_AlefTable[iStart]; 268 if (v.wAlef == alef) { 269 return v.wIsolated; 270 } 271 } 272 return alef; 273 } 274 FX_WCHAR FX_GetArabicFromShaddaTable(FX_WCHAR shadda) { 275 static const int32_t s_iShaddaCount = 276 sizeof(gs_FX_ShaddaTable) / sizeof(FX_ARASHADDA); 277 for (int32_t iStart = 0; iStart < s_iShaddaCount; iStart++) { 278 const FX_ARASHADDA& v = gs_FX_ShaddaTable[iStart]; 279 if (v.wShadda == shadda) { 280 return v.wIsolated; 281 } 282 } 283 return shadda; 284 } 285 286 namespace pdfium { 287 namespace arabic { 288 289 bool IsArabicChar(FX_WCHAR wch) { 290 uint32_t dwRet = 291 kTextLayoutCodeProperties[(uint16_t)wch] & FX_CHARTYPEBITSMASK; 292 return dwRet >= FX_CHARTYPE_ArabicAlef; 293 } 294 295 bool IsArabicFormChar(FX_WCHAR wch) { 296 return (kTextLayoutCodeProperties[(uint16_t)wch] & FX_CHARTYPEBITSMASK) == 297 FX_CHARTYPE_ArabicForm; 298 } 299 300 FX_WCHAR GetFormChar(FX_WCHAR wch, FX_WCHAR prev, FX_WCHAR next) { 301 CFX_Char c(wch, kTextLayoutCodeProperties[(uint16_t)wch]); 302 CFX_Char p(prev, kTextLayoutCodeProperties[(uint16_t)prev]); 303 CFX_Char n(next, kTextLayoutCodeProperties[(uint16_t)next]); 304 return GetFormChar(&c, &p, &n); 305 } 306 307 FX_WCHAR GetFormChar(const CFX_Char* cur, 308 const CFX_Char* prev, 309 const CFX_Char* next) { 310 FX_CHARTYPE eCur; 311 FX_WCHAR wCur; 312 const FX_ARBFORMTABLE* ft = ParseChar(cur, wCur, eCur); 313 if (eCur < FX_CHARTYPE_ArabicAlef || eCur >= FX_CHARTYPE_ArabicNormal) { 314 return wCur; 315 } 316 FX_CHARTYPE ePrev; 317 FX_WCHAR wPrev; 318 ParseChar(prev, wPrev, ePrev); 319 if (wPrev == 0x0644 && eCur == FX_CHARTYPE_ArabicAlef) { 320 return 0xFEFF; 321 } 322 FX_CHARTYPE eNext; 323 FX_WCHAR wNext; 324 ParseChar(next, wNext, eNext); 325 bool bAlef = (eNext == FX_CHARTYPE_ArabicAlef && wCur == 0x644); 326 if (ePrev < FX_CHARTYPE_ArabicAlef) { 327 if (bAlef) { 328 return FX_GetArabicFromAlefTable(wNext); 329 } 330 return (eNext < FX_CHARTYPE_ArabicAlef) ? ft->wIsolated : ft->wInitial; 331 } 332 if (bAlef) { 333 wCur = FX_GetArabicFromAlefTable(wNext); 334 return (ePrev != FX_CHARTYPE_ArabicDistortion) ? wCur : ++wCur; 335 } 336 if (ePrev == FX_CHARTYPE_ArabicAlef || ePrev == FX_CHARTYPE_ArabicSpecial) { 337 return (eNext < FX_CHARTYPE_ArabicAlef) ? ft->wIsolated : ft->wInitial; 338 } 339 return (eNext < FX_CHARTYPE_ArabicAlef) ? ft->wFinal : ft->wMedial; 340 } 341 342 } // namespace arabic 343 } // namespace pdfium 344 345 void FX_BidiReverseString(CFX_WideString& wsText, 346 int32_t iStart, 347 int32_t iCount) { 348 ASSERT(iStart > -1 && iStart < wsText.GetLength()); 349 ASSERT(iCount >= 0 && iStart + iCount <= wsText.GetLength()); 350 FX_WCHAR wch; 351 FX_WCHAR* pStart = const_cast<FX_WCHAR*>(wsText.c_str()); 352 pStart += iStart; 353 FX_WCHAR* pEnd = pStart + iCount - 1; 354 while (pStart < pEnd) { 355 wch = *pStart; 356 *pStart++ = *pEnd; 357 *pEnd-- = wch; 358 } 359 } 360 361 362 int32_t FX_BidiGetDeferredNeutrals(int32_t iAction, int32_t iLevel) { 363 iAction = (iAction >> 4) & 0xF; 364 if (iAction == (FX_BIDINEUTRALACTION_En >> 4)) { 365 return FX_BidiDirection(iLevel); 366 } else { 367 return iAction; 368 } 369 } 370 371 int32_t FX_BidiGetResolvedNeutrals(int32_t iAction) { 372 iAction = (iAction & 0xF); 373 if (iAction == FX_BIDINEUTRALACTION_In) { 374 return 0; 375 } else { 376 return iAction; 377 } 378 } 379 380 int32_t FX_BidiReorderLevel(int32_t iBaseLevel, 381 CFX_WideString& wsText, 382 const CFX_ArrayTemplate<int32_t>& levels, 383 int32_t iStart, 384 bool bReverse) { 385 ASSERT(iBaseLevel >= 0 && iBaseLevel <= FX_BIDIMAXLEVEL); 386 ASSERT(wsText.GetLength() == levels.GetSize()); 387 ASSERT(iStart >= 0 && iStart < wsText.GetLength()); 388 int32_t iSize = wsText.GetLength(); 389 if (iSize < 1) { 390 return 0; 391 } 392 bReverse = bReverse || FX_IsOdd(iBaseLevel); 393 int32_t i = iStart, iLevel; 394 for (; i < iSize; i++) { 395 if ((iLevel = levels.GetAt(i)) == iBaseLevel) { 396 continue; 397 } 398 if (iLevel < iBaseLevel) { 399 break; 400 } 401 i += FX_BidiReorderLevel(iBaseLevel + 1, wsText, levels, i, bReverse) - 1; 402 } 403 int32_t iCount = i - iStart; 404 if (bReverse && iCount > 1) { 405 FX_BidiReverseString(wsText, iStart, iCount); 406 } 407 return iCount; 408 } 409 void FX_BidiReorder(int32_t iBaseLevel, 410 CFX_WideString& wsText, 411 const CFX_ArrayTemplate<int32_t>& levels) { 412 ASSERT(iBaseLevel >= 0 && iBaseLevel <= FX_BIDIMAXLEVEL); 413 ASSERT(wsText.GetLength() == levels.GetSize()); 414 int32_t iSize = wsText.GetLength(); 415 if (iSize < 1) { 416 return; 417 } 418 int32_t i = 0; 419 while (i < iSize) { 420 i += FX_BidiReorderLevel(iBaseLevel, wsText, levels, i, false); 421 } 422 } 423 424 template <class baseType> 425 class CFX_BidiLineTemplate { 426 public: 427 void FX_BidiReverseString(std::vector<baseType>& chars, 428 int32_t iStart, 429 int32_t iCount) { 430 ASSERT(iStart >= 0 && iStart < pdfium::CollectionSize<int32_t>(chars)); 431 ASSERT(iCount >= 0 && 432 iStart + iCount <= pdfium::CollectionSize<int32_t>(chars)); 433 std::reverse(chars.begin() + iStart, chars.begin() + iStart + iCount); 434 } 435 436 void FX_BidiSetDeferredRun(std::vector<baseType>& chars, 437 bool bClass, 438 int32_t iStart, 439 int32_t iCount, 440 int32_t iValue) { 441 ASSERT(iStart >= 0 && iStart <= pdfium::CollectionSize<int32_t>(chars)); 442 ASSERT(iStart - iCount > -1); 443 int32_t iLast = iStart - iCount; 444 if (bClass) { 445 for (int32_t i = iStart - 1; i >= iLast; i--) 446 chars[i].m_iBidiClass = (int16_t)iValue; 447 } else { 448 for (int32_t i = iStart - 1; i >= iLast; i--) 449 chars[i].m_iBidiLevel = (int16_t)iValue; 450 } 451 } 452 453 void FX_BidiClassify(std::vector<baseType>& chars, int32_t iCount, bool bWS) { 454 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 455 if (bWS) { 456 for (int32_t i = 0; i < iCount; i++) { 457 chars[i].m_iBidiClass = 458 (int16_t)(chars[i].m_dwCharProps & FX_BIDICLASSBITSMASK) >> 459 FX_BIDICLASSBITS; 460 } 461 } else { 462 for (int32_t i = 0; i < iCount; i++) { 463 chars[i].m_iBidiClass = (int16_t) 464 gc_FX_BidiNTypes[(chars[i].m_dwCharProps & FX_BIDICLASSBITSMASK) >> 465 FX_BIDICLASSBITS]; 466 } 467 } 468 } 469 470 void FX_BidiResolveExplicit(std::vector<baseType>& chars, 471 int32_t iCount, 472 int32_t iBaseLevel) { 473 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 474 ASSERT(iBaseLevel >= 0 && iBaseLevel <= FX_BIDIMAXLEVEL); 475 for (int32_t i = 0; i < iCount; i++) 476 chars[i].m_iBidiLevel = static_cast<int16_t>(iBaseLevel); 477 } 478 479 void FX_BidiResolveWeak(std::vector<baseType>& chars, 480 int32_t iCount, 481 int32_t iBaseLevel) { 482 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 483 iCount--; 484 if (iCount < 1) { 485 return; 486 } 487 baseType *pTC, *pTCNext; 488 int32_t iLevelCur = iBaseLevel; 489 int32_t iState = FX_IsOdd(iBaseLevel) ? FX_BWSxr : FX_BWSxl; 490 int32_t i = 0, iNum = 0, iClsCur, iClsRun, iClsNew, iAction; 491 for (; i <= iCount; i++) { 492 pTC = &chars[i]; 493 iClsCur = pTC->m_iBidiClass; 494 if (iClsCur == FX_BIDICLASS_BN) { 495 pTC->m_iBidiLevel = (int16_t)iLevelCur; 496 if (i == iCount && iLevelCur != iBaseLevel) { 497 iClsCur = FX_BidiDirection(iLevelCur); 498 pTC->m_iBidiClass = (int16_t)iClsCur; 499 } else if (i < iCount) { 500 pTCNext = &chars[i + 1]; 501 int32_t iLevelNext, iLevelNew; 502 iClsNew = pTCNext->m_iBidiClass; 503 iLevelNext = pTCNext->m_iBidiLevel; 504 if (iClsNew != FX_BIDICLASS_BN && iLevelCur != iLevelNext) { 505 iLevelNew = iLevelNext; 506 if (iLevelCur > iLevelNew) { 507 iLevelNew = iLevelCur; 508 } 509 pTC->m_iBidiLevel = (int16_t)iLevelNew; 510 iClsCur = FX_BidiDirection(iLevelNew); 511 pTC->m_iBidiClass = (int16_t)iClsCur; 512 iLevelCur = iLevelNext; 513 } else { 514 if (iNum > 0) { 515 iNum++; 516 } 517 continue; 518 } 519 } else { 520 if (iNum > 0) { 521 iNum++; 522 } 523 continue; 524 } 525 } 526 ASSERT(iClsCur <= FX_BIDICLASS_BN); 527 iAction = gc_FX_BidiWeakActions[iState][iClsCur]; 528 iClsRun = FX_BidiGetDeferredType(iAction); 529 if (iClsRun != FX_BIDIWEAKACTION_XX && iNum > 0) { 530 FX_BidiSetDeferredRun(chars, true, i, iNum, iClsRun); 531 iNum = 0; 532 } 533 iClsNew = FX_BidiGetResolvedType(iAction); 534 if (iClsNew != FX_BIDIWEAKACTION_XX) { 535 pTC->m_iBidiClass = (int16_t)iClsNew; 536 } 537 if (FX_BIDIWEAKACTION_IX & iAction) { 538 iNum++; 539 } 540 iState = gc_FX_BidiWeakStates[iState][iClsCur]; 541 } 542 if (iNum > 0) { 543 iClsCur = FX_BidiDirection(iBaseLevel); 544 iClsRun = FX_BidiGetDeferredType(gc_FX_BidiWeakActions[iState][iClsCur]); 545 if (iClsRun != FX_BIDIWEAKACTION_XX) { 546 FX_BidiSetDeferredRun(chars, true, i, iNum, iClsRun); 547 } 548 } 549 } 550 551 void FX_BidiResolveNeutrals(std::vector<baseType>& chars, 552 int32_t iCount, 553 int32_t iBaseLevel) { 554 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 555 ASSERT(iBaseLevel >= 0 && iBaseLevel <= FX_BIDIMAXLEVEL); 556 iCount--; 557 if (iCount < 1) { 558 return; 559 } 560 baseType* pTC; 561 int32_t iLevel = iBaseLevel; 562 int32_t iState = FX_IsOdd(iBaseLevel) ? FX_BNSr : FX_BNSl; 563 int32_t i = 0, iNum = 0, iClsCur, iClsRun, iClsNew, iAction; 564 for (; i <= iCount; i++) { 565 pTC = &chars[i]; 566 iClsCur = pTC->m_iBidiClass; 567 if (iClsCur == FX_BIDICLASS_BN) { 568 if (iNum) { 569 iNum++; 570 } 571 continue; 572 } 573 ASSERT(iClsCur < FX_BIDICLASS_AL); 574 iAction = gc_FX_BidiNeutralActions[iState][iClsCur]; 575 iClsRun = FX_BidiGetDeferredNeutrals(iAction, iLevel); 576 if (iClsRun != FX_BIDICLASS_N && iNum > 0) { 577 FX_BidiSetDeferredRun(chars, true, i, iNum, iClsRun); 578 iNum = 0; 579 } 580 iClsNew = FX_BidiGetResolvedNeutrals(iAction); 581 if (iClsNew != FX_BIDICLASS_N) { 582 pTC->m_iBidiClass = (int16_t)iClsNew; 583 } 584 if (FX_BIDINEUTRALACTION_In & iAction) { 585 iNum++; 586 } 587 iState = gc_FX_BidiNeutralStates[iState][iClsCur]; 588 iLevel = pTC->m_iBidiLevel; 589 } 590 if (iNum > 0) { 591 iClsCur = FX_BidiDirection(iLevel); 592 iClsRun = FX_BidiGetDeferredNeutrals( 593 gc_FX_BidiNeutralActions[iState][iClsCur], iLevel); 594 if (iClsRun != FX_BIDICLASS_N) { 595 FX_BidiSetDeferredRun(chars, true, i, iNum, iClsRun); 596 } 597 } 598 } 599 600 void FX_BidiResolveImplicit(std::vector<baseType>& chars, int32_t iCount) { 601 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 602 for (int32_t i = 0; i < iCount; i++) { 603 int32_t iCls = chars[i].m_iBidiClass; 604 if (iCls == FX_BIDICLASS_BN) { 605 continue; 606 } 607 ASSERT(iCls > FX_BIDICLASS_ON && iCls < FX_BIDICLASS_AL); 608 int32_t iLevel = chars[i].m_iBidiLevel; 609 iLevel += gc_FX_BidiAddLevel[FX_IsOdd(iLevel)][iCls - 1]; 610 chars[i].m_iBidiLevel = (int16_t)iLevel; 611 } 612 } 613 614 void FX_BidiResolveWhitespace(std::vector<baseType>& chars, 615 int32_t iCount, 616 int32_t iBaseLevel) { 617 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 618 ASSERT(iBaseLevel >= 0 && iBaseLevel <= FX_BIDIMAXLEVEL); 619 if (iCount < 1) { 620 return; 621 } 622 iCount--; 623 int32_t iLevel = iBaseLevel; 624 int32_t i = 0, iNum = 0; 625 for (; i <= iCount; i++) { 626 switch (chars[i].m_iBidiClass) { 627 case FX_BIDICLASS_WS: 628 iNum++; 629 break; 630 case FX_BIDICLASS_RLE: 631 case FX_BIDICLASS_LRE: 632 case FX_BIDICLASS_LRO: 633 case FX_BIDICLASS_RLO: 634 case FX_BIDICLASS_PDF: 635 case FX_BIDICLASS_BN: 636 chars[i].m_iBidiLevel = (int16_t)iLevel; 637 iNum++; 638 break; 639 case FX_BIDICLASS_S: 640 case FX_BIDICLASS_B: 641 if (iNum > 0) { 642 FX_BidiSetDeferredRun(chars, false, i, iNum, iBaseLevel); 643 } 644 chars[i].m_iBidiLevel = (int16_t)iBaseLevel; 645 iNum = 0; 646 break; 647 default: 648 iNum = 0; 649 break; 650 } 651 iLevel = chars[i].m_iBidiLevel; 652 } 653 if (iNum > 0) { 654 FX_BidiSetDeferredRun(chars, false, i, iNum, iBaseLevel); 655 } 656 } 657 658 int32_t FX_BidiReorderLevel(std::vector<baseType>& chars, 659 int32_t iCount, 660 int32_t iBaseLevel, 661 int32_t iStart, 662 bool bReverse) { 663 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 664 ASSERT(iBaseLevel >= 0 && iBaseLevel <= FX_BIDIMAXLEVEL); 665 ASSERT(iStart >= 0 && iStart < iCount); 666 if (iCount < 1) { 667 return 0; 668 } 669 bReverse = bReverse || FX_IsOdd(iBaseLevel); 670 int32_t i = iStart; 671 for (; i < iCount; i++) { 672 int32_t iLevel = chars[i].m_iBidiLevel; 673 if (iLevel == iBaseLevel) 674 continue; 675 if (iLevel < iBaseLevel) 676 break; 677 i += FX_BidiReorderLevel(chars, iCount, iBaseLevel + 1, i, bReverse) - 1; 678 } 679 int32_t iNum = i - iStart; 680 if (bReverse && iNum > 1) { 681 FX_BidiReverseString(chars, iStart, iNum); 682 } 683 return iNum; 684 } 685 686 void FX_BidiReorder(std::vector<baseType>& chars, 687 int32_t iCount, 688 int32_t iBaseLevel) { 689 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 690 ASSERT(iBaseLevel >= 0 && iBaseLevel <= FX_BIDIMAXLEVEL); 691 int32_t i = 0; 692 while (i < iCount) { 693 i += FX_BidiReorderLevel(chars, iCount, iBaseLevel, i, false); 694 } 695 } 696 697 void FX_BidiPosition(std::vector<baseType>& chars, int32_t iCount) { 698 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 699 for (int32_t i = 0; i < iCount; ++i) 700 chars[chars[i].m_iBidiPos].m_iBidiOrder = i; 701 } 702 703 void FX_BidiLine(std::vector<baseType>& chars, 704 int32_t iCount, 705 int32_t iBaseLevel) { 706 ASSERT(iCount >= 0 && iCount <= pdfium::CollectionSize<int32_t>(chars)); 707 if (iCount < 2) { 708 return; 709 } 710 FX_BidiClassify(chars, iCount, false); 711 FX_BidiResolveExplicit(chars, iCount, iBaseLevel); 712 FX_BidiResolveWeak(chars, iCount, iBaseLevel); 713 FX_BidiResolveNeutrals(chars, iCount, iBaseLevel); 714 FX_BidiResolveImplicit(chars, iCount); 715 FX_BidiClassify(chars, iCount, true); 716 FX_BidiResolveWhitespace(chars, iCount, iBaseLevel); 717 FX_BidiReorder(chars, iCount, iBaseLevel); 718 FX_BidiPosition(chars, iCount); 719 } 720 }; 721 722 void FX_BidiLine(std::vector<CFX_TxtChar>& chars, 723 int32_t iCount, 724 int32_t iBaseLevel) { 725 CFX_BidiLineTemplate<CFX_TxtChar> blt; 726 blt.FX_BidiLine(chars, iCount, iBaseLevel); 727 } 728 void FX_BidiLine(std::vector<CFX_RTFChar>& chars, 729 int32_t iCount, 730 int32_t iBaseLevel) { 731 CFX_BidiLineTemplate<CFX_RTFChar> blt; 732 blt.FX_BidiLine(chars, iCount, iBaseLevel); 733 } 734