1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fxcrt/fx_arabic.h" 8 9 #include <algorithm> 10 #include <vector> 11 12 #include "core/fxcrt/fx_memory.h" 13 #include "core/fxcrt/fx_unicode.h" 14 #include "third_party/base/stl_util.h" 15 16 namespace { 17 18 struct FX_ARBFORMTABLE { 19 uint16_t wIsolated; 20 uint16_t wFinal; 21 uint16_t wInitial; 22 uint16_t wMedial; 23 }; 24 25 struct FX_ARAALEF { 26 uint16_t wAlef; 27 uint16_t wIsolated; 28 }; 29 30 struct FX_ARASHADDA { 31 uint16_t wShadda; 32 uint16_t wIsolated; 33 }; 34 35 const FX_ARBFORMTABLE g_FX_ArabicFormTables[] = { 36 {0xFE81, 0xFE82, 0xFE81, 0xFE82}, {0xFE83, 0xFE84, 0xFE83, 0xFE84}, 37 {0xFE85, 0xFE86, 0xFE85, 0xFE86}, {0xFE87, 0xFE88, 0xFE87, 0xFE88}, 38 {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, {0xFE8D, 0xFE8E, 0xFE8D, 0xFE8E}, 39 {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, {0xFE93, 0xFE94, 0xFE93, 0xFE94}, 40 {0xFE95, 0xFE96, 0xFE97, 0xFE98}, {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, 41 {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, 42 {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, {0xFEA9, 0xFEAA, 0xFEA9, 0xFEAA}, 43 {0xFEAB, 0xFEAC, 0xFEAB, 0xFEAC}, {0xFEAD, 0xFEAE, 0xFEAD, 0xFEAE}, 44 {0xFEAF, 0xFEB0, 0xFEAF, 0xFEB0}, {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, 45 {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, 46 {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, 47 {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, 48 {0xFECD, 0xFECE, 0xFECF, 0xFED0}, {0x063B, 0x063B, 0x063B, 0x063B}, 49 {0x063C, 0x063C, 0x063C, 0x063C}, {0x063D, 0x063D, 0x063D, 0x063D}, 50 {0x063E, 0x063E, 0x063E, 0x063E}, {0x063F, 0x063F, 0x063F, 0x063F}, 51 {0x0640, 0x0640, 0x0640, 0x0640}, {0xFED1, 0xFED2, 0xFED3, 0xFED4}, 52 {0xFED5, 0xFED6, 0xFED7, 0xFED8}, {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, 53 {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, 54 {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, 55 {0xFEED, 0xFEEE, 0xFEED, 0xFEEE}, {0xFEEF, 0xFEF0, 0xFBFE, 0xFBFF}, 56 {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, {0x064B, 0x064B, 0x064B, 0x064B}, 57 {0x064C, 0x064C, 0x064C, 0x064C}, {0x064D, 0x064D, 0x064D, 0x064D}, 58 {0x064E, 0x064E, 0x064E, 0x064E}, {0x064F, 0x064F, 0x064F, 0x064F}, 59 {0x0650, 0x0650, 0x0650, 0x0650}, {0x0651, 0x0651, 0x0651, 0x0651}, 60 {0x0652, 0x0652, 0x0652, 0x0652}, {0x0653, 0x0653, 0x0653, 0x0653}, 61 {0x0654, 0x0654, 0x0654, 0x0654}, {0x0655, 0x0655, 0x0655, 0x0655}, 62 {0x0656, 0x0656, 0x0656, 0x0656}, {0x0657, 0x0657, 0x0657, 0x0657}, 63 {0x0658, 0x0658, 0x0658, 0x0658}, {0x0659, 0x0659, 0x0659, 0x0659}, 64 {0x065A, 0x065A, 0x065A, 0x065A}, {0x065B, 0x065B, 0x065B, 0x065B}, 65 {0x065C, 0x065C, 0x065C, 0x065C}, {0x065D, 0x065D, 0x065D, 0x065D}, 66 {0x065E, 0x065E, 0x065E, 0x065E}, {0x065F, 0x065F, 0x065F, 0x065F}, 67 {0x0660, 0x0660, 0x0660, 0x0660}, {0x0661, 0x0661, 0x0661, 0x0661}, 68 {0x0662, 0x0662, 0x0662, 0x0662}, {0x0663, 0x0663, 0x0663, 0x0663}, 69 {0x0664, 0x0664, 0x0664, 0x0664}, {0x0665, 0x0665, 0x0665, 0x0665}, 70 {0x0666, 0x0666, 0x0666, 0x0666}, {0x0667, 0x0667, 0x0667, 0x0667}, 71 {0x0668, 0x0668, 0x0668, 0x0668}, {0x0669, 0x0669, 0x0669, 0x0669}, 72 {0x066A, 0x066A, 0x066A, 0x066A}, {0x066B, 0x066B, 0x066B, 0x066B}, 73 {0x066C, 0x066C, 0x066C, 0x066C}, {0x066D, 0x066D, 0x066D, 0x066D}, 74 {0x066E, 0x066E, 0x066E, 0x066E}, {0x066F, 0x066F, 0x066F, 0x066F}, 75 {0x0670, 0x0670, 0x0670, 0x0670}, {0xFB50, 0xFB51, 0xFB50, 0xFB51}, 76 {0x0672, 0x0672, 0x0672, 0x0672}, {0x0673, 0x0673, 0x0673, 0x0673}, 77 {0x0674, 0x0674, 0x0674, 0x0674}, {0x0675, 0x0675, 0x0675, 0x0675}, 78 {0x0676, 0x0676, 0x0676, 0x0676}, {0x0677, 0x0677, 0x0677, 0x0677}, 79 {0x0678, 0x0678, 0x0678, 0x0678}, {0xFB66, 0xFB67, 0xFB68, 0xFB69}, 80 {0xFB5E, 0xFB5F, 0xFB60, 0xFB61}, {0xFB52, 0xFB53, 0xFB54, 0xFB55}, 81 {0x067C, 0x067C, 0x067C, 0x067C}, {0x067D, 0x067D, 0x067D, 0x067D}, 82 {0xFB56, 0xFB57, 0xFB58, 0xFB59}, {0xFB62, 0xFB63, 0xFB64, 0xFB65}, 83 {0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}, {0x0681, 0x0681, 0x0681, 0x0681}, 84 {0x0682, 0x0682, 0x0682, 0x0682}, {0xFB76, 0xFB77, 0xFB78, 0xFB79}, 85 {0xFB72, 0xFB73, 0xFB74, 0xFB75}, {0x0685, 0x0685, 0x0685, 0x0685}, 86 {0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}, {0xFB7E, 0xFB7F, 0xFB80, 0xFB81}, 87 {0xFB88, 0xFB89, 0xFB88, 0xFB89}, {0x0689, 0x0689, 0x0689, 0x0689}, 88 {0x068A, 0x068A, 0x068A, 0x068A}, {0x068B, 0x068B, 0x068B, 0x068B}, 89 {0xFB84, 0xFB85, 0xFB84, 0xFB85}, {0xFB82, 0xFB83, 0xFB82, 0xFB83}, 90 {0xFB86, 0xFB87, 0xFB86, 0xFB87}, {0x068F, 0x068F, 0x068F, 0x068F}, 91 {0x0690, 0x0690, 0x0690, 0x0690}, {0xFB8C, 0xFB8D, 0xFB8C, 0xFB8D}, 92 {0x0692, 0x0692, 0x0692, 0x0692}, {0x0693, 0x0693, 0x0693, 0x0693}, 93 {0x0694, 0x0694, 0x0694, 0x0694}, {0x0695, 0x0695, 0x0695, 0x0695}, 94 {0x0696, 0x0696, 0x0696, 0x0696}, {0x0697, 0x0697, 0x0697, 0x0697}, 95 {0xFB8A, 0xFB8B, 0xFB8A, 0xFB8B}, {0x0699, 0x0699, 0x0699, 0x0699}, 96 {0x069A, 0x069A, 0x069A, 0x069A}, {0x069B, 0x069B, 0x069B, 0x069B}, 97 {0x069C, 0x069C, 0x069C, 0x069C}, {0x069D, 0x069D, 0x069D, 0x069D}, 98 {0x069E, 0x069E, 0x069E, 0x069E}, {0x069F, 0x069F, 0x069F, 0x069F}, 99 {0x06A0, 0x06A0, 0x06A0, 0x06A0}, {0x06A1, 0x06A1, 0x06A1, 0x06A1}, 100 {0x06A2, 0x06A2, 0x06A2, 0x06A2}, {0x06A3, 0x06A3, 0x06A3, 0x06A3}, 101 {0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}, {0x06A5, 0x06A5, 0x06A5, 0x06A5}, 102 {0xFB6E, 0xFB6F, 0xFB70, 0xFB71}, {0x06A7, 0x06A7, 0x06A7, 0x06A7}, 103 {0x06A8, 0x06A8, 0x06A8, 0x06A8}, {0xFB8E, 0xFB8F, 0xFB90, 0xFB91}, 104 {0x06AA, 0x06AA, 0x06AA, 0x06AA}, {0x06AB, 0x06AB, 0x06AB, 0x06AB}, 105 {0x06AC, 0x06AC, 0x06AC, 0x06AC}, {0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}, 106 {0x06AE, 0x06AE, 0x06AE, 0x06AE}, {0xFB92, 0xFB93, 0xFB94, 0xFB95}, 107 {0x06B0, 0x06B0, 0x06B0, 0x06B0}, {0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}, 108 {0x06B2, 0x06B2, 0x06B2, 0x06B2}, {0xFB96, 0xFB97, 0xFB98, 0xFB99}, 109 {0x06B4, 0x06B4, 0x06B4, 0x06B4}, {0x06B5, 0x06B5, 0x06B5, 0x06B5}, 110 {0x06B6, 0x06B6, 0x06B6, 0x06B6}, {0x06B7, 0x06B7, 0x06B7, 0x06B7}, 111 {0x06B8, 0x06B8, 0x06B8, 0x06B8}, {0x06B9, 0x06B9, 0x06B9, 0x06B9}, 112 {0xFB9E, 0xFB9F, 0xFBE8, 0xFBE9}, {0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}, 113 {0x06BC, 0x06BC, 0x06BC, 0x06BC}, {0x06BD, 0x06BD, 0x06BD, 0x06BD}, 114 {0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}, {0x06BF, 0x06BF, 0x06BF, 0x06BF}, 115 {0xFBA4, 0xFBA5, 0xFBA4, 0xFBA5}, {0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}, 116 {0x06C2, 0x06C2, 0x06C2, 0x06C2}, {0x06C3, 0x06C3, 0x06C3, 0x06C3}, 117 {0x06C4, 0x06C4, 0x06C4, 0x06C4}, {0xFBE0, 0xFBE1, 0xFBE0, 0xFBE1}, 118 {0xFBD9, 0xFBDA, 0xFBD9, 0xFBDA}, {0xFBD7, 0xFBD8, 0xFBD7, 0xFBD8}, 119 {0xFBDB, 0xFBDC, 0xFBDB, 0xFBDC}, {0xFBE2, 0xFBE3, 0xFBE2, 0xFBE3}, 120 {0x06CA, 0x06CA, 0x06CA, 0x06CA}, {0xFBDE, 0xFBDF, 0xFBDE, 0xFBDF}, 121 {0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}, {0x06CD, 0x06CD, 0x06CD, 0x06CD}, 122 {0x06CE, 0x06CE, 0x06CE, 0x06CE}, {0x06CF, 0x06CF, 0x06CF, 0x06CF}, 123 {0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}, {0x06D1, 0x06D1, 0x06D1, 0x06D1}, 124 {0xFBAE, 0xFBAF, 0xFBAE, 0xFBAF}, {0xFBB0, 0xFBB1, 0xFBB0, 0xFBB1}, 125 {0x06D4, 0x06D4, 0x06D4, 0x06D4}, {0x06D5, 0x06D5, 0x06D5, 0x06D5}, 126 }; 127 128 const FX_ARAALEF gs_FX_AlefTable[] = { 129 {0x0622, 0xFEF5}, 130 {0x0623, 0xFEF7}, 131 {0x0625, 0xFEF9}, 132 {0x0627, 0xFEFB}, 133 }; 134 135 const FX_ARASHADDA gs_FX_ShaddaTable[] = { 136 {0x064C, 0xFC5E}, {0x064D, 0xFC5F}, {0x064E, 0xFC60}, 137 {0x064F, 0xFC61}, {0x0650, 0xFC62}, 138 }; 139 140 const FX_ARBFORMTABLE* GetArabicFormTable(wchar_t unicode) { 141 if (unicode < 0x622 || unicode > 0x6d5) 142 return nullptr; 143 return g_FX_ArabicFormTables + unicode - 0x622; 144 } 145 146 const FX_ARBFORMTABLE* ParseChar(const CFX_Char* pTC, 147 wchar_t* wChar, 148 FX_CHARTYPE* eType) { 149 if (!pTC) { 150 *eType = FX_CHARTYPE_Unknown; 151 *wChar = 0xFEFF; 152 return nullptr; 153 } 154 155 *eType = pTC->GetCharType(); 156 *wChar = static_cast<wchar_t>(pTC->char_code()); 157 const FX_ARBFORMTABLE* pFT = GetArabicFormTable(*wChar); 158 if (!pFT || *eType >= FX_CHARTYPE_ArabicNormal) 159 *eType = FX_CHARTYPE_Unknown; 160 161 return pFT; 162 } 163 164 wchar_t GetArabicFromAlefTable(wchar_t alef) { 165 static const size_t s_iAlefCount = FX_ArraySize(gs_FX_AlefTable); 166 for (size_t iStart = 0; iStart < s_iAlefCount; iStart++) { 167 const FX_ARAALEF& v = gs_FX_AlefTable[iStart]; 168 if (v.wAlef == alef) 169 return v.wIsolated; 170 } 171 return alef; 172 } 173 174 } // namespace 175 176 namespace pdfium { 177 namespace arabic { 178 179 wchar_t GetFormChar(wchar_t wch, wchar_t prev, wchar_t next) { 180 CFX_Char c(wch, FX_GetUnicodeProperties(wch)); 181 CFX_Char p(prev, FX_GetUnicodeProperties(prev)); 182 CFX_Char n(next, FX_GetUnicodeProperties(next)); 183 return GetFormChar(&c, &p, &n); 184 } 185 186 wchar_t GetFormChar(const CFX_Char* cur, 187 const CFX_Char* prev, 188 const CFX_Char* next) { 189 FX_CHARTYPE eCur; 190 wchar_t wCur; 191 const FX_ARBFORMTABLE* ft = ParseChar(cur, &wCur, &eCur); 192 if (eCur < FX_CHARTYPE_ArabicAlef || eCur >= FX_CHARTYPE_ArabicNormal) 193 return wCur; 194 195 FX_CHARTYPE ePrev; 196 wchar_t wPrev; 197 ParseChar(prev, &wPrev, &ePrev); 198 if (wPrev == 0x0644 && eCur == FX_CHARTYPE_ArabicAlef) 199 return 0xFEFF; 200 201 FX_CHARTYPE eNext; 202 wchar_t wNext; 203 ParseChar(next, &wNext, &eNext); 204 bool bAlef = (eNext == FX_CHARTYPE_ArabicAlef && wCur == 0x644); 205 if (ePrev < FX_CHARTYPE_ArabicAlef) { 206 if (bAlef) 207 return GetArabicFromAlefTable(wNext); 208 return (eNext < FX_CHARTYPE_ArabicAlef) ? ft->wIsolated : ft->wInitial; 209 } 210 211 if (bAlef) { 212 wCur = GetArabicFromAlefTable(wNext); 213 return (ePrev != FX_CHARTYPE_ArabicDistortion) ? wCur : ++wCur; 214 } 215 216 if (ePrev == FX_CHARTYPE_ArabicAlef || ePrev == FX_CHARTYPE_ArabicSpecial) 217 return (eNext < FX_CHARTYPE_ArabicAlef) ? ft->wIsolated : ft->wInitial; 218 return (eNext < FX_CHARTYPE_ArabicAlef) ? ft->wFinal : ft->wMedial; 219 } 220 221 } // namespace arabic 222 } // namespace pdfium 223 224 wchar_t FX_GetArabicFromShaddaTable(wchar_t shadda) { 225 static const size_t s_iShaddaCount = FX_ArraySize(gs_FX_ShaddaTable); 226 for (size_t iStart = 0; iStart < s_iShaddaCount; iStart++) { 227 const FX_ARASHADDA& v = gs_FX_ShaddaTable[iStart]; 228 if (v.wShadda == shadda) 229 return v.wIsolated; 230 } 231 return shadda; 232 } 233