1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2000-2013, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ushape.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2000jun29 14 * created by: Markus W. Scherer 15 * 16 * Arabic letter shaping implemented by Ayman Roshdy 17 */ 18 19 #include "unicode/utypes.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ustring.h" 22 #include "unicode/ushape.h" 23 #include "cmemory.h" 24 #include "putilimp.h" 25 #include "ustr_imp.h" 26 #include "ubidi_props.h" 27 #include "uassert.h" 28 29 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 30 31 /* 32 * This implementation is designed for 16-bit Unicode strings. 33 * The main assumption is that the Arabic characters and their 34 * presentation forms each fit into a single UChar. 35 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII 36 * characters. 37 */ 38 39 /* 40 * ### TODO in general for letter shaping: 41 * - the letter shaping code is UTF-16-unaware; needs update 42 * + especially invertBuffer()?! 43 * - needs to handle the "Arabic Tail" that is used in some legacy codepages 44 * as a glyph fragment of wide-glyph letters 45 * + IBM Unicode conversion tables map it to U+200B (ZWSP) 46 * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms 47 * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT 48 */ 49 50 /* definitions for Arabic letter shaping ------------------------------------ */ 51 52 #define IRRELEVANT 4 53 #define LAMTYPE 16 54 #define ALEFTYPE 32 55 #define LINKR 1 56 #define LINKL 2 57 #define APRESENT 8 58 #define SHADDA 64 59 #define CSHADDA 128 60 #define COMBINE (SHADDA+CSHADDA) 61 62 #define HAMZAFE_CHAR 0xfe80 63 #define HAMZA06_CHAR 0x0621 64 #define YEH_HAMZA_CHAR 0x0626 65 #define YEH_HAMZAFE_CHAR 0xFE89 66 #define LAMALEF_SPACE_SUB 0xFFFF 67 #define TASHKEEL_SPACE_SUB 0xFFFE 68 #define NEW_TAIL_CHAR 0xFE73 69 #define OLD_TAIL_CHAR 0x200B 70 #define LAM_CHAR 0x0644 71 #define SPACE_CHAR 0x0020 72 #define SHADDA_CHAR 0xFE7C 73 #define TATWEEL_CHAR 0x0640 74 #define SHADDA_TATWEEL_CHAR 0xFE7D 75 #define SHADDA06_CHAR 0x0651 76 77 #define SHAPE_MODE 0 78 #define DESHAPE_MODE 1 79 80 struct uShapeVariables { 81 UChar tailChar; 82 uint32_t uShapeLamalefBegin; 83 uint32_t uShapeLamalefEnd; 84 uint32_t uShapeTashkeelBegin; 85 uint32_t uShapeTashkeelEnd; 86 int spacesRelativeToTextBeginEnd; 87 }; 88 89 static const uint8_t tailFamilyIsolatedFinal[] = { 90 /* FEB1 */ 1, 91 /* FEB2 */ 1, 92 /* FEB3 */ 0, 93 /* FEB4 */ 0, 94 /* FEB5 */ 1, 95 /* FEB6 */ 1, 96 /* FEB7 */ 0, 97 /* FEB8 */ 0, 98 /* FEB9 */ 1, 99 /* FEBA */ 1, 100 /* FEBB */ 0, 101 /* FEBC */ 0, 102 /* FEBD */ 1, 103 /* FEBE */ 1 104 }; 105 106 static const uint8_t tashkeelMedial[] = { 107 /* FE70 */ 0, 108 /* FE71 */ 1, 109 /* FE72 */ 0, 110 /* FE73 */ 0, 111 /* FE74 */ 0, 112 /* FE75 */ 0, 113 /* FE76 */ 0, 114 /* FE77 */ 1, 115 /* FE78 */ 0, 116 /* FE79 */ 1, 117 /* FE7A */ 0, 118 /* FE7B */ 1, 119 /* FE7C */ 0, 120 /* FE7D */ 1, 121 /* FE7E */ 0, 122 /* FE7F */ 1 123 }; 124 125 static const UChar yehHamzaToYeh[] = 126 { 127 /* isolated*/ 0xFEEF, 128 /* final */ 0xFEF0 129 }; 130 131 static const uint8_t IrrelevantPos[] = { 132 0x0, 0x2, 0x4, 0x6, 133 0x8, 0xA, 0xC, 0xE 134 }; 135 136 137 static const UChar convertLamAlef[] = 138 { 139 /*FEF5*/ 0x0622, 140 /*FEF6*/ 0x0622, 141 /*FEF7*/ 0x0623, 142 /*FEF8*/ 0x0623, 143 /*FEF9*/ 0x0625, 144 /*FEFA*/ 0x0625, 145 /*FEFB*/ 0x0627, 146 /*FEFC*/ 0x0627 147 }; 148 149 static const UChar araLink[178]= 150 { 151 1 + 32 + 256 * 0x11,/*0x0622*/ 152 1 + 32 + 256 * 0x13,/*0x0623*/ 153 1 + 256 * 0x15,/*0x0624*/ 154 1 + 32 + 256 * 0x17,/*0x0625*/ 155 1 + 2 + 256 * 0x19,/*0x0626*/ 156 1 + 32 + 256 * 0x1D,/*0x0627*/ 157 1 + 2 + 256 * 0x1F,/*0x0628*/ 158 1 + 256 * 0x23,/*0x0629*/ 159 1 + 2 + 256 * 0x25,/*0x062A*/ 160 1 + 2 + 256 * 0x29,/*0x062B*/ 161 1 + 2 + 256 * 0x2D,/*0x062C*/ 162 1 + 2 + 256 * 0x31,/*0x062D*/ 163 1 + 2 + 256 * 0x35,/*0x062E*/ 164 1 + 256 * 0x39,/*0x062F*/ 165 1 + 256 * 0x3B,/*0x0630*/ 166 1 + 256 * 0x3D,/*0x0631*/ 167 1 + 256 * 0x3F,/*0x0632*/ 168 1 + 2 + 256 * 0x41,/*0x0633*/ 169 1 + 2 + 256 * 0x45,/*0x0634*/ 170 1 + 2 + 256 * 0x49,/*0x0635*/ 171 1 + 2 + 256 * 0x4D,/*0x0636*/ 172 1 + 2 + 256 * 0x51,/*0x0637*/ 173 1 + 2 + 256 * 0x55,/*0x0638*/ 174 1 + 2 + 256 * 0x59,/*0x0639*/ 175 1 + 2 + 256 * 0x5D,/*0x063A*/ 176 0, 0, 0, 0, 0, /*0x063B-0x063F*/ 177 1 + 2, /*0x0640*/ 178 1 + 2 + 256 * 0x61,/*0x0641*/ 179 1 + 2 + 256 * 0x65,/*0x0642*/ 180 1 + 2 + 256 * 0x69,/*0x0643*/ 181 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/ 182 1 + 2 + 256 * 0x71,/*0x0645*/ 183 1 + 2 + 256 * 0x75,/*0x0646*/ 184 1 + 2 + 256 * 0x79,/*0x0647*/ 185 1 + 256 * 0x7D,/*0x0648*/ 186 1 + 256 * 0x7F,/*0x0649*/ 187 1 + 2 + 256 * 0x81,/*0x064A*/ 188 4 + 256 * 1, /*0x064B*/ 189 4 + 128 + 256 * 1, /*0x064C*/ 190 4 + 128 + 256 * 1, /*0x064D*/ 191 4 + 128 + 256 * 1, /*0x064E*/ 192 4 + 128 + 256 * 1, /*0x064F*/ 193 4 + 128 + 256 * 1, /*0x0650*/ 194 4 + 64 + 256 * 3, /*0x0651*/ 195 4 + 256 * 1, /*0x0652*/ 196 4 + 256 * 7, /*0x0653*/ 197 4 + 256 * 8, /*0x0654*/ 198 4 + 256 * 8, /*0x0655*/ 199 4 + 256 * 1, /*0x0656*/ 200 0, 0, 0, 0, 0, /*0x0657-0x065B*/ 201 1 + 256 * 0x85,/*0x065C*/ 202 1 + 256 * 0x87,/*0x065D*/ 203 1 + 256 * 0x89,/*0x065E*/ 204 1 + 256 * 0x8B,/*0x065F*/ 205 0, 0, 0, 0, 0, /*0x0660-0x0664*/ 206 0, 0, 0, 0, 0, /*0x0665-0x0669*/ 207 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ 208 4 + 256 * 6, /*0x0670*/ 209 1 + 8 + 256 * 0x00,/*0x0671*/ 210 1 + 32, /*0x0672*/ 211 1 + 32, /*0x0673*/ 212 0, /*0x0674*/ 213 1 + 32, /*0x0675*/ 214 1, 1, /*0x0676-0x0677*/ 215 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x0678-0x067D*/ 216 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ 217 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ 218 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/ 219 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ 220 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 221 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 222 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ 223 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ 224 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 225 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 226 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B8-0x06BF*/ 227 1+2, 1+2, /*0x06B8-0x06BF*/ 228 1, /*0x06C0*/ 229 1+2, /*0x06C1*/ 230 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/ 231 1+2+8+256 * 0xAC, /*0x06CC*/ 232 1, /*0x06CD*/ 233 1+2, 1+2, 1+2, 1+2, /*0x06CE-0x06D1*/ 234 1, 1 /*0x06D2-0x06D3*/ 235 }; 236 237 static const uint8_t presALink[] = { 238 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 239 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 240 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 241 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 242 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 243 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 244 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 245 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 246 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 247 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 248 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 250 /*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 251 /*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252 /*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 253 /*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254 /*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255 /*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 256 /*FC6*/ 4, 4, 4 257 }; 258 259 static const uint8_t presBLink[]= 260 { 261 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 262 /*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2, 263 /*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0, 264 /*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 265 /*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 266 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 267 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 268 /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 269 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 270 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0 271 }; 272 273 static const UChar convertFBto06[] = 274 { 275 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 276 /*FB5*/ 0x671, 0x671, 0, 0, 0, 0, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0, 0, 277 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 278 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0, 279 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x698, 0x698, 0, 0, 0x6A9, 0x6A9, 280 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 283 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 284 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 285 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 286 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC 287 }; 288 289 static const UChar convertFEto06[] = 290 { 291 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 292 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652, 293 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628, 294 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, 295 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, 296 /*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636, 297 /*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A, 298 /*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644, 299 /*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649, 300 /*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F 301 }; 302 303 static const uint8_t shapeTable[4][4][4]= 304 { 305 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} }, 306 { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }, 307 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} }, 308 { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} } 309 }; 310 311 /* 312 * This function shapes European digits to Arabic-Indic digits 313 * in-place, writing over the input characters. 314 * Since we know that we are only looking for BMP code points, 315 * we can safely just work with code units (again, at least UTF-16). 316 */ 317 static void 318 _shapeToArabicDigitsWithContext(UChar *s, int32_t length, 319 UChar digitBase, 320 UBool isLogical, UBool lastStrongWasAL) { 321 const UBiDiProps *bdp; 322 int32_t i; 323 UChar c; 324 325 bdp=ubidi_getSingleton(); 326 digitBase-=0x30; 327 328 /* the iteration direction depends on the type of input */ 329 if(isLogical) { 330 for(i=0; i<length; ++i) { 331 c=s[i]; 332 switch(ubidi_getClass(bdp, c)) { 333 case U_LEFT_TO_RIGHT: /* L */ 334 case U_RIGHT_TO_LEFT: /* R */ 335 lastStrongWasAL=FALSE; 336 break; 337 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 338 lastStrongWasAL=TRUE; 339 break; 340 case U_EUROPEAN_NUMBER: /* EN */ 341 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 342 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 343 } 344 break; 345 default : 346 break; 347 } 348 } 349 } else { 350 for(i=length; i>0; /* pre-decrement in the body */) { 351 c=s[--i]; 352 switch(ubidi_getClass(bdp, c)) { 353 case U_LEFT_TO_RIGHT: /* L */ 354 case U_RIGHT_TO_LEFT: /* R */ 355 lastStrongWasAL=FALSE; 356 break; 357 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 358 lastStrongWasAL=TRUE; 359 break; 360 case U_EUROPEAN_NUMBER: /* EN */ 361 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 362 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 363 } 364 break; 365 default : 366 break; 367 } 368 } 369 } 370 } 371 372 /* 373 *Name : invertBuffer 374 *Function : This function inverts the buffer, it's used 375 * in case the user specifies the buffer to be 376 * U_SHAPE_TEXT_DIRECTION_LOGICAL 377 */ 378 static void 379 invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) { 380 UChar temp; 381 int32_t i=0,j=0; 382 for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) { 383 temp = buffer[i]; 384 buffer[i] = buffer[j]; 385 buffer[j] = temp; 386 } 387 } 388 389 /* 390 *Name : changeLamAlef 391 *Function : Converts the Alef characters into an equivalent 392 * LamAlef location in the 0x06xx Range, this is an 393 * intermediate stage in the operation of the program 394 * later it'll be converted into the 0xFExx LamAlefs 395 * in the shaping function. 396 */ 397 static inline UChar 398 changeLamAlef(UChar ch) { 399 switch(ch) { 400 case 0x0622 : 401 return 0x065C; 402 case 0x0623 : 403 return 0x065D; 404 case 0x0625 : 405 return 0x065E; 406 case 0x0627 : 407 return 0x065F; 408 } 409 return 0; 410 } 411 412 /* 413 *Name : getLink 414 *Function : Resolves the link between the characters as 415 * Arabic characters have four forms : 416 * Isolated, Initial, Middle and Final Form 417 */ 418 static UChar 419 getLink(UChar ch) { 420 if(ch >= 0x0622 && ch <= 0x06D3) { 421 return(araLink[ch-0x0622]); 422 } else if(ch == 0x200D) { 423 return(3); 424 } else if(ch >= 0x206D && ch <= 0x206F) { 425 return(4); 426 }else if(ch >= 0xFB50 && ch <= 0xFC62) { 427 return(presALink[ch-0xFB50]); 428 } else if(ch >= 0xFE70 && ch <= 0xFEFC) { 429 return(presBLink[ch-0xFE70]); 430 }else { 431 return(0); 432 } 433 } 434 435 /* 436 *Name : countSpaces 437 *Function : Counts the number of spaces 438 * at each end of the logical buffer 439 */ 440 static void 441 countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) { 442 int32_t i = 0; 443 int32_t countl = 0,countr = 0; 444 while((dest[i] == SPACE_CHAR) && (countl < size)) { 445 countl++; 446 i++; 447 } 448 if (countl < size) { /* the entire buffer is not all space */ 449 while(dest[size-1] == SPACE_CHAR) { 450 countr++; 451 size--; 452 } 453 } 454 *spacesCountl = countl; 455 *spacesCountr = countr; 456 } 457 458 /* 459 *Name : isTashkeelChar 460 *Function : Returns 1 for Tashkeel characters in 06 range else return 0 461 */ 462 static inline int32_t 463 isTashkeelChar(UChar ch) { 464 return (int32_t)( ch>=0x064B && ch<= 0x0652 ); 465 } 466 467 /* 468 *Name : isTashkeelCharFE 469 *Function : Returns 1 for Tashkeel characters in FE range else return 0 470 */ 471 static inline int32_t 472 isTashkeelCharFE(UChar ch) { 473 return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F ); 474 } 475 476 /* 477 *Name : isAlefChar 478 *Function : Returns 1 for Alef characters else return 0 479 */ 480 static inline int32_t 481 isAlefChar(UChar ch) { 482 return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) ); 483 } 484 485 /* 486 *Name : isLamAlefChar 487 *Function : Returns 1 for LamAlef characters else return 0 488 */ 489 static inline int32_t 490 isLamAlefChar(UChar ch) { 491 return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) ); 492 } 493 494 /*BIDI 495 *Name : isTailChar 496 *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0 497 */ 498 499 static inline int32_t 500 isTailChar(UChar ch) { 501 if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){ 502 return 1; 503 }else{ 504 return 0; 505 } 506 } 507 508 /*BIDI 509 *Name : isSeenTailFamilyChar 510 *Function : returns 1 if the character is a seen family isolated character 511 * in the FE range otherwise returns 0 512 */ 513 514 static inline int32_t 515 isSeenTailFamilyChar(UChar ch) { 516 if(ch >= 0xfeb1 && ch < 0xfebf){ 517 return tailFamilyIsolatedFinal [ch - 0xFEB1]; 518 }else{ 519 return 0; 520 } 521 } 522 523 /* Name : isSeenFamilyChar 524 * Function : returns 1 if the character is a seen family character in the Unicode 525 * 06 range otherwise returns 0 526 */ 527 528 static inline int32_t 529 isSeenFamilyChar(UChar ch){ 530 if(ch >= 0x633 && ch <= 0x636){ 531 return 1; 532 }else { 533 return 0; 534 } 535 } 536 537 /*Start of BIDI*/ 538 /* 539 *Name : isAlefMaksouraChar 540 *Function : returns 1 if the character is a Alef Maksoura Final or isolated 541 * otherwise returns 0 542 */ 543 static inline int32_t 544 isAlefMaksouraChar(UChar ch) { 545 return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); 546 } 547 548 /* 549 * Name : isYehHamzaChar 550 * Function : returns 1 if the character is a yehHamza isolated or yehhamza 551 * final is found otherwise returns 0 552 */ 553 static inline int32_t 554 isYehHamzaChar(UChar ch) { 555 if((ch==0xFE89)||(ch==0xFE8A)){ 556 return 1; 557 }else{ 558 return 0; 559 } 560 } 561 562 /* 563 * Name: isTashkeelOnTatweelChar 564 * Function: Checks if the Tashkeel Character is on Tatweel or not,if the 565 * Tashkeel on tatweel (FE range), it returns 1 else if the 566 * Tashkeel with shadda on tatweel (FC range)return 2 otherwise 567 * returns 0 568 */ 569 static inline int32_t 570 isTashkeelOnTatweelChar(UChar ch){ 571 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR) 572 { 573 return tashkeelMedial [ch - 0xFE70]; 574 }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) { 575 return 2; 576 }else{ 577 return 0; 578 } 579 } 580 581 /* 582 * Name: isIsolatedTashkeelChar 583 * Function: Checks if the Tashkeel Character is in the isolated form 584 * (i.e. Unicode FE range) returns 1 else if the Tashkeel 585 * with shadda is in the isolated form (i.e. Unicode FC range) 586 * returns 2 otherwise returns 0 587 */ 588 static inline int32_t 589 isIsolatedTashkeelChar(UChar ch){ 590 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){ 591 return (1 - tashkeelMedial [ch - 0xFE70]); 592 }else if(ch >= 0xfc5e && ch <= 0xfc63){ 593 return 1; 594 }else{ 595 return 0; 596 } 597 } 598 599 600 601 602 /* 603 *Name : calculateSize 604 *Function : This function calculates the destSize to be used in preflighting 605 * when the destSize is equal to 0 606 * It is used also to calculate the new destsize in case the 607 * destination buffer will be resized. 608 */ 609 610 static int32_t 611 calculateSize(const UChar *source, int32_t sourceLength, 612 int32_t destSize,uint32_t options) { 613 int32_t i = 0; 614 615 int lamAlefOption = 0; 616 int tashkeelOption = 0; 617 618 destSize = sourceLength; 619 620 if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE || 621 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) && 622 ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){ 623 lamAlefOption = 1; 624 } 625 if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE && 626 ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){ 627 tashkeelOption = 1; 628 } 629 630 if(lamAlefOption || tashkeelOption){ 631 if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 632 for(i=0;i<sourceLength;i++) { 633 if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) { 634 destSize--; 635 } 636 } 637 }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) { 638 for(i=0;i<sourceLength;i++) { 639 if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) { 640 destSize--; 641 } 642 } 643 } 644 } 645 646 if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){ 647 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 648 for(i=0;i<sourceLength;i++) { 649 if(isLamAlefChar(source[i])) 650 destSize++; 651 } 652 } 653 } 654 655 return destSize; 656 } 657 658 /* 659 *Name : handleTashkeelWithTatweel 660 *Function : Replaces Tashkeel as following: 661 * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. 662 * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace 663 * it with Shadda on Tatweel. 664 * Case 3: if the Tashkeel is isolated replace it with Space. 665 * 666 */ 667 static int32_t 668 handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength, 669 int32_t /*destSize*/, uint32_t /*options*/, 670 UErrorCode * /*pErrorCode*/) { 671 int i; 672 for(i = 0; i < sourceLength; i++){ 673 if((isTashkeelOnTatweelChar(dest[i]) == 1)){ 674 dest[i] = TATWEEL_CHAR; 675 }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){ 676 dest[i] = SHADDA_TATWEEL_CHAR; 677 }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){ 678 dest[i] = SPACE_CHAR; 679 } 680 } 681 return sourceLength; 682 } 683 684 685 686 /* 687 *Name : handleGeneratedSpaces 688 *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space, 689 * and Tashkeel to space. 690 * handleGeneratedSpaces function puts these generated spaces 691 * according to the options the user specifies. LamAlef and Tashkeel 692 * spaces can be replaced at begin, at end, at near or decrease the 693 * buffer size. 694 * 695 * There is also Auto option for LamAlef and tashkeel, which will put 696 * the spaces at end of the buffer (or end of text if the user used 697 * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END). 698 * 699 * If the text type was visual_LTR and the option 700 * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END 701 * option will place the space at the beginning of the buffer and 702 * BEGIN will place the space at the end of the buffer. 703 */ 704 705 static int32_t 706 handleGeneratedSpaces(UChar *dest, int32_t sourceLength, 707 int32_t destSize, 708 /* BEGIN android-changed */ 709 uint64_t options, 710 /* END android-changed */ 711 UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) { 712 713 int32_t i = 0, j = 0; 714 int32_t count = 0; 715 UChar *tempbuffer=NULL; 716 717 int lamAlefOption = 0; 718 int tashkeelOption = 0; 719 int shapingMode = SHAPE_MODE; 720 721 if (shapingMode == 0){ 722 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){ 723 lamAlefOption = 1; 724 } 725 if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){ 726 tashkeelOption = 1; 727 } 728 } 729 730 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 731 /* Test for NULL */ 732 if(tempbuffer == NULL) { 733 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 734 return 0; 735 } 736 737 738 if (lamAlefOption || tashkeelOption){ 739 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 740 741 i = j = 0; count = 0; 742 while(i < sourceLength) { 743 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 744 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 745 j--; 746 count++; 747 } else { 748 tempbuffer[j] = dest[i]; 749 } 750 i++; 751 j++; 752 } 753 754 while(count >= 0) { 755 tempbuffer[i] = 0x0000; 756 i--; 757 count--; 758 } 759 760 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 761 destSize = u_strlen(dest); 762 } 763 764 lamAlefOption = 0; 765 766 if (shapingMode == 0){ 767 /* BEGIN android-changed */ 768 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR && 769 (options&U_SHAPE_X_LAMALEF_SUB_ALTERNATE) == 0) { /* if set, leave LAMALEF_SPACE_SUB in the output */ 770 /* END android-changed */ 771 lamAlefOption = 1; 772 } 773 } 774 775 if (lamAlefOption){ 776 /* Lam+Alef is already shaped into LamAlef + FFFF */ 777 i = 0; 778 while(i < sourceLength) { 779 if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){ 780 dest[i] = SPACE_CHAR; 781 } 782 i++; 783 } 784 destSize = sourceLength; 785 } 786 lamAlefOption = 0; 787 tashkeelOption = 0; 788 789 if (shapingMode == 0) { 790 if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) || 791 (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 792 && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) { 793 lamAlefOption = 1; 794 } 795 if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) { 796 tashkeelOption = 1; 797 } 798 } 799 800 if(lamAlefOption || tashkeelOption){ 801 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 802 803 i = j = sourceLength; count = 0; 804 805 while(i >= 0) { 806 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 807 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 808 j++; 809 count++; 810 }else { 811 tempbuffer[j] = dest[i]; 812 } 813 i--; 814 j--; 815 } 816 817 for(i=0 ;i < count; i++){ 818 tempbuffer[i] = SPACE_CHAR; 819 } 820 821 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 822 destSize = sourceLength; 823 } 824 825 826 827 lamAlefOption = 0; 828 tashkeelOption = 0; 829 830 if (shapingMode == 0) { 831 if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) || 832 (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 833 && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) { 834 lamAlefOption = 1; 835 } 836 if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){ 837 tashkeelOption = 1; 838 } 839 } 840 841 if(lamAlefOption || tashkeelOption){ 842 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 843 844 i = j = 0; count = 0; 845 while(i < sourceLength) { 846 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 847 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 848 j--; 849 count++; 850 }else { 851 tempbuffer[j] = dest[i]; 852 } 853 i++; 854 j++; 855 } 856 857 while(count >= 0) { 858 tempbuffer[i] = SPACE_CHAR; 859 i--; 860 count--; 861 } 862 863 uprv_memcpy(dest,tempbuffer, sourceLength*U_SIZEOF_UCHAR); 864 destSize = sourceLength; 865 } 866 867 868 if(tempbuffer){ 869 uprv_free(tempbuffer); 870 } 871 872 return destSize; 873 } 874 875 /* 876 *Name :expandCompositCharAtBegin 877 *Function :Expands the LamAlef character to Lam and Alef consuming the required 878 * space from beginning of the buffer. If the text type was visual_LTR 879 * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected 880 * the spaces will be located at end of buffer. 881 * If there are no spaces to expand the LamAlef, an error 882 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 883 */ 884 885 static int32_t 886 expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { 887 int32_t i = 0,j = 0; 888 int32_t countl = 0; 889 UChar *tempbuffer=NULL; 890 891 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 892 893 /* Test for NULL */ 894 if(tempbuffer == NULL) { 895 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 896 return 0; 897 } 898 899 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 900 901 i = 0; 902 while(dest[i] == SPACE_CHAR) { 903 countl++; 904 i++; 905 } 906 907 i = j = sourceLength-1; 908 909 while(i >= 0 && j >= 0) { 910 if( countl>0 && isLamAlefChar(dest[i])) { 911 tempbuffer[j] = LAM_CHAR; 912 /* to ensure the array index is within the range */ 913 U_ASSERT(dest[i] >= 0xFEF5u 914 && dest[i]-0xFEF5u < sizeof(convertLamAlef)/sizeof(convertLamAlef[0])); 915 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; 916 j--; 917 countl--; 918 }else { 919 if( countl == 0 && isLamAlefChar(dest[i]) ) { 920 *pErrorCode=U_NO_SPACE_AVAILABLE; 921 } 922 tempbuffer[j] = dest[i]; 923 } 924 i--; 925 j--; 926 } 927 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 928 929 uprv_free(tempbuffer); 930 931 destSize = sourceLength; 932 return destSize; 933 } 934 935 /* 936 *Name : expandCompositCharAtEnd 937 *Function : Expands the LamAlef character to Lam and Alef consuming the 938 * required space from end of the buffer. If the text type was 939 * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 940 * was used, the spaces will be consumed from begin of buffer. If 941 * there are no spaces to expand the LamAlef, an error 942 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 943 */ 944 945 static int32_t 946 expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { 947 int32_t i = 0,j = 0; 948 949 int32_t countr = 0; 950 int32_t inpsize = sourceLength; 951 952 UChar *tempbuffer=NULL; 953 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 954 955 /* Test for NULL */ 956 if(tempbuffer == NULL) { 957 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 958 return 0; 959 } 960 961 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 962 963 while(dest[inpsize-1] == SPACE_CHAR) { 964 countr++; 965 inpsize--; 966 } 967 968 i = sourceLength - countr - 1; 969 j = sourceLength - 1; 970 971 while(i >= 0 && j >= 0) { 972 if( countr>0 && isLamAlefChar(dest[i]) ) { 973 tempbuffer[j] = LAM_CHAR; 974 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; 975 j--; 976 countr--; 977 }else { 978 if ((countr == 0) && isLamAlefChar(dest[i]) ) { 979 *pErrorCode=U_NO_SPACE_AVAILABLE; 980 } 981 tempbuffer[j] = dest[i]; 982 } 983 i--; 984 j--; 985 } 986 987 if(countr > 0) { 988 uprv_memmove(tempbuffer, tempbuffer+countr, sourceLength*U_SIZEOF_UCHAR); 989 if(u_strlen(tempbuffer) < sourceLength) { 990 for(i=sourceLength-1;i>=sourceLength-countr;i--) { 991 tempbuffer[i] = SPACE_CHAR; 992 } 993 } 994 } 995 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 996 997 uprv_free(tempbuffer); 998 999 destSize = sourceLength; 1000 return destSize; 1001 } 1002 1003 /* 1004 *Name : expandCompositCharAtNear 1005 *Function : Expands the LamAlef character into Lam + Alef, YehHamza character 1006 * into Yeh + Hamza, SeenFamily character into SeenFamily character 1007 * + Tail, while consuming the space next to the character. 1008 * If there are no spaces next to the character, an error 1009 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 1010 */ 1011 1012 static int32_t 1013 expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode, 1014 int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) { 1015 int32_t i = 0; 1016 1017 1018 UChar lamalefChar, yehhamzaChar; 1019 1020 for(i = 0 ;i<=sourceLength-1;i++) { 1021 if (seenTailOption && isSeenTailFamilyChar(dest[i])) { 1022 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1023 dest[i-1] = shapeVars.tailChar; 1024 }else { 1025 *pErrorCode=U_NO_SPACE_AVAILABLE; 1026 } 1027 }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) { 1028 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1029 yehhamzaChar = dest[i]; 1030 dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR]; 1031 dest[i-1] = HAMZAFE_CHAR; 1032 }else { 1033 1034 *pErrorCode=U_NO_SPACE_AVAILABLE; 1035 } 1036 }else if(lamAlefOption && isLamAlefChar(dest[i+1])) { 1037 if(dest[i] == SPACE_CHAR){ 1038 lamalefChar = dest[i+1]; 1039 dest[i+1] = LAM_CHAR; 1040 dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ]; 1041 }else { 1042 *pErrorCode=U_NO_SPACE_AVAILABLE; 1043 } 1044 } 1045 } 1046 destSize = sourceLength; 1047 return destSize; 1048 } 1049 /* 1050 * Name : expandCompositChar 1051 * Function : LamAlef, need special handling, since it expands from one 1052 * character into two characters while shaping or deshaping. 1053 * In order to expand it, near or far spaces according to the 1054 * options user specifies. Also buffer size can be increased. 1055 * 1056 * For SeenFamily characters and YehHamza only the near option is 1057 * supported, while for LamAlef we can take spaces from begin, end, 1058 * near or even increase the buffer size. 1059 * There is also the Auto option for LamAlef only, which will first 1060 * search for a space at end, begin then near, respectively. 1061 * If there are no spaces to expand these characters, an error will be set to 1062 * U_NO_SPACE_AVAILABLE as defined in utypes.h 1063 */ 1064 1065 static int32_t 1066 expandCompositChar(UChar *dest, int32_t sourceLength, 1067 int32_t destSize,uint32_t options, 1068 UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) { 1069 1070 int32_t i = 0,j = 0; 1071 1072 UChar *tempbuffer=NULL; 1073 int yehHamzaOption = 0; 1074 int seenTailOption = 0; 1075 int lamAlefOption = 0; 1076 1077 if (shapingMode == 1){ 1078 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){ 1079 1080 if(shapeVars.spacesRelativeToTextBeginEnd == 0) { 1081 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1082 1083 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1084 *pErrorCode = U_ZERO_ERROR; 1085 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1086 } 1087 }else { 1088 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1089 1090 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1091 *pErrorCode = U_ZERO_ERROR; 1092 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1093 } 1094 } 1095 1096 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1097 *pErrorCode = U_ZERO_ERROR; 1098 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1099 seenTailOption, 1,shapeVars); 1100 } 1101 } 1102 } 1103 1104 if (shapingMode == 1){ 1105 if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){ 1106 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1107 } 1108 } 1109 1110 if (shapingMode == 1){ 1111 if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){ 1112 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1113 } 1114 } 1115 1116 if (shapingMode == 0){ 1117 if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){ 1118 yehHamzaOption = 1; 1119 } 1120 if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){ 1121 seenTailOption = 1; 1122 } 1123 } 1124 if (shapingMode == 1) { 1125 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) { 1126 lamAlefOption = 1; 1127 } 1128 } 1129 1130 1131 if (yehHamzaOption || seenTailOption || lamAlefOption){ 1132 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1133 seenTailOption,lamAlefOption,shapeVars); 1134 } 1135 1136 1137 if (shapingMode == 1){ 1138 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 1139 destSize = calculateSize(dest,sourceLength,destSize,options); 1140 tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR); 1141 1142 /* Test for NULL */ 1143 if(tempbuffer == NULL) { 1144 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1145 return 0; 1146 } 1147 1148 uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR); 1149 1150 i = j = 0; 1151 while(i < destSize && j < destSize) { 1152 if(isLamAlefChar(dest[i]) ) { 1153 tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ]; 1154 tempbuffer[j+1] = LAM_CHAR; 1155 j++; 1156 }else { 1157 tempbuffer[j] = dest[i]; 1158 } 1159 i++; 1160 j++; 1161 } 1162 1163 uprv_memcpy(dest, tempbuffer, destSize*U_SIZEOF_UCHAR); 1164 } 1165 } 1166 1167 if(tempbuffer) { 1168 uprv_free(tempbuffer); 1169 } 1170 return destSize; 1171 } 1172 1173 /* 1174 *Name : shapeUnicode 1175 *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped 1176 * arabic Unicode buffer in FExx Range 1177 */ 1178 static int32_t 1179 shapeUnicode(UChar *dest, int32_t sourceLength, 1180 int32_t destSize,uint32_t options, 1181 UErrorCode *pErrorCode, 1182 int tashkeelFlag, struct uShapeVariables shapeVars) { 1183 1184 int32_t i, iend; 1185 int32_t step; 1186 int32_t lastPos,Nx, Nw; 1187 unsigned int Shape; 1188 int32_t lamalef_found = 0; 1189 int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0; 1190 UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0; 1191 UChar wLamalef; 1192 1193 /* 1194 * Converts the input buffer from FExx Range into 06xx Range 1195 * to make sure that all characters are in the 06xx range 1196 * even the lamalef is converted to the special region in 1197 * the 06xx range 1198 */ 1199 if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) { 1200 for (i = 0; i < sourceLength; i++) { 1201 UChar inputChar = dest[i]; 1202 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { 1203 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1204 if (c != 0) 1205 dest[i] = c; 1206 } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) { 1207 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ; 1208 } else { 1209 dest[i] = inputChar ; 1210 } 1211 } 1212 } 1213 1214 1215 /* sets the index to the end of the buffer, together with the step point to -1 */ 1216 i = sourceLength - 1; 1217 iend = -1; 1218 step = -1; 1219 1220 /* 1221 * This function resolves the link between the characters . 1222 * Arabic characters have four forms : 1223 * Isolated Form, Initial Form, Middle Form and Final Form 1224 */ 1225 currLink = getLink(dest[i]); 1226 1227 lastPos = i; 1228 Nx = -2, Nw = 0; 1229 1230 while (i != iend) { 1231 /* If high byte of currLink > 0 then more than one shape */ 1232 if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) { 1233 Nw = i + step; 1234 while (Nx < 0) { /* we need to know about next char */ 1235 if(Nw == iend) { 1236 nextLink = 0; 1237 Nx = 3000; 1238 } else { 1239 nextLink = getLink(dest[Nw]); 1240 if((nextLink & IRRELEVANT) == 0) { 1241 Nx = Nw; 1242 } else { 1243 Nw = Nw + step; 1244 } 1245 } 1246 } 1247 1248 if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) { 1249 lamalef_found = 1; 1250 wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */ 1251 if ( wLamalef != 0) { 1252 dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */ 1253 dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */ 1254 i=lastPos; /* unicode private use area, this is done to make */ 1255 } /* sure that removeLamAlefSpaces() handles only the */ 1256 lastLink = prevLink; /* spaces generated during lamalef generation. */ 1257 currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */ 1258 } /* in removeLamAlefSpaces() */ 1259 1260 if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ 1261 if ( isSeenFamilyChar(dest[i])) { 1262 seenfamFound = 1; 1263 } else if (dest[i] == YEH_HAMZA_CHAR) { 1264 yehhamzaFound = 1; 1265 } 1266 } 1267 else if(i==0){ 1268 if ( isSeenFamilyChar(dest[i])){ 1269 seenfamFound = 1; 1270 } else if (dest[i] == YEH_HAMZA_CHAR) { 1271 yehhamzaFound = 1; 1272 } 1273 } 1274 1275 /* 1276 * get the proper shape according to link ability of neighbors 1277 * and of character; depends on the order of the shapes 1278 * (isolated, initial, middle, final) in the compatibility area 1279 */ 1280 Shape = shapeTable[nextLink & (LINKR + LINKL)] 1281 [lastLink & (LINKR + LINKL)] 1282 [currLink & (LINKR + LINKL)]; 1283 1284 if ((currLink & (LINKR+LINKL)) == 1) { 1285 Shape &= 1; 1286 } else if(isTashkeelChar(dest[i])) { 1287 if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) && 1288 dest[i] != 0x064C && dest[i] != 0x064D ) 1289 { 1290 Shape = 1; 1291 if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) { 1292 Shape = 0; 1293 } 1294 } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){ 1295 Shape = 1; 1296 } else { 1297 Shape = 0; 1298 } 1299 } 1300 if ((dest[i] ^ 0x0600) < 0x100) { 1301 if ( isTashkeelChar(dest[i]) ){ 1302 if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){ 1303 dest[i] = TASHKEEL_SPACE_SUB; 1304 tashkeelFound = 1; 1305 } else { 1306 /* to ensure the array index is within the range */ 1307 U_ASSERT(dest[i] >= 0x064Bu 1308 && dest[i]-0x064Bu < sizeof(IrrelevantPos)/sizeof(IrrelevantPos[0])); 1309 dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; 1310 } 1311 }else if ((currLink & APRESENT) > 0) { 1312 dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); 1313 }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) { 1314 dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape); 1315 } 1316 } 1317 } 1318 1319 /* move one notch forward */ 1320 if ((currLink & IRRELEVANT) == 0) { 1321 prevLink = lastLink; 1322 lastLink = currLink; 1323 lastPos = i; 1324 } 1325 1326 i = i + step; 1327 if (i == Nx) { 1328 currLink = nextLink; 1329 Nx = -2; 1330 } else if(i != iend) { 1331 currLink = getLink(dest[i]); 1332 } 1333 } 1334 destSize = sourceLength; 1335 if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){ 1336 destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars); 1337 } 1338 1339 if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) { 1340 destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars); 1341 } 1342 return destSize; 1343 } 1344 1345 /* 1346 *Name : deShapeUnicode 1347 *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped 1348 * arabic Unicode buffer in 06xx Range 1349 */ 1350 static int32_t 1351 deShapeUnicode(UChar *dest, int32_t sourceLength, 1352 int32_t destSize,uint32_t options, 1353 UErrorCode *pErrorCode, struct uShapeVariables shapeVars) { 1354 int32_t i = 0; 1355 int32_t lamalef_found = 0; 1356 int32_t yehHamzaComposeEnabled = 0; 1357 int32_t seenComposeEnabled = 0; 1358 1359 yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0; 1360 seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0; 1361 1362 /* 1363 *This for loop changes the buffer from the Unicode FE range to 1364 *the Unicode 06 range 1365 */ 1366 1367 for(i = 0; i < sourceLength; i++) { 1368 UChar inputChar = dest[i]; 1369 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */ 1370 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1371 if (c != 0) 1372 dest[i] = c; 1373 } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR)) 1374 && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) { 1375 dest[i] = SPACE_CHAR; 1376 dest[i+1] = YEH_HAMZA_CHAR; 1377 } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1)) 1378 && (isSeenTailFamilyChar(dest[i+1])) ) { 1379 dest[i] = SPACE_CHAR; 1380 } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */ 1381 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ]; 1382 } else { 1383 dest[i] = inputChar ; 1384 } 1385 1386 if( isLamAlefChar(dest[i]) ) 1387 lamalef_found = 1; 1388 } 1389 1390 destSize = sourceLength; 1391 if (lamalef_found != 0){ 1392 destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars); 1393 } 1394 return destSize; 1395 } 1396 1397 /* 1398 **************************************** 1399 * u_shapeArabic 1400 **************************************** 1401 */ 1402 1403 /* BEGIN android-changed */ 1404 U_CAPI int32_t U_EXPORT2 1405 u_shapeArabic(const UChar *source, int32_t sourceLength, 1406 UChar *dest, int32_t destCapacity, 1407 uint64_t options, 1408 UErrorCode *pErrorCode) { 1409 /* END android-changed */ 1410 1411 int32_t destLength; 1412 struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0}; 1413 1414 /* usual error checking */ 1415 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1416 return 0; 1417 } 1418 1419 /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */ 1420 if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 || 1421 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1422 ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) || 1423 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1424 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) || 1425 (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED || 1426 (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED || 1427 ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE && 1428 (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) || 1429 ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL && 1430 (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) 1431 ) 1432 { 1433 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1434 return 0; 1435 } 1436 /* Validate lamalef options */ 1437 if(((options&U_SHAPE_LAMALEF_MASK) > 0)&& 1438 !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) || 1439 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) || 1440 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )|| 1441 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) || 1442 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR))) 1443 { 1444 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1445 return 0; 1446 } 1447 /* Validate Tashkeel options */ 1448 if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&& 1449 !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) || 1450 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END ) 1451 ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )|| 1452 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL))) 1453 { 1454 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1455 return 0; 1456 } 1457 /* determine the source length */ 1458 if(sourceLength==-1) { 1459 sourceLength=u_strlen(source); 1460 } 1461 if(sourceLength<=0) { 1462 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 1463 } 1464 1465 /* check that source and destination do not overlap */ 1466 if( dest!=NULL && 1467 ((source<=dest && dest<source+sourceLength) || 1468 (dest<=source && source<dest+destCapacity))) { 1469 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1470 return 0; 1471 } 1472 1473 /* Does Options contain the new Seen Tail Unicode code point option */ 1474 if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){ 1475 shapeVars.tailChar = NEW_TAIL_CHAR; 1476 }else { 1477 shapeVars.tailChar = OLD_TAIL_CHAR; 1478 } 1479 1480 if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) { 1481 UChar buffer[300]; 1482 UChar *tempbuffer, *tempsource = NULL; 1483 int32_t outputSize, spacesCountl=0, spacesCountr=0; 1484 1485 if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) { 1486 int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL; 1487 int32_t aggregate_tashkeel = 1488 (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) == 1489 (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED); 1490 int step=logical_order?1:-1; 1491 int j=logical_order?-1:2*sourceLength; 1492 int i=logical_order?-1:sourceLength; 1493 int end=logical_order?sourceLength:-1; 1494 int aggregation_possible = 1; 1495 UChar prev = 0; 1496 UChar prevLink, currLink = 0; 1497 int newSourceLength = 0; 1498 tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR); 1499 if(tempsource == NULL) { 1500 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1501 return 0; 1502 } 1503 1504 while ((i+=step) != end) { 1505 prevLink = currLink; 1506 currLink = getLink(source[i]); 1507 if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) { 1508 aggregation_possible = 0; 1509 tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E; 1510 currLink = getLink(tempsource[j]); 1511 } else { 1512 aggregation_possible = 1; 1513 tempsource[j+=step] = source[i]; 1514 prev = source[i]; 1515 newSourceLength++; 1516 } 1517 } 1518 source = tempsource+(logical_order?0:j); 1519 sourceLength = newSourceLength; 1520 } 1521 1522 /* calculate destination size */ 1523 /* TODO: do we ever need to do this pure preflighting? */ 1524 if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) || 1525 ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) { 1526 outputSize=calculateSize(source,sourceLength,destCapacity,options); 1527 } else { 1528 outputSize=sourceLength; 1529 } 1530 1531 if(outputSize>destCapacity) { 1532 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1533 if (tempsource != NULL) uprv_free(tempsource); 1534 return outputSize; 1535 } 1536 1537 /* 1538 * need a temporary buffer of size max(outputSize, sourceLength) 1539 * because at first we copy source->temp 1540 */ 1541 if(sourceLength>outputSize) { 1542 outputSize=sourceLength; 1543 } 1544 1545 /* Start of Arabic letter shaping part */ 1546 if(outputSize<=LENGTHOF(buffer)) { 1547 outputSize=LENGTHOF(buffer); 1548 tempbuffer=buffer; 1549 } else { 1550 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); 1551 1552 /*Test for NULL*/ 1553 if(tempbuffer == NULL) { 1554 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1555 if (tempsource != NULL) uprv_free(tempsource); 1556 return 0; 1557 } 1558 } 1559 uprv_memcpy(tempbuffer, source, sourceLength*U_SIZEOF_UCHAR); 1560 if (tempsource != NULL){ 1561 uprv_free(tempsource); 1562 } 1563 1564 if(sourceLength<outputSize) { 1565 uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR); 1566 } 1567 1568 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1569 countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr); 1570 invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr); 1571 } 1572 1573 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 1574 if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) { 1575 shapeVars.spacesRelativeToTextBeginEnd = 1; 1576 shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END; 1577 shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN; 1578 shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END; 1579 shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN; 1580 } 1581 } 1582 1583 switch(options&U_SHAPE_LETTERS_MASK) { 1584 case U_SHAPE_LETTERS_SHAPE : 1585 if( (options&U_SHAPE_TASHKEEL_MASK)> 0 1586 && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) { 1587 /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */ 1588 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars); 1589 }else { 1590 /* default Call the shaping function with tashkeel flag == 1 */ 1591 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars); 1592 1593 /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/ 1594 if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){ 1595 destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode); 1596 } 1597 } 1598 break; 1599 case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED : 1600 /* Call the shaping function with tashkeel flag == 0 */ 1601 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars); 1602 break; 1603 1604 case U_SHAPE_LETTERS_UNSHAPE : 1605 /* Call the deshaping function */ 1606 destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars); 1607 break; 1608 default : 1609 /* will never occur because of validity checks above */ 1610 destLength = 0; 1611 break; 1612 } 1613 1614 /* 1615 * TODO: (markus 2002aug01) 1616 * For as long as we always preflight the outputSize above 1617 * we should U_ASSERT(outputSize==destLength) 1618 * except for the adjustment above before the tempbuffer allocation 1619 */ 1620 1621 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1622 countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr); 1623 invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr); 1624 } 1625 uprv_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)*U_SIZEOF_UCHAR); 1626 1627 if(tempbuffer!=buffer) { 1628 uprv_free(tempbuffer); 1629 } 1630 1631 if(destLength>destCapacity) { 1632 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1633 return destLength; 1634 } 1635 1636 /* End of Arabic letter shaping part */ 1637 } else { 1638 /* 1639 * No letter shaping: 1640 * just make sure the destination is large enough and copy the string. 1641 */ 1642 if(destCapacity<sourceLength) { 1643 /* this catches preflighting, too */ 1644 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1645 return sourceLength; 1646 } 1647 uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR); 1648 destLength=sourceLength; 1649 } 1650 1651 /* 1652 * Perform number shaping. 1653 * With UTF-16 or UTF-32, the length of the string is constant. 1654 * The easiest way to do this is to operate on the destination and 1655 * "shape" the digits in-place. 1656 */ 1657 if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) { 1658 UChar digitBase; 1659 int32_t i; 1660 1661 /* select the requested digit group */ 1662 switch(options&U_SHAPE_DIGIT_TYPE_MASK) { 1663 case U_SHAPE_DIGIT_TYPE_AN: 1664 digitBase=0x660; /* Unicode: "Arabic-Indic digits" */ 1665 break; 1666 case U_SHAPE_DIGIT_TYPE_AN_EXTENDED: 1667 digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */ 1668 break; 1669 default: 1670 /* will never occur because of validity checks above */ 1671 digitBase=0; 1672 break; 1673 } 1674 1675 /* perform the requested operation */ 1676 switch(options&U_SHAPE_DIGITS_MASK) { 1677 case U_SHAPE_DIGITS_EN2AN: 1678 /* add (digitBase-'0') to each European (ASCII) digit code point */ 1679 digitBase-=0x30; 1680 for(i=0; i<destLength; ++i) { 1681 if(((uint32_t)dest[i]-0x30)<10) { 1682 dest[i]+=digitBase; 1683 } 1684 } 1685 break; 1686 case U_SHAPE_DIGITS_AN2EN: 1687 /* subtract (digitBase-'0') from each Arabic digit code point */ 1688 for(i=0; i<destLength; ++i) { 1689 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) { 1690 dest[i]-=digitBase-0x30; 1691 } 1692 } 1693 break; 1694 case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: 1695 _shapeToArabicDigitsWithContext(dest, destLength, 1696 digitBase, 1697 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1698 FALSE); 1699 break; 1700 case U_SHAPE_DIGITS_ALEN2AN_INIT_AL: 1701 _shapeToArabicDigitsWithContext(dest, destLength, 1702 digitBase, 1703 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1704 TRUE); 1705 break; 1706 default: 1707 /* will never occur because of validity checks above */ 1708 break; 1709 } 1710 } 1711 1712 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 1713 } 1714