1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2000-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ushape.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2000jun29 14 * created by: Markus W. Scherer 15 * 16 * Arabic letter shaping implemented by Ayman Roshdy 17 */ 18 19 #include "unicode/utypes.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ustring.h" 22 #include "unicode/ushape.h" 23 #include "cmemory.h" 24 #include "putilimp.h" 25 #include "ustr_imp.h" 26 #include "ubidi_props.h" 27 28 #if UTF_SIZE<16 29 /* 30 * This implementation assumes that the internal encoding is UTF-16 31 * or UTF-32, not UTF-8. 32 * The main assumption is that the Arabic characters and their 33 * presentation forms each fit into a single UChar. 34 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII 35 * characters. 36 */ 37 # error This implementation assumes UTF-16 or UTF-32 (check UTF_SIZE) 38 #endif 39 40 /* 41 * ### TODO in general for letter shaping: 42 * - the letter shaping code is UTF-16-unaware; needs update 43 * + especially invertBuffer()?! 44 * - needs to handle the "Arabic Tail" that is used in some legacy codepages 45 * as a glyph fragment of wide-glyph letters 46 * + IBM Unicode conversion tables map it to U+200B (ZWSP) 47 * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms 48 */ 49 50 /* definitions for Arabic letter shaping ------------------------------------ */ 51 52 #define IRRELEVANT 4 53 #define LAMTYPE 16 54 #define ALEFTYPE 32 55 #define LINKR 1 56 #define LINKL 2 57 #define APRESENT 8 58 #define SHADDA 64 59 #define CSHADDA 128 60 #define COMBINE (SHADDA+CSHADDA) 61 62 #define HAMZAFE_CHAR 0xfe80 63 #define HAMZA06_CHAR 0x0621 64 #define YEH_HAMZA_CHAR 0x0626 65 #define YEH_HAMZAFE_CHAR 0xFE89 66 #define LAMALEF_SPACE_SUB 0xFFFF 67 #define TASHKEEL_SPACE_SUB 0xFFFE 68 #define NEW_TAIL_CHAR 0xFE73 69 #define OLD_TAIL_CHAR 0x200B 70 #define LAM_CHAR 0x0644 71 #define SPACE_CHAR 0x0020 72 #define SHADDA_CHAR 0xFE7C 73 #define TATWEEL_CHAR 0x0640 74 #define SHADDA_TATWEEL_CHAR 0xFE7D 75 76 #define SHAPE_MODE 0 77 #define DESHAPE_MODE 1 78 79 static UChar tailChar = OLD_TAIL_CHAR; 80 static uint32_t uShapeLamalefBegin = U_SHAPE_LAMALEF_BEGIN; 81 static uint32_t uShapeLamalefEnd = U_SHAPE_LAMALEF_END; 82 static uint32_t uShapeTashkeelBegin = U_SHAPE_TASHKEEL_BEGIN; 83 static uint32_t uShapeTashkeelEnd = U_SHAPE_TASHKEEL_END; 84 static int spacesRelativeToTextBeginEnd = 0; 85 86 static const uint8_t tailFamilyIsolatedFinal[] = { 87 /* FEB1 */ 1, 88 /* FEB2 */ 1, 89 /* FEB3 */ 0, 90 /* FEB4 */ 0, 91 /* FEB5 */ 1, 92 /* FEB6 */ 1, 93 /* FEB7 */ 0, 94 /* FEB8 */ 0, 95 /* FEB9 */ 1, 96 /* FEBA */ 1, 97 /* FEBB */ 0, 98 /* FEBC */ 0, 99 /* FEBD */ 1, 100 /* FEBE */ 1 101 }; 102 103 static const uint8_t tashkeelMedial[] = { 104 /* FE70 */ 0, 105 /* FE71 */ 1, 106 /* FE72 */ 0, 107 /* FE73 */ 0, 108 /* FE74 */ 0, 109 /* FE75 */ 0, 110 /* FE76 */ 0, 111 /* FE77 */ 1, 112 /* FE78 */ 0, 113 /* FE79 */ 1, 114 /* FE7A */ 0, 115 /* FE7B */ 1, 116 /* FE7C */ 0, 117 /* FE7D */ 1, 118 /* FE7E */ 0, 119 /* FE7F */ 1 120 }; 121 122 static const UChar yehHamzaToYeh[] = 123 { 124 /* isolated*/ 0xFEEF, 125 /* final */ 0xFEF0 126 }; 127 128 static const uint8_t IrrelevantPos[] = { 129 0x0, 0x2, 0x4, 0x6, 130 0x8, 0xA, 0xC, 0xE 131 }; 132 133 134 static const UChar convertLamAlef[] = 135 { 136 /*FEF5*/ 0x0622, 137 /*FEF6*/ 0x0622, 138 /*FEF7*/ 0x0623, 139 /*FEF8*/ 0x0623, 140 /*FEF9*/ 0x0625, 141 /*FEFA*/ 0x0625, 142 /*FEFB*/ 0x0627, 143 /*FEFC*/ 0x0627 144 }; 145 146 static const UChar araLink[178]= 147 { 148 1 + 32 + 256 * 0x11,/*0x0622*/ 149 1 + 32 + 256 * 0x13,/*0x0623*/ 150 1 + 256 * 0x15,/*0x0624*/ 151 1 + 32 + 256 * 0x17,/*0x0625*/ 152 1 + 2 + 256 * 0x19,/*0x0626*/ 153 1 + 32 + 256 * 0x1D,/*0x0627*/ 154 1 + 2 + 256 * 0x1F,/*0x0628*/ 155 1 + 256 * 0x23,/*0x0629*/ 156 1 + 2 + 256 * 0x25,/*0x062A*/ 157 1 + 2 + 256 * 0x29,/*0x062B*/ 158 1 + 2 + 256 * 0x2D,/*0x062C*/ 159 1 + 2 + 256 * 0x31,/*0x062D*/ 160 1 + 2 + 256 * 0x35,/*0x062E*/ 161 1 + 256 * 0x39,/*0x062F*/ 162 1 + 256 * 0x3B,/*0x0630*/ 163 1 + 256 * 0x3D,/*0x0631*/ 164 1 + 256 * 0x3F,/*0x0632*/ 165 1 + 2 + 256 * 0x41,/*0x0633*/ 166 1 + 2 + 256 * 0x45,/*0x0634*/ 167 1 + 2 + 256 * 0x49,/*0x0635*/ 168 1 + 2 + 256 * 0x4D,/*0x0636*/ 169 1 + 2 + 256 * 0x51,/*0x0637*/ 170 1 + 2 + 256 * 0x55,/*0x0638*/ 171 1 + 2 + 256 * 0x59,/*0x0639*/ 172 1 + 2 + 256 * 0x5D,/*0x063A*/ 173 0, 0, 0, 0, 0, /*0x063B-0x063F*/ 174 1 + 2, /*0x0640*/ 175 1 + 2 + 256 * 0x61,/*0x0641*/ 176 1 + 2 + 256 * 0x65,/*0x0642*/ 177 1 + 2 + 256 * 0x69,/*0x0643*/ 178 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/ 179 1 + 2 + 256 * 0x71,/*0x0645*/ 180 1 + 2 + 256 * 0x75,/*0x0646*/ 181 1 + 2 + 256 * 0x79,/*0x0647*/ 182 1 + 256 * 0x7D,/*0x0648*/ 183 1 + 256 * 0x7F,/*0x0649*/ 184 1 + 2 + 256 * 0x81,/*0x064A*/ 185 4 + 256 * 1, /*0x064B*/ 186 4 + 128 + 256 * 1, /*0x064C*/ 187 4 + 128 + 256 * 1, /*0x064D*/ 188 4 + 128 + 256 * 1, /*0x064E*/ 189 4 + 128 + 256 * 1, /*0x064F*/ 190 4 + 128 + 256 * 1, /*0x0650*/ 191 4 + 64 + 256 * 3, /*0x0651*/ 192 4 + 256 * 1, /*0x0652*/ 193 4 + 256 * 7, /*0x0653*/ 194 4 + 256 * 8, /*0x0654*/ 195 4 + 256 * 8, /*0x0655*/ 196 4 + 256 * 1, /*0x0656*/ 197 0, 0, 0, 0, 0, /*0x0657-0x065B*/ 198 1 + 256 * 0x85,/*0x065C*/ 199 1 + 256 * 0x87,/*0x065D*/ 200 1 + 256 * 0x89,/*0x065E*/ 201 1 + 256 * 0x8B,/*0x065F*/ 202 0, 0, 0, 0, 0, /*0x0660-0x0664*/ 203 0, 0, 0, 0, 0, /*0x0665-0x0669*/ 204 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ 205 4 + 256 * 6, /*0x0670*/ 206 1 + 8 + 256 * 0x00,/*0x0671*/ 207 1 + 32, /*0x0672*/ 208 1 + 32, /*0x0673*/ 209 0, /*0x0674*/ 210 1 + 32, /*0x0675*/ 211 1, 1, /*0x0676-0x0677*/ 212 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x0678-0x067D*/ 213 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ 214 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ 215 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/ 216 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ 217 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 218 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 219 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ 220 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ 221 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 222 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 223 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B8-0x06BF*/ 224 1+2, 1+2, /*0x06B8-0x06BF*/ 225 1, /*0x06C0*/ 226 1+2, /*0x06C1*/ 227 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/ 228 1+2+8+256 * 0xAC, /*0x06CC*/ 229 1, /*0x06CD*/ 230 1+2, 1+2, 1+2, 1+2, /*0x06CE-0x06D1*/ 231 1, 1 /*0x06D2-0x06D3*/ 232 }; 233 234 static const uint8_t presALink[] = { 235 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 236 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 237 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 238 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 239 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 240 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 241 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 242 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 244 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 245 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 246 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 247 /*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 248 /*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249 /*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 250 /*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 251 /*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252 /*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 253 /*FC6*/ 4, 4, 4 254 }; 255 256 static const uint8_t presBLink[]= 257 { 258 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 259 /*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2, 260 /*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0, 261 /*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 262 /*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 263 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 264 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 265 /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 266 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 267 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0 268 }; 269 270 static const UChar convertFBto06[] = 271 { 272 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 273 /*FB5*/ 0x671, 0x671, 0, 0, 0, 0, 0x07E, 0x07E, 0x07E, 0x07E, 0, 0, 0, 0, 0, 0, 274 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 275 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0, 276 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x698, 0x698, 0, 0, 0x6A9, 0x6A9, 277 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 278 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 279 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 280 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 283 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC 284 }; 285 286 static const UChar convertFEto06[] = 287 { 288 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 289 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652, 290 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628, 291 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, 292 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, 293 /*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636, 294 /*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A, 295 /*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644, 296 /*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649, 297 /*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F 298 }; 299 300 static const uint8_t shapeTable[4][4][4]= 301 { 302 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} }, 303 { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }, 304 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} }, 305 { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} } 306 }; 307 308 /* 309 * This function shapes European digits to Arabic-Indic digits 310 * in-place, writing over the input characters. 311 * Since we know that we are only looking for BMP code points, 312 * we can safely just work with code units (again, at least UTF-16). 313 */ 314 static void 315 _shapeToArabicDigitsWithContext(UChar *s, int32_t length, 316 UChar digitBase, 317 UBool isLogical, UBool lastStrongWasAL) { 318 const UBiDiProps *bdp; 319 int32_t i; 320 UChar c; 321 322 bdp=ubidi_getSingleton(); 323 digitBase-=0x30; 324 325 /* the iteration direction depends on the type of input */ 326 if(isLogical) { 327 for(i=0; i<length; ++i) { 328 c=s[i]; 329 switch(ubidi_getClass(bdp, c)) { 330 case U_LEFT_TO_RIGHT: /* L */ 331 case U_RIGHT_TO_LEFT: /* R */ 332 lastStrongWasAL=FALSE; 333 break; 334 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 335 lastStrongWasAL=TRUE; 336 break; 337 case U_EUROPEAN_NUMBER: /* EN */ 338 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 339 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 340 } 341 break; 342 default : 343 break; 344 } 345 } 346 } else { 347 for(i=length; i>0; /* pre-decrement in the body */) { 348 c=s[--i]; 349 switch(ubidi_getClass(bdp, c)) { 350 case U_LEFT_TO_RIGHT: /* L */ 351 case U_RIGHT_TO_LEFT: /* R */ 352 lastStrongWasAL=FALSE; 353 break; 354 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 355 lastStrongWasAL=TRUE; 356 break; 357 case U_EUROPEAN_NUMBER: /* EN */ 358 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 359 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 360 } 361 break; 362 default : 363 break; 364 } 365 } 366 } 367 } 368 369 /* 370 *Name : invertBuffer 371 *Function : This function inverts the buffer, it's used 372 * in case the user specifies the buffer to be 373 * U_SHAPE_TEXT_DIRECTION_LOGICAL 374 */ 375 static void 376 /* BEGIN android-changed */ 377 invertBuffer(UChar *buffer,int32_t size,uint64_t options,int32_t lowlimit,int32_t highlimit) { 378 /* END android-changed */ 379 UChar temp; 380 int32_t i=0,j=0; 381 for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) { 382 temp = buffer[i]; 383 buffer[i] = buffer[j]; 384 buffer[j] = temp; 385 } 386 } 387 388 /* 389 *Name : changeLamAlef 390 *Function : Converts the Alef characters into an equivalent 391 * LamAlef location in the 0x06xx Range, this is an 392 * intermediate stage in the operation of the program 393 * later it'll be converted into the 0xFExx LamAlefs 394 * in the shaping function. 395 */ 396 static U_INLINE UChar 397 changeLamAlef(UChar ch) { 398 switch(ch) { 399 case 0x0622 : 400 return 0x065C; 401 case 0x0623 : 402 return 0x065D; 403 case 0x0625 : 404 return 0x065E; 405 case 0x0627 : 406 return 0x065F; 407 } 408 return 0; 409 } 410 411 /* 412 *Name : getLink 413 *Function : Resolves the link between the characters as 414 * Arabic characters have four forms : 415 * Isolated, Initial, Middle and Final Form 416 */ 417 static UChar 418 getLink(UChar ch) { 419 if(ch >= 0x0622 && ch <= 0x06D3) { 420 return(araLink[ch-0x0622]); 421 } else if(ch == 0x200D) { 422 return(3); 423 } else if(ch >= 0x206D && ch <= 0x206F) { 424 return(4); 425 }else if(ch >= 0xFB50 && ch <= 0xFC62) { 426 return(presALink[ch-0xFB50]); 427 } else if(ch >= 0xFE70 && ch <= 0xFEFC) { 428 return(presBLink[ch-0xFE70]); 429 }else { 430 return(0); 431 } 432 } 433 434 /* 435 *Name : countSpaces 436 *Function : Counts the number of spaces 437 * at each end of the logical buffer 438 */ 439 static void 440 /* BEGIN android-changed */ 441 countSpaces(UChar *dest,int32_t size,uint64_t options,int32_t *spacesCountl,int32_t *spacesCountr) { 442 /* END android-changed */ 443 int32_t i = 0; 444 int32_t countl = 0,countr = 0; 445 while(dest[i] == SPACE_CHAR) { 446 countl++; 447 i++; 448 } 449 while(dest[size-1] == SPACE_CHAR) { 450 countr++; 451 size--; 452 } 453 *spacesCountl = countl; 454 *spacesCountr = countr; 455 } 456 457 /* 458 *Name : isTashkeelChar 459 *Function : Returns 1 for Tashkeel characters in 06 range else return 0 460 */ 461 static U_INLINE int32_t 462 isTashkeelChar(UChar ch) { 463 return (int32_t)( ch>=0x064B && ch<= 0x0652 ); 464 } 465 466 /* 467 *Name : isTashkeelCharFE 468 *Function : Returns 1 for Tashkeel characters in FE range else return 0 469 */ 470 static U_INLINE int32_t 471 isTashkeelCharFE(UChar ch) { 472 return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F ); 473 } 474 475 /* 476 *Name : isAlefChar 477 *Function : Returns 1 for Alef characters else return 0 478 */ 479 static U_INLINE int32_t 480 isAlefChar(UChar ch) { 481 return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) ); 482 } 483 484 /* 485 *Name : isLamAlefChar 486 *Function : Returns 1 for LamAlef characters else return 0 487 */ 488 static U_INLINE int32_t 489 isLamAlefChar(UChar ch) { 490 return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) ); 491 } 492 493 /*BIDI 494 *Name : isTailChar 495 *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0 496 */ 497 498 static U_INLINE int32_t 499 isTailChar(UChar ch) { 500 if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){ 501 return 1; 502 }else{ 503 return 0; 504 } 505 } 506 507 /*BIDI 508 *Name : isSeenTailFamilyChar 509 *Function : returns 1 if the character is a seen family isolated character 510 * in the FE range otherwise returns 0 511 */ 512 513 static U_INLINE int32_t 514 isSeenTailFamilyChar(UChar ch) { 515 if(ch >= 0xfeb1 && ch < 0xfebf){ 516 return tailFamilyIsolatedFinal [ch - 0xFEB1]; 517 }else{ 518 return 0; 519 } 520 } 521 522 /* Name : isSeenFamilyChar 523 * Function : returns 1 if the character is a seen family character in the Unicode 524 * 06 range otherwise returns 0 525 */ 526 527 static U_INLINE int32_t 528 isSeenFamilyChar(UChar ch){ 529 if(ch >= 0x633 && ch <= 0x636){ 530 return 1; 531 }else { 532 return 0; 533 } 534 } 535 536 /*Start of BIDI*/ 537 /* 538 *Name : isAlefMaksouraChar 539 *Function : returns 1 if the character is a Alef Maksoura Final or isolated 540 * otherwise returns 0 541 */ 542 static U_INLINE int32_t 543 isAlefMaksouraChar(UChar ch) { 544 return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); 545 } 546 547 /* 548 * Name : isYehHamzaChar 549 * Function : returns 1 if the character is a yehHamza isolated or yehhamza 550 * final is found otherwise returns 0 551 */ 552 static U_INLINE int32_t 553 isYehHamzaChar(UChar ch) { 554 if((ch==0xFE89)||(ch==0xFE8A)){ 555 return 1; 556 }else{ 557 return 0; 558 } 559 } 560 561 /* 562 * Name: isTashkeelOnTatweelChar 563 * Function: Checks if the Tashkeel Character is on Tatweel or not,if the 564 * Tashkeel on tatweel (FE range), it returns 1 else if the 565 * Tashkeel with shadda on tatweel (FC range)return 2 otherwise 566 * returns 0 567 */ 568 static U_INLINE int32_t 569 isTashkeelOnTatweelChar(UChar ch){ 570 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR) 571 { 572 return tashkeelMedial [ch - 0xFE70]; 573 }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) { 574 return 2; 575 }else{ 576 return 0; 577 } 578 } 579 580 /* 581 * Name: isIsolatedTashkeelChar 582 * Function: Checks if the Tashkeel Character is in the isolated form 583 * (i.e. Unicode FE range) returns 1 else if the Tashkeel 584 * with shadda is in the isolated form (i.e. Unicode FC range) 585 * returns 2 otherwise returns 0 586 */ 587 static U_INLINE int32_t 588 isIsolatedTashkeelChar(UChar ch){ 589 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){ 590 return (1 - tashkeelMedial [ch - 0xFE70]); 591 }else if(ch >= 0xfc5e && ch <= 0xfc63){ 592 return 1; 593 }else{ 594 return 0; 595 } 596 } 597 598 599 600 601 /* 602 *Name : calculateSize 603 *Function : This function calculates the destSize to be used in preflighting 604 * when the destSize is equal to 0 605 * It is used also to calculate the new destsize in case the 606 * destination buffer will be resized. 607 */ 608 609 /* BEGIN android-changed */ 610 static int32_t 611 calculateSize(const UChar *source, int32_t sourceLength, 612 int32_t destSize,uint64_t options) { 613 /* END android-changed */ 614 int32_t i = 0; 615 616 int lamAlefOption = 0; 617 int tashkeelOption = 0; 618 619 destSize = sourceLength; 620 621 if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE || 622 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) && 623 ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){ 624 lamAlefOption = 1; 625 } 626 if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE && 627 ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){ 628 tashkeelOption = 1; 629 } 630 631 if(lamAlefOption || tashkeelOption){ 632 if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 633 for(i=0;i<sourceLength;i++) { 634 if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) { 635 destSize--; 636 } 637 } 638 }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) { 639 for(i=0;i<sourceLength;i++) { 640 if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) { 641 destSize--; 642 } 643 } 644 } 645 } 646 647 if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){ 648 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 649 for(i=0;i<sourceLength;i++) { 650 if(isLamAlefChar(source[i])) 651 destSize++; 652 } 653 } 654 } 655 656 return destSize; 657 } 658 659 /* 660 *Name : handleTashkeelWithTatweel 661 *Function : Replaces Tashkeel as following: 662 * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. 663 * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace 664 * it with Shadda on Tatweel. 665 * Case 3: if the Tashkeel is isolated replace it with Space. 666 * 667 */ 668 /* BEGIN android-changed */ 669 static int32_t 670 handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength, 671 int32_t destSize,uint64_t options, 672 UErrorCode *pErrorCode) { 673 /* END android-changed */ 674 int i; 675 for(i = 0; i < sourceLength; i++){ 676 if((isTashkeelOnTatweelChar(dest[i]) == 1)){ 677 dest[i] = TATWEEL_CHAR; 678 }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){ 679 dest[i] = SHADDA_TATWEEL_CHAR; 680 }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){ 681 dest[i] = SPACE_CHAR; 682 } 683 } 684 return sourceLength; 685 } 686 687 688 689 /* 690 *Name : handleGeneratedSpaces 691 *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space, 692 * and Tashkeel to space. 693 * handleGeneratedSpaces function puts these generated spaces 694 * according to the options the user specifies. LamAlef and Tashkeel 695 * spaces can be replaced at begin, at end, at near or decrease the 696 * buffer size. 697 * 698 * There is also Auto option for LamAlef and tashkeel, which will put 699 * the spaces at end of the buffer (or end of text if the user used 700 * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END). 701 * 702 * If the text type was visual_LTR and the option 703 * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END 704 * option will place the space at the beginning of the buffer and 705 * BEGIN will place the space at the end of the buffer. 706 */ 707 708 /* BEGIN android-changed */ 709 static int32_t 710 handleGeneratedSpaces(UChar *dest, int32_t sourceLength, 711 int32_t destSize, 712 uint64_t options, 713 UErrorCode *pErrorCode ) { 714 /* END android-changed */ 715 716 int32_t i = 0, j = 0; 717 int32_t count = 0; 718 UChar *tempbuffer=NULL; 719 720 int lamAlefOption = 0; 721 int tashkeelOption = 0; 722 int shapingMode = SHAPE_MODE; 723 724 if (shapingMode == 0){ 725 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){ 726 lamAlefOption = 1; 727 } 728 if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){ 729 tashkeelOption = 1; 730 } 731 } 732 733 if (lamAlefOption || tashkeelOption){ 734 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 735 /* Test for NULL */ 736 if(tempbuffer == NULL) { 737 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 738 return 0; 739 } 740 741 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 742 743 i = j = 0; count = 0; 744 while(i < sourceLength) { 745 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 746 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 747 j--; 748 count++; 749 } else { 750 tempbuffer[j] = dest[i]; 751 } 752 i++; 753 j++; 754 } 755 756 while(count >= 0) { 757 tempbuffer[i] = 0x0000; 758 i--; 759 count--; 760 } 761 762 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 763 destSize = u_strlen(dest); 764 } 765 766 lamAlefOption = 0; 767 768 if (shapingMode == 0){ 769 /* BEGIN android-changed */ 770 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR && 771 (options&U_SHAPE_X_LAMALEF_SUB_ALTERNATE) == 0) { /* if set, leave LAMALEF_SPACE_SUB in the output */ 772 /* END android-changed */ 773 lamAlefOption = 1; 774 } 775 } 776 777 if (lamAlefOption){ 778 /* Lam+Alef is already shaped into LamAlef + FFFF */ 779 i = 0; 780 while(i < sourceLength) { 781 if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){ 782 dest[i] = SPACE_CHAR; 783 } 784 i++; 785 } 786 destSize = sourceLength; 787 } 788 lamAlefOption = 0; 789 tashkeelOption = 0; 790 791 if (shapingMode == 0) { 792 if ( ((options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefBegin) || 793 (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 794 && (spacesRelativeToTextBeginEnd==1)) ) { 795 lamAlefOption = 1; 796 } 797 if ( (options&U_SHAPE_TASHKEEL_MASK) == uShapeTashkeelBegin ) { 798 tashkeelOption = 1; 799 } 800 } 801 802 if(lamAlefOption || tashkeelOption){ 803 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 804 805 /* Test for NULL */ 806 if(tempbuffer == NULL) { 807 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 808 return 0; 809 } 810 811 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 812 i = j = sourceLength; count = 0; 813 while(i >= 0) { 814 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 815 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 816 j++; 817 count++; 818 }else { 819 tempbuffer[j] = dest[i]; 820 } 821 i--; 822 j--; 823 } 824 825 for(i=0 ;i < count; i++){ 826 tempbuffer[i] = SPACE_CHAR; 827 } 828 829 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 830 destSize = sourceLength; 831 } 832 833 834 835 lamAlefOption = 0; 836 tashkeelOption = 0; 837 838 if (shapingMode == 0) { 839 if ( ((options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefEnd) || 840 (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 841 && (spacesRelativeToTextBeginEnd==0)) ) { 842 lamAlefOption = 1; 843 } 844 if ( (options&U_SHAPE_TASHKEEL_MASK) == uShapeTashkeelEnd ){ 845 tashkeelOption = 1; 846 } 847 } 848 849 if(lamAlefOption || tashkeelOption){ 850 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 851 /* Test for NULL */ 852 if(tempbuffer == NULL) { 853 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 854 return 0; 855 } 856 857 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 858 859 i = j = 0; count = 0; 860 while(i < sourceLength) { 861 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 862 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 863 j--; 864 count++; 865 }else { 866 tempbuffer[j] = dest[i]; 867 } 868 i++; 869 j++; 870 } 871 872 while(count >= 0) { 873 tempbuffer[i] = SPACE_CHAR; 874 i--; 875 count--; 876 } 877 878 uprv_memcpy(dest,tempbuffer, sourceLength*U_SIZEOF_UCHAR); 879 destSize = sourceLength; 880 } 881 882 883 if(tempbuffer){ 884 uprv_free(tempbuffer); 885 } 886 887 return destSize; 888 } 889 890 /* 891 *Name :expandCompositCharAtBegin 892 *Function :Expands the LamAlef character to Lam and Alef consuming the required 893 * space from beginning of the buffer. If the text type was visual_LTR 894 * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected 895 * the spaces will be located at end of buffer. 896 * If there are no spaces to expand the LamAlef, an error 897 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 898 */ 899 900 static int32_t 901 expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { 902 int32_t i = 0,j = 0; 903 int32_t countl = 0; 904 UChar *tempbuffer=NULL; 905 906 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 907 908 /* Test for NULL */ 909 if(tempbuffer == NULL) { 910 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 911 return 0; 912 } 913 914 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 915 916 i = 0; 917 while(dest[i] == SPACE_CHAR) { 918 countl++; 919 i++; 920 } 921 922 i = j = sourceLength-1; 923 924 while(i >= 0 && j >= 0) { 925 if( countl>0 && isLamAlefChar(dest[i])) { 926 tempbuffer[j] = LAM_CHAR; 927 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; 928 j--; 929 countl--; 930 }else { 931 if( countl == 0 && isLamAlefChar(dest[i]) ) { 932 *pErrorCode=U_NO_SPACE_AVAILABLE; 933 } 934 tempbuffer[j] = dest[i]; 935 } 936 i--; 937 j--; 938 } 939 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 940 941 uprv_free(tempbuffer); 942 943 destSize = sourceLength; 944 return destSize; 945 } 946 947 /* 948 *Name : expandCompositCharAtEnd 949 *Function : Expands the LamAlef character to Lam and Alef consuming the 950 * required space from end of the buffer. If the text type was 951 * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 952 * was used, the spaces will be consumed from begin of buffer. If 953 * there are no spaces to expand the LamAlef, an error 954 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 955 */ 956 957 static int32_t 958 expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { 959 int32_t i = 0,j = 0; 960 961 int32_t countr = 0; 962 int32_t inpsize = sourceLength; 963 964 UChar *tempbuffer=NULL; 965 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 966 967 /* Test for NULL */ 968 if(tempbuffer == NULL) { 969 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 970 return 0; 971 } 972 973 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 974 975 while(dest[inpsize-1] == SPACE_CHAR) { 976 countr++; 977 inpsize--; 978 } 979 980 i = sourceLength - countr - 1; 981 j = sourceLength - 1; 982 983 while(i >= 0 && j >= 0) { 984 if( countr>0 && isLamAlefChar(dest[i]) ) { 985 tempbuffer[j] = LAM_CHAR; 986 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; 987 j--; 988 countr--; 989 }else { 990 if ((countr == 0) && isLamAlefChar(dest[i]) ) { 991 *pErrorCode=U_NO_SPACE_AVAILABLE; 992 } 993 tempbuffer[j] = dest[i]; 994 } 995 i--; 996 j--; 997 } 998 999 if(countr > 0) { 1000 uprv_memmove(tempbuffer, tempbuffer+countr, sourceLength*U_SIZEOF_UCHAR); 1001 if(u_strlen(tempbuffer) < sourceLength) { 1002 for(i=sourceLength-1;i>=sourceLength-countr;i--) { 1003 tempbuffer[i] = SPACE_CHAR; 1004 } 1005 } 1006 } 1007 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 1008 1009 uprv_free(tempbuffer); 1010 1011 destSize = sourceLength; 1012 return destSize; 1013 } 1014 1015 /* 1016 *Name : expandCompositCharAtNear 1017 *Function : Expands the LamAlef character into Lam + Alef, YehHamza character 1018 * into Yeh + Hamza, SeenFamily character into SeenFamily character 1019 * + Tail, while consuming the space next to the character. 1020 * If there are no spaces next to the character, an error 1021 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 1022 */ 1023 1024 static int32_t 1025 expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode, 1026 int yehHamzaOption, int seenTailOption, int lamAlefOption) { 1027 int32_t i = 0; 1028 1029 1030 UChar lamalefChar, yehhamzaChar; 1031 1032 for(i = 0 ;i<=sourceLength-1;i++) { 1033 if (seenTailOption && isSeenTailFamilyChar(dest[i])) { 1034 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1035 dest[i-1] = tailChar; 1036 }else { 1037 *pErrorCode=U_NO_SPACE_AVAILABLE; 1038 } 1039 }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) { 1040 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1041 yehhamzaChar = dest[i]; 1042 dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR]; 1043 dest[i-1] = HAMZAFE_CHAR; 1044 }else { 1045 1046 *pErrorCode=U_NO_SPACE_AVAILABLE; 1047 } 1048 }else if(lamAlefOption && isLamAlefChar(dest[i+1])) { 1049 if(dest[i] == SPACE_CHAR){ 1050 lamalefChar = dest[i+1]; 1051 dest[i+1] = LAM_CHAR; 1052 dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ]; 1053 }else { 1054 *pErrorCode=U_NO_SPACE_AVAILABLE; 1055 } 1056 } 1057 } 1058 destSize = sourceLength; 1059 return destSize; 1060 } 1061 /* 1062 * Name : expandCompositChar 1063 * Function : LamAlef, need special handling, since it expands from one 1064 * character into two characters while shaping or deshaping. 1065 * In order to expand it, near or far spaces according to the 1066 * options user specifies. Also buffer size can be increased. 1067 * 1068 * For SeenFamily characters and YehHamza only the near option is 1069 * supported, while for LamAlef we can take spaces from begin, end, 1070 * near or even increase the buffer size. 1071 * There is also the Auto option for LamAlef only, which will first 1072 * search for a space at end, begin then near, respectively. 1073 * If there are no spaces to expand these characters, an error will be set to 1074 * U_NO_SPACE_AVAILABLE as defined in utypes.h 1075 */ 1076 /* BEGIN android-changed */ 1077 static int32_t 1078 expandCompositChar(UChar *dest, int32_t sourceLength, 1079 int32_t destSize,uint64_t options, 1080 UErrorCode *pErrorCode, int shapingMode) { 1081 /* END android-changed */ 1082 1083 int32_t i = 0,j = 0; 1084 1085 UChar *tempbuffer=NULL; 1086 int yehHamzaOption = 0; 1087 int seenTailOption = 0; 1088 int lamAlefOption = 0; 1089 1090 if (shapingMode == 1){ 1091 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){ 1092 1093 if(spacesRelativeToTextBeginEnd == 0) { 1094 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1095 1096 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1097 *pErrorCode = U_ZERO_ERROR; 1098 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1099 } 1100 }else { 1101 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1102 1103 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1104 *pErrorCode = U_ZERO_ERROR; 1105 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1106 } 1107 } 1108 1109 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1110 *pErrorCode = U_ZERO_ERROR; 1111 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1112 seenTailOption, 1); 1113 } 1114 } 1115 } 1116 1117 if (shapingMode == 1){ 1118 if ( (options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefEnd){ 1119 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1120 } 1121 } 1122 1123 if (shapingMode == 1){ 1124 if ( (options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefBegin){ 1125 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1126 } 1127 } 1128 1129 if (shapingMode == 0){ 1130 if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){ 1131 yehHamzaOption = 1; 1132 } 1133 if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){ 1134 seenTailOption = 1; 1135 } 1136 } 1137 if (shapingMode == 1) { 1138 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) { 1139 lamAlefOption = 1; 1140 } 1141 } 1142 1143 1144 if (yehHamzaOption || seenTailOption || lamAlefOption){ 1145 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1146 seenTailOption,lamAlefOption); 1147 } 1148 1149 1150 if (shapingMode == 1){ 1151 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 1152 destSize = calculateSize(dest,sourceLength,destSize,options); 1153 tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR); 1154 1155 /* Test for NULL */ 1156 if(tempbuffer == NULL) { 1157 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1158 return 0; 1159 } 1160 1161 uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR); 1162 1163 i = j = 0; 1164 while(i < destSize && j < destSize) { 1165 if(isLamAlefChar(dest[i]) ) { 1166 tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ]; 1167 tempbuffer[j+1] = LAM_CHAR; 1168 j++; 1169 }else { 1170 tempbuffer[j] = dest[i]; 1171 } 1172 i++; 1173 j++; 1174 } 1175 1176 uprv_memcpy(dest, tempbuffer, destSize*U_SIZEOF_UCHAR); 1177 } 1178 } 1179 1180 if(tempbuffer) { 1181 uprv_free(tempbuffer); 1182 } 1183 return destSize; 1184 } 1185 1186 /* 1187 *Name : shapeUnicode 1188 *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped 1189 * arabic Unicode buffer in FExx Range 1190 */ 1191 /* BEGIN android-changed */ 1192 static int32_t 1193 shapeUnicode(UChar *dest, int32_t sourceLength, 1194 int32_t destSize,uint64_t options, 1195 UErrorCode *pErrorCode, 1196 int tashkeelFlag) { 1197 /* END android-changed */ 1198 1199 int32_t i, iend; 1200 int32_t step; 1201 int32_t lastPos,Nx, Nw; 1202 unsigned int Shape; 1203 int32_t lamalef_found = 0; 1204 int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0; 1205 UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0; 1206 UChar wLamalef; 1207 1208 /* 1209 * Converts the input buffer from FExx Range into 06xx Range 1210 * to make sure that all characters are in the 06xx range 1211 * even the lamalef is converted to the special region in 1212 * the 06xx range 1213 */ 1214 if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) { 1215 for (i = 0; i < sourceLength; i++) { 1216 UChar inputChar = dest[i]; 1217 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { 1218 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1219 if (c != 0) 1220 dest[i] = c; 1221 } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) { 1222 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ; 1223 } else { 1224 dest[i] = inputChar ; 1225 } 1226 } 1227 } 1228 1229 1230 /* sets the index to the end of the buffer, together with the step point to -1 */ 1231 i = sourceLength - 1; 1232 iend = -1; 1233 step = -1; 1234 1235 /* 1236 * This function resolves the link between the characters . 1237 * Arabic characters have four forms : 1238 * Isolated Form, Initial Form, Middle Form and Final Form 1239 */ 1240 currLink = getLink(dest[i]); 1241 1242 lastPos = i; 1243 Nx = -2, Nw = 0; 1244 1245 while (i != iend) { 1246 /* If high byte of currLink > 0 then more than one shape */ 1247 if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) { 1248 Nw = i + step; 1249 while (Nx < 0) { /* we need to know about next char */ 1250 if(Nw == iend) { 1251 nextLink = 0; 1252 Nx = 3000; 1253 } else { 1254 nextLink = getLink(dest[Nw]); 1255 if((nextLink & IRRELEVANT) == 0) { 1256 Nx = Nw; 1257 } else { 1258 Nw = Nw + step; 1259 } 1260 } 1261 } 1262 1263 if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) { 1264 lamalef_found = 1; 1265 wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */ 1266 if ( wLamalef != 0) { 1267 dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */ 1268 dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */ 1269 i=lastPos; /* unicode private use area, this is done to make */ 1270 } /* sure that removeLamAlefSpaces() handles only the */ 1271 lastLink = prevLink; /* spaces generated during lamalef generation. */ 1272 currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */ 1273 } /* in removeLamAlefSpaces() */ 1274 1275 if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ 1276 if ( isSeenFamilyChar(dest[i])){ 1277 seenfamFound = 1; 1278 } else if (dest[i] == YEH_HAMZA_CHAR) { 1279 yehhamzaFound = 1; 1280 } 1281 } 1282 else if(i==0){ 1283 if ( isSeenFamilyChar(dest[i])){ 1284 seenfamFound = 1; 1285 } else if (dest[i] == YEH_HAMZA_CHAR) { 1286 yehhamzaFound = 1; 1287 } 1288 } 1289 1290 /* 1291 * get the proper shape according to link ability of neighbors 1292 * and of character; depends on the order of the shapes 1293 * (isolated, initial, middle, final) in the compatibility area 1294 */ 1295 Shape = shapeTable[nextLink & (LINKR + LINKL)] 1296 [lastLink & (LINKR + LINKL)] 1297 [currLink & (LINKR + LINKL)]; 1298 1299 if ((currLink & (LINKR+LINKL)) == 1) { 1300 Shape &= 1; 1301 } else if(isTashkeelChar(dest[i])) { 1302 if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) && 1303 dest[i] != 0x064C && dest[i] != 0x064D ) 1304 { 1305 Shape = 1; 1306 if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) { 1307 Shape = 0; 1308 } 1309 } 1310 else { 1311 Shape = 0; 1312 } 1313 } 1314 if ((dest[i] ^ 0x0600) < 0x100) { 1315 if ( isTashkeelChar(dest[i]) ){ 1316 if (tashkeelFlag == 2){ 1317 dest[i] = TASHKEEL_SPACE_SUB; 1318 tashkeelFound = 1; 1319 }else { 1320 dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; 1321 } 1322 }else if ((currLink & APRESENT) > 0) { 1323 dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); 1324 }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) { 1325 dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape); 1326 } 1327 } 1328 } 1329 1330 /* move one notch forward */ 1331 if ((currLink & IRRELEVANT) == 0) { 1332 prevLink = lastLink; 1333 lastLink = currLink; 1334 lastPos = i; 1335 } 1336 1337 i = i + step; 1338 if (i == Nx) { 1339 currLink = nextLink; 1340 Nx = -2; 1341 } else if(i != iend) { 1342 currLink = getLink(dest[i]); 1343 } 1344 } 1345 destSize = sourceLength; 1346 if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){ 1347 destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode); 1348 } 1349 1350 if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) { 1351 destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE); 1352 } 1353 return destSize; 1354 } 1355 1356 /* 1357 *Name : deShapeUnicode 1358 *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped 1359 * arabic Unicode buffer in 06xx Range 1360 */ 1361 /* BEGIN android-changed */ 1362 static int32_t 1363 deShapeUnicode(UChar *dest, int32_t sourceLength, 1364 int32_t destSize,uint64_t options, 1365 UErrorCode *pErrorCode) { 1366 /* END android-changed */ 1367 int32_t i = 0; 1368 int32_t lamalef_found = 0; 1369 int32_t yehHamzaComposeEnabled = 0; 1370 int32_t seenComposeEnabled = 0; 1371 1372 yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0; 1373 seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0; 1374 1375 /* 1376 *This for loop changes the buffer from the Unicode FE range to 1377 *the Unicode 06 range 1378 */ 1379 1380 for(i = 0; i < sourceLength; i++) { 1381 UChar inputChar = dest[i]; 1382 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */ 1383 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1384 if (c != 0) 1385 dest[i] = c; 1386 } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR)) 1387 && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) { 1388 dest[i] = SPACE_CHAR; 1389 dest[i+1] = YEH_HAMZA_CHAR; 1390 } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1)) 1391 && (isSeenTailFamilyChar(dest[i+1])) ) { 1392 dest[i] = SPACE_CHAR; 1393 } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */ 1394 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ]; 1395 } else { 1396 dest[i] = inputChar ; 1397 } 1398 1399 if( isLamAlefChar(dest[i]) ) 1400 lamalef_found = 1; 1401 } 1402 1403 destSize = sourceLength; 1404 if (lamalef_found != 0){ 1405 destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE); 1406 } 1407 return destSize; 1408 } 1409 1410 /* 1411 **************************************** 1412 * u_shapeArabic 1413 **************************************** 1414 */ 1415 1416 /* BEGIN android-changed */ 1417 U_CAPI int32_t U_EXPORT2 1418 u_shapeArabic(const UChar *source, int32_t sourceLength, 1419 UChar *dest, int32_t destCapacity, 1420 uint64_t options, 1421 UErrorCode *pErrorCode) { 1422 /* END android-changed */ 1423 1424 int32_t destLength; 1425 1426 spacesRelativeToTextBeginEnd = 0; 1427 uShapeLamalefBegin = U_SHAPE_LAMALEF_BEGIN; 1428 uShapeLamalefEnd = U_SHAPE_LAMALEF_END; 1429 uShapeTashkeelBegin = U_SHAPE_TASHKEEL_BEGIN; 1430 uShapeTashkeelEnd = U_SHAPE_TASHKEEL_END; 1431 1432 /* usual error checking */ 1433 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1434 return 0; 1435 } 1436 1437 /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */ 1438 if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 || 1439 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1440 ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) || 1441 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1442 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) || 1443 (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED || 1444 (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED || 1445 ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE && 1446 (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) || 1447 ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL && 1448 (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) 1449 ) 1450 { 1451 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1452 return 0; 1453 } 1454 /* Validate lamalef options */ 1455 if(((options&U_SHAPE_LAMALEF_MASK) > 0)&& 1456 !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) || 1457 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) || 1458 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )|| 1459 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) || 1460 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR))) 1461 { 1462 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1463 return 0; 1464 } 1465 /* Validate Tashkeel options */ 1466 if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&& 1467 !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) || 1468 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END ) 1469 ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )|| 1470 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL))) 1471 { 1472 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1473 return 0; 1474 } 1475 /* determine the source length */ 1476 if(sourceLength==-1) { 1477 sourceLength=u_strlen(source); 1478 } 1479 if(sourceLength<=0) { 1480 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 1481 } 1482 1483 /* check that source and destination do not overlap */ 1484 if( dest!=NULL && 1485 ((source<=dest && dest<source+sourceLength) || 1486 (dest<=source && source<dest+destCapacity))) { 1487 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1488 return 0; 1489 } 1490 1491 /* Does Options contain the new Seen Tail Unicode code point option */ 1492 if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){ 1493 tailChar = NEW_TAIL_CHAR; 1494 }else { 1495 tailChar = OLD_TAIL_CHAR; 1496 } 1497 1498 if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) { 1499 UChar buffer[300]; 1500 UChar *tempbuffer, *tempsource = NULL; 1501 int32_t outputSize, spacesCountl=0, spacesCountr=0; 1502 1503 if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) { 1504 int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL; 1505 int32_t aggregate_tashkeel = 1506 (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) == 1507 (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED); 1508 int step=logical_order?1:-1; 1509 int j=logical_order?-1:2*sourceLength; 1510 int i=logical_order?-1:sourceLength; 1511 int end=logical_order?sourceLength:-1; 1512 int aggregation_possible = 1; 1513 UChar prev = 0; 1514 UChar prevLink, currLink = 0; 1515 int newSourceLength = 0; 1516 tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR); 1517 if(tempsource == NULL) { 1518 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1519 return 0; 1520 } 1521 1522 while ((i+=step) != end) { 1523 prevLink = currLink; 1524 currLink = getLink(source[i]); 1525 if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) { 1526 aggregation_possible = 0; 1527 tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E; 1528 currLink = getLink(tempsource[j]); 1529 } else { 1530 aggregation_possible = 1; 1531 tempsource[j+=step] = source[i]; 1532 prev = source[i]; 1533 newSourceLength++; 1534 } 1535 } 1536 source = tempsource+(logical_order?0:j); 1537 sourceLength = newSourceLength; 1538 } 1539 1540 /* calculate destination size */ 1541 /* TODO: do we ever need to do this pure preflighting? */ 1542 if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) || 1543 ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) { 1544 outputSize=calculateSize(source,sourceLength,destCapacity,options); 1545 } else { 1546 outputSize=sourceLength; 1547 } 1548 1549 if(outputSize>destCapacity) { 1550 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1551 if (tempsource != NULL) uprv_free(tempsource); 1552 return outputSize; 1553 } 1554 1555 /* 1556 * need a temporary buffer of size max(outputSize, sourceLength) 1557 * because at first we copy source->temp 1558 */ 1559 if(sourceLength>outputSize) { 1560 outputSize=sourceLength; 1561 } 1562 1563 /* Start of Arabic letter shaping part */ 1564 if(outputSize<=sizeof(buffer)/U_SIZEOF_UCHAR) { 1565 outputSize=sizeof(buffer)/U_SIZEOF_UCHAR; 1566 tempbuffer=buffer; 1567 } else { 1568 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); 1569 1570 /*Test for NULL*/ 1571 if(tempbuffer == NULL) { 1572 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1573 if (tempsource != NULL) uprv_free(tempsource); 1574 return 0; 1575 } 1576 } 1577 uprv_memcpy(tempbuffer, source, sourceLength*U_SIZEOF_UCHAR); 1578 if (tempsource != NULL){ 1579 uprv_free(tempsource); 1580 } 1581 1582 if(sourceLength<outputSize) { 1583 uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR); 1584 } 1585 1586 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1587 countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr); 1588 invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr); 1589 } 1590 1591 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 1592 if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) { 1593 spacesRelativeToTextBeginEnd = 1; 1594 uShapeLamalefBegin = U_SHAPE_LAMALEF_END; 1595 uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN; 1596 1597 uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END; 1598 uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN; 1599 } 1600 } 1601 1602 switch(options&U_SHAPE_LETTERS_MASK) { 1603 case U_SHAPE_LETTERS_SHAPE : 1604 if( (options&U_SHAPE_TASHKEEL_MASK)> 0 1605 && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) { 1606 /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */ 1607 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2); 1608 }else { 1609 /* default Call the shaping function with tashkeel flag == 1 */ 1610 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1); 1611 1612 /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/ 1613 if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){ 1614 destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode); 1615 } 1616 } 1617 break; 1618 case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED : 1619 /* Call the shaping function with tashkeel flag == 0 */ 1620 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0); 1621 break; 1622 1623 case U_SHAPE_LETTERS_UNSHAPE : 1624 /* Call the deshaping function */ 1625 destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode); 1626 break; 1627 default : 1628 /* will never occur because of validity checks above */ 1629 destLength = 0; 1630 break; 1631 } 1632 1633 /* 1634 * TODO: (markus 2002aug01) 1635 * For as long as we always preflight the outputSize above 1636 * we should U_ASSERT(outputSize==destLength) 1637 * except for the adjustment above before the tempbuffer allocation 1638 */ 1639 1640 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1641 countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr); 1642 invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr); 1643 } 1644 uprv_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)*U_SIZEOF_UCHAR); 1645 1646 if(tempbuffer!=buffer) { 1647 uprv_free(tempbuffer); 1648 } 1649 1650 if(destLength>destCapacity) { 1651 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1652 return destLength; 1653 } 1654 1655 /* End of Arabic letter shaping part */ 1656 } else { 1657 /* 1658 * No letter shaping: 1659 * just make sure the destination is large enough and copy the string. 1660 */ 1661 if(destCapacity<sourceLength) { 1662 /* this catches preflighting, too */ 1663 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1664 return sourceLength; 1665 } 1666 uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR); 1667 destLength=sourceLength; 1668 } 1669 1670 /* 1671 * Perform number shaping. 1672 * With UTF-16 or UTF-32, the length of the string is constant. 1673 * The easiest way to do this is to operate on the destination and 1674 * "shape" the digits in-place. 1675 */ 1676 if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) { 1677 UChar digitBase; 1678 int32_t i; 1679 1680 /* select the requested digit group */ 1681 switch(options&U_SHAPE_DIGIT_TYPE_MASK) { 1682 case U_SHAPE_DIGIT_TYPE_AN: 1683 digitBase=0x660; /* Unicode: "Arabic-Indic digits" */ 1684 break; 1685 case U_SHAPE_DIGIT_TYPE_AN_EXTENDED: 1686 digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */ 1687 break; 1688 default: 1689 /* will never occur because of validity checks above */ 1690 digitBase=0; 1691 break; 1692 } 1693 1694 /* perform the requested operation */ 1695 switch(options&U_SHAPE_DIGITS_MASK) { 1696 case U_SHAPE_DIGITS_EN2AN: 1697 /* add (digitBase-'0') to each European (ASCII) digit code point */ 1698 digitBase-=0x30; 1699 for(i=0; i<destLength; ++i) { 1700 if(((uint32_t)dest[i]-0x30)<10) { 1701 dest[i]+=digitBase; 1702 } 1703 } 1704 break; 1705 case U_SHAPE_DIGITS_AN2EN: 1706 /* subtract (digitBase-'0') from each Arabic digit code point */ 1707 for(i=0; i<destLength; ++i) { 1708 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) { 1709 dest[i]-=digitBase-0x30; 1710 } 1711 } 1712 break; 1713 case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: 1714 _shapeToArabicDigitsWithContext(dest, destLength, 1715 digitBase, 1716 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1717 FALSE); 1718 break; 1719 case U_SHAPE_DIGITS_ALEN2AN_INIT_AL: 1720 _shapeToArabicDigitsWithContext(dest, destLength, 1721 digitBase, 1722 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1723 TRUE); 1724 break; 1725 default: 1726 /* will never occur because of validity checks above */ 1727 break; 1728 } 1729 } 1730 1731 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 1732 } 1733