1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2000-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ushape.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2000jun29 14 * created by: Markus W. Scherer 15 * 16 * Arabic letter shaping implemented by Ayman Roshdy 17 */ 18 19 #include "unicode/utypes.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ustring.h" 22 #include "unicode/ushape.h" 23 #include "cmemory.h" 24 #include "putilimp.h" 25 #include "ustr_imp.h" 26 #include "ubidi_props.h" 27 #include "uassert.h" 28 29 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 30 31 /* 32 * This implementation is designed for 16-bit Unicode strings. 33 * The main assumption is that the Arabic characters and their 34 * presentation forms each fit into a single UChar. 35 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII 36 * characters. 37 */ 38 39 /* 40 * ### TODO in general for letter shaping: 41 * - the letter shaping code is UTF-16-unaware; needs update 42 * + especially invertBuffer()?! 
43 * - needs to handle the "Arabic Tail" that is used in some legacy codepages 44 * as a glyph fragment of wide-glyph letters 45 * + IBM Unicode conversion tables map it to U+200B (ZWSP) 46 * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms 47 */ 48 49 /* definitions for Arabic letter shaping ------------------------------------ */ 50 51 #define IRRELEVANT 4 52 #define LAMTYPE 16 53 #define ALEFTYPE 32 54 #define LINKR 1 55 #define LINKL 2 56 #define APRESENT 8 57 #define SHADDA 64 58 #define CSHADDA 128 59 #define COMBINE (SHADDA+CSHADDA) 60 61 #define HAMZAFE_CHAR 0xfe80 62 #define HAMZA06_CHAR 0x0621 63 #define YEH_HAMZA_CHAR 0x0626 64 #define YEH_HAMZAFE_CHAR 0xFE89 65 #define LAMALEF_SPACE_SUB 0xFFFF 66 #define TASHKEEL_SPACE_SUB 0xFFFE 67 #define NEW_TAIL_CHAR 0xFE73 68 #define OLD_TAIL_CHAR 0x200B 69 #define LAM_CHAR 0x0644 70 #define SPACE_CHAR 0x0020 71 #define SHADDA_CHAR 0xFE7C 72 #define TATWEEL_CHAR 0x0640 73 #define SHADDA_TATWEEL_CHAR 0xFE7D 74 #define SHADDA06_CHAR 0x0651 75 76 #define SHAPE_MODE 0 77 #define DESHAPE_MODE 1 78 79 struct uShapeVariables { 80 UChar tailChar; 81 uint32_t uShapeLamalefBegin; 82 uint32_t uShapeLamalefEnd; 83 uint32_t uShapeTashkeelBegin; 84 uint32_t uShapeTashkeelEnd; 85 int spacesRelativeToTextBeginEnd; 86 }; 87 88 static const uint8_t tailFamilyIsolatedFinal[] = { 89 /* FEB1 */ 1, 90 /* FEB2 */ 1, 91 /* FEB3 */ 0, 92 /* FEB4 */ 0, 93 /* FEB5 */ 1, 94 /* FEB6 */ 1, 95 /* FEB7 */ 0, 96 /* FEB8 */ 0, 97 /* FEB9 */ 1, 98 /* FEBA */ 1, 99 /* FEBB */ 0, 100 /* FEBC */ 0, 101 /* FEBD */ 1, 102 /* FEBE */ 1 103 }; 104 105 static const uint8_t tashkeelMedial[] = { 106 /* FE70 */ 0, 107 /* FE71 */ 1, 108 /* FE72 */ 0, 109 /* FE73 */ 0, 110 /* FE74 */ 0, 111 /* FE75 */ 0, 112 /* FE76 */ 0, 113 /* FE77 */ 1, 114 /* FE78 */ 0, 115 /* FE79 */ 1, 116 /* FE7A */ 0, 117 /* FE7B */ 1, 118 /* FE7C */ 0, 119 /* FE7D */ 1, 120 /* FE7E */ 0, 121 /* FE7F */ 1 122 }; 123 124 static const UChar 
yehHamzaToYeh[] = 125 { 126 /* isolated*/ 0xFEEF, 127 /* final */ 0xFEF0 128 }; 129 130 static const uint8_t IrrelevantPos[] = { 131 0x0, 0x2, 0x4, 0x6, 132 0x8, 0xA, 0xC, 0xE 133 }; 134 135 136 static const UChar convertLamAlef[] = 137 { 138 /*FEF5*/ 0x0622, 139 /*FEF6*/ 0x0622, 140 /*FEF7*/ 0x0623, 141 /*FEF8*/ 0x0623, 142 /*FEF9*/ 0x0625, 143 /*FEFA*/ 0x0625, 144 /*FEFB*/ 0x0627, 145 /*FEFC*/ 0x0627 146 }; 147 148 static const UChar araLink[178]= 149 { 150 1 + 32 + 256 * 0x11,/*0x0622*/ 151 1 + 32 + 256 * 0x13,/*0x0623*/ 152 1 + 256 * 0x15,/*0x0624*/ 153 1 + 32 + 256 * 0x17,/*0x0625*/ 154 1 + 2 + 256 * 0x19,/*0x0626*/ 155 1 + 32 + 256 * 0x1D,/*0x0627*/ 156 1 + 2 + 256 * 0x1F,/*0x0628*/ 157 1 + 256 * 0x23,/*0x0629*/ 158 1 + 2 + 256 * 0x25,/*0x062A*/ 159 1 + 2 + 256 * 0x29,/*0x062B*/ 160 1 + 2 + 256 * 0x2D,/*0x062C*/ 161 1 + 2 + 256 * 0x31,/*0x062D*/ 162 1 + 2 + 256 * 0x35,/*0x062E*/ 163 1 + 256 * 0x39,/*0x062F*/ 164 1 + 256 * 0x3B,/*0x0630*/ 165 1 + 256 * 0x3D,/*0x0631*/ 166 1 + 256 * 0x3F,/*0x0632*/ 167 1 + 2 + 256 * 0x41,/*0x0633*/ 168 1 + 2 + 256 * 0x45,/*0x0634*/ 169 1 + 2 + 256 * 0x49,/*0x0635*/ 170 1 + 2 + 256 * 0x4D,/*0x0636*/ 171 1 + 2 + 256 * 0x51,/*0x0637*/ 172 1 + 2 + 256 * 0x55,/*0x0638*/ 173 1 + 2 + 256 * 0x59,/*0x0639*/ 174 1 + 2 + 256 * 0x5D,/*0x063A*/ 175 0, 0, 0, 0, 0, /*0x063B-0x063F*/ 176 1 + 2, /*0x0640*/ 177 1 + 2 + 256 * 0x61,/*0x0641*/ 178 1 + 2 + 256 * 0x65,/*0x0642*/ 179 1 + 2 + 256 * 0x69,/*0x0643*/ 180 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/ 181 1 + 2 + 256 * 0x71,/*0x0645*/ 182 1 + 2 + 256 * 0x75,/*0x0646*/ 183 1 + 2 + 256 * 0x79,/*0x0647*/ 184 1 + 256 * 0x7D,/*0x0648*/ 185 1 + 256 * 0x7F,/*0x0649*/ 186 1 + 2 + 256 * 0x81,/*0x064A*/ 187 4 + 256 * 1, /*0x064B*/ 188 4 + 128 + 256 * 1, /*0x064C*/ 189 4 + 128 + 256 * 1, /*0x064D*/ 190 4 + 128 + 256 * 1, /*0x064E*/ 191 4 + 128 + 256 * 1, /*0x064F*/ 192 4 + 128 + 256 * 1, /*0x0650*/ 193 4 + 64 + 256 * 3, /*0x0651*/ 194 4 + 256 * 1, /*0x0652*/ 195 4 + 256 * 7, /*0x0653*/ 196 4 + 256 * 8, 
/*0x0654*/ 197 4 + 256 * 8, /*0x0655*/ 198 4 + 256 * 1, /*0x0656*/ 199 0, 0, 0, 0, 0, /*0x0657-0x065B*/ 200 1 + 256 * 0x85,/*0x065C*/ 201 1 + 256 * 0x87,/*0x065D*/ 202 1 + 256 * 0x89,/*0x065E*/ 203 1 + 256 * 0x8B,/*0x065F*/ 204 0, 0, 0, 0, 0, /*0x0660-0x0664*/ 205 0, 0, 0, 0, 0, /*0x0665-0x0669*/ 206 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ 207 4 + 256 * 6, /*0x0670*/ 208 1 + 8 + 256 * 0x00,/*0x0671*/ 209 1 + 32, /*0x0672*/ 210 1 + 32, /*0x0673*/ 211 0, /*0x0674*/ 212 1 + 32, /*0x0675*/ 213 1, 1, /*0x0676-0x0677*/ 214 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x0678-0x067D*/ 215 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ 216 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ 217 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/ 218 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ 219 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 220 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 221 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ 222 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ 223 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 224 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 225 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B8-0x06BF*/ 226 1+2, 1+2, /*0x06B8-0x06BF*/ 227 1, /*0x06C0*/ 228 1+2, /*0x06C1*/ 229 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/ 230 1+2+8+256 * 0xAC, /*0x06CC*/ 231 1, /*0x06CD*/ 232 1+2, 1+2, 1+2, 1+2, /*0x06CE-0x06D1*/ 233 1, 1 /*0x06D2-0x06D3*/ 234 }; 235 236 static const uint8_t presALink[] = { 237 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 238 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 239 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 241 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 242 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 244 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 245 
/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 246 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 247 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 248 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 249 /*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 250 /*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 251 /*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252 /*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 253 /*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254 /*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 255 /*FC6*/ 4, 4, 4 256 }; 257 258 static const uint8_t presBLink[]= 259 { 260 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 261 /*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2, 262 /*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0, 263 /*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 264 /*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 265 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 266 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 267 /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 268 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 269 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0 270 }; 271 272 static const UChar convertFBto06[] = 273 { 274 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 275 /*FB5*/ 0x671, 0x671, 0, 0, 0, 0, 0x07E, 0x07E, 0x07E, 0x07E, 0, 0, 0, 0, 0, 0, 276 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 277 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0, 278 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x698, 0x698, 0, 0, 0x6A9, 0x6A9, 279 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
280 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 283 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 284 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 285 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC 286 }; 287 288 static const UChar convertFEto06[] = 289 { 290 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 291 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652, 292 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628, 293 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, 294 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, 295 /*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636, 296 /*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A, 297 /*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644, 298 /*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649, 299 /*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F 300 }; 301 302 static const uint8_t shapeTable[4][4][4]= 303 { 304 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} }, 305 { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }, 306 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} }, 307 { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} } 308 }; 309 310 /* 311 * This function shapes European digits to 
Arabic-Indic digits 312 * in-place, writing over the input characters. 313 * Since we know that we are only looking for BMP code points, 314 * we can safely just work with code units (again, at least UTF-16). 315 */ 316 static void 317 _shapeToArabicDigitsWithContext(UChar *s, int32_t length, 318 UChar digitBase, 319 UBool isLogical, UBool lastStrongWasAL) { 320 const UBiDiProps *bdp; 321 int32_t i; 322 UChar c; 323 324 bdp=ubidi_getSingleton(); 325 digitBase-=0x30; 326 327 /* the iteration direction depends on the type of input */ 328 if(isLogical) { 329 for(i=0; i<length; ++i) { 330 c=s[i]; 331 switch(ubidi_getClass(bdp, c)) { 332 case U_LEFT_TO_RIGHT: /* L */ 333 case U_RIGHT_TO_LEFT: /* R */ 334 lastStrongWasAL=FALSE; 335 break; 336 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 337 lastStrongWasAL=TRUE; 338 break; 339 case U_EUROPEAN_NUMBER: /* EN */ 340 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 341 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 342 } 343 break; 344 default : 345 break; 346 } 347 } 348 } else { 349 for(i=length; i>0; /* pre-decrement in the body */) { 350 c=s[--i]; 351 switch(ubidi_getClass(bdp, c)) { 352 case U_LEFT_TO_RIGHT: /* L */ 353 case U_RIGHT_TO_LEFT: /* R */ 354 lastStrongWasAL=FALSE; 355 break; 356 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 357 lastStrongWasAL=TRUE; 358 break; 359 case U_EUROPEAN_NUMBER: /* EN */ 360 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 361 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 362 } 363 break; 364 default : 365 break; 366 } 367 } 368 } 369 } 370 371 /* 372 *Name : invertBuffer 373 *Function : This function inverts the buffer, it's used 374 * in case the user specifies the buffer to be 375 * U_SHAPE_TEXT_DIRECTION_LOGICAL 376 */ 377 static void 378 invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) { 379 UChar temp; 380 int32_t i=0,j=0; 381 for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) 
{ 382 temp = buffer[i]; 383 buffer[i] = buffer[j]; 384 buffer[j] = temp; 385 } 386 } 387 388 /* 389 *Name : changeLamAlef 390 *Function : Converts the Alef characters into an equivalent 391 * LamAlef location in the 0x06xx Range, this is an 392 * intermediate stage in the operation of the program 393 * later it'll be converted into the 0xFExx LamAlefs 394 * in the shaping function. 395 */ 396 static inline UChar 397 changeLamAlef(UChar ch) { 398 switch(ch) { 399 case 0x0622 : 400 return 0x065C; 401 case 0x0623 : 402 return 0x065D; 403 case 0x0625 : 404 return 0x065E; 405 case 0x0627 : 406 return 0x065F; 407 } 408 return 0; 409 } 410 411 /* 412 *Name : getLink 413 *Function : Resolves the link between the characters as 414 * Arabic characters have four forms : 415 * Isolated, Initial, Middle and Final Form 416 */ 417 static UChar 418 getLink(UChar ch) { 419 if(ch >= 0x0622 && ch <= 0x06D3) { 420 return(araLink[ch-0x0622]); 421 } else if(ch == 0x200D) { 422 return(3); 423 } else if(ch >= 0x206D && ch <= 0x206F) { 424 return(4); 425 }else if(ch >= 0xFB50 && ch <= 0xFC62) { 426 return(presALink[ch-0xFB50]); 427 } else if(ch >= 0xFE70 && ch <= 0xFEFC) { 428 return(presBLink[ch-0xFE70]); 429 }else { 430 return(0); 431 } 432 } 433 434 /* 435 *Name : countSpaces 436 *Function : Counts the number of spaces 437 * at each end of the logical buffer 438 */ 439 static void 440 countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) { 441 int32_t i = 0; 442 int32_t countl = 0,countr = 0; 443 while((dest[i] == SPACE_CHAR) && (countl < size)) { 444 countl++; 445 i++; 446 } 447 if (countl < size) { /* the entire buffer is not all space */ 448 while(dest[size-1] == SPACE_CHAR) { 449 countr++; 450 size--; 451 } 452 } 453 *spacesCountl = countl; 454 *spacesCountr = countr; 455 } 456 457 /* 458 *Name : isTashkeelChar 459 *Function : Returns 1 for Tashkeel characters in 06 range else return 0 460 */ 461 static inline int32_t 462 
isTashkeelChar(UChar ch) { 463 return (int32_t)( ch>=0x064B && ch<= 0x0652 ); 464 } 465 466 /* 467 *Name : isTashkeelCharFE 468 *Function : Returns 1 for Tashkeel characters in FE range else return 0 469 */ 470 static inline int32_t 471 isTashkeelCharFE(UChar ch) { 472 return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F ); 473 } 474 475 /* 476 *Name : isAlefChar 477 *Function : Returns 1 for Alef characters else return 0 478 */ 479 static inline int32_t 480 isAlefChar(UChar ch) { 481 return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) ); 482 } 483 484 /* 485 *Name : isLamAlefChar 486 *Function : Returns 1 for LamAlef characters else return 0 487 */ 488 static inline int32_t 489 isLamAlefChar(UChar ch) { 490 return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) ); 491 } 492 493 /*BIDI 494 *Name : isTailChar 495 *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0 496 */ 497 498 static inline int32_t 499 isTailChar(UChar ch) { 500 if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){ 501 return 1; 502 }else{ 503 return 0; 504 } 505 } 506 507 /*BIDI 508 *Name : isSeenTailFamilyChar 509 *Function : returns 1 if the character is a seen family isolated character 510 * in the FE range otherwise returns 0 511 */ 512 513 static inline int32_t 514 isSeenTailFamilyChar(UChar ch) { 515 if(ch >= 0xfeb1 && ch < 0xfebf){ 516 return tailFamilyIsolatedFinal [ch - 0xFEB1]; 517 }else{ 518 return 0; 519 } 520 } 521 522 /* Name : isSeenFamilyChar 523 * Function : returns 1 if the character is a seen family character in the Unicode 524 * 06 range otherwise returns 0 525 */ 526 527 static inline int32_t 528 isSeenFamilyChar(UChar ch){ 529 if(ch >= 0x633 && ch <= 0x636){ 530 return 1; 531 }else { 532 return 0; 533 } 534 } 535 536 /*Start of BIDI*/ 537 /* 538 *Name : isAlefMaksouraChar 539 *Function : returns 1 if the character is a Alef Maksoura Final or isolated 540 * otherwise returns 0 541 */ 542 static inline int32_t 543 
isAlefMaksouraChar(UChar ch) { 544 return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); 545 } 546 547 /* 548 * Name : isYehHamzaChar 549 * Function : returns 1 if the character is a yehHamza isolated or yehhamza 550 * final is found otherwise returns 0 551 */ 552 static inline int32_t 553 isYehHamzaChar(UChar ch) { 554 if((ch==0xFE89)||(ch==0xFE8A)){ 555 return 1; 556 }else{ 557 return 0; 558 } 559 } 560 561 /* 562 * Name: isTashkeelOnTatweelChar 563 * Function: Checks if the Tashkeel Character is on Tatweel or not,if the 564 * Tashkeel on tatweel (FE range), it returns 1 else if the 565 * Tashkeel with shadda on tatweel (FC range)return 2 otherwise 566 * returns 0 567 */ 568 static inline int32_t 569 isTashkeelOnTatweelChar(UChar ch){ 570 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR) 571 { 572 return tashkeelMedial [ch - 0xFE70]; 573 }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) { 574 return 2; 575 }else{ 576 return 0; 577 } 578 } 579 580 /* 581 * Name: isIsolatedTashkeelChar 582 * Function: Checks if the Tashkeel Character is in the isolated form 583 * (i.e. Unicode FE range) returns 1 else if the Tashkeel 584 * with shadda is in the isolated form (i.e. Unicode FC range) 585 * returns 2 otherwise returns 0 586 */ 587 static inline int32_t 588 isIsolatedTashkeelChar(UChar ch){ 589 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){ 590 return (1 - tashkeelMedial [ch - 0xFE70]); 591 }else if(ch >= 0xfc5e && ch <= 0xfc63){ 592 return 1; 593 }else{ 594 return 0; 595 } 596 } 597 598 599 600 601 /* 602 *Name : calculateSize 603 *Function : This function calculates the destSize to be used in preflighting 604 * when the destSize is equal to 0 605 * It is used also to calculate the new destsize in case the 606 * destination buffer will be resized. 
607 */ 608 609 static int32_t 610 calculateSize(const UChar *source, int32_t sourceLength, 611 int32_t destSize,uint32_t options) { 612 int32_t i = 0; 613 614 int lamAlefOption = 0; 615 int tashkeelOption = 0; 616 617 destSize = sourceLength; 618 619 if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE || 620 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) && 621 ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){ 622 lamAlefOption = 1; 623 } 624 if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE && 625 ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){ 626 tashkeelOption = 1; 627 } 628 629 if(lamAlefOption || tashkeelOption){ 630 if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 631 for(i=0;i<sourceLength;i++) { 632 if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) { 633 destSize--; 634 } 635 } 636 }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) { 637 for(i=0;i<sourceLength;i++) { 638 if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) { 639 destSize--; 640 } 641 } 642 } 643 } 644 645 if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){ 646 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 647 for(i=0;i<sourceLength;i++) { 648 if(isLamAlefChar(source[i])) 649 destSize++; 650 } 651 } 652 } 653 654 return destSize; 655 } 656 657 /* 658 *Name : handleTashkeelWithTatweel 659 *Function : Replaces Tashkeel as following: 660 * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. 661 * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace 662 * it with Shadda on Tatweel. 663 * Case 3: if the Tashkeel is isolated replace it with Space. 
664 * 665 */ 666 static int32_t 667 handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength, 668 int32_t /*destSize*/, uint32_t /*options*/, 669 UErrorCode * /*pErrorCode*/) { 670 int i; 671 for(i = 0; i < sourceLength; i++){ 672 if((isTashkeelOnTatweelChar(dest[i]) == 1)){ 673 dest[i] = TATWEEL_CHAR; 674 }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){ 675 dest[i] = SHADDA_TATWEEL_CHAR; 676 }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){ 677 dest[i] = SPACE_CHAR; 678 } 679 } 680 return sourceLength; 681 } 682 683 684 685 /* 686 *Name : handleGeneratedSpaces 687 *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space, 688 * and Tashkeel to space. 689 * handleGeneratedSpaces function puts these generated spaces 690 * according to the options the user specifies. LamAlef and Tashkeel 691 * spaces can be replaced at begin, at end, at near or decrease the 692 * buffer size. 693 * 694 * There is also Auto option for LamAlef and tashkeel, which will put 695 * the spaces at end of the buffer (or end of text if the user used 696 * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END). 697 * 698 * If the text type was visual_LTR and the option 699 * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END 700 * option will place the space at the beginning of the buffer and 701 * BEGIN will place the space at the end of the buffer. 
702 */ 703 704 static int32_t 705 handleGeneratedSpaces(UChar *dest, int32_t sourceLength, 706 int32_t destSize, 707 /* BEGIN android-changed */ 708 uint64_t options, 709 /* END android-changed */ 710 UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) { 711 712 int32_t i = 0, j = 0; 713 int32_t count = 0; 714 UChar *tempbuffer=NULL; 715 716 int lamAlefOption = 0; 717 int tashkeelOption = 0; 718 int shapingMode = SHAPE_MODE; 719 720 if (shapingMode == 0){ 721 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){ 722 lamAlefOption = 1; 723 } 724 if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){ 725 tashkeelOption = 1; 726 } 727 } 728 729 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 730 /* Test for NULL */ 731 if(tempbuffer == NULL) { 732 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 733 return 0; 734 } 735 736 737 if (lamAlefOption || tashkeelOption){ 738 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 739 740 i = j = 0; count = 0; 741 while(i < sourceLength) { 742 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 743 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 744 j--; 745 count++; 746 } else { 747 tempbuffer[j] = dest[i]; 748 } 749 i++; 750 j++; 751 } 752 753 while(count >= 0) { 754 tempbuffer[i] = 0x0000; 755 i--; 756 count--; 757 } 758 759 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 760 destSize = u_strlen(dest); 761 } 762 763 lamAlefOption = 0; 764 765 if (shapingMode == 0){ 766 /* BEGIN android-changed */ 767 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR && 768 (options&U_SHAPE_X_LAMALEF_SUB_ALTERNATE) == 0) { /* if set, leave LAMALEF_SPACE_SUB in the output */ 769 /* END android-changed */ 770 lamAlefOption = 1; 771 } 772 } 773 774 if (lamAlefOption){ 775 /* Lam+Alef is already shaped into LamAlef + FFFF */ 776 i = 0; 777 while(i < sourceLength) { 778 if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){ 779 dest[i] = SPACE_CHAR; 780 } 781 i++; 782 } 783 
destSize = sourceLength; 784 } 785 lamAlefOption = 0; 786 tashkeelOption = 0; 787 788 if (shapingMode == 0) { 789 if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) || 790 (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 791 && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) { 792 lamAlefOption = 1; 793 } 794 if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) { 795 tashkeelOption = 1; 796 } 797 } 798 799 if(lamAlefOption || tashkeelOption){ 800 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 801 802 i = j = sourceLength; count = 0; 803 804 while(i >= 0) { 805 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 806 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 807 j++; 808 count++; 809 }else { 810 tempbuffer[j] = dest[i]; 811 } 812 i--; 813 j--; 814 } 815 816 for(i=0 ;i < count; i++){ 817 tempbuffer[i] = SPACE_CHAR; 818 } 819 820 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 821 destSize = sourceLength; 822 } 823 824 825 826 lamAlefOption = 0; 827 tashkeelOption = 0; 828 829 if (shapingMode == 0) { 830 if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) || 831 (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 832 && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) { 833 lamAlefOption = 1; 834 } 835 if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){ 836 tashkeelOption = 1; 837 } 838 } 839 840 if(lamAlefOption || tashkeelOption){ 841 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 842 843 i = j = 0; count = 0; 844 while(i < sourceLength) { 845 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 846 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 847 j--; 848 count++; 849 }else { 850 tempbuffer[j] = dest[i]; 851 } 852 i++; 853 j++; 854 } 855 856 while(count >= 0) { 857 tempbuffer[i] = SPACE_CHAR; 858 i--; 859 count--; 860 } 861 862 uprv_memcpy(dest,tempbuffer, sourceLength*U_SIZEOF_UCHAR); 863 destSize = sourceLength; 
864 } 865 866 867 if(tempbuffer){ 868 uprv_free(tempbuffer); 869 } 870 871 return destSize; 872 } 873 874 /* 875 *Name :expandCompositCharAtBegin 876 *Function :Expands the LamAlef character to Lam and Alef consuming the required 877 * space from beginning of the buffer. If the text type was visual_LTR 878 * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected 879 * the spaces will be located at end of buffer. 880 * If there are no spaces to expand the LamAlef, an error 881 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 882 */ 883 884 static int32_t 885 expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { 886 int32_t i = 0,j = 0; 887 int32_t countl = 0; 888 UChar *tempbuffer=NULL; 889 890 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 891 892 /* Test for NULL */ 893 if(tempbuffer == NULL) { 894 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 895 return 0; 896 } 897 898 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 899 900 i = 0; 901 while(dest[i] == SPACE_CHAR) { 902 countl++; 903 i++; 904 } 905 906 i = j = sourceLength-1; 907 908 while(i >= 0 && j >= 0) { 909 if( countl>0 && isLamAlefChar(dest[i])) { 910 tempbuffer[j] = LAM_CHAR; 911 /* to ensure the array index is within the range */ 912 U_ASSERT(dest[i] >= 0xFEF5u 913 && dest[i]-0xFEF5u < sizeof(convertLamAlef)/sizeof(convertLamAlef[0])); 914 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; 915 j--; 916 countl--; 917 }else { 918 if( countl == 0 && isLamAlefChar(dest[i]) ) { 919 *pErrorCode=U_NO_SPACE_AVAILABLE; 920 } 921 tempbuffer[j] = dest[i]; 922 } 923 i--; 924 j--; 925 } 926 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 927 928 uprv_free(tempbuffer); 929 930 destSize = sourceLength; 931 return destSize; 932 } 933 934 /* 935 *Name : expandCompositCharAtEnd 936 *Function : Expands the LamAlef character to Lam and Alef consuming the 937 * required space from end of the buffer. 
If the text type was
 *           Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END
 *           was used, the spaces will be consumed from begin of buffer. If
 *           there are no spaces to expand the LamAlef, an error
 *           will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
 */
/*
 * Rewrites dest[0..sourceLength) in place. Trailing spaces that were not
 * consumed by an expansion remain at the end of the text.
 * Returns sourceLength, or 0 with U_MEMORY_ALLOCATION_ERROR if the scratch
 * buffer cannot be allocated.
 */

static int32_t
expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
    int32_t      i = 0,j = 0;

    int32_t      countr = 0;
    int32_t  inpsize = sourceLength;

    UChar    *tempbuffer=NULL;
    tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);

    /* Test for NULL */
    if(tempbuffer == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
         return 0;
    }

    uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);

    /* count the trailing spaces; the bound keeps an empty or all-space buffer from reading dest[-1] */
    while((inpsize > 0) && (dest[inpsize-1] == SPACE_CHAR)) {
        countr++;
        inpsize--;
    }

    i = sourceLength - countr - 1;
    j = sourceLength - 1;

    /* copy back to front, right-aligned; each expansion consumes one trailing space */
    while(i >= 0 && j >= 0) {
        if( countr>0 && isLamAlefChar(dest[i]) ) {
            tempbuffer[j] = LAM_CHAR;
            tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
            j--;
            countr--;
        }else {
            if ((countr == 0) && isLamAlefChar(dest[i]) ) {
                *pErrorCode=U_NO_SPACE_AVAILABLE;
            }
            tempbuffer[j] = dest[i];
        }
        i--;
        j--;
    }

    if(countr > 0) {
        /*
         * countr spaces were left over, so the text sits countr positions
         * too far right. Shift it to the front. Only the elements that
         * exist are moved (indices countr..sourceLength); moving
         * sourceLength elements read past the end of the allocation
         * whenever countr > 1.
         */
        uprv_memmove(tempbuffer, tempbuffer+countr, (sourceLength-countr+1)*U_SIZEOF_UCHAR);
        /* put the unused spaces back at the end of the text */
        for(i=sourceLength-1;i>=sourceLength-countr;i--) {
            tempbuffer[i] = SPACE_CHAR;
        }
    }
    uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR);

    uprv_free(tempbuffer);

    destSize = sourceLength;
    return destSize;
}

/*
 *Name     : expandCompositCharAtNear
 *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
 *           into Yeh + Hamza, SeenFamily character into SeenFamily character
 *           + Tail, while consuming the space next to the character.
1007 * If there are no spaces next to the character, an error 1008 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 1009 */ 1010 1011 static int32_t 1012 expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode, 1013 int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) { 1014 int32_t i = 0; 1015 1016 1017 UChar lamalefChar, yehhamzaChar; 1018 1019 for(i = 0 ;i<=sourceLength-1;i++) { 1020 if (seenTailOption && isSeenTailFamilyChar(dest[i])) { 1021 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1022 dest[i-1] = shapeVars.tailChar; 1023 }else { 1024 *pErrorCode=U_NO_SPACE_AVAILABLE; 1025 } 1026 }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) { 1027 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1028 yehhamzaChar = dest[i]; 1029 dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR]; 1030 dest[i-1] = HAMZAFE_CHAR; 1031 }else { 1032 1033 *pErrorCode=U_NO_SPACE_AVAILABLE; 1034 } 1035 }else if(lamAlefOption && isLamAlefChar(dest[i+1])) { 1036 if(dest[i] == SPACE_CHAR){ 1037 lamalefChar = dest[i+1]; 1038 dest[i+1] = LAM_CHAR; 1039 dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ]; 1040 }else { 1041 *pErrorCode=U_NO_SPACE_AVAILABLE; 1042 } 1043 } 1044 } 1045 destSize = sourceLength; 1046 return destSize; 1047 } 1048 /* 1049 * Name : expandCompositChar 1050 * Function : LamAlef, need special handling, since it expands from one 1051 * character into two characters while shaping or deshaping. 1052 * In order to expand it, near or far spaces according to the 1053 * options user specifies. Also buffer size can be increased. 1054 * 1055 * For SeenFamily characters and YehHamza only the near option is 1056 * supported, while for LamAlef we can take spaces from begin, end, 1057 * near or even increase the buffer size. 1058 * There is also the Auto option for LamAlef only, which will first 1059 * search for a space at end, begin then near, respectively. 
1060 * If there are no spaces to expand these characters, an error will be set to 1061 * U_NO_SPACE_AVAILABLE as defined in utypes.h 1062 */ 1063 1064 static int32_t 1065 expandCompositChar(UChar *dest, int32_t sourceLength, 1066 int32_t destSize,uint32_t options, 1067 UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) { 1068 1069 int32_t i = 0,j = 0; 1070 1071 UChar *tempbuffer=NULL; 1072 int yehHamzaOption = 0; 1073 int seenTailOption = 0; 1074 int lamAlefOption = 0; 1075 1076 if (shapingMode == 1){ 1077 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){ 1078 1079 if(shapeVars.spacesRelativeToTextBeginEnd == 0) { 1080 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1081 1082 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1083 *pErrorCode = U_ZERO_ERROR; 1084 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1085 } 1086 }else { 1087 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1088 1089 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1090 *pErrorCode = U_ZERO_ERROR; 1091 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1092 } 1093 } 1094 1095 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1096 *pErrorCode = U_ZERO_ERROR; 1097 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1098 seenTailOption, 1,shapeVars); 1099 } 1100 } 1101 } 1102 1103 if (shapingMode == 1){ 1104 if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){ 1105 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1106 } 1107 } 1108 1109 if (shapingMode == 1){ 1110 if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){ 1111 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1112 } 1113 } 1114 1115 if (shapingMode == 0){ 1116 if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){ 1117 yehHamzaOption = 1; 1118 } 1119 if 
((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){ 1120 seenTailOption = 1; 1121 } 1122 } 1123 if (shapingMode == 1) { 1124 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) { 1125 lamAlefOption = 1; 1126 } 1127 } 1128 1129 1130 if (yehHamzaOption || seenTailOption || lamAlefOption){ 1131 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1132 seenTailOption,lamAlefOption,shapeVars); 1133 } 1134 1135 1136 if (shapingMode == 1){ 1137 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 1138 destSize = calculateSize(dest,sourceLength,destSize,options); 1139 tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR); 1140 1141 /* Test for NULL */ 1142 if(tempbuffer == NULL) { 1143 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1144 return 0; 1145 } 1146 1147 uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR); 1148 1149 i = j = 0; 1150 while(i < destSize && j < destSize) { 1151 if(isLamAlefChar(dest[i]) ) { 1152 tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ]; 1153 tempbuffer[j+1] = LAM_CHAR; 1154 j++; 1155 }else { 1156 tempbuffer[j] = dest[i]; 1157 } 1158 i++; 1159 j++; 1160 } 1161 1162 uprv_memcpy(dest, tempbuffer, destSize*U_SIZEOF_UCHAR); 1163 } 1164 } 1165 1166 if(tempbuffer) { 1167 uprv_free(tempbuffer); 1168 } 1169 return destSize; 1170 } 1171 1172 /* 1173 *Name : shapeUnicode 1174 *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped 1175 * arabic Unicode buffer in FExx Range 1176 */ 1177 static int32_t 1178 shapeUnicode(UChar *dest, int32_t sourceLength, 1179 int32_t destSize,uint32_t options, 1180 UErrorCode *pErrorCode, 1181 int tashkeelFlag, struct uShapeVariables shapeVars) { 1182 1183 int32_t i, iend; 1184 int32_t step; 1185 int32_t lastPos,Nx, Nw; 1186 unsigned int Shape; 1187 int32_t lamalef_found = 0; 1188 int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0; 1189 UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0; 1190 UChar wLamalef; 
1191 1192 /* 1193 * Converts the input buffer from FExx Range into 06xx Range 1194 * to make sure that all characters are in the 06xx range 1195 * even the lamalef is converted to the special region in 1196 * the 06xx range 1197 */ 1198 if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) { 1199 for (i = 0; i < sourceLength; i++) { 1200 UChar inputChar = dest[i]; 1201 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { 1202 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1203 if (c != 0) 1204 dest[i] = c; 1205 } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) { 1206 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ; 1207 } else { 1208 dest[i] = inputChar ; 1209 } 1210 } 1211 } 1212 1213 1214 /* sets the index to the end of the buffer, together with the step point to -1 */ 1215 i = sourceLength - 1; 1216 iend = -1; 1217 step = -1; 1218 1219 /* 1220 * This function resolves the link between the characters . 1221 * Arabic characters have four forms : 1222 * Isolated Form, Initial Form, Middle Form and Final Form 1223 */ 1224 currLink = getLink(dest[i]); 1225 1226 lastPos = i; 1227 Nx = -2, Nw = 0; 1228 1229 while (i != iend) { 1230 /* If high byte of currLink > 0 then more than one shape */ 1231 if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) { 1232 Nw = i + step; 1233 while (Nx < 0) { /* we need to know about next char */ 1234 if(Nw == iend) { 1235 nextLink = 0; 1236 Nx = 3000; 1237 } else { 1238 nextLink = getLink(dest[Nw]); 1239 if((nextLink & IRRELEVANT) == 0) { 1240 Nx = Nw; 1241 } else { 1242 Nw = Nw + step; 1243 } 1244 } 1245 } 1246 1247 if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) { 1248 lamalef_found = 1; 1249 wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */ 1250 if ( wLamalef != 0) { 1251 dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */ 1252 dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last 
character in the */ 1253 i=lastPos; /* unicode private use area, this is done to make */ 1254 } /* sure that removeLamAlefSpaces() handles only the */ 1255 lastLink = prevLink; /* spaces generated during lamalef generation. */ 1256 currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */ 1257 } /* in removeLamAlefSpaces() */ 1258 1259 if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ 1260 if ( isSeenFamilyChar(dest[i])) { 1261 seenfamFound = 1; 1262 } else if (dest[i] == YEH_HAMZA_CHAR) { 1263 yehhamzaFound = 1; 1264 } 1265 } 1266 else if(i==0){ 1267 if ( isSeenFamilyChar(dest[i])){ 1268 seenfamFound = 1; 1269 } else if (dest[i] == YEH_HAMZA_CHAR) { 1270 yehhamzaFound = 1; 1271 } 1272 } 1273 1274 /* 1275 * get the proper shape according to link ability of neighbors 1276 * and of character; depends on the order of the shapes 1277 * (isolated, initial, middle, final) in the compatibility area 1278 */ 1279 Shape = shapeTable[nextLink & (LINKR + LINKL)] 1280 [lastLink & (LINKR + LINKL)] 1281 [currLink & (LINKR + LINKL)]; 1282 1283 if ((currLink & (LINKR+LINKL)) == 1) { 1284 Shape &= 1; 1285 } else if(isTashkeelChar(dest[i])) { 1286 if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) && 1287 dest[i] != 0x064C && dest[i] != 0x064D ) 1288 { 1289 Shape = 1; 1290 if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) { 1291 Shape = 0; 1292 } 1293 } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){ 1294 Shape = 1; 1295 } else { 1296 Shape = 0; 1297 } 1298 } 1299 if ((dest[i] ^ 0x0600) < 0x100) { 1300 if ( isTashkeelChar(dest[i]) ){ 1301 if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){ 1302 dest[i] = TASHKEEL_SPACE_SUB; 1303 tashkeelFound = 1; 1304 } else { 1305 /* to ensure the array index is within the range */ 1306 U_ASSERT(dest[i] >= 0x064Bu 1307 && dest[i]-0x064Bu < sizeof(IrrelevantPos)/sizeof(IrrelevantPos[0])); 1308 dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; 1309 } 1310 }else if 
((currLink & APRESENT) > 0) { 1311 dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); 1312 }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) { 1313 dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape); 1314 } 1315 } 1316 } 1317 1318 /* move one notch forward */ 1319 if ((currLink & IRRELEVANT) == 0) { 1320 prevLink = lastLink; 1321 lastLink = currLink; 1322 lastPos = i; 1323 } 1324 1325 i = i + step; 1326 if (i == Nx) { 1327 currLink = nextLink; 1328 Nx = -2; 1329 } else if(i != iend) { 1330 currLink = getLink(dest[i]); 1331 } 1332 } 1333 destSize = sourceLength; 1334 if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){ 1335 destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars); 1336 } 1337 1338 if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) { 1339 destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars); 1340 } 1341 return destSize; 1342 } 1343 1344 /* 1345 *Name : deShapeUnicode 1346 *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped 1347 * arabic Unicode buffer in 06xx Range 1348 */ 1349 static int32_t 1350 deShapeUnicode(UChar *dest, int32_t sourceLength, 1351 int32_t destSize,uint32_t options, 1352 UErrorCode *pErrorCode, struct uShapeVariables shapeVars) { 1353 int32_t i = 0; 1354 int32_t lamalef_found = 0; 1355 int32_t yehHamzaComposeEnabled = 0; 1356 int32_t seenComposeEnabled = 0; 1357 1358 yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0; 1359 seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 
1 : 0; 1360 1361 /* 1362 *This for loop changes the buffer from the Unicode FE range to 1363 *the Unicode 06 range 1364 */ 1365 1366 for(i = 0; i < sourceLength; i++) { 1367 UChar inputChar = dest[i]; 1368 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */ 1369 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1370 if (c != 0) 1371 dest[i] = c; 1372 } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR)) 1373 && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) { 1374 dest[i] = SPACE_CHAR; 1375 dest[i+1] = YEH_HAMZA_CHAR; 1376 } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1)) 1377 && (isSeenTailFamilyChar(dest[i+1])) ) { 1378 dest[i] = SPACE_CHAR; 1379 } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */ 1380 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ]; 1381 } else { 1382 dest[i] = inputChar ; 1383 } 1384 1385 if( isLamAlefChar(dest[i]) ) 1386 lamalef_found = 1; 1387 } 1388 1389 destSize = sourceLength; 1390 if (lamalef_found != 0){ 1391 destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars); 1392 } 1393 return destSize; 1394 } 1395 1396 /* 1397 **************************************** 1398 * u_shapeArabic 1399 **************************************** 1400 */ 1401 1402 /* BEGIN android-changed */ 1403 U_CAPI int32_t U_EXPORT2 1404 u_shapeArabic(const UChar *source, int32_t sourceLength, 1405 UChar *dest, int32_t destCapacity, 1406 uint64_t options, 1407 UErrorCode *pErrorCode) { 1408 /* END android-changed */ 1409 1410 int32_t destLength; 1411 struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0}; 1412 1413 /* usual error checking */ 1414 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1415 return 0; 1416 } 1417 1418 /* make sure that no reserved options values are 
used; allow dest==NULL only for preflighting */ 1419 if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 || 1420 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1421 ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) || 1422 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1423 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) || 1424 (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED || 1425 (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED || 1426 ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE && 1427 (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) || 1428 ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL && 1429 (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) 1430 ) 1431 { 1432 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1433 return 0; 1434 } 1435 /* Validate lamalef options */ 1436 if(((options&U_SHAPE_LAMALEF_MASK) > 0)&& 1437 !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) || 1438 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) || 1439 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )|| 1440 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) || 1441 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR))) 1442 { 1443 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1444 return 0; 1445 } 1446 /* Validate Tashkeel options */ 1447 if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&& 1448 !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) || 1449 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END ) 1450 ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )|| 1451 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL))) 1452 { 1453 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1454 return 0; 1455 } 1456 /* determine the source length */ 1457 if(sourceLength==-1) { 1458 sourceLength=u_strlen(source); 1459 } 1460 if(sourceLength<=0) { 1461 
return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 1462 } 1463 1464 /* check that source and destination do not overlap */ 1465 if( dest!=NULL && 1466 ((source<=dest && dest<source+sourceLength) || 1467 (dest<=source && source<dest+destCapacity))) { 1468 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1469 return 0; 1470 } 1471 1472 /* Does Options contain the new Seen Tail Unicode code point option */ 1473 if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){ 1474 shapeVars.tailChar = NEW_TAIL_CHAR; 1475 }else { 1476 shapeVars.tailChar = OLD_TAIL_CHAR; 1477 } 1478 1479 if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) { 1480 UChar buffer[300]; 1481 UChar *tempbuffer, *tempsource = NULL; 1482 int32_t outputSize, spacesCountl=0, spacesCountr=0; 1483 1484 if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) { 1485 int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL; 1486 int32_t aggregate_tashkeel = 1487 (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) == 1488 (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED); 1489 int step=logical_order?1:-1; 1490 int j=logical_order?-1:2*sourceLength; 1491 int i=logical_order?-1:sourceLength; 1492 int end=logical_order?sourceLength:-1; 1493 int aggregation_possible = 1; 1494 UChar prev = 0; 1495 UChar prevLink, currLink = 0; 1496 int newSourceLength = 0; 1497 tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR); 1498 if(tempsource == NULL) { 1499 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1500 return 0; 1501 } 1502 1503 while ((i+=step) != end) { 1504 prevLink = currLink; 1505 currLink = getLink(source[i]); 1506 if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) { 1507 aggregation_possible = 0; 1508 tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E; 1509 currLink = getLink(tempsource[j]); 1510 } else { 1511 aggregation_possible = 1; 1512 tempsource[j+=step] 
= source[i]; 1513 prev = source[i]; 1514 newSourceLength++; 1515 } 1516 } 1517 source = tempsource+(logical_order?0:j); 1518 sourceLength = newSourceLength; 1519 } 1520 1521 /* calculate destination size */ 1522 /* TODO: do we ever need to do this pure preflighting? */ 1523 if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) || 1524 ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) { 1525 outputSize=calculateSize(source,sourceLength,destCapacity,options); 1526 } else { 1527 outputSize=sourceLength; 1528 } 1529 1530 if(outputSize>destCapacity) { 1531 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1532 if (tempsource != NULL) uprv_free(tempsource); 1533 return outputSize; 1534 } 1535 1536 /* 1537 * need a temporary buffer of size max(outputSize, sourceLength) 1538 * because at first we copy source->temp 1539 */ 1540 if(sourceLength>outputSize) { 1541 outputSize=sourceLength; 1542 } 1543 1544 /* Start of Arabic letter shaping part */ 1545 if(outputSize<=LENGTHOF(buffer)) { 1546 outputSize=LENGTHOF(buffer); 1547 tempbuffer=buffer; 1548 } else { 1549 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); 1550 1551 /*Test for NULL*/ 1552 if(tempbuffer == NULL) { 1553 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1554 if (tempsource != NULL) uprv_free(tempsource); 1555 return 0; 1556 } 1557 } 1558 uprv_memcpy(tempbuffer, source, sourceLength*U_SIZEOF_UCHAR); 1559 if (tempsource != NULL){ 1560 uprv_free(tempsource); 1561 } 1562 1563 if(sourceLength<outputSize) { 1564 uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR); 1565 } 1566 1567 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1568 countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr); 1569 invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr); 1570 } 1571 1572 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 1573 if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == 
U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) { 1574 shapeVars.spacesRelativeToTextBeginEnd = 1; 1575 shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END; 1576 shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN; 1577 shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END; 1578 shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN; 1579 } 1580 } 1581 1582 switch(options&U_SHAPE_LETTERS_MASK) { 1583 case U_SHAPE_LETTERS_SHAPE : 1584 if( (options&U_SHAPE_TASHKEEL_MASK)> 0 1585 && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) { 1586 /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */ 1587 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars); 1588 }else { 1589 /* default Call the shaping function with tashkeel flag == 1 */ 1590 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars); 1591 1592 /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/ 1593 if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){ 1594 destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode); 1595 } 1596 } 1597 break; 1598 case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED : 1599 /* Call the shaping function with tashkeel flag == 0 */ 1600 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars); 1601 break; 1602 1603 case U_SHAPE_LETTERS_UNSHAPE : 1604 /* Call the deshaping function */ 1605 destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars); 1606 break; 1607 default : 1608 /* will never occur because of validity checks above */ 1609 destLength = 0; 1610 break; 1611 } 1612 1613 /* 1614 * TODO: (markus 2002aug01) 1615 * For as long as we always preflight the outputSize above 1616 * we should U_ASSERT(outputSize==destLength) 1617 * except for the adjustment above before the tempbuffer allocation 1618 */ 
1619 1620 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1621 countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr); 1622 invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr); 1623 } 1624 uprv_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)*U_SIZEOF_UCHAR); 1625 1626 if(tempbuffer!=buffer) { 1627 uprv_free(tempbuffer); 1628 } 1629 1630 if(destLength>destCapacity) { 1631 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1632 return destLength; 1633 } 1634 1635 /* End of Arabic letter shaping part */ 1636 } else { 1637 /* 1638 * No letter shaping: 1639 * just make sure the destination is large enough and copy the string. 1640 */ 1641 if(destCapacity<sourceLength) { 1642 /* this catches preflighting, too */ 1643 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1644 return sourceLength; 1645 } 1646 uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR); 1647 destLength=sourceLength; 1648 } 1649 1650 /* 1651 * Perform number shaping. 1652 * With UTF-16 or UTF-32, the length of the string is constant. 1653 * The easiest way to do this is to operate on the destination and 1654 * "shape" the digits in-place. 
1655 */ 1656 if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) { 1657 UChar digitBase; 1658 int32_t i; 1659 1660 /* select the requested digit group */ 1661 switch(options&U_SHAPE_DIGIT_TYPE_MASK) { 1662 case U_SHAPE_DIGIT_TYPE_AN: 1663 digitBase=0x660; /* Unicode: "Arabic-Indic digits" */ 1664 break; 1665 case U_SHAPE_DIGIT_TYPE_AN_EXTENDED: 1666 digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */ 1667 break; 1668 default: 1669 /* will never occur because of validity checks above */ 1670 digitBase=0; 1671 break; 1672 } 1673 1674 /* perform the requested operation */ 1675 switch(options&U_SHAPE_DIGITS_MASK) { 1676 case U_SHAPE_DIGITS_EN2AN: 1677 /* add (digitBase-'0') to each European (ASCII) digit code point */ 1678 digitBase-=0x30; 1679 for(i=0; i<destLength; ++i) { 1680 if(((uint32_t)dest[i]-0x30)<10) { 1681 dest[i]+=digitBase; 1682 } 1683 } 1684 break; 1685 case U_SHAPE_DIGITS_AN2EN: 1686 /* subtract (digitBase-'0') from each Arabic digit code point */ 1687 for(i=0; i<destLength; ++i) { 1688 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) { 1689 dest[i]-=digitBase-0x30; 1690 } 1691 } 1692 break; 1693 case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: 1694 _shapeToArabicDigitsWithContext(dest, destLength, 1695 digitBase, 1696 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1697 FALSE); 1698 break; 1699 case U_SHAPE_DIGITS_ALEN2AN_INIT_AL: 1700 _shapeToArabicDigitsWithContext(dest, destLength, 1701 digitBase, 1702 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1703 TRUE); 1704 break; 1705 default: 1706 /* will never occur because of validity checks above */ 1707 break; 1708 } 1709 } 1710 1711 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 1712 } 1713