1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2000-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ushape.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2000jun29 14 * created by: Markus W. Scherer 15 * 16 * Arabic letter shaping implemented by Ayman Roshdy 17 */ 18 19 #include "unicode/utypes.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ustring.h" 22 #include "unicode/ushape.h" 23 #include "cmemory.h" 24 #include "putilimp.h" 25 #include "ustr_imp.h" 26 #include "ubidi_props.h" 27 #include "uassert.h" 28 29 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 30 31 /* 32 * This implementation is designed for 16-bit Unicode strings. 33 * The main assumption is that the Arabic characters and their 34 * presentation forms each fit into a single UChar. 35 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII 36 * characters. 37 */ 38 39 /* 40 * ### TODO in general for letter shaping: 41 * - the letter shaping code is UTF-16-unaware; needs update 42 * + especially invertBuffer()?! 
43 * - needs to handle the "Arabic Tail" that is used in some legacy codepages 44 * as a glyph fragment of wide-glyph letters 45 * + IBM Unicode conversion tables map it to U+200B (ZWSP) 46 * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms 47 * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT 48 */ 49 50 /* definitions for Arabic letter shaping ------------------------------------ */ 51 52 #define IRRELEVANT 4 53 #define LAMTYPE 16 54 #define ALEFTYPE 32 55 #define LINKR 1 56 #define LINKL 2 57 #define APRESENT 8 58 #define SHADDA 64 59 #define CSHADDA 128 60 #define COMBINE (SHADDA+CSHADDA) 61 62 #define HAMZAFE_CHAR 0xfe80 63 #define HAMZA06_CHAR 0x0621 64 #define YEH_HAMZA_CHAR 0x0626 65 #define YEH_HAMZAFE_CHAR 0xFE89 66 #define LAMALEF_SPACE_SUB 0xFFFF 67 #define TASHKEEL_SPACE_SUB 0xFFFE 68 #define NEW_TAIL_CHAR 0xFE73 69 #define OLD_TAIL_CHAR 0x200B 70 #define LAM_CHAR 0x0644 71 #define SPACE_CHAR 0x0020 72 #define SHADDA_CHAR 0xFE7C 73 #define TATWEEL_CHAR 0x0640 74 #define SHADDA_TATWEEL_CHAR 0xFE7D 75 #define SHADDA06_CHAR 0x0651 76 77 #define SHAPE_MODE 0 78 #define DESHAPE_MODE 1 79 80 struct uShapeVariables { 81 UChar tailChar; 82 uint32_t uShapeLamalefBegin; 83 uint32_t uShapeLamalefEnd; 84 uint32_t uShapeTashkeelBegin; 85 uint32_t uShapeTashkeelEnd; 86 int spacesRelativeToTextBeginEnd; 87 }; 88 89 static const uint8_t tailFamilyIsolatedFinal[] = { 90 /* FEB1 */ 1, 91 /* FEB2 */ 1, 92 /* FEB3 */ 0, 93 /* FEB4 */ 0, 94 /* FEB5 */ 1, 95 /* FEB6 */ 1, 96 /* FEB7 */ 0, 97 /* FEB8 */ 0, 98 /* FEB9 */ 1, 99 /* FEBA */ 1, 100 /* FEBB */ 0, 101 /* FEBC */ 0, 102 /* FEBD */ 1, 103 /* FEBE */ 1 104 }; 105 106 static const uint8_t tashkeelMedial[] = { 107 /* FE70 */ 0, 108 /* FE71 */ 1, 109 /* FE72 */ 0, 110 /* FE73 */ 0, 111 /* FE74 */ 0, 112 /* FE75 */ 0, 113 /* FE76 */ 0, 114 /* FE77 */ 1, 115 /* FE78 */ 0, 116 /* FE79 */ 1, 117 /* FE7A */ 0, 118 /* FE7B */ 1, 119 /* FE7C */ 0, 120 /* FE7D */ 1, 121 /* FE7E */ 0, 122 /* 
FE7F */ 1 123 }; 124 125 static const UChar yehHamzaToYeh[] = 126 { 127 /* isolated*/ 0xFEEF, 128 /* final */ 0xFEF0 129 }; 130 131 static const uint8_t IrrelevantPos[] = { 132 0x0, 0x2, 0x4, 0x6, 133 0x8, 0xA, 0xC, 0xE 134 }; 135 136 137 static const UChar convertLamAlef[] = 138 { 139 /*FEF5*/ 0x0622, 140 /*FEF6*/ 0x0622, 141 /*FEF7*/ 0x0623, 142 /*FEF8*/ 0x0623, 143 /*FEF9*/ 0x0625, 144 /*FEFA*/ 0x0625, 145 /*FEFB*/ 0x0627, 146 /*FEFC*/ 0x0627 147 }; 148 149 static const UChar araLink[178]= 150 { 151 1 + 32 + 256 * 0x11,/*0x0622*/ 152 1 + 32 + 256 * 0x13,/*0x0623*/ 153 1 + 256 * 0x15,/*0x0624*/ 154 1 + 32 + 256 * 0x17,/*0x0625*/ 155 1 + 2 + 256 * 0x19,/*0x0626*/ 156 1 + 32 + 256 * 0x1D,/*0x0627*/ 157 1 + 2 + 256 * 0x1F,/*0x0628*/ 158 1 + 256 * 0x23,/*0x0629*/ 159 1 + 2 + 256 * 0x25,/*0x062A*/ 160 1 + 2 + 256 * 0x29,/*0x062B*/ 161 1 + 2 + 256 * 0x2D,/*0x062C*/ 162 1 + 2 + 256 * 0x31,/*0x062D*/ 163 1 + 2 + 256 * 0x35,/*0x062E*/ 164 1 + 256 * 0x39,/*0x062F*/ 165 1 + 256 * 0x3B,/*0x0630*/ 166 1 + 256 * 0x3D,/*0x0631*/ 167 1 + 256 * 0x3F,/*0x0632*/ 168 1 + 2 + 256 * 0x41,/*0x0633*/ 169 1 + 2 + 256 * 0x45,/*0x0634*/ 170 1 + 2 + 256 * 0x49,/*0x0635*/ 171 1 + 2 + 256 * 0x4D,/*0x0636*/ 172 1 + 2 + 256 * 0x51,/*0x0637*/ 173 1 + 2 + 256 * 0x55,/*0x0638*/ 174 1 + 2 + 256 * 0x59,/*0x0639*/ 175 1 + 2 + 256 * 0x5D,/*0x063A*/ 176 0, 0, 0, 0, 0, /*0x063B-0x063F*/ 177 1 + 2, /*0x0640*/ 178 1 + 2 + 256 * 0x61,/*0x0641*/ 179 1 + 2 + 256 * 0x65,/*0x0642*/ 180 1 + 2 + 256 * 0x69,/*0x0643*/ 181 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/ 182 1 + 2 + 256 * 0x71,/*0x0645*/ 183 1 + 2 + 256 * 0x75,/*0x0646*/ 184 1 + 2 + 256 * 0x79,/*0x0647*/ 185 1 + 256 * 0x7D,/*0x0648*/ 186 1 + 256 * 0x7F,/*0x0649*/ 187 1 + 2 + 256 * 0x81,/*0x064A*/ 188 4 + 256 * 1, /*0x064B*/ 189 4 + 128 + 256 * 1, /*0x064C*/ 190 4 + 128 + 256 * 1, /*0x064D*/ 191 4 + 128 + 256 * 1, /*0x064E*/ 192 4 + 128 + 256 * 1, /*0x064F*/ 193 4 + 128 + 256 * 1, /*0x0650*/ 194 4 + 64 + 256 * 3, /*0x0651*/ 195 4 + 256 * 1, /*0x0652*/ 196 4 + 
256 * 7, /*0x0653*/ 197 4 + 256 * 8, /*0x0654*/ 198 4 + 256 * 8, /*0x0655*/ 199 4 + 256 * 1, /*0x0656*/ 200 0, 0, 0, 0, 0, /*0x0657-0x065B*/ 201 1 + 256 * 0x85,/*0x065C*/ 202 1 + 256 * 0x87,/*0x065D*/ 203 1 + 256 * 0x89,/*0x065E*/ 204 1 + 256 * 0x8B,/*0x065F*/ 205 0, 0, 0, 0, 0, /*0x0660-0x0664*/ 206 0, 0, 0, 0, 0, /*0x0665-0x0669*/ 207 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ 208 4 + 256 * 6, /*0x0670*/ 209 1 + 8 + 256 * 0x00,/*0x0671*/ 210 1 + 32, /*0x0672*/ 211 1 + 32, /*0x0673*/ 212 0, /*0x0674*/ 213 1 + 32, /*0x0675*/ 214 1, 1, /*0x0676-0x0677*/ 215 1 + 2, /*0x0678*/ 216 1 + 2 + 8 + 256 * 0x16,/*0x0679*/ 217 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/ 218 1 + 2 + 8 + 256 * 0x02,/*0x067B*/ 219 1+2, 1+2, /*0x67C-0x067D*/ 220 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ 221 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ 222 1 + 8 + 256 * 0x38,/*0x0688*/ 223 1, 1, 1, /*0x0689-0x068B*/ 224 1 + 8 + 256 * 0x34,/*0x068C*/ 225 1 + 8 + 256 * 0x32,/*0x068D*/ 226 1 + 8 + 256 * 0x36,/*0x068E*/ 227 1, 1, /*0x068F-0x0690*/ 228 1 + 8 + 256 * 0x3C,/*0x0691*/ 229 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ 230 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 231 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ 232 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ 233 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ 234 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 235 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ 236 1+2, 1+2, /*0x06B8-0x06B9*/ 237 1 + 8 + 256 * 0x4E,/*0x06BA*/ 238 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/ 239 1+2, 1+2, /*0x06BC-0x06BD*/ 240 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/ 241 1+2, /*0x06BF*/ 242 1 + 8 + 256 * 0x54,/*0x06C0*/ 243 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/ 244 1, 1, 1, /*0x06C2-0x06C4*/ 245 1 + 8 + 256 * 0x90,/*0x06C5*/ 246 1 + 8 + 256 * 0x89,/*0x06C6*/ 247 1 + 8 + 256 * 0x87,/*0x06C7*/ 248 1 + 8 + 256 * 0x8B,/*0x06C8*/ 249 1 + 8 + 256 * 0x92,/*0x06C9*/ 250 1, /*0x06CA*/ 251 1 + 8 + 256 * 0x8E,/*0x06CB*/ 252 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/ 253 1, 
/*0x06CD*/ 254 1+2, 1+2, /*0x06CE-0x06CF*/ 255 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/ 256 1+2, /*0x06D1*/ 257 1 + 8 + 256 * 0x5E,/*0x06D2*/ 258 1 + 8 + 256 * 0x60 /*0x06D3*/ 259 }; 260 261 static const uint8_t presALink[] = { 262 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 263 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 264 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 265 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 266 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 267 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 268 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 269 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 270 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 271 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 272 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 273 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 274 /*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 275 /*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 276 /*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 277 /*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 278 /*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 279 /*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 280 /*FC6*/ 4, 4, 4 281 }; 282 283 static const uint8_t presBLink[]= 284 { 285 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ 286 /*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2, 287 /*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0, 288 /*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 289 /*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 290 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 291 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 292 
/*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, 293 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 294 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0 295 }; 296 297 static const UChar convertFBto06[] = 298 { 299 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 300 /*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0x67A, 0x67A, 301 /*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679, 0, 0, 0, 0, 0, 0, 302 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0, 303 /*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9, 304 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0x6BA, 0x6BA, 305 /*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2, 306 /*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 307 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 308 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB, 309 /*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0, 0, 0, 0, 0, 0, 0, 310 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC 311 }; 312 313 static const UChar convertFEto06[] = 314 { 315 /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ 316 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652, 317 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628, 318 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, 319 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 
0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, 320 /*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636, 321 /*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A, 322 /*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644, 323 /*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649, 324 /*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F 325 }; 326 327 static const uint8_t shapeTable[4][4][4]= 328 { 329 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} }, 330 { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }, 331 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} }, 332 { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} } 333 }; 334 335 /* 336 * This function shapes European digits to Arabic-Indic digits 337 * in-place, writing over the input characters. 338 * Since we know that we are only looking for BMP code points, 339 * we can safely just work with code units (again, at least UTF-16). 
340 */ 341 static void 342 _shapeToArabicDigitsWithContext(UChar *s, int32_t length, 343 UChar digitBase, 344 UBool isLogical, UBool lastStrongWasAL) { 345 const UBiDiProps *bdp; 346 int32_t i; 347 UChar c; 348 349 bdp=ubidi_getSingleton(); 350 digitBase-=0x30; 351 352 /* the iteration direction depends on the type of input */ 353 if(isLogical) { 354 for(i=0; i<length; ++i) { 355 c=s[i]; 356 switch(ubidi_getClass(bdp, c)) { 357 case U_LEFT_TO_RIGHT: /* L */ 358 case U_RIGHT_TO_LEFT: /* R */ 359 lastStrongWasAL=FALSE; 360 break; 361 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 362 lastStrongWasAL=TRUE; 363 break; 364 case U_EUROPEAN_NUMBER: /* EN */ 365 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 366 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 367 } 368 break; 369 default : 370 break; 371 } 372 } 373 } else { 374 for(i=length; i>0; /* pre-decrement in the body */) { 375 c=s[--i]; 376 switch(ubidi_getClass(bdp, c)) { 377 case U_LEFT_TO_RIGHT: /* L */ 378 case U_RIGHT_TO_LEFT: /* R */ 379 lastStrongWasAL=FALSE; 380 break; 381 case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 382 lastStrongWasAL=TRUE; 383 break; 384 case U_EUROPEAN_NUMBER: /* EN */ 385 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 386 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 387 } 388 break; 389 default : 390 break; 391 } 392 } 393 } 394 } 395 396 /* 397 *Name : invertBuffer 398 *Function : This function inverts the buffer, it's used 399 * in case the user specifies the buffer to be 400 * U_SHAPE_TEXT_DIRECTION_LOGICAL 401 */ 402 static void 403 invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) { 404 UChar temp; 405 int32_t i=0,j=0; 406 for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) { 407 temp = buffer[i]; 408 buffer[i] = buffer[j]; 409 buffer[j] = temp; 410 } 411 } 412 413 /* 414 *Name : changeLamAlef 415 *Function : Converts the Alef characters into an equivalent 416 * LamAlef location 
in the 0x06xx Range, this is an 417 * intermediate stage in the operation of the program 418 * later it'll be converted into the 0xFExx LamAlefs 419 * in the shaping function. 420 */ 421 static inline UChar 422 changeLamAlef(UChar ch) { 423 switch(ch) { 424 case 0x0622 : 425 return 0x065C; 426 case 0x0623 : 427 return 0x065D; 428 case 0x0625 : 429 return 0x065E; 430 case 0x0627 : 431 return 0x065F; 432 } 433 return 0; 434 } 435 436 /* 437 *Name : getLink 438 *Function : Resolves the link between the characters as 439 * Arabic characters have four forms : 440 * Isolated, Initial, Middle and Final Form 441 */ 442 static UChar 443 getLink(UChar ch) { 444 if(ch >= 0x0622 && ch <= 0x06D3) { 445 return(araLink[ch-0x0622]); 446 } else if(ch == 0x200D) { 447 return(3); 448 } else if(ch >= 0x206D && ch <= 0x206F) { 449 return(4); 450 }else if(ch >= 0xFB50 && ch <= 0xFC62) { 451 return(presALink[ch-0xFB50]); 452 } else if(ch >= 0xFE70 && ch <= 0xFEFC) { 453 return(presBLink[ch-0xFE70]); 454 }else { 455 return(0); 456 } 457 } 458 459 /* 460 *Name : countSpaces 461 *Function : Counts the number of spaces 462 * at each end of the logical buffer 463 */ 464 static void 465 countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) { 466 int32_t i = 0; 467 int32_t countl = 0,countr = 0; 468 while((dest[i] == SPACE_CHAR) && (countl < size)) { 469 countl++; 470 i++; 471 } 472 if (countl < size) { /* the entire buffer is not all space */ 473 while(dest[size-1] == SPACE_CHAR) { 474 countr++; 475 size--; 476 } 477 } 478 *spacesCountl = countl; 479 *spacesCountr = countr; 480 } 481 482 /* 483 *Name : isTashkeelChar 484 *Function : Returns 1 for Tashkeel characters in 06 range else return 0 485 */ 486 static inline int32_t 487 isTashkeelChar(UChar ch) { 488 return (int32_t)( ch>=0x064B && ch<= 0x0652 ); 489 } 490 491 /* 492 *Name : isTashkeelCharFE 493 *Function : Returns 1 for Tashkeel characters in FE range else return 0 494 */ 495 static 
inline int32_t 496 isTashkeelCharFE(UChar ch) { 497 return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F ); 498 } 499 500 /* 501 *Name : isAlefChar 502 *Function : Returns 1 for Alef characters else return 0 503 */ 504 static inline int32_t 505 isAlefChar(UChar ch) { 506 return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) ); 507 } 508 509 /* 510 *Name : isLamAlefChar 511 *Function : Returns 1 for LamAlef characters else return 0 512 */ 513 static inline int32_t 514 isLamAlefChar(UChar ch) { 515 return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) ); 516 } 517 518 /*BIDI 519 *Name : isTailChar 520 *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0 521 */ 522 523 static inline int32_t 524 isTailChar(UChar ch) { 525 if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){ 526 return 1; 527 }else{ 528 return 0; 529 } 530 } 531 532 /*BIDI 533 *Name : isSeenTailFamilyChar 534 *Function : returns 1 if the character is a seen family isolated character 535 * in the FE range otherwise returns 0 536 */ 537 538 static inline int32_t 539 isSeenTailFamilyChar(UChar ch) { 540 if(ch >= 0xfeb1 && ch < 0xfebf){ 541 return tailFamilyIsolatedFinal [ch - 0xFEB1]; 542 }else{ 543 return 0; 544 } 545 } 546 547 /* Name : isSeenFamilyChar 548 * Function : returns 1 if the character is a seen family character in the Unicode 549 * 06 range otherwise returns 0 550 */ 551 552 static inline int32_t 553 isSeenFamilyChar(UChar ch){ 554 if(ch >= 0x633 && ch <= 0x636){ 555 return 1; 556 }else { 557 return 0; 558 } 559 } 560 561 /*Start of BIDI*/ 562 /* 563 *Name : isAlefMaksouraChar 564 *Function : returns 1 if the character is a Alef Maksoura Final or isolated 565 * otherwise returns 0 566 */ 567 static inline int32_t 568 isAlefMaksouraChar(UChar ch) { 569 return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); 570 } 571 572 /* 573 * Name : isYehHamzaChar 574 * Function : returns 1 if the character is a yehHamza isolated or 
yehhamza 575 * final is found otherwise returns 0 576 */ 577 static inline int32_t 578 isYehHamzaChar(UChar ch) { 579 if((ch==0xFE89)||(ch==0xFE8A)){ 580 return 1; 581 }else{ 582 return 0; 583 } 584 } 585 586 /* 587 * Name: isTashkeelOnTatweelChar 588 * Function: Checks if the Tashkeel Character is on Tatweel or not,if the 589 * Tashkeel on tatweel (FE range), it returns 1 else if the 590 * Tashkeel with shadda on tatweel (FC range)return 2 otherwise 591 * returns 0 592 */ 593 static inline int32_t 594 isTashkeelOnTatweelChar(UChar ch){ 595 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR) 596 { 597 return tashkeelMedial [ch - 0xFE70]; 598 }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) { 599 return 2; 600 }else{ 601 return 0; 602 } 603 } 604 605 /* 606 * Name: isIsolatedTashkeelChar 607 * Function: Checks if the Tashkeel Character is in the isolated form 608 * (i.e. Unicode FE range) returns 1 else if the Tashkeel 609 * with shadda is in the isolated form (i.e. Unicode FC range) 610 * returns 2 otherwise returns 0 611 */ 612 static inline int32_t 613 isIsolatedTashkeelChar(UChar ch){ 614 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){ 615 return (1 - tashkeelMedial [ch - 0xFE70]); 616 }else if(ch >= 0xfc5e && ch <= 0xfc63){ 617 return 1; 618 }else{ 619 return 0; 620 } 621 } 622 623 624 625 626 /* 627 *Name : calculateSize 628 *Function : This function calculates the destSize to be used in preflighting 629 * when the destSize is equal to 0 630 * It is used also to calculate the new destsize in case the 631 * destination buffer will be resized. 
632 */ 633 634 static int32_t 635 calculateSize(const UChar *source, int32_t sourceLength, 636 int32_t destSize,uint32_t options) { 637 int32_t i = 0; 638 639 int lamAlefOption = 0; 640 int tashkeelOption = 0; 641 642 destSize = sourceLength; 643 644 if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE || 645 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) && 646 ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){ 647 lamAlefOption = 1; 648 } 649 if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE && 650 ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){ 651 tashkeelOption = 1; 652 } 653 654 if(lamAlefOption || tashkeelOption){ 655 if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 656 for(i=0;i<sourceLength;i++) { 657 if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) { 658 destSize--; 659 } 660 } 661 }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) { 662 for(i=0;i<sourceLength;i++) { 663 if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) { 664 destSize--; 665 } 666 } 667 } 668 } 669 670 if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){ 671 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 672 for(i=0;i<sourceLength;i++) { 673 if(isLamAlefChar(source[i])) 674 destSize++; 675 } 676 } 677 } 678 679 return destSize; 680 } 681 682 /* 683 *Name : handleTashkeelWithTatweel 684 *Function : Replaces Tashkeel as following: 685 * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. 686 * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace 687 * it with Shadda on Tatweel. 688 * Case 3: if the Tashkeel is isolated replace it with Space. 
689 * 690 */ 691 static int32_t 692 handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength, 693 int32_t /*destSize*/, uint32_t /*options*/, 694 UErrorCode * /*pErrorCode*/) { 695 int i; 696 for(i = 0; i < sourceLength; i++){ 697 if((isTashkeelOnTatweelChar(dest[i]) == 1)){ 698 dest[i] = TATWEEL_CHAR; 699 }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){ 700 dest[i] = SHADDA_TATWEEL_CHAR; 701 }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){ 702 dest[i] = SPACE_CHAR; 703 } 704 } 705 return sourceLength; 706 } 707 708 709 710 /* 711 *Name : handleGeneratedSpaces 712 *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space, 713 * and Tashkeel to space. 714 * handleGeneratedSpaces function puts these generated spaces 715 * according to the options the user specifies. LamAlef and Tashkeel 716 * spaces can be replaced at begin, at end, at near or decrease the 717 * buffer size. 718 * 719 * There is also Auto option for LamAlef and tashkeel, which will put 720 * the spaces at end of the buffer (or end of text if the user used 721 * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END). 722 * 723 * If the text type was visual_LTR and the option 724 * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END 725 * option will place the space at the beginning of the buffer and 726 * BEGIN will place the space at the end of the buffer. 
727 */ 728 729 static int32_t 730 handleGeneratedSpaces(UChar *dest, int32_t sourceLength, 731 int32_t destSize, 732 uint32_t options, 733 UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) { 734 735 int32_t i = 0, j = 0; 736 int32_t count = 0; 737 UChar *tempbuffer=NULL; 738 739 int lamAlefOption = 0; 740 int tashkeelOption = 0; 741 int shapingMode = SHAPE_MODE; 742 743 if (shapingMode == 0){ 744 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){ 745 lamAlefOption = 1; 746 } 747 if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){ 748 tashkeelOption = 1; 749 } 750 } 751 752 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 753 /* Test for NULL */ 754 if(tempbuffer == NULL) { 755 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 756 return 0; 757 } 758 759 760 if (lamAlefOption || tashkeelOption){ 761 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 762 763 i = j = 0; count = 0; 764 while(i < sourceLength) { 765 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 766 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 767 j--; 768 count++; 769 } else { 770 tempbuffer[j] = dest[i]; 771 } 772 i++; 773 j++; 774 } 775 776 while(count >= 0) { 777 tempbuffer[i] = 0x0000; 778 i--; 779 count--; 780 } 781 782 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 783 destSize = u_strlen(dest); 784 } 785 786 lamAlefOption = 0; 787 788 if (shapingMode == 0){ 789 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){ 790 lamAlefOption = 1; 791 } 792 } 793 794 if (lamAlefOption){ 795 /* Lam+Alef is already shaped into LamAlef + FFFF */ 796 i = 0; 797 while(i < sourceLength) { 798 if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){ 799 dest[i] = SPACE_CHAR; 800 } 801 i++; 802 } 803 destSize = sourceLength; 804 } 805 lamAlefOption = 0; 806 tashkeelOption = 0; 807 808 if (shapingMode == 0) { 809 if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) || 810 (((options&U_SHAPE_LAMALEF_MASK) == 
U_SHAPE_LAMALEF_AUTO ) 811 && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) { 812 lamAlefOption = 1; 813 } 814 if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) { 815 tashkeelOption = 1; 816 } 817 } 818 819 if(lamAlefOption || tashkeelOption){ 820 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 821 822 i = j = sourceLength; count = 0; 823 824 while(i >= 0) { 825 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 826 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 827 j++; 828 count++; 829 }else { 830 tempbuffer[j] = dest[i]; 831 } 832 i--; 833 j--; 834 } 835 836 for(i=0 ;i < count; i++){ 837 tempbuffer[i] = SPACE_CHAR; 838 } 839 840 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 841 destSize = sourceLength; 842 } 843 844 845 846 lamAlefOption = 0; 847 tashkeelOption = 0; 848 849 if (shapingMode == 0) { 850 if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) || 851 (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 852 && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) { 853 lamAlefOption = 1; 854 } 855 if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){ 856 tashkeelOption = 1; 857 } 858 } 859 860 if(lamAlefOption || tashkeelOption){ 861 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 862 863 i = j = 0; count = 0; 864 while(i < sourceLength) { 865 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 866 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 867 j--; 868 count++; 869 }else { 870 tempbuffer[j] = dest[i]; 871 } 872 i++; 873 j++; 874 } 875 876 while(count >= 0) { 877 tempbuffer[i] = SPACE_CHAR; 878 i--; 879 count--; 880 } 881 882 uprv_memcpy(dest,tempbuffer, sourceLength*U_SIZEOF_UCHAR); 883 destSize = sourceLength; 884 } 885 886 887 if(tempbuffer){ 888 uprv_free(tempbuffer); 889 } 890 891 return destSize; 892 } 893 894 /* 895 *Name :expandCompositCharAtBegin 896 *Function :Expands the LamAlef character to Lam and Alef consuming the 
required
 *         space from beginning of the buffer. If the text type was visual_LTR
 *         and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
 *         the spaces will be located at end of buffer.
 *         If there are no spaces to expand the LamAlef, an error
 *         will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
 */

/*
 * Implementation notes for expandCompositCharAtBegin:
 *  - Returns sourceLength, or 0 with *pErrorCode set to
 *    U_MEMORY_ALLOCATION_ERROR when the scratch buffer cannot be allocated.
 *    The incoming destSize value is not used.
 *  - The count of leading spaces is limited to the text. The scan used to
 *    run past the end of dest when every unit was a space.
 */
static int32_t
expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
    int32_t      i = 0,j = 0;
    int32_t      countl = 0;
    UChar    *tempbuffer=NULL;

    tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);

    /* Test for NULL */
    if(tempbuffer == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);

    /* number of leading spaces that may be consumed, bounded by the text */
    while(countl < sourceLength && dest[countl] == SPACE_CHAR) {
        countl++;
    }

    /*
     * Copy back to front. Each expansion writes two units and uses up one
     * leading space, so the write index j runs ahead of the read index i.
     */
    i = j = sourceLength-1;

    while(i >= 0 && j >= 0) {
        if( countl>0 && isLamAlefChar(dest[i])) {
            tempbuffer[j] = LAM_CHAR;
            /* to ensure the array index is within the range */
            U_ASSERT(dest[i] >= 0xFEF5u
                && dest[i]-0xFEF5u < sizeof(convertLamAlef)/sizeof(convertLamAlef[0]));
            tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
            j--;
            countl--;
        }else {
            if( countl == 0 && isLamAlefChar(dest[i]) ) {
                /* the ligature is copied unexpanded and the shortage is reported */
                *pErrorCode=U_NO_SPACE_AVAILABLE;
            }
            tempbuffer[j] = dest[i];
        }
        i--;
        j--;
    }
    uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR);

    uprv_free(tempbuffer);

    destSize = sourceLength;
    return destSize;
}

/*
 *Name     : expandCompositCharAtEnd
 *Function : Expands the LamAlef character to Lam and Alef consuming the
 *           required space from end of the buffer. If the text type was
 *           Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END
 *           was used, the spaces will be consumed from begin of buffer.
If
 *           there are no spaces to expand the LamAlef, an error
 *           will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
 */

/*
 * Implementation notes for expandCompositCharAtEnd:
 *  - Returns sourceLength, or 0 with *pErrorCode set to
 *    U_MEMORY_ALLOCATION_ERROR when the scratch buffer cannot be allocated.
 *    The incoming destSize value is not used.
 *  - The count of trailing spaces is limited to the text. The scan used to
 *    read dest[-1] when every unit was a space or sourceLength was 0.
 *  - The final left shift moves only the units that hold text. It used to
 *    move sourceLength units starting at offset countr, which read past
 *    the sourceLength+1 units allocated whenever countr was 2 or more.
 */
static int32_t
expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
    int32_t      i = 0,j = 0;

    int32_t      countr = 0;
    int32_t      inpsize = sourceLength;

    UChar    *tempbuffer=NULL;
    tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);

    /* Test for NULL */
    if(tempbuffer == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);

    /* number of trailing spaces that may be consumed, bounded by the text */
    while(inpsize > 0 && dest[inpsize-1] == SPACE_CHAR) {
        countr++;
        inpsize--;
    }

    /*
     * Copy back to front, reading from the last non-space unit and writing
     * from the last unit of the buffer. j - i always equals the number of
     * trailing spaces that are still unused.
     */
    i = sourceLength - countr - 1;
    j = sourceLength - 1;

    while(i >= 0 && j >= 0) {
        if( countr>0 && isLamAlefChar(dest[i]) ) {
            tempbuffer[j] = LAM_CHAR;
            tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
            j--;
            countr--;
        }else {
            if ((countr == 0) && isLamAlefChar(dest[i]) ) {
                /* the ligature is copied unexpanded and the shortage is reported */
                *pErrorCode=U_NO_SPACE_AVAILABLE;
            }
            tempbuffer[j] = dest[i];
        }
        i--;
        j--;
    }

    if(countr > 0) {
        /*
         * The text now occupies tempbuffer[countr .. sourceLength-1].
         * Slide it to the front and turn the countr units left over at
         * the back into spaces again.
         */
        uprv_memmove(tempbuffer, tempbuffer+countr, (sourceLength-countr)*U_SIZEOF_UCHAR);
        for(i=sourceLength-1;i>=sourceLength-countr;i--) {
            tempbuffer[i] = SPACE_CHAR;
        }
    }
    uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR);

    uprv_free(tempbuffer);

    destSize = sourceLength;
    return destSize;
}

/*
 *Name     : expandCompositCharAtNear
 *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
 *           into Yeh + Hamza, SeenFamily character into SeenFamily character
 *           + Tail, while consuming the space next to the character.
1027 * If there are no spaces next to the character, an error 1028 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 1029 */ 1030 1031 static int32_t 1032 expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode, 1033 int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) { 1034 int32_t i = 0; 1035 1036 1037 UChar lamalefChar, yehhamzaChar; 1038 1039 for(i = 0 ;i<=sourceLength-1;i++) { 1040 if (seenTailOption && isSeenTailFamilyChar(dest[i])) { 1041 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1042 dest[i-1] = shapeVars.tailChar; 1043 }else { 1044 *pErrorCode=U_NO_SPACE_AVAILABLE; 1045 } 1046 }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) { 1047 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { 1048 yehhamzaChar = dest[i]; 1049 dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR]; 1050 dest[i-1] = HAMZAFE_CHAR; 1051 }else { 1052 1053 *pErrorCode=U_NO_SPACE_AVAILABLE; 1054 } 1055 }else if(lamAlefOption && isLamAlefChar(dest[i+1])) { 1056 if(dest[i] == SPACE_CHAR){ 1057 lamalefChar = dest[i+1]; 1058 dest[i+1] = LAM_CHAR; 1059 dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ]; 1060 }else { 1061 *pErrorCode=U_NO_SPACE_AVAILABLE; 1062 } 1063 } 1064 } 1065 destSize = sourceLength; 1066 return destSize; 1067 } 1068 /* 1069 * Name : expandCompositChar 1070 * Function : LamAlef, need special handling, since it expands from one 1071 * character into two characters while shaping or deshaping. 1072 * In order to expand it, near or far spaces according to the 1073 * options user specifies. Also buffer size can be increased. 1074 * 1075 * For SeenFamily characters and YehHamza only the near option is 1076 * supported, while for LamAlef we can take spaces from begin, end, 1077 * near or even increase the buffer size. 1078 * There is also the Auto option for LamAlef only, which will first 1079 * search for a space at end, begin then near, respectively. 
1080 * If there are no spaces to expand these characters, an error will be set to 1081 * U_NO_SPACE_AVAILABLE as defined in utypes.h 1082 */ 1083 1084 static int32_t 1085 expandCompositChar(UChar *dest, int32_t sourceLength, 1086 int32_t destSize,uint32_t options, 1087 UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) { 1088 1089 int32_t i = 0,j = 0; 1090 1091 UChar *tempbuffer=NULL; 1092 int yehHamzaOption = 0; 1093 int seenTailOption = 0; 1094 int lamAlefOption = 0; 1095 1096 if (shapingMode == 1){ 1097 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){ 1098 1099 if(shapeVars.spacesRelativeToTextBeginEnd == 0) { 1100 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1101 1102 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1103 *pErrorCode = U_ZERO_ERROR; 1104 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1105 } 1106 }else { 1107 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1108 1109 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1110 *pErrorCode = U_ZERO_ERROR; 1111 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1112 } 1113 } 1114 1115 if(*pErrorCode == U_NO_SPACE_AVAILABLE) { 1116 *pErrorCode = U_ZERO_ERROR; 1117 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1118 seenTailOption, 1,shapeVars); 1119 } 1120 } 1121 } 1122 1123 if (shapingMode == 1){ 1124 if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){ 1125 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); 1126 } 1127 } 1128 1129 if (shapingMode == 1){ 1130 if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){ 1131 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); 1132 } 1133 } 1134 1135 if (shapingMode == 0){ 1136 if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){ 1137 yehHamzaOption = 1; 1138 } 1139 if 
((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){ 1140 seenTailOption = 1; 1141 } 1142 } 1143 if (shapingMode == 1) { 1144 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) { 1145 lamAlefOption = 1; 1146 } 1147 } 1148 1149 1150 if (yehHamzaOption || seenTailOption || lamAlefOption){ 1151 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, 1152 seenTailOption,lamAlefOption,shapeVars); 1153 } 1154 1155 1156 if (shapingMode == 1){ 1157 if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 1158 destSize = calculateSize(dest,sourceLength,destSize,options); 1159 tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR); 1160 1161 /* Test for NULL */ 1162 if(tempbuffer == NULL) { 1163 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1164 return 0; 1165 } 1166 1167 uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR); 1168 1169 i = j = 0; 1170 while(i < destSize && j < destSize) { 1171 if(isLamAlefChar(dest[i]) ) { 1172 tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ]; 1173 tempbuffer[j+1] = LAM_CHAR; 1174 j++; 1175 }else { 1176 tempbuffer[j] = dest[i]; 1177 } 1178 i++; 1179 j++; 1180 } 1181 1182 uprv_memcpy(dest, tempbuffer, destSize*U_SIZEOF_UCHAR); 1183 } 1184 } 1185 1186 if(tempbuffer) { 1187 uprv_free(tempbuffer); 1188 } 1189 return destSize; 1190 } 1191 1192 /* 1193 *Name : shapeUnicode 1194 *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped 1195 * arabic Unicode buffer in FExx Range 1196 */ 1197 static int32_t 1198 shapeUnicode(UChar *dest, int32_t sourceLength, 1199 int32_t destSize,uint32_t options, 1200 UErrorCode *pErrorCode, 1201 int tashkeelFlag, struct uShapeVariables shapeVars) { 1202 1203 int32_t i, iend; 1204 int32_t step; 1205 int32_t lastPos,Nx, Nw; 1206 unsigned int Shape; 1207 int32_t lamalef_found = 0; 1208 int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0; 1209 UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0; 1210 UChar wLamalef; 
1211 1212 /* 1213 * Converts the input buffer from FExx Range into 06xx Range 1214 * to make sure that all characters are in the 06xx range 1215 * even the lamalef is converted to the special region in 1216 * the 06xx range 1217 */ 1218 if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) { 1219 for (i = 0; i < sourceLength; i++) { 1220 UChar inputChar = dest[i]; 1221 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { 1222 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1223 if (c != 0) 1224 dest[i] = c; 1225 } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) { 1226 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ; 1227 } else { 1228 dest[i] = inputChar ; 1229 } 1230 } 1231 } 1232 1233 1234 /* sets the index to the end of the buffer, together with the step point to -1 */ 1235 i = sourceLength - 1; 1236 iend = -1; 1237 step = -1; 1238 1239 /* 1240 * This function resolves the link between the characters . 1241 * Arabic characters have four forms : 1242 * Isolated Form, Initial Form, Middle Form and Final Form 1243 */ 1244 currLink = getLink(dest[i]); 1245 1246 lastPos = i; 1247 Nx = -2, Nw = 0; 1248 1249 while (i != iend) { 1250 /* If high byte of currLink > 0 then more than one shape */ 1251 if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) { 1252 Nw = i + step; 1253 while (Nx < 0) { /* we need to know about next char */ 1254 if(Nw == iend) { 1255 nextLink = 0; 1256 Nx = 3000; 1257 } else { 1258 nextLink = getLink(dest[Nw]); 1259 if((nextLink & IRRELEVANT) == 0) { 1260 Nx = Nw; 1261 } else { 1262 Nw = Nw + step; 1263 } 1264 } 1265 } 1266 1267 if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) { 1268 lamalef_found = 1; 1269 wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */ 1270 if ( wLamalef != 0) { 1271 dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */ 1272 dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last 
character in the */ 1273 i=lastPos; /* unicode private use area, this is done to make */ 1274 } /* sure that removeLamAlefSpaces() handles only the */ 1275 lastLink = prevLink; /* spaces generated during lamalef generation. */ 1276 currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */ 1277 } /* in removeLamAlefSpaces() */ 1278 1279 if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ 1280 if ( isSeenFamilyChar(dest[i])) { 1281 seenfamFound = 1; 1282 } else if (dest[i] == YEH_HAMZA_CHAR) { 1283 yehhamzaFound = 1; 1284 } 1285 } 1286 else if(i==0){ 1287 if ( isSeenFamilyChar(dest[i])){ 1288 seenfamFound = 1; 1289 } else if (dest[i] == YEH_HAMZA_CHAR) { 1290 yehhamzaFound = 1; 1291 } 1292 } 1293 1294 /* 1295 * get the proper shape according to link ability of neighbors 1296 * and of character; depends on the order of the shapes 1297 * (isolated, initial, middle, final) in the compatibility area 1298 */ 1299 Shape = shapeTable[nextLink & (LINKR + LINKL)] 1300 [lastLink & (LINKR + LINKL)] 1301 [currLink & (LINKR + LINKL)]; 1302 1303 if ((currLink & (LINKR+LINKL)) == 1) { 1304 Shape &= 1; 1305 } else if(isTashkeelChar(dest[i])) { 1306 if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) && 1307 dest[i] != 0x064C && dest[i] != 0x064D ) 1308 { 1309 Shape = 1; 1310 if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) { 1311 Shape = 0; 1312 } 1313 } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){ 1314 Shape = 1; 1315 } else { 1316 Shape = 0; 1317 } 1318 } 1319 if ((dest[i] ^ 0x0600) < 0x100) { 1320 if ( isTashkeelChar(dest[i]) ){ 1321 if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){ 1322 dest[i] = TASHKEEL_SPACE_SUB; 1323 tashkeelFound = 1; 1324 } else { 1325 /* to ensure the array index is within the range */ 1326 U_ASSERT(dest[i] >= 0x064Bu 1327 && dest[i]-0x064Bu < sizeof(IrrelevantPos)/sizeof(IrrelevantPos[0])); 1328 dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; 1329 } 1330 }else if 
((currLink & APRESENT) > 0) { 1331 dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); 1332 }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) { 1333 dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape); 1334 } 1335 } 1336 } 1337 1338 /* move one notch forward */ 1339 if ((currLink & IRRELEVANT) == 0) { 1340 prevLink = lastLink; 1341 lastLink = currLink; 1342 lastPos = i; 1343 } 1344 1345 i = i + step; 1346 if (i == Nx) { 1347 currLink = nextLink; 1348 Nx = -2; 1349 } else if(i != iend) { 1350 currLink = getLink(dest[i]); 1351 } 1352 } 1353 destSize = sourceLength; 1354 if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){ 1355 destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars); 1356 } 1357 1358 if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) { 1359 destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars); 1360 } 1361 return destSize; 1362 } 1363 1364 /* 1365 *Name : deShapeUnicode 1366 *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped 1367 * arabic Unicode buffer in 06xx Range 1368 */ 1369 static int32_t 1370 deShapeUnicode(UChar *dest, int32_t sourceLength, 1371 int32_t destSize,uint32_t options, 1372 UErrorCode *pErrorCode, struct uShapeVariables shapeVars) { 1373 int32_t i = 0; 1374 int32_t lamalef_found = 0; 1375 int32_t yehHamzaComposeEnabled = 0; 1376 int32_t seenComposeEnabled = 0; 1377 1378 yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0; 1379 seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 
1 : 0; 1380 1381 /* 1382 *This for loop changes the buffer from the Unicode FE range to 1383 *the Unicode 06 range 1384 */ 1385 1386 for(i = 0; i < sourceLength; i++) { 1387 UChar inputChar = dest[i]; 1388 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */ 1389 UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; 1390 if (c != 0) 1391 dest[i] = c; 1392 } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR)) 1393 && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) { 1394 dest[i] = SPACE_CHAR; 1395 dest[i+1] = YEH_HAMZA_CHAR; 1396 } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1)) 1397 && (isSeenTailFamilyChar(dest[i+1])) ) { 1398 dest[i] = SPACE_CHAR; 1399 } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */ 1400 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ]; 1401 } else { 1402 dest[i] = inputChar ; 1403 } 1404 1405 if( isLamAlefChar(dest[i]) ) 1406 lamalef_found = 1; 1407 } 1408 1409 destSize = sourceLength; 1410 if (lamalef_found != 0){ 1411 destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars); 1412 } 1413 return destSize; 1414 } 1415 1416 /* 1417 **************************************** 1418 * u_shapeArabic 1419 **************************************** 1420 */ 1421 1422 U_CAPI int32_t U_EXPORT2 1423 u_shapeArabic(const UChar *source, int32_t sourceLength, 1424 UChar *dest, int32_t destCapacity, 1425 uint32_t options, 1426 UErrorCode *pErrorCode) { 1427 1428 int32_t destLength; 1429 struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0}; 1430 1431 /* usual error checking */ 1432 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1433 return 0; 1434 } 1435 1436 /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */ 1437 if( 
source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 || 1438 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1439 ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) || 1440 (((options&U_SHAPE_TASHKEEL_MASK) > 0) && 1441 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) || 1442 (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED || 1443 (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED || 1444 ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE && 1445 (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) || 1446 ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL && 1447 (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) 1448 ) 1449 { 1450 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1451 return 0; 1452 } 1453 /* Validate lamalef options */ 1454 if(((options&U_SHAPE_LAMALEF_MASK) > 0)&& 1455 !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) || 1456 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) || 1457 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )|| 1458 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) || 1459 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR))) 1460 { 1461 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1462 return 0; 1463 } 1464 /* Validate Tashkeel options */ 1465 if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&& 1466 !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) || 1467 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END ) 1468 ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )|| 1469 ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL))) 1470 { 1471 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1472 return 0; 1473 } 1474 /* determine the source length */ 1475 if(sourceLength==-1) { 1476 sourceLength=u_strlen(source); 1477 } 1478 if(sourceLength<=0) { 1479 return u_terminateUChars(dest, destCapacity, 0, 
pErrorCode); 1480 } 1481 1482 /* check that source and destination do not overlap */ 1483 if( dest!=NULL && 1484 ((source<=dest && dest<source+sourceLength) || 1485 (dest<=source && source<dest+destCapacity))) { 1486 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1487 return 0; 1488 } 1489 1490 /* Does Options contain the new Seen Tail Unicode code point option */ 1491 if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){ 1492 shapeVars.tailChar = NEW_TAIL_CHAR; 1493 }else { 1494 shapeVars.tailChar = OLD_TAIL_CHAR; 1495 } 1496 1497 if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) { 1498 UChar buffer[300]; 1499 UChar *tempbuffer, *tempsource = NULL; 1500 int32_t outputSize, spacesCountl=0, spacesCountr=0; 1501 1502 if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) { 1503 int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL; 1504 int32_t aggregate_tashkeel = 1505 (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) == 1506 (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED); 1507 int step=logical_order?1:-1; 1508 int j=logical_order?-1:2*sourceLength; 1509 int i=logical_order?-1:sourceLength; 1510 int end=logical_order?sourceLength:-1; 1511 int aggregation_possible = 1; 1512 UChar prev = 0; 1513 UChar prevLink, currLink = 0; 1514 int newSourceLength = 0; 1515 tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR); 1516 if(tempsource == NULL) { 1517 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1518 return 0; 1519 } 1520 1521 while ((i+=step) != end) { 1522 prevLink = currLink; 1523 currLink = getLink(source[i]); 1524 if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) { 1525 aggregation_possible = 0; 1526 tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E; 1527 currLink = getLink(tempsource[j]); 1528 } else { 1529 aggregation_possible = 1; 1530 tempsource[j+=step] = source[i]; 1531 prev = source[i]; 1532 
newSourceLength++; 1533 } 1534 } 1535 source = tempsource+(logical_order?0:j); 1536 sourceLength = newSourceLength; 1537 } 1538 1539 /* calculate destination size */ 1540 /* TODO: do we ever need to do this pure preflighting? */ 1541 if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) || 1542 ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) { 1543 outputSize=calculateSize(source,sourceLength,destCapacity,options); 1544 } else { 1545 outputSize=sourceLength; 1546 } 1547 1548 if(outputSize>destCapacity) { 1549 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1550 if (tempsource != NULL) uprv_free(tempsource); 1551 return outputSize; 1552 } 1553 1554 /* 1555 * need a temporary buffer of size max(outputSize, sourceLength) 1556 * because at first we copy source->temp 1557 */ 1558 if(sourceLength>outputSize) { 1559 outputSize=sourceLength; 1560 } 1561 1562 /* Start of Arabic letter shaping part */ 1563 if(outputSize<=LENGTHOF(buffer)) { 1564 outputSize=LENGTHOF(buffer); 1565 tempbuffer=buffer; 1566 } else { 1567 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); 1568 1569 /*Test for NULL*/ 1570 if(tempbuffer == NULL) { 1571 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1572 if (tempsource != NULL) uprv_free(tempsource); 1573 return 0; 1574 } 1575 } 1576 uprv_memcpy(tempbuffer, source, sourceLength*U_SIZEOF_UCHAR); 1577 if (tempsource != NULL){ 1578 uprv_free(tempsource); 1579 } 1580 1581 if(sourceLength<outputSize) { 1582 uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR); 1583 } 1584 1585 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1586 countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr); 1587 invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr); 1588 } 1589 1590 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 1591 if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) { 1592 
shapeVars.spacesRelativeToTextBeginEnd = 1; 1593 shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END; 1594 shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN; 1595 shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END; 1596 shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN; 1597 } 1598 } 1599 1600 switch(options&U_SHAPE_LETTERS_MASK) { 1601 case U_SHAPE_LETTERS_SHAPE : 1602 if( (options&U_SHAPE_TASHKEEL_MASK)> 0 1603 && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) { 1604 /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */ 1605 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars); 1606 }else { 1607 /* default Call the shaping function with tashkeel flag == 1 */ 1608 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars); 1609 1610 /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/ 1611 if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){ 1612 destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode); 1613 } 1614 } 1615 break; 1616 case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED : 1617 /* Call the shaping function with tashkeel flag == 0 */ 1618 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars); 1619 break; 1620 1621 case U_SHAPE_LETTERS_UNSHAPE : 1622 /* Call the deshaping function */ 1623 destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars); 1624 break; 1625 default : 1626 /* will never occur because of validity checks above */ 1627 destLength = 0; 1628 break; 1629 } 1630 1631 /* 1632 * TODO: (markus 2002aug01) 1633 * For as long as we always preflight the outputSize above 1634 * we should U_ASSERT(outputSize==destLength) 1635 * except for the adjustment above before the tempbuffer allocation 1636 */ 1637 1638 
if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1639 countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr); 1640 invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr); 1641 } 1642 uprv_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)*U_SIZEOF_UCHAR); 1643 1644 if(tempbuffer!=buffer) { 1645 uprv_free(tempbuffer); 1646 } 1647 1648 if(destLength>destCapacity) { 1649 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1650 return destLength; 1651 } 1652 1653 /* End of Arabic letter shaping part */ 1654 } else { 1655 /* 1656 * No letter shaping: 1657 * just make sure the destination is large enough and copy the string. 1658 */ 1659 if(destCapacity<sourceLength) { 1660 /* this catches preflighting, too */ 1661 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1662 return sourceLength; 1663 } 1664 uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR); 1665 destLength=sourceLength; 1666 } 1667 1668 /* 1669 * Perform number shaping. 1670 * With UTF-16 or UTF-32, the length of the string is constant. 1671 * The easiest way to do this is to operate on the destination and 1672 * "shape" the digits in-place. 
1673 */ 1674 if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) { 1675 UChar digitBase; 1676 int32_t i; 1677 1678 /* select the requested digit group */ 1679 switch(options&U_SHAPE_DIGIT_TYPE_MASK) { 1680 case U_SHAPE_DIGIT_TYPE_AN: 1681 digitBase=0x660; /* Unicode: "Arabic-Indic digits" */ 1682 break; 1683 case U_SHAPE_DIGIT_TYPE_AN_EXTENDED: 1684 digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */ 1685 break; 1686 default: 1687 /* will never occur because of validity checks above */ 1688 digitBase=0; 1689 break; 1690 } 1691 1692 /* perform the requested operation */ 1693 switch(options&U_SHAPE_DIGITS_MASK) { 1694 case U_SHAPE_DIGITS_EN2AN: 1695 /* add (digitBase-'0') to each European (ASCII) digit code point */ 1696 digitBase-=0x30; 1697 for(i=0; i<destLength; ++i) { 1698 if(((uint32_t)dest[i]-0x30)<10) { 1699 dest[i]+=digitBase; 1700 } 1701 } 1702 break; 1703 case U_SHAPE_DIGITS_AN2EN: 1704 /* subtract (digitBase-'0') from each Arabic digit code point */ 1705 for(i=0; i<destLength; ++i) { 1706 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) { 1707 dest[i]-=digitBase-0x30; 1708 } 1709 } 1710 break; 1711 case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: 1712 _shapeToArabicDigitsWithContext(dest, destLength, 1713 digitBase, 1714 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1715 FALSE); 1716 break; 1717 case U_SHAPE_DIGITS_ALEN2AN_INIT_AL: 1718 _shapeToArabicDigitsWithContext(dest, destLength, 1719 digitBase, 1720 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), 1721 TRUE); 1722 break; 1723 default: 1724 /* will never occur because of validity checks above */ 1725 break; 1726 } 1727 } 1728 1729 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 1730 } 1731