/*
 * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

// Row conversion functions for 32-bit x86 using MSVC inline assembly
// (SSSE3 for the ARGB<->planar kernels, MMX table lookups for the
// YUV->RGB kernels).  All functions are __declspec(naked): they manage
// the stack and registers entirely inside the __asm block.

#include "row.h"

extern "C" {

#ifdef HAS_ARGBTOYROW_SSSE3
// Declares a 16-byte-aligned file-scope constant table.  Note the expanded
// name gets a leading underscore (kARGBToY -> _kARGBToY); the __asm blocks
// below reference the tables by that underscored name.
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var

// Constant multiplication table for converting ARGB to I400.
// Per-byte weights {13, 65, 33, 0} repeated for 4 pixels; used with
// pmaddubsw, so the zero weight drops the alpha-position byte.  The sum is
// shifted right by 7 and biased by kAddY16 in the row functions below.
extern "C" TALIGN16(const int8, kARGBToY[16]) = {
  13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
};

// U weights {112, -74, -38, 0}; result is arithmetically shifted by 8 and
// biased by kAddUV128 in the UV row functions.
extern "C" TALIGN16(const int8, kARGBToU[16]) = {
  112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
};

// V weights {-18, -94, 112, 0}.
extern "C" TALIGN16(const int8, kARGBToV[16]) = {
  -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
};

// Constants for BGRA.  Same weights as the ARGB tables above, permuted to
// match the BGRA byte order (zero weight on the leading alpha byte).
extern "C" TALIGN16(const int8, kBGRAToY[16]) = {
  0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
};

extern "C" TALIGN16(const int8, kBGRAToU[16]) = {
  0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
};

extern "C" TALIGN16(const int8, kBGRAToV[16]) = {
  0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
};

// Constants for ABGR.  Same weights permuted for ABGR byte order
// (zero weight on the trailing alpha byte).
extern "C" TALIGN16(const int8, kABGRToY[16]) = {
  33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
};

extern "C" TALIGN16(const int8, kABGRToU[16]) = {
  -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
};

extern "C" TALIGN16(const int8, kABGRToV[16]) = {
  112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
};

// +16 bias added to every Y byte after the weighted sum (video-range luma).
extern "C" TALIGN16(const uint8, kAddY16[16]) = {
  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
};

// +128 bias that re-centers the signed U/V results into unsigned bytes.
extern "C" TALIGN16(const uint8, kAddUV128[16]) = {
  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
};

// Shuffle table for converting BG24 to ARGB.  Expands 3-byte pixels to
// 4-byte; positions 12-15 select bytes that the OR with the alpha mask
// below overwrites with 0xff.
extern "C" TALIGN16(const uint8, kShuffleMaskBG24ToARGB[16]) = {
  0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
};

// Shuffle table for converting RAW to ARGB.  Like BG24 but with the R and B
// bytes of each 3-byte pixel swapped.
extern "C" TALIGN16(const uint8, kShuffleMaskRAWToARGB[16]) = {
  2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
};

// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
// For each pixel: Y = ((13*b + 65*g + 33*r) >> 7) + 16.
// Requires: src_argb and dst_y 16-byte aligned (movdqa), pix a multiple
// of 16 and > 0 (the sub/ja loop runs at least once and only in steps
// of 16 pixels).
__declspec(naked)
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
  __asm {
    mov        eax, [esp + 4]   /* src_argb */
    mov        edx, [esp + 8]   /* dst_y */
    mov        ecx, [esp + 12]  /* pix */
    movdqa     xmm7, _kARGBToY
    movdqa     xmm6, _kAddY16

 convertloop :
    movdqa     xmm0, [eax]            // load 16 pixels (4 per register)
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm7             // weighted pair sums per pixel
    pmaddubsw  xmm1, xmm7
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm3, xmm7
    lea        eax, [eax + 64]
    phaddw     xmm0, xmm1             // finish per-pixel sums: 8 words each
    phaddw     xmm2, xmm3
    psrlw      xmm0, 7                // scale: sum / 128
    psrlw      xmm2, 7
    packuswb   xmm0, xmm2             // 16 unsigned Y bytes
    paddb      xmm0, xmm6             // + 16 bias
    movdqa     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    ja         convertloop
    ret
  }
}

// Same as ARGBToYRow_SSSE3 but with the BGRA coefficient table.
// Same alignment and width requirements.
__declspec(naked)
void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
  __asm {
    mov        eax, [esp + 4]   /* src_argb */
    mov        edx, [esp + 8]   /* dst_y */
    mov        ecx, [esp + 12]  /* pix */
    movdqa     xmm7, _kBGRAToY
    movdqa     xmm6, _kAddY16

 convertloop :
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm7
    pmaddubsw  xmm1, xmm7
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm3, xmm7
    lea        eax, [eax + 64]
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 7
    psrlw      xmm2, 7
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm6
    movdqa     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    ja         convertloop
    ret
  }
}

// Same as ARGBToYRow_SSSE3 but with the ABGR coefficient table.
// Same alignment and width requirements.
__declspec(naked)
void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
  __asm {
    mov        eax, [esp + 4]   /* src_argb */
    mov        edx, [esp + 8]   /* dst_y */
    mov        ecx, [esp + 12]  /* pix */
    movdqa     xmm7, _kABGRToY
    movdqa     xmm6, _kAddY16

 convertloop :
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm7
    pmaddubsw  xmm1, xmm7
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm3, xmm7
    lea        eax, [eax + 64]
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 7
    psrlw      xmm2, 7
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm6
    movdqa     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    ja         convertloop
    ret
  }
}

// Convert two rows of 16 ARGB pixels each into 8 U and 8 V values
// (2x2 box subsample, then the U/V weighted sums).
//   src_argb0       - first source row (16-byte aligned)
//   src_stride_argb - byte offset from row 0 to row 1
//   dst_u, dst_v    - 8 bytes written to each
//   width           - pixels per row; multiple of 16, > 0
__declspec(naked)
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]   // src_argb
    mov        esi, [esp + 8 + 8]   // src_stride_argb
    mov        edx, [esp + 8 + 12]  // dst_u
    mov        edi, [esp + 8 + 16]  // dst_v
    mov        ecx, [esp + 8 + 20]  // pix
    movdqa     xmm7, _kARGBToU
    movdqa     xmm6, _kARGBToV
    movdqa     xmm5, _kAddUV128
    sub        edi, edx             // stride from u to v

 convertloop :
    /* step 1 - subsample 16x2 argb pixels to 8x1 */
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pavgb      xmm0, [eax + esi]       // average row 0 with row 1
    pavgb      xmm1, [eax + esi + 16]
    pavgb      xmm2, [eax + esi + 32]
    pavgb      xmm3, [eax + esi + 48]
    lea        eax, [eax + 64]
    movdqa     xmm4, xmm0
    shufps     xmm0, xmm1, 0x88        // even pixels
    shufps     xmm4, xmm1, 0xdd        // odd pixels
    pavgb      xmm0, xmm4              // horizontal average -> 4 pixels
    movdqa     xmm4, xmm2
    shufps     xmm2, xmm3, 0x88
    shufps     xmm4, xmm3, 0xdd
    pavgb      xmm2, xmm4

    // step 2 - convert to U and V
    // from here down is very similar to Y code except
    // instead of 16 different pixels, its 8 pixels of U and 8 of V
    movdqa     xmm1, xmm0
    movdqa     xmm3, xmm2
    pmaddubsw  xmm0, xmm7  // U
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm1, xmm6  // V
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm2
    phaddw     xmm1, xmm3
    psraw      xmm0, 8     // signed scale: sum / 256
    psraw      xmm1, 8
    packsswb   xmm0, xmm1  // low 8 bytes = U, high 8 = V
    paddb      xmm0, xmm5  // -> unsigned (+128 bias)

    // step 3 - store 8 U and 8 V values
    movlps     qword ptr [edx], xmm0        // U
    movhps     qword ptr [edx + edi], xmm0  // V (edi holds dst_v - dst_u)
    lea        edx, [edx + 8]
    sub        ecx, 16
    ja         convertloop
    pop        edi
    pop        esi
    ret
  }
}

// Same as ARGBToUVRow_SSSE3 but with the BGRA coefficient tables.
__declspec(naked)
void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]   // src_argb
    mov        esi, [esp + 8 + 8]   // src_stride_argb
    mov        edx, [esp + 8 + 12]  // dst_u
    mov        edi, [esp + 8 + 16]  // dst_v
    mov        ecx, [esp + 8 + 20]  // pix
    movdqa     xmm7, _kBGRAToU
    movdqa     xmm6, _kBGRAToV
    movdqa     xmm5, _kAddUV128
    sub        edi, edx             // stride from u to v

 convertloop :
    /* step 1 - subsample 16x2 argb pixels to 8x1 */
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pavgb      xmm0, [eax + esi]
    pavgb      xmm1, [eax + esi + 16]
    pavgb      xmm2, [eax + esi + 32]
    pavgb      xmm3, [eax + esi + 48]
    lea        eax, [eax + 64]
    movdqa     xmm4, xmm0
    shufps     xmm0, xmm1, 0x88
    shufps     xmm4, xmm1, 0xdd
    pavgb      xmm0, xmm4
    movdqa     xmm4, xmm2
    shufps     xmm2, xmm3, 0x88
    shufps     xmm4, xmm3, 0xdd
    pavgb      xmm2, xmm4

    // step 2 - convert to U and V
    // from here down is very similar to Y code except
    // instead of 16 different pixels, its 8 pixels of U and 8 of V
    movdqa     xmm1, xmm0
    movdqa     xmm3, xmm2
    pmaddubsw  xmm0, xmm7  // U
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm1, xmm6  // V
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm2
    phaddw     xmm1, xmm3
    psraw      xmm0, 8
    psraw      xmm1, 8
    packsswb   xmm0, xmm1
    paddb      xmm0, xmm5  // -> unsigned

    // step 3 - store 8 U and 8 V values
    movlps     qword ptr [edx], xmm0        // U
    movhps     qword ptr [edx + edi], xmm0  // V
    lea        edx, [edx + 8]
    sub        ecx, 16
    ja         convertloop
    pop        edi
    pop        esi
    ret
  }
}

// Same as ARGBToUVRow_SSSE3 but with the ABGR coefficient tables.
__declspec(naked)
void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]   // src_argb
    mov        esi, [esp + 8 + 8]   // src_stride_argb
    mov        edx, [esp + 8 + 12]  // dst_u
    mov        edi, [esp + 8 + 16]  // dst_v
    mov        ecx, [esp + 8 + 20]  // pix
    movdqa     xmm7, _kABGRToU
    movdqa     xmm6, _kABGRToV
    movdqa     xmm5, _kAddUV128
    sub        edi, edx             // stride from u to v

 convertloop :
    /* step 1 - subsample 16x2 argb pixels to 8x1 */
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pavgb      xmm0, [eax + esi]
    pavgb      xmm1, [eax + esi + 16]
    pavgb      xmm2, [eax + esi + 32]
    pavgb      xmm3, [eax + esi + 48]
    lea        eax, [eax + 64]
    movdqa     xmm4, xmm0
    shufps     xmm0, xmm1, 0x88
    shufps     xmm4, xmm1, 0xdd
    pavgb      xmm0, xmm4
    movdqa     xmm4, xmm2
    shufps     xmm2, xmm3, 0x88
    shufps     xmm4, xmm3, 0xdd
    pavgb      xmm2, xmm4

    // step 2 - convert to U and V
    // from here down is very similar to Y code except
    // instead of 16 different pixels, its 8 pixels of U and 8 of V
    movdqa     xmm1, xmm0
    movdqa     xmm3, xmm2
    pmaddubsw  xmm0, xmm7  // U
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm1, xmm6  // V
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm2
    phaddw     xmm1, xmm3
    psraw      xmm0, 8
    psraw      xmm1, 8
    packsswb   xmm0, xmm1
    paddb      xmm0, xmm5  // -> unsigned

    // step 3 - store 8 U and 8 V values
    movlps     qword ptr [edx], xmm0        // U
    movhps     qword ptr [edx + edi], xmm0  // V
    lea        edx, [edx + 8]
    sub        ecx, 16
    ja         convertloop
    pop        edi
    pop        esi
    ret
  }
}

// Convert 16 BG24 (3 bytes/pixel) pixels to 16 ARGB (4 bytes/pixel) pixels:
// reads 48 bytes, writes 64, forcing alpha to 0xff.
// Requires: src_bg24 and dst_argb 16-byte aligned, pix a multiple of 16, > 0.
__declspec(naked)
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
  __asm {
    mov        eax, [esp + 4]   // src_bg24
    mov        edx, [esp + 8]   // dst_argb
    mov        ecx, [esp + 12]  // pix
    pcmpeqb    xmm7, xmm7       // generate mask 0xff000000
    pslld      xmm7, 24
    movdqa     xmm6, _kShuffleMaskBG24ToARGB

 convertloop :
    movdqa     xmm0, [eax]          // 48 bytes = 16 BG24 pixels
    movdqa     xmm1, [eax + 16]
    movdqa     xmm3, [eax + 32]
    lea        eax, [eax + 48]
    movdqa     xmm2, xmm3
    palignr    xmm2, xmm1, 8    // xmm2 = low 8 of xmm3 : high 8 of xmm1
                                //      = source bytes 24..39 (pixels 8-11)
    pshufb     xmm2, xmm6
    por        xmm2, xmm7       // set alpha
    palignr    xmm1, xmm0, 12   // xmm1 = low 12 of xmm1 : high 4 of xmm0
                                //      = source bytes 12..27 (pixels 4-7)
    pshufb     xmm0, xmm6       // pixels 0-3 from xmm0's low 12 bytes
    movdqa     [edx + 32], xmm2
    por        xmm0, xmm7
    pshufb     xmm1, xmm6
    movdqa     [edx], xmm0
    por        xmm1, xmm7
    palignr    xmm3, xmm3, 4    // rotate: source bytes 36..47 (pixels 12-15)
    pshufb     xmm3, xmm6
    movdqa     [edx + 16], xmm1
    por        xmm3, xmm7
    movdqa     [edx + 48], xmm3
    lea        edx, [edx + 64]
    sub        ecx, 16
    ja         convertloop
    ret
  }
}

// Same as BG24ToARGBRow_SSSE3 but the shuffle mask also swaps R and B
// (RAW 3-byte pixel order -> ARGB).
__declspec(naked)
void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
                        int pix) {
  __asm {
    mov        eax, [esp + 4]   // src_raw
    mov        edx, [esp + 8]   // dst_argb
    mov        ecx, [esp + 12]  // pix
    pcmpeqb    xmm7, xmm7       // generate mask 0xff000000
    pslld      xmm7, 24
    movdqa     xmm6, _kShuffleMaskRAWToARGB

 convertloop :
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm3, [eax + 32]
    lea        eax, [eax + 48]
    movdqa     xmm2, xmm3
    palignr    xmm2, xmm1, 8    // pixels 8-11
    pshufb     xmm2, xmm6
    por        xmm2, xmm7
    palignr    xmm1, xmm0, 12   // pixels 4-7
    pshufb     xmm0, xmm6
    movdqa     [edx + 32], xmm2
    por        xmm0, xmm7
    pshufb     xmm1, xmm6
    movdqa     [edx], xmm0
    por        xmm1, xmm7
    palignr    xmm3, xmm3, 4    // pixels 12-15
    pshufb     xmm3, xmm6
    movdqa     [edx + 16], xmm1
    por        xmm3, xmm7
    movdqa     [edx + 48], xmm3
    lea        edx, [edx + 64]
    sub        ecx, 16
    ja         convertloop
    ret
  }
}

// Convert a row of YUV (4:2:2 sampling: one U,V pair per two Y) to RGB32
// using MMX table lookups.  _kCoefficientsRgbY is defined elsewhere; the
// Y entries start at offset 0, U at 2048, V at 4096, 8 bytes (4 words) per
// entry.  Produces 2 pixels (8 bytes) per iteration via a non-temporal
// store, so width must be a multiple of 2 and > 0, rgb_buf 8-byte aligned.
// NOTE(review): MMX registers are used but no emms is executed before ret —
// presumably the caller issues EMMS before any FPU use; verify.
__declspec(naked)
void FastConvertYUVToRGB32Row(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  __asm {
    pushad                        // saves 32 bytes of registers -> esp + 32
    mov        edx, [esp + 32 + 4]   // y_buf
    mov        edi, [esp + 32 + 8]   // u_buf
    mov        esi, [esp + 32 + 12]  // v_buf
    mov        ebp, [esp + 32 + 16]  // rgb_buf
    mov        ecx, [esp + 32 + 20]  // width

 convertloop :
    movzx      eax, byte ptr [edi]   // U
    lea        edi, [edi + 1]
    movzx      ebx, byte ptr [esi]   // V
    lea        esi, [esi + 1]
    movq       mm0, [_kCoefficientsRgbY + 2048 + 8 * eax]   // U contribution
    movzx      eax, byte ptr [edx]   // Y0
    paddsw     mm0, [_kCoefficientsRgbY + 4096 + 8 * ebx]   // + V contribution
    movzx      ebx, byte ptr [edx + 1]                      // Y1
    movq       mm1, [_kCoefficientsRgbY + 8 * eax]
    lea        edx, [edx + 2]
    movq       mm2, [_kCoefficientsRgbY + 8 * ebx]
    paddsw     mm1, mm0              // pixel 0 = Y0 + chroma
    paddsw     mm2, mm0              // pixel 1 = Y1 + same chroma
    psraw      mm1, 6                // fixed-point scale
    psraw      mm2, 6
    packuswb   mm1, mm2              // clamp to bytes, 2 pixels
    movntq     [ebp], mm1            // non-temporal store
    lea        ebp, [ebp + 8]
    sub        ecx, 2
    ja         convertloop

    popad
    ret
  }
}

// Same as FastConvertYUVToRGB32Row but with the BGRA coefficient table
// (_kCoefficientsBgraY, defined elsewhere).
__declspec(naked)
void FastConvertYUVToBGRARow(const uint8* y_buf,
                             const uint8* u_buf,
                             const uint8* v_buf,
                             uint8* rgb_buf,
                             int width) {
  __asm {
    pushad
    mov        edx, [esp + 32 + 4]   // y_buf
    mov        edi, [esp + 32 + 8]   // u_buf
    mov        esi, [esp + 32 + 12]  // v_buf
    mov        ebp, [esp + 32 + 16]  // rgb_buf
    mov        ecx, [esp + 32 + 20]  // width

 convertloop :
    movzx      eax, byte ptr [edi]
    lea        edi, [edi + 1]
    movzx      ebx, byte ptr [esi]
    lea        esi, [esi + 1]
    movq       mm0, [_kCoefficientsBgraY + 2048 + 8 * eax]
    movzx      eax, byte ptr [edx]
    paddsw     mm0, [_kCoefficientsBgraY + 4096 + 8 * ebx]
    movzx      ebx, byte ptr [edx + 1]
    movq       mm1, [_kCoefficientsBgraY + 8 * eax]
    lea        edx, [edx + 2]
    movq       mm2, [_kCoefficientsBgraY + 8 * ebx]
    paddsw     mm1, mm0
    paddsw     mm2, mm0
    psraw      mm1, 6
    psraw      mm2, 6
    packuswb   mm1, mm2
    movntq     [ebp], mm1
    lea        ebp, [ebp + 8]
    sub        ecx, 2
    ja         convertloop

    popad
    ret
  }
}

// Same as FastConvertYUVToRGB32Row but with the ABGR coefficient table
// (_kCoefficientsAbgrY, defined elsewhere).
__declspec(naked)
void FastConvertYUVToABGRRow(const uint8* y_buf,
                             const uint8* u_buf,
                             const uint8* v_buf,
                             uint8* rgb_buf,
                             int width) {
  __asm {
    pushad
    mov        edx, [esp + 32 + 4]   // y_buf
    mov        edi, [esp + 32 + 8]   // u_buf
    mov        esi, [esp + 32 + 12]  // v_buf
    mov        ebp, [esp + 32 + 16]  // rgb_buf
    mov        ecx, [esp + 32 + 20]  // width

 convertloop :
    movzx      eax, byte ptr [edi]
    lea        edi, [edi + 1]
    movzx      ebx, byte ptr [esi]
    lea        esi, [esi + 1]
    movq       mm0, [_kCoefficientsAbgrY + 2048 + 8 * eax]
    movzx      eax, byte ptr [edx]
    paddsw     mm0, [_kCoefficientsAbgrY + 4096 + 8 * ebx]
    movzx      ebx, byte ptr [edx + 1]
    movq       mm1, [_kCoefficientsAbgrY + 8 * eax]
    lea        edx, [edx + 2]
    movq       mm2, [_kCoefficientsAbgrY + 8 * ebx]
    paddsw     mm1, mm0
    paddsw     mm2, mm0
    psraw      mm1, 6
    psraw      mm2, 6
    packuswb   mm1, mm2
    movntq     [ebp], mm1
    lea        ebp, [ebp + 8]
    sub        ecx, 2
    ja         convertloop

    popad
    ret
  }
}

// 4:4:4 variant: one U and one V byte per Y, one pixel (4 bytes) written
// per iteration, so any width > 0 works.  Uses a plain movd store rather
// than movntq.
__declspec(naked)
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
                                 uint8* rgb_buf,
                                 int width) {
  __asm {
    pushad
    mov        edx, [esp + 32 + 4]   // Y
    mov        edi, [esp + 32 + 8]   // U
    mov        esi, [esp + 32 + 12]  // V
    mov        ebp, [esp + 32 + 16]  // rgb
    mov        ecx, [esp + 32 + 20]  // width

 convertloop :
    movzx      eax, byte ptr [edi]
    lea        edi, [edi + 1]
    movzx      ebx, byte ptr [esi]
    lea        esi, [esi + 1]
    movq       mm0, [_kCoefficientsRgbY + 2048 + 8 * eax]  // U
    movzx      eax, byte ptr [edx]
    paddsw     mm0, [_kCoefficientsRgbY + 4096 + 8 * ebx]  // + V
    lea        edx, [edx + 1]
    paddsw     mm0, [_kCoefficientsRgbY + 8 * eax]         // + Y
    psraw      mm0, 6
    packuswb   mm0, mm0
    movd       [ebp], mm0
    lea        ebp, [ebp + 4]
    sub        ecx, 1
    ja         convertloop

    popad
    ret
  }
}

// Luma-only variant: expands Y to gray RGB32 via the Y table, 2 pixels per
// iteration.  width must be a multiple of 2 and > 0, rgb_buf 8-byte aligned.
__declspec(naked)
void FastConvertYToRGB32Row(const uint8* y_buf,
                            uint8* rgb_buf,
                            int width) {
  __asm {
    push       ebx
    mov        eax, [esp + 4 + 4]   // Y
    mov        edx, [esp + 4 + 8]   // rgb
    mov        ecx, [esp + 4 + 12]  // width

 convertloop :
    movzx      ebx, byte ptr [eax]      // Y0
    movq       mm0, [_kCoefficientsRgbY + 8 * ebx]
    psraw      mm0, 6
    movzx      ebx, byte ptr [eax + 1]  // Y1
    movq       mm1, [_kCoefficientsRgbY + 8 * ebx]
    psraw      mm1, 6
    packuswb   mm0, mm1
    lea        eax, [eax + 2]
    movq       [edx], mm0
    lea        edx, [edx + 8]
    sub        ecx, 2
    ja         convertloop

    pop        ebx
    ret
  }
}

#endif

}  // extern "C"