#!/usr/local/bin/perl
#
# Generator script: drives the x86asm.pl emitter to produce 32-bit x86
# implementations of the word-array routines listed further down
# (bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_div_words, bn_add_words,
# bn_sub_words and bn_sub_part_words).
#
# The first command-line argument is handed to asm_init().  When any argument
# matches -DOPENSSL_IA32_SSE2, the multiply/square routines additionally get
# an MMX/SSE2 code path that is chosen at run time by testing bit 26 of the
# first dword of OPENSSL_ia32cap_P.
#
# IMPORTANT: instructions and labels are emitted in the order in which the
# &opcode()/&label() calls execute, so the statement order inside every
# generator below is significant.  Variables are deliberately left as package
# globals, exactly as x86asm.pl-based scripts conventionally do.

$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;
push(@INC, "${dir}", "${dir}../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0], $0);

# Turn on the SSE2 variants only when the build flags ask for them.
$sse2 = 0;
foreach (@ARGV) {
    $sse2 = 1 if (/-DOPENSSL_IA32_SSE2/);
}

&external_label("OPENSSL_ia32cap_P") if ($sse2);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# ---------------------------------------------------------------------------
# Shared emitter helpers
# ---------------------------------------------------------------------------

# Emit the run-time capability check used by the SSE2-enabled routines:
# fetch the address of OPENSSL_ia32cap_P into eax, test bit 26 of its first
# dword and branch to the label named $fallback when that bit is clear.
sub bn_emit_sse2_check {
    my ($fallback) = @_;

    &picmeup("eax", "OPENSSL_ia32cap_P");
    &bt(&DWP(0, "eax"), 26);
    &jnc(&label($fallback));
}

# Emit the four register saves that routines opened with function_begin_B()
# have to perform by hand before falling into their integer code path.
sub bn_emit_prologue {
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");
}

# Emit one carry/borrow-propagating word step.  Relies on the register-name
# globals $a, $b, $c, $r, $tmp1, $tmp2 and $num set up by the caller.
#
#   $offset            byte offset applied to the a/b/r pointers
#   subtract     => 1  use sub instead of add for both arithmetic steps
#   zero_minuend => 1  start from the constant 0 instead of loading *a
#   countdown    => 1  emit "dec $num" just before the result is stored
#
# Sequence emitted: tmp1 = *a (or 0); tmp2 = *b; tmp1 op= c; c = CF;
# tmp1 op= tmp2; c += CF; [dec num]; *r = tmp1.
sub bn_emit_carry_step {
    my ($offset, %opt) = @_;

    if ($opt{zero_minuend}) {
        &mov($tmp1, 0);
    }
    else {
        &mov($tmp1, &DWP($offset, $a, "", 0));      # *a
    }
    &mov($tmp2, &DWP($offset, $b, "", 0));          # *b

    if ($opt{subtract}) { &sub($tmp1, $c); }
    else                { &add($tmp1, $c); }
    &mov($c, 0);                                    # mov leaves CF intact
    &adc($c, $c);                                   # c = carry/borrow so far

    if ($opt{subtract}) { &sub($tmp1, $tmp2); }
    else                { &add($tmp1, $tmp2); }
    &adc($c, 0);                                    # fold in the second carry

    &dec($num) if ($opt{countdown});
    &mov(&DWP($offset, $r, "", 0), $tmp1);          # *r
}

# Emit everything bn_add_words, bn_sub_words and bn_sub_part_words have in
# common: function entry, argument loads (r, a, b, num from wparam 0..3), the
# 8-way unrolled main loop and the test that skips the tail when num is a
# multiple of 8.  Execution of the generated code continues at the tail
# rounds, which the caller emits.
sub bn_emit_add_sub_head {
    my ($fn, $subtract) = @_;

    &function_begin($fn, "");

    &comment("");
    $a    = "esi";
    $b    = "edi";
    $c    = "eax";
    $r    = "ebx";
    $tmp1 = "ecx";
    $tmp2 = "edx";
    $num  = "ebp";

    &mov($r,   &wparam(0));         # get r
    &mov($a,   &wparam(1));         # get a
    &mov($b,   &wparam(2));         # get b
    &mov($num, &wparam(3));         # get num
    &xor($c, $c);                   # clear carry
    &and($num, 0xfffffff8);         # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    foreach $i (0 .. 7) {
        &comment("Round $i");
        &bn_emit_carry_step($i * 4, subtract => $subtract);
    }

    &comment("");
    &add($a, 32);
    &add($b, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($num, &wparam(3));         # get num
    &and($num, 7);                  # words left over after the main loop
    &jz(&label("aw_end"));
}

# Emit a complete r[] = a[] +/- b[] routine (shared body of bn_add_words and
# bn_sub_words).  The carry/borrow is left in eax, which is $c.
sub bn_emit_add_sub_words {
    my ($fn, $subtract) = @_;

    &bn_emit_add_sub_head($fn, $subtract);

    # Up to seven leftover words; each round but the last counts num down
    # and leaves as soon as it reaches zero.
    foreach $i (0 .. 6) {
        my $more = ($i != 6);

        &comment("Tail Round $i");
        &bn_emit_carry_step($i * 4, subtract => $subtract, countdown => $more);
        &jz(&label("aw_end")) if ($more);
    }
    &set_label("aw_end", 0);

    # No explicit move of the result: $c already is "eax".

    &function_end($fn);
}

# ---------------------------------------------------------------------------
# bn_mul_add_words: r[i] += a[i] * w for num words, propagating the carry;
# the final carry is returned in eax.
# Arguments as read below: wparam(0)=r, wparam(1)=a, wparam(2)=num,
# wparam(3)=w.
# ---------------------------------------------------------------------------
sub bn_mul_add_words {
    local($name) = @_;

    &function_begin_B($name, $sse2 ? "EXTRN\t_OPENSSL_ia32cap_P:DWORD" : "");

    # Register roles for the SSE2 path.
    $r = "eax";
    $a = "edx";
    $c = "ecx";

    if ($sse2) {
        &bn_emit_sse2_check("maw_non_sse2");

        &mov($r, &wparam(0));
        &mov($a, &wparam(1));
        &mov($c, &wparam(2));
        &movd("mm0", &wparam(3));               # mm0 = w
        &pxor("mm1", "mm1");                    # mm1 = carry_in
        &jmp(&label("maw_sse2_entry"));

        # Eight words per pass; multiplies are issued ahead of the carry
        # chain that consumes them.
        &set_label("maw_sse2_unrolled", 16);
        &movd("mm3", &DWP(0, $r, "", 0));       # mm3 = r[0]
        &paddq("mm1", "mm3");                   # mm1 = carry_in + r[0]
        &movd("mm2", &DWP(0, $a, "", 0));       # mm2 = a[0]
        &pmuludq("mm2", "mm0");                 # mm2 = w*a[0]
        &movd("mm4", &DWP(4, $a, "", 0));       # mm4 = a[1]
        &pmuludq("mm4", "mm0");                 # mm4 = w*a[1]
        &movd("mm6", &DWP(8, $a, "", 0));       # mm6 = a[2]
        &pmuludq("mm6", "mm0");                 # mm6 = w*a[2]
        &movd("mm7", &DWP(12, $a, "", 0));      # mm7 = a[3]
        &pmuludq("mm7", "mm0");                 # mm7 = w*a[3]
        &paddq("mm1", "mm2");                   # mm1 = carry_in + r[0] + w*a[0]
        &movd("mm3", &DWP(4, $r, "", 0));       # mm3 = r[1]
        &paddq("mm3", "mm4");                   # mm3 = r[1] + w*a[1]
        &movd("mm5", &DWP(8, $r, "", 0));       # mm5 = r[2]
        &paddq("mm5", "mm6");                   # mm5 = r[2] + w*a[2]
        &movd("mm4", &DWP(12, $r, "", 0));      # mm4 = r[3]
        &paddq("mm7", "mm4");                   # mm7 = r[3] + w*a[3]
        &movd(&DWP(0, $r, "", 0), "mm1");       # store r[0]
        &movd("mm2", &DWP(16, $a, "", 0));      # mm2 = a[4]
        &pmuludq("mm2", "mm0");                 # mm2 = w*a[4]
        &psrlq("mm1", 32);                      # mm1 = carry0
        &movd("mm4", &DWP(20, $a, "", 0));      # mm4 = a[5]
        &pmuludq("mm4", "mm0");                 # mm4 = w*a[5]
        &paddq("mm1", "mm3");                   # mm1 = carry0 + r[1] + w*a[1]
        &movd("mm6", &DWP(24, $a, "", 0));      # mm6 = a[6]
        &pmuludq("mm6", "mm0");                 # mm6 = w*a[6]
        &movd(&DWP(4, $r, "", 0), "mm1");       # store r[1]
        &psrlq("mm1", 32);                      # mm1 = carry1
        &movd("mm3", &DWP(28, $a, "", 0));      # mm3 = a[7]
        &add($a, 32);
        &pmuludq("mm3", "mm0");                 # mm3 = w*a[7]
        &paddq("mm1", "mm5");                   # mm1 = carry1 + r[2] + w*a[2]
        &movd("mm5", &DWP(16, $r, "", 0));      # mm5 = r[4]
        &paddq("mm2", "mm5");                   # mm2 = r[4] + w*a[4]
        &movd(&DWP(8, $r, "", 0), "mm1");       # store r[2]
        &psrlq("mm1", 32);                      # mm1 = carry2
        &paddq("mm1", "mm7");                   # mm1 = carry2 + r[3] + w*a[3]
        &movd("mm5", &DWP(20, $r, "", 0));      # mm5 = r[5]
        &paddq("mm4", "mm5");                   # mm4 = r[5] + w*a[5]
        &movd(&DWP(12, $r, "", 0), "mm1");      # store r[3]
        &psrlq("mm1", 32);                      # mm1 = carry3
        &paddq("mm1", "mm2");                   # mm1 = carry3 + r[4] + w*a[4]
        &movd("mm5", &DWP(24, $r, "", 0));      # mm5 = r[6]
        &paddq("mm6", "mm5");                   # mm6 = r[6] + w*a[6]
        &movd(&DWP(16, $r, "", 0), "mm1");      # store r[4]
        &psrlq("mm1", 32);                      # mm1 = carry4
        &paddq("mm1", "mm4");                   # mm1 = carry4 + r[5] + w*a[5]
        &movd("mm5", &DWP(28, $r, "", 0));      # mm5 = r[7]
        &paddq("mm3", "mm5");                   # mm3 = r[7] + w*a[7]
        &movd(&DWP(20, $r, "", 0), "mm1");      # store r[5]
        &psrlq("mm1", 32);                      # mm1 = carry5
        &paddq("mm1", "mm6");                   # mm1 = carry5 + r[6] + w*a[6]
        &movd(&DWP(24, $r, "", 0), "mm1");      # store r[6]
        &psrlq("mm1", 32);                      # mm1 = carry6
        &paddq("mm1", "mm3");                   # mm1 = carry6 + r[7] + w*a[7]
        &movd(&DWP(28, $r, "", 0), "mm1");      # store r[7]
        &lea($r, &DWP(32, $r));
        &psrlq("mm1", 32);                      # mm1 = carry_out

        &sub($c, 8);
        &jz(&label("maw_sse2_exit"));
        &set_label("maw_sse2_entry");
        &test($c, 0xfffffff8);                  # at least 8 words left?
        &jnz(&label("maw_sse2_unrolled"));

        # One word per pass for the remainder.
        &set_label("maw_sse2_loop", 4);
        &movd("mm2", &DWP(0, $a));              # mm2 = a[i]
        &movd("mm3", &DWP(0, $r));              # mm3 = r[i]
        &pmuludq("mm2", "mm0");                 # a[i] *= w
        &lea($a, &DWP(4, $a));
        &paddq("mm1", "mm3");                   # carry += r[i]
        &paddq("mm1", "mm2");                   # carry += a[i]*w
        &movd(&DWP(0, $r), "mm1");              # r[i] = carry_low
        &sub($c, 1);
        &psrlq("mm1", 32);                      # carry = carry_high
        &lea($r, &DWP(4, $r));
        &jnz(&label("maw_sse2_loop"));
        &set_label("maw_sse2_exit");
        &movd("eax", "mm1");                    # return value = carry_out
        &emms();
        &ret();

        &set_label("maw_non_sse2", 16);
    }

    # Integer path: hand-written equivalent of the function_begin prologue.
    &bn_emit_prologue();

    &comment("");
    $Low  = "eax";      # set for reference only; the code below spells
    $High = "edx";      # "eax"/"edx" out literally
    $a    = "ebx";
    $w    = "ebp";
    $r    = "edi";
    $c    = "esi";

    &xor($c, $c);                       # clear carry
    &mov($r, &wparam(0));

    &mov("ecx", &wparam(2));
    &mov($a, &wparam(1));

    &and("ecx", 0xfffffff8);            # num rounded down to a multiple of 8
    &mov($w, &wparam(3));

    &push("ecx");                       # reserve a stack slot (popped at the end)

    &jz(&label("maw_finish"));          # flags still come from the "and" above

    &set_label("maw_loop", 16);

    foreach $i (map { 4 * $_ } 0 .. 7) {    # $i is a byte offset here
        &comment("Round $i");

        &mov("eax", &DWP($i, $a));      # *a
        &mul($w);                       # *a * w
        &add("eax", $c);                # L(t) += c
        &adc("edx", 0);                 # H(t) += carry
        &add("eax", &DWP($i, $r));      # L(t) += *r
        &adc("edx", 0);                 # H(t) += carry
        &mov(&DWP($i, $r), "eax");      # *r = L(t)
        &mov($c, "edx");                # c  = H(t)
    }

    &comment("");
    &sub("ecx", 8);
    &lea($a, &DWP(32, $a));             # lea keeps the flags set by sub
    &lea($r, &DWP(32, $r));
    &jnz(&label("maw_loop"));

    &set_label("maw_finish", 0);
    &mov("ecx", &wparam(2));            # get num
    &and("ecx", 7);
    &jnz(&label("maw_finish2"));        # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2", 1);
    foreach $i (0 .. 6) {
        my $more = ($i != 7 - 1);

        &comment("Tail Round $i");
        &mov("eax", &DWP($i * 4, $a));  # *a
        &mul($w);                       # *a * w
        &add("eax", $c);                # L(t) += c
        &adc("edx", 0);                 # H(t) += carry
        &add("eax", &DWP($i * 4, $r));  # L(t) += *r
        &adc("edx", 0);                 # H(t) += carry
        &dec("ecx") if ($more);
        &mov(&DWP($i * 4, $r), "eax");  # *r = L(t)
        &mov($c, "edx");                # c  = H(t)
        &jz(&label("maw_end")) if ($more);
    }
    &set_label("maw_end", 0);
    &mov("eax", $c);                    # return the carry

    &pop("ecx");                        # release the slot pushed above

    &function_end($name);
}

# ---------------------------------------------------------------------------
# bn_mul_words: r[i] = low word of (a[i] * w + carry) for num words; the
# final carry is returned in eax.
# Arguments as read below: wparam(0)=r, wparam(1)=a, wparam(2)=num,
# wparam(3)=w.
# ---------------------------------------------------------------------------
sub bn_mul_words {
    local($name) = @_;

    &function_begin_B($name, $sse2 ? "EXTRN\t_OPENSSL_ia32cap_P:DWORD" : "");

    # Register roles for the SSE2 path.
    $r = "eax";
    $a = "edx";
    $c = "ecx";

    if ($sse2) {
        &bn_emit_sse2_check("mw_non_sse2");

        &mov($r, &wparam(0));
        &mov($a, &wparam(1));
        &mov($c, &wparam(2));
        &movd("mm0", &wparam(3));       # mm0 = w
        &pxor("mm1", "mm1");            # mm1 = carry = 0

        &set_label("mw_sse2_loop", 16);
        &movd("mm2", &DWP(0, $a));      # mm2 = a[i]
        &pmuludq("mm2", "mm0");         # a[i] *= w
        &lea($a, &DWP(4, $a));
        &paddq("mm1", "mm2");           # carry += a[i]*w
        &movd(&DWP(0, $r), "mm1");      # r[i] = carry_low
        &sub($c, 1);
        &psrlq("mm1", 32);              # carry = carry_high
        &lea($r, &DWP(4, $r));
        &jnz(&label("mw_sse2_loop"));

        &movd("eax", "mm1");            # return carry
        &emms();
        &ret();
        &set_label("mw_non_sse2", 16);
    }

    # Integer path: hand-written equivalent of the function_begin prologue.
    &bn_emit_prologue();

    &comment("");
    $Low  = "eax";      # set for reference only; not read below
    $High = "edx";
    $a    = "ebx";
    $w    = "ecx";
    $r    = "edi";
    $c    = "esi";
    $num  = "ebp";

    &xor($c, $c);                       # clear carry
    &mov($r,   &wparam(0));
    &mov($a,   &wparam(1));
    &mov($num, &wparam(2));
    &mov($w,   &wparam(3));

    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop", 0);
    foreach $i (map { 4 * $_ } 0 .. 7) {        # $i is a byte offset here
        &comment("Round $i");

        &mov("eax", &DWP($i, $a, "", 0));       # *a
        &mul($w);                               # *a * w
        &add("eax", $c);                        # L(t) += c
        # XXX

        &adc("edx", 0);                         # H(t) += carry
        &mov(&DWP($i, $r, "", 0), "eax");       # *r = L(t)

        &mov($c, "edx");                        # c  = H(t)
    }

    &comment("");
    &add($a, 32);
    &add($r, 32);
    &sub($num, 8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish", 0);
    &mov($num, &wparam(2));             # get num
    &and($num, 7);
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2", 1);
    foreach $i (0 .. 6) {
        my $more = ($i != 7 - 1);

        &comment("Tail Round $i");
        &mov("eax", &DWP($i * 4, $a, "", 0));   # *a
        &mul($w);                               # *a * w
        &add("eax", $c);                        # L(t) += c
        # XXX
        &adc("edx", 0);                         # H(t) += carry
        &mov(&DWP($i * 4, $r, "", 0), "eax");   # *r = L(t)
        &mov($c, "edx");                        # c  = H(t)
        &dec($num) if ($more);
        &jz(&label("mw_end")) if ($more);
    }
    &set_label("mw_end", 0);
    &mov("eax", $c);                    # return the carry

    &function_end($name);
}

# ---------------------------------------------------------------------------
# bn_sqr_words: for each of num input words a[i], store the double-word
# square a[i]*a[i] at r[2*i] (low) and r[2*i+1] (high).
# Arguments as read below: wparam(0)=r, wparam(1)=a, wparam(2)=num.
# ---------------------------------------------------------------------------
sub bn_sqr_words {
    local($name) = @_;

    &function_begin_B($name, $sse2 ? "EXTRN\t_OPENSSL_ia32cap_P:DWORD" : "");

    # Register roles for the SSE2 path.
    $r = "eax";
    $a = "edx";
    $c = "ecx";

    if ($sse2) {
        &bn_emit_sse2_check("sqr_non_sse2");

        &mov($r, &wparam(0));
        &mov($a, &wparam(1));
        &mov($c, &wparam(2));

        &set_label("sqr_sse2_loop", 16);
        &movd("mm0", &DWP(0, $a));      # mm0 = a[i]
        &pmuludq("mm0", "mm0");         # a[i] *= a[i]
        &lea($a, &DWP(4, $a));          # a++
        &movq(&QWP(0, $r), "mm0");      # r[i] = a[i]*a[i]
        &sub($c, 1);
        &lea($r, &DWP(8, $r));          # r += 2
        &jnz(&label("sqr_sse2_loop"));

        &emms();
        &ret();
        &set_label("sqr_non_sse2", 16);
    }

    # Integer path: hand-written equivalent of the function_begin prologue.
    &bn_emit_prologue();

    &comment("");
    $r   = "esi";
    $a   = "edi";
    $num = "ebx";

    &mov($r,   &wparam(0));
    &mov($a,   &wparam(1));
    &mov($num, &wparam(2));

    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop", 0);
    foreach $i (map { 4 * $_ } 0 .. 7) {        # $i is a byte offset into a[]
        &comment("Round $i");
        &mov("eax", &DWP($i, $a, "", 0));       # *a
        # XXX
        &mul("eax");                            # *a * *a
        &mov(&DWP($i * 2, $r, "", 0), "eax");       # low word
        &mov(&DWP($i * 2 + 4, $r, "", 0), "edx");   # high word
    }

    &comment("");
    &add($a, 32);
    &add($r, 64);
    &sub($num, 8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish", 0);
    &mov($num, &wparam(2));             # get num
    &and($num, 7);
    &jz(&label("sw_end"));

    foreach $i (0 .. 6) {
        my $more = ($i != 7 - 1);

        &comment("Tail Round $i");
        &mov("eax", &DWP($i * 4, $a, "", 0));       # *a
        # XXX
        &mul("eax");                                # *a * *a
        &mov(&DWP($i * 8, $r, "", 0), "eax");       # low word
        &dec($num) if ($more);
        &mov(&DWP($i * 8 + 4, $r, "", 0), "edx");   # high word
        &jz(&label("sw_end")) if ($more);
    }
    &set_label("sw_end", 0);

    &function_end($name);
}

# ---------------------------------------------------------------------------
# bn_div_words: loads wparam(0) into edx, wparam(1) into eax and wparam(2)
# into ecx, then executes "div ecx" and returns (the result is whatever the
# div instruction leaves in eax).
# ---------------------------------------------------------------------------
sub bn_div_words {
    local($name) = @_;

    &function_begin_B($name, "");
    &mov("edx", &wparam(0));
    &mov("eax", &wparam(1));
    &mov("ecx", &wparam(2));
    &div("ecx");
    &ret();
    &function_end_B($name);
}

# ---------------------------------------------------------------------------
# bn_add_words: r[i] = a[i] + b[i] + carry for num words; the final carry is
# left in eax.  Arguments: wparam(0)=r, (1)=a, (2)=b, (3)=num.
# ---------------------------------------------------------------------------
sub bn_add_words {
    local($name) = @_;

    &bn_emit_add_sub_words($name, 0);
}

# ---------------------------------------------------------------------------
# bn_sub_words: r[i] = a[i] - b[i] - borrow for num words; the final borrow
# is left in eax.  Arguments: wparam(0)=r, (1)=a, (2)=b, (3)=num.
# The generated code reuses the aw_* label names of bn_add_words.
# ---------------------------------------------------------------------------
sub bn_sub_words {
    local($name) = @_;

    &bn_emit_add_sub_words($name, 1);
}

# ---------------------------------------------------------------------------
# bn_sub_part_words: like bn_sub_words for the first num words, then handles
# a further |dl| words according to the sign of the fifth argument dl
# (wparam(4)):
#   dl == 0  nothing more to do;
#   dl <  0  remaining words are computed as 0 - b[i] - borrow;
#   dl >  0  remaining words are computed as a[i] - borrow, switching to a
#            plain copy of a[] as soon as the borrow has been absorbed.
# The final borrow is left in eax.
# ---------------------------------------------------------------------------
sub bn_sub_part_words {
    local($name) = @_;

    &bn_emit_add_sub_head($name, 1);

    # Tail of the common part.  Unlike bn_sub_words the pointers are advanced
    # after every word, so that a, b and r point just past the common part
    # when the dl handling below starts.
    foreach $i (0 .. 6) {
        my $more = ($i != 6);

        &comment("Tail Round $i");
        &bn_emit_carry_step(0, subtract => 1);
        &add($a, 4);
        &add($b, 4);
        &add($r, 4);
        &dec($num) if ($more);
        &jz(&label("aw_end")) if ($more);
    }
    &set_label("aw_end", 0);

    &cmp(&wparam(4), 0);
    &je(&label("pw_end"));

    &mov($num, &wparam(4));             # get dl
    &cmp($num, 0);
    &je(&label("pw_end"));
    &jge(&label("pw_pos"));

    # ---- dl < 0 --------------------------------------------------------
    &comment("pw_neg");
    &mov($tmp2, 0);
    &sub($tmp2, $num);
    &mov($num, $tmp2);                  # num = -dl
    &and($num, 0xfffffff8);             # rounded down to a multiple of 8
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop", 0);
    foreach $i (0 .. 7) {
        &comment("dl<0 Round $i");
        &bn_emit_carry_step($i * 4, subtract => 1, zero_minuend => 1);
    }

    &comment("");
    &add($b, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish", 0);
    &mov($tmp2, &wparam(4));            # get dl
    &mov($num, 0);
    &sub($num, $tmp2);                  # num = -dl
    &and($num, 7);
    &jz(&label("pw_end"));

    foreach $i (0 .. 6) {
        my $more = ($i != 6);

        &comment("dl<0 Tail Round $i");
        &bn_emit_carry_step($i * 4,
                            subtract     => 1,
                            zero_minuend => 1,
                            countdown    => $more);
        &jz(&label("pw_end")) if ($more);
    }

    &jmp(&label("pw_end"));

    # ---- dl > 0, borrow still pending ------------------------------------
    &set_label("pw_pos", 0);

    &and($num, 0xfffffff8);             # dl rounded down to a multiple of 8
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop", 0);

    foreach $i (0 .. 7) {
        &comment("dl>0 Round $i");

        &mov($tmp1, &DWP($i * 4, $a, "", 0));   # *a
        &sub($tmp1, $c);
        &mov(&DWP($i * 4, $r, "", 0), $tmp1);   # *r
        &jnc(&label("pw_nc" . $i));             # no borrow: go to the copy loop
    }

    &comment("");
    &add($a, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish", 0);
    &mov($num, &wparam(4));             # get dl
    &and($num, 7);
    &jz(&label("pw_end"));

    foreach $i (0 .. 6) {
        my $more = ($i != 6);

        &comment("dl>0 Tail Round $i");
        &mov($tmp1, &DWP($i * 4, $a, "", 0));   # *a
        &sub($tmp1, $c);
        &mov(&DWP($i * 4, $r, "", 0), $tmp1);   # *r
        &jnc(&label("pw_tail_nc" . $i));        # no borrow: go to the copy tail
        &dec($num) if ($more);
        &jz(&label("pw_end")) if ($more);
    }
    &mov($c, 1);                        # borrow survived every word
    &jmp(&label("pw_end"));

    # ---- dl > 0, borrow absorbed: plain copy of a[] into r[] -------------
    # Each pw_nc<i> label sits right after the copy of word i, i.e. at the
    # point where the borrow loop above leaves off.
    &set_label("pw_nc_loop", 0);
    foreach $i (0 .. 7) {
        &mov($tmp1, &DWP($i * 4, $a, "", 0));   # *a
        &mov(&DWP($i * 4, $r, "", 0), $tmp1);   # *r
        &set_label("pw_nc" . $i, 0);
    }

    &comment("");
    &add($a, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("pw_nc_loop"));

    &mov($num, &wparam(4));             # get dl
    &and($num, 7);
    &jz(&label("pw_nc_end"));

    foreach $i (0 .. 6) {
        my $more = ($i != 6);

        &mov($tmp1, &DWP($i * 4, $a, "", 0));   # *a
        &mov(&DWP($i * 4, $r, "", 0), $tmp1);   # *r
        &set_label("pw_tail_nc" . $i, 0);
        &dec($num) if ($more);
        &jz(&label("pw_nc_end")) if ($more);
    }

    &set_label("pw_nc_end", 0);
    &mov($c, 0);                        # no borrow to report

    &set_label("pw_end", 0);

    # No explicit move of the result: $c already is "eax".

    &function_end($name);
}