#!/usr/bin/env perl

# perlasm generator for 32-bit x86 bignum word primitives.
#
# The script emits the following routines through the x86asm.pl framework:
#   bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_div_words,
#   bn_add_words, bn_sub_words, bn_sub_part_words
#
# Command line: the LAST argument is the output file; the first remaining
# argument is handed to asm_init(); any argument containing
# -DOPENSSL_IA32_SSE2 additionally enables the MMX/SSE2 code paths.

# Make the directory of this script and the shared perlasm directory
# visible to require.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../../perlasm");
require "x86asm.pl";

# Redirect everything the framework prints into the output file.  The
# three-argument open treats $output strictly as a file name, and a failure
# aborts the run instead of silently discarding the generated code.  The
# defined() check matters because a three-argument open on undef would
# succeed by creating an anonymous temporary file.
$output = pop;
defined($output) or die "$0: missing output file argument\n";
open(STDOUT,'>',$output) or die "can't open $output: $!";

&asm_init($ARGV[0]);

$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

&external_label("OPENSSL_ia32cap_P") if ($sse2);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# Buffered write errors only surface at close time; fail loudly so that a
# truncated assembly file is never mistaken for a good one.
close STDOUT or die "error closing STDOUT: $!";

# bn_mul_add_words($name)
#
# Emits function $name taking the stack arguments (r, a, num, w):
#   wparam(0) = r, wparam(1) = a, wparam(2) = num, wparam(3) = w.
# For each of the num words it computes r[i] += a[i]*w plus the running
# carry, and returns the final carry word in eax.
#
# When $sse2 is set, an MMX/SSE2 variant is emitted first and is selected at
# run time by testing bit 26 of the first dword of OPENSSL_ia32cap_P;
# otherwise execution falls through to the integer-only code.
#
# NOTE: the SSE2 word loop executes its body before testing the counter, so
# that path relies on num > 0.  The integer path handles num == 0.
sub bn_mul_add_words
{
    local($name)=@_;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);
        &jnc(&label("maw_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));        # mm0 = w
        &pxor("mm1","mm1");             # mm1 = carry_in
        &jmp(&label("maw_sse2_entry"));

    # Eight words per iteration; the multiplies are issued ahead of the
    # carry chain that consumes them.
    &set_label("maw_sse2_unrolled",16);
        &movd("mm3",&DWP(0,$r,"",0));   # mm3 = r[0]
        &paddq("mm1","mm3");            # mm1 = carry_in + r[0]
        &movd("mm2",&DWP(0,$a,"",0));   # mm2 = a[0]
        &pmuludq("mm2","mm0");          # mm2 = w*a[0]
        &movd("mm4",&DWP(4,$a,"",0));   # mm4 = a[1]
        &pmuludq("mm4","mm0");          # mm4 = w*a[1]
        &movd("mm6",&DWP(8,$a,"",0));   # mm6 = a[2]
        &pmuludq("mm6","mm0");          # mm6 = w*a[2]
        &movd("mm7",&DWP(12,$a,"",0));  # mm7 = a[3]
        &pmuludq("mm7","mm0");          # mm7 = w*a[3]
        &paddq("mm1","mm2");            # mm1 = carry_in + r[0] + w*a[0]
        &movd("mm3",&DWP(4,$r,"",0));   # mm3 = r[1]
        &paddq("mm3","mm4");            # mm3 = r[1] + w*a[1]
        &movd("mm5",&DWP(8,$r,"",0));   # mm5 = r[2]
        &paddq("mm5","mm6");            # mm5 = r[2] + w*a[2]
        &movd("mm4",&DWP(12,$r,"",0));  # mm4 = r[3]
        &paddq("mm7","mm4");            # mm7 = r[3] + w*a[3]
        &movd(&DWP(0,$r,"",0),"mm1");
        &movd("mm2",&DWP(16,$a,"",0));  # mm2 = a[4]
        &pmuludq("mm2","mm0");          # mm2 = w*a[4]
        &psrlq("mm1",32);               # mm1 = carry0
        &movd("mm4",&DWP(20,$a,"",0));  # mm4 = a[5]
        &pmuludq("mm4","mm0");          # mm4 = w*a[5]
        &paddq("mm1","mm3");            # mm1 = carry0 + r[1] + w*a[1]
        &movd("mm6",&DWP(24,$a,"",0));  # mm6 = a[6]
        &pmuludq("mm6","mm0");          # mm6 = w*a[6]
        &movd(&DWP(4,$r,"",0),"mm1");
        &psrlq("mm1",32);               # mm1 = carry1
        &movd("mm3",&DWP(28,$a,"",0));  # mm3 = a[7]
        &add($a,32);
        &pmuludq("mm3","mm0");          # mm3 = w*a[7]
        &paddq("mm1","mm5");            # mm1 = carry1 + r[2] + w*a[2]
        &movd("mm5",&DWP(16,$r,"",0));  # mm5 = r[4]
        &paddq("mm2","mm5");            # mm2 = r[4] + w*a[4]
        &movd(&DWP(8,$r,"",0),"mm1");
        &psrlq("mm1",32);               # mm1 = carry2
        &paddq("mm1","mm7");            # mm1 = carry2 + r[3] + w*a[3]
        &movd("mm5",&DWP(20,$r,"",0));  # mm5 = r[5]
        &paddq("mm4","mm5");            # mm4 = r[5] + w*a[5]
        &movd(&DWP(12,$r,"",0),"mm1");
        &psrlq("mm1",32);               # mm1 = carry3
        &paddq("mm1","mm2");            # mm1 = carry3 + r[4] + w*a[4]
        &movd("mm5",&DWP(24,$r,"",0));  # mm5 = r[6]
        &paddq("mm6","mm5");            # mm6 = r[6] + w*a[6]
        &movd(&DWP(16,$r,"",0),"mm1");
        &psrlq("mm1",32);               # mm1 = carry4
        &paddq("mm1","mm4");            # mm1 = carry4 + r[5] + w*a[5]
        &movd("mm5",&DWP(28,$r,"",0));  # mm5 = r[7]
        &paddq("mm3","mm5");            # mm3 = r[7] + w*a[7]
        &movd(&DWP(20,$r,"",0),"mm1");
        &psrlq("mm1",32);               # mm1 = carry5
        &paddq("mm1","mm6");            # mm1 = carry5 + r[6] + w*a[6]
        &movd(&DWP(24,$r,"",0),"mm1");
        &psrlq("mm1",32);               # mm1 = carry6
        &paddq("mm1","mm3");            # mm1 = carry6 + r[7] + w*a[7]
        &movd(&DWP(28,$r,"",0),"mm1");
        &lea($r,&DWP(32,$r));
        &psrlq("mm1",32);               # mm1 = carry_out

        &sub($c,8);
        &jz(&label("maw_sse2_exit"));
    &set_label("maw_sse2_entry");
        &test($c,0xfffffff8);           # at least 8 words left?
        &jnz(&label("maw_sse2_unrolled"));

    # One word per iteration for the remaining (fewer than 8) words.
    &set_label("maw_sse2_loop",4);
        &movd("mm2",&DWP(0,$a));        # mm2 = a[i]
        &movd("mm3",&DWP(0,$r));        # mm3 = r[i]
        &pmuludq("mm2","mm0");          # a[i] *= w
        &lea($a,&DWP(4,$a));
        &paddq("mm1","mm3");            # carry += r[i]
        &paddq("mm1","mm2");            # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");        # r[i] = carry_low
        &sub($c,1);
        &psrlq("mm1",32);               # carry = carry_high
        &lea($r,&DWP(4,$r));
        &jnz(&label("maw_sse2_loop"));  # flags still come from the sub above
    &set_label("maw_sse2_exit");
        &movd("eax","mm1");             # c = carry_out
        &emms();
        &ret();

    &set_label("maw_non_sse2",16);
    }

    # function_begin prologue
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $Low="eax";
    $High="edx";
    $a="ebx";
    $w="ebp";
    $r="edi";
    $c="esi";

    &xor($c,$c);                # clear carry
    &mov($r,&wparam(0));        #

    &mov("ecx",&wparam(2));     #
    &mov($a,&wparam(1));        #

    &and("ecx",0xfffffff8);     # num / 8
    &mov($w,&wparam(3));        #

    &push("ecx");               # Up the stack for a tmp variable

    # The jz below still tests the result of the and: the mov and push
    # emitted in between do not modify the flags.
    &jz(&label("maw_finish"));

    &set_label("maw_loop",16);

    for ($i=0; $i<32; $i+=4)
    {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a));        # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+= c
        &adc("edx",0);                  # H(t)+=carry
        &add("eax",&DWP($i,$r));        # L(t)+= *r
        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i,$r),"eax");        # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
    }

    &comment("");
    &sub("ecx",8);
    &lea($a,&DWP(32,$a));
    &lea($r,&DWP(32,$r));
    &jnz(&label("maw_loop"));           # lea leaves the flags from sub intact

    &set_label("maw_finish",0);
    &mov("ecx",&wparam(2));             # get num
    &and("ecx",7);
    &jnz(&label("maw_finish2"));        # helps branch prediction
    &jmp(&label("maw_end"));

    # Up to seven leftover words; the seventh round needs no counter test.
    &set_label("maw_finish2",1);
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a));      # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        &adc("edx",0);                  # H(t)+=carry
        &add("eax",&DWP($i*4,$r));      # L(t)+= *r
        &adc("edx",0);                  # H(t)+=carry
        &dec("ecx") if ($i != 7-1);
        &mov(&DWP($i*4,$r),"eax");      # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
        &jz(&label("maw_end")) if ($i != 7-1);  # tests the dec above
    }
    &set_label("maw_end",0);
    &mov("eax",$c);

    &pop("ecx");                # drop the tmp variable pushed above

    &function_end($name);
}

# bn_mul_words($name)
#
# Emits function $name taking the stack arguments (r, a, num, w):
#   wparam(0) = r, wparam(1) = a, wparam(2) = num, wparam(3) = w.
# For each of the num words it stores the low word of a[i]*w + carry into
# r[i] and returns the final carry word in eax.
#
# When $sse2 is set, an MMX/SSE2 loop is emitted first and selected at run
# time via bit 26 of the first dword of OPENSSL_ia32cap_P.
#
# NOTE: the SSE2 loop executes its body before testing the counter, so that
# path relies on num > 0.
sub bn_mul_words
{
    local($name)=@_;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);
        &jnc(&label("mw_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));        # mm0 = w
        &pxor("mm1","mm1");             # mm1 = carry = 0

    &set_label("mw_sse2_loop",16);
        &movd("mm2",&DWP(0,$a));        # mm2 = a[i]
        &pmuludq("mm2","mm0");          # a[i] *= w
        &lea($a,&DWP(4,$a));
        &paddq("mm1","mm2");            # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");        # r[i] = carry_low
        &sub($c,1);
        &psrlq("mm1",32);               # carry = carry_high
        &lea($r,&DWP(4,$r));
        &jnz(&label("mw_sse2_loop"));   # flags still come from the sub above

        &movd("eax","mm1");             # return carry
        &emms();
        &ret();
    &set_label("mw_non_sse2",16);
    }

    # function_begin prologue
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $Low="eax";
    $High="edx";
    $a="ebx";
    $w="ecx";
    $r="edi";
    $c="esi";
    $num="ebp";

    &xor($c,$c);                # clear carry
    &mov($r,&wparam(0));        #
    &mov($a,&wparam(1));        #
    &mov($num,&wparam(2));      #
    &mov($w,&wparam(3));        #

    &and($num,0xfffffff8);      # num / 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop",0);
    for ($i=0; $i<32; $i+=4)
    {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a,"",0));   # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        # XXX

        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i,$r,"",0),"eax");   # *r= L(t);

        &mov($c,"edx");                 # c= H(t);
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish",0);
    &mov($num,&wparam(2));      # get num
    &and($num,7);
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    # Up to seven leftover words; the seventh round needs no counter test.
    &set_label("mw_finish2",1);
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0)); # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        # XXX
        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
        &dec($num) if ($i != 7-1);
        &jz(&label("mw_end")) if ($i != 7-1);
    }
    &set_label("mw_end",0);
    &mov("eax",$c);

    &function_end($name);
}

# bn_sqr_words($name)
#
# Emits function $name taking the stack arguments (r, a, num):
#   wparam(0) = r, wparam(1) = a, wparam(2) = num.
# For each of the num input words it stores the double-word square of a[i]
# into r[2*i] (low word) and r[2*i+1] (high word).
#
# When $sse2 is set, an MMX/SSE2 loop is emitted first and selected at run
# time via bit 26 of the first dword of OPENSSL_ia32cap_P.
#
# NOTE: the SSE2 loop executes its body before testing the counter, so that
# path relies on num > 0.
sub bn_sqr_words
{
    local($name)=@_;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);
        &jnc(&label("sqr_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));

    &set_label("sqr_sse2_loop",16);
        &movd("mm0",&DWP(0,$a));        # mm0 = a[i]
        &pmuludq("mm0","mm0");          # a[i] *= a[i]
        &lea($a,&DWP(4,$a));            # a++
        &movq(&QWP(0,$r),"mm0");        # r[i] = a[i]*a[i]
        &sub($c,1);
        &lea($r,&DWP(8,$r));            # r += 2
        &jnz(&label("sqr_sse2_loop"));  # flags still come from the sub above

        &emms();
        &ret();
    &set_label("sqr_non_sse2",16);
    }

    # function_begin prologue
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $r="esi";
    $a="edi";
    $num="ebx";

    &mov($r,&wparam(0));        #
    &mov($a,&wparam(1));        #
    &mov($num,&wparam(2));      #

    &and($num,0xfffffff8);      # num / 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop",0);
    for ($i=0; $i<32; $i+=4)
    {
        &comment("Round $i");
        &mov("eax",&DWP($i,$a,"",0));           # *a
        # XXX
        &mul("eax");                            # *a * *a
        &mov(&DWP($i*2,$r,"",0),"eax");         #
        &mov(&DWP($i*2+4,$r,"",0),"edx");       #
    }

    &comment("");
    &add($a,32);
    &add($r,64);
    &sub($num,8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish",0);
    &mov($num,&wparam(2));      # get num
    &and($num,7);
    &jz(&label("sw_end"));

    # Up to seven leftover words; the seventh round needs no counter test.
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0));         # *a
        # XXX
        &mul("eax");                            # *a * *a
        &mov(&DWP($i*8,$r,"",0),"eax");         #
        &dec($num) if ($i != 7-1);
        &mov(&DWP($i*8+4,$r,"",0),"edx");
        &jz(&label("sw_end")) if ($i != 7-1);   # tests the dec above
    }
    &set_label("sw_end",0);

    &function_end($name);
}

# bn_div_words($name)
#
# Emits function $name taking three stack arguments: wparam(0) is loaded
# into edx, wparam(1) into eax and wparam(2) into ecx, after which a single
# "div ecx" divides edx:eax by ecx.  The quotient is left in eax as the
# return value.  No callee-saved registers are touched, so the function is
# emitted without the push/pop prologue and epilogue.
sub bn_div_words
{
    local($name)=@_;

    &function_begin_B($name,"");
    &mov("edx",&wparam(0));     #
    &mov("eax",&wparam(1));     #
    &mov("ecx",&wparam(2));     #
    &div("ecx");
    &ret();
    &function_end_B($name);
}

# bn_add_words($name)
#
# Emits function $name taking the stack arguments (r, a, b, num):
#   wparam(0) = r, wparam(1) = a, wparam(2) = b, wparam(3) = num.
# For each of the num words it computes r[i] = a[i] + b[i] + carry.  The
# carry is kept in eax ($c), which is therefore also the return value.
sub bn_add_words
{
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));        # get r
    &mov($a,&wparam(1));        # get a
    &mov($b,&wparam(2));        # get b
    &mov($num,&wparam(3));      # get num
    &xor($c,$c);                # clear carry
    &and($num,0xfffffff8);      # num / 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++)
    {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));         # *b
        &add($tmp1,$c);
        &mov($c,0);                             # mov keeps CF from the add
        &adc($c,$c);                            # c = carry of (*a + c)
        &add($tmp1,$tmp2);
        &adc($c,0);                             # c += carry of (+ *b)
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));      # get num
    &and($num,7);
    &jz(&label("aw_end"));

    # Up to seven leftover words; the seventh round needs no counter test.
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));         # *b
        &add($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &add($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
        &jz(&label("aw_end")) if ($i != 6);     # tests the dec above
    }
    &set_label("aw_end",0);

#   &mov("eax",$c);             # $c is "eax"

    &function_end($name);
}

# bn_sub_words($name)
#
# Emits function $name taking the stack arguments (r, a, b, num):
#   wparam(0) = r, wparam(1) = a, wparam(2) = b, wparam(3) = num.
# For each of the num words it computes r[i] = a[i] - b[i] - borrow.  The
# borrow is kept in eax ($c), which is therefore also the return value.
# The label names ("aw_*") are the same ones bn_add_words uses.
sub bn_sub_words
{
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));        # get r
    &mov($a,&wparam(1));        # get a
    &mov($b,&wparam(2));        # get b
    &mov($num,&wparam(3));      # get num
    &xor($c,$c);                # clear carry
    &and($num,0xfffffff8);      # num / 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++)
    {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));         # *b
        &sub($tmp1,$c);
        &mov($c,0);                             # mov keeps CF from the sub
        &adc($c,$c);                            # c = borrow of (*a - c)
        &sub($tmp1,$tmp2);
        &adc($c,0);                             # c += borrow of (- *b)
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));      # get num
    &and($num,7);
    &jz(&label("aw_end"));

    # Up to seven leftover words; the seventh round needs no counter test.
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));         # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
        &jz(&label("aw_end")) if ($i != 6);     # tests the dec above
    }
    &set_label("aw_end",0);

#   &mov("eax",$c);             # $c is "eax"

    &function_end($name);
}

# bn_sub_part_words($name)
#
# Emits function $name taking the stack arguments (r, a, b, num, dl):
#   wparam(0) = r, wparam(1) = a, wparam(2) = b, wparam(3) = num,
#   wparam(4) = dl.
#
# Phase 1 is the same subtraction as bn_sub_words over num words
# (r[i] = a[i] - b[i] - borrow), except that the tail rounds advance the
# a/b/r pointers so they point just past the processed words afterwards.
#
# Phase 2 depends on the sign of dl:
#   dl == 0: nothing more to do.
#   dl <  0: for -dl further words, r[i] = 0 - b[i] - borrow.
#   dl >  0: for dl further words, r[i] = a[i] - borrow.  As soon as a word
#            produces no borrow, control jumps into the plain copy code
#            ("pw_nc*" labels), which copies the rest of a to r and ends
#            with the borrow cleared.
#
# The borrow is kept in eax ($c), which is therefore also the return value.
sub bn_sub_part_words
{
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));        # get r
    &mov($a,&wparam(1));        # get a
    &mov($b,&wparam(2));        # get b
    &mov($num,&wparam(3));      # get num
    &xor($c,$c);                # clear carry
    &and($num,0xfffffff8);      # num / 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++)
    {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));         # *b
        &sub($tmp1,$c);
        &mov($c,0);                             # mov keeps CF from the sub
        &adc($c,$c);                            # c = borrow of (*a - c)
        &sub($tmp1,$tmp2);
        &adc($c,0);                             # c += borrow of (- *b)
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));      # get num
    &and($num,7);
    &jz(&label("aw_end"));

    # Tail rounds step the pointers one word at a time (instead of using
    # fixed offsets) because phase 2 continues from where phase 1 stopped.
    # The dec comes after the adds so that jz tests the counter.
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP(0,$a,"",0));            # *a
        &mov($tmp2,&DWP(0,$b,"",0));            # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP(0,$r,"",0),$tmp1);            # *r
        &add($a, 4);
        &add($b, 4);
        &add($r, 4);
        &dec($num) if ($i != 6);
        &jz(&label("aw_end")) if ($i != 6);
    }
    &set_label("aw_end",0);

    &cmp(&wparam(4),0);
    &je(&label("pw_end"));

    &mov($num,&wparam(4));      # get dl
    &cmp($num,0);
    &je(&label("pw_end"));
    &jge(&label("pw_pos"));

    # dl < 0: work on -dl words of b.
    &comment("pw_neg");
    &mov($tmp2,0);
    &sub($tmp2,$num);
    &mov($num,$tmp2);           # num = -dl
    &and($num,0xfffffff8);      # num / 8
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop",0);
    for ($i=0; $i<8; $i++)
    {
        &comment("dl<0 Round $i");

        &mov($tmp1,0);
        &mov($tmp2,&DWP($i*4,$b,"",0));         # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
    }

    &comment("");
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish",0);
    &mov($tmp2,&wparam(4));     # get dl
    &mov($num,0);
    &sub($num,$tmp2);           # num = -dl
    &and($num,7);
    &jz(&label("pw_end"));

    for ($i=0; $i<7; $i++)
    {
        &comment("dl<0 Tail Round $i");
        &mov($tmp1,0);
        &mov($tmp2,&DWP($i*4,$b,"",0));         # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
        &jz(&label("pw_end")) if ($i != 6);     # tests the dec above
    }

    &jmp(&label("pw_end"));

    # dl > 0: propagate the borrow through dl words of a.
    &set_label("pw_pos",0);

    &and($num,0xfffffff8);      # num / 8
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop",0);

    for ($i=0; $i<8; $i++)
    {
        &comment("dl>0 Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &sub($tmp1,$c);
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
        &jnc(&label("pw_nc".$i));               # no borrow: switch to copying
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish",0);
    &mov($num,&wparam(4));      # get dl
    &and($num,7);
    &jz(&label("pw_end"));

    for ($i=0; $i<7; $i++)
    {
        &comment("dl>0 Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &sub($tmp1,$c);
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
        &jnc(&label("pw_tail_nc".$i));          # no borrow: switch to copying
        &dec($num) if ($i != 6);
        &jz(&label("pw_end")) if ($i != 6);
    }
    &mov($c,1);                 # the borrow survived all seven tail rounds
    &jmp(&label("pw_end"));

    # Plain copy of the remaining words of a.  Each "pw_nc$i" entry point
    # sits right after the copy of word $i, i.e. where the borrow loop
    # above leaves off once word $i has been stored without a borrow.
    &set_label("pw_nc_loop",0);
    for ($i=0; $i<8; $i++)
    {
        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
    &set_label("pw_nc".$i,0);
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_nc_loop"));

    &mov($num,&wparam(4));      # get dl
    &and($num,7);
    &jz(&label("pw_nc_end"));

    for ($i=0; $i<7; $i++)
    {
        &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
        &mov(&DWP($i*4,$r,"",0),$tmp1);         # *r
    &set_label("pw_tail_nc".$i,0);
        &dec($num) if ($i != 6);
        &jz(&label("pw_nc_end")) if ($i != 6);
    }

    &set_label("pw_nc_end",0);
    &mov($c,0);                 # the copy path always ends without a borrow

    &set_label("pw_end",0);

#   &mov("eax",$c);             # $c is "eax"

    &function_end($name);
}