Home | History | Annotate | Download | only in asm
      1 #!/usr/local/bin/perl
      2 
        # Generator script: each bn_*_words sub below emits one 32-bit x86
        # routine through the helper subs loaded from x86asm.pl.
        # The first command-line argument and this script's own name ($0) are
        # handed to asm_init(); how they are interpreted is decided inside
        # x86asm.pl (not visible here).
      3 push(@INC,"perlasm","../../perlasm");
      4 require "x86asm.pl";
      5 
      6 &asm_init($ARGV[0],$0);
      7 
        # $sse2 becomes 1 when any argument contains -DOPENSSL_IA32_SSE2; it
        # selects the extra MMX/SSE2 code path in bn_mul_add_words.
      8 $sse2=0;
      9 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
     10 
        # The SSE2 path reads OPENSSL_ia32cap_P, so declare it as external.
     11 &external_label("OPENSSL_ia32cap_P") if ($sse2);
     12 
        # Emit the routines; the string is the name given to the generated function.
     13 &bn_mul_add_words("bn_mul_add_words");
     14 &bn_mul_words("bn_mul_words");
     15 &bn_sqr_words("bn_sqr_words");
     16 &bn_div_words("bn_div_words");
     17 &bn_add_words("bn_add_words");
     18 &bn_sub_words("bn_sub_words");
     19 &bn_sub_part_words("bn_sub_part_words");
     20 
     21 &asm_finish();
     22 
     23 sub bn_mul_add_words
     24 	{
        	# Emit the function $name taking four stack arguments, read below as
        	# wparam(0)=r, wparam(1)=a, wparam(2)=num, wparam(3)=w.
        	# For each of the num words the generated code computes
        	#   t = a[i]*w + r[i] + c;  r[i] = low 32 bits of t;  c = high 32 bits
        	# and returns the final c in eax.
        	# Layout: an 8-words-per-pass unrolled loop for (num & ~7) words
        	# (integer version, plus an MMX/SSE2 version when $sse2 is set),
        	# followed by a tail of up to 7 single-word rounds for (num & 7).
        	# NOTE: $Low/$High/$a/$w/$r/$c are package globals, not lexicals.
     25 	local($name)=@_;
     26 
     27 	&function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
     28 
     29 	&comment("");
     30 	$Low="eax";
     31 	$High="edx";
     32 	$a="ebx";
     33 	$w="ebp";
     34 	$r="edi";
     35 	$c="esi";
     36 
     37 	&xor($c,$c);		# clear carry
     38 	&mov($r,&wparam(0));	# r   = 1st argument
     39 
     40 	&mov("ecx",&wparam(2));	# ecx = 3rd argument (num)
     41 	&mov($a,&wparam(1));	# a   = 2nd argument
     42 
     43 	&and("ecx",0xfffffff8);	# num & ~7: words for the unrolled loop (sets ZF for the jz below)
     44 	&mov($w,&wparam(3));	# w   = 4th argument (mov leaves the flags alone)
     45 
     46 	&push("ecx");		# reserve a stack slot, later addressed as swtmp(0)
     47 
     48 	&jz(&label("maw_finish"));	# fewer than 8 words: only the tail is needed
     49 
     50 	if ($sse2) {
        		# Run-time dispatch: test bit 26 of the first dword of
        		# OPENSSL_ia32cap_P (presumably the CPUID SSE2 feature bit -
        		# confirm against the definition of OPENSSL_ia32cap_P).
     51 		&picmeup("eax","OPENSSL_ia32cap_P");
     52 		&bt(&DWP(0,"eax"),26);
     53 		&jnc(&label("maw_loop"));	# bit clear: use the integer loop
     54 
     55 		&movd("mm0",$w);		# mm0 = w
     56 		&pxor("mm1","mm1");		# mm1 = carry_in
     57 
        		# One pass handles 8 words; mm1 carries the running 64-bit
        		# sum whose low dword is stored and whose high dword becomes
        		# the carry into the next word.
     58 		&set_label("maw_sse2_loop",0);
     59 		&movd("mm3",&DWP(0,$r,"",0));	# mm3 = r[0]
     60 		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]
     61 		&movd("mm2",&DWP(0,$a,"",0));	# mm2 = a[0]
     62 		&pmuludq("mm2","mm0");		# mm2 = w*a[0]
     63 		&movd("mm4",&DWP(4,$a,"",0));	# mm4 = a[1]
     64 		&pmuludq("mm4","mm0");		# mm4 = w*a[1]
     65 		&movd("mm6",&DWP(8,$a,"",0));	# mm6 = a[2]
     66 		&pmuludq("mm6","mm0");		# mm6 = w*a[2]
     67 		&movd("mm7",&DWP(12,$a,"",0));	# mm7 = a[3]
     68 		&pmuludq("mm7","mm0");		# mm7 = w*a[3]
     69 		&paddq("mm1","mm2");		# mm1 = carry_in + r[0] + w*a[0]
     70 		&movd("mm3",&DWP(4,$r,"",0));	# mm3 = r[1]
     71 		&paddq("mm3","mm4");		# mm3 = r[1] + w*a[1]
     72 		&movd("mm5",&DWP(8,$r,"",0));	# mm5 = r[2]
     73 		&paddq("mm5","mm6");		# mm5 = r[2] + w*a[2]
     74 		&movd("mm4",&DWP(12,$r,"",0));	# mm4 = r[3]
     75 		&paddq("mm7","mm4");		# mm7 = r[3] + w*a[3]
     76 		&movd(&DWP(0,$r,"",0),"mm1");	# store new r[0]
     77 		&movd("mm2",&DWP(16,$a,"",0));	# mm2 = a[4]
     78 		&pmuludq("mm2","mm0");		# mm2 = w*a[4]
     79 		&psrlq("mm1",32);		# mm1 = carry0
     80 		&movd("mm4",&DWP(20,$a,"",0));	# mm4 = a[5]
     81 		&pmuludq("mm4","mm0");		# mm4 = w*a[5]
     82 		&paddq("mm1","mm3");		# mm1 = carry0 + r[1] + w*a[1]
     83 		&movd("mm6",&DWP(24,$a,"",0));	# mm6 = a[6]
     84 		&pmuludq("mm6","mm0");		# mm6 = w*a[6]
     85 		&movd(&DWP(4,$r,"",0),"mm1");	# store new r[1]
     86 		&psrlq("mm1",32);		# mm1 = carry1
     87 		&movd("mm3",&DWP(28,$a,"",0));	# mm3 = a[7]
     88 		&add($a,32);			# advance a by 8 words (all a[0..7] are loaded)
     89 		&pmuludq("mm3","mm0");		# mm3 = w*a[7]
     90 		&paddq("mm1","mm5");		# mm1 = carry1 + r[2] + w*a[2]
     91 		&movd("mm5",&DWP(16,$r,"",0));	# mm5 = r[4]
     92 		&paddq("mm2","mm5");		# mm2 = r[4] + w*a[4]
     93 		&movd(&DWP(8,$r,"",0),"mm1");	# store new r[2]
     94 		&psrlq("mm1",32);		# mm1 = carry2
     95 		&paddq("mm1","mm7");		# mm1 = carry2 + r[3] + w*a[3]
     96 		&movd("mm5",&DWP(20,$r,"",0));	# mm5 = r[5]
     97 		&paddq("mm4","mm5");		# mm4 = r[5] + w*a[5]
     98 		&movd(&DWP(12,$r,"",0),"mm1");	# store new r[3]
     99 		&psrlq("mm1",32);		# mm1 = carry3
    100 		&paddq("mm1","mm2");		# mm1 = carry3 + r[4] + w*a[4]
    101 		&movd("mm5",&DWP(24,$r,"",0));	# mm5 = r[6]
    102 		&paddq("mm6","mm5");		# mm6 = r[6] + w*a[6]
    103 		&movd(&DWP(16,$r,"",0),"mm1");	# store new r[4]
    104 		&psrlq("mm1",32);		# mm1 = carry4
    105 		&paddq("mm1","mm4");		# mm1 = carry4 + r[5] + w*a[5]
    106 		&movd("mm5",&DWP(28,$r,"",0));	# mm5 = r[7]
    107 		&paddq("mm3","mm5");		# mm3 = r[7] + w*a[7]
    108 		&movd(&DWP(20,$r,"",0),"mm1");	# store new r[5]
    109 		&psrlq("mm1",32);		# mm1 = carry5
    110 		&paddq("mm1","mm6");		# mm1 = carry5 + r[6] + w*a[6]
    111 		&movd(&DWP(24,$r,"",0),"mm1");	# store new r[6]
    112 		&psrlq("mm1",32);		# mm1 = carry6
    113 		&paddq("mm1","mm3");		# mm1 = carry6 + r[7] + w*a[7]
    114 		&movd(&DWP(28,$r,"",0),"mm1");	# store new r[7]
    115 		&add($r,32);			# advance r by 8 words
    116 		&psrlq("mm1",32);		# mm1 = carry_out
    117 
    118 		&sub("ecx",8);			# 8 fewer words to go
    119 		&jnz(&label("maw_sse2_loop"));
    120 
    121 		&movd($c,"mm1");		# c = carry_out
    122 		&emms();			# leave MMX state before returning to integer code
    123 
    124 		&jmp(&label("maw_finish"));	# the tail is shared with the integer path
    125 	}
    126 
        	# Integer version of the unrolled loop.
    127 	&set_label("maw_loop",0);
    128 
    129 	&mov(&swtmp(0),"ecx");	# spill the remaining-word count to the tmp slot
    130 
        	# $i is a byte offset here: 0,4,...,28 covers 8 words.
    131 	for ($i=0; $i<32; $i+=4)
    132 		{
    133 		&comment("Round $i");
    134 
    135 		 &mov("eax",&DWP($i,$a,"",0)); 	# *a
    136 		&mul($w);			# *a * w
    137 		&add("eax",$c);		# L(t)+=c
    138 		 &mov($c,&DWP($i,$r,"",0));	# c= *r (c is free once added; mov keeps CF)
    139 		&adc("edx",0);			# H(t)+=carry
    140 		 &add("eax",$c);		# L(t)+= *r
    141 		&adc("edx",0);			# H(t)+=carry
    142 		 &mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
    143 		&mov($c,"edx");			# c=  H(t);
    144 		}
    145 
    146 	&comment("");
    147 	&mov("ecx",&swtmp(0));	# reload the remaining-word count
    148 	&add($a,32);
    149 	&add($r,32);
    150 	&sub("ecx",8);
    151 	&jnz(&label("maw_loop"));
    152 
        	# Tail: handle the remaining (num & 7) words one at a time.
    153 	&set_label("maw_finish",0);
    154 	&mov("ecx",&wparam(2));	# get num
    155 	&and("ecx",7);
    156 	&jnz(&label("maw_finish2"));	# helps branch prediction
    157 	&jmp(&label("maw_end"));
    158 
    159 	&set_label("maw_finish2",1);
        	# $i is a word index here; at most 7 tail words exist, so the last
        	# round (i == 6) needs no count test and simply falls into maw_end.
    160 	for ($i=0; $i<7; $i++)
    161 		{
    162 		&comment("Tail Round $i");
    163 		 &mov("eax",&DWP($i*4,$a,"",0));# *a
    164 		&mul($w);			# *a * w
    165 		&add("eax",$c);			# L(t)+=c
    166 		 &mov($c,&DWP($i*4,$r,"",0));	# c= *r (mov keeps CF for the adc below)
    167 		&adc("edx",0);			# H(t)+=carry
    168 		 &add("eax",$c);		# L(t)+= *r
    169 		&adc("edx",0);			# H(t)+=carry
    170 		 &dec("ecx") if ($i != 7-1);	# sets ZF; the two movs below do not touch it
    171 		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
    172 		 &mov($c,"edx");			# c=  H(t);
    173 		&jz(&label("maw_end")) if ($i != 7-1);	# tests the dec above
    174 		}
    175 	&set_label("maw_end",0);
    176 	&mov("eax",$c);		# return value = final carry word
    177 
    178 	&pop("ecx");	# drop the tmp slot pushed at the top
    179 
    180 	&function_end($name);
    181 	}
    182 
    183 sub bn_mul_words
    184 	{
        	# Emit the function $name taking four stack arguments, read below as
        	# wparam(0)=r, wparam(1)=a, wparam(2)=num, wparam(3)=w.
        	# For each of the num words the generated code computes
        	#   t = a[i]*w + c;  r[i] = low 32 bits of t;  c = high 32 bits
        	# (the old contents of r are not read) and returns the final c in eax.
        	# Layout: 8-word unrolled loop for (num & ~7), then up to 7 tail rounds.
    185 	local($name)=@_;
    186 
    187 	&function_begin($name,"");
    188 
    189 	&comment("");
    190 	$Low="eax";
    191 	$High="edx";
    192 	$a="ebx";
    193 	$w="ecx";
    194 	$r="edi";
    195 	$c="esi";
    196 	$num="ebp";
    197 
    198 	&xor($c,$c);		# clear carry
    199 	&mov($r,&wparam(0));	# r   = 1st argument
    200 	&mov($a,&wparam(1));	# a   = 2nd argument
    201 	&mov($num,&wparam(2));	# num = 3rd argument
    202 	&mov($w,&wparam(3));	# w   = 4th argument
    203 
    204 	&and($num,0xfffffff8);	# num & ~7: words for the unrolled loop
    205 	&jz(&label("mw_finish"));
    206 
    207 	&set_label("mw_loop",0);
        	# $i is a byte offset here: 0,4,...,28 covers 8 words.
    208 	for ($i=0; $i<32; $i+=4)
    209 		{
    210 		&comment("Round $i");
    211 
    212 		 &mov("eax",&DWP($i,$a,"",0)); 	# *a
    213 		&mul($w);			# *a * w
    214 		&add("eax",$c);			# L(t)+=c
    215 		 # XXX
    216 
    217 		&adc("edx",0);			# H(t)+=carry
    218 		 &mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
    219 
    220 		&mov($c,"edx");			# c=  H(t);
    221 		}
    222 
    223 	&comment("");
    224 	&add($a,32);
    225 	&add($r,32);
    226 	&sub($num,8);
    227 	&jz(&label("mw_finish"));
    228 	&jmp(&label("mw_loop"));
    229 
        	# Tail: handle the remaining (num & 7) words one at a time.
    230 	&set_label("mw_finish",0);
    231 	&mov($num,&wparam(2));	# get num
    232 	&and($num,7);
    233 	&jnz(&label("mw_finish2"));
    234 	&jmp(&label("mw_end"));
    235 
    236 	&set_label("mw_finish2",1);
        	# $i is a word index; the last possible round (i == 6) needs no
        	# count test and falls through to mw_end.
    237 	for ($i=0; $i<7; $i++)
    238 		{
    239 		&comment("Tail Round $i");
    240 		 &mov("eax",&DWP($i*4,$a,"",0));# *a
    241 		&mul($w);			# *a * w
    242 		&add("eax",$c);			# L(t)+=c
    243 		 # XXX
    244 		&adc("edx",0);			# H(t)+=carry
    245 		 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
    246 		&mov($c,"edx");			# c=  H(t);
    247 		 &dec($num) if ($i != 7-1);	# one tail word done; sets ZF
    248 		&jz(&label("mw_end")) if ($i != 7-1);
    249 		}
    250 	&set_label("mw_end",0);
    251 	&mov("eax",$c);		# return value = final carry word
    252 
    253 	&function_end($name);
    254 	}
    255 
    256 sub bn_sqr_words
    257 	{
        	# Emit the function $name taking three stack arguments, read below as
        	# wparam(0)=r, wparam(1)=a, wparam(2)=num.
        	# For each of the num input words the generated code squares a[i]
        	# (mul eax by itself) and stores the 64-bit result as two words:
        	# low word at r[2*i], high word at r[2*i+1]. No carry is propagated
        	# between words and no return value is loaded into eax here.
        	# Layout: 8-word unrolled loop for (num & ~7), then up to 7 tail rounds.
    258 	local($name)=@_;
    259 
    260 	&function_begin($name,"");
    261 
    262 	&comment("");
    263 	$r="esi";
    264 	$a="edi";
    265 	$num="ebx";
    266 
    267 	&mov($r,&wparam(0));	# r   = 1st argument
    268 	&mov($a,&wparam(1));	# a   = 2nd argument
    269 	&mov($num,&wparam(2));	# num = 3rd argument
    270 
    271 	&and($num,0xfffffff8);	# num & ~7: words for the unrolled loop
    272 	&jz(&label("sw_finish"));
    273 
    274 	&set_label("sw_loop",0);
        	# $i is the byte offset into a (0,4,...,28); the matching output
        	# pair lives at byte offsets 2*$i and 2*$i+4 in r.
    275 	for ($i=0; $i<32; $i+=4)
    276 		{
    277 		&comment("Round $i");
    278 		&mov("eax",&DWP($i,$a,"",0)); 	# *a
    279 		 # XXX
    280 		&mul("eax");			# *a * *a
    281 		&mov(&DWP($i*2,$r,"",0),"eax");	# low word of the square
    282 		 &mov(&DWP($i*2+4,$r,"",0),"edx");# high word of the square
    283 		}
    284 
    285 	&comment("");
    286 	&add($a,32);	# 8 input words consumed
    287 	&add($r,64);	# 16 output words produced
    288 	&sub($num,8);
    289 	&jnz(&label("sw_loop"));
    290 
        	# Tail: handle the remaining (num & 7) words one at a time.
    291 	&set_label("sw_finish",0);
    292 	&mov($num,&wparam(2));	# get num
    293 	&and($num,7);
    294 	&jz(&label("sw_end"));
    295 
        	# $i is a word index here; output byte offsets are 8*$i and 8*$i+4.
    296 	for ($i=0; $i<7; $i++)
    297 		{
    298 		&comment("Tail Round $i");
    299 		&mov("eax",&DWP($i*4,$a,"",0));	# *a
    300 		 # XXX
    301 		&mul("eax");			# *a * *a
    302 		&mov(&DWP($i*8,$r,"",0),"eax");	# low word of the square
    303 		 &dec($num) if ($i != 7-1);	# sets ZF; the mov below does not touch it
    304 		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high word of the square
    305 		 &jz(&label("sw_end")) if ($i != 7-1);
    306 		}
    307 	&set_label("sw_end",0);
    308 
    309 	&function_end($name);
    310 	}
    311 
    312 sub bn_div_words
    313 	{
        	# Emit the function $name taking three stack arguments and dividing
        	# the 64-bit value (wparam(0):wparam(1)) = (high:low) by wparam(2)
        	# with a single x86 "div". After the div eax holds the quotient and
        	# edx the remainder; nothing below moves them, so the quotient is
        	# what is left in eax (assuming function_end does not clobber eax).
        	# NOTE(review): x86 div raises a divide error when the divisor is 0
        	# or the quotient does not fit in 32 bits (high word >= divisor);
        	# no guard is emitted here, so callers must ensure that.
    314 	local($name)=@_;
    315 
    316 	&function_begin($name,"");
    317 	&mov("edx",&wparam(0));	# high word of the dividend
    318 	&mov("eax",&wparam(1));	# low word of the dividend
    319 	&mov("ebx",&wparam(2));	# divisor
    320 	&div("ebx");
    321 	&function_end($name);
    322 	}
    323 
    324 sub bn_add_words
    325 	{
        	# Emit the function $name taking four stack arguments, read below as
        	# wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num.
        	# For each of the num words: r[i] = a[i] + b[i] + c, where c is the
        	# carry out of the previous word. c lives in eax, so the final
        	# carry is the value left in eax at the end.
        	# Carry capture per word (mov does not modify the flags):
        	#   add tmp1,c ; mov c,0 ; adc c,c   -> c  = carry of (a[i] + c)
        	#   add tmp1,tmp2 ; adc c,0          -> c += carry of (+ b[i])
        	# Layout: 8-word unrolled loop for (num & ~7), then up to 7 tail rounds.
    326 	local($name)=@_;
    327 
    328 	&function_begin($name,"");
    329 
    330 	&comment("");
    331 	$a="esi";
    332 	$b="edi";
    333 	$c="eax";
    334 	$r="ebx";
    335 	$tmp1="ecx";
    336 	$tmp2="edx";
    337 	$num="ebp";
    338 
    339 	&mov($r,&wparam(0));	# get r
    340 	 &mov($a,&wparam(1));	# get a
    341 	&mov($b,&wparam(2));	# get b
    342 	 &mov($num,&wparam(3));	# get num
    343 	&xor($c,$c);		# clear carry
    344 	 &and($num,0xfffffff8);	# num & ~7: words for the unrolled loop
    345 
    346 	&jz(&label("aw_finish"));
    347 
    348 	&set_label("aw_loop",0);
    349 	for ($i=0; $i<8; $i++)
    350 		{
    351 		&comment("Round $i");
    352 
    353 		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
    354 		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
    355 		&add($tmp1,$c);			# tmp1 = *a + carry_in
    356 		 &mov($c,0);			# zero c without touching CF
    357 		&adc($c,$c);			# c = CF from the add above
    358 		 &add($tmp1,$tmp2);		# tmp1 += *b
    359 		&adc($c,0);			# c += CF from the second add
    360 		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
    361 		}
    362 
    363 	&comment("");
    364 	&add($a,32);
    365 	 &add($b,32);
    366 	&add($r,32);
    367 	 &sub($num,8);
    368 	&jnz(&label("aw_loop"));
    369 
        	# Tail: handle the remaining (num & 7) words one at a time.
    370 	&set_label("aw_finish",0);
    371 	&mov($num,&wparam(3));	# get num
    372 	&and($num,7);
    373 	 &jz(&label("aw_end"));
    374 
        	# The last possible tail round (i == 6) needs no count test.
    375 	for ($i=0; $i<7; $i++)
    376 		{
    377 		&comment("Tail Round $i");
    378 		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
    379 		 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
    380 		&add($tmp1,$c);			# tmp1 = *a + carry_in
    381 		 &mov($c,0);			# zero c without touching CF
    382 		&adc($c,$c);			# c = CF from the add above
    383 		 &add($tmp1,$tmp2);		# tmp1 += *b
    384 		&adc($c,0);			# c += CF from the second add
    385 		 &dec($num) if ($i != 6);	# sets ZF; the mov below does not touch it
    386 		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
    387 		 &jz(&label("aw_end")) if ($i != 6);
    388 		}
    389 	&set_label("aw_end",0);
    390 
    391 #	&mov("eax",$c);		# $c is "eax"
    392 
    393 	&function_end($name);
    394 	}
    395 
    396 sub bn_sub_words
    397 	{
        	# Emit the function $name taking four stack arguments, read below as
        	# wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num.
        	# For each of the num words: r[i] = a[i] - b[i] - c, where c is the
        	# borrow out of the previous word. c lives in eax, so the final
        	# borrow is the value left in eax at the end.
        	# Borrow capture per word (mov does not modify the flags):
        	#   sub tmp1,c ; mov c,0 ; adc c,c   -> c  = borrow of (a[i] - c)
        	#   sub tmp1,tmp2 ; adc c,0          -> c += borrow of (- b[i])
        	# NOTE(review): the labels reuse the "aw_" names from bn_add_words;
        	# this relies on label() names being scoped per generated function
        	# in x86asm.pl - confirm there.
    398 	local($name)=@_;
    399 
    400 	&function_begin($name,"");
    401 
    402 	&comment("");
    403 	$a="esi";
    404 	$b="edi";
    405 	$c="eax";
    406 	$r="ebx";
    407 	$tmp1="ecx";
    408 	$tmp2="edx";
    409 	$num="ebp";
    410 
    411 	&mov($r,&wparam(0));	# get r
    412 	 &mov($a,&wparam(1));	# get a
    413 	&mov($b,&wparam(2));	# get b
    414 	 &mov($num,&wparam(3));	# get num
    415 	&xor($c,$c);		# clear carry
    416 	 &and($num,0xfffffff8);	# num & ~7: words for the unrolled loop
    417 
    418 	&jz(&label("aw_finish"));
    419 
    420 	&set_label("aw_loop",0);
    421 	for ($i=0; $i<8; $i++)
    422 		{
    423 		&comment("Round $i");
    424 
    425 		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
    426 		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
    427 		&sub($tmp1,$c);			# tmp1 = *a - borrow_in
    428 		 &mov($c,0);			# zero c without touching CF
    429 		&adc($c,$c);			# c = CF (borrow) from the sub above
    430 		 &sub($tmp1,$tmp2);		# tmp1 -= *b
    431 		&adc($c,0);			# c += CF (borrow) from the second sub
    432 		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
    433 		}
    434 
    435 	&comment("");
    436 	&add($a,32);
    437 	 &add($b,32);
    438 	&add($r,32);
    439 	 &sub($num,8);
    440 	&jnz(&label("aw_loop"));
    441 
        	# Tail: handle the remaining (num & 7) words one at a time.
    442 	&set_label("aw_finish",0);
    443 	&mov($num,&wparam(3));	# get num
    444 	&and($num,7);
    445 	 &jz(&label("aw_end"));
    446 
        	# The last possible tail round (i == 6) needs no count test.
    447 	for ($i=0; $i<7; $i++)
    448 		{
    449 		&comment("Tail Round $i");
    450 		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
    451 		 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
    452 		&sub($tmp1,$c);			# tmp1 = *a - borrow_in
    453 		 &mov($c,0);			# zero c without touching CF
    454 		&adc($c,$c);			# c = CF (borrow) from the sub above
    455 		 &sub($tmp1,$tmp2);		# tmp1 -= *b
    456 		&adc($c,0);			# c += CF (borrow) from the second sub
    457 		 &dec($num) if ($i != 6);	# sets ZF; the mov below does not touch it
    458 		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
    459 		 &jz(&label("aw_end")) if ($i != 6);
    460 		}
    461 	&set_label("aw_end",0);
    462 
    463 #	&mov("eax",$c);		# $c is "eax"
    464 
    465 	&function_end($name);
    466 	}
    467 
    468 sub bn_sub_part_words
    469 	{
        	# Emit the function $name taking five stack arguments, read below as
        	# wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num, wparam(4)=dl.
        	# The borrow c lives in eax throughout and is what is left in eax at
        	# the end.
        	# Phase 1 (aw_* labels): r[i] = a[i] - b[i] - c for num words, as in
        	#   bn_sub_words, except that the tail advances a, b and r by one
        	#   word per round so all three point past the common part afterwards.
        	# Phase 2 depends on the sign of dl:
        	#   dl == 0: nothing more to do.
        	#   dl <  0: for -dl further words, r[i] = 0 - b[i] - c.
        	#   dl >  0: for dl further words, r[i] = a[i] - c. As soon as one
        	#            subtraction does not borrow, control jumps into the
        	#            plain copy code (pw_nc* labels), which copies the rest
        	#            of a to r and finishes with c = 0.
        	# NOTE(review): the "aw_" label names are shared with bn_add_words
        	# and bn_sub_words; this relies on label() names being scoped per
        	# generated function in x86asm.pl - confirm there.
    470 	local($name)=@_;
    471 
    472 	&function_begin($name,"");
    473 
    474 	&comment("");
    475 	$a="esi";
    476 	$b="edi";
    477 	$c="eax";
    478 	$r="ebx";
    479 	$tmp1="ecx";
    480 	$tmp2="edx";
    481 	$num="ebp";
    482 
    483 	&mov($r,&wparam(0));	# get r
    484 	 &mov($a,&wparam(1));	# get a
    485 	&mov($b,&wparam(2));	# get b
    486 	 &mov($num,&wparam(3));	# get num
    487 	&xor($c,$c);		# clear carry
    488 	 &and($num,0xfffffff8);	# num & ~7: words for the unrolled loop
    489 
    490 	&jz(&label("aw_finish"));
    491 
    492 	&set_label("aw_loop",0);
    493 	for ($i=0; $i<8; $i++)
    494 		{
    495 		&comment("Round $i");
    496 
    497 		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
    498 		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
    499 		&sub($tmp1,$c);			# tmp1 = *a - borrow_in
    500 		 &mov($c,0);			# zero c without touching CF
    501 		&adc($c,$c);			# c = CF (borrow) from the sub above
    502 		 &sub($tmp1,$tmp2);		# tmp1 -= *b
    503 		&adc($c,0);			# c += CF (borrow) from the second sub
    504 		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
    505 		}
    506 
    507 	&comment("");
    508 	&add($a,32);
    509 	 &add($b,32);
    510 	&add($r,32);
    511 	 &sub($num,8);
    512 	&jnz(&label("aw_loop"));
    513 
        	# Tail of phase 1: remaining (num & 7) words.
    514 	&set_label("aw_finish",0);
    515 	&mov($num,&wparam(3));	# get num
    516 	&and($num,7);
    517 	 &jz(&label("aw_end"));
    518 
        	# Unlike bn_sub_words, every round uses offset 0 and then bumps the
        	# three pointers, so they stay valid for phase 2.
    519 	for ($i=0; $i<7; $i++)
    520 		{
    521 		&comment("Tail Round $i");
    522 		&mov($tmp1,&DWP(0,$a,"",0));	# *a
    523 		 &mov($tmp2,&DWP(0,$b,"",0));# *b
    524 		&sub($tmp1,$c);			# tmp1 = *a - borrow_in
    525 		 &mov($c,0);			# zero c without touching CF
    526 		&adc($c,$c);			# c = CF (borrow) from the sub above
    527 		 &sub($tmp1,$tmp2);		# tmp1 -= *b
    528 		&adc($c,0);			# c += CF (borrow) from the second sub
    529 		&mov(&DWP(0,$r,"",0),$tmp1);	# *r
    530 		&add($a, 4);			# the borrow is already in c, so
    531 		&add($b, 4);			# clobbering the flags here is fine
    532 		&add($r, 4);
    533 		 &dec($num) if ($i != 6);	# sets ZF for the jz directly below
    534 		 &jz(&label("aw_end")) if ($i != 6);
    535 		}
    536 	&set_label("aw_end",0);
    537 
        	# Phase 2: dispatch on dl.
        	# NOTE(review): this first dl == 0 test is repeated by the
        	# mov/cmp/je sequence right after it.
    538 	&cmp(&wparam(4),0);
    539 	&je(&label("pw_end"));
    540 
    541 	&mov($num,&wparam(4));	# get dl
    542 	&cmp($num,0);
    543 	&je(&label("pw_end"));
    544 	&jge(&label("pw_pos"));	# dl > 0 (zero was handled just above)
    545 
        	# dl < 0: r[i] = 0 - b[i] - c for -dl words.
    546 	&comment("pw_neg");
    547 	&mov($tmp2,0);
    548 	&sub($tmp2,$num);	# tmp2 = -dl
    549 	&mov($num,$tmp2);
    550 	&and($num,0xfffffff8);	# (-dl) & ~7: words for the unrolled loop
    551 	&jz(&label("pw_neg_finish"));
    552 
    553 	&set_label("pw_neg_loop",0);
    554 	for ($i=0; $i<8; $i++)
    555 	{
    556 	    &comment("dl<0 Round $i");
    557 
    558 	    &mov($tmp1,0);			# the minuend is zero in this phase
    559 	    &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
    560 	    &sub($tmp1,$c);			# tmp1 = 0 - borrow_in
    561 	    &mov($c,0);				# zero c without touching CF
    562 	    &adc($c,$c);			# c = CF (borrow) from the sub above
    563 	    &sub($tmp1,$tmp2);			# tmp1 -= *b
    564 	    &adc($c,0);				# c += CF (borrow) from the second sub
    565 	    &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
    566 	}
    567 	    
    568 	&comment("");
    569 	&add($b,32);
    570 	&add($r,32);
    571 	&sub($num,8);
    572 	&jnz(&label("pw_neg_loop"));
    573 	    
    574 	&set_label("pw_neg_finish",0);
    575 	&mov($tmp2,&wparam(4));	# get dl
    576 	&mov($num,0);
    577 	&sub($num,$tmp2);	# num = -dl
    578 	&and($num,7);		# leftover words of the dl<0 phase
    579 	&jz(&label("pw_end"));
    580 	    
    581 	for ($i=0; $i<7; $i++)
    582 	{
    583 	    &comment("dl<0 Tail Round $i");
    584 	    &mov($tmp1,0);			# the minuend is zero in this phase
    585 	    &mov($tmp2,&DWP($i*4,$b,"",0));# *b
    586 	    &sub($tmp1,$c);			# tmp1 = 0 - borrow_in
    587 	    &mov($c,0);				# zero c without touching CF
    588 	    &adc($c,$c);			# c = CF (borrow) from the sub above
    589 	    &sub($tmp1,$tmp2);			# tmp1 -= *b
    590 	    &adc($c,0);				# c += CF (borrow) from the second sub
    591 	    &dec($num) if ($i != 6);		# sets ZF; the mov below does not touch it
    592 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
    593 	    &jz(&label("pw_end")) if ($i != 6);
    594 	}
    595 
    596 	&jmp(&label("pw_end"));
    597 	
        	# dl > 0: r[i] = a[i] - c while the borrow keeps propagating.
    598 	&set_label("pw_pos",0);
    599 	
    600 	&and($num,0xfffffff8);	# dl & ~7: words for the unrolled loop
    601 	&jz(&label("pw_pos_finish"));
    602 
    603 	&set_label("pw_pos_loop",0);
    604 
        	# c is not rewritten in this loop: falling through to the next
        	# round means the sub borrowed, and a borrow is only possible when
        	# c was non-zero. On the first word that does not borrow, jump to
        	# the point in the copy loop just after that word's copy.
    605 	for ($i=0; $i<8; $i++)
    606 	{
    607 	    &comment("dl>0 Round $i");
    608 
    609 	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
    610 	    &sub($tmp1,$c);			# tmp1 = *a - borrow_in
    611 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
    612 	    &jnc(&label("pw_nc".$i));		# no borrow: the rest is a plain copy
    613 	}
    614 	    
    615 	&comment("");
    616 	&add($a,32);
    617 	&add($r,32);
    618 	&sub($num,8);
    619 	&jnz(&label("pw_pos_loop"));
    620 	    
    621 	&set_label("pw_pos_finish",0);
    622 	&mov($num,&wparam(4));	# get dl
    623 	&and($num,7);		# leftover words of the dl>0 phase
    624 	&jz(&label("pw_end"));
    625 	    
    626 	for ($i=0; $i<7; $i++)
    627 	{
    628 	    &comment("dl>0 Tail Round $i");
    629 	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
    630 	    &sub($tmp1,$c);			# tmp1 = *a - borrow_in
    631 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
    632 	    &jnc(&label("pw_tail_nc".$i));	# no borrow: continue in the copy tail
    633 	    &dec($num) if ($i != 6);
    634 	    &jz(&label("pw_end")) if ($i != 6);
    635 	}
        	# Fell out of the last tail round with the borrow still pending.
    636 	&mov($c,1);
    637 	&jmp(&label("pw_end"));
    638 
        	# Plain copy of a to r, entered only through the pw_nc<i> labels.
        	# Each label sits right after the copy of word i, because that word
        	# was already stored by the borrow loop before it jumped here.
    639 	&set_label("pw_nc_loop",0);
    640 	for ($i=0; $i<8; $i++)
    641 	{
    642 	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
    643 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
    644 	    &set_label("pw_nc".$i,0);
    645 	}
    646 	    
    647 	&comment("");
    648 	&add($a,32);
    649 	&add($r,32);
    650 	&sub($num,8);
    651 	&jnz(&label("pw_nc_loop"));
    652 	    
    653 	&mov($num,&wparam(4));	# get dl
    654 	&and($num,7);		# leftover words to copy
    655 	&jz(&label("pw_nc_end"));
    656 	    
        	# Copy tail; pw_tail_nc<i> is the entry point from the borrow tail,
        	# again placed after the copy of word i.
    657 	for ($i=0; $i<7; $i++)
    658 	{
    659 	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
    660 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
    661 	    &set_label("pw_tail_nc".$i,0);
    662 	    &dec($num) if ($i != 6);
    663 	    &jz(&label("pw_nc_end")) if ($i != 6);
    664 	}
    665 
    666 	&set_label("pw_nc_end",0);
    667 	&mov($c,0);		# the borrow was absorbed, so return 0
    668 
    669 	&set_label("pw_end",0);
    670 
    671 #	&mov("eax",$c);		# $c is "eax"
    672 
    673 	&function_end($name);
    674 	}
    675 
    676