Home | History | Annotate | Download | only in asm
      1 #!/usr/local/bin/perl
      2 #
      3 # The inner loop instruction sequence and the IP/FP modifications are from
# Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk>
      5 #
      6 
      7 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
      8 push(@INC,"${dir}","${dir}../../perlasm");
      9 require "x86asm.pl";
     10 require "cbc.pl";
     11 require "desboth.pl";
     12 
# Base code is in Microsoft
# op dest, source
# format.
#
     17 
     18 &asm_init($ARGV[0],"des-586.pl");
     19 
     20 $L="edi";
     21 $R="esi";
     22 $trans="ebp";
     23 $small_footprint=1 if (grep(/\-DOPENSSL_SMALL_FOOTPRINT/,@ARGV));
     24 # one can discuss setting this variable to 1 unconditionally, as
     25 # the folded loop is only 3% slower than unrolled, but >7 times smaller
     26 
     27 &public_label("DES_SPtrans");
     28 
     29 &DES_encrypt_internal();
     30 &DES_decrypt_internal();
     31 &DES_encrypt("DES_encrypt1",1);
     32 &DES_encrypt("DES_encrypt2",0);
     33 &DES_encrypt3("DES_encrypt3",1);
     34 &DES_encrypt3("DES_decrypt3",0);
     35 &cbc("DES_ncbc_encrypt","DES_encrypt1","DES_encrypt1",0,4,5,3,5,-1);
     36 &cbc("DES_ede3_cbc_encrypt","DES_encrypt3","DES_decrypt3",0,6,7,3,4,5);
     37 &DES_SPtrans();
     38 
     39 &asm_finish();
     40 
     41 sub DES_encrypt_internal()
     42 	{
     43 	&function_begin_B("_x86_DES_encrypt");
     44 
     45 	if ($small_footprint)
     46 	    {
     47 	    &lea("edx",&DWP(128,"ecx"));
     48 	    &push("edx");
     49 	    &push("ecx");
     50 	    &set_label("eloop");
     51 		&D_ENCRYPT(0,$L,$R,0,$trans,"eax","ebx","ecx","edx",&swtmp(0));
     52 		&comment("");
     53 		&D_ENCRYPT(1,$R,$L,2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
     54 		&comment("");
     55 		&add("ecx",16);
     56 		&cmp("ecx",&swtmp(1));
     57 		&mov(&swtmp(0),"ecx");
     58 		&jb(&label("eloop"));
     59 	    &add("esp",8);
     60 	    }
     61 	else
     62 	    {
     63 	    &push("ecx");
     64 	    for ($i=0; $i<16; $i+=2)
     65 		{
     66 		&comment("Round $i");
     67 		&D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
     68 		&comment("Round ".sprintf("%d",$i+1));
     69 		&D_ENCRYPT($i+1,$R,$L,($i+1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
     70 		}
     71 	    &add("esp",4);
     72 	}
     73 	&ret();
     74 
     75 	&function_end_B("_x86_DES_encrypt");
     76 	}
     77 	
     78 sub DES_decrypt_internal()
     79 	{
     80 	&function_begin_B("_x86_DES_decrypt");
     81 
     82 	if ($small_footprint)
     83 	    {
     84 	    &push("ecx");
     85 	    &lea("ecx",&DWP(128,"ecx"));
     86 	    &push("ecx");
     87 	    &set_label("dloop");
     88 		&D_ENCRYPT(0,$L,$R,-2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
     89 		&comment("");
     90 		&D_ENCRYPT(1,$R,$L,-4,$trans,"eax","ebx","ecx","edx",&swtmp(0));
     91 		&comment("");
     92 		&sub("ecx",16);
     93 		&cmp("ecx",&swtmp(1));
     94 		&mov(&swtmp(0),"ecx");
     95 		&ja(&label("dloop"));
     96 	    &add("esp",8);
     97 	    }
     98 	else
     99 	    {
    100 	    &push("ecx");
    101 	    for ($i=15; $i>0; $i-=2)
    102 		{
    103 		&comment("Round $i");
    104 		&D_ENCRYPT(15-$i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
    105 		&comment("Round ".sprintf("%d",$i-1));
    106 		&D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
    107 		}
    108 	    &add("esp",4);
    109 	    }
    110 	&ret();
    111 
    112 	&function_end_B("_x86_DES_decrypt");
    113 	}
    114 	
    115 sub DES_encrypt
    116 	{
    117 	local($name,$do_ip)=@_;
    118 
    119 	&function_begin_B($name);
    120 
    121 	&push("esi");
    122 	&push("edi");
    123 
    124 	&comment("");
    125 	&comment("Load the 2 words");
    126 
    127 	if ($do_ip)
    128 		{
    129 		&mov($R,&wparam(0));
    130 		 &xor(	"ecx",		"ecx"		);
    131 
    132 		&push("ebx");
    133 		&push("ebp");
    134 
    135 		&mov("eax",&DWP(0,$R,"",0));
    136 		 &mov("ebx",&wparam(2));	# get encrypt flag
    137 		&mov($L,&DWP(4,$R,"",0));
    138 		&comment("");
    139 		&comment("IP");
    140 		&IP_new("eax",$L,$R,3);
    141 		}
    142 	else
    143 		{
    144 		&mov("eax",&wparam(0));
    145 		 &xor(	"ecx",		"ecx"		);
    146 
    147 		&push("ebx");
    148 		&push("ebp");
    149 
    150 		&mov($R,&DWP(0,"eax","",0));
    151 		 &mov("ebx",&wparam(2));	# get encrypt flag
    152 		&rotl($R,3);
    153 		&mov($L,&DWP(4,"eax","",0));
    154 		&rotl($L,3);
    155 		}
    156 
    157 	# PIC-ification:-)
    158 	&call	(&label("pic_point"));
    159 	&set_label("pic_point");
    160 	&blindpop($trans);
    161 	&lea	($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans));
    162 
    163 	&mov(	"ecx",	&wparam(1)	);
    164 
    165 	&cmp("ebx","0");
    166 	&je(&label("decrypt"));
    167 	&call("_x86_DES_encrypt");
    168 	&jmp(&label("done"));
    169 	&set_label("decrypt");
    170 	&call("_x86_DES_decrypt");
    171 	&set_label("done");
    172 
    173 	if ($do_ip)
    174 		{
    175 		&comment("");
    176 		&comment("FP");
    177 		&mov("edx",&wparam(0));
    178 		&FP_new($L,$R,"eax",3);
    179 
    180 		&mov(&DWP(0,"edx","",0),"eax");
    181 		&mov(&DWP(4,"edx","",0),$R);
    182 		}
    183 	else
    184 		{
    185 		&comment("");
    186 		&comment("Fixup");
    187 		&rotr($L,3);		# r
    188 		 &mov("eax",&wparam(0));
    189 		&rotr($R,3);		# l
    190 		 &mov(&DWP(0,"eax","",0),$L);
    191 		 &mov(&DWP(4,"eax","",0),$R);
    192 		}
    193 
    194 	&pop("ebp");
    195 	&pop("ebx");
    196 	&pop("edi");
    197 	&pop("esi");
    198 	&ret();
    199 
    200 	&function_end_B($name);
    201 	}
    202 
    203 sub D_ENCRYPT
    204 	{
    205 	local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t,$wp1)=@_;
    206 
    207 	 &mov(	$u,		&DWP(&n2a($S*4),$tmp2,"",0));
    208 	&xor(	$tmp1,		$tmp1);
    209 	 &mov(	$t,		&DWP(&n2a(($S+1)*4),$tmp2,"",0));
    210 	&xor(	$u,		$R);
    211 	&xor(	$tmp2,		$tmp2);
    212 	 &xor(	$t,		$R);
    213 	&and(	$u,		"0xfcfcfcfc"	);
    214 	 &and(	$t,		"0xcfcfcfcf"	);
    215 	&movb(	&LB($tmp1),	&LB($u)	);
    216 	 &movb(	&LB($tmp2),	&HB($u)	);
    217 	&rotr(	$t,		4		);
    218 	&xor(	$L,		&DWP("     ",$trans,$tmp1,0));
    219 	 &movb(	&LB($tmp1),	&LB($t)	);
    220 	 &xor(	$L,		&DWP("0x200",$trans,$tmp2,0));
    221 	 &movb(	&LB($tmp2),	&HB($t)	);
    222 	&shr(	$u,		16);
    223 	 &xor(	$L,		&DWP("0x100",$trans,$tmp1,0));
    224 	 &movb(	&LB($tmp1),	&HB($u)	);
    225 	&shr(	$t,		16);
    226 	 &xor(	$L,		&DWP("0x300",$trans,$tmp2,0));
    227 	&movb(	&LB($tmp2),	&HB($t)	);
    228 	 &and(	$u,		"0xff"	);
    229 	&and(	$t,		"0xff"	);
    230 	 &xor(	$L,		&DWP("0x600",$trans,$tmp1,0));
    231 	 &xor(	$L,		&DWP("0x700",$trans,$tmp2,0));
    232 	&mov(	$tmp2,		$wp1	);
    233 	 &xor(	$L,		&DWP("0x400",$trans,$u,0));
    234 	 &xor(	$L,		&DWP("0x500",$trans,$t,0));
    235 	}
    236 
    237 sub n2a
    238 	{
    239 	sprintf("%d",$_[0]);
    240 	}
    241 
    242 # now has a side affect of rotating $a by $shift
    243 sub R_PERM_OP
    244 	{
    245 	local($a,$b,$tt,$shift,$mask,$last)=@_;
    246 
    247 	&rotl(	$a,		$shift		) if ($shift != 0);
    248 	&mov(	$tt,		$a		);
    249 	&xor(	$a,		$b		);
    250 	&and(	$a,		$mask		);
    251 	# This can never succeed, and besides it is difficult to see what the
    252 	# idea was - Ben 13 Feb 99
    253 	if (!$last eq $b)
    254 		{
    255 		&xor(	$b,		$a		);
    256 		&xor(	$tt,		$a		);
    257 		}
    258 	else
    259 		{
    260 		&xor(	$tt,		$a		);
    261 		&xor(	$b,		$a		);
    262 		}
    263 	&comment("");
    264 	}
    265 
    266 sub IP_new
    267 	{
    268 	local($l,$r,$tt,$lr)=@_;
    269 
    270 	&R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l);
    271 	&R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l);
    272 	&R_PERM_OP($l,$tt,$r,14,"0x33333333",$r);
    273 	&R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r);
    274 	&R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r);
    275 	
    276 	if ($lr != 3)
    277 		{
    278 		if (($lr-3) < 0)
    279 			{ &rotr($tt,	3-$lr); }
    280 		else	{ &rotl($tt,	$lr-3); }
    281 		}
    282 	if ($lr != 2)
    283 		{
    284 		if (($lr-2) < 0)
    285 			{ &rotr($r,	2-$lr); }
    286 		else	{ &rotl($r,	$lr-2); }
    287 		}
    288 	}
    289 
    290 sub FP_new
    291 	{
    292 	local($l,$r,$tt,$lr)=@_;
    293 
    294 	if ($lr != 2)
    295 		{
    296 		if (($lr-2) < 0)
    297 			{ &rotl($r,	2-$lr); }
    298 		else	{ &rotr($r,	$lr-2); }
    299 		}
    300 	if ($lr != 3)
    301 		{
    302 		if (($lr-3) < 0)
    303 			{ &rotl($l,	3-$lr); }
    304 		else	{ &rotr($l,	$lr-3); }
    305 		}
    306 
    307 	&R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r);
    308 	&R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r);
    309 	&R_PERM_OP($l,$r,$tt,10,"0x33333333",$l);
    310 	&R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l);
    311 	&R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r);
    312 	&rotr($tt	, 4);
    313 	}
    314 
    315 sub DES_SPtrans
    316 	{
    317 	&set_label("DES_SPtrans",64);
    318 	&data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802);
    319 	&data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002);
    320 	&data_word(0x00080802, 0x02080800, 0x02080000, 0x00000802);
    321 	&data_word(0x02000802, 0x02000000, 0x00000000, 0x00080002);
    322 	&data_word(0x00080000, 0x00000002, 0x02000800, 0x00080800);
    323 	&data_word(0x02080802, 0x02080000, 0x00000802, 0x02000800);
    324 	&data_word(0x00000002, 0x00000800, 0x00080800, 0x02080002);
    325 	&data_word(0x00000800, 0x02000802, 0x02080002, 0x00000000);
    326 	&data_word(0x00000000, 0x02080802, 0x02000800, 0x00080002);
    327 	&data_word(0x02080800, 0x00080000, 0x00000802, 0x02000800);
    328 	&data_word(0x02080002, 0x00000800, 0x00080800, 0x02000002);
    329 	&data_word(0x00080802, 0x00000002, 0x02000002, 0x02080000);
    330 	&data_word(0x02080802, 0x00080800, 0x02080000, 0x02000802);
    331 	&data_word(0x02000000, 0x00000802, 0x00080002, 0x00000000);
    332 	&data_word(0x00080000, 0x02000000, 0x02000802, 0x02080800);
    333 	&data_word(0x00000002, 0x02080002, 0x00000800, 0x00080802);
    334 	# nibble 1
    335 	&data_word(0x40108010, 0x00000000, 0x00108000, 0x40100000);
    336 	&data_word(0x40000010, 0x00008010, 0x40008000, 0x00108000);
    337 	&data_word(0x00008000, 0x40100010, 0x00000010, 0x40008000);
    338 	&data_word(0x00100010, 0x40108000, 0x40100000, 0x00000010);
    339 	&data_word(0x00100000, 0x40008010, 0x40100010, 0x00008000);
    340 	&data_word(0x00108010, 0x40000000, 0x00000000, 0x00100010);
    341 	&data_word(0x40008010, 0x00108010, 0x40108000, 0x40000010);
    342 	&data_word(0x40000000, 0x00100000, 0x00008010, 0x40108010);
    343 	&data_word(0x00100010, 0x40108000, 0x40008000, 0x00108010);
    344 	&data_word(0x40108010, 0x00100010, 0x40000010, 0x00000000);
    345 	&data_word(0x40000000, 0x00008010, 0x00100000, 0x40100010);
    346 	&data_word(0x00008000, 0x40000000, 0x00108010, 0x40008010);
    347 	&data_word(0x40108000, 0x00008000, 0x00000000, 0x40000010);
    348 	&data_word(0x00000010, 0x40108010, 0x00108000, 0x40100000);
    349 	&data_word(0x40100010, 0x00100000, 0x00008010, 0x40008000);
    350 	&data_word(0x40008010, 0x00000010, 0x40100000, 0x00108000);
    351 	# nibble 2
    352 	&data_word(0x04000001, 0x04040100, 0x00000100, 0x04000101);
    353 	&data_word(0x00040001, 0x04000000, 0x04000101, 0x00040100);
    354 	&data_word(0x04000100, 0x00040000, 0x04040000, 0x00000001);
    355 	&data_word(0x04040101, 0x00000101, 0x00000001, 0x04040001);
    356 	&data_word(0x00000000, 0x00040001, 0x04040100, 0x00000100);
    357 	&data_word(0x00000101, 0x04040101, 0x00040000, 0x04000001);
    358 	&data_word(0x04040001, 0x04000100, 0x00040101, 0x04040000);
    359 	&data_word(0x00040100, 0x00000000, 0x04000000, 0x00040101);
    360 	&data_word(0x04040100, 0x00000100, 0x00000001, 0x00040000);
    361 	&data_word(0x00000101, 0x00040001, 0x04040000, 0x04000101);
    362 	&data_word(0x00000000, 0x04040100, 0x00040100, 0x04040001);
    363 	&data_word(0x00040001, 0x04000000, 0x04040101, 0x00000001);
    364 	&data_word(0x00040101, 0x04000001, 0x04000000, 0x04040101);
    365 	&data_word(0x00040000, 0x04000100, 0x04000101, 0x00040100);
    366 	&data_word(0x04000100, 0x00000000, 0x04040001, 0x00000101);
    367 	&data_word(0x04000001, 0x00040101, 0x00000100, 0x04040000);
    368 	# nibble 3
    369 	&data_word(0x00401008, 0x10001000, 0x00000008, 0x10401008);
    370 	&data_word(0x00000000, 0x10400000, 0x10001008, 0x00400008);
    371 	&data_word(0x10401000, 0x10000008, 0x10000000, 0x00001008);
    372 	&data_word(0x10000008, 0x00401008, 0x00400000, 0x10000000);
    373 	&data_word(0x10400008, 0x00401000, 0x00001000, 0x00000008);
    374 	&data_word(0x00401000, 0x10001008, 0x10400000, 0x00001000);
    375 	&data_word(0x00001008, 0x00000000, 0x00400008, 0x10401000);
    376 	&data_word(0x10001000, 0x10400008, 0x10401008, 0x00400000);
    377 	&data_word(0x10400008, 0x00001008, 0x00400000, 0x10000008);
    378 	&data_word(0x00401000, 0x10001000, 0x00000008, 0x10400000);
    379 	&data_word(0x10001008, 0x00000000, 0x00001000, 0x00400008);
    380 	&data_word(0x00000000, 0x10400008, 0x10401000, 0x00001000);
    381 	&data_word(0x10000000, 0x10401008, 0x00401008, 0x00400000);
    382 	&data_word(0x10401008, 0x00000008, 0x10001000, 0x00401008);
    383 	&data_word(0x00400008, 0x00401000, 0x10400000, 0x10001008);
    384 	&data_word(0x00001008, 0x10000000, 0x10000008, 0x10401000);
    385 	# nibble 4
    386 	&data_word(0x08000000, 0x00010000, 0x00000400, 0x08010420);
    387 	&data_word(0x08010020, 0x08000400, 0x00010420, 0x08010000);
    388 	&data_word(0x00010000, 0x00000020, 0x08000020, 0x00010400);
    389 	&data_word(0x08000420, 0x08010020, 0x08010400, 0x00000000);
    390 	&data_word(0x00010400, 0x08000000, 0x00010020, 0x00000420);
    391 	&data_word(0x08000400, 0x00010420, 0x00000000, 0x08000020);
    392 	&data_word(0x00000020, 0x08000420, 0x08010420, 0x00010020);
    393 	&data_word(0x08010000, 0x00000400, 0x00000420, 0x08010400);
    394 	&data_word(0x08010400, 0x08000420, 0x00010020, 0x08010000);
    395 	&data_word(0x00010000, 0x00000020, 0x08000020, 0x08000400);
    396 	&data_word(0x08000000, 0x00010400, 0x08010420, 0x00000000);
    397 	&data_word(0x00010420, 0x08000000, 0x00000400, 0x00010020);
    398 	&data_word(0x08000420, 0x00000400, 0x00000000, 0x08010420);
    399 	&data_word(0x08010020, 0x08010400, 0x00000420, 0x00010000);
    400 	&data_word(0x00010400, 0x08010020, 0x08000400, 0x00000420);
    401 	&data_word(0x00000020, 0x00010420, 0x08010000, 0x08000020);
    402 	# nibble 5
    403 	&data_word(0x80000040, 0x00200040, 0x00000000, 0x80202000);
    404 	&data_word(0x00200040, 0x00002000, 0x80002040, 0x00200000);
    405 	&data_word(0x00002040, 0x80202040, 0x00202000, 0x80000000);
    406 	&data_word(0x80002000, 0x80000040, 0x80200000, 0x00202040);
    407 	&data_word(0x00200000, 0x80002040, 0x80200040, 0x00000000);
    408 	&data_word(0x00002000, 0x00000040, 0x80202000, 0x80200040);
    409 	&data_word(0x80202040, 0x80200000, 0x80000000, 0x00002040);
    410 	&data_word(0x00000040, 0x00202000, 0x00202040, 0x80002000);
    411 	&data_word(0x00002040, 0x80000000, 0x80002000, 0x00202040);
    412 	&data_word(0x80202000, 0x00200040, 0x00000000, 0x80002000);
    413 	&data_word(0x80000000, 0x00002000, 0x80200040, 0x00200000);
    414 	&data_word(0x00200040, 0x80202040, 0x00202000, 0x00000040);
    415 	&data_word(0x80202040, 0x00202000, 0x00200000, 0x80002040);
    416 	&data_word(0x80000040, 0x80200000, 0x00202040, 0x00000000);
    417 	&data_word(0x00002000, 0x80000040, 0x80002040, 0x80202000);
    418 	&data_word(0x80200000, 0x00002040, 0x00000040, 0x80200040);
    419 	# nibble 6
    420 	&data_word(0x00004000, 0x00000200, 0x01000200, 0x01000004);
    421 	&data_word(0x01004204, 0x00004004, 0x00004200, 0x00000000);
    422 	&data_word(0x01000000, 0x01000204, 0x00000204, 0x01004000);
    423 	&data_word(0x00000004, 0x01004200, 0x01004000, 0x00000204);
    424 	&data_word(0x01000204, 0x00004000, 0x00004004, 0x01004204);
    425 	&data_word(0x00000000, 0x01000200, 0x01000004, 0x00004200);
    426 	&data_word(0x01004004, 0x00004204, 0x01004200, 0x00000004);
    427 	&data_word(0x00004204, 0x01004004, 0x00000200, 0x01000000);
    428 	&data_word(0x00004204, 0x01004000, 0x01004004, 0x00000204);
    429 	&data_word(0x00004000, 0x00000200, 0x01000000, 0x01004004);
    430 	&data_word(0x01000204, 0x00004204, 0x00004200, 0x00000000);
    431 	&data_word(0x00000200, 0x01000004, 0x00000004, 0x01000200);
    432 	&data_word(0x00000000, 0x01000204, 0x01000200, 0x00004200);
    433 	&data_word(0x00000204, 0x00004000, 0x01004204, 0x01000000);
    434 	&data_word(0x01004200, 0x00000004, 0x00004004, 0x01004204);
    435 	&data_word(0x01000004, 0x01004200, 0x01004000, 0x00004004);
    436 	# nibble 7
    437 	&data_word(0x20800080, 0x20820000, 0x00020080, 0x00000000);
    438 	&data_word(0x20020000, 0x00800080, 0x20800000, 0x20820080);
    439 	&data_word(0x00000080, 0x20000000, 0x00820000, 0x00020080);
    440 	&data_word(0x00820080, 0x20020080, 0x20000080, 0x20800000);
    441 	&data_word(0x00020000, 0x00820080, 0x00800080, 0x20020000);
    442 	&data_word(0x20820080, 0x20000080, 0x00000000, 0x00820000);
    443 	&data_word(0x20000000, 0x00800000, 0x20020080, 0x20800080);
    444 	&data_word(0x00800000, 0x00020000, 0x20820000, 0x00000080);
    445 	&data_word(0x00800000, 0x00020000, 0x20000080, 0x20820080);
    446 	&data_word(0x00020080, 0x20000000, 0x00000000, 0x00820000);
    447 	&data_word(0x20800080, 0x20020080, 0x20020000, 0x00800080);
    448 	&data_word(0x20820000, 0x00000080, 0x00800080, 0x20020000);
    449 	&data_word(0x20820080, 0x00800000, 0x20800000, 0x20000080);
    450 	&data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000);
    451 	&data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000);
    452 	&data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080);
    453 	}
    454