#!/usr/bin/env perl
#
# perlasm driver that emits the 32-bit x86 CPU-identification and helper
# routines.  The assembler flavour is taken from the first command-line
# argument and passed to asm_init(), which is provided by x86asm.pl.

# Locate the directory this script lives in so that the perlasm helpers
# can be found both next to the script and relative to the current
# directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],"x86cpuid");

# SSE2 register wiping in OPENSSL_wipe_cpu is emitted only when
# -DOPENSSL_IA32_SSE2 appears among the command-line arguments.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

     11 &function_begin("OPENSSL_ia32_cpuid");
     12 	&xor	("edx","edx");
     13 	&pushf	();
     14 	&pop	("eax");
     15 	&mov	("ecx","eax");
     16 	&xor	("eax",1<<21);
     17 	&push	("eax");
     18 	&popf	();
     19 	&pushf	();
     20 	&pop	("eax");
     21 	&xor	("ecx","eax");
     22 	&xor	("eax","eax");
     23 	&bt	("ecx",21);
     24 	&jnc	(&label("nocpuid"));
     25 	&cpuid	();
     26 	&mov	("edi","eax");		# max value for standard query level
     27 
     28 	&xor	("eax","eax");
     29 	&cmp	("ebx",0x756e6547);	# "Genu"
     30 	&setne	(&LB("eax"));
     31 	&mov	("ebp","eax");
     32 	&cmp	("edx",0x49656e69);	# "ineI"
     33 	&setne	(&LB("eax"));
     34 	&or	("ebp","eax");
     35 	&cmp	("ecx",0x6c65746e);	# "ntel"
     36 	&setne	(&LB("eax"));
     37 	&or	("ebp","eax");		# 0 indicates Intel CPU
     38 	&jz	(&label("intel"));
     39 
     40 	&cmp	("ebx",0x68747541);	# "Auth"
     41 	&setne	(&LB("eax"));
     42 	&mov	("esi","eax");
     43 	&cmp	("edx",0x69746E65);	# "enti"
     44 	&setne	(&LB("eax"));
     45 	&or	("esi","eax");
     46 	&cmp	("ecx",0x444D4163);	# "cAMD"
     47 	&setne	(&LB("eax"));
     48 	&or	("esi","eax");		# 0 indicates AMD CPU
     49 	&jnz	(&label("intel"));
     50 
     51 	# AMD specific
     52 	&mov	("eax",0x80000000);
     53 	&cpuid	();
     54 	&cmp	("eax",0x80000001);
     55 	&jb	(&label("intel"));
     56 	&mov	("esi","eax");
     57 	&mov	("eax",0x80000001);
     58 	&cpuid	();
     59 	&or	("ebp","ecx");
     60 	&and	("ebp",1<<11|1);	# isolate XOP bit
     61 	&cmp	("esi",0x80000008);
     62 	&jb	(&label("intel"));
     63 
     64 	&mov	("eax",0x80000008);
     65 	&cpuid	();
     66 	&movz	("esi",&LB("ecx"));	# number of cores - 1
     67 	&inc	("esi");		# number of cores
     68 
     69 	&mov	("eax",1);
     70 	&xor	("ecx","ecx");
     71 	&cpuid	();
     72 	&bt	("edx",28);
     73 	&jnc	(&label("generic"));
     74 	&shr	("ebx",16);
     75 	&and	("ebx",0xff);
     76 	&cmp	("ebx","esi");
     77 	&ja	(&label("generic"));
     78 	&and	("edx",0xefffffff);	# clear hyper-threading bit
     79 	&jmp	(&label("generic"));
     80 	
     81 &set_label("intel");
     82 	&cmp	("edi",4);
     83 	&mov	("edi",-1);
     84 	&jb	(&label("nocacheinfo"));
     85 
     86 	&mov	("eax",4);
     87 	&mov	("ecx",0);		# query L1D
     88 	&cpuid	();
     89 	&mov	("edi","eax");
     90 	&shr	("edi",14);
     91 	&and	("edi",0xfff);		# number of cores -1 per L1D
     92 
     93 &set_label("nocacheinfo");
     94 	&mov	("eax",1);
     95 	&xor	("ecx","ecx");
     96 	&cpuid	();
     97 	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
     98 	&cmp	("ebp",0);
     99 	&jne	(&label("notintel"));
    100 	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
    101 	&and	(&HB("eax"),15);	# familiy ID
    102 	&cmp	(&HB("eax"),15);	# P4?
    103 	&jne	(&label("notintel"));
    104 	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
    105 &set_label("notintel");
    106 	&bt	("edx",28);		# test hyper-threading bit
    107 	&jnc	(&label("generic"));
    108 	&and	("edx",0xefffffff);
    109 	&cmp	("edi",0);
    110 	&je	(&label("generic"));
    111 
    112 	&or	("edx",0x10000000);
    113 	&shr	("ebx",16);
    114 	&cmp	(&LB("ebx"),1);
    115 	&ja	(&label("generic"));
    116 	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
    117 
    118 &set_label("generic");
    119 	&and	("ebp",1<<11);		# isolate AMD XOP flag
    120 	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
    121 	&mov	("esi","edx");
    122 	&or	("ebp","ecx");		# merge AMD XOP flag
    123 
    124 	&bt	("ecx",27);		# check OSXSAVE bit
    125 	&jnc	(&label("clear_avx"));
    126 	&xor	("ecx","ecx");
    127 	&data_byte(0x0f,0x01,0xd0);	# xgetbv
    128 	&and	("eax",6);
    129 	&cmp	("eax",6);
    130 	&je	(&label("done"));
    131 	&cmp	("eax",2);
    132 	&je	(&label("clear_avx"));
    133 &set_label("clear_xmm");
    134 	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
    135 	&and	("esi",0xfeffffff);	# clear FXSR
    136 &set_label("clear_avx");
    137 	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
    138 &set_label("done");
    139 	&mov	("eax","esi");
    140 	&mov	("edx","ebp");
    141 &set_label("nocpuid");
    142 &function_end("OPENSSL_ia32_cpuid");
    143 
    144 &external_label("OPENSSL_ia32cap_P");
    145 
    146 &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    147 	&xor	("eax","eax");
    148 	&xor	("edx","edx");
    149 	&picmeup("ecx","OPENSSL_ia32cap_P");
    150 	&bt	(&DWP(0,"ecx"),4);
    151 	&jnc	(&label("notsc"));
    152 	&rdtsc	();
    153 &set_label("notsc");
    154 	&ret	();
    155 &function_end_B("OPENSSL_rdtsc");
    156 
    157 # This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
    158 # but it's safe to call it on any [supported] 32-bit platform...
    159 # Just check for [non-]zero return value...
    160 &function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    161 	&picmeup("ecx","OPENSSL_ia32cap_P");
    162 	&bt	(&DWP(0,"ecx"),4);
    163 	&jnc	(&label("nohalt"));	# no TSC
    164 
    165 	&data_word(0x9058900e);		# push %cs; pop %eax
    166 	&and	("eax",3);
    167 	&jnz	(&label("nohalt"));	# not enough privileges
    168 
    169 	&pushf	();
    170 	&pop	("eax");
    171 	&bt	("eax",9);
    172 	&jnc	(&label("nohalt"));	# interrupts are disabled
    173 
    174 	&rdtsc	();
    175 	&push	("edx");
    176 	&push	("eax");
    177 	&halt	();
    178 	&rdtsc	();
    179 
    180 	&sub	("eax",&DWP(0,"esp"));
    181 	&sbb	("edx",&DWP(4,"esp"));
    182 	&add	("esp",8);
    183 	&ret	();
    184 
    185 &set_label("nohalt");
    186 	&xor	("eax","eax");
    187 	&xor	("edx","edx");
    188 	&ret	();
    189 &function_end_B("OPENSSL_instrument_halt");
    190 
    191 # Essentially there is only one use for this function. Under DJGPP:
    192 #
    193 #	#include <go32.h>
    194 #	...
    195 #	i=OPENSSL_far_spin(_dos_ds,0x46c);
    196 #	...
    197 # to obtain the number of spins till closest timer interrupt.
    198 
    199 &function_begin_B("OPENSSL_far_spin");
    200 	&pushf	();
    201 	&pop	("eax")
    202 	&bt	("eax",9);
    203 	&jnc	(&label("nospin"));	# interrupts are disabled
    204 
    205 	&mov	("eax",&DWP(4,"esp"));
    206 	&mov	("ecx",&DWP(8,"esp"));
    207 	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
    208 	&xor	("eax","eax");
    209 	&mov	("edx",&DWP(0,"ecx"));
    210 	&jmp	(&label("spin"));
    211 
    212 	&align	(16);
    213 &set_label("spin");
    214 	&inc	("eax");
    215 	&cmp	("edx",&DWP(0,"ecx"));
    216 	&je	(&label("spin"));
    217 
    218 	&data_word (0x1f909090);	# pop	%ds
    219 	&ret	();
    220 
    221 &set_label("nospin");
    222 	&xor	("eax","eax");
    223 	&xor	("edx","edx");
    224 	&ret	();
    225 &function_end_B("OPENSSL_far_spin");
    226 
    227 &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    228 	&xor	("eax","eax");
    229 	&xor	("edx","edx");
    230 	&picmeup("ecx","OPENSSL_ia32cap_P");
    231 	&mov	("ecx",&DWP(0,"ecx"));
    232 	&bt	(&DWP(0,"ecx"),1);
    233 	&jnc	(&label("no_x87"));
    234 	if ($sse2) {
    235 		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
    236 		&cmp	("ecx",1<<26|1<<24);
    237 		&jne	(&label("no_sse2"));
    238 		&pxor	("xmm0","xmm0");
    239 		&pxor	("xmm1","xmm1");
    240 		&pxor	("xmm2","xmm2");
    241 		&pxor	("xmm3","xmm3");
    242 		&pxor	("xmm4","xmm4");
    243 		&pxor	("xmm5","xmm5");
    244 		&pxor	("xmm6","xmm6");
    245 		&pxor	("xmm7","xmm7");
    246 	&set_label("no_sse2");
    247 	}
    248 	# just a bunch of fldz to zap the fp/mm bank followed by finit...
    249 	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
    250 &set_label("no_x87");
    251 	&lea	("eax",&DWP(4,"esp"));
    252 	&ret	();
    253 &function_end_B("OPENSSL_wipe_cpu");
    254 
    255 &function_begin_B("OPENSSL_atomic_add");
    256 	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
    257 	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
    258 	&push	("ebx");
    259 	&nop	();
    260 	&mov	("eax",&DWP(0,"edx"));
    261 &set_label("spin");
    262 	&lea	("ebx",&DWP(0,"eax","ecx"));
    263 	&nop	();
    264 	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is envolved and is always reloaded
    265 	&jne	(&label("spin"));
    266 	&mov	("eax","ebx");	# OpenSSL expects the new value
    267 	&pop	("ebx");
    268 	&ret	();
    269 &function_end_B("OPENSSL_atomic_add");
    270 
    271 # This function can become handy under Win32 in situations when
    272 # we don't know which calling convention, __stdcall or __cdecl(*),
    273 # indirect callee is using. In C it can be deployed as
    274 #
    275 #ifdef OPENSSL_CPUID_OBJ
    276 #	type OPENSSL_indirect_call(void *f,...);
    277 #	...
    278 #	OPENSSL_indirect_call(func,[up to $max arguments]);
    279 #endif
    280 #
    281 # (*)	it's designed to work even for __fastcall if number of
    282 #	arguments is 1 or 2!
    283 &function_begin_B("OPENSSL_indirect_call");
    284 	{
    285 	my ($max,$i)=(7,);	# $max has to be chosen as 4*n-1
    286 				# in order to preserve eventual
    287 				# stack alignment
    288 	&push	("ebp");
    289 	&mov	("ebp","esp");
    290 	&sub	("esp",$max*4);
    291 	&mov	("ecx",&DWP(12,"ebp"));
    292 	&mov	(&DWP(0,"esp"),"ecx");
    293 	&mov	("edx",&DWP(16,"ebp"));
    294 	&mov	(&DWP(4,"esp"),"edx");
    295 	for($i=2;$i<$max;$i++)
    296 		{
    297 		# Some copies will be redundant/bogus...
    298 		&mov	("eax",&DWP(12+$i*4,"ebp"));
    299 		&mov	(&DWP(0+$i*4,"esp"),"eax");
    300 		}
    301 	&call_ptr	(&DWP(8,"ebp"));# make the call...
    302 	&mov	("esp","ebp");	# ... and just restore the stack pointer
    303 				# without paying attention to what we called,
    304 				# (__cdecl *func) or (__stdcall *one).
    305 	&pop	("ebp");
    306 	&ret	();
    307 	}
    308 &function_end_B("OPENSSL_indirect_call");
    309 
    310 &function_begin_B("OPENSSL_cleanse");
    311 	&mov	("edx",&wparam(0));
    312 	&mov	("ecx",&wparam(1));
    313 	&xor	("eax","eax");
    314 	&cmp	("ecx",7);
    315 	&jae	(&label("lot"));
    316 	&cmp	("ecx",0);
    317 	&je	(&label("ret"));
    318 &set_label("little");
    319 	&mov	(&BP(0,"edx"),"al");
    320 	&sub	("ecx",1);
    321 	&lea	("edx",&DWP(1,"edx"));
    322 	&jnz	(&label("little"));
    323 &set_label("ret");
    324 	&ret	();
    325 
    326 &set_label("lot",16);
    327 	&test	("edx",3);
    328 	&jz	(&label("aligned"));
    329 	&mov	(&BP(0,"edx"),"al");
    330 	&lea	("ecx",&DWP(-1,"ecx"));
    331 	&lea	("edx",&DWP(1,"edx"));
    332 	&jmp	(&label("lot"));
    333 &set_label("aligned");
    334 	&mov	(&DWP(0,"edx"),"eax");
    335 	&lea	("ecx",&DWP(-4,"ecx"));
    336 	&test	("ecx",-4);
    337 	&lea	("edx",&DWP(4,"edx"));
    338 	&jnz	(&label("aligned"));
    339 	&cmp	("ecx",0);
    340 	&jne	(&label("little"));
    341 	&ret	();
    342 &function_end_B("OPENSSL_cleanse");
    343 
    344 &function_begin_B("OPENSSL_ia32_rdrand");
    345 	&mov	("ecx",8);
    346 &set_label("loop");
    347 	&rdrand	("eax");
    348 	&jc	(&label("break"));
    349 	&loop	(&label("loop"));
    350 &set_label("break");
    351 	&cmp	("eax",0);
    352 	&cmove	("eax","ecx");
    353 	&ret	();
    354 &function_end_B("OPENSSL_ia32_rdrand");
    355 
    356 &initseg("OPENSSL_cpuid_setup");
    357 
    358 &hidden("OPENSSL_cpuid_setup");
    359 &hidden("OPENSSL_ia32cap_P");
    360 
    361 &asm_finish();
    362