#!/usr/bin/env perl
#
# Generator script for 32-bit x86 CPU identification and small utility
# routines.  Every &mnemonic(...) call below is forwarded to the assembler
# emitters loaded from x86asm.pl.
#
# Command line, as used by this script:
#   $ARGV[0]               passed unchanged to asm_init()
#   -DOPENSSL_IA32_SSE2    any argument containing this string enables the
#                          SSE2 register wipe in OPENSSL_wipe_cpu

# Directory part of the script path, including the trailing separator, so
# that the perlasm helpers are found next to this script.  When the script
# is started by bare name the pattern does not match; use an empty prefix
# in that case rather than reading an unset (or stale) $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],"crypto/cpu-x86-asm");

# $sse2 is consulted later when OPENSSL_wipe_cpu is generated.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
     10 
     # OPENSSL_ia32_cpuid(ptr)
     #
     # Probes the processor with CPUID and returns two filtered feature words:
     #   eax = CPUID.1:EDX after adjustment (collected in esi)
     #   edx = CPUID.1:ECX after adjustment (collected in ebp)
     # The argument is a pointer; the dword at offset 8 behind it is cleared
     # on entry and, when leaf 7 is available, receives CPUID.7:EBX.
     # If CPUID itself is unavailable the routine returns eax = edx = 0.
     #
     # Register roles inside the routine:
     #   edi  highest standard CPUID leaf, later the leaf-4 sharing count
     #   ebp  0 only for "GenuineIntel"; later the ECX feature word
     #   esi  scratch / argument pointer, finally the EDX feature word
     #
     # The instruction order matters in several places (flags are carried
     # across mov, registers are reused), so statements must not be reordered.
     # NOTE(review): function_begin/function_end are defined in x86asm.pl and
     # presumably emit the prologue/epilogue that preserves ebx/esi/edi/ebp.
     &function_begin("OPENSSL_ia32_cpuid");
     	# CPUID is usable only if EFLAGS bit 21 can be toggled.
     	&xor	("edx","edx");
     	&pushf	();
     	&pop	("eax");
     	&mov	("ecx","eax");		# remember original EFLAGS
     	&xor	("eax",1<<21);
     	&push	("eax");
     	&popf	();
     	&pushf	();
     	&pop	("eax");
     	&xor	("ecx","eax");		# bit 21 set if the flag changed
     	&xor	("eax","eax");
     	&bt	("ecx",21);
     	&jnc	(&label("nocpuid"));	# leave with eax=edx=0
     	&mov	("esi",&wparam(0));
     	&mov	(&DWP(8,"esi"),"eax");	# clear 3rd word
     	&cpuid	();			# leaf 0: vendor string
     	&mov	("edi","eax");		# max value for standard query level

     	# ebp becomes 0 only if all three vendor words match.
     	&xor	("eax","eax");
     	&cmp	("ebx",0x756e6547);	# "Genu"
     	&setne	(&LB("eax"));
     	&mov	("ebp","eax");
     	&cmp	("edx",0x49656e69);	# "ineI"
     	&setne	(&LB("eax"));
     	&or	("ebp","eax");
     	&cmp	("ecx",0x6c65746e);	# "ntel"
     	&setne	(&LB("eax"));
     	&or	("ebp","eax");		# 0 indicates Intel CPU
     	&jz	(&label("intel"));

     	# Same test for AMD; anything that is not AMD shares the "intel" path.
     	&cmp	("ebx",0x68747541);	# "Auth"
     	&setne	(&LB("eax"));
     	&mov	("esi","eax");
     	&cmp	("edx",0x69746E65);	# "enti"
     	&setne	(&LB("eax"));
     	&or	("esi","eax");
     	&cmp	("ecx",0x444D4163);	# "cAMD"
     	&setne	(&LB("eax"));
     	&or	("esi","eax");		# 0 indicates AMD CPU
     	&jnz	(&label("intel"));

     	# AMD specific
     	&mov	("eax",0x80000000);
     	&cpuid	();			# eax = highest extended leaf
     	&cmp	("eax",0x80000001);
     	&jb	(&label("intel"));
     	&mov	("esi","eax");
     	&mov	("eax",0x80000001);
     	&cpuid	();
     	&or	("ebp","ecx");
     	&and	("ebp",1<<11|1);	# keep XOP (bit 11); bit 0 keeps ebp
     					# non-zero, i.e. "not Intel"
     	&cmp	("esi",0x80000008);
     	&jb	(&label("intel"));

     	&mov	("eax",0x80000008);
     	&cpuid	();
     	&movz	("esi",&LB("ecx"));	# number of cores - 1
     	&inc	("esi");		# number of cores

     	&mov	("eax",1);
     	&xor	("ecx","ecx");
     	&cpuid	();
     	&bt	("edx",28);
     	&jnc	(&label("generic"));
     	&shr	("ebx",16);
     	&and	("ebx",0xff);		# logical processor count
     	&cmp	("ebx","esi");
     	&ja	(&label("generic"));	# more logical CPUs than cores: keep bit
     	&and	("edx",0xefffffff);	# clear hyper-threading bit
     	&jmp	(&label("generic"));

     # Common path for Intel and every other non-AMD vendor.
     &set_label("intel");
     	&cmp	("edi",7);
     	&jb	(&label("cacheinfo"));

     	&mov	("esi",&wparam(0));
     	&mov	("eax",7);
     	&xor	("ecx","ecx");
     	&cpuid	();
     	&mov	(&DWP(8,"esi"),"ebx");	# store leaf 7 EBX in the 3rd word

     &set_label("cacheinfo");
     	&cmp	("edi",4);
     	&mov	("edi",-1);		# mov leaves the flags of cmp intact
     	&jb	(&label("nocacheinfo"));

     	&mov	("eax",4);
     	&mov	("ecx",0);		# query L1D
     	&cpuid	();
     	&mov	("edi","eax");
     	&shr	("edi",14);
     	&and	("edi",0xfff);		# number of cores -1 per L1D

     &set_label("nocacheinfo");
     	&mov	("eax",1);
     	&xor	("ecx","ecx");
     	&cpuid	();
     	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
     	&cmp	("ebp",0);
     	&jne	(&label("notintel"));
     	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
     &set_label("notintel");
     	&bt	("edx",28);		# test hyper-threading bit
     	&jnc	(&label("generic"));
     	&and	("edx",0xefffffff);	# tentatively clear it
     	&cmp	("edi",0);
     	&je	(&label("generic"));	# L1D not shared: leave it cleared

     	&or	("edx",0x10000000);	# restore the bit
     	&shr	("ebx",16);
     	&cmp	(&LB("ebx"),1);		# logical processor count
     	&ja	(&label("generic"));
     	&and	("edx",0xefffffff);	# clear hyper-threading bit if there is
     					# not more than one logical processor

     &set_label("generic");
     	&and	("ebp",1<<11);		# isolate AMD XOP flag
     	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
     	&mov	("esi","edx");
     	&or	("ebp","ecx");		# merge AMD XOP flag

     	&bt	("ecx",27);		# check OSXSAVE bit
     	&jnc	(&label("clear_avx"));
     	&xor	("ecx","ecx");		# select register 0 for xgetbv
     	&data_byte(0x0f,0x01,0xd0);	# xgetbv
     	&and	("eax",6);
     	&cmp	("eax",6);		# both state bits 1 and 2 enabled?
     	&je	(&label("done"));
     	&cmp	("eax",2);		# only bit 1 enabled: drop AVX only
     	&je	(&label("clear_avx"));
     &set_label("clear_xmm");		# reached by fall-through only
     	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
     	&and	("esi",0xfeffffff);	# clear FXSR
     &set_label("clear_avx");
     	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
     	&mov	("edi",&wparam(0));
     	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
     &set_label("done");
     	&mov	("eax","esi");
     	&mov	("edx","ebp");
     &set_label("nocpuid");
     &function_end("OPENSSL_ia32_cpuid");
    153 
    # The capability vector is referenced by the routines below but is not
    # defined in this file.
    &external_label("OPENSSL_ia32cap_P");

    # OPENSSL_rdtsc
    #
    # Returns the time-stamp counter in edx:eax, or 0:0 when bit 4 of the
    # first capability word is clear.
    &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    {
    	my $cap_symbol = "OPENSSL_ia32cap_P";
    	my $tsc_flag   = 4;		# bit tested in capability word 0

    	# Preset the 0:0 result before looking at the capability word.
    	for my $reg ("eax","edx") {
    		&xor	($reg,$reg);
    	}
    	&picmeup("ecx",$cap_symbol);
    	&bt	(&DWP(0,"ecx"),$tsc_flag);
    	&jnc	(&label("notsc"));
    	&rdtsc	();
    &set_label("notsc");
    	&ret	();
    }
    &function_end_B("OPENSSL_rdtsc");
    166 
    # OPENSSL_instrument_halt
    #
    # Measures, in time-stamp counter ticks, how long a single HLT lasts and
    # returns the difference in edx:eax.  It only does so in Ring 0 [read
    # DJGPP+MS-DOS+privileged DPMI host]; on any other [supported] 32-bit
    # platform it is still safe to call and simply returns 0:0, so callers
    # just check for a [non-]zero return value.
    &function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    {
    	&picmeup("ecx","OPENSSL_ia32cap_P");
    	&bt	(&DWP(0,"ecx"),4);
    	my $give_up = &label("nohalt");	# common "return 0:0" exit
    	&jnc	($give_up);		# no TSC

    	&data_word(0x9058900e);		# push %cs; pop %eax
    	&and	("eax",3);		# low two selector bits
    	&jnz	($give_up);		# not enough privileges

    	&pushf	();
    	&pop	("eax");
    	&bt	("eax",9);
    	&jnc	($give_up);		# interrupts are disabled

    	# Park the starting counter value on the stack, halt, read again.
    	&rdtsc	();
    	&push	("edx");
    	&push	("eax");
    	&halt	();
    	&rdtsc	();

    	# 64-bit subtraction: (edx:eax) -= saved value, then drop the slot.
    	&sub	("eax",&DWP(0,"esp"));
    	&sbb	("edx",&DWP(4,"esp"));
    	&add	("esp",8);
    	&ret	();

    &set_label("nohalt");
    	for my $reg ("eax","edx") {
    		&xor	($reg,$reg);
    	}
    	&ret	();
    }
    &function_end_B("OPENSSL_instrument_halt");
    200 
    # OPENSSL_far_spin(selector, offset)
    #
    # Loads the first argument into %ds, then counts loop iterations until
    # the dword at the given offset changes; the count is returned in eax.
    # Returns 0 without spinning when interrupts are disabled.
    #
    # There is essentially one use for it.  Under DJGPP:
    #
    #	#include <go32.h>
    #	...
    #	i=OPENSSL_far_spin(_dos_ds,0x46c);
    #	...
    # yields the number of spins until the closest timer interrupt.

    &function_begin_B("OPENSSL_far_spin");
    {
    	my ($spins,$first_seen,$offset) = ("eax","edx","ecx");

    	&pushf	();
    	&pop	("eax");
    	&bt	("eax",9);
    	&jnc	(&label("nospin"));	# interrupts are disabled

    	# Both arguments are fetched before %ds is pushed, so the stack
    	# offsets are still relative to the return address.
    	&mov	("eax",&DWP(4,"esp"));
    	&mov	($offset,&DWP(8,"esp"));
    	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
    	&xor	($spins,$spins);
    	&mov	($first_seen,&DWP(0,$offset));
    	&jmp	(&label("spin"));

    	&align	(16);
    &set_label("spin");
    	&inc	($spins);
    	&cmp	($first_seen,&DWP(0,$offset));
    	&je	(&label("spin"));

    	&data_word (0x1f909090);	# pop	%ds
    	&ret	();

    &set_label("nospin");
    	for my $reg ("eax","edx") {
    		&xor	($reg,$reg);
    	}
    	&ret	();
    }
    &function_end_B("OPENSSL_far_spin");
    236 
    # OPENSSL_wipe_cpu
    #
    # Zeroes eax and edx, the x87/MMX register bank and, when the script was
    # run with -DOPENSSL_IA32_SSE2 and the CPU reports both SSE2 and FXSR,
    # xmm0-xmm7.  Returns in eax the address just above the return address
    # (esp+4 at the time of the call).
    &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    	&xor	("eax","eax");
    	&xor	("edx","edx");
    	&picmeup("ecx","OPENSSL_ia32cap_P");
    	&mov	("ecx",&DWP(0,"ecx"));	# ecx = capability word 0 (a value now,
    					# no longer an address)
    	# Test the register itself: ecx must not be dereferenced again.
    	# Bit 0 of the CPUID.1:EDX-derived word is the x87 FPU flag.
    	&bt	("ecx",0);
    	&jnc	(&label("no_x87"));
    	if ($sse2) {
    		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
    		&cmp	("ecx",1<<26|1<<24);
    		&jne	(&label("no_sse2"));
    		for my $n (0..7) {
    			&pxor	("xmm$n","xmm$n");
    		}
    	&set_label("no_sse2");
    	}
    	# just a bunch of fldz to zap the fp/mm bank followed by finit...
    	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
    &set_label("no_x87");
    	&lea	("eax",&DWP(4,"esp"));
    	&ret	();
    &function_end_B("OPENSSL_wipe_cpu");
    264 
    # OPENSSL_atomic_add(ptr, amount)
    #
    # Atomically adds amount to the dword at ptr using a lock cmpxchg retry
    # loop and returns the new value in eax.
    &function_begin_B("OPENSSL_atomic_add");
    {
    	# The register choice is fixed by the hand-encoded cmpxchg below.
    	my ($current,$updated,$amount,$target) = ("eax","ebx","ecx","edx");

    	&mov	($target,&DWP(4,"esp"));	# fetch the pointer, 1st arg
    	&mov	($amount,&DWP(8,"esp"));	# fetch the increment, 2nd arg
    	&push	($updated);
    	&nop	();
    	&mov	($current,&DWP(0,$target));
    &set_label("spin");
    	&lea	($updated,&DWP(0,$current,$amount));
    	&nop	();
    	# lock; cmpxchg %ebx,(%edx) -- %eax is involved and is always
    	# reloaded, so a failed attempt retries with the fresh value.
    	&data_word(0x1ab10ff0);
    	&jne	(&label("spin"));
    	&mov	($current,$updated);	# OpenSSL expects the new value
    	&pop	($updated);
    	&ret	();
    }
    &function_end_B("OPENSSL_atomic_add");
    280 
    # OPENSSL_indirect_call(f, ...)
    #
    # Calls f with a copy of the caller's arguments and restores the stack
    # pointer itself afterwards.  That makes it handy under Win32 when it is
    # not known whether the indirect callee uses __stdcall or __cdecl(*).
    # In C it can be deployed as
    #
    #ifdef OPENSSL_CPUID_OBJ
    #	type OPENSSL_indirect_call(void *f,...);
    #	...
    #	OPENSSL_indirect_call(func,[up to $max arguments]);
    #endif
    #
    # (*)	it is designed to work even for __fastcall if the number of
    #	arguments is 1 or 2: the first two arguments are carried through
    #	ecx and edx while being copied.
    &function_begin_B("OPENSSL_indirect_call");
    	{
    	my $max=7;		# $max has to be chosen as 4*n-1
    				# in order to preserve eventual
    				# stack alignment
    	my @fastcall_regs=("ecx","edx");

    	&push	("ebp");
    	&mov	("ebp","esp");
    	&sub	("esp",$max*4);
    	for my $slot (0..$max-1)
    		{
    		# Slots 0 and 1 travel through ecx/edx, the rest through eax.
    		# Some copies will be redundant/bogus...
    		my $via=$fastcall_regs[$slot] || "eax";
    		&mov	($via,&DWP(12+$slot*4,"ebp"));
    		&mov	(&DWP($slot*4,"esp"),$via);
    		}
    	&call_ptr	(&DWP(8,"ebp"));# make the call...
    	&mov	("esp","ebp");	# ... and just restore the stack pointer
    				# without paying attention to what we called,
    				# (__cdecl *func) or (__stdcall *one).
    	&pop	("ebp");
    	&ret	();
    	}
    &function_end_B("OPENSSL_indirect_call");
    319 
    # OPENSSL_ia32_rdrand
    #
    # Tries rdrand up to eight times and returns the value in eax.  The loop
    # is left as soon as rdrand reports success through the carry flag.  A
    # zero in eax is replaced by the remaining attempt count in ecx, so the
    # routine returns 0 only when ecx has counted down to 0 as well.
    &function_begin_B("OPENSSL_ia32_rdrand");
    {
    	my $attempts = 8;
    	my ($value,$left) = ("eax","ecx");

    	&mov	($left,$attempts);
    &set_label("loop");
    	&rdrand	($value);
    	&jc	(&label("break"));
    	&loop	(&label("loop"));	# decrements ecx, repeats while non-zero
    &set_label("break");
    	&cmp	($value,0);
    	&cmove	($value,$left);
    	&ret	();
    }
    &function_end_B("OPENSSL_ia32_rdrand");

    # File trailer: mark the capability vector symbol hidden, then let the
    # backend finish the output.
    &hidden("OPENSSL_ia32cap_P");

    &asm_finish();
    335