Home | History | Annotate | Download | only in crypto
      1 #!/usr/bin/env perl
      2 
      3 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
      4 push(@INC, "${dir}perlasm", "perlasm");
      5 require "x86asm.pl";
      6 
      7 &asm_init($ARGV[0],"crypto/cpu-x86-asm");
      8 
      9 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
     10 
     11 &function_begin("OPENSSL_ia32_cpuid");
     12 	&xor	("edx","edx");
     13 	&pushf	();
     14 	&pop	("eax");
     15 	&mov	("ecx","eax");
     16 	&xor	("eax",1<<21);
     17 	&push	("eax");
     18 	&popf	();
     19 	&pushf	();
     20 	&pop	("eax");
     21 	&xor	("ecx","eax");
     22 	&xor	("eax","eax");
     23 	&bt	("ecx",21);
     24 	&jnc	(&label("nocpuid"));
     25 	&mov	("esi",&wparam(0));
     26 	&mov	(&DWP(8,"esi"),"eax");	# clear 3rd word
     27 	&cpuid	();
     28 	&mov	("edi","eax");		# max value for standard query level
     29 
     30 	&xor	("eax","eax");
     31 	&cmp	("ebx",0x756e6547);	# "Genu"
     32 	&setne	(&LB("eax"));
     33 	&mov	("ebp","eax");
     34 	&cmp	("edx",0x49656e69);	# "ineI"
     35 	&setne	(&LB("eax"));
     36 	&or	("ebp","eax");
     37 	&cmp	("ecx",0x6c65746e);	# "ntel"
     38 	&setne	(&LB("eax"));
     39 	&or	("ebp","eax");		# 0 indicates Intel CPU
     40 	&jz	(&label("intel"));
     41 
     42 	&cmp	("ebx",0x68747541);	# "Auth"
     43 	&setne	(&LB("eax"));
     44 	&mov	("esi","eax");
     45 	&cmp	("edx",0x69746E65);	# "enti"
     46 	&setne	(&LB("eax"));
     47 	&or	("esi","eax");
     48 	&cmp	("ecx",0x444D4163);	# "cAMD"
     49 	&setne	(&LB("eax"));
     50 	&or	("esi","eax");		# 0 indicates AMD CPU
     51 	&jnz	(&label("intel"));
     52 
     53 	# AMD specific
     54 	&mov	("eax",0x80000000);
     55 	&cpuid	();
     56 	&cmp	("eax",0x80000001);
     57 	&jb	(&label("intel"));
     58 	&mov	("esi","eax");
     59 	&mov	("eax",0x80000001);
     60 	&cpuid	();
     61 	&or	("ebp","ecx");
     62 	&and	("ebp",1<<11|1);	# isolate XOP bit
     63 	&cmp	("esi",0x80000008);
     64 	&jb	(&label("intel"));
     65 
     66 	&mov	("eax",0x80000008);
     67 	&cpuid	();
     68 	&movz	("esi",&LB("ecx"));	# number of cores - 1
     69 	&inc	("esi");		# number of cores
     70 
     71 	&mov	("eax",1);
     72 	&xor	("ecx","ecx");
     73 	&cpuid	();
     74 	&bt	("edx",28);
     75 	&jnc	(&label("generic"));
     76 	&shr	("ebx",16);
     77 	&and	("ebx",0xff);
     78 	&cmp	("ebx","esi");
     79 	&ja	(&label("generic"));
     80 	&and	("edx",0xefffffff);	# clear hyper-threading bit
     81 	&jmp	(&label("generic"));
     82 	
     83 &set_label("intel");
     84 	&cmp	("edi",7);
     85 	&jb	(&label("cacheinfo"));
     86 
     87 	&mov	("esi",&wparam(0));
     88 	&mov	("eax",7);
     89 	&xor	("ecx","ecx");
     90 	&cpuid	();
     91 	&mov	(&DWP(8,"esi"),"ebx");
     92 
     93 &set_label("cacheinfo");
     94 	&cmp	("edi",4);
     95 	&mov	("edi",-1);
     96 	&jb	(&label("nocacheinfo"));
     97 
     98 	&mov	("eax",4);
     99 	&mov	("ecx",0);		# query L1D
    100 	&cpuid	();
    101 	&mov	("edi","eax");
    102 	&shr	("edi",14);
    103 	&and	("edi",0xfff);		# number of cores -1 per L1D
    104 
    105 &set_label("nocacheinfo");
    106 	&mov	("eax",1);
    107 	&xor	("ecx","ecx");
    108 	&cpuid	();
    109 	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
    110 	&cmp	("ebp",0);
    111 	&jne	(&label("notintel"));
    112 	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
    113 	&and	(&HB("eax"),15);	# familiy ID
    114 	&cmp	(&HB("eax"),15);	# P4?
    115 	&jne	(&label("notintel"));
    116 	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
    117 &set_label("notintel");
    118 	&bt	("edx",28);		# test hyper-threading bit
    119 	&jnc	(&label("generic"));
    120 	&and	("edx",0xefffffff);
    121 	&cmp	("edi",0);
    122 	&je	(&label("generic"));
    123 
    124 	&or	("edx",0x10000000);
    125 	&shr	("ebx",16);
    126 	&cmp	(&LB("ebx"),1);
    127 	&ja	(&label("generic"));
    128 	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
    129 
    130 &set_label("generic");
    131 	&and	("ebp",1<<11);		# isolate AMD XOP flag
    132 	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
    133 	&mov	("esi","edx");
    134 	&or	("ebp","ecx");		# merge AMD XOP flag
    135 
    136 	&bt	("ecx",27);		# check OSXSAVE bit
    137 	&jnc	(&label("clear_avx"));
    138 	&xor	("ecx","ecx");
    139 	&data_byte(0x0f,0x01,0xd0);	# xgetbv
    140 	&and	("eax",6);
    141 	&cmp	("eax",6);
    142 	&je	(&label("done"));
    143 	&cmp	("eax",2);
    144 	&je	(&label("clear_avx"));
    145 &set_label("clear_xmm");
    146 	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
    147 	&and	("esi",0xfeffffff);	# clear FXSR
    148 &set_label("clear_avx");
    149 	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
    150 	&mov	("edi",&wparam(0));
    151 	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
    152 &set_label("done");
    153 	&mov	("eax","esi");
    154 	&mov	("edx","ebp");
    155 &set_label("nocpuid");
    156 &function_end("OPENSSL_ia32_cpuid");
    157 
    158 &external_label("OPENSSL_ia32cap_P");
    159 
    160 &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    161 	&xor	("eax","eax");
    162 	&xor	("edx","edx");
    163 	&picmeup("ecx","OPENSSL_ia32cap_P");
    164 	&bt	(&DWP(0,"ecx"),4);
    165 	&jnc	(&label("notsc"));
    166 	&rdtsc	();
    167 &set_label("notsc");
    168 	&ret	();
    169 &function_end_B("OPENSSL_rdtsc");
    170 
    171 # This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
    172 # but it's safe to call it on any [supported] 32-bit platform...
    173 # Just check for [non-]zero return value...
    174 &function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    175 	&picmeup("ecx","OPENSSL_ia32cap_P");
    176 	&bt	(&DWP(0,"ecx"),4);
    177 	&jnc	(&label("nohalt"));	# no TSC
    178 
    179 	&data_word(0x9058900e);		# push %cs; pop %eax
    180 	&and	("eax",3);
    181 	&jnz	(&label("nohalt"));	# not enough privileges
    182 
    183 	&pushf	();
    184 	&pop	("eax");
    185 	&bt	("eax",9);
    186 	&jnc	(&label("nohalt"));	# interrupts are disabled
    187 
    188 	&rdtsc	();
    189 	&push	("edx");
    190 	&push	("eax");
    191 	&halt	();
    192 	&rdtsc	();
    193 
    194 	&sub	("eax",&DWP(0,"esp"));
    195 	&sbb	("edx",&DWP(4,"esp"));
    196 	&add	("esp",8);
    197 	&ret	();
    198 
    199 &set_label("nohalt");
    200 	&xor	("eax","eax");
    201 	&xor	("edx","edx");
    202 	&ret	();
    203 &function_end_B("OPENSSL_instrument_halt");
    204 
    205 # Essentially there is only one use for this function. Under DJGPP:
    206 #
    207 #	#include <go32.h>
    208 #	...
    209 #	i=OPENSSL_far_spin(_dos_ds,0x46c);
    210 #	...
    211 # to obtain the number of spins till closest timer interrupt.
    212 
    213 &function_begin_B("OPENSSL_far_spin");
    214 	&pushf	();
    215 	&pop	("eax");
    216 	&bt	("eax",9);
    217 	&jnc	(&label("nospin"));	# interrupts are disabled
    218 
    219 	&mov	("eax",&DWP(4,"esp"));
    220 	&mov	("ecx",&DWP(8,"esp"));
    221 	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
    222 	&xor	("eax","eax");
    223 	&mov	("edx",&DWP(0,"ecx"));
    224 	&jmp	(&label("spin"));
    225 
    226 	&align	(16);
    227 &set_label("spin");
    228 	&inc	("eax");
    229 	&cmp	("edx",&DWP(0,"ecx"));
    230 	&je	(&label("spin"));
    231 
    232 	&data_word (0x1f909090);	# pop	%ds
    233 	&ret	();
    234 
    235 &set_label("nospin");
    236 	&xor	("eax","eax");
    237 	&xor	("edx","edx");
    238 	&ret	();
    239 &function_end_B("OPENSSL_far_spin");
    240 
    241 &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    242 	&xor	("eax","eax");
    243 	&xor	("edx","edx");
    244 	&picmeup("ecx","OPENSSL_ia32cap_P");
    245 	&mov	("ecx",&DWP(0,"ecx"));
    246 	&bt	(&DWP(0,"ecx"),1);
    247 	&jnc	(&label("no_x87"));
    248 	if ($sse2) {
    249 		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
    250 		&cmp	("ecx",1<<26|1<<24);
    251 		&jne	(&label("no_sse2"));
    252 		&pxor	("xmm0","xmm0");
    253 		&pxor	("xmm1","xmm1");
    254 		&pxor	("xmm2","xmm2");
    255 		&pxor	("xmm3","xmm3");
    256 		&pxor	("xmm4","xmm4");
    257 		&pxor	("xmm5","xmm5");
    258 		&pxor	("xmm6","xmm6");
    259 		&pxor	("xmm7","xmm7");
    260 	&set_label("no_sse2");
    261 	}
    262 	# just a bunch of fldz to zap the fp/mm bank followed by finit...
    263 	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
    264 &set_label("no_x87");
    265 	&lea	("eax",&DWP(4,"esp"));
    266 	&ret	();
    267 &function_end_B("OPENSSL_wipe_cpu");
    268 
    269 &function_begin_B("OPENSSL_atomic_add");
    270 	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
    271 	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
    272 	&push	("ebx");
    273 	&nop	();
    274 	&mov	("eax",&DWP(0,"edx"));
    275 &set_label("spin");
    276 	&lea	("ebx",&DWP(0,"eax","ecx"));
    277 	&nop	();
    278 	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is envolved and is always reloaded
    279 	&jne	(&label("spin"));
    280 	&mov	("eax","ebx");	# OpenSSL expects the new value
    281 	&pop	("ebx");
    282 	&ret	();
    283 &function_end_B("OPENSSL_atomic_add");
    284 
    285 # This function can become handy under Win32 in situations when
    286 # we don't know which calling convention, __stdcall or __cdecl(*),
    287 # indirect callee is using. In C it can be deployed as
    288 #
    289 #ifdef OPENSSL_CPUID_OBJ
    290 #	type OPENSSL_indirect_call(void *f,...);
    291 #	...
    292 #	OPENSSL_indirect_call(func,[up to $max arguments]);
    293 #endif
    294 #
    295 # (*)	it's designed to work even for __fastcall if number of
    296 #	arguments is 1 or 2!
    297 &function_begin_B("OPENSSL_indirect_call");
    298 	{
    299 	my ($max,$i)=(7,);	# $max has to be chosen as 4*n-1
    300 				# in order to preserve eventual
    301 				# stack alignment
    302 	&push	("ebp");
    303 	&mov	("ebp","esp");
    304 	&sub	("esp",$max*4);
    305 	&mov	("ecx",&DWP(12,"ebp"));
    306 	&mov	(&DWP(0,"esp"),"ecx");
    307 	&mov	("edx",&DWP(16,"ebp"));
    308 	&mov	(&DWP(4,"esp"),"edx");
    309 	for($i=2;$i<$max;$i++)
    310 		{
    311 		# Some copies will be redundant/bogus...
    312 		&mov	("eax",&DWP(12+$i*4,"ebp"));
    313 		&mov	(&DWP(0+$i*4,"esp"),"eax");
    314 		}
    315 	&call_ptr	(&DWP(8,"ebp"));# make the call...
    316 	&mov	("esp","ebp");	# ... and just restore the stack pointer
    317 				# without paying attention to what we called,
    318 				# (__cdecl *func) or (__stdcall *one).
    319 	&pop	("ebp");
    320 	&ret	();
    321 	}
    322 &function_end_B("OPENSSL_indirect_call");
    323 
    324 &function_begin_B("OPENSSL_ia32_rdrand");
    325 	&mov	("ecx",8);
    326 &set_label("loop");
    327 	&rdrand	("eax");
    328 	&jc	(&label("break"));
    329 	&loop	(&label("loop"));
    330 &set_label("break");
    331 	&cmp	("eax",0);
    332 	&cmove	("eax","ecx");
    333 	&ret	();
    334 &function_end_B("OPENSSL_ia32_rdrand");
    335 
    336 &hidden("OPENSSL_ia32cap_P");
    337 
    338 &asm_finish();
    339