Home | History | Annotate | Download | only in crypto
      1 #!/usr/bin/env perl
      2 
      3 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# $dir = directory part of this script's path, trailing / or \ included
      4 push(@INC, "${dir}perlasm", "perlasm");	# search <scriptdir>perlasm first, then ./perlasm
      5 require "x86asm.pl";
      6 	# the first command-line argument and the module name are handed to asm_init
      7 &asm_init($ARGV[0],"x86cpuid");
      8 	# $sse2 is set when any argument contains -DOPENSSL_IA32_SSE2; OPENSSL_wipe_cpu consults it
      9 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
     10 
     11 &function_begin("OPENSSL_ia32_cpuid");	# probe CPUID; leaves adjusted leaf-1 edx in eax and the final ecx in edx
     12 	&xor	("edx","edx");		# edx=0, so "done" yields eax=0 when CPUID is unusable
     13 	&pushf	();
     14 	&pop	("eax");		# eax = current flags
     15 	&mov	("ecx","eax");		# keep a copy for comparison
     16 	&xor	("eax",1<<21);		# flip flag bit 21
     17 	&push	("eax");
     18 	&popf	();			# try to write the flipped value back
     19 	&pushf	();
     20 	&pop	("eax");		# read the flags again
     21 	&xor	("ecx","eax");		# ecx = bits that actually changed
     22 	&bt	("ecx",21);
     23 	&jnc	(&label("done"));	# bit 21 could not be toggled: skip every CPUID query
     24 	&xor	("eax","eax");		# query level 0
     25 	&cpuid	();
     26 	&mov	("edi","eax");		# max value for standard query level
     27 	# ebp collects mismatches against "GenuineIntel" (ebx, edx, ecx order)
     28 	&xor	("eax","eax");
     29 	&cmp	("ebx",0x756e6547);	# "Genu"
     30 	&setne	(&LB("eax"));		# al=1 on mismatch
     31 	&mov	("ebp","eax");
     32 	&cmp	("edx",0x49656e69);	# "ineI"
     33 	&setne	(&LB("eax"));
     34 	&or	("ebp","eax");
     35 	&cmp	("ecx",0x6c65746e);	# "ntel"
     36 	&setne	(&LB("eax"));
     37 	&or	("ebp","eax");		# 0 indicates Intel CPU
     38 	&jz	(&label("intel"));
     39 	# esi collects mismatches against "AuthenticAMD" in the same way
     40 	&cmp	("ebx",0x68747541);	# "Auth"
     41 	&setne	(&LB("eax"));
     42 	&mov	("esi","eax");
     43 	&cmp	("edx",0x69746E65);	# "enti"
     44 	&setne	(&LB("eax"));
     45 	&or	("esi","eax");
     46 	&cmp	("ecx",0x444D4163);	# "cAMD"
     47 	&setne	(&LB("eax"));
     48 	&or	("esi","eax");		# 0 indicates AMD CPU
     49 	&jnz	(&label("intel"));	# neither vendor: take the generic path with ebp non-zero
     50 
     51 	# AMD specific
     52 	&mov	("eax",0x80000000);
     53 	&cpuid	();
     54 	&cmp	("eax",0x80000008);
     55 	&jb	(&label("intel"));	# query level 0x80000008 unavailable: fall back to the generic path
     56 
     57 	&mov	("eax",0x80000008);
     58 	&cpuid	();
     59 	&movz	("esi",&LB("ecx"));	# number of cores - 1
     60 	&inc	("esi");		# number of cores
     61 
     62 	&mov	("eax",1);
     63 	&cpuid	();
     64 	&bt	("edx",28);
     65 	&jnc	(&label("done"));	# bit 28 already clear: nothing to adjust
     66 	&shr	("ebx",16);
     67 	&and	("ebx",0xff);		# ebx = bits 16..23 of the level-1 ebx value
     68 	&cmp	("ebx","esi");
     69 	&ja	(&label("done"));	# that count exceeds the core count: keep bit 28
     70 	&and	("edx",0xefffffff);	# clear hyper-threading bit
     71 	&jmp	(&label("done"));
     72 	
     73 &set_label("intel");
     74 	&cmp	("edi",4);
     75 	&mov	("edi",-1);		# flags from the cmp are still consumed by the jb below; edi=-1 when level 4 is absent
     76 	&jb	(&label("nocacheinfo"));
     77 
     78 	&mov	("eax",4);
     79 	&mov	("ecx",0);		# query L1D
     80 	&cpuid	();
     81 	&mov	("edi","eax");
     82 	&shr	("edi",14);
     83 	&and	("edi",0xfff);		# number of cores -1 per L1D
     84 
     85 &set_label("nocacheinfo");
     86 	&mov	("eax",1);
     87 	&cpuid	();
     88 	&cmp	("ebp",0);
     89 	&jne	(&label("notP4"));	# vendor string was not Intel's
     90 	&and	(&HB("eax"),15);	# family ID
     91 	&cmp	(&HB("eax"),15);	# P4?
     92 	&jne	(&label("notP4"));
     93 	&or	("edx",1<<20);		# use reserved bit to engage RC4_CHAR
     94 &set_label("notP4");
     95 	&bt	("edx",28);		# test hyper-threading bit
     96 	&jnc	(&label("done"));
     97 	&and	("edx",0xefffffff);	# clear it for now ...
     98 	&cmp	("edi",0);
     99 	&je	(&label("done"));	# ... and leave it clear when edi is 0
    100 
    101 	&or	("edx",0x10000000);	# otherwise set it again
    102 	&shr	("ebx",16);
    103 	&cmp	(&LB("ebx"),1);		# bl = bits 16..23 of the level-1 ebx value
    104 	&ja	(&label("done"));	# greater than 1: keep the bit set
    105 	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
    106 &set_label("done");
    107 	&mov	("eax","edx");		# result low word: the (possibly adjusted) edx value
    108 	&mov	("edx","ecx");		# result high word: whatever ecx holds on this path
    109 &function_end("OPENSSL_ia32_cpuid");
    110 
    111 &external_label("OPENSSL_ia32cap_P");	# capability word referenced by the routines below
    112 	# OPENSSL_rdtsc: edx:eax = time-stamp counter, or 0 when capability bit 4 is clear
    113 &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    114 	&xor	("eax","eax");
    115 	&xor	("edx","edx");		# default result is zero
    116 	&picmeup("ecx","OPENSSL_ia32cap_P");	# presumably ecx = address of OPENSSL_ia32cap_P -- confirm in x86asm.pl
    117 	&bt	(&DWP(0,"ecx"),4);	# bit 4 of the capability word
    118 	&jnc	(&label("notsc"));	# bit clear: return the zero set up above
    119 	&rdtsc	();
    120 &set_label("notsc");
    121 	&ret	();
    122 &function_end_B("OPENSSL_rdtsc");
    123 
    124 # OPENSSL_instrument_halt: time one HLT instruction with the time-stamp counter.
    125 # This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host], but it's
    126 # safe to call on any [supported] 32-bit platform: just check for a [non-]zero edx:eax.
    127 &function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    128 	&picmeup("ecx","OPENSSL_ia32cap_P");
    129 	&bt	(&DWP(0,"ecx"),4);
    130 	&jnc	(&label("nohalt"));	# no TSC
    131 
    132 	&data_word(0x9058900e);		# push %cs; pop %eax
    133 	&and	("eax",3);		# keep the low two bits of the %cs selector
    134 	&jnz	(&label("nohalt"));	# not enough privileges
    135 
    136 	&pushf	();
    137 	&pop	("eax");		# eax = current flags
    138 	&bt	("eax",9);
    139 	&jnc	(&label("nohalt"));	# interrupts are disabled
    140 
    141 	&rdtsc	();
    142 	&push	("edx");
    143 	&push	("eax");		# start stamp: low half at 0(esp), high half at 4(esp)
    144 	&halt	();
    145 	&rdtsc	();
    146 
    147 	&sub	("eax",&DWP(0,"esp"));
    148 	&sbb	("edx",&DWP(4,"esp"));	# edx:eax = stamp after halt - stamp before
    149 	&add	("esp",8);		# drop the saved start stamp
    150 	&ret	();
    151 
    152 &set_label("nohalt");
    153 	&xor	("eax","eax");
    154 	&xor	("edx","edx");		# zero result: nothing was measured
    155 	&ret	();
    156 &function_end_B("OPENSSL_instrument_halt");
    157 
    158 # Essentially there is only one use for this function. Under DJGPP:
    159 #
    160 #	#include <go32.h>
    161 #	...
    162 #	i=OPENSSL_far_spin(_dos_ds,0x46c);
    163 #	...
    164 # to obtain the number of spins till closest timer interrupt.
    165 # Returns the spin count in eax, or 0 in edx:eax when interrupts are disabled.
    166 &function_begin_B("OPENSSL_far_spin");
    167 	&pushf	();
    168 	&pop	("eax");		# eax = current flags
    169 	&bt	("eax",9);
    170 	&jnc	(&label("nospin"));	# interrupts are disabled
    171 
    172 	&mov	("eax",&DWP(4,"esp"));	# 1st arg: loaded into %ds below
    173 	&mov	("ecx",&DWP(8,"esp"));	# 2nd arg: offset of the dword to watch
    174 	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
    175 	&xor	("eax","eax");		# spin counter
    176 	&mov	("edx",&DWP(0,"ecx"));	# remember the initial value
    177 	&jmp	(&label("spin"));
    178 
    179 	&align	(16);
    180 &set_label("spin");
    181 	&inc	("eax");
    182 	&cmp	("edx",&DWP(0,"ecx"));
    183 	&je	(&label("spin"));	# loop until the watched dword changes
    184 
    185 	&data_word (0x1f909090);	# pop	%ds
    186 	&ret	();
    187 
    188 &set_label("nospin");
    189 	&xor	("eax","eax");
    190 	&xor	("edx","edx");
    191 	&ret	();
    192 &function_end_B("OPENSSL_far_spin");
    193 
    194 &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");	# zero eax/edx and the fp/mm (and, with $sse2, xmm) registers; eax = esp+4 on return
    195 	&xor	("eax","eax");
    196 	&xor	("edx","edx");
    197 	&picmeup("ecx","OPENSSL_ia32cap_P");
    198 	&mov	("ecx",&DWP(0,"ecx"));	# ecx now holds the capability word itself, not a pointer to it
    199 	&bt	("ecx",1);		# test the loaded value; NOTE(review): CPUID.1:EDX reports the x87 FPU in bit 0 -- confirm bit 1 is intended
    200 	&jnc	(&label("no_x87"));
    201 	if ($sse2) {
    202 		&bt	("ecx",26);	# bit 26 gates the xmm wipe (emitted only when built with -DOPENSSL_IA32_SSE2)
    203 		&jnc	(&label("no_sse2"));
    204 		&pxor	("xmm0","xmm0");
    205 		&pxor	("xmm1","xmm1");
    206 		&pxor	("xmm2","xmm2");
    207 		&pxor	("xmm3","xmm3");
    208 		&pxor	("xmm4","xmm4");
    209 		&pxor	("xmm5","xmm5");
    210 		&pxor	("xmm6","xmm6");
    211 		&pxor	("xmm7","xmm7");
    212 	&set_label("no_sse2");
    213 	}
    214 	# just a bunch of fldz to zap the fp/mm bank followed by finit...
    215 	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
    216 &set_label("no_x87");
    217 	&lea	("eax",&DWP(4,"esp"));	# return esp+4, the address just above the return address
    218 	&ret	();
    219 &function_end_B("OPENSSL_wipe_cpu");
    220 
    221 &function_begin_B("OPENSSL_atomic_add");	# atomically add the 2nd arg to the dword at the 1st arg; returns the new value in eax
    222 	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
    223 	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
    224 	&push	("ebx");		# ebx is used as scratch and restored before returning
    225 	&nop	();
    226 	&mov	("eax",&DWP(0,"edx"));	# eax = value currently stored
    227 &set_label("spin");
    228 	&lea	("ebx",&DWP(0,"eax","ecx"));	# ebx = candidate new value (eax + increment)
    229 	&nop	();
    230 	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
    231 	&jne	(&label("spin"));	# the stored value changed underneath us: retry
    232 	&mov	("eax","ebx");	# OpenSSL expects the new value
    233 	&pop	("ebx");
    234 	&ret	();
    235 &function_end_B("OPENSSL_atomic_add");
    236 
    237 # This function can come in handy under Win32 in situations when
    238 # we don't know which calling convention, __stdcall or __cdecl(*),
    239 # the indirect callee is using. In C it can be deployed as
    240 #
    241 #ifdef OPENSSL_CPUID_OBJ
    242 #	type OPENSSL_indirect_call(void *f,...);
    243 #	...
    244 #	OPENSSL_indirect_call(func,[up to $max arguments]);
    245 #endif
    246 #
    247 # (*)	it's designed to work even for __fastcall if number of
    248 #	arguments is 1 or 2!
    249 &function_begin_B("OPENSSL_indirect_call");
    250 	{
    251 	my $i; my $max=7;	# $max has to be chosen as 4*n-1
    252 				# in order to preserve eventual
    253 				# stack alignment
    254 	&push	("ebp");
    255 	&mov	("ebp","esp");		# 8(ebp) = f, 12(ebp) onwards = caller's arguments
    256 	&sub	("esp",$max*4);		# room for a private copy of $max argument slots
    257 	&mov	("ecx",&DWP(12,"ebp"));	# 1st argument, also left in ecx
    258 	&mov	(&DWP(0,"esp"),"ecx");
    259 	&mov	("edx",&DWP(16,"ebp"));	# 2nd argument, also left in edx
    260 	&mov	(&DWP(4,"esp"),"edx");
    261 	for($i=2;$i<$max;$i++)
    262 		{
    263 		# Some copies will be redundant/bogus...
    264 		&mov	("eax",&DWP(12+$i*4,"ebp"));
    265 		&mov	(&DWP(0+$i*4,"esp"),"eax");
    266 		}
    267 	&call_ptr	(&DWP(8,"ebp"));# make the call...
    268 	&mov	("esp","ebp");	# ... and just restore the stack pointer
    269 				# without paying attention to what we called,
    270 				# (__cdecl *func) or (__stdcall *one).
    271 	&pop	("ebp");
    272 	&ret	();
    273 	}
    274 &function_end_B("OPENSSL_indirect_call");
    275 
    276 &function_begin_B("OPENSSL_cleanse");	# store zero bytes over wparam(1) bytes starting at wparam(0)
    277 	&mov	("edx",&wparam(0));	# edx = write cursor
    278 	&mov	("ecx",&wparam(1));	# ecx = bytes remaining
    279 	&xor	("eax","eax");		# eax = the zero that gets stored
    280 	&cmp	("ecx",7);
    281 	&jae	(&label("lot"));	# 7 bytes or more: align, then store dwords
    282 	&cmp	("ecx",0);
    283 	&je	(&label("ret"));	# nothing to do
    284 &set_label("little");
    285 	&mov	(&BP(0,"edx"),"al");	# byte-at-a-time loop
    286 	&sub	("ecx",1);
    287 	&lea	("edx",&DWP(1,"edx"));	# lea advances the cursor while the flags from sub stay in place for jnz
    288 	&jnz	(&label("little"));
    289 &set_label("ret");
    290 	&ret	();
    291 
    292 &set_label("lot",16);
    293 	&test	("edx",3);
    294 	&jz	(&label("aligned"));	# cursor is a multiple of 4
    295 	&mov	(&BP(0,"edx"),"al");	# otherwise clear single bytes until it is
    296 	&lea	("ecx",&DWP(-1,"ecx"));
    297 	&lea	("edx",&DWP(1,"edx"));
    298 	&jmp	(&label("lot"));
    299 &set_label("aligned");
    300 	&mov	(&DWP(0,"edx"),"eax");	# clear four bytes per iteration
    301 	&lea	("ecx",&DWP(-4,"ecx"));
    302 	&test	("ecx",-4);		# non-zero while the remaining count has any bit above the low two set
    303 	&lea	("edx",&DWP(4,"edx"));	# flags from test survive this lea
    304 	&jnz	(&label("aligned"));
    305 	&cmp	("ecx",0);
    306 	&jne	(&label("little"));	# finish the tail in the byte loop
    307 	&ret	();
    308 &function_end_B("OPENSSL_cleanse");
    309 
    310 &initseg("OPENSSL_cpuid_setup");	# presumably registers OPENSSL_cpuid_setup as an initializer -- confirm in x86asm.pl
    311 
    312 &asm_finish();	# final perlasm call of the script
    313