Home | History | Annotate | Download | only in crypto
      1 #if defined(__SUNPRO_C) && defined(__sparcv9)
      2 # define ABI64  /* They've said -xarch=v9 at command line */
      3 #elif defined(__GNUC__) && defined(__arch64__)
      4 # define ABI64  /* They've said -m64 at command line */
      5 #endif
      6 
      7 #ifdef ABI64
      8   .register	%g2,#scratch
      9   .register	%g3,#scratch
     10 # define	FRAME	-192
     11 # define	BIAS	2047
     12 #else
     13 # define	FRAME	-96
     14 # define	BIAS	0
     15 #endif
     16 
     17 .text
     18 .align	32
     19 .global	OPENSSL_wipe_cpu
     20 .type	OPENSSL_wipe_cpu,#function
     21 ! Keep in mind that this does not excuse us from wiping the stack!
     22 ! This routine wipes registers, but not the backing store [which
     23 ! resides on the stack, toward lower addresses]. To facilitate for
     24 ! stack wiping I return pointer to the top of stack of the *caller*.
     25 OPENSSL_wipe_cpu:
     26 	save	%sp,FRAME,%sp
     27 	nop
     28 #ifdef __sun
     29 #include <sys/trap.h>
     30 	ta	ST_CLEAN_WINDOWS
     31 #else
     32 	call	.walk.reg.wins
     33 #endif
     34 	nop
     35 	call	.PIC.zero.up
     36 	mov	.zero-(.-4),%o0
     37 	ld	[%o0],%f0
     38 	ld	[%o0],%f1
     39 
     40 	subcc	%g0,1,%o0
     41 	! Following is V9 "rd %ccr,%o0" instruction. However! V8
     42 	! specification says that it ("rd %asr2,%o0" in V8 terms) does
     43 	! not cause illegal_instruction trap. It therefore can be used
     44 	! to determine if the CPU the code is executing on is V8- or
     45 	! V9-compliant, as V9 returns a distinct value of 0x99,
     46 	! "negative" and "borrow" bits set in both %icc and %xcc.
     47 	.word	0x91408000	!rd	%ccr,%o0
     48 	cmp	%o0,0x99
     49 	bne	.v8
     50 	nop
     51 			! Even though we do not use %fp register bank,
     52 			! we wipe it as memcpy might have used it...
     53 			.word	0xbfa00040	!fmovd	%f0,%f62
     54 			.word	0xbba00040	!...
     55 			.word	0xb7a00040
     56 			.word	0xb3a00040
     57 			.word	0xafa00040
     58 			.word	0xaba00040
     59 			.word	0xa7a00040
     60 			.word	0xa3a00040
     61 			.word	0x9fa00040
     62 			.word	0x9ba00040
     63 			.word	0x97a00040
     64 			.word	0x93a00040
     65 			.word	0x8fa00040
     66 			.word	0x8ba00040
     67 			.word	0x87a00040
     68 			.word	0x83a00040	!fmovd	%f0,%f32
     69 .v8:			fmovs	%f1,%f31
     70 	clr	%o0
     71 			fmovs	%f0,%f30
     72 	clr	%o1
     73 			fmovs	%f1,%f29
     74 	clr	%o2
     75 			fmovs	%f0,%f28
     76 	clr	%o3
     77 			fmovs	%f1,%f27
     78 	clr	%o4
     79 			fmovs	%f0,%f26
     80 	clr	%o5
     81 			fmovs	%f1,%f25
     82 	clr	%o7
     83 			fmovs	%f0,%f24
     84 	clr	%l0
     85 			fmovs	%f1,%f23
     86 	clr	%l1
     87 			fmovs	%f0,%f22
     88 	clr	%l2
     89 			fmovs	%f1,%f21
     90 	clr	%l3
     91 			fmovs	%f0,%f20
     92 	clr	%l4
     93 			fmovs	%f1,%f19
     94 	clr	%l5
     95 			fmovs	%f0,%f18
     96 	clr	%l6
     97 			fmovs	%f1,%f17
     98 	clr	%l7
     99 			fmovs	%f0,%f16
    100 	clr	%i0
    101 			fmovs	%f1,%f15
    102 	clr	%i1
    103 			fmovs	%f0,%f14
    104 	clr	%i2
    105 			fmovs	%f1,%f13
    106 	clr	%i3
    107 			fmovs	%f0,%f12
    108 	clr	%i4
    109 			fmovs	%f1,%f11
    110 	clr	%i5
    111 			fmovs	%f0,%f10
    112 	clr	%g1
    113 			fmovs	%f1,%f9
    114 	clr	%g2
    115 			fmovs	%f0,%f8
    116 	clr	%g3
    117 			fmovs	%f1,%f7
    118 	clr	%g4
    119 			fmovs	%f0,%f6
    120 	clr	%g5
    121 			fmovs	%f1,%f5
    122 			fmovs	%f0,%f4
    123 			fmovs	%f1,%f3
    124 			fmovs	%f0,%f2
    125 
    126 	add	%fp,BIAS,%i0	! return pointer to callers top of stack
    127 
    128 	ret
    129 	restore
    130 
    131 .zero:	.long	0x0,0x0
    132 .PIC.zero.up:
    133 	retl
    134 	add	%o0,%o7,%o0
    135 #ifdef DEBUG
    136 .global	walk_reg_wins
    137 .type	walk_reg_wins,#function
    138 walk_reg_wins:
    139 #endif
    140 .walk.reg.wins:
    141 	save	%sp,FRAME,%sp
    142 	cmp	%i7,%o7
    143 	be	2f
    144 	clr	%o0
    145 	cmp	%o7,0	! compiler never cleans %o7...
    146 	be	1f	! could have been a leaf function...
    147 	clr	%o1
    148 	call	.walk.reg.wins
    149 	nop
    150 1:	clr	%o2
    151 	clr	%o3
    152 	clr	%o4
    153 	clr	%o5
    154 	clr	%o7
    155 	clr	%l0
    156 	clr	%l1
    157 	clr	%l2
    158 	clr	%l3
    159 	clr	%l4
    160 	clr	%l5
    161 	clr	%l6
    162 	clr	%l7
    163 	add	%o0,1,%i0	! used for debugging
    164 2:	ret
    165 	restore
    166 .size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
    167 
    168 .global	OPENSSL_atomic_add
    169 .type	OPENSSL_atomic_add,#function
    170 .align	32
    171 OPENSSL_atomic_add:
    172 #ifndef ABI64
    173 	subcc	%g0,1,%o2
    174 	.word	0x95408000	!rd	%ccr,%o2, see comment above
    175 	cmp	%o2,0x99
    176 	be	.v9
    177 	nop
    178 	save	%sp,FRAME,%sp
    179 	ba	.enter
    180 	nop
    181 #ifdef __sun
    182 ! Note that you do not have to link with libthread to call thr_yield,
    183 ! as libc provides a stub, which is overloaded the moment you link
    184 ! with *either* libpthread or libthread...
    185 #define	YIELD_CPU	thr_yield
    186 #else
    187 ! applies at least to Linux and FreeBSD... Feedback expected...
    188 #define	YIELD_CPU	sched_yield
    189 #endif
    190 .spin:	call	YIELD_CPU
    191 	nop
    192 .enter:	ld	[%i0],%i2
    193 	cmp	%i2,-4096
    194 	be	.spin
    195 	mov	-1,%i2
    196 	swap	[%i0],%i2
    197 	cmp	%i2,-1
    198 	be	.spin
    199 	add	%i2,%i1,%i2
    200 	stbar
    201 	st	%i2,[%i0]
    202 	sra	%i2,%g0,%i0
    203 	ret
    204 	restore
    205 .v9:
    206 #endif
    207 	ld	[%o0],%o2
    208 1:	add	%o1,%o2,%o3
    209 	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
    210 	cmp	%o2,%o3
    211 	bne	1b
    212 	mov	%o3,%o2		! cas is always fetching to dest. register
    213 	add	%o1,%o2,%o0	! OpenSSL expects the new value
    214 	retl
    215 	sra	%o0,%g0,%o0	! we return signed int, remember?
    216 .size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
    217 
    218 .global	_sparcv9_rdtick
    219 .align	32
    220 _sparcv9_rdtick:
    221 	subcc	%g0,1,%o0
    222 	.word	0x91408000	!rd	%ccr,%o0
    223 	cmp	%o0,0x99
    224 	bne	.notick
    225 	xor	%o0,%o0,%o0
    226 	.word	0x91410000	!rd	%tick,%o0
    227 	retl
    228 	.word	0x93323020	!srlx	%o0,32,%o1
    229 .notick:
    230 	retl
    231 	xor	%o1,%o1,%o1
    232 .type	_sparcv9_rdtick,#function
    233 .size	_sparcv9_rdtick,.-_sparcv9_rdtick
    234 
    235 .global	_sparcv9_vis1_probe
    236 .align	8
    237 _sparcv9_vis1_probe:
    238 	add	%sp,BIAS+2,%o1
    239 	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
    240 	retl
    241 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
    242 .type	_sparcv9_vis1_probe,#function
    243 .size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
    244 
    245 ! Probe and instrument VIS1 instruction. Output is number of cycles it
    246 ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
    247 ! is slow (documented to be 6 cycles on T2) and the core is in-order
    248 ! single-issue, it should be possible to distinguish Tx reliably...
    249 ! Observed return values are:
    250 !
    251 !	UltraSPARC IIe		7
    252 !	UltraSPARC III		7
    253 !	UltraSPARC T1		24
    254 !
    255 ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
    256 !
    257 ! It would be possible to detect specifically US-T1 by instrumenting
    258 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
    259 ! a lot of %tick-s, couple of thousand on Linux...
    260 .global	_sparcv9_vis1_instrument
    261 .align	8
    262 _sparcv9_vis1_instrument:
    263 	.word	0x91410000	!rd	%tick,%o0
    264 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
    265 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
    266 	.word	0x93410000	!rd	%tick,%o1
    267 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
    268 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
    269 	.word	0x95410000	!rd	%tick,%o2
    270 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
    271 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
    272 	.word	0x97410000	!rd	%tick,%o3
    273 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
    274 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
    275 	.word	0x99410000	!rd	%tick,%o4
    276 
    277 	! calculate intervals
    278 	sub	%o1,%o0,%o0
    279 	sub	%o2,%o1,%o1
    280 	sub	%o3,%o2,%o2
    281 	sub	%o4,%o3,%o3
    282 
    283 	! find minumum value
    284 	cmp	%o0,%o1
    285 	.word	0x38680002	!bgu,a	%xcc,.+8
    286 	mov	%o1,%o0
    287 	cmp	%o0,%o2
    288 	.word	0x38680002	!bgu,a	%xcc,.+8
    289 	mov	%o2,%o0
    290 	cmp	%o0,%o3
    291 	.word	0x38680002	!bgu,a	%xcc,.+8
    292 	mov	%o3,%o0
    293 
    294 	retl
    295 	nop
    296 .type	_sparcv9_vis1_instrument,#function
    297 .size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
    298 
    299 .global	_sparcv9_vis2_probe
    300 .align	8
    301 _sparcv9_vis2_probe:
    302 	retl
    303 	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
    304 .type	_sparcv9_vis2_probe,#function
    305 .size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
    306 
    307 .global	_sparcv9_fmadd_probe
    308 .align	8
    309 _sparcv9_fmadd_probe:
    310 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
    311 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
    312 	retl
    313 	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
    314 .type	_sparcv9_fmadd_probe,#function
    315 .size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
    316 
    317 .global	OPENSSL_cleanse
    318 .align	32
    319 OPENSSL_cleanse:
    320 	cmp	%o1,14
    321 	nop
    322 #ifdef ABI64
    323 	bgu	%xcc,.Lot
    324 #else
    325 	bgu	.Lot
    326 #endif
    327 	cmp	%o1,0
    328 	bne	.Little
    329 	nop
    330 	retl
    331 	nop
    332 
    333 .Little:
    334 	stb	%g0,[%o0]
    335 	subcc	%o1,1,%o1
    336 	bnz	.Little
    337 	add	%o0,1,%o0
    338 	retl
    339 	nop
    340 .align	32
    341 .Lot:
    342 #ifndef ABI64
    343 	subcc	%g0,1,%g1
    344 	! see above for explanation
    345 	.word	0x83408000	!rd	%ccr,%g1
    346 	cmp	%g1,0x99
    347 	bne	.v8lot
    348 	nop
    349 #endif
    350 
    351 .v9lot:	andcc	%o0,7,%g0
    352 	bz	.v9aligned
    353 	nop
    354 	stb	%g0,[%o0]
    355 	sub	%o1,1,%o1
    356 	ba	.v9lot
    357 	add	%o0,1,%o0
    358 .align	16,0x01000000
    359 .v9aligned:
    360 	.word	0xc0720000	!stx	%g0,[%o0]
    361 	sub	%o1,8,%o1
    362 	andcc	%o1,-8,%g0
    363 #ifdef ABI64
    364 	.word	0x126ffffd	!bnz	%xcc,.v9aligned
    365 #else
    366 	.word	0x124ffffd	!bnz	%icc,.v9aligned
    367 #endif
    368 	add	%o0,8,%o0
    369 
    370 	cmp	%o1,0
    371 	bne	.Little
    372 	nop
    373 	retl
    374 	nop
    375 #ifndef ABI64
    376 .v8lot:	andcc	%o0,3,%g0
    377 	bz	.v8aligned
    378 	nop
    379 	stb	%g0,[%o0]
    380 	sub	%o1,1,%o1
    381 	ba	.v8lot
    382 	add	%o0,1,%o0
    383 	nop
    384 .v8aligned:
    385 	st	%g0,[%o0]
    386 	sub	%o1,4,%o1
    387 	andcc	%o1,-4,%g0
    388 	bnz	.v8aligned
    389 	add	%o0,4,%o0
    390 
    391 	cmp	%o1,0
    392 	bne	.Little
    393 	nop
    394 	retl
    395 	nop
    396 #endif
    397 .type	OPENSSL_cleanse,#function
    398 .size	OPENSSL_cleanse,.-OPENSSL_cleanse
    399 
    400 .section	".init",#alloc,#execinstr
    401 	call	OPENSSL_cpuid_setup
    402 	nop
    403