Home | History | Annotate | Download | only in asm
      1 .file	"crypto/bn/asm/x86-mont.s"
      2 .text
        #
        # bn_mul_mont -- word-by-word Montgomery multiplication for 32-bit x86
        # (GNU as, AT&T syntax; six 32-bit arguments passed on the stack).
        #
        # The argument names used in the comments follow the usual OpenSSL
        # prototype  int bn_mul_mont(rp, ap, bp, np, n0, num)  which is NOT
        # visible in this file -- TODO confirm against the C declaration.
        # What the code itself shows:
        #   arg0 (rp)  : destination, receives num 32-bit words
        #   arg1 (ap)  : first multiplicand, num words
        #   arg2 (bp)  : second multiplicand, num words
        #   arg3 (np)  : num words, multiplied by the per-round factor m and
        #                subtracted once in the tail (presumably the modulus)
        #   arg4 (n0)  : pointer; only its first 32-bit word is read
        #   arg5 (num) : word count; values below 4 are rejected
        # Returns %eax = 1 on completion, or 0 if num < 4 (nothing is written).
        #
        # Frame built below (offsets from the re-aligned %esp); "tp" is the
        # name these comments give to the temporary accumulator:
        #    0(%esp)  num-1                      (squaring path only)
        #    4(%esp)  rp
        #    8(%esp)  ap
        #   12(%esp)  bp; reused as the current bp pointer (multiply path)
        #             or as the outer index i (squaring path)
        #   16(%esp)  np
        #   20(%esp)  n0 word (*arg4)
        #   24(%esp)  %esp value to restore before the final pops
        #   28(%esp)  &bp[num]                   (integer multiply path only)
        #   32(%esp)  tp[0 .. num+1]
        #
        # NOTE(review): the whole frame is allocated with a single %esp
        # adjustment and no page-by-page touch -- confirm this is acceptable on
        # targets that rely on stack guard pages.
        #
      3 .globl	bn_mul_mont
      4 .type	bn_mul_mont,@function
      5 .align	16
      6 bn_mul_mont:
      7 .L_bn_mul_mont_begin:
      8 	pushl	%ebp	# save the four registers restored at .L000just_leave
      9 	pushl	%ebx
     10 	pushl	%esi
     11 	pushl	%edi
     12 	xorl	%eax,%eax	# return value 0 for the early-exit path
     13 	movl	40(%esp),%edi	# edi = num (arg5; arg0 sits at 20(%esp) after 4 pushes + return address)
     14 	cmpl	$4,%edi
     15 	jl	.L000just_leave	# num < 4 (signed compare): return 0
     16 	leal	20(%esp),%esi	# esi = address of the argument block (&arg0)
     17 	leal	24(%esp),%edx	# edx = &arg1, reference address for the frame placement below
     18 	movl	%esp,%ebp	# ebp = %esp to restore on exit
     19 	addl	$2,%edi	# edi = num+2
     20 	negl	%edi
     21 	leal	-32(%esp,%edi,4),%esp	# esp -= 32 + 4*(num+2): frame header plus tp[num+2]
     22 	negl	%edi	# edi = num+2 again
     23 	movl	%esp,%eax
     24 	subl	%edx,%eax
     25 	andl	$2047,%eax
     26 	subl	%eax,%esp	# now esp == edx (mod 2048)
     27 	xorl	%esp,%edx
     28 	andl	$2048,%edx
     29 	xorl	$2048,%edx	# edx = 2048 if bit 11 of esp equals bit 11 of &arg1, else 0
     30 	subl	%edx,%esp	# ... so that afterwards the two addresses differ in bit 11
     31 	andl	$-64,%esp	# round the frame down to a 64-byte boundary
     32 	movl	(%esi),%eax	# eax = rp
     33 	movl	4(%esi),%ebx	# ebx = ap
     34 	movl	8(%esi),%ecx	# ecx = bp
     35 	movl	12(%esi),%edx	# edx = np
     36 	movl	16(%esi),%esi	# esi = arg4 (pointer)
     37 	movl	(%esi),%esi	# esi = n0 word (*arg4)
     38 	movl	%eax,4(%esp)	# copy the arguments into the new frame
     39 	movl	%ebx,8(%esp)
     40 	movl	%ecx,12(%esp)
     41 	movl	%edx,16(%esp)
     42 	movl	%esi,20(%esp)
     43 	leal	-3(%edi),%ebx	# ebx = num-1 (edi still holds num+2)
     44 	movl	%ebp,24(%esp)	# remember the pre-allocation %esp
        # Position-independent lookup of OPENSSL_ia32cap_P through the GOT.
     45 	call	.L001PIC_me_up
     46 .L001PIC_me_up:
     47 	popl	%eax	# eax = address of .L001PIC_me_up
     48 	leal	_GLOBAL_OFFSET_TABLE_+[.-.L001PIC_me_up](%eax),%eax
     49 	movl	OPENSSL_ia32cap_P@GOT(%eax),%eax	# eax = &OPENSSL_ia32cap_P
     50 	btl	$26,(%eax)	# CF = bit 26 of the first capability word
     51 	jnc	.L002non_sse2	# bit clear: use the integer-only code below
        #
        # ---- pmuludq path (64-bit arithmetic in MMX registers) ----
        # esi = ap, edi = bp, ebp = np, edx = outer index i, ecx = inner index j.
        # mm7 = mask for the low 32 bits, mm4 = bp[i], mm5 = per-round factor m,
        # mm2 = running sum/carry of ap[]*bp[i], mm3 = running sum/carry of np[]*m.
        # Multiply and reduce are fused, so each result word is stored one
        # position lower than the word it was computed from.
        #
     52 	movl	$-1,%eax
     53 	movd	%eax,%mm7	# mm7 = 0x00000000FFFFFFFF
     54 	movl	8(%esp),%esi
     55 	movl	12(%esp),%edi
     56 	movl	16(%esp),%ebp
     57 	xorl	%edx,%edx	# i = 0
     58 	xorl	%ecx,%ecx	# j = 0
     59 	movd	(%edi),%mm4	# mm4 = bp[0]
     60 	movd	(%esi),%mm5	# mm5 = ap[0]
     61 	movd	(%ebp),%mm3	# mm3 = np[0]
     62 	pmuludq	%mm4,%mm5	# mm5 = ap[0]*bp[0]
     63 	movq	%mm5,%mm2
     64 	movq	%mm5,%mm0
     65 	pand	%mm7,%mm0	# mm0 = low 32 bits of ap[0]*bp[0]
     66 	pmuludq	20(%esp),%mm5	# mm5 = low32(mm5) * n0; its low 32 bits are m (only the low dword of the memory operand is used)
     67 	pmuludq	%mm5,%mm3	# mm3 = np[0]*m
     68 	paddq	%mm0,%mm3	# add the low word of ap[0]*bp[0]
     69 	movd	4(%ebp),%mm1	# mm1 = np[1]
     70 	movd	4(%esi),%mm0	# mm0 = ap[1]
     71 	psrlq	$32,%mm2	# keep only the carries; the low result word is dropped
     72 	psrlq	$32,%mm3
     73 	incl	%ecx	# j = 1
     74 .align	16
     75 .L0031st:
        # First round (i = 0), for j = 1 .. num-2.
     76 	pmuludq	%mm4,%mm0	# mm0 = ap[j]*bp[0]
     77 	pmuludq	%mm5,%mm1	# mm1 = np[j]*m
     78 	paddq	%mm0,%mm2
     79 	paddq	%mm1,%mm3
     80 	movq	%mm2,%mm0
     81 	pand	%mm7,%mm0	# mm0 = low word of the ap*bp sum
     82 	movd	4(%ebp,%ecx,4),%mm1	# preload np[j+1]
     83 	paddq	%mm0,%mm3
     84 	movd	4(%esi,%ecx,4),%mm0	# preload ap[j+1]
     85 	psrlq	$32,%mm2
     86 	movd	%mm3,28(%esp,%ecx,4)	# tp[j-1] = low word of the combined sum
     87 	psrlq	$32,%mm3
     88 	leal	1(%ecx),%ecx	# j++
     89 	cmpl	%ebx,%ecx
     90 	jl	.L0031st	# loop while j < num-1
        # Last word of the first round (j = num-1).
     91 	pmuludq	%mm4,%mm0
     92 	pmuludq	%mm5,%mm1
     93 	paddq	%mm0,%mm2
     94 	paddq	%mm1,%mm3
     95 	movq	%mm2,%mm0
     96 	pand	%mm7,%mm0
     97 	paddq	%mm0,%mm3
     98 	movd	%mm3,28(%esp,%ecx,4)	# tp[num-2]
     99 	psrlq	$32,%mm2
    100 	psrlq	$32,%mm3
    101 	paddq	%mm2,%mm3	# sum of both carries
    102 	movq	%mm3,32(%esp,%ebx,4)	# 8-byte store: tp[num-1] and tp[num]
    103 	incl	%edx	# i = 1
    104 .L004outer:
        # Rounds i = 1 .. num-1: tp = (tp + ap*bp[i] + np*m) >> 32.
    105 	xorl	%ecx,%ecx	# j = 0
    106 	movd	(%edi,%edx,4),%mm4	# mm4 = bp[i]
    107 	movd	(%esi),%mm5	# mm5 = ap[0]
    108 	movd	32(%esp),%mm6	# mm6 = tp[0]
    109 	movd	(%ebp),%mm3	# mm3 = np[0]
    110 	pmuludq	%mm4,%mm5
    111 	paddq	%mm6,%mm5	# mm5 = ap[0]*bp[i] + tp[0]
    112 	movq	%mm5,%mm0
    113 	movq	%mm5,%mm2
    114 	pand	%mm7,%mm0	# mm0 = low word of that sum
    115 	pmuludq	20(%esp),%mm5	# low 32 bits of mm5 = m for this round
    116 	pmuludq	%mm5,%mm3	# mm3 = np[0]*m
    117 	paddq	%mm0,%mm3
    118 	movd	36(%esp),%mm6	# mm6 = tp[1]
    119 	movd	4(%ebp),%mm1	# mm1 = np[1]
    120 	movd	4(%esi),%mm0	# mm0 = ap[1]
    121 	psrlq	$32,%mm2
    122 	psrlq	$32,%mm3
    123 	paddq	%mm6,%mm2	# fold tp[1] into the ap*bp accumulator
    124 	incl	%ecx	# j = 1
    125 	decl	%ebx	# ebx = num-2, used as the inner loop counter
    126 .L005inner:
    127 	pmuludq	%mm4,%mm0	# mm0 = ap[j]*bp[i]
    128 	pmuludq	%mm5,%mm1	# mm1 = np[j]*m
    129 	paddq	%mm0,%mm2
    130 	paddq	%mm1,%mm3
    131 	movq	%mm2,%mm0
    132 	movd	36(%esp,%ecx,4),%mm6	# mm6 = tp[j+1]
    133 	pand	%mm7,%mm0
    134 	movd	4(%ebp,%ecx,4),%mm1	# preload np[j+1]
    135 	paddq	%mm0,%mm3
    136 	movd	4(%esi,%ecx,4),%mm0	# preload ap[j+1]
    137 	psrlq	$32,%mm2
    138 	movd	%mm3,28(%esp,%ecx,4)	# tp[j-1] = low word
    139 	psrlq	$32,%mm3
    140 	paddq	%mm6,%mm2	# fold tp[j+1] into the next step
    141 	decl	%ebx
    142 	leal	1(%ecx),%ecx	# j++ (lea leaves the flags from decl intact)
    143 	jnz	.L005inner	# loop until the counter reaches zero
    144 	movl	%ecx,%ebx	# ebx = num-1 again (ecx ended at num-1)
        # Last word of the round (j = num-1).
    145 	pmuludq	%mm4,%mm0
    146 	pmuludq	%mm5,%mm1
    147 	paddq	%mm0,%mm2
    148 	paddq	%mm1,%mm3
    149 	movq	%mm2,%mm0
    150 	pand	%mm7,%mm0
    151 	paddq	%mm0,%mm3
    152 	movd	%mm3,28(%esp,%ecx,4)	# tp[num-2]
    153 	psrlq	$32,%mm2
    154 	psrlq	$32,%mm3
    155 	movd	36(%esp,%ebx,4),%mm6	# mm6 = tp[num]
    156 	paddq	%mm2,%mm3
    157 	paddq	%mm6,%mm3	# both carries plus the previous top word
    158 	movq	%mm3,32(%esp,%ebx,4)	# 8-byte store: tp[num-1] and tp[num]
    159 	leal	1(%edx),%edx	# i++
    160 	cmpl	%ebx,%edx
    161 	jle	.L004outer	# loop while i <= num-1
    162 	emms	# leave MMX state before returning to the caller
    163 	jmp	.L006common_tail
    164 .align	16
    165 .L002non_sse2:
        #
        # ---- integer-only path ----
        # Chooses between the generic multiply and a dedicated squaring
        # routine; squaring is taken only when ap == bp and num is even.
        #
    166 	movl	8(%esp),%esi	# esi = ap
    167 	leal	1(%ebx),%ebp	# ebp = num
    168 	movl	12(%esp),%edi	# edi = bp
    169 	xorl	%ecx,%ecx	# j = 0
    170 	movl	%esi,%edx
    171 	andl	$1,%ebp	# ebp = num & 1
    172 	subl	%edi,%edx	# edx = ap - bp
    173 	leal	4(%edi,%ebx,4),%eax	# eax = &bp[num]
    174 	orl	%edx,%ebp	# ZF set iff ap == bp and num is even
    175 	movl	(%edi),%edi	# edi = bp[0] (mov leaves ZF intact)
    176 	jz	.L007bn_sqr_mont
    177 	movl	%eax,28(%esp)	# 28(%esp) = end-of-bp sentinel
    178 	movl	(%esi),%eax	# eax = ap[0]
    179 	xorl	%edx,%edx	# carry = 0
    180 .align	16
    181 .L008mull:
        # tp[j] = ap[j]*bp[0] + carry, for j = 0 .. num-2.
    182 	movl	%edx,%ebp	# ebp = incoming carry
    183 	mull	%edi	# edx:eax = ap[j]*bp[0]
    184 	addl	%eax,%ebp
    185 	leal	1(%ecx),%ecx	# j++ (lea keeps CF for the adcl below)
    186 	adcl	$0,%edx
    187 	movl	(%esi,%ecx,4),%eax	# preload the next ap word
    188 	cmpl	%ebx,%ecx
    189 	movl	%ebp,28(%esp,%ecx,4)	# store the word just computed (mov keeps the cmpl flags)
    190 	jl	.L008mull	# loop while the new j < num-1
        # Last word of ap*bp[0], then start the first reduction.
    191 	movl	%edx,%ebp
    192 	mull	%edi	# edx:eax = ap[num-1]*bp[0]
    193 	movl	20(%esp),%edi	# edi = n0
    194 	addl	%ebp,%eax
    195 	movl	16(%esp),%esi	# esi = np
    196 	adcl	$0,%edx
    197 	imull	32(%esp),%edi	# edi = m = n0 * tp[0] (low 32 bits)
    198 	movl	%eax,32(%esp,%ebx,4)	# tp[num-1]
    199 	xorl	%ecx,%ecx
    200 	movl	%edx,36(%esp,%ebx,4)	# tp[num]
    201 	movl	%ecx,40(%esp,%ebx,4)	# tp[num+1] = 0
    202 	movl	(%esi),%eax	# eax = np[0]
    203 	mull	%edi	# edx:eax = np[0]*m
    204 	addl	32(%esp),%eax	# add tp[0]; only the carry out is kept (eax is reloaded next)
    205 	movl	4(%esi),%eax	# eax = np[1]
    206 	adcl	$0,%edx
    207 	incl	%ecx	# j = 1
    208 	jmp	.L0092ndmadd
    209 .align	16
    210 .L0101stmadd:
        # tp[j] += ap[j]*bp[i], for j = 0 .. num-2 (edi = bp[i]).
    211 	movl	%edx,%ebp	# ebp = incoming carry
    212 	mull	%edi	# edx:eax = ap[j]*bp[i]
    213 	addl	32(%esp,%ecx,4),%ebp	# + tp[j]
    214 	leal	1(%ecx),%ecx	# j++
    215 	adcl	$0,%edx
    216 	addl	%eax,%ebp
    217 	movl	(%esi,%ecx,4),%eax	# preload the next ap word
    218 	adcl	$0,%edx
    219 	cmpl	%ebx,%ecx
    220 	movl	%ebp,28(%esp,%ecx,4)	# store back to the tp word just updated
    221 	jl	.L0101stmadd
        # Last word (j = num-1), propagate into tp[num]/tp[num+1], then
        # compute m for this round and start the reduction.
    222 	movl	%edx,%ebp
    223 	mull	%edi
    224 	addl	32(%esp,%ebx,4),%eax	# + tp[num-1]
    225 	movl	20(%esp),%edi	# edi = n0
    226 	adcl	$0,%edx
    227 	movl	16(%esp),%esi	# esi = np
    228 	addl	%eax,%ebp
    229 	adcl	$0,%edx
    230 	imull	32(%esp),%edi	# edi = m = n0 * tp[0]
    231 	xorl	%ecx,%ecx
    232 	addl	36(%esp,%ebx,4),%edx	# + tp[num]
    233 	movl	%ebp,32(%esp,%ebx,4)	# tp[num-1]
    234 	adcl	$0,%ecx	# ecx = carry out of tp[num]
    235 	movl	(%esi),%eax	# eax = np[0]
    236 	movl	%edx,36(%esp,%ebx,4)	# tp[num]
    237 	movl	%ecx,40(%esp,%ebx,4)	# tp[num+1]
    238 	mull	%edi	# edx:eax = np[0]*m
    239 	addl	32(%esp),%eax	# add tp[0]; only the carry out is kept
    240 	movl	4(%esi),%eax	# eax = np[1]
    241 	adcl	$0,%edx
    242 	movl	$1,%ecx	# j = 1
    243 .align	16
    244 .L0092ndmadd:
        # Reduction: tp[j-1] = low(tp[j] + np[j]*m + carry), j = 1 .. num-2,
        # i.e. the accumulator is shifted down by one word as it is reduced.
    245 	movl	%edx,%ebp
    246 	mull	%edi	# edx:eax = np[j]*m
    247 	addl	32(%esp,%ecx,4),%ebp	# + tp[j]
    248 	leal	1(%ecx),%ecx	# j++
    249 	adcl	$0,%edx
    250 	addl	%eax,%ebp
    251 	movl	(%esi,%ecx,4),%eax	# preload the next np word
    252 	adcl	$0,%edx
    253 	cmpl	%ebx,%ecx
    254 	movl	%ebp,24(%esp,%ecx,4)	# store one word below the one just read
    255 	jl	.L0092ndmadd
        # Last word of the reduction, then advance to the next bp word.
    256 	movl	%edx,%ebp
    257 	mull	%edi	# edx:eax = np[num-1]*m
    258 	addl	32(%esp,%ebx,4),%ebp	# + tp[num-1]
    259 	adcl	$0,%edx
    260 	addl	%eax,%ebp
    261 	adcl	$0,%edx
    262 	movl	%ebp,28(%esp,%ebx,4)	# tp[num-2]
    263 	xorl	%eax,%eax
    264 	movl	12(%esp),%ecx	# ecx = pointer to the bp word just used
    265 	addl	36(%esp,%ebx,4),%edx	# + tp[num]
    266 	adcl	40(%esp,%ebx,4),%eax	# + tp[num+1] + carry
    267 	leal	4(%ecx),%ecx	# advance to the next bp word
    268 	movl	%edx,32(%esp,%ebx,4)	# tp[num-1]
    269 	cmpl	28(%esp),%ecx	# reached &bp[num]?
    270 	movl	%eax,36(%esp,%ebx,4)	# tp[num]
    271 	je	.L006common_tail	# all bp words consumed
    272 	movl	(%ecx),%edi	# edi = next bp word
    273 	movl	8(%esp),%esi	# esi = ap
    274 	movl	%ecx,12(%esp)	# remember the bp position
    275 	xorl	%ecx,%ecx	# j = 0
    276 	xorl	%edx,%edx	# carry = 0
    277 	movl	(%esi),%eax	# eax = ap[0]
    278 	jmp	.L0101stmadd
    279 .align	16
    280 .L007bn_sqr_mont:
        #
        # ---- squaring (ap == bp, num even) ----
        # Each cross product ap[j]*ap[i] (j > i) is computed once and doubled.
        # The doubling is done with lea/shr, carrying the shifted-out bit in
        # %ebx from one word to the next.
        #
    281 	movl	%ebx,(%esp)	# 0(%esp) = num-1
    282 	movl	%ecx,12(%esp)	# outer index i = 0 (ecx is 0 here)
    283 	movl	%edi,%eax	# eax = ap[0] (edi holds bp[0] and bp == ap)
    284 	mull	%edi	# edx:eax = ap[0]^2
    285 	movl	%eax,32(%esp)	# tp[0] = low word
    286 	movl	%edx,%ebx
    287 	shrl	$1,%edx	# halve the high word: it will be doubled with the cross products
    288 	andl	$1,%ebx	# ... and keep its low bit to add back after the doubling
    289 	incl	%ecx	# j = 1
    290 .align	16
    291 .L011sqr:
        # tp[j] = low(2*(ap[j]*ap[0] + carry) + bit), for j = 1 .. num-2.
    292 	movl	(%esi,%ecx,4),%eax	# eax = ap[j]
    293 	movl	%edx,%ebp
    294 	mull	%edi	# edx:eax = ap[j]*ap[0]
    295 	addl	%ebp,%eax
    296 	leal	1(%ecx),%ecx	# j++
    297 	adcl	$0,%edx
    298 	leal	(%ebx,%eax,2),%ebp	# ebp = 2*eax + carried-in bit
    299 	shrl	$31,%eax	# eax = bit shifted out by the doubling
    300 	cmpl	(%esp),%ecx
    301 	movl	%eax,%ebx	# carry that bit to the next word
    302 	movl	%ebp,28(%esp,%ecx,4)	# store the word just computed
    303 	jl	.L011sqr	# loop while the new j < num-1
        # Last word (j = num-1), top words of tp, then start the reduction.
    304 	movl	(%esi,%ecx,4),%eax	# eax = ap[num-1]
    305 	movl	%edx,%ebp
    306 	mull	%edi
    307 	addl	%ebp,%eax
    308 	movl	20(%esp),%edi	# edi = n0
    309 	adcl	$0,%edx
    310 	movl	16(%esp),%esi	# esi = np
    311 	leal	(%ebx,%eax,2),%ebp
    312 	imull	32(%esp),%edi	# edi = m = n0 * tp[0]
    313 	shrl	$31,%eax
    314 	movl	%ebp,32(%esp,%ecx,4)	# tp[num-1]
    315 	leal	(%eax,%edx,2),%ebp	# doubled high word plus the shifted-out bit
    316 	movl	(%esi),%eax	# eax = np[0]
    317 	shrl	$31,%edx
    318 	movl	%ebp,36(%esp,%ecx,4)	# tp[num]
    319 	movl	%edx,40(%esp,%ecx,4)	# tp[num+1]
    320 	mull	%edi	# edx:eax = np[0]*m
    321 	addl	32(%esp),%eax	# add tp[0]; only the carry out is kept
    322 	movl	%ecx,%ebx	# ebx = num-1
    323 	adcl	$0,%edx
    324 	movl	4(%esi),%eax	# eax = np[1]
    325 	movl	$1,%ecx	# j = 1
    326 .align	16
    327 .L0123rdmadd:
        # Reduction as in .L0092ndmadd, but handling two words per iteration
        # (j and j+1); j steps 1, 3, 5, ... which relies on num being even.
    328 	movl	%edx,%ebp
    329 	mull	%edi	# edx:eax = np[j]*m
    330 	addl	32(%esp,%ecx,4),%ebp	# + tp[j]
    331 	adcl	$0,%edx
    332 	addl	%eax,%ebp
    333 	movl	4(%esi,%ecx,4),%eax	# eax = np[j+1]
    334 	adcl	$0,%edx
    335 	movl	%ebp,28(%esp,%ecx,4)	# tp[j-1]
    336 	movl	%edx,%ebp
    337 	mull	%edi	# edx:eax = np[j+1]*m
    338 	addl	36(%esp,%ecx,4),%ebp	# + tp[j+1]
    339 	leal	2(%ecx),%ecx	# j += 2
    340 	adcl	$0,%edx
    341 	addl	%eax,%ebp
    342 	movl	(%esi,%ecx,4),%eax	# preload the next np word
    343 	adcl	$0,%edx
    344 	cmpl	%ebx,%ecx
    345 	movl	%ebp,24(%esp,%ecx,4)	# second result word, again one below the word read
    346 	jl	.L0123rdmadd
        # Last word of the reduction.
    347 	movl	%edx,%ebp
    348 	mull	%edi	# edx:eax = np[num-1]*m
    349 	addl	32(%esp,%ebx,4),%ebp	# + tp[num-1]
    350 	adcl	$0,%edx
    351 	addl	%eax,%ebp
    352 	adcl	$0,%edx
    353 	movl	%ebp,28(%esp,%ebx,4)	# tp[num-2]
    354 	movl	12(%esp),%ecx	# ecx = outer index i
    355 	xorl	%eax,%eax
    356 	movl	8(%esp),%esi	# esi = ap
    357 	addl	36(%esp,%ebx,4),%edx	# + tp[num]
    358 	adcl	40(%esp,%ebx,4),%eax	# + tp[num+1] + carry
    359 	movl	%edx,32(%esp,%ebx,4)	# tp[num-1]
    360 	cmpl	%ebx,%ecx
    361 	movl	%eax,36(%esp,%ebx,4)	# tp[num]
    362 	je	.L006common_tail	# i == num-1: every word has been processed
        # Next outer round: add ap[i]^2, then the doubled cross products.
    363 	movl	4(%esi,%ecx,4),%edi	# edi = ap[i+1]
    364 	leal	1(%ecx),%ecx	# i++
    365 	movl	%edi,%eax
    366 	movl	%ecx,12(%esp)	# save the new i
    367 	mull	%edi	# edx:eax = ap[i]^2
    368 	addl	32(%esp,%ecx,4),%eax	# + tp[i]
    369 	adcl	$0,%edx
    370 	movl	%eax,32(%esp,%ecx,4)	# tp[i]
    371 	xorl	%ebp,%ebp
    372 	cmpl	%ebx,%ecx
    373 	leal	1(%ecx),%ecx	# j = i+1 (lea keeps the cmpl flags)
    374 	je	.L013sqrlast	# i == num-1: no cross products remain
    375 	movl	%edx,%ebx
    376 	shrl	$1,%edx	# halve the carry word, as at the start of the squaring path
    377 	andl	$1,%ebx	# keep its low bit
    378 .align	16
    379 .L014sqradd:
        # tp[j] += 2*(ap[j]*ap[i] + carry) + carried bits, for j = i+1 .. num-1.
    380 	movl	(%esi,%ecx,4),%eax	# eax = ap[j]
    381 	movl	%edx,%ebp
    382 	mull	%edi	# edx:eax = ap[j]*ap[i]
    383 	addl	%ebp,%eax
    384 	leal	(%eax,%eax,1),%ebp	# ebp = 2*eax (lea keeps CF from the addl)
    385 	adcl	$0,%edx
    386 	shrl	$31,%eax	# eax = bit shifted out by the doubling
    387 	addl	32(%esp,%ecx,4),%ebp	# + tp[j]
    388 	leal	1(%ecx),%ecx	# j++
    389 	adcl	$0,%eax
    390 	addl	%ebx,%ebp	# + carry from the previous word
    391 	adcl	$0,%eax
    392 	cmpl	(%esp),%ecx
    393 	movl	%ebp,28(%esp,%ecx,4)	# store back to the tp word just updated
    394 	movl	%eax,%ebx	# carry for the next word
    395 	jle	.L014sqradd	# loop while the new j <= num-1
        # ebp:edx = 2*edx + ebx, the carry into the top words.
    396 	movl	%edx,%ebp
    397 	addl	%edx,%edx
    398 	shrl	$31,%ebp
    399 	addl	%ebx,%edx
    400 	adcl	$0,%ebp
    401 .L013sqrlast:
        # Fold the carry into tp[num]/tp[num+1] (ecx == num here), compute m
        # for this round and re-enter the reduction loop.
    402 	movl	20(%esp),%edi	# edi = n0
    403 	movl	16(%esp),%esi	# esi = np
    404 	imull	32(%esp),%edi	# edi = m = n0 * tp[0]
    405 	addl	32(%esp,%ecx,4),%edx	# + tp[num]
    406 	movl	(%esi),%eax	# eax = np[0]
    407 	adcl	$0,%ebp
    408 	movl	%edx,32(%esp,%ecx,4)	# tp[num]
    409 	movl	%ebp,36(%esp,%ecx,4)	# tp[num+1]
    410 	mull	%edi	# edx:eax = np[0]*m
    411 	addl	32(%esp),%eax	# add tp[0]; only the carry out is kept
    412 	leal	-1(%ecx),%ebx	# ebx = num-1
    413 	adcl	$0,%edx
    414 	movl	$1,%ecx	# j = 1
    415 	movl	4(%esi),%eax	# eax = np[1]
    416 	jmp	.L0123rdmadd
    417 .align	16
    418 .L006common_tail:
        #
        # ---- final subtraction and copy-out (all paths; ebx = num-1) ----
        # rp = tp - np is written first; the borrow then decides whether that
        # difference or the unsubtracted tp is the result copied to rp.
        #
    419 	movl	16(%esp),%ebp	# ebp = np
    420 	movl	4(%esp),%edi	# edi = rp
    421 	leal	32(%esp),%esi	# esi = &tp[0]
    422 	movl	(%esi),%eax	# eax = tp[0]
    423 	movl	%ebx,%ecx	# ecx = num-1, loop counter
    424 	xorl	%edx,%edx	# j = 0; also clears CF for the first sbbl
    425 .align	16
    426 .L015sub:
    427 	sbbl	(%ebp,%edx,4),%eax	# eax = tp[j] - np[j] - borrow
    428 	movl	%eax,(%edi,%edx,4)	# rp[j] = difference
    429 	decl	%ecx	# decl leaves CF (the borrow) untouched
    430 	movl	4(%esi,%edx,4),%eax	# eax = tp[j+1]
    431 	leal	1(%edx),%edx	# j++ (lea keeps the flags)
    432 	jge	.L015sub	# num iterations; ecx is -1 on exit
    433 	sbbl	$0,%eax	# eax = tp[num] - borrow, used as a select mask (expected 0 or all-ones -- TODO confirm)
    434 	andl	%eax,%esi	# esi = &tp[0] & mask
    435 	notl	%eax
    436 	movl	%edi,%ebp
    437 	andl	%eax,%ebp	# ebp = rp & ~mask
    438 	orl	%ebp,%esi	# esi = tp if mask is all-ones, rp if mask is 0
        # NOTE(review): the source pointer is chosen from the borrow, so the
        # load addresses in the loop below depend on the data -- confirm
        # whether constant-time behaviour is required here.
    439 .align	16
    440 .L016copy:
        # Copy the selected source to rp and overwrite the temporary.
    441 	movl	(%esi,%ebx,4),%eax
    442 	movl	%eax,(%edi,%ebx,4)	# rp[j] = source[j]
    443 	movl	%ecx,32(%esp,%ebx,4)	# tp[j] = ecx (-1 after the subtract loop): wipe the temporary
    444 	decl	%ebx
    445 	jge	.L016copy	# j = num-1 down to 0
    446 	movl	24(%esp),%esp	# drop the frame: back to the %esp saved after the pushes
    447 	movl	$1,%eax	# return 1
    448 .L000just_leave:
    449 	popl	%edi	# restore saved registers in reverse push order
    450 	popl	%esi
    451 	popl	%ebx
    452 	popl	%ebp
    453 	ret
    454 .size	bn_mul_mont,.-.L_bn_mul_mont_begin
    455 .byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105	# NUL-terminated ASCII identification string, spread over the
    456 .byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56	# five .byte lines; it decodes to
    457 .byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121	# "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
    458 .byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46	# (emitted into the current section, directly after the function)
    459 .byte	111,114,103,62,0
    460 .comm	OPENSSL_ia32cap_P,8,4	# common symbol: 8 bytes, 4-byte alignment; bn_mul_mont tests bit 26 of its first word
    461