Home | History | Annotate | Download | only in chacha
      1 #if defined(__i386__)
        # ChaCha20 routines for 32-bit x86 in AT&T syntax. The .private_extern
        # and .indirect_symbol directives used in this file are Mach-O
        # assembler directives.
        # NOTE(review): the numbered labels (L000..., L001...) suggest this is
        # generated output -- confirm before editing by hand.
      2 .text
      3 .globl	_ChaCha20_ctr32
      4 .private_extern	_ChaCha20_ctr32
      5 .align	4
        # _ChaCha20_ctr32 -- entry point; picks the scalar or the SSSE3 code path.
        #
        # Stack arguments, as addressed after the four pushes below:
        #   out     = 20(%esp)  destination pointer
        #   in      = 24(%esp)  source pointer, XORed with the keystream
        #   len     = 28(%esp)  byte count; zero returns immediately
        #   key     = 32(%esp)  pointer to 32 bytes used as state words 4..11
        #   counter = 36(%esp)  pointer to 16 bytes used as state words 12..15
        # NOTE(review): presumably declared in C as
        #   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t len,
        #                       const uint32_t key[8], const uint32_t counter[4]);
        # confirm against the declaring header.
        #
        # Registers %ebp, %ebx, %esi and %edi are saved here and restored before
        # returning; %eax, %ecx and %edx are overwritten.
      6 _ChaCha20_ctr32:
      7 L_ChaCha20_ctr32_begin:
      8 	pushl	%ebp
      9 	pushl	%ebx
     10 	pushl	%esi
     11 	pushl	%edi
        	# Nothing to do for len == 0.
     12 	xorl	%eax,%eax
     13 	cmpl	28(%esp),%eax
     14 	je	L000no_data
        	# call/pop leaves the run-time address of Lpic_point in %eax so that
        	# data can be addressed relative to it (position-independent code).
     15 	call	Lpic_point
     16 Lpic_point:
     17 	popl	%eax
        	# %ebp = address of OPENSSL_ia32cap_P, read from its non-lazy pointer.
     18 	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
        	# Take the SSSE3 path only when bit 24 of capability word 0 and bit 9
        	# of capability word 1 are both set.
        	# NOTE(review): presumably the FXSR and SSSE3 CPUID feature bits --
        	# confirm against the OPENSSL_ia32cap documentation.
     19 	testl	$16777216,(%ebp)
     20 	jz	L001x86
     21 	testl	$512,4(%ebp)
     22 	jz	L001x86
        	# %eax still holds the address of Lpic_point; the SSSE3 code uses it
        	# to locate Lssse3_data.
     23 	jmp	Lssse3_shortcut
     24 L001x86:
        # Scalar path setup. %esi = key pointer and %edi = counter pointer are
        # read before %esp moves. A 132-byte frame is then reserved:
        #   working state (16 words)  at 0..60(%esp)
        #   key words                 at 80..108(%esp)
        #   counter-block words       at 112..124(%esp)
        #   round counter spill       at 128(%esp)
        # With the frame in place the stack arguments are found 132 bytes
        # higher: out at 152(%esp), in at 156(%esp), len at 160(%esp).
     25 	movl	32(%esp),%esi
     26 	movl	36(%esp),%edi
     27 	subl	$132,%esp
        	# Copy the eight key words.
     28 	movl	(%esi),%eax
     29 	movl	4(%esi),%ebx
     30 	movl	8(%esi),%ecx
     31 	movl	12(%esi),%edx
     32 	movl	%eax,80(%esp)
     33 	movl	%ebx,84(%esp)
     34 	movl	%ecx,88(%esp)
     35 	movl	%edx,92(%esp)
     36 	movl	16(%esi),%eax
     37 	movl	20(%esi),%ebx
     38 	movl	24(%esi),%ecx
     39 	movl	28(%esi),%edx
     40 	movl	%eax,96(%esp)
     41 	movl	%ebx,100(%esp)
     42 	movl	%ecx,104(%esp)
     43 	movl	%edx,108(%esp)
        	# Copy the four counter-block words.
     44 	movl	(%edi),%eax
     45 	movl	4(%edi),%ebx
     46 	movl	8(%edi),%ecx
     47 	movl	12(%edi),%edx
        	# Word 0 is stored minus one because L002entry adds one to it before
        	# every block, including the first.
     48 	subl	$1,%eax
     49 	movl	%eax,112(%esp)
     50 	movl	%ebx,116(%esp)
     51 	movl	%ecx,120(%esp)
     52 	movl	%edx,124(%esp)
     53 	jmp	L002entry
     54 .align	4,0x90
        # L003outer_loop is reached after a full 64-byte block was written, with
        # %eax = advanced out pointer, %ebx = advanced in pointer and %ecx =
        # remaining length; all three are written back to the argument slots.
     55 L003outer_loop:
     56 	movl	%ebx,156(%esp)
     57 	movl	%eax,152(%esp)
     58 	movl	%ecx,160(%esp)
     59 L002entry:
        # Build the working state for the next block. Words 0, 4, 8, 9 and 12
        # are in %eax, %ebp, %ecx, %esi and %edx on entry to L004loop; the
        # remaining words are written to their slots at 4*i(%esp).
        	# State words 0..3 are the constants 0x61707865, 0x3320646e,
        	# then 0x79622d32 and 0x6b206574.
        	# NOTE(review): presumably the ASCII text: expand 32-byte k -- confirm.
     60 	movl	$1634760805,%eax
     61 	movl	$857760878,4(%esp)
     62 	movl	$2036477234,8(%esp)
     63 	movl	$1797285236,12(%esp)
     64 	movl	84(%esp),%ebx
     65 	movl	88(%esp),%ebp
     66 	movl	104(%esp),%ecx
     67 	movl	108(%esp),%esi
     68 	movl	116(%esp),%edx
     69 	movl	120(%esp),%edi
     70 	movl	%ebx,20(%esp)
     71 	movl	%ebp,24(%esp)
     72 	movl	%ecx,40(%esp)
     73 	movl	%esi,44(%esp)
     74 	movl	%edx,52(%esp)
     75 	movl	%edi,56(%esp)
     76 	movl	92(%esp),%ebx
     77 	movl	124(%esp),%edi
     78 	movl	112(%esp),%edx
     79 	movl	80(%esp),%ebp
     80 	movl	96(%esp),%ecx
     81 	movl	100(%esp),%esi
        	# Bump the block counter (word 12) for this block and keep the new
        	# value at 112(%esp).
     82 	addl	$1,%edx
     83 	movl	%ebx,28(%esp)
     84 	movl	%edi,60(%esp)
     85 	movl	%edx,112(%esp)
        	# %ebx = number of passes through L004loop.
     86 	movl	$10,%ebx
     87 	jmp	L004loop
     88 .align	4,0x90
     89 L004loop:
        # One pass holds eight quarter-round sequences (add, xor, rotate left by
        # 16, 12, 8 and 7), so ten passes give 20 rounds. Instructions of
        # neighbouring quarter-rounds are interleaved. The first four sequences
        # work on words (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15).
        # State words not held in a register live at 4*i(%esp); the pass
        # counter is parked at 128(%esp) while %ebx is used as a state word.
     90 	addl	%ebp,%eax
     91 	movl	%ebx,128(%esp)
     92 	movl	%ebp,%ebx
     93 	xorl	%eax,%edx
     94 	roll	$16,%edx
     95 	addl	%edx,%ecx
     96 	xorl	%ecx,%ebx
     97 	movl	52(%esp),%edi
     98 	roll	$12,%ebx
     99 	movl	20(%esp),%ebp
    100 	addl	%ebx,%eax
    101 	xorl	%eax,%edx
    102 	movl	%eax,(%esp)
    103 	roll	$8,%edx
    104 	movl	4(%esp),%eax
    105 	addl	%edx,%ecx
    106 	movl	%edx,48(%esp)
    107 	xorl	%ecx,%ebx
    108 	addl	%ebp,%eax
    109 	roll	$7,%ebx
    110 	xorl	%eax,%edi
    111 	movl	%ecx,32(%esp)
    112 	roll	$16,%edi
    113 	movl	%ebx,16(%esp)
    114 	addl	%edi,%esi
    115 	movl	40(%esp),%ecx
    116 	xorl	%esi,%ebp
    117 	movl	56(%esp),%edx
    118 	roll	$12,%ebp
    119 	movl	24(%esp),%ebx
    120 	addl	%ebp,%eax
    121 	xorl	%eax,%edi
    122 	movl	%eax,4(%esp)
    123 	roll	$8,%edi
    124 	movl	8(%esp),%eax
    125 	addl	%edi,%esi
    126 	movl	%edi,52(%esp)
    127 	xorl	%esi,%ebp
    128 	addl	%ebx,%eax
    129 	roll	$7,%ebp
    130 	xorl	%eax,%edx
    131 	movl	%esi,36(%esp)
    132 	roll	$16,%edx
    133 	movl	%ebp,20(%esp)
    134 	addl	%edx,%ecx
    135 	movl	44(%esp),%esi
    136 	xorl	%ecx,%ebx
    137 	movl	60(%esp),%edi
    138 	roll	$12,%ebx
    139 	movl	28(%esp),%ebp
    140 	addl	%ebx,%eax
    141 	xorl	%eax,%edx
    142 	movl	%eax,8(%esp)
    143 	roll	$8,%edx
    144 	movl	12(%esp),%eax
    145 	addl	%edx,%ecx
    146 	movl	%edx,56(%esp)
    147 	xorl	%ecx,%ebx
    148 	addl	%ebp,%eax
    149 	roll	$7,%ebx
    150 	xorl	%eax,%edi
    151 	roll	$16,%edi
    152 	movl	%ebx,24(%esp)
    153 	addl	%edi,%esi
    154 	xorl	%esi,%ebp
    155 	roll	$12,%ebp
    156 	movl	20(%esp),%ebx
    157 	addl	%ebp,%eax
    158 	xorl	%eax,%edi
    159 	movl	%eax,12(%esp)
    160 	roll	$8,%edi
        	# Second half of the pass: the remaining four sequences work on
        	# words (0,5,10,15), (1,6,11,12), (2,7,8,13) and (3,4,9,14).
    161 	movl	(%esp),%eax
    162 	addl	%edi,%esi
    163 	movl	%edi,%edx
    164 	xorl	%esi,%ebp
    165 	addl	%ebx,%eax
    166 	roll	$7,%ebp
    167 	xorl	%eax,%edx
    168 	roll	$16,%edx
    169 	movl	%ebp,28(%esp)
    170 	addl	%edx,%ecx
    171 	xorl	%ecx,%ebx
    172 	movl	48(%esp),%edi
    173 	roll	$12,%ebx
    174 	movl	24(%esp),%ebp
    175 	addl	%ebx,%eax
    176 	xorl	%eax,%edx
    177 	movl	%eax,(%esp)
    178 	roll	$8,%edx
    179 	movl	4(%esp),%eax
    180 	addl	%edx,%ecx
    181 	movl	%edx,60(%esp)
    182 	xorl	%ecx,%ebx
    183 	addl	%ebp,%eax
    184 	roll	$7,%ebx
    185 	xorl	%eax,%edi
    186 	movl	%ecx,40(%esp)
    187 	roll	$16,%edi
    188 	movl	%ebx,20(%esp)
    189 	addl	%edi,%esi
    190 	movl	32(%esp),%ecx
    191 	xorl	%esi,%ebp
    192 	movl	52(%esp),%edx
    193 	roll	$12,%ebp
    194 	movl	28(%esp),%ebx
    195 	addl	%ebp,%eax
    196 	xorl	%eax,%edi
    197 	movl	%eax,4(%esp)
    198 	roll	$8,%edi
    199 	movl	8(%esp),%eax
    200 	addl	%edi,%esi
    201 	movl	%edi,48(%esp)
    202 	xorl	%esi,%ebp
    203 	addl	%ebx,%eax
    204 	roll	$7,%ebp
    205 	xorl	%eax,%edx
    206 	movl	%esi,44(%esp)
    207 	roll	$16,%edx
    208 	movl	%ebp,24(%esp)
    209 	addl	%edx,%ecx
    210 	movl	36(%esp),%esi
    211 	xorl	%ecx,%ebx
    212 	movl	56(%esp),%edi
    213 	roll	$12,%ebx
    214 	movl	16(%esp),%ebp
    215 	addl	%ebx,%eax
    216 	xorl	%eax,%edx
    217 	movl	%eax,8(%esp)
    218 	roll	$8,%edx
    219 	movl	12(%esp),%eax
    220 	addl	%edx,%ecx
    221 	movl	%edx,52(%esp)
    222 	xorl	%ecx,%ebx
    223 	addl	%ebp,%eax
    224 	roll	$7,%ebx
    225 	xorl	%eax,%edi
    226 	roll	$16,%edi
    227 	movl	%ebx,28(%esp)
    228 	addl	%edi,%esi
    229 	xorl	%esi,%ebp
    230 	movl	48(%esp),%edx
    231 	roll	$12,%ebp
        	# Reload the pass counter spilled at the top of the loop.
    232 	movl	128(%esp),%ebx
    233 	addl	%ebp,%eax
    234 	xorl	%eax,%edi
    235 	movl	%eax,12(%esp)
    236 	roll	$8,%edi
    237 	movl	(%esp),%eax
    238 	addl	%edi,%esi
    239 	movl	%edi,56(%esp)
    240 	xorl	%esi,%ebp
    241 	roll	$7,%ebp
    242 	decl	%ebx
    243 	jnz	L004loop
        	# Rounds done. On exit %eax, %ebp, %ecx, %esi, %edx, %edi hold words
        	# 0, 4, 8, 9, 12, 14. %ebx = remaining length. Add the initial state
        	# to words 0, 4, 8, 9 and branch to the tail when fewer than 64
        	# bytes remain.
    244 	movl	160(%esp),%ebx
    245 	addl	$1634760805,%eax
    246 	addl	80(%esp),%ebp
    247 	addl	96(%esp),%ecx
    248 	addl	100(%esp),%esi
    249 	cmpl	$64,%ebx
    250 	jb	L005tail
        	# Full block: %ebx = in pointer. Finish words 12 and 14, XOR the six
        	# register-resident words with the input and store them. The XORed
        	# word 0 is parked at (%esp) because %eax becomes the out pointer.
    251 	movl	156(%esp),%ebx
    252 	addl	112(%esp),%edx
    253 	addl	120(%esp),%edi
    254 	xorl	(%ebx),%eax
    255 	xorl	16(%ebx),%ebp
    256 	movl	%eax,(%esp)
    257 	movl	152(%esp),%eax
    258 	xorl	32(%ebx),%ecx
    259 	xorl	36(%ebx),%esi
    260 	xorl	48(%ebx),%edx
    261 	xorl	56(%ebx),%edi
    262 	movl	%ebp,16(%eax)
    263 	movl	%ecx,32(%eax)
    264 	movl	%esi,36(%eax)
    265 	movl	%edx,48(%eax)
    266 	movl	%edi,56(%eax)
        	# Words 1, 2, 3, 5, 6: load, add initial state, XOR with input, store.
    267 	movl	4(%esp),%ebp
    268 	movl	8(%esp),%ecx
    269 	movl	12(%esp),%esi
    270 	movl	20(%esp),%edx
    271 	movl	24(%esp),%edi
    272 	addl	$857760878,%ebp
    273 	addl	$2036477234,%ecx
    274 	addl	$1797285236,%esi
    275 	addl	84(%esp),%edx
    276 	addl	88(%esp),%edi
    277 	xorl	4(%ebx),%ebp
    278 	xorl	8(%ebx),%ecx
    279 	xorl	12(%ebx),%esi
    280 	xorl	20(%ebx),%edx
    281 	xorl	24(%ebx),%edi
    282 	movl	%ebp,4(%eax)
    283 	movl	%ecx,8(%eax)
    284 	movl	%esi,12(%eax)
    285 	movl	%edx,20(%eax)
    286 	movl	%edi,24(%eax)
        	# Words 7, 10, 11, 13, 15: same treatment.
    287 	movl	28(%esp),%ebp
    288 	movl	40(%esp),%ecx
    289 	movl	44(%esp),%esi
    290 	movl	52(%esp),%edx
    291 	movl	60(%esp),%edi
    292 	addl	92(%esp),%ebp
    293 	addl	104(%esp),%ecx
    294 	addl	108(%esp),%esi
    295 	addl	116(%esp),%edx
    296 	addl	124(%esp),%edi
    297 	xorl	28(%ebx),%ebp
    298 	xorl	40(%ebx),%ecx
    299 	xorl	44(%ebx),%esi
    300 	xorl	52(%ebx),%edx
    301 	xorl	60(%ebx),%edi
        	# Advance the in pointer; fetch the parked word 0 and the length.
    302 	leal	64(%ebx),%ebx
    303 	movl	%ebp,28(%eax)
    304 	movl	(%esp),%ebp
    305 	movl	%ecx,40(%eax)
    306 	movl	160(%esp),%ecx
    307 	movl	%esi,44(%eax)
    308 	movl	%edx,52(%eax)
    309 	movl	%edi,60(%eax)
    310 	movl	%ebp,(%eax)
        	# Advance the out pointer, take 64 off the length and go round again
        	# while bytes remain.
    311 	leal	64(%eax),%eax
    312 	subl	$64,%ecx
    313 	jnz	L003outer_loop
    314 	jmp	L006done
    315 L005tail:
        # Final partial block. %ebx = remaining byte count (below 64). The
        # keystream block is completed in the working area at 0..60(%esp)
        # instead of being XORed word by word.
    316 	addl	112(%esp),%edx
    317 	addl	120(%esp),%edi
    318 	movl	%eax,(%esp)
    319 	movl	%ebp,16(%esp)
    320 	movl	%ecx,32(%esp)
    321 	movl	%esi,36(%esp)
    322 	movl	%edx,48(%esp)
    323 	movl	%edi,56(%esp)
    324 	movl	4(%esp),%ebp
    325 	movl	8(%esp),%ecx
    326 	movl	12(%esp),%esi
    327 	movl	20(%esp),%edx
    328 	movl	24(%esp),%edi
    329 	addl	$857760878,%ebp
    330 	addl	$2036477234,%ecx
    331 	addl	$1797285236,%esi
    332 	addl	84(%esp),%edx
    333 	addl	88(%esp),%edi
    334 	movl	%ebp,4(%esp)
    335 	movl	%ecx,8(%esp)
    336 	movl	%esi,12(%esp)
    337 	movl	%edx,20(%esp)
    338 	movl	%edi,24(%esp)
    339 	movl	28(%esp),%ebp
    340 	movl	40(%esp),%ecx
    341 	movl	44(%esp),%esi
    342 	movl	52(%esp),%edx
    343 	movl	60(%esp),%edi
    344 	addl	92(%esp),%ebp
    345 	addl	104(%esp),%ecx
    346 	addl	108(%esp),%esi
    347 	addl	116(%esp),%edx
    348 	addl	124(%esp),%edi
        	# Set up the byte loop: %ebp = in pointer, %ecx = out pointer,
        	# %esi = byte index (0), %eax and %edx cleared as byte scratch.
    349 	movl	%ebp,28(%esp)
    350 	movl	156(%esp),%ebp
    351 	movl	%ecx,40(%esp)
    352 	movl	152(%esp),%ecx
    353 	movl	%esi,44(%esp)
    354 	xorl	%esi,%esi
    355 	movl	%edx,52(%esp)
    356 	movl	%edi,60(%esp)
    357 	xorl	%eax,%eax
    358 	xorl	%edx,%edx
    359 L007tail_loop:
        	# out[i] = in[i] ^ keystream[i]; the index is advanced before the
        	# store, hence the -1 displacement. %ebx counts the bytes down.
    360 	movb	(%esi,%ebp,1),%al
    361 	movb	(%esp,%esi,1),%dl
    362 	leal	1(%esi),%esi
    363 	xorb	%dl,%al
    364 	movb	%al,-1(%ecx,%esi,1)
    365 	decl	%ebx
    366 	jnz	L007tail_loop
    367 L006done:
        	# Release the 132-byte scalar frame.
    368 	addl	$132,%esp
    369 L000no_data:
        	# Common exit (also used directly when len == 0): restore the four
        	# registers pushed at entry, in reverse order, and return.
    370 	popl	%edi
    371 	popl	%esi
    372 	popl	%ebx
    373 	popl	%ebp
    374 	ret
    375 .globl	_ChaCha20_ssse3
    376 .private_extern	_ChaCha20_ssse3
    377 .align	4
        # _ChaCha20_ssse3 -- SSSE3 implementation. Takes the same five stack
        # arguments as _ChaCha20_ctr32 (out, in, len, key, counter at 20, 24, 28,
        # 32 and 36(%esp) once the four registers are pushed).
        #
        # There are two ways in:
        #   * _ChaCha20_ctr32 jumps to Lssse3_shortcut with the registers already
        #     pushed, len known to be nonzero and %eax = address of Lpic_point.
        #   * A direct call to _ChaCha20_ssse3 sets up the same conditions itself:
        #     it returns through L000no_data when len is zero (the byte-wise tail
        #     loop requires a nonzero count) and computes the address of
        #     Lpic_point into %eax, which the code at Lssse3_shortcut needs in
        #     order to locate Lssse3_data.
        #
        # Frame after the alignment of %esp to 64 bytes:
        #   working state rows (4x path)   at 0..255(%esp), addressed via %ebx
        #   initial state rows (4x path)   at 256..511(%esp), addressed via %ebp
        #   caller %esp                    at 512(%esp)
        #   key pointer, counter pointer   at 516(%esp), 520(%esp)
        # The one-block path reuses 0..63(%esp) for its four state rows.
    378 _ChaCha20_ssse3:
    379 L_ChaCha20_ssse3_begin:
    380 	pushl	%ebp
    381 	pushl	%ebx
    382 	pushl	%esi
    383 	pushl	%edi
        	# Direct entry only: nothing to do for len == 0. The stack holds
        	# exactly the four pushes, which is what L000no_data unwinds.
        	xorl	%eax,%eax
        	cmpl	28(%esp),%eax
        	je	L000no_data
        	# Direct entry only: make %eax the run-time address of Lpic_point,
        	# matching what the jump from _ChaCha20_ctr32 provides.
        	call	Lssse3_pic_point
        Lssse3_pic_point:
        	popl	%eax
        	leal	Lpic_point-Lssse3_pic_point(%eax),%eax
    384 Lssse3_shortcut:
        	# %edi = out, %esi = in, %ecx = len, %edx = key, %ebx = counter.
    385 	movl	20(%esp),%edi
    386 	movl	24(%esp),%esi
    387 	movl	28(%esp),%ecx
    388 	movl	32(%esp),%edx
    389 	movl	36(%esp),%ebx
        	# Reserve 524 bytes, align %esp down to 64 and remember the caller
        	# stack pointer so that L011done can restore it.
    390 	movl	%esp,%ebp
    391 	subl	$524,%esp
    392 	andl	$-64,%esp
    393 	movl	%ebp,512(%esp)
        	# %eax = address of Lssse3_data; %xmm3 = the 16-byte counter block.
    394 	leal	Lssse3_data-Lpic_point(%eax),%eax
    395 	movdqu	(%ebx),%xmm3
        	# Inputs shorter than 256 bytes go straight to the one-block code.
    396 	cmpl	$256,%ecx
    397 	jb	L0081x
        	# Four-blocks-at-a-time setup. Every state word is broadcast to all
        	# four lanes of its own 16-byte row at 16*i-128(%ebp). The length is
        	# biased by -256 so the loop can test for a borrow after subtracting.
    398 	movl	%edx,516(%esp)
    399 	movl	%ebx,520(%esp)
    400 	subl	$256,%ecx
    401 	leal	384(%esp),%ebp
    402 	movdqu	(%edx),%xmm7
    403 	pshufd	$0,%xmm3,%xmm0
    404 	pshufd	$85,%xmm3,%xmm1
    405 	pshufd	$170,%xmm3,%xmm2
    406 	pshufd	$255,%xmm3,%xmm3
        	# Row 12 (block counter): add {0,1,2,3} so the lanes count four
        	# consecutive blocks, then subtract {4,4,4,4} because
        	# L009outer_loop adds 4 at the start of every pass.
    407 	paddd	48(%eax),%xmm0
    408 	pshufd	$0,%xmm7,%xmm4
    409 	pshufd	$85,%xmm7,%xmm5
    410 	psubd	64(%eax),%xmm0
    411 	pshufd	$170,%xmm7,%xmm6
    412 	pshufd	$255,%xmm7,%xmm7
    413 	movdqa	%xmm0,64(%ebp)
    414 	movdqa	%xmm1,80(%ebp)
    415 	movdqa	%xmm2,96(%ebp)
    416 	movdqa	%xmm3,112(%ebp)
    417 	movdqu	16(%edx),%xmm3
    418 	movdqa	%xmm4,-64(%ebp)
    419 	movdqa	%xmm5,-48(%ebp)
    420 	movdqa	%xmm6,-32(%ebp)
    421 	movdqa	%xmm7,-16(%ebp)
        	# Rows 0..3 come from the constant words at 32(%eax); rows 8..11
        	# from the second half of the key. %ebx = base of the working rows.
    422 	movdqa	32(%eax),%xmm7
    423 	leal	128(%esp),%ebx
    424 	pshufd	$0,%xmm3,%xmm0
    425 	pshufd	$85,%xmm3,%xmm1
    426 	pshufd	$170,%xmm3,%xmm2
    427 	pshufd	$255,%xmm3,%xmm3
    428 	pshufd	$0,%xmm7,%xmm4
    429 	pshufd	$85,%xmm7,%xmm5
    430 	pshufd	$170,%xmm7,%xmm6
    431 	pshufd	$255,%xmm7,%xmm7
    432 	movdqa	%xmm0,(%ebp)
    433 	movdqa	%xmm1,16(%ebp)
    434 	movdqa	%xmm2,32(%ebp)
    435 	movdqa	%xmm3,48(%ebp)
    436 	movdqa	%xmm4,-128(%ebp)
    437 	movdqa	%xmm5,-112(%ebp)
    438 	movdqa	%xmm6,-96(%ebp)
    439 	movdqa	%xmm7,-80(%ebp)
        	# Bias both data pointers by +128; the output code addresses the
        	# four blocks at displacements -128, -64, 0 and 64.
    440 	leal	128(%esi),%esi
    441 	leal	128(%edi),%edi
    442 	jmp	L009outer_loop
    443 .align	4,0x90
    444 L009outer_loop:
        # Start of one 256-byte pass: copy the initial rows (at %ebp) into the
        # working rows (at %ebx). Rows 0, 4, 8 and 9 are not copied to memory;
        # they start out in %xmm0, %xmm3, %xmm4 and %xmm5. Row 12 is copied and
        # is also kept in %xmm6.
    445 	movdqa	-112(%ebp),%xmm1
    446 	movdqa	-96(%ebp),%xmm2
    447 	movdqa	-80(%ebp),%xmm3
    448 	movdqa	-48(%ebp),%xmm5
    449 	movdqa	-32(%ebp),%xmm6
    450 	movdqa	-16(%ebp),%xmm7
    451 	movdqa	%xmm1,-112(%ebx)
    452 	movdqa	%xmm2,-96(%ebx)
    453 	movdqa	%xmm3,-80(%ebx)
    454 	movdqa	%xmm5,-48(%ebx)
    455 	movdqa	%xmm6,-32(%ebx)
    456 	movdqa	%xmm7,-16(%ebx)
    457 	movdqa	32(%ebp),%xmm2
    458 	movdqa	48(%ebp),%xmm3
    459 	movdqa	64(%ebp),%xmm4
    460 	movdqa	80(%ebp),%xmm5
    461 	movdqa	96(%ebp),%xmm6
    462 	movdqa	112(%ebp),%xmm7
        	# Advance the four block counters (row 12) by {4,4,4,4}; the new
        	# value goes to both the working row and the initial row.
    463 	paddd	64(%eax),%xmm4
    464 	movdqa	%xmm2,32(%ebx)
    465 	movdqa	%xmm3,48(%ebx)
    466 	movdqa	%xmm4,64(%ebx)
    467 	movdqa	%xmm5,80(%ebx)
    468 	movdqa	%xmm6,96(%ebx)
    469 	movdqa	%xmm7,112(%ebx)
    470 	movdqa	%xmm4,64(%ebp)
    471 	movdqa	-128(%ebp),%xmm0
    472 	movdqa	%xmm4,%xmm6
    473 	movdqa	-64(%ebp),%xmm3
    474 	movdqa	(%ebp),%xmm4
    475 	movdqa	16(%ebp),%xmm5
        	# %edx = number of passes through L010loop.
    476 	movl	$10,%edx
    477 	nop
    478 .align	4,0x90
    479 L010loop:
        # One pass holds eight quarter-round sequences applied to four blocks
        # at once (one block per 32-bit lane). Rotations by 16 and 8 are done
        # with pshufb using the masks at (%eax) and 16(%eax); rotations by 12
        # and 7 use a pslld/psrld/por triple. Rows that do not fit in registers
        # are spilled to and reloaded from 16*i-128(%ebx); instructions of
        # neighbouring quarter-rounds are interleaved.
    480 	paddd	%xmm3,%xmm0
    481 	movdqa	%xmm3,%xmm2
    482 	pxor	%xmm0,%xmm6
    483 	pshufb	(%eax),%xmm6
    484 	paddd	%xmm6,%xmm4
    485 	pxor	%xmm4,%xmm2
    486 	movdqa	-48(%ebx),%xmm3
    487 	movdqa	%xmm2,%xmm1
    488 	pslld	$12,%xmm2
    489 	psrld	$20,%xmm1
    490 	por	%xmm1,%xmm2
    491 	movdqa	-112(%ebx),%xmm1
    492 	paddd	%xmm2,%xmm0
    493 	movdqa	80(%ebx),%xmm7
    494 	pxor	%xmm0,%xmm6
    495 	movdqa	%xmm0,-128(%ebx)
    496 	pshufb	16(%eax),%xmm6
    497 	paddd	%xmm6,%xmm4
    498 	movdqa	%xmm6,64(%ebx)
    499 	pxor	%xmm4,%xmm2
    500 	paddd	%xmm3,%xmm1
    501 	movdqa	%xmm2,%xmm0
    502 	pslld	$7,%xmm2
    503 	psrld	$25,%xmm0
    504 	pxor	%xmm1,%xmm7
    505 	por	%xmm0,%xmm2
    506 	movdqa	%xmm4,(%ebx)
    507 	pshufb	(%eax),%xmm7
    508 	movdqa	%xmm2,-64(%ebx)
    509 	paddd	%xmm7,%xmm5
    510 	movdqa	32(%ebx),%xmm4
    511 	pxor	%xmm5,%xmm3
    512 	movdqa	-32(%ebx),%xmm2
    513 	movdqa	%xmm3,%xmm0
    514 	pslld	$12,%xmm3
    515 	psrld	$20,%xmm0
    516 	por	%xmm0,%xmm3
    517 	movdqa	-96(%ebx),%xmm0
    518 	paddd	%xmm3,%xmm1
    519 	movdqa	96(%ebx),%xmm6
    520 	pxor	%xmm1,%xmm7
    521 	movdqa	%xmm1,-112(%ebx)
    522 	pshufb	16(%eax),%xmm7
    523 	paddd	%xmm7,%xmm5
    524 	movdqa	%xmm7,80(%ebx)
    525 	pxor	%xmm5,%xmm3
    526 	paddd	%xmm2,%xmm0
    527 	movdqa	%xmm3,%xmm1
    528 	pslld	$7,%xmm3
    529 	psrld	$25,%xmm1
    530 	pxor	%xmm0,%xmm6
    531 	por	%xmm1,%xmm3
    532 	movdqa	%xmm5,16(%ebx)
    533 	pshufb	(%eax),%xmm6
    534 	movdqa	%xmm3,-48(%ebx)
    535 	paddd	%xmm6,%xmm4
    536 	movdqa	48(%ebx),%xmm5
    537 	pxor	%xmm4,%xmm2
    538 	movdqa	-16(%ebx),%xmm3
    539 	movdqa	%xmm2,%xmm1
    540 	pslld	$12,%xmm2
    541 	psrld	$20,%xmm1
    542 	por	%xmm1,%xmm2
    543 	movdqa	-80(%ebx),%xmm1
    544 	paddd	%xmm2,%xmm0
    545 	movdqa	112(%ebx),%xmm7
    546 	pxor	%xmm0,%xmm6
    547 	movdqa	%xmm0,-96(%ebx)
    548 	pshufb	16(%eax),%xmm6
    549 	paddd	%xmm6,%xmm4
    550 	movdqa	%xmm6,96(%ebx)
    551 	pxor	%xmm4,%xmm2
    552 	paddd	%xmm3,%xmm1
    553 	movdqa	%xmm2,%xmm0
    554 	pslld	$7,%xmm2
    555 	psrld	$25,%xmm0
    556 	pxor	%xmm1,%xmm7
    557 	por	%xmm0,%xmm2
    558 	pshufb	(%eax),%xmm7
    559 	movdqa	%xmm2,-32(%ebx)
    560 	paddd	%xmm7,%xmm5
    561 	pxor	%xmm5,%xmm3
    562 	movdqa	-48(%ebx),%xmm2
    563 	movdqa	%xmm3,%xmm0
    564 	pslld	$12,%xmm3
    565 	psrld	$20,%xmm0
    566 	por	%xmm0,%xmm3
    567 	movdqa	-128(%ebx),%xmm0
    568 	paddd	%xmm3,%xmm1
    569 	pxor	%xmm1,%xmm7
    570 	movdqa	%xmm1,-80(%ebx)
    571 	pshufb	16(%eax),%xmm7
    572 	paddd	%xmm7,%xmm5
    573 	movdqa	%xmm7,%xmm6
    574 	pxor	%xmm5,%xmm3
    575 	paddd	%xmm2,%xmm0
    576 	movdqa	%xmm3,%xmm1
    577 	pslld	$7,%xmm3
    578 	psrld	$25,%xmm1
    579 	pxor	%xmm0,%xmm6
    580 	por	%xmm1,%xmm3
    581 	pshufb	(%eax),%xmm6
    582 	movdqa	%xmm3,-16(%ebx)
    583 	paddd	%xmm6,%xmm4
    584 	pxor	%xmm4,%xmm2
    585 	movdqa	-32(%ebx),%xmm3
    586 	movdqa	%xmm2,%xmm1
    587 	pslld	$12,%xmm2
    588 	psrld	$20,%xmm1
    589 	por	%xmm1,%xmm2
    590 	movdqa	-112(%ebx),%xmm1
    591 	paddd	%xmm2,%xmm0
    592 	movdqa	64(%ebx),%xmm7
    593 	pxor	%xmm0,%xmm6
    594 	movdqa	%xmm0,-128(%ebx)
    595 	pshufb	16(%eax),%xmm6
    596 	paddd	%xmm6,%xmm4
    597 	movdqa	%xmm6,112(%ebx)
    598 	pxor	%xmm4,%xmm2
    599 	paddd	%xmm3,%xmm1
    600 	movdqa	%xmm2,%xmm0
    601 	pslld	$7,%xmm2
    602 	psrld	$25,%xmm0
    603 	pxor	%xmm1,%xmm7
    604 	por	%xmm0,%xmm2
    605 	movdqa	%xmm4,32(%ebx)
    606 	pshufb	(%eax),%xmm7
    607 	movdqa	%xmm2,-48(%ebx)
    608 	paddd	%xmm7,%xmm5
    609 	movdqa	(%ebx),%xmm4
    610 	pxor	%xmm5,%xmm3
    611 	movdqa	-16(%ebx),%xmm2
    612 	movdqa	%xmm3,%xmm0
    613 	pslld	$12,%xmm3
    614 	psrld	$20,%xmm0
    615 	por	%xmm0,%xmm3
    616 	movdqa	-96(%ebx),%xmm0
    617 	paddd	%xmm3,%xmm1
    618 	movdqa	80(%ebx),%xmm6
    619 	pxor	%xmm1,%xmm7
    620 	movdqa	%xmm1,-112(%ebx)
    621 	pshufb	16(%eax),%xmm7
    622 	paddd	%xmm7,%xmm5
    623 	movdqa	%xmm7,64(%ebx)
    624 	pxor	%xmm5,%xmm3
    625 	paddd	%xmm2,%xmm0
    626 	movdqa	%xmm3,%xmm1
    627 	pslld	$7,%xmm3
    628 	psrld	$25,%xmm1
    629 	pxor	%xmm0,%xmm6
    630 	por	%xmm1,%xmm3
    631 	movdqa	%xmm5,48(%ebx)
    632 	pshufb	(%eax),%xmm6
    633 	movdqa	%xmm3,-32(%ebx)
    634 	paddd	%xmm6,%xmm4
    635 	movdqa	16(%ebx),%xmm5
    636 	pxor	%xmm4,%xmm2
    637 	movdqa	-64(%ebx),%xmm3
    638 	movdqa	%xmm2,%xmm1
    639 	pslld	$12,%xmm2
    640 	psrld	$20,%xmm1
    641 	por	%xmm1,%xmm2
    642 	movdqa	-80(%ebx),%xmm1
    643 	paddd	%xmm2,%xmm0
    644 	movdqa	96(%ebx),%xmm7
    645 	pxor	%xmm0,%xmm6
    646 	movdqa	%xmm0,-96(%ebx)
    647 	pshufb	16(%eax),%xmm6
    648 	paddd	%xmm6,%xmm4
    649 	movdqa	%xmm6,80(%ebx)
    650 	pxor	%xmm4,%xmm2
    651 	paddd	%xmm3,%xmm1
    652 	movdqa	%xmm2,%xmm0
    653 	pslld	$7,%xmm2
    654 	psrld	$25,%xmm0
    655 	pxor	%xmm1,%xmm7
    656 	por	%xmm0,%xmm2
    657 	pshufb	(%eax),%xmm7
    658 	movdqa	%xmm2,-16(%ebx)
    659 	paddd	%xmm7,%xmm5
    660 	pxor	%xmm5,%xmm3
    661 	movdqa	%xmm3,%xmm0
    662 	pslld	$12,%xmm3
    663 	psrld	$20,%xmm0
    664 	por	%xmm0,%xmm3
    665 	movdqa	-128(%ebx),%xmm0
    666 	paddd	%xmm3,%xmm1
    667 	movdqa	64(%ebx),%xmm6
    668 	pxor	%xmm1,%xmm7
    669 	movdqa	%xmm1,-80(%ebx)
    670 	pshufb	16(%eax),%xmm7
    671 	paddd	%xmm7,%xmm5
    672 	movdqa	%xmm7,96(%ebx)
    673 	pxor	%xmm5,%xmm3
    674 	movdqa	%xmm3,%xmm1
    675 	pslld	$7,%xmm3
    676 	psrld	$25,%xmm1
    677 	por	%xmm1,%xmm3
    678 	decl	%edx
    679 	jnz	L010loop
        	# Rounds done. Write the rows still held in registers (4, 8, 9, 12,
        	# 14) back to the working area.
    680 	movdqa	%xmm3,-64(%ebx)
    681 	movdqa	%xmm4,(%ebx)
    682 	movdqa	%xmm5,16(%ebx)
    683 	movdqa	%xmm6,64(%ebx)
    684 	movdqa	%xmm7,96(%ebx)
        	# Output, four rows at a time. For rows 0..3: add the initial rows,
        	# then transpose the 4x4 matrix of 32-bit lanes so that %xmm0,
        	# %xmm1, %xmm6, %xmm3 hold words 0..3 of blocks 0, 1, 2, 3.
    685 	movdqa	-112(%ebx),%xmm1
    686 	movdqa	-96(%ebx),%xmm2
    687 	movdqa	-80(%ebx),%xmm3
    688 	paddd	-128(%ebp),%xmm0
    689 	paddd	-112(%ebp),%xmm1
    690 	paddd	-96(%ebp),%xmm2
    691 	paddd	-80(%ebp),%xmm3
    692 	movdqa	%xmm0,%xmm6
    693 	punpckldq	%xmm1,%xmm0
    694 	movdqa	%xmm2,%xmm7
    695 	punpckldq	%xmm3,%xmm2
    696 	punpckhdq	%xmm1,%xmm6
    697 	punpckhdq	%xmm3,%xmm7
    698 	movdqa	%xmm0,%xmm1
    699 	punpcklqdq	%xmm2,%xmm0
    700 	movdqa	%xmm6,%xmm3
    701 	punpcklqdq	%xmm7,%xmm6
    702 	punpckhqdq	%xmm2,%xmm1
    703 	punpckhqdq	%xmm7,%xmm3
        	# XOR with the matching 16 input bytes of each of the four blocks
        	# (64 bytes apart; pointers are biased by +128) and store. Loads of
        	# the next four working rows are interleaved. Both pointers then
        	# move on by 16.
    704 	movdqu	-128(%esi),%xmm4
    705 	movdqu	-64(%esi),%xmm5
    706 	movdqu	(%esi),%xmm2
    707 	movdqu	64(%esi),%xmm7
    708 	leal	16(%esi),%esi
    709 	pxor	%xmm0,%xmm4
    710 	movdqa	-64(%ebx),%xmm0
    711 	pxor	%xmm1,%xmm5
    712 	movdqa	-48(%ebx),%xmm1
    713 	pxor	%xmm2,%xmm6
    714 	movdqa	-32(%ebx),%xmm2
    715 	pxor	%xmm3,%xmm7
    716 	movdqa	-16(%ebx),%xmm3
    717 	movdqu	%xmm4,-128(%edi)
    718 	movdqu	%xmm5,-64(%edi)
    719 	movdqu	%xmm6,(%edi)
    720 	movdqu	%xmm7,64(%edi)
    721 	leal	16(%edi),%edi
        	# Rows 4..7: same add / transpose / XOR / store sequence.
    722 	paddd	-64(%ebp),%xmm0
    723 	paddd	-48(%ebp),%xmm1
    724 	paddd	-32(%ebp),%xmm2
    725 	paddd	-16(%ebp),%xmm3
    726 	movdqa	%xmm0,%xmm6
    727 	punpckldq	%xmm1,%xmm0
    728 	movdqa	%xmm2,%xmm7
    729 	punpckldq	%xmm3,%xmm2
    730 	punpckhdq	%xmm1,%xmm6
    731 	punpckhdq	%xmm3,%xmm7
    732 	movdqa	%xmm0,%xmm1
    733 	punpcklqdq	%xmm2,%xmm0
    734 	movdqa	%xmm6,%xmm3
    735 	punpcklqdq	%xmm7,%xmm6
    736 	punpckhqdq	%xmm2,%xmm1
    737 	punpckhqdq	%xmm7,%xmm3
    738 	movdqu	-128(%esi),%xmm4
    739 	movdqu	-64(%esi),%xmm5
    740 	movdqu	(%esi),%xmm2
    741 	movdqu	64(%esi),%xmm7
    742 	leal	16(%esi),%esi
    743 	pxor	%xmm0,%xmm4
    744 	movdqa	(%ebx),%xmm0
    745 	pxor	%xmm1,%xmm5
    746 	movdqa	16(%ebx),%xmm1
    747 	pxor	%xmm2,%xmm6
    748 	movdqa	32(%ebx),%xmm2
    749 	pxor	%xmm3,%xmm7
    750 	movdqa	48(%ebx),%xmm3
    751 	movdqu	%xmm4,-128(%edi)
    752 	movdqu	%xmm5,-64(%edi)
    753 	movdqu	%xmm6,(%edi)
    754 	movdqu	%xmm7,64(%edi)
    755 	leal	16(%edi),%edi
        	# Rows 8..11.
    756 	paddd	(%ebp),%xmm0
    757 	paddd	16(%ebp),%xmm1
    758 	paddd	32(%ebp),%xmm2
    759 	paddd	48(%ebp),%xmm3
    760 	movdqa	%xmm0,%xmm6
    761 	punpckldq	%xmm1,%xmm0
    762 	movdqa	%xmm2,%xmm7
    763 	punpckldq	%xmm3,%xmm2
    764 	punpckhdq	%xmm1,%xmm6
    765 	punpckhdq	%xmm3,%xmm7
    766 	movdqa	%xmm0,%xmm1
    767 	punpcklqdq	%xmm2,%xmm0
    768 	movdqa	%xmm6,%xmm3
    769 	punpcklqdq	%xmm7,%xmm6
    770 	punpckhqdq	%xmm2,%xmm1
    771 	punpckhqdq	%xmm7,%xmm3
    772 	movdqu	-128(%esi),%xmm4
    773 	movdqu	-64(%esi),%xmm5
    774 	movdqu	(%esi),%xmm2
    775 	movdqu	64(%esi),%xmm7
    776 	leal	16(%esi),%esi
    777 	pxor	%xmm0,%xmm4
    778 	movdqa	64(%ebx),%xmm0
    779 	pxor	%xmm1,%xmm5
    780 	movdqa	80(%ebx),%xmm1
    781 	pxor	%xmm2,%xmm6
    782 	movdqa	96(%ebx),%xmm2
    783 	pxor	%xmm3,%xmm7
    784 	movdqa	112(%ebx),%xmm3
    785 	movdqu	%xmm4,-128(%edi)
    786 	movdqu	%xmm5,-64(%edi)
    787 	movdqu	%xmm6,(%edi)
    788 	movdqu	%xmm7,64(%edi)
    789 	leal	16(%edi),%edi
        	# Rows 12..15. Afterwards the pointers move on by 208, which
        	# together with the three steps of 16 makes 256 bytes per pass.
    790 	paddd	64(%ebp),%xmm0
    791 	paddd	80(%ebp),%xmm1
    792 	paddd	96(%ebp),%xmm2
    793 	paddd	112(%ebp),%xmm3
    794 	movdqa	%xmm0,%xmm6
    795 	punpckldq	%xmm1,%xmm0
    796 	movdqa	%xmm2,%xmm7
    797 	punpckldq	%xmm3,%xmm2
    798 	punpckhdq	%xmm1,%xmm6
    799 	punpckhdq	%xmm3,%xmm7
    800 	movdqa	%xmm0,%xmm1
    801 	punpcklqdq	%xmm2,%xmm0
    802 	movdqa	%xmm6,%xmm3
    803 	punpcklqdq	%xmm7,%xmm6
    804 	punpckhqdq	%xmm2,%xmm1
    805 	punpckhqdq	%xmm7,%xmm3
    806 	movdqu	-128(%esi),%xmm4
    807 	movdqu	-64(%esi),%xmm5
    808 	movdqu	(%esi),%xmm2
    809 	movdqu	64(%esi),%xmm7
    810 	leal	208(%esi),%esi
    811 	pxor	%xmm0,%xmm4
    812 	pxor	%xmm1,%xmm5
    813 	pxor	%xmm2,%xmm6
    814 	pxor	%xmm3,%xmm7
    815 	movdqu	%xmm4,-128(%edi)
    816 	movdqu	%xmm5,-64(%edi)
    817 	movdqu	%xmm6,(%edi)
    818 	movdqu	%xmm7,64(%edi)
    819 	leal	208(%edi),%edi
        	# The length carries a -256 bias: no borrow means at least 256 more
        	# bytes remain, so run another four-block pass.
    820 	subl	$256,%ecx
    821 	jnc	L009outer_loop
        	# Remove the bias; finish when nothing is left.
    822 	addl	$256,%ecx
    823 	jz	L011done
        	# Between 1 and 255 bytes remain: hand over to the one-block code.
        	# %ebx = counter pointer, %edx = key pointer; remove the +128
        	# pointer bias. %xmm3 becomes the caller counter block with word 0
        	# replaced by (lane 0 of the current counter row) + 4.
    824 	movl	520(%esp),%ebx
    825 	leal	-128(%esi),%esi
    826 	movl	516(%esp),%edx
    827 	leal	-128(%edi),%edi
    828 	movd	64(%ebp),%xmm2
    829 	movdqu	(%ebx),%xmm3
    830 	paddd	96(%eax),%xmm2
    831 	pand	112(%eax),%xmm3
    832 	por	%xmm2,%xmm3
    833 L0081x:
        # One-block-at-a-time setup. Expects %edx = key pointer, %xmm3 = counter
        # block, %eax = address of Lssse3_data and unbiased %esi / %edi.
        # The four initial state rows are kept at 0, 16, 32 and 48(%esp);
        # %xmm6 and %xmm7 hold the two pshufb masks for the whole loop.
    834 	movdqa	32(%eax),%xmm0
    835 	movdqu	(%edx),%xmm1
    836 	movdqu	16(%edx),%xmm2
    837 	movdqa	(%eax),%xmm6
    838 	movdqa	16(%eax),%xmm7
        	# NOTE(review): this store looks dead -- the movdqa to 48(%esp) four
        	# instructions below overwrites the same location before any read.
    839 	movl	%ebp,48(%esp)
    840 	movdqa	%xmm0,(%esp)
    841 	movdqa	%xmm1,16(%esp)
    842 	movdqa	%xmm2,32(%esp)
    843 	movdqa	%xmm3,48(%esp)
        	# %edx = number of passes through L012loop1x.
    844 	movl	$10,%edx
    845 	jmp	L012loop1x
    846 .align	4,0x90
    847 L013outer1x:
        # Next 64-byte block: reload rows 0..2, add {1,0,0,0} from 80(%eax) to
        # the saved row 3 (block counter + 1) and store the new row 3 back.
    848 	movdqa	80(%eax),%xmm3
    849 	movdqa	(%esp),%xmm0
    850 	movdqa	16(%esp),%xmm1
    851 	movdqa	32(%esp),%xmm2
    852 	paddd	48(%esp),%xmm3
    853 	movl	$10,%edx
    854 	movdqa	%xmm3,48(%esp)
    855 	jmp	L012loop1x
    856 .align	4,0x90
    857 L012loop1x:
        # One pass = two rounds on a single block whose four rows are in
        # %xmm0..%xmm3. The .byte sequences 102,15,56,0,222 and 102,15,56,0,223
        # are the encodings of pshufb %xmm6,%xmm3 and pshufb %xmm7,%xmm3, i.e.
        # the rotations of row 3 by 16 and by 8 bits.
    858 	paddd	%xmm1,%xmm0
    859 	pxor	%xmm0,%xmm3
    860 .byte	102,15,56,0,222
    861 	paddd	%xmm3,%xmm2
    862 	pxor	%xmm2,%xmm1
    863 	movdqa	%xmm1,%xmm4
    864 	psrld	$20,%xmm1
    865 	pslld	$12,%xmm4
    866 	por	%xmm4,%xmm1
    867 	paddd	%xmm1,%xmm0
    868 	pxor	%xmm0,%xmm3
    869 .byte	102,15,56,0,223
    870 	paddd	%xmm3,%xmm2
    871 	pxor	%xmm2,%xmm1
    872 	movdqa	%xmm1,%xmm4
    873 	psrld	$25,%xmm1
    874 	pslld	$7,%xmm4
    875 	por	%xmm4,%xmm1
        	# Rotate the lanes of rows 1, 2 and 3 by one, two and three
        	# positions so that the next round works on the diagonals.
    876 	pshufd	$78,%xmm2,%xmm2
    877 	pshufd	$57,%xmm1,%xmm1
    878 	pshufd	$147,%xmm3,%xmm3
    879 	nop
    880 	paddd	%xmm1,%xmm0
    881 	pxor	%xmm0,%xmm3
    882 .byte	102,15,56,0,222
    883 	paddd	%xmm3,%xmm2
    884 	pxor	%xmm2,%xmm1
    885 	movdqa	%xmm1,%xmm4
    886 	psrld	$20,%xmm1
    887 	pslld	$12,%xmm4
    888 	por	%xmm4,%xmm1
    889 	paddd	%xmm1,%xmm0
    890 	pxor	%xmm0,%xmm3
    891 .byte	102,15,56,0,223
    892 	paddd	%xmm3,%xmm2
    893 	pxor	%xmm2,%xmm1
    894 	movdqa	%xmm1,%xmm4
    895 	psrld	$25,%xmm1
    896 	pslld	$7,%xmm4
    897 	por	%xmm4,%xmm1
        	# Undo the lane rotation (the shuffle constants for rows 1 and 3
        	# are swapped relative to the first half).
    898 	pshufd	$78,%xmm2,%xmm2
    899 	pshufd	$147,%xmm1,%xmm1
    900 	pshufd	$57,%xmm3,%xmm3
    901 	decl	%edx
    902 	jnz	L012loop1x
        	# Add the initial state rows to obtain the keystream block.
    903 	paddd	(%esp),%xmm0
    904 	paddd	16(%esp),%xmm1
    905 	paddd	32(%esp),%xmm2
    906 	paddd	48(%esp),%xmm3
        	# Fewer than 64 bytes left: finish byte by byte.
    907 	cmpl	$64,%ecx
    908 	jb	L014tail
        	# Full block: XOR 64 input bytes with the keystream and store them.
    909 	movdqu	(%esi),%xmm4
    910 	movdqu	16(%esi),%xmm5
    911 	pxor	%xmm4,%xmm0
    912 	movdqu	32(%esi),%xmm4
    913 	pxor	%xmm5,%xmm1
    914 	movdqu	48(%esi),%xmm5
    915 	pxor	%xmm4,%xmm2
    916 	pxor	%xmm5,%xmm3
    917 	leal	64(%esi),%esi
    918 	movdqu	%xmm0,(%edi)
    919 	movdqu	%xmm1,16(%edi)
    920 	movdqu	%xmm2,32(%edi)
    921 	movdqu	%xmm3,48(%edi)
    922 	leal	64(%edi),%edi
        	# Take 64 off the length; continue with the next block while bytes
        	# remain.
    923 	subl	$64,%ecx
    924 	jnz	L013outer1x
    925 	jmp	L011done
    926 L014tail:
        # Final partial block: spill the keystream block to 0..63(%esp), which
        # overwrites the saved initial rows, then XOR byte by byte.
        # %ecx = remaining byte count (below 64); the loop decrements before
        # testing, so it relies on %ecx being nonzero on entry.
    927 	movdqa	%xmm0,(%esp)
    928 	movdqa	%xmm1,16(%esp)
    929 	movdqa	%xmm2,32(%esp)
    930 	movdqa	%xmm3,48(%esp)
        	# %ebp = byte index; %eax and %edx are cleared as byte scratch.
    931 	xorl	%eax,%eax
    932 	xorl	%edx,%edx
    933 	xorl	%ebp,%ebp
    934 L015tail_loop:
        	# out[i] = keystream[i] ^ in[i]; the index is advanced before the
        	# store, hence the -1 displacement.
    935 	movb	(%esp,%ebp,1),%al
    936 	movb	(%esi,%ebp,1),%dl
    937 	leal	1(%ebp),%ebp
    938 	xorb	%dl,%al
    939 	movb	%al,-1(%edi,%ebp,1)
    940 	decl	%ecx
    941 	jnz	L015tail_loop
    942 L011done:
        	# Restore the caller stack pointer saved at 512(%esp) during frame
        	# setup, then the four pushed registers, and return.
    943 	movl	512(%esp),%esp
    944 	popl	%edi
    945 	popl	%esi
    946 	popl	%ebx
    947 	popl	%ebp
    948 	ret
    949 .align	6,0x90
        # Constant table for the SSSE3 code, addressed relative to %eax.
    950 Lssse3_data:
        # Offset 0: pshufb mask that rotates every 32-bit lane left by 16 bits.
    951 .byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
        # Offset 16: pshufb mask that rotates every 32-bit lane left by 8 bits.
    952 .byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
        # Offset 32: state words 0..3 (the same four constants the scalar code
        # loads as immediates).
    953 .long	1634760805,857760878,2036477234,1797285236
        # Offset 48: per-lane block counter offsets for the four-block setup.
    954 .long	0,1,2,3
        # Offset 64: counter step per four-block pass.
    955 .long	4,4,4,4
        # Offset 80: counter step per single block.
    956 .long	1,0,0,0
        # Offset 96: added to lane 0 of the counter row when handing over from
        # the four-block code to the one-block code.
    957 .long	4,0,0,0
        # Offset 112: mask that clears word 0 of the caller counter block.
    958 .long	0,-1,-1,-1
    959 .align	6,0x90
        # NUL-terminated ASCII text: ChaCha20 for x86, CRYPTOGAMS by
        # <appro@openssl.org>
    960 .byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
    961 .byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
    962 .byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
    963 .byte	114,103,62,0
    964 .section __IMPORT,__pointers,non_lazy_symbol_pointers
        # Four-byte pointer slot tied to _OPENSSL_ia32cap_P by .indirect_symbol.
        # _ChaCha20_ctr32 loads the address of the capability vector from here.
        # NOTE(review): presumably filled in by the dynamic linker at load
        # time -- confirm against the Mach-O documentation.
    965 L_OPENSSL_ia32cap_P$non_lazy_ptr:
    966 .indirect_symbol	_OPENSSL_ia32cap_P
    967 .long	0
    968 #endif
    969