Home | History | Annotate | Download | only in chacha
      1 #if defined(__i386__)
      2 .text
      3 .globl	ChaCha20_ctr32
      4 .hidden	ChaCha20_ctr32
      5 .type	ChaCha20_ctr32,@function
      6 .align	16
        /*
         * ChaCha20_ctr32: entry point. Contains the integer-only code path and a
         * run-time branch into the SSSE3 code at .Lssse3_shortcut.
         *
         * Stack arguments as read by the code (offsets are after the four pushes):
         *   20(%esp) output pointer      24(%esp) input pointer
         *   28(%esp) length in bytes     32(%esp) pointer to 8 key words
         *   36(%esp) pointer to 4 counter/nonce words
         * NOTE(review): presumably the C prototype is
         *   (out, in, in_len, key[8], counter[4]) -- confirm against the header.
         *
         * %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
         * A zero length returns immediately without touching any buffer.
         */
      7 ChaCha20_ctr32:
      8 .L_ChaCha20_ctr32_begin:
      9 	pushl	%ebp
     10 	pushl	%ebx
     11 	pushl	%esi
     12 	pushl	%edi
        /* length == 0: nothing to do */
     13 	xorl	%eax,%eax
     14 	cmpl	28(%esp),%eax
     15 	je	.L000no_data
        /*
         * call/pop leaves the run-time address of .Lpic_point in %eax so that
         * OPENSSL_ia32cap_P can be addressed position-independently.
         * %eax must stay intact on the jump to .Lssse3_shortcut, which uses it
         * to locate .Lssse3_data.
         */
     16 	call	.Lpic_point
     17 .Lpic_point:
     18 	popl	%eax
     19 	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
        /*
         * Take the SSSE3 path only if bit 24 of capability word 0 and bit 9 of
         * capability word 1 are both set.
         * NOTE(review): presumably these are the FXSR and SSSE3 CPUID bits --
         * confirm against the OPENSSL_ia32cap_P layout.
         */
     20 	testl	$16777216,(%ebp)
     21 	jz	.L001x86
     22 	testl	$512,4(%ebp)
     23 	jz	.L001x86
     24 	jmp	.Lssse3_shortcut
     25 .L001x86:
     26 	movl	32(%esp),%esi
     27 	movl	36(%esp),%edi
        /*
         * Frame layout after the subl below:
         *   0..60(%esp)       working state, word i at offset 4*i
         *   80..108(%esp)     copy of the 8 key words
         *   112..124(%esp)    copy of the 4 counter/nonce words
         *   128(%esp)         round-loop counter
         *   152/156/160(%esp) the caller's out / in / length argument slots
         */
     28 	subl	$132,%esp
     29 	movl	(%esi),%eax
     30 	movl	4(%esi),%ebx
     31 	movl	8(%esi),%ecx
     32 	movl	12(%esi),%edx
     33 	movl	%eax,80(%esp)
     34 	movl	%ebx,84(%esp)
     35 	movl	%ecx,88(%esp)
     36 	movl	%edx,92(%esp)
     37 	movl	16(%esi),%eax
     38 	movl	20(%esi),%ebx
     39 	movl	24(%esi),%ecx
     40 	movl	28(%esi),%edx
     41 	movl	%eax,96(%esp)
     42 	movl	%ebx,100(%esp)
     43 	movl	%ecx,104(%esp)
     44 	movl	%edx,108(%esp)
     45 	movl	(%edi),%eax
     46 	movl	4(%edi),%ebx
     47 	movl	8(%edi),%ecx
     48 	movl	12(%edi),%edx
        /* pre-decrement counter word 0: .L002entry adds 1 before every block */
     49 	subl	$1,%eax
     50 	movl	%eax,112(%esp)
     51 	movl	%ebx,116(%esp)
     52 	movl	%ecx,120(%esp)
     53 	movl	%edx,124(%esp)
     54 	jmp	.L002entry
     55 .align	16
        /*
         * Reached after each full 64-byte block: write the advanced input
         * pointer, output pointer and remaining length back to the argument slots.
         */
     56 .L003outer_loop:
     57 	movl	%ebx,156(%esp)
     58 	movl	%eax,152(%esp)
     59 	movl	%ecx,160(%esp)
        /*
         * Build the working state for one block: the four constant words
         * (word 0 stays in %eax, words 1..3 go to the frame), the key words and
         * the counter words, with counter word 0 incremented and stored back.
         */
     60 .L002entry:
     61 	movl	$1634760805,%eax
     62 	movl	$857760878,4(%esp)
     63 	movl	$2036477234,8(%esp)
     64 	movl	$1797285236,12(%esp)
     65 	movl	84(%esp),%ebx
     66 	movl	88(%esp),%ebp
     67 	movl	104(%esp),%ecx
     68 	movl	108(%esp),%esi
     69 	movl	116(%esp),%edx
     70 	movl	120(%esp),%edi
     71 	movl	%ebx,20(%esp)
     72 	movl	%ebp,24(%esp)
     73 	movl	%ecx,40(%esp)
     74 	movl	%esi,44(%esp)
     75 	movl	%edx,52(%esp)
     76 	movl	%edi,56(%esp)
     77 	movl	92(%esp),%ebx
     78 	movl	124(%esp),%edi
     79 	movl	112(%esp),%edx
     80 	movl	80(%esp),%ebp
     81 	movl	96(%esp),%ecx
     82 	movl	100(%esp),%esi
     83 	addl	$1,%edx
     84 	movl	%ebx,28(%esp)
     85 	movl	%edi,60(%esp)
     86 	movl	%edx,112(%esp)
        /* 10 iterations; each does four column and four diagonal quarter-rounds */
     87 	movl	$10,%ebx
     88 	jmp	.L004loop
     89 .align	16
        /*
         * Round loop. At the top of every iteration:
         *   %eax = word 0, %ebp = word 4, %ecx = word 8, %esi = word 9,
         *   %edx = word 12, %ebx = iteration counter (spilled to 128(%esp)).
         * The remaining words live in the frame and are loaded/stored as each
         * quarter-round needs them. Rotation amounts are 16, 12, 8, 7.
         */
     90 .L004loop:
     91 	addl	%ebp,%eax
     92 	movl	%ebx,128(%esp)
     93 	movl	%ebp,%ebx
     94 	xorl	%eax,%edx
     95 	roll	$16,%edx
     96 	addl	%edx,%ecx
     97 	xorl	%ecx,%ebx
     98 	movl	52(%esp),%edi
     99 	roll	$12,%ebx
    100 	movl	20(%esp),%ebp
    101 	addl	%ebx,%eax
    102 	xorl	%eax,%edx
    103 	movl	%eax,(%esp)
    104 	roll	$8,%edx
    105 	movl	4(%esp),%eax
    106 	addl	%edx,%ecx
    107 	movl	%edx,48(%esp)
    108 	xorl	%ecx,%ebx
    109 	addl	%ebp,%eax
    110 	roll	$7,%ebx
    111 	xorl	%eax,%edi
    112 	movl	%ecx,32(%esp)
    113 	roll	$16,%edi
    114 	movl	%ebx,16(%esp)
    115 	addl	%edi,%esi
    116 	movl	40(%esp),%ecx
    117 	xorl	%esi,%ebp
    118 	movl	56(%esp),%edx
    119 	roll	$12,%ebp
    120 	movl	24(%esp),%ebx
    121 	addl	%ebp,%eax
    122 	xorl	%eax,%edi
    123 	movl	%eax,4(%esp)
    124 	roll	$8,%edi
    125 	movl	8(%esp),%eax
    126 	addl	%edi,%esi
    127 	movl	%edi,52(%esp)
    128 	xorl	%esi,%ebp
    129 	addl	%ebx,%eax
    130 	roll	$7,%ebp
    131 	xorl	%eax,%edx
    132 	movl	%esi,36(%esp)
    133 	roll	$16,%edx
    134 	movl	%ebp,20(%esp)
    135 	addl	%edx,%ecx
    136 	movl	44(%esp),%esi
    137 	xorl	%ecx,%ebx
    138 	movl	60(%esp),%edi
    139 	roll	$12,%ebx
    140 	movl	28(%esp),%ebp
    141 	addl	%ebx,%eax
    142 	xorl	%eax,%edx
    143 	movl	%eax,8(%esp)
    144 	roll	$8,%edx
    145 	movl	12(%esp),%eax
    146 	addl	%edx,%ecx
    147 	movl	%edx,56(%esp)
    148 	xorl	%ecx,%ebx
    149 	addl	%ebp,%eax
    150 	roll	$7,%ebx
    151 	xorl	%eax,%edi
    152 	roll	$16,%edi
    153 	movl	%ebx,24(%esp)
    154 	addl	%edi,%esi
    155 	xorl	%esi,%ebp
    156 	roll	$12,%ebp
    157 	movl	20(%esp),%ebx
    158 	addl	%ebp,%eax
    159 	xorl	%eax,%edi
    160 	movl	%eax,12(%esp)
    161 	roll	$8,%edi
    162 	movl	(%esp),%eax
    163 	addl	%edi,%esi
    164 	movl	%edi,%edx
    165 	xorl	%esi,%ebp
    166 	addl	%ebx,%eax
    167 	roll	$7,%ebp
    168 	xorl	%eax,%edx
    169 	roll	$16,%edx
    170 	movl	%ebp,28(%esp)
    171 	addl	%edx,%ecx
    172 	xorl	%ecx,%ebx
    173 	movl	48(%esp),%edi
    174 	roll	$12,%ebx
    175 	movl	24(%esp),%ebp
    176 	addl	%ebx,%eax
    177 	xorl	%eax,%edx
    178 	movl	%eax,(%esp)
    179 	roll	$8,%edx
    180 	movl	4(%esp),%eax
    181 	addl	%edx,%ecx
    182 	movl	%edx,60(%esp)
    183 	xorl	%ecx,%ebx
    184 	addl	%ebp,%eax
    185 	roll	$7,%ebx
    186 	xorl	%eax,%edi
    187 	movl	%ecx,40(%esp)
    188 	roll	$16,%edi
    189 	movl	%ebx,20(%esp)
    190 	addl	%edi,%esi
    191 	movl	32(%esp),%ecx
    192 	xorl	%esi,%ebp
    193 	movl	52(%esp),%edx
    194 	roll	$12,%ebp
    195 	movl	28(%esp),%ebx
    196 	addl	%ebp,%eax
    197 	xorl	%eax,%edi
    198 	movl	%eax,4(%esp)
    199 	roll	$8,%edi
    200 	movl	8(%esp),%eax
    201 	addl	%edi,%esi
    202 	movl	%edi,48(%esp)
    203 	xorl	%esi,%ebp
    204 	addl	%ebx,%eax
    205 	roll	$7,%ebp
    206 	xorl	%eax,%edx
    207 	movl	%esi,44(%esp)
    208 	roll	$16,%edx
    209 	movl	%ebp,24(%esp)
    210 	addl	%edx,%ecx
    211 	movl	36(%esp),%esi
    212 	xorl	%ecx,%ebx
    213 	movl	56(%esp),%edi
    214 	roll	$12,%ebx
    215 	movl	16(%esp),%ebp
    216 	addl	%ebx,%eax
    217 	xorl	%eax,%edx
    218 	movl	%eax,8(%esp)
    219 	roll	$8,%edx
    220 	movl	12(%esp),%eax
    221 	addl	%edx,%ecx
    222 	movl	%edx,52(%esp)
    223 	xorl	%ecx,%ebx
    224 	addl	%ebp,%eax
    225 	roll	$7,%ebx
    226 	xorl	%eax,%edi
    227 	roll	$16,%edi
    228 	movl	%ebx,28(%esp)
    229 	addl	%edi,%esi
    230 	xorl	%esi,%ebp
    231 	movl	48(%esp),%edx
    232 	roll	$12,%ebp
    233 	movl	128(%esp),%ebx
    234 	addl	%ebp,%eax
    235 	xorl	%eax,%edi
    236 	movl	%eax,12(%esp)
    237 	roll	$8,%edi
    238 	movl	(%esp),%eax
    239 	addl	%edi,%esi
    240 	movl	%edi,56(%esp)
    241 	xorl	%esi,%ebp
    242 	roll	$7,%ebp
    243 	decl	%ebx
    244 	jnz	.L004loop
        /*
         * Rounds done. Registers hold words 0, 4, 8, 9, 12 and 14
         * (%eax, %ebp, %ecx, %esi, %edx, %edi). Add the initial state back in,
         * then branch to the byte-wise tail if fewer than 64 bytes remain.
         */
    245 	movl	160(%esp),%ebx
    246 	addl	$1634760805,%eax
    247 	addl	80(%esp),%ebp
    248 	addl	96(%esp),%ecx
    249 	addl	100(%esp),%esi
    250 	cmpl	$64,%ebx
    251 	jb	.L005tail
        /*
         * Full block: %ebx = input pointer, %eax = output pointer.
         * XOR the keystream with the input and store it, in three groups:
         * words 4,8,9,12,14 (word 0 is parked at (%esp) until the end),
         * then words 1,2,3,5,6, then words 7,10,11,13,15.
         */
    252 	movl	156(%esp),%ebx
    253 	addl	112(%esp),%edx
    254 	addl	120(%esp),%edi
    255 	xorl	(%ebx),%eax
    256 	xorl	16(%ebx),%ebp
    257 	movl	%eax,(%esp)
    258 	movl	152(%esp),%eax
    259 	xorl	32(%ebx),%ecx
    260 	xorl	36(%ebx),%esi
    261 	xorl	48(%ebx),%edx
    262 	xorl	56(%ebx),%edi
    263 	movl	%ebp,16(%eax)
    264 	movl	%ecx,32(%eax)
    265 	movl	%esi,36(%eax)
    266 	movl	%edx,48(%eax)
    267 	movl	%edi,56(%eax)
    268 	movl	4(%esp),%ebp
    269 	movl	8(%esp),%ecx
    270 	movl	12(%esp),%esi
    271 	movl	20(%esp),%edx
    272 	movl	24(%esp),%edi
    273 	addl	$857760878,%ebp
    274 	addl	$2036477234,%ecx
    275 	addl	$1797285236,%esi
    276 	addl	84(%esp),%edx
    277 	addl	88(%esp),%edi
    278 	xorl	4(%ebx),%ebp
    279 	xorl	8(%ebx),%ecx
    280 	xorl	12(%ebx),%esi
    281 	xorl	20(%ebx),%edx
    282 	xorl	24(%ebx),%edi
    283 	movl	%ebp,4(%eax)
    284 	movl	%ecx,8(%eax)
    285 	movl	%esi,12(%eax)
    286 	movl	%edx,20(%eax)
    287 	movl	%edi,24(%eax)
    288 	movl	28(%esp),%ebp
    289 	movl	40(%esp),%ecx
    290 	movl	44(%esp),%esi
    291 	movl	52(%esp),%edx
    292 	movl	60(%esp),%edi
    293 	addl	92(%esp),%ebp
    294 	addl	104(%esp),%ecx
    295 	addl	108(%esp),%esi
    296 	addl	116(%esp),%edx
    297 	addl	124(%esp),%edi
    298 	xorl	28(%ebx),%ebp
    299 	xorl	40(%ebx),%ecx
    300 	xorl	44(%ebx),%esi
    301 	xorl	52(%ebx),%edx
    302 	xorl	60(%ebx),%edi
    303 	leal	64(%ebx),%ebx
    304 	movl	%ebp,28(%eax)
    305 	movl	(%esp),%ebp
    306 	movl	%ecx,40(%eax)
    307 	movl	160(%esp),%ecx
    308 	movl	%esi,44(%eax)
    309 	movl	%edx,52(%eax)
    310 	movl	%edi,60(%eax)
    311 	movl	%ebp,(%eax)
        /* advance the output pointer, consume 64 bytes, loop while any remain */
    312 	leal	64(%eax),%eax
    313 	subl	$64,%ecx
    314 	jnz	.L003outer_loop
    315 	jmp	.L006done
        /*
         * Tail (1..63 bytes left, count in %ebx): finish the keystream block in
         * the frame at 0..60(%esp), then XOR it with the input one byte at a time.
         */
    316 .L005tail:
    317 	addl	112(%esp),%edx
    318 	addl	120(%esp),%edi
    319 	movl	%eax,(%esp)
    320 	movl	%ebp,16(%esp)
    321 	movl	%ecx,32(%esp)
    322 	movl	%esi,36(%esp)
    323 	movl	%edx,48(%esp)
    324 	movl	%edi,56(%esp)
    325 	movl	4(%esp),%ebp
    326 	movl	8(%esp),%ecx
    327 	movl	12(%esp),%esi
    328 	movl	20(%esp),%edx
    329 	movl	24(%esp),%edi
    330 	addl	$857760878,%ebp
    331 	addl	$2036477234,%ecx
    332 	addl	$1797285236,%esi
    333 	addl	84(%esp),%edx
    334 	addl	88(%esp),%edi
    335 	movl	%ebp,4(%esp)
    336 	movl	%ecx,8(%esp)
    337 	movl	%esi,12(%esp)
    338 	movl	%edx,20(%esp)
    339 	movl	%edi,24(%esp)
    340 	movl	28(%esp),%ebp
    341 	movl	40(%esp),%ecx
    342 	movl	44(%esp),%esi
    343 	movl	52(%esp),%edx
    344 	movl	60(%esp),%edi
    345 	addl	92(%esp),%ebp
    346 	addl	104(%esp),%ecx
    347 	addl	108(%esp),%esi
    348 	addl	116(%esp),%edx
    349 	addl	124(%esp),%edi
    350 	movl	%ebp,28(%esp)
    351 	movl	156(%esp),%ebp
    352 	movl	%ecx,40(%esp)
    353 	movl	152(%esp),%ecx
    354 	movl	%esi,44(%esp)
    355 	xorl	%esi,%esi
    356 	movl	%edx,52(%esp)
    357 	movl	%edi,60(%esp)
    358 	xorl	%eax,%eax
    359 	xorl	%edx,%edx
        /* %ebp = input, %ecx = output, %esi = byte index, %ebx = bytes left */
    360 .L007tail_loop:
    361 	movb	(%esi,%ebp,1),%al
    362 	movb	(%esp,%esi,1),%dl
    363 	leal	1(%esi),%esi
    364 	xorb	%dl,%al
    365 	movb	%al,-1(%ecx,%esi,1)
    366 	decl	%ebx
    367 	jnz	.L007tail_loop
        /* release the 132-byte frame, then restore the saved registers */
    368 .L006done:
    369 	addl	$132,%esp
    370 .L000no_data:
    371 	popl	%edi
    372 	popl	%esi
    373 	popl	%ebx
    374 	popl	%ebp
    375 	ret
    376 .size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
    377 .globl	ChaCha20_ssse3
    378 .hidden	ChaCha20_ssse3
    379 .type	ChaCha20_ssse3,@function
    380 .align	16
        /*
         * ChaCha20_ssse3: SSE2/SSSE3 code path (uses pshufb). Processes four
         * 64-byte blocks per iteration while at least 256 bytes remain, then
         * falls back to one block at a time, with a byte-wise tail.
         *
         * Stack arguments as read by the code (offsets are after the four pushes):
         *   20(%esp) output pointer      24(%esp) input pointer
         *   28(%esp) length in bytes     32(%esp) pointer to 8 key words
         *   36(%esp) pointer to 4 counter/nonce words
         *
         * NOTE: from .Lssse3_shortcut onward %eax must hold the run-time address
         * of .Lpic_point (it is used to locate .Lssse3_data). ChaCha20_ctr32
         * establishes that before jumping to .Lssse3_shortcut; nothing between
         * this function's own label and .Lssse3_shortcut loads %eax, and the
         * length is not checked for zero here.
         * NOTE(review): confirm that this symbol is never called directly.
         */
    381 ChaCha20_ssse3:
    382 .L_ChaCha20_ssse3_begin:
    383 	pushl	%ebp
    384 	pushl	%ebx
    385 	pushl	%esi
    386 	pushl	%edi
    387 .Lssse3_shortcut:
    388 	movl	20(%esp),%edi
    389 	movl	24(%esp),%esi
    390 	movl	28(%esp),%ecx
    391 	movl	32(%esp),%edx
    392 	movl	36(%esp),%ebx
        /*
         * Carve out a 64-byte-aligned frame and remember the original %esp at
         * 512(%esp). 516(%esp) and 520(%esp) later hold the key and counter
         * pointers (4x path only).
         */
    393 	movl	%esp,%ebp
    394 	subl	$524,%esp
    395 	andl	$-64,%esp
    396 	movl	%ebp,512(%esp)
        /* %eax = address of .Lssse3_data; %xmm3 = the 16-byte counter block */
    397 	leal	.Lssse3_data-.Lpic_point(%eax),%eax
    398 	movdqu	(%ebx),%xmm3
    399 	cmpl	$256,%ecx
    400 	jb	.L0081x
        /*
         * 4x path setup. Length is biased by -256 so the loop can test the
         * borrow. %ebp = %esp+384 addresses the 16 rows of the initial state,
         * each row holding one state word broadcast to all four lanes:
         *   -128..-80(%ebp) constant words   -64..-16(%ebp) key words 0..3
         *      0..48(%ebp)  key words 4..7    64..112(%ebp) counter words
         * The counter-word-0 row gets {0,1,2,3} added (one counter per lane)
         * and {4,4,4,4} subtracted, because the outer loop adds {4,4,4,4}
         * before every batch.
         */
    401 	movl	%edx,516(%esp)
    402 	movl	%ebx,520(%esp)
    403 	subl	$256,%ecx
    404 	leal	384(%esp),%ebp
    405 	movdqu	(%edx),%xmm7
    406 	pshufd	$0,%xmm3,%xmm0
    407 	pshufd	$85,%xmm3,%xmm1
    408 	pshufd	$170,%xmm3,%xmm2
    409 	pshufd	$255,%xmm3,%xmm3
    410 	paddd	48(%eax),%xmm0
    411 	pshufd	$0,%xmm7,%xmm4
    412 	pshufd	$85,%xmm7,%xmm5
    413 	psubd	64(%eax),%xmm0
    414 	pshufd	$170,%xmm7,%xmm6
    415 	pshufd	$255,%xmm7,%xmm7
    416 	movdqa	%xmm0,64(%ebp)
    417 	movdqa	%xmm1,80(%ebp)
    418 	movdqa	%xmm2,96(%ebp)
    419 	movdqa	%xmm3,112(%ebp)
    420 	movdqu	16(%edx),%xmm3
    421 	movdqa	%xmm4,-64(%ebp)
    422 	movdqa	%xmm5,-48(%ebp)
    423 	movdqa	%xmm6,-32(%ebp)
    424 	movdqa	%xmm7,-16(%ebp)
    425 	movdqa	32(%eax),%xmm7
        /* %ebx = %esp+128 addresses the 16 rows of the working state */
    426 	leal	128(%esp),%ebx
    427 	pshufd	$0,%xmm3,%xmm0
    428 	pshufd	$85,%xmm3,%xmm1
    429 	pshufd	$170,%xmm3,%xmm2
    430 	pshufd	$255,%xmm3,%xmm3
    431 	pshufd	$0,%xmm7,%xmm4
    432 	pshufd	$85,%xmm7,%xmm5
    433 	pshufd	$170,%xmm7,%xmm6
    434 	pshufd	$255,%xmm7,%xmm7
    435 	movdqa	%xmm0,(%ebp)
    436 	movdqa	%xmm1,16(%ebp)
    437 	movdqa	%xmm2,32(%ebp)
    438 	movdqa	%xmm3,48(%ebp)
    439 	movdqa	%xmm4,-128(%ebp)
    440 	movdqa	%xmm5,-112(%ebp)
    441 	movdqa	%xmm6,-96(%ebp)
    442 	movdqa	%xmm7,-80(%ebp)
        /* bias both data pointers by +128 so displacements -128..64 reach all four blocks */
    443 	leal	128(%esi),%esi
    444 	leal	128(%edi),%edi
    445 	jmp	.L009outer_loop
    446 .align	16
        /*
         * One batch of four blocks. Copy the initial state rows into the working
         * area (rows kept in registers are not copied), advancing the per-lane
         * counters by {4,4,4,4} and storing them back to 64(%ebp).
         * Live registers entering the round loop:
         *   %xmm0 = row 0, %xmm3 = row 4, %xmm4 = row 8, %xmm5 = row 9,
         *   %xmm6 = row 12.
         */
    447 .L009outer_loop:
    448 	movdqa	-112(%ebp),%xmm1
    449 	movdqa	-96(%ebp),%xmm2
    450 	movdqa	-80(%ebp),%xmm3
    451 	movdqa	-48(%ebp),%xmm5
    452 	movdqa	-32(%ebp),%xmm6
    453 	movdqa	-16(%ebp),%xmm7
    454 	movdqa	%xmm1,-112(%ebx)
    455 	movdqa	%xmm2,-96(%ebx)
    456 	movdqa	%xmm3,-80(%ebx)
    457 	movdqa	%xmm5,-48(%ebx)
    458 	movdqa	%xmm6,-32(%ebx)
    459 	movdqa	%xmm7,-16(%ebx)
    460 	movdqa	32(%ebp),%xmm2
    461 	movdqa	48(%ebp),%xmm3
    462 	movdqa	64(%ebp),%xmm4
    463 	movdqa	80(%ebp),%xmm5
    464 	movdqa	96(%ebp),%xmm6
    465 	movdqa	112(%ebp),%xmm7
    466 	paddd	64(%eax),%xmm4
    467 	movdqa	%xmm2,32(%ebx)
    468 	movdqa	%xmm3,48(%ebx)
    469 	movdqa	%xmm4,64(%ebx)
    470 	movdqa	%xmm5,80(%ebx)
    471 	movdqa	%xmm6,96(%ebx)
    472 	movdqa	%xmm7,112(%ebx)
    473 	movdqa	%xmm4,64(%ebp)
    474 	movdqa	-128(%ebp),%xmm0
    475 	movdqa	%xmm4,%xmm6
    476 	movdqa	-64(%ebp),%xmm3
    477 	movdqa	(%ebp),%xmm4
    478 	movdqa	16(%ebp),%xmm5
    479 	movl	$10,%edx
    480 	nop
    481 .align	16
        /*
         * Round loop, 10 iterations of four column plus four diagonal
         * quarter-rounds on all four lanes at once. Rotations by 16 and 8 use
         * pshufb with the masks at (%eax) and 16(%eax); rotations by 12 and 7
         * use a pslld/psrld/por triple.
         */
    482 .L010loop:
    483 	paddd	%xmm3,%xmm0
    484 	movdqa	%xmm3,%xmm2
    485 	pxor	%xmm0,%xmm6
    486 	pshufb	(%eax),%xmm6
    487 	paddd	%xmm6,%xmm4
    488 	pxor	%xmm4,%xmm2
    489 	movdqa	-48(%ebx),%xmm3
    490 	movdqa	%xmm2,%xmm1
    491 	pslld	$12,%xmm2
    492 	psrld	$20,%xmm1
    493 	por	%xmm1,%xmm2
    494 	movdqa	-112(%ebx),%xmm1
    495 	paddd	%xmm2,%xmm0
    496 	movdqa	80(%ebx),%xmm7
    497 	pxor	%xmm0,%xmm6
    498 	movdqa	%xmm0,-128(%ebx)
    499 	pshufb	16(%eax),%xmm6
    500 	paddd	%xmm6,%xmm4
    501 	movdqa	%xmm6,64(%ebx)
    502 	pxor	%xmm4,%xmm2
    503 	paddd	%xmm3,%xmm1
    504 	movdqa	%xmm2,%xmm0
    505 	pslld	$7,%xmm2
    506 	psrld	$25,%xmm0
    507 	pxor	%xmm1,%xmm7
    508 	por	%xmm0,%xmm2
    509 	movdqa	%xmm4,(%ebx)
    510 	pshufb	(%eax),%xmm7
    511 	movdqa	%xmm2,-64(%ebx)
    512 	paddd	%xmm7,%xmm5
    513 	movdqa	32(%ebx),%xmm4
    514 	pxor	%xmm5,%xmm3
    515 	movdqa	-32(%ebx),%xmm2
    516 	movdqa	%xmm3,%xmm0
    517 	pslld	$12,%xmm3
    518 	psrld	$20,%xmm0
    519 	por	%xmm0,%xmm3
    520 	movdqa	-96(%ebx),%xmm0
    521 	paddd	%xmm3,%xmm1
    522 	movdqa	96(%ebx),%xmm6
    523 	pxor	%xmm1,%xmm7
    524 	movdqa	%xmm1,-112(%ebx)
    525 	pshufb	16(%eax),%xmm7
    526 	paddd	%xmm7,%xmm5
    527 	movdqa	%xmm7,80(%ebx)
    528 	pxor	%xmm5,%xmm3
    529 	paddd	%xmm2,%xmm0
    530 	movdqa	%xmm3,%xmm1
    531 	pslld	$7,%xmm3
    532 	psrld	$25,%xmm1
    533 	pxor	%xmm0,%xmm6
    534 	por	%xmm1,%xmm3
    535 	movdqa	%xmm5,16(%ebx)
    536 	pshufb	(%eax),%xmm6
    537 	movdqa	%xmm3,-48(%ebx)
    538 	paddd	%xmm6,%xmm4
    539 	movdqa	48(%ebx),%xmm5
    540 	pxor	%xmm4,%xmm2
    541 	movdqa	-16(%ebx),%xmm3
    542 	movdqa	%xmm2,%xmm1
    543 	pslld	$12,%xmm2
    544 	psrld	$20,%xmm1
    545 	por	%xmm1,%xmm2
    546 	movdqa	-80(%ebx),%xmm1
    547 	paddd	%xmm2,%xmm0
    548 	movdqa	112(%ebx),%xmm7
    549 	pxor	%xmm0,%xmm6
    550 	movdqa	%xmm0,-96(%ebx)
    551 	pshufb	16(%eax),%xmm6
    552 	paddd	%xmm6,%xmm4
    553 	movdqa	%xmm6,96(%ebx)
    554 	pxor	%xmm4,%xmm2
    555 	paddd	%xmm3,%xmm1
    556 	movdqa	%xmm2,%xmm0
    557 	pslld	$7,%xmm2
    558 	psrld	$25,%xmm0
    559 	pxor	%xmm1,%xmm7
    560 	por	%xmm0,%xmm2
    561 	pshufb	(%eax),%xmm7
    562 	movdqa	%xmm2,-32(%ebx)
    563 	paddd	%xmm7,%xmm5
    564 	pxor	%xmm5,%xmm3
    565 	movdqa	-48(%ebx),%xmm2
    566 	movdqa	%xmm3,%xmm0
    567 	pslld	$12,%xmm3
    568 	psrld	$20,%xmm0
    569 	por	%xmm0,%xmm3
    570 	movdqa	-128(%ebx),%xmm0
    571 	paddd	%xmm3,%xmm1
    572 	pxor	%xmm1,%xmm7
    573 	movdqa	%xmm1,-80(%ebx)
    574 	pshufb	16(%eax),%xmm7
    575 	paddd	%xmm7,%xmm5
    576 	movdqa	%xmm7,%xmm6
    577 	pxor	%xmm5,%xmm3
    578 	paddd	%xmm2,%xmm0
    579 	movdqa	%xmm3,%xmm1
    580 	pslld	$7,%xmm3
    581 	psrld	$25,%xmm1
    582 	pxor	%xmm0,%xmm6
    583 	por	%xmm1,%xmm3
    584 	pshufb	(%eax),%xmm6
    585 	movdqa	%xmm3,-16(%ebx)
    586 	paddd	%xmm6,%xmm4
    587 	pxor	%xmm4,%xmm2
    588 	movdqa	-32(%ebx),%xmm3
    589 	movdqa	%xmm2,%xmm1
    590 	pslld	$12,%xmm2
    591 	psrld	$20,%xmm1
    592 	por	%xmm1,%xmm2
    593 	movdqa	-112(%ebx),%xmm1
    594 	paddd	%xmm2,%xmm0
    595 	movdqa	64(%ebx),%xmm7
    596 	pxor	%xmm0,%xmm6
    597 	movdqa	%xmm0,-128(%ebx)
    598 	pshufb	16(%eax),%xmm6
    599 	paddd	%xmm6,%xmm4
    600 	movdqa	%xmm6,112(%ebx)
    601 	pxor	%xmm4,%xmm2
    602 	paddd	%xmm3,%xmm1
    603 	movdqa	%xmm2,%xmm0
    604 	pslld	$7,%xmm2
    605 	psrld	$25,%xmm0
    606 	pxor	%xmm1,%xmm7
    607 	por	%xmm0,%xmm2
    608 	movdqa	%xmm4,32(%ebx)
    609 	pshufb	(%eax),%xmm7
    610 	movdqa	%xmm2,-48(%ebx)
    611 	paddd	%xmm7,%xmm5
    612 	movdqa	(%ebx),%xmm4
    613 	pxor	%xmm5,%xmm3
    614 	movdqa	-16(%ebx),%xmm2
    615 	movdqa	%xmm3,%xmm0
    616 	pslld	$12,%xmm3
    617 	psrld	$20,%xmm0
    618 	por	%xmm0,%xmm3
    619 	movdqa	-96(%ebx),%xmm0
    620 	paddd	%xmm3,%xmm1
    621 	movdqa	80(%ebx),%xmm6
    622 	pxor	%xmm1,%xmm7
    623 	movdqa	%xmm1,-112(%ebx)
    624 	pshufb	16(%eax),%xmm7
    625 	paddd	%xmm7,%xmm5
    626 	movdqa	%xmm7,64(%ebx)
    627 	pxor	%xmm5,%xmm3
    628 	paddd	%xmm2,%xmm0
    629 	movdqa	%xmm3,%xmm1
    630 	pslld	$7,%xmm3
    631 	psrld	$25,%xmm1
    632 	pxor	%xmm0,%xmm6
    633 	por	%xmm1,%xmm3
    634 	movdqa	%xmm5,48(%ebx)
    635 	pshufb	(%eax),%xmm6
    636 	movdqa	%xmm3,-32(%ebx)
    637 	paddd	%xmm6,%xmm4
    638 	movdqa	16(%ebx),%xmm5
    639 	pxor	%xmm4,%xmm2
    640 	movdqa	-64(%ebx),%xmm3
    641 	movdqa	%xmm2,%xmm1
    642 	pslld	$12,%xmm2
    643 	psrld	$20,%xmm1
    644 	por	%xmm1,%xmm2
    645 	movdqa	-80(%ebx),%xmm1
    646 	paddd	%xmm2,%xmm0
    647 	movdqa	96(%ebx),%xmm7
    648 	pxor	%xmm0,%xmm6
    649 	movdqa	%xmm0,-96(%ebx)
    650 	pshufb	16(%eax),%xmm6
    651 	paddd	%xmm6,%xmm4
    652 	movdqa	%xmm6,80(%ebx)
    653 	pxor	%xmm4,%xmm2
    654 	paddd	%xmm3,%xmm1
    655 	movdqa	%xmm2,%xmm0
    656 	pslld	$7,%xmm2
    657 	psrld	$25,%xmm0
    658 	pxor	%xmm1,%xmm7
    659 	por	%xmm0,%xmm2
    660 	pshufb	(%eax),%xmm7
    661 	movdqa	%xmm2,-16(%ebx)
    662 	paddd	%xmm7,%xmm5
    663 	pxor	%xmm5,%xmm3
    664 	movdqa	%xmm3,%xmm0
    665 	pslld	$12,%xmm3
    666 	psrld	$20,%xmm0
    667 	por	%xmm0,%xmm3
    668 	movdqa	-128(%ebx),%xmm0
    669 	paddd	%xmm3,%xmm1
    670 	movdqa	64(%ebx),%xmm6
    671 	pxor	%xmm1,%xmm7
    672 	movdqa	%xmm1,-80(%ebx)
    673 	pshufb	16(%eax),%xmm7
    674 	paddd	%xmm7,%xmm5
    675 	movdqa	%xmm7,96(%ebx)
    676 	pxor	%xmm5,%xmm3
    677 	movdqa	%xmm3,%xmm1
    678 	pslld	$7,%xmm3
    679 	psrld	$25,%xmm1
    680 	por	%xmm1,%xmm3
    681 	decl	%edx
    682 	jnz	.L010loop
        /*
         * Rounds done: spill the rows still held in registers, then emit the
         * output in four groups of four rows (rows 0-3, 4-7, 8-11, 12-15).
         * Each group: add the initial state, transpose the 4x4 dword matrix with
         * punpck* so every register holds 16 consecutive keystream bytes of one
         * block, XOR with the input and store. The four blocks are 64 bytes
         * apart, hence displacements -128/-64/0/64 from the biased pointers;
         * the pointers advance 16 bytes per group.
         */
    683 	movdqa	%xmm3,-64(%ebx)
    684 	movdqa	%xmm4,(%ebx)
    685 	movdqa	%xmm5,16(%ebx)
    686 	movdqa	%xmm6,64(%ebx)
    687 	movdqa	%xmm7,96(%ebx)
    688 	movdqa	-112(%ebx),%xmm1
    689 	movdqa	-96(%ebx),%xmm2
    690 	movdqa	-80(%ebx),%xmm3
    691 	paddd	-128(%ebp),%xmm0
    692 	paddd	-112(%ebp),%xmm1
    693 	paddd	-96(%ebp),%xmm2
    694 	paddd	-80(%ebp),%xmm3
    695 	movdqa	%xmm0,%xmm6
    696 	punpckldq	%xmm1,%xmm0
    697 	movdqa	%xmm2,%xmm7
    698 	punpckldq	%xmm3,%xmm2
    699 	punpckhdq	%xmm1,%xmm6
    700 	punpckhdq	%xmm3,%xmm7
    701 	movdqa	%xmm0,%xmm1
    702 	punpcklqdq	%xmm2,%xmm0
    703 	movdqa	%xmm6,%xmm3
    704 	punpcklqdq	%xmm7,%xmm6
    705 	punpckhqdq	%xmm2,%xmm1
    706 	punpckhqdq	%xmm7,%xmm3
    707 	movdqu	-128(%esi),%xmm4
    708 	movdqu	-64(%esi),%xmm5
    709 	movdqu	(%esi),%xmm2
    710 	movdqu	64(%esi),%xmm7
    711 	leal	16(%esi),%esi
    712 	pxor	%xmm0,%xmm4
    713 	movdqa	-64(%ebx),%xmm0
    714 	pxor	%xmm1,%xmm5
    715 	movdqa	-48(%ebx),%xmm1
    716 	pxor	%xmm2,%xmm6
    717 	movdqa	-32(%ebx),%xmm2
    718 	pxor	%xmm3,%xmm7
    719 	movdqa	-16(%ebx),%xmm3
    720 	movdqu	%xmm4,-128(%edi)
    721 	movdqu	%xmm5,-64(%edi)
    722 	movdqu	%xmm6,(%edi)
    723 	movdqu	%xmm7,64(%edi)
    724 	leal	16(%edi),%edi
    725 	paddd	-64(%ebp),%xmm0
    726 	paddd	-48(%ebp),%xmm1
    727 	paddd	-32(%ebp),%xmm2
    728 	paddd	-16(%ebp),%xmm3
    729 	movdqa	%xmm0,%xmm6
    730 	punpckldq	%xmm1,%xmm0
    731 	movdqa	%xmm2,%xmm7
    732 	punpckldq	%xmm3,%xmm2
    733 	punpckhdq	%xmm1,%xmm6
    734 	punpckhdq	%xmm3,%xmm7
    735 	movdqa	%xmm0,%xmm1
    736 	punpcklqdq	%xmm2,%xmm0
    737 	movdqa	%xmm6,%xmm3
    738 	punpcklqdq	%xmm7,%xmm6
    739 	punpckhqdq	%xmm2,%xmm1
    740 	punpckhqdq	%xmm7,%xmm3
    741 	movdqu	-128(%esi),%xmm4
    742 	movdqu	-64(%esi),%xmm5
    743 	movdqu	(%esi),%xmm2
    744 	movdqu	64(%esi),%xmm7
    745 	leal	16(%esi),%esi
    746 	pxor	%xmm0,%xmm4
    747 	movdqa	(%ebx),%xmm0
    748 	pxor	%xmm1,%xmm5
    749 	movdqa	16(%ebx),%xmm1
    750 	pxor	%xmm2,%xmm6
    751 	movdqa	32(%ebx),%xmm2
    752 	pxor	%xmm3,%xmm7
    753 	movdqa	48(%ebx),%xmm3
    754 	movdqu	%xmm4,-128(%edi)
    755 	movdqu	%xmm5,-64(%edi)
    756 	movdqu	%xmm6,(%edi)
    757 	movdqu	%xmm7,64(%edi)
    758 	leal	16(%edi),%edi
    759 	paddd	(%ebp),%xmm0
    760 	paddd	16(%ebp),%xmm1
    761 	paddd	32(%ebp),%xmm2
    762 	paddd	48(%ebp),%xmm3
    763 	movdqa	%xmm0,%xmm6
    764 	punpckldq	%xmm1,%xmm0
    765 	movdqa	%xmm2,%xmm7
    766 	punpckldq	%xmm3,%xmm2
    767 	punpckhdq	%xmm1,%xmm6
    768 	punpckhdq	%xmm3,%xmm7
    769 	movdqa	%xmm0,%xmm1
    770 	punpcklqdq	%xmm2,%xmm0
    771 	movdqa	%xmm6,%xmm3
    772 	punpcklqdq	%xmm7,%xmm6
    773 	punpckhqdq	%xmm2,%xmm1
    774 	punpckhqdq	%xmm7,%xmm3
    775 	movdqu	-128(%esi),%xmm4
    776 	movdqu	-64(%esi),%xmm5
    777 	movdqu	(%esi),%xmm2
    778 	movdqu	64(%esi),%xmm7
    779 	leal	16(%esi),%esi
    780 	pxor	%xmm0,%xmm4
    781 	movdqa	64(%ebx),%xmm0
    782 	pxor	%xmm1,%xmm5
    783 	movdqa	80(%ebx),%xmm1
    784 	pxor	%xmm2,%xmm6
    785 	movdqa	96(%ebx),%xmm2
    786 	pxor	%xmm3,%xmm7
    787 	movdqa	112(%ebx),%xmm3
    788 	movdqu	%xmm4,-128(%edi)
    789 	movdqu	%xmm5,-64(%edi)
    790 	movdqu	%xmm6,(%edi)
    791 	movdqu	%xmm7,64(%edi)
    792 	leal	16(%edi),%edi
    793 	paddd	64(%ebp),%xmm0
    794 	paddd	80(%ebp),%xmm1
    795 	paddd	96(%ebp),%xmm2
    796 	paddd	112(%ebp),%xmm3
    797 	movdqa	%xmm0,%xmm6
    798 	punpckldq	%xmm1,%xmm0
    799 	movdqa	%xmm2,%xmm7
    800 	punpckldq	%xmm3,%xmm2
    801 	punpckhdq	%xmm1,%xmm6
    802 	punpckhdq	%xmm3,%xmm7
    803 	movdqa	%xmm0,%xmm1
    804 	punpcklqdq	%xmm2,%xmm0
    805 	movdqa	%xmm6,%xmm3
    806 	punpcklqdq	%xmm7,%xmm6
    807 	punpckhqdq	%xmm2,%xmm1
    808 	punpckhqdq	%xmm7,%xmm3
    809 	movdqu	-128(%esi),%xmm4
    810 	movdqu	-64(%esi),%xmm5
    811 	movdqu	(%esi),%xmm2
    812 	movdqu	64(%esi),%xmm7
        /* last group: 3*16 + 208 = 256, so the pointer lands on the next batch */
    813 	leal	208(%esi),%esi
    814 	pxor	%xmm0,%xmm4
    815 	pxor	%xmm1,%xmm5
    816 	pxor	%xmm2,%xmm6
    817 	pxor	%xmm3,%xmm7
    818 	movdqu	%xmm4,-128(%edi)
    819 	movdqu	%xmm5,-64(%edi)
    820 	movdqu	%xmm6,(%edi)
    821 	movdqu	%xmm7,64(%edi)
    822 	leal	208(%edi),%edi
        /* length was biased by -256: keep looping while the subtraction does not borrow */
    823 	subl	$256,%ecx
    824 	jnc	.L009outer_loop
        /*
         * Fewer than 256 bytes left: undo the bias; zero means finished.
         * Otherwise prepare the 1x path: remove the +128 pointer bias, reload
         * the key and counter pointers, and rebuild the counter row in %xmm3 as
         * (counter block AND {0,-1,-1,-1}) OR (lane-0 counter of the last
         * batch + 4).
         */
    825 	addl	$256,%ecx
    826 	jz	.L011done
    827 	movl	520(%esp),%ebx
    828 	leal	-128(%esi),%esi
    829 	movl	516(%esp),%edx
    830 	leal	-128(%edi),%edi
    831 	movd	64(%ebp),%xmm2
    832 	movdqu	(%ebx),%xmm3
    833 	paddd	96(%eax),%xmm2
    834 	pand	112(%eax),%xmm3
    835 	por	%xmm2,%xmm3
        /*
         * 1x path: one block per iteration, state held as four rows.
         *   %xmm0 / (%esp)   constants      %xmm1 / 16(%esp) key words 0..3
         *   %xmm2 / 32(%esp) key words 4..7 %xmm3 / 48(%esp) counter words
         *   %xmm6, %xmm7     pshufb masks for the 16-bit and 8-bit rotations
         * NOTE(review): the dword store of %ebp to 48(%esp) below is overwritten
         * by the movdqa of %xmm3 to 48(%esp) four instructions later; it looks
         * redundant -- confirm before touching it.
         */
    836 .L0081x:
    837 	movdqa	32(%eax),%xmm0
    838 	movdqu	(%edx),%xmm1
    839 	movdqu	16(%edx),%xmm2
    840 	movdqa	(%eax),%xmm6
    841 	movdqa	16(%eax),%xmm7
    842 	movl	%ebp,48(%esp)
    843 	movdqa	%xmm0,(%esp)
    844 	movdqa	%xmm1,16(%esp)
    845 	movdqa	%xmm2,32(%esp)
    846 	movdqa	%xmm3,48(%esp)
    847 	movl	$10,%edx
    848 	jmp	.L012loop1x
    849 .align	16
        /* next block: reload the saved rows and add {1,0,0,0} to the counter row */
    850 .L013outer1x:
    851 	movdqa	80(%eax),%xmm3
    852 	movdqa	(%esp),%xmm0
    853 	movdqa	16(%esp),%xmm1
    854 	movdqa	32(%esp),%xmm2
    855 	paddd	48(%esp),%xmm3
    856 	movl	$10,%edx
    857 	movdqa	%xmm3,48(%esp)
    858 	jmp	.L012loop1x
    859 .align	16
        /*
         * 1x round loop, 10 iterations. Each iteration runs the quarter-round
         * on the rows as they are (columns), rotates rows 1..3 with pshufd so
         * the diagonals line up as columns, runs it again, and rotates back.
         * The .byte sequences are hand-encoded SSSE3 instructions:
         *   102,15,56,0,222 = pshufb %xmm6,%xmm3 (rotate by 16)
         *   102,15,56,0,223 = pshufb %xmm7,%xmm3 (rotate by 8)
         */
    860 .L012loop1x:
    861 	paddd	%xmm1,%xmm0
    862 	pxor	%xmm0,%xmm3
    863 .byte	102,15,56,0,222
    864 	paddd	%xmm3,%xmm2
    865 	pxor	%xmm2,%xmm1
    866 	movdqa	%xmm1,%xmm4
    867 	psrld	$20,%xmm1
    868 	pslld	$12,%xmm4
    869 	por	%xmm4,%xmm1
    870 	paddd	%xmm1,%xmm0
    871 	pxor	%xmm0,%xmm3
    872 .byte	102,15,56,0,223
    873 	paddd	%xmm3,%xmm2
    874 	pxor	%xmm2,%xmm1
    875 	movdqa	%xmm1,%xmm4
    876 	psrld	$25,%xmm1
    877 	pslld	$7,%xmm4
    878 	por	%xmm4,%xmm1
    879 	pshufd	$78,%xmm2,%xmm2
    880 	pshufd	$57,%xmm1,%xmm1
    881 	pshufd	$147,%xmm3,%xmm3
    882 	nop
    883 	paddd	%xmm1,%xmm0
    884 	pxor	%xmm0,%xmm3
    885 .byte	102,15,56,0,222
    886 	paddd	%xmm3,%xmm2
    887 	pxor	%xmm2,%xmm1
    888 	movdqa	%xmm1,%xmm4
    889 	psrld	$20,%xmm1
    890 	pslld	$12,%xmm4
    891 	por	%xmm4,%xmm1
    892 	paddd	%xmm1,%xmm0
    893 	pxor	%xmm0,%xmm3
    894 .byte	102,15,56,0,223
    895 	paddd	%xmm3,%xmm2
    896 	pxor	%xmm2,%xmm1
    897 	movdqa	%xmm1,%xmm4
    898 	psrld	$25,%xmm1
    899 	pslld	$7,%xmm4
    900 	por	%xmm4,%xmm1
    901 	pshufd	$78,%xmm2,%xmm2
    902 	pshufd	$147,%xmm1,%xmm1
    903 	pshufd	$57,%xmm3,%xmm3
    904 	decl	%edx
    905 	jnz	.L012loop1x
        /* add the initial state; a partial block goes to the byte-wise tail */
    906 	paddd	(%esp),%xmm0
    907 	paddd	16(%esp),%xmm1
    908 	paddd	32(%esp),%xmm2
    909 	paddd	48(%esp),%xmm3
    910 	cmpl	$64,%ecx
    911 	jb	.L014tail
        /* full block: XOR 64 bytes of input with the keystream and store */
    912 	movdqu	(%esi),%xmm4
    913 	movdqu	16(%esi),%xmm5
    914 	pxor	%xmm4,%xmm0
    915 	movdqu	32(%esi),%xmm4
    916 	pxor	%xmm5,%xmm1
    917 	movdqu	48(%esi),%xmm5
    918 	pxor	%xmm4,%xmm2
    919 	pxor	%xmm5,%xmm3
    920 	leal	64(%esi),%esi
    921 	movdqu	%xmm0,(%edi)
    922 	movdqu	%xmm1,16(%edi)
    923 	movdqu	%xmm2,32(%edi)
    924 	movdqu	%xmm3,48(%edi)
    925 	leal	64(%edi),%edi
    926 	subl	$64,%ecx
    927 	jnz	.L013outer1x
    928 	jmp	.L011done
        /*
         * Tail (1..63 bytes, count in %ecx): spill the keystream block to
         * 0..63(%esp) and XOR it with the input one byte at a time;
         * %ebp is the byte index.
         */
    929 .L014tail:
    930 	movdqa	%xmm0,(%esp)
    931 	movdqa	%xmm1,16(%esp)
    932 	movdqa	%xmm2,32(%esp)
    933 	movdqa	%xmm3,48(%esp)
    934 	xorl	%eax,%eax
    935 	xorl	%edx,%edx
    936 	xorl	%ebp,%ebp
    937 .L015tail_loop:
    938 	movb	(%esp,%ebp,1),%al
    939 	movb	(%esi,%ebp,1),%dl
    940 	leal	1(%ebp),%ebp
    941 	xorb	%dl,%al
    942 	movb	%al,-1(%edi,%ebp,1)
    943 	decl	%ecx
    944 	jnz	.L015tail_loop
        /* restore the %esp saved at 512(%esp), then the callee-saved registers */
    945 .L011done:
    946 	movl	512(%esp),%esp
    947 	popl	%edi
    948 	popl	%esi
    949 	popl	%ebx
    950 	popl	%ebp
    951 	ret
    952 .size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
    953 .align	64
        /*
         * Constant table for the SSSE3 code, addressed as offset(%eax).
         * Each row is 16 bytes; the offsets below are the ones the code uses.
         */
    954 .Lssse3_data:
        /* +0: pshufb mask that rotates every dword left by 16 bits */
    955 .byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
        /* +16: pshufb mask that rotates every dword left by 8 bits */
    956 .byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
        /* +32: the four constant state words (same values the integer path uses) */
    957 .long	1634760805,857760878,2036477234,1797285236
        /* +48: per-lane counter offsets for the 4x path */
    958 .long	0,1,2,3
        /* +64: counter step per 4-block batch (subtracted once up front, added every batch) */
    959 .long	4,4,4,4
        /* +80: counter step for the 1x path */
    960 .long	1,0,0,0
        /* +96: added to the lane-0 counter when dropping from the 4x to the 1x path */
    961 .long	4,0,0,0
        /* +112: mask that clears dword 0 of the counter block and keeps the other three */
    962 .long	0,-1,-1,-1
    963 .align	64
        /* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
    964 .byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
    965 .byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
    966 .byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
    967 .byte	114,103,62,0
    968 #endif
    969