      1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
      2 .text
      3 
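// ChaCha20-Poly1305 AEAD "open" (decrypt and verify) and "seal" (encrypt and
// tag) for x86-64. ChaCha20 keystream generation (SSE here, with an AVX2 path
// selected at runtime) is interleaved with the Poly1305 authenticator update.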
      4 
      5 chacha20_poly1305_constants:
      6 
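// "expand 32-byte k": the ChaCha20 sigma constant, duplicated so the AVX2
// code can load it as a full 256-bit row.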
      7 .p2align	6
      8 .chacha20_consts:
      9 .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
     10 .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
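// pshufb shuffle masks implementing per-32-bit-lane rotates left by 8 and 16.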
     11 .rol8:
     12 .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
     13 .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
     14 .rol16:
     15 .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
     16 .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
     17 .avx2_init:
     18 .long	0,0,0,0
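// Block-counter increments: the SSE path steps the counter by 1 per 64-byte
// block, the AVX2 path by 2 per pair of interleaved blocks.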
     19 .sse_inc:
     20 .long	1,0,0,0
     21 .avx2_inc:
     22 .long	2,0,0,0,2,0,0,0
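// Poly1305 "r" clamp mask (r &= 0x0ffffffc0ffffffc0ffffffc0fffffff), followed
// by an all-ones quadword pair.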
     23 .clamp:
     24 .quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
     25 .quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
     26 .p2align	4
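// Prefix masks: row N keeps the first N+1 bytes of a 16-byte block, used when
// masking a trailing partial block.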
     27 .and_masks:
     28 .byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     29 .byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     30 .byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     31 .byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     32 .byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     33 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     34 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     35 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     36 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
     37 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
     38 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
     39 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
     40 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
     41 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
     42 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
     43 .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
     44 
     45 
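// Absorb the additional data into the Poly1305 accumulator held in
// %r10:%r11:%r12. Internal convention (inferred from the callers): %rcx
// points at the AAD, %r8 is its length, and the clamped key r sits at
// 0(%rbp),8(%rbp). A 13-byte AAD (the TLS record case) takes a fast path.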
     46 .p2align	6
     47 poly_hash_ad_internal:
     48 
     49 	xorq	%r10,%r10
     50 	xorq	%r11,%r11
     51 	xorq	%r12,%r12
     52 	cmpq	$13,%r8
     53 	jne	hash_ad_loop
     54 poly_fast_tls_ad:
     55 
     56 	movq	(%rcx),%r10
     57 	movq	5(%rcx),%r11
     58 	shrq	$24,%r11
     59 	movq	$1,%r12
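	// One Poly1305 step: multiply the accumulator %r10:%r11:%r12 by r and
	// reduce mod 2^130-5, via two 64x64->128 mulq plus imulq for the 2-bit
	// top limb, then folding the bits above 2^130 back in (shift-by-2 and
	// add, i.e. *5/4). This block repeats verbatim throughout the file.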
     60 	movq	0+0(%rbp),%rax
     61 	movq	%rax,%r15
     62 	mulq	%r10
     63 	movq	%rax,%r13
     64 	movq	%rdx,%r14
     65 	movq	0+0(%rbp),%rax
     66 	mulq	%r11
     67 	imulq	%r12,%r15
     68 	addq	%rax,%r14
     69 	adcq	%rdx,%r15
     70 	movq	8+0(%rbp),%rax
     71 	movq	%rax,%r9
     72 	mulq	%r10
     73 	addq	%rax,%r14
     74 	adcq	$0,%rdx
     75 	movq	%rdx,%r10
     76 	movq	8+0(%rbp),%rax
     77 	mulq	%r11
     78 	addq	%rax,%r15
     79 	adcq	$0,%rdx
     80 	imulq	%r12,%r9
     81 	addq	%r10,%r15
     82 	adcq	%rdx,%r9
     83 	movq	%r13,%r10
     84 	movq	%r14,%r11
     85 	movq	%r15,%r12
     86 	andq	$3,%r12
     87 	movq	%r15,%r13
     88 	andq	$-4,%r13
     89 	movq	%r9,%r14
     90 	shrdq	$2,%r9,%r15
     91 	shrq	$2,%r9
     92 	addq	%r13,%r10
     93 	adcq	%r14,%r11
     94 	adcq	$0,%r12
     95 	addq	%r15,%r10
     96 	adcq	%r9,%r11
     97 	adcq	$0,%r12
     98 
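	// .byte 0xf3,0xc3 is a two-byte "rep ret", emitted as raw bytes.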
     99 	.byte	0xf3,0xc3
    100 hash_ad_loop:
    101 
    102 	cmpq	$16,%r8
    103 	jb	hash_ad_tail
    104 	addq	0(%rcx),%r10
    105 	adcq	8+0(%rcx),%r11
    106 	adcq	$1,%r12
    107 	movq	0+0(%rbp),%rax
    108 	movq	%rax,%r15
    109 	mulq	%r10
    110 	movq	%rax,%r13
    111 	movq	%rdx,%r14
    112 	movq	0+0(%rbp),%rax
    113 	mulq	%r11
    114 	imulq	%r12,%r15
    115 	addq	%rax,%r14
    116 	adcq	%rdx,%r15
    117 	movq	8+0(%rbp),%rax
    118 	movq	%rax,%r9
    119 	mulq	%r10
    120 	addq	%rax,%r14
    121 	adcq	$0,%rdx
    122 	movq	%rdx,%r10
    123 	movq	8+0(%rbp),%rax
    124 	mulq	%r11
    125 	addq	%rax,%r15
    126 	adcq	$0,%rdx
    127 	imulq	%r12,%r9
    128 	addq	%r10,%r15
    129 	adcq	%rdx,%r9
    130 	movq	%r13,%r10
    131 	movq	%r14,%r11
    132 	movq	%r15,%r12
    133 	andq	$3,%r12
    134 	movq	%r15,%r13
    135 	andq	$-4,%r13
    136 	movq	%r9,%r14
    137 	shrdq	$2,%r9,%r15
    138 	shrq	$2,%r9
    139 	addq	%r13,%r10
    140 	adcq	%r14,%r11
    141 	adcq	$0,%r12
    142 	addq	%r15,%r10
    143 	adcq	%r9,%r11
    144 	adcq	$0,%r12
    145 
    146 	leaq	16(%rcx),%rcx
    147 	subq	$16,%r8
    148 	jmp	hash_ad_loop
    149 hash_ad_tail:
    150 	cmpq	$0,%r8
    151 	je	1f
    152 
    153 	xorq	%r13,%r13
    154 	xorq	%r14,%r14
    155 	xorq	%r15,%r15
    156 	addq	%r8,%rcx
    157 hash_ad_tail_loop:
    158 	shldq	$8,%r13,%r14
    159 	shlq	$8,%r13
    160 	movzbq	-1(%rcx),%r15
    161 	xorq	%r15,%r13
    162 	decq	%rcx
    163 	decq	%r8
    164 	jne	hash_ad_tail_loop
    165 
    166 	addq	%r13,%r10
    167 	adcq	%r14,%r11
    168 	adcq	$1,%r12
    169 	movq	0+0(%rbp),%rax
    170 	movq	%rax,%r15
    171 	mulq	%r10
    172 	movq	%rax,%r13
    173 	movq	%rdx,%r14
    174 	movq	0+0(%rbp),%rax
    175 	mulq	%r11
    176 	imulq	%r12,%r15
    177 	addq	%rax,%r14
    178 	adcq	%rdx,%r15
    179 	movq	8+0(%rbp),%rax
    180 	movq	%rax,%r9
    181 	mulq	%r10
    182 	addq	%rax,%r14
    183 	adcq	$0,%rdx
    184 	movq	%rdx,%r10
    185 	movq	8+0(%rbp),%rax
    186 	mulq	%r11
    187 	addq	%rax,%r15
    188 	adcq	$0,%rdx
    189 	imulq	%r12,%r9
    190 	addq	%r10,%r15
    191 	adcq	%rdx,%r9
    192 	movq	%r13,%r10
    193 	movq	%r14,%r11
    194 	movq	%r15,%r12
    195 	andq	$3,%r12
    196 	movq	%r15,%r13
    197 	andq	$-4,%r13
    198 	movq	%r9,%r14
    199 	shrdq	$2,%r9,%r15
    200 	shrq	$2,%r9
    201 	addq	%r13,%r10
    202 	adcq	%r14,%r11
    203 	adcq	$0,%r12
    204 	addq	%r15,%r10
    205 	adcq	%r9,%r11
    206 	adcq	$0,%r12
    207 
    208 
    209 1:
    210 	.byte	0xf3,0xc3
    211 
    212 
    213 
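// chacha20_poly1305_open: one-pass decrypt-and-authenticate. Register use
// below implies the SysV arguments %rdi = plaintext out, %rsi = ciphertext
// in, %rdx = length, %rcx = AAD, %r8 = AAD length, %r9 = key/nonce state;
// %r9 is saved and the computed 16-byte tag is stored through it on return.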
    214 .globl	_chacha20_poly1305_open
    215 .private_extern _chacha20_poly1305_open
    216 
    217 .p2align	6
    218 _chacha20_poly1305_open:
    219 
    220 	pushq	%rbp
    221 
    222 	pushq	%rbx
    223 
    224 	pushq	%r12
    225 
    226 	pushq	%r13
    227 
    228 	pushq	%r14
    229 
    230 	pushq	%r15
    231 
    232 
    233 
    234 	pushq	%r9
    235 
    236 	subq	$288 + 32,%rsp
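	// 288+32 bytes of scratch; %rbp becomes a 32-byte-aligned frame
	// pointer. Layout used below: 0(%rbp) the clamped Poly1305 r,
	// 16(%rbp) s, 32(%rbp) the ad_len||msg_len length block,
	// 48/64(%rbp) the ChaCha key rows, 96..144(%rbp) per-block counters.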
    237 
    238 
    239 
    240 
    241 
    242 
    243 
    244 	leaq	32(%rsp),%rbp
    245 	andq	$-32,%rbp
    246 	movq	%rdx,8+32(%rbp)
    247 	movq	%r8,0+32(%rbp)
    248 	movq	%rdx,%rbx
    249 
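	// OPENSSL_ia32cap_P word 2, bits 5 and 8 (mask 0x120 = 288): AVX2 and
	// BMI2. If both are present, use the AVX2 implementation.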
    250 	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
    251 	andl	$288,%eax
    252 	xorl	$288,%eax
    253 	jz	chacha20_poly1305_open_avx2
    254 
    255 1:
    256 	cmpq	$128,%rbx
    257 	jbe	open_sse_128
    258 
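	// Run a single ChaCha20 block at counter 0; its first 32 bytes of
	// keystream, clamped, become the Poly1305 key (r,s).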
    259 	movdqa	.chacha20_consts(%rip),%xmm0
    260 	movdqu	0(%r9),%xmm4
    261 	movdqu	16(%r9),%xmm8
    262 	movdqu	32(%r9),%xmm12
    263 	movdqa	%xmm12,%xmm7
    264 
    265 	movdqa	%xmm4,48(%rbp)
    266 	movdqa	%xmm8,64(%rbp)
    267 	movdqa	%xmm12,96(%rbp)
    268 	movq	$10,%r10
    269 1:
    270 	paddd	%xmm4,%xmm0
    271 	pxor	%xmm0,%xmm12
    272 	pshufb	.rol16(%rip),%xmm12
    273 	paddd	%xmm12,%xmm8
    274 	pxor	%xmm8,%xmm4
    275 	movdqa	%xmm4,%xmm3
    276 	pslld	$12,%xmm3
    277 	psrld	$20,%xmm4
    278 	pxor	%xmm3,%xmm4
    279 	paddd	%xmm4,%xmm0
    280 	pxor	%xmm0,%xmm12
    281 	pshufb	.rol8(%rip),%xmm12
    282 	paddd	%xmm12,%xmm8
    283 	pxor	%xmm8,%xmm4
    284 	movdqa	%xmm4,%xmm3
    285 	pslld	$7,%xmm3
    286 	psrld	$25,%xmm4
    287 	pxor	%xmm3,%xmm4
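	// Encoded palignr $4,%xmm4,%xmm4 / $8,%xmm8,%xmm8 / $12,%xmm12,%xmm12:
	// rotate the b, c and d rows so the next quarter-round works on the
	// diagonals; the mirrored 12/8/4 sequence below undoes the rotation.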
    288 .byte	102,15,58,15,228,4
    289 .byte	102,69,15,58,15,192,8
    290 .byte	102,69,15,58,15,228,12
    291 	paddd	%xmm4,%xmm0
    292 	pxor	%xmm0,%xmm12
    293 	pshufb	.rol16(%rip),%xmm12
    294 	paddd	%xmm12,%xmm8
    295 	pxor	%xmm8,%xmm4
    296 	movdqa	%xmm4,%xmm3
    297 	pslld	$12,%xmm3
    298 	psrld	$20,%xmm4
    299 	pxor	%xmm3,%xmm4
    300 	paddd	%xmm4,%xmm0
    301 	pxor	%xmm0,%xmm12
    302 	pshufb	.rol8(%rip),%xmm12
    303 	paddd	%xmm12,%xmm8
    304 	pxor	%xmm8,%xmm4
    305 	movdqa	%xmm4,%xmm3
    306 	pslld	$7,%xmm3
    307 	psrld	$25,%xmm4
    308 	pxor	%xmm3,%xmm4
    309 .byte	102,15,58,15,228,12
    310 .byte	102,69,15,58,15,192,8
    311 .byte	102,69,15,58,15,228,4
    312 
    313 	decq	%r10
    314 	jne	1b
    315 
    316 	paddd	.chacha20_consts(%rip),%xmm0
    317 	paddd	48(%rbp),%xmm4
    318 
    319 	pand	.clamp(%rip),%xmm0
    320 	movdqa	%xmm0,0(%rbp)
    321 	movdqa	%xmm4,16(%rbp)
    322 
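	// (no-op: the AAD length is already in %r8 for poly_hash_ad_internal)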
    323 	movq	%r8,%r8
    324 	call	poly_hash_ad_internal
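// Bulk loop: four ChaCha20 blocks (256 bytes) per iteration, with the
// Poly1305 update over the ciphertext interleaved into the vector code.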
    325 open_sse_main_loop:
    326 	cmpq	$256,%rbx
    327 	jb	2f
    328 
    329 	movdqa	.chacha20_consts(%rip),%xmm0
    330 	movdqa	48(%rbp),%xmm4
    331 	movdqa	64(%rbp),%xmm8
    332 	movdqa	%xmm0,%xmm1
    333 	movdqa	%xmm4,%xmm5
    334 	movdqa	%xmm8,%xmm9
    335 	movdqa	%xmm0,%xmm2
    336 	movdqa	%xmm4,%xmm6
    337 	movdqa	%xmm8,%xmm10
    338 	movdqa	%xmm0,%xmm3
    339 	movdqa	%xmm4,%xmm7
    340 	movdqa	%xmm8,%xmm11
    341 	movdqa	96(%rbp),%xmm15
    342 	paddd	.sse_inc(%rip),%xmm15
    343 	movdqa	%xmm15,%xmm14
    344 	paddd	.sse_inc(%rip),%xmm14
    345 	movdqa	%xmm14,%xmm13
    346 	paddd	.sse_inc(%rip),%xmm13
    347 	movdqa	%xmm13,%xmm12
    348 	paddd	.sse_inc(%rip),%xmm12
    349 	movdqa	%xmm12,96(%rbp)
    350 	movdqa	%xmm13,112(%rbp)
    351 	movdqa	%xmm14,128(%rbp)
    352 	movdqa	%xmm15,144(%rbp)
    353 
    354 
    355 
    356 	movq	$4,%rcx
    357 	movq	%rsi,%r8
    358 1:
    359 	movdqa	%xmm8,80(%rbp)
    360 	movdqa	.rol16(%rip),%xmm8
    361 	paddd	%xmm7,%xmm3
    362 	paddd	%xmm6,%xmm2
    363 	paddd	%xmm5,%xmm1
    364 	paddd	%xmm4,%xmm0
    365 	pxor	%xmm3,%xmm15
    366 	pxor	%xmm2,%xmm14
    367 	pxor	%xmm1,%xmm13
    368 	pxor	%xmm0,%xmm12
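	// Encoded pshufb %xmm8,%xmm15/%xmm14/%xmm13/%xmm12 (%xmm8 holds the
	// rotate mask loaded above).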
    369 .byte	102,69,15,56,0,248
    370 .byte	102,69,15,56,0,240
    371 .byte	102,69,15,56,0,232
    372 .byte	102,69,15,56,0,224
    373 	movdqa	80(%rbp),%xmm8
    374 	paddd	%xmm15,%xmm11
    375 	paddd	%xmm14,%xmm10
    376 	paddd	%xmm13,%xmm9
    377 	paddd	%xmm12,%xmm8
    378 	pxor	%xmm11,%xmm7
    379 	addq	0(%r8),%r10
    380 	adcq	8+0(%r8),%r11
    381 	adcq	$1,%r12
    382 
    383 	leaq	16(%r8),%r8
    384 	pxor	%xmm10,%xmm6
    385 	pxor	%xmm9,%xmm5
    386 	pxor	%xmm8,%xmm4
    387 	movdqa	%xmm8,80(%rbp)
    388 	movdqa	%xmm7,%xmm8
    389 	psrld	$20,%xmm8
    390 	pslld	$32-20,%xmm7
    391 	pxor	%xmm8,%xmm7
    392 	movdqa	%xmm6,%xmm8
    393 	psrld	$20,%xmm8
    394 	pslld	$32-20,%xmm6
    395 	pxor	%xmm8,%xmm6
    396 	movdqa	%xmm5,%xmm8
    397 	psrld	$20,%xmm8
    398 	pslld	$32-20,%xmm5
    399 	pxor	%xmm8,%xmm5
    400 	movdqa	%xmm4,%xmm8
    401 	psrld	$20,%xmm8
    402 	pslld	$32-20,%xmm4
    403 	pxor	%xmm8,%xmm4
    404 	movq	0+0(%rbp),%rax
    405 	movq	%rax,%r15
    406 	mulq	%r10
    407 	movq	%rax,%r13
    408 	movq	%rdx,%r14
    409 	movq	0+0(%rbp),%rax
    410 	mulq	%r11
    411 	imulq	%r12,%r15
    412 	addq	%rax,%r14
    413 	adcq	%rdx,%r15
    414 	movdqa	.rol8(%rip),%xmm8
    415 	paddd	%xmm7,%xmm3
    416 	paddd	%xmm6,%xmm2
    417 	paddd	%xmm5,%xmm1
    418 	paddd	%xmm4,%xmm0
    419 	pxor	%xmm3,%xmm15
    420 	pxor	%xmm2,%xmm14
    421 	pxor	%xmm1,%xmm13
    422 	pxor	%xmm0,%xmm12
    423 .byte	102,69,15,56,0,248
    424 .byte	102,69,15,56,0,240
    425 .byte	102,69,15,56,0,232
    426 .byte	102,69,15,56,0,224
    427 	movdqa	80(%rbp),%xmm8
    428 	paddd	%xmm15,%xmm11
    429 	paddd	%xmm14,%xmm10
    430 	paddd	%xmm13,%xmm9
    431 	paddd	%xmm12,%xmm8
    432 	pxor	%xmm11,%xmm7
    433 	pxor	%xmm10,%xmm6
    434 	movq	8+0(%rbp),%rax
    435 	movq	%rax,%r9
    436 	mulq	%r10
    437 	addq	%rax,%r14
    438 	adcq	$0,%rdx
    439 	movq	%rdx,%r10
    440 	movq	8+0(%rbp),%rax
    441 	mulq	%r11
    442 	addq	%rax,%r15
    443 	adcq	$0,%rdx
    444 	pxor	%xmm9,%xmm5
    445 	pxor	%xmm8,%xmm4
    446 	movdqa	%xmm8,80(%rbp)
    447 	movdqa	%xmm7,%xmm8
    448 	psrld	$25,%xmm8
    449 	pslld	$32-25,%xmm7
    450 	pxor	%xmm8,%xmm7
    451 	movdqa	%xmm6,%xmm8
    452 	psrld	$25,%xmm8
    453 	pslld	$32-25,%xmm6
    454 	pxor	%xmm8,%xmm6
    455 	movdqa	%xmm5,%xmm8
    456 	psrld	$25,%xmm8
    457 	pslld	$32-25,%xmm5
    458 	pxor	%xmm8,%xmm5
    459 	movdqa	%xmm4,%xmm8
    460 	psrld	$25,%xmm8
    461 	pslld	$32-25,%xmm4
    462 	pxor	%xmm8,%xmm4
    463 	movdqa	80(%rbp),%xmm8
    464 	imulq	%r12,%r9
    465 	addq	%r10,%r15
    466 	adcq	%rdx,%r9
    467 .byte	102,15,58,15,255,4
    468 .byte	102,69,15,58,15,219,8
    469 .byte	102,69,15,58,15,255,12
    470 .byte	102,15,58,15,246,4
    471 .byte	102,69,15,58,15,210,8
    472 .byte	102,69,15,58,15,246,12
    473 .byte	102,15,58,15,237,4
    474 .byte	102,69,15,58,15,201,8
    475 .byte	102,69,15,58,15,237,12
    476 .byte	102,15,58,15,228,4
    477 .byte	102,69,15,58,15,192,8
    478 .byte	102,69,15,58,15,228,12
    479 	movdqa	%xmm8,80(%rbp)
    480 	movdqa	.rol16(%rip),%xmm8
    481 	paddd	%xmm7,%xmm3
    482 	paddd	%xmm6,%xmm2
    483 	paddd	%xmm5,%xmm1
    484 	paddd	%xmm4,%xmm0
    485 	pxor	%xmm3,%xmm15
    486 	pxor	%xmm2,%xmm14
    487 	movq	%r13,%r10
    488 	movq	%r14,%r11
    489 	movq	%r15,%r12
    490 	andq	$3,%r12
    491 	movq	%r15,%r13
    492 	andq	$-4,%r13
    493 	movq	%r9,%r14
    494 	shrdq	$2,%r9,%r15
    495 	shrq	$2,%r9
    496 	addq	%r13,%r10
    497 	adcq	%r14,%r11
    498 	adcq	$0,%r12
    499 	addq	%r15,%r10
    500 	adcq	%r9,%r11
    501 	adcq	$0,%r12
    502 	pxor	%xmm1,%xmm13
    503 	pxor	%xmm0,%xmm12
    504 .byte	102,69,15,56,0,248
    505 .byte	102,69,15,56,0,240
    506 .byte	102,69,15,56,0,232
    507 .byte	102,69,15,56,0,224
    508 	movdqa	80(%rbp),%xmm8
    509 	paddd	%xmm15,%xmm11
    510 	paddd	%xmm14,%xmm10
    511 	paddd	%xmm13,%xmm9
    512 	paddd	%xmm12,%xmm8
    513 	pxor	%xmm11,%xmm7
    514 	pxor	%xmm10,%xmm6
    515 	pxor	%xmm9,%xmm5
    516 	pxor	%xmm8,%xmm4
    517 	movdqa	%xmm8,80(%rbp)
    518 	movdqa	%xmm7,%xmm8
    519 	psrld	$20,%xmm8
    520 	pslld	$32-20,%xmm7
    521 	pxor	%xmm8,%xmm7
    522 	movdqa	%xmm6,%xmm8
    523 	psrld	$20,%xmm8
    524 	pslld	$32-20,%xmm6
    525 	pxor	%xmm8,%xmm6
    526 	movdqa	%xmm5,%xmm8
    527 	psrld	$20,%xmm8
    528 	pslld	$32-20,%xmm5
    529 	pxor	%xmm8,%xmm5
    530 	movdqa	%xmm4,%xmm8
    531 	psrld	$20,%xmm8
    532 	pslld	$32-20,%xmm4
    533 	pxor	%xmm8,%xmm4
    534 	movdqa	.rol8(%rip),%xmm8
    535 	paddd	%xmm7,%xmm3
    536 	paddd	%xmm6,%xmm2
    537 	paddd	%xmm5,%xmm1
    538 	paddd	%xmm4,%xmm0
    539 	pxor	%xmm3,%xmm15
    540 	pxor	%xmm2,%xmm14
    541 	pxor	%xmm1,%xmm13
    542 	pxor	%xmm0,%xmm12
    543 .byte	102,69,15,56,0,248
    544 .byte	102,69,15,56,0,240
    545 .byte	102,69,15,56,0,232
    546 .byte	102,69,15,56,0,224
    547 	movdqa	80(%rbp),%xmm8
    548 	paddd	%xmm15,%xmm11
    549 	paddd	%xmm14,%xmm10
    550 	paddd	%xmm13,%xmm9
    551 	paddd	%xmm12,%xmm8
    552 	pxor	%xmm11,%xmm7
    553 	pxor	%xmm10,%xmm6
    554 	pxor	%xmm9,%xmm5
    555 	pxor	%xmm8,%xmm4
    556 	movdqa	%xmm8,80(%rbp)
    557 	movdqa	%xmm7,%xmm8
    558 	psrld	$25,%xmm8
    559 	pslld	$32-25,%xmm7
    560 	pxor	%xmm8,%xmm7
    561 	movdqa	%xmm6,%xmm8
    562 	psrld	$25,%xmm8
    563 	pslld	$32-25,%xmm6
    564 	pxor	%xmm8,%xmm6
    565 	movdqa	%xmm5,%xmm8
    566 	psrld	$25,%xmm8
    567 	pslld	$32-25,%xmm5
    568 	pxor	%xmm8,%xmm5
    569 	movdqa	%xmm4,%xmm8
    570 	psrld	$25,%xmm8
    571 	pslld	$32-25,%xmm4
    572 	pxor	%xmm8,%xmm4
    573 	movdqa	80(%rbp),%xmm8
    574 .byte	102,15,58,15,255,12
    575 .byte	102,69,15,58,15,219,8
    576 .byte	102,69,15,58,15,255,4
    577 .byte	102,15,58,15,246,12
    578 .byte	102,69,15,58,15,210,8
    579 .byte	102,69,15,58,15,246,4
    580 .byte	102,15,58,15,237,12
    581 .byte	102,69,15,58,15,201,8
    582 .byte	102,69,15,58,15,237,4
    583 .byte	102,15,58,15,228,12
    584 .byte	102,69,15,58,15,192,8
    585 .byte	102,69,15,58,15,228,4
    586 
    587 	decq	%rcx
    588 	jge	1b
    589 	addq	0(%r8),%r10
    590 	adcq	8+0(%r8),%r11
    591 	adcq	$1,%r12
    592 	movq	0+0(%rbp),%rax
    593 	movq	%rax,%r15
    594 	mulq	%r10
    595 	movq	%rax,%r13
    596 	movq	%rdx,%r14
    597 	movq	0+0(%rbp),%rax
    598 	mulq	%r11
    599 	imulq	%r12,%r15
    600 	addq	%rax,%r14
    601 	adcq	%rdx,%r15
    602 	movq	8+0(%rbp),%rax
    603 	movq	%rax,%r9
    604 	mulq	%r10
    605 	addq	%rax,%r14
    606 	adcq	$0,%rdx
    607 	movq	%rdx,%r10
    608 	movq	8+0(%rbp),%rax
    609 	mulq	%r11
    610 	addq	%rax,%r15
    611 	adcq	$0,%rdx
    612 	imulq	%r12,%r9
    613 	addq	%r10,%r15
    614 	adcq	%rdx,%r9
    615 	movq	%r13,%r10
    616 	movq	%r14,%r11
    617 	movq	%r15,%r12
    618 	andq	$3,%r12
    619 	movq	%r15,%r13
    620 	andq	$-4,%r13
    621 	movq	%r9,%r14
    622 	shrdq	$2,%r9,%r15
    623 	shrq	$2,%r9
    624 	addq	%r13,%r10
    625 	adcq	%r14,%r11
    626 	adcq	$0,%r12
    627 	addq	%r15,%r10
    628 	adcq	%r9,%r11
    629 	adcq	$0,%r12
    630 
    631 	leaq	16(%r8),%r8
    632 	cmpq	$-6,%rcx
    633 	jg	1b
    634 	paddd	.chacha20_consts(%rip),%xmm3
    635 	paddd	48(%rbp),%xmm7
    636 	paddd	64(%rbp),%xmm11
    637 	paddd	144(%rbp),%xmm15
    638 	paddd	.chacha20_consts(%rip),%xmm2
    639 	paddd	48(%rbp),%xmm6
    640 	paddd	64(%rbp),%xmm10
    641 	paddd	128(%rbp),%xmm14
    642 	paddd	.chacha20_consts(%rip),%xmm1
    643 	paddd	48(%rbp),%xmm5
    644 	paddd	64(%rbp),%xmm9
    645 	paddd	112(%rbp),%xmm13
    646 	paddd	.chacha20_consts(%rip),%xmm0
    647 	paddd	48(%rbp),%xmm4
    648 	paddd	64(%rbp),%xmm8
    649 	paddd	96(%rbp),%xmm12
    650 	movdqa	%xmm12,80(%rbp)
    651 	movdqu	0 + 0(%rsi),%xmm12
    652 	pxor	%xmm3,%xmm12
    653 	movdqu	%xmm12,0 + 0(%rdi)
    654 	movdqu	16 + 0(%rsi),%xmm12
    655 	pxor	%xmm7,%xmm12
    656 	movdqu	%xmm12,16 + 0(%rdi)
    657 	movdqu	32 + 0(%rsi),%xmm12
    658 	pxor	%xmm11,%xmm12
    659 	movdqu	%xmm12,32 + 0(%rdi)
    660 	movdqu	48 + 0(%rsi),%xmm12
    661 	pxor	%xmm15,%xmm12
    662 	movdqu	%xmm12,48 + 0(%rdi)
    663 	movdqu	0 + 64(%rsi),%xmm3
    664 	movdqu	16 + 64(%rsi),%xmm7
    665 	movdqu	32 + 64(%rsi),%xmm11
    666 	movdqu	48 + 64(%rsi),%xmm15
    667 	pxor	%xmm3,%xmm2
    668 	pxor	%xmm7,%xmm6
    669 	pxor	%xmm11,%xmm10
    670 	pxor	%xmm14,%xmm15
    671 	movdqu	%xmm2,0 + 64(%rdi)
    672 	movdqu	%xmm6,16 + 64(%rdi)
    673 	movdqu	%xmm10,32 + 64(%rdi)
    674 	movdqu	%xmm15,48 + 64(%rdi)
    675 	movdqu	0 + 128(%rsi),%xmm3
    676 	movdqu	16 + 128(%rsi),%xmm7
    677 	movdqu	32 + 128(%rsi),%xmm11
    678 	movdqu	48 + 128(%rsi),%xmm15
    679 	pxor	%xmm3,%xmm1
    680 	pxor	%xmm7,%xmm5
    681 	pxor	%xmm11,%xmm9
    682 	pxor	%xmm13,%xmm15
    683 	movdqu	%xmm1,0 + 128(%rdi)
    684 	movdqu	%xmm5,16 + 128(%rdi)
    685 	movdqu	%xmm9,32 + 128(%rdi)
    686 	movdqu	%xmm15,48 + 128(%rdi)
    687 	movdqu	0 + 192(%rsi),%xmm3
    688 	movdqu	16 + 192(%rsi),%xmm7
    689 	movdqu	32 + 192(%rsi),%xmm11
    690 	movdqu	48 + 192(%rsi),%xmm15
    691 	pxor	%xmm3,%xmm0
    692 	pxor	%xmm7,%xmm4
    693 	pxor	%xmm11,%xmm8
    694 	pxor	80(%rbp),%xmm15
    695 	movdqu	%xmm0,0 + 192(%rdi)
    696 	movdqu	%xmm4,16 + 192(%rdi)
    697 	movdqu	%xmm8,32 + 192(%rdi)
    698 	movdqu	%xmm15,48 + 192(%rdi)
    699 
    700 	leaq	256(%rsi),%rsi
    701 	leaq	256(%rdi),%rdi
    702 	subq	$256,%rbx
    703 	jmp	open_sse_main_loop
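// At most 255 bytes remain: choose a 1-, 2-, 3- or 4-block tail, hashing
// full 16-byte ciphertext chunks while the keystream blocks are computed.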
    704 2:
    705 
    706 	testq	%rbx,%rbx
    707 	jz	open_sse_finalize
    708 	cmpq	$64,%rbx
    709 	ja	3f
    710 	movdqa	.chacha20_consts(%rip),%xmm0
    711 	movdqa	48(%rbp),%xmm4
    712 	movdqa	64(%rbp),%xmm8
    713 	movdqa	96(%rbp),%xmm12
    714 	paddd	.sse_inc(%rip),%xmm12
    715 	movdqa	%xmm12,96(%rbp)
    716 
    717 	xorq	%r8,%r8
    718 	movq	%rbx,%rcx
    719 	cmpq	$16,%rcx
    720 	jb	2f
    721 1:
    722 	addq	0(%rsi,%r8), %r10
    723 	adcq	8+0(%rsi,%r8), %r11
    724 	adcq	$1,%r12
    725 	movq	0+0(%rbp),%rax
    726 	movq	%rax,%r15
    727 	mulq	%r10
    728 	movq	%rax,%r13
    729 	movq	%rdx,%r14
    730 	movq	0+0(%rbp),%rax
    731 	mulq	%r11
    732 	imulq	%r12,%r15
    733 	addq	%rax,%r14
    734 	adcq	%rdx,%r15
    735 	movq	8+0(%rbp),%rax
    736 	movq	%rax,%r9
    737 	mulq	%r10
    738 	addq	%rax,%r14
    739 	adcq	$0,%rdx
    740 	movq	%rdx,%r10
    741 	movq	8+0(%rbp),%rax
    742 	mulq	%r11
    743 	addq	%rax,%r15
    744 	adcq	$0,%rdx
    745 	imulq	%r12,%r9
    746 	addq	%r10,%r15
    747 	adcq	%rdx,%r9
    748 	movq	%r13,%r10
    749 	movq	%r14,%r11
    750 	movq	%r15,%r12
    751 	andq	$3,%r12
    752 	movq	%r15,%r13
    753 	andq	$-4,%r13
    754 	movq	%r9,%r14
    755 	shrdq	$2,%r9,%r15
    756 	shrq	$2,%r9
    757 	addq	%r13,%r10
    758 	adcq	%r14,%r11
    759 	adcq	$0,%r12
    760 	addq	%r15,%r10
    761 	adcq	%r9,%r11
    762 	adcq	$0,%r12
    763 
    764 	subq	$16,%rcx
    765 2:
    766 	addq	$16,%r8
    767 	paddd	%xmm4,%xmm0
    768 	pxor	%xmm0,%xmm12
    769 	pshufb	.rol16(%rip),%xmm12
    770 	paddd	%xmm12,%xmm8
    771 	pxor	%xmm8,%xmm4
    772 	movdqa	%xmm4,%xmm3
    773 	pslld	$12,%xmm3
    774 	psrld	$20,%xmm4
    775 	pxor	%xmm3,%xmm4
    776 	paddd	%xmm4,%xmm0
    777 	pxor	%xmm0,%xmm12
    778 	pshufb	.rol8(%rip),%xmm12
    779 	paddd	%xmm12,%xmm8
    780 	pxor	%xmm8,%xmm4
    781 	movdqa	%xmm4,%xmm3
    782 	pslld	$7,%xmm3
    783 	psrld	$25,%xmm4
    784 	pxor	%xmm3,%xmm4
    785 .byte	102,15,58,15,228,4
    786 .byte	102,69,15,58,15,192,8
    787 .byte	102,69,15,58,15,228,12
    788 	paddd	%xmm4,%xmm0
    789 	pxor	%xmm0,%xmm12
    790 	pshufb	.rol16(%rip),%xmm12
    791 	paddd	%xmm12,%xmm8
    792 	pxor	%xmm8,%xmm4
    793 	movdqa	%xmm4,%xmm3
    794 	pslld	$12,%xmm3
    795 	psrld	$20,%xmm4
    796 	pxor	%xmm3,%xmm4
    797 	paddd	%xmm4,%xmm0
    798 	pxor	%xmm0,%xmm12
    799 	pshufb	.rol8(%rip),%xmm12
    800 	paddd	%xmm12,%xmm8
    801 	pxor	%xmm8,%xmm4
    802 	movdqa	%xmm4,%xmm3
    803 	pslld	$7,%xmm3
    804 	psrld	$25,%xmm4
    805 	pxor	%xmm3,%xmm4
    806 .byte	102,15,58,15,228,12
    807 .byte	102,69,15,58,15,192,8
    808 .byte	102,69,15,58,15,228,4
    809 
    810 	cmpq	$16,%rcx
    811 	jae	1b
    812 	cmpq	$160,%r8
    813 	jne	2b
    814 	paddd	.chacha20_consts(%rip),%xmm0
    815 	paddd	48(%rbp),%xmm4
    816 	paddd	64(%rbp),%xmm8
    817 	paddd	96(%rbp),%xmm12
    818 
    819 	jmp	open_sse_tail_64_dec_loop
    820 3:
    821 	cmpq	$128,%rbx
    822 	ja	3f
    823 	movdqa	.chacha20_consts(%rip),%xmm0
    824 	movdqa	48(%rbp),%xmm4
    825 	movdqa	64(%rbp),%xmm8
    826 	movdqa	%xmm0,%xmm1
    827 	movdqa	%xmm4,%xmm5
    828 	movdqa	%xmm8,%xmm9
    829 	movdqa	96(%rbp),%xmm13
    830 	paddd	.sse_inc(%rip),%xmm13
    831 	movdqa	%xmm13,%xmm12
    832 	paddd	.sse_inc(%rip),%xmm12
    833 	movdqa	%xmm12,96(%rbp)
    834 	movdqa	%xmm13,112(%rbp)
    835 
    836 	movq	%rbx,%rcx
    837 	andq	$-16,%rcx
    838 	xorq	%r8,%r8
    839 1:
    840 	addq	0(%rsi,%r8), %r10
    841 	adcq	8+0(%rsi,%r8), %r11
    842 	adcq	$1,%r12
    843 	movq	0+0(%rbp),%rax
    844 	movq	%rax,%r15
    845 	mulq	%r10
    846 	movq	%rax,%r13
    847 	movq	%rdx,%r14
    848 	movq	0+0(%rbp),%rax
    849 	mulq	%r11
    850 	imulq	%r12,%r15
    851 	addq	%rax,%r14
    852 	adcq	%rdx,%r15
    853 	movq	8+0(%rbp),%rax
    854 	movq	%rax,%r9
    855 	mulq	%r10
    856 	addq	%rax,%r14
    857 	adcq	$0,%rdx
    858 	movq	%rdx,%r10
    859 	movq	8+0(%rbp),%rax
    860 	mulq	%r11
    861 	addq	%rax,%r15
    862 	adcq	$0,%rdx
    863 	imulq	%r12,%r9
    864 	addq	%r10,%r15
    865 	adcq	%rdx,%r9
    866 	movq	%r13,%r10
    867 	movq	%r14,%r11
    868 	movq	%r15,%r12
    869 	andq	$3,%r12
    870 	movq	%r15,%r13
    871 	andq	$-4,%r13
    872 	movq	%r9,%r14
    873 	shrdq	$2,%r9,%r15
    874 	shrq	$2,%r9
    875 	addq	%r13,%r10
    876 	adcq	%r14,%r11
    877 	adcq	$0,%r12
    878 	addq	%r15,%r10
    879 	adcq	%r9,%r11
    880 	adcq	$0,%r12
    881 
    882 2:
    883 	addq	$16,%r8
    884 	paddd	%xmm4,%xmm0
    885 	pxor	%xmm0,%xmm12
    886 	pshufb	.rol16(%rip),%xmm12
    887 	paddd	%xmm12,%xmm8
    888 	pxor	%xmm8,%xmm4
    889 	movdqa	%xmm4,%xmm3
    890 	pslld	$12,%xmm3
    891 	psrld	$20,%xmm4
    892 	pxor	%xmm3,%xmm4
    893 	paddd	%xmm4,%xmm0
    894 	pxor	%xmm0,%xmm12
    895 	pshufb	.rol8(%rip),%xmm12
    896 	paddd	%xmm12,%xmm8
    897 	pxor	%xmm8,%xmm4
    898 	movdqa	%xmm4,%xmm3
    899 	pslld	$7,%xmm3
    900 	psrld	$25,%xmm4
    901 	pxor	%xmm3,%xmm4
    902 .byte	102,15,58,15,228,4
    903 .byte	102,69,15,58,15,192,8
    904 .byte	102,69,15,58,15,228,12
    905 	paddd	%xmm5,%xmm1
    906 	pxor	%xmm1,%xmm13
    907 	pshufb	.rol16(%rip),%xmm13
    908 	paddd	%xmm13,%xmm9
    909 	pxor	%xmm9,%xmm5
    910 	movdqa	%xmm5,%xmm3
    911 	pslld	$12,%xmm3
    912 	psrld	$20,%xmm5
    913 	pxor	%xmm3,%xmm5
    914 	paddd	%xmm5,%xmm1
    915 	pxor	%xmm1,%xmm13
    916 	pshufb	.rol8(%rip),%xmm13
    917 	paddd	%xmm13,%xmm9
    918 	pxor	%xmm9,%xmm5
    919 	movdqa	%xmm5,%xmm3
    920 	pslld	$7,%xmm3
    921 	psrld	$25,%xmm5
    922 	pxor	%xmm3,%xmm5
    923 .byte	102,15,58,15,237,4
    924 .byte	102,69,15,58,15,201,8
    925 .byte	102,69,15,58,15,237,12
    926 	paddd	%xmm4,%xmm0
    927 	pxor	%xmm0,%xmm12
    928 	pshufb	.rol16(%rip),%xmm12
    929 	paddd	%xmm12,%xmm8
    930 	pxor	%xmm8,%xmm4
    931 	movdqa	%xmm4,%xmm3
    932 	pslld	$12,%xmm3
    933 	psrld	$20,%xmm4
    934 	pxor	%xmm3,%xmm4
    935 	paddd	%xmm4,%xmm0
    936 	pxor	%xmm0,%xmm12
    937 	pshufb	.rol8(%rip),%xmm12
    938 	paddd	%xmm12,%xmm8
    939 	pxor	%xmm8,%xmm4
    940 	movdqa	%xmm4,%xmm3
    941 	pslld	$7,%xmm3
    942 	psrld	$25,%xmm4
    943 	pxor	%xmm3,%xmm4
    944 .byte	102,15,58,15,228,12
    945 .byte	102,69,15,58,15,192,8
    946 .byte	102,69,15,58,15,228,4
    947 	paddd	%xmm5,%xmm1
    948 	pxor	%xmm1,%xmm13
    949 	pshufb	.rol16(%rip),%xmm13
    950 	paddd	%xmm13,%xmm9
    951 	pxor	%xmm9,%xmm5
    952 	movdqa	%xmm5,%xmm3
    953 	pslld	$12,%xmm3
    954 	psrld	$20,%xmm5
    955 	pxor	%xmm3,%xmm5
    956 	paddd	%xmm5,%xmm1
    957 	pxor	%xmm1,%xmm13
    958 	pshufb	.rol8(%rip),%xmm13
    959 	paddd	%xmm13,%xmm9
    960 	pxor	%xmm9,%xmm5
    961 	movdqa	%xmm5,%xmm3
    962 	pslld	$7,%xmm3
    963 	psrld	$25,%xmm5
    964 	pxor	%xmm3,%xmm5
    965 .byte	102,15,58,15,237,12
    966 .byte	102,69,15,58,15,201,8
    967 .byte	102,69,15,58,15,237,4
    968 
    969 	cmpq	%rcx,%r8
    970 	jb	1b
    971 	cmpq	$160,%r8
    972 	jne	2b
    973 	paddd	.chacha20_consts(%rip),%xmm1
    974 	paddd	48(%rbp),%xmm5
    975 	paddd	64(%rbp),%xmm9
    976 	paddd	112(%rbp),%xmm13
    977 	paddd	.chacha20_consts(%rip),%xmm0
    978 	paddd	48(%rbp),%xmm4
    979 	paddd	64(%rbp),%xmm8
    980 	paddd	96(%rbp),%xmm12
    981 	movdqu	0 + 0(%rsi),%xmm3
    982 	movdqu	16 + 0(%rsi),%xmm7
    983 	movdqu	32 + 0(%rsi),%xmm11
    984 	movdqu	48 + 0(%rsi),%xmm15
    985 	pxor	%xmm3,%xmm1
    986 	pxor	%xmm7,%xmm5
    987 	pxor	%xmm11,%xmm9
    988 	pxor	%xmm13,%xmm15
    989 	movdqu	%xmm1,0 + 0(%rdi)
    990 	movdqu	%xmm5,16 + 0(%rdi)
    991 	movdqu	%xmm9,32 + 0(%rdi)
    992 	movdqu	%xmm15,48 + 0(%rdi)
    993 
    994 	subq	$64,%rbx
    995 	leaq	64(%rsi),%rsi
    996 	leaq	64(%rdi),%rdi
    997 	jmp	open_sse_tail_64_dec_loop
    998 3:
    999 	cmpq	$192,%rbx
   1000 	ja	3f
   1001 	movdqa	.chacha20_consts(%rip),%xmm0
   1002 	movdqa	48(%rbp),%xmm4
   1003 	movdqa	64(%rbp),%xmm8
   1004 	movdqa	%xmm0,%xmm1
   1005 	movdqa	%xmm4,%xmm5
   1006 	movdqa	%xmm8,%xmm9
   1007 	movdqa	%xmm0,%xmm2
   1008 	movdqa	%xmm4,%xmm6
   1009 	movdqa	%xmm8,%xmm10
   1010 	movdqa	96(%rbp),%xmm14
   1011 	paddd	.sse_inc(%rip),%xmm14
   1012 	movdqa	%xmm14,%xmm13
   1013 	paddd	.sse_inc(%rip),%xmm13
   1014 	movdqa	%xmm13,%xmm12
   1015 	paddd	.sse_inc(%rip),%xmm12
   1016 	movdqa	%xmm12,96(%rbp)
   1017 	movdqa	%xmm13,112(%rbp)
   1018 	movdqa	%xmm14,128(%rbp)
   1019 
   1020 	movq	%rbx,%rcx
   1021 	movq	$160,%r8
   1022 	cmpq	$160,%rcx
   1023 	cmovgq	%r8,%rcx
   1024 	andq	$-16,%rcx
   1025 	xorq	%r8,%r8
   1026 1:
   1027 	addq	0(%rsi,%r8), %r10
   1028 	adcq	8+0(%rsi,%r8), %r11
   1029 	adcq	$1,%r12
   1030 	movq	0+0(%rbp),%rax
   1031 	movq	%rax,%r15
   1032 	mulq	%r10
   1033 	movq	%rax,%r13
   1034 	movq	%rdx,%r14
   1035 	movq	0+0(%rbp),%rax
   1036 	mulq	%r11
   1037 	imulq	%r12,%r15
   1038 	addq	%rax,%r14
   1039 	adcq	%rdx,%r15
   1040 	movq	8+0(%rbp),%rax
   1041 	movq	%rax,%r9
   1042 	mulq	%r10
   1043 	addq	%rax,%r14
   1044 	adcq	$0,%rdx
   1045 	movq	%rdx,%r10
   1046 	movq	8+0(%rbp),%rax
   1047 	mulq	%r11
   1048 	addq	%rax,%r15
   1049 	adcq	$0,%rdx
   1050 	imulq	%r12,%r9
   1051 	addq	%r10,%r15
   1052 	adcq	%rdx,%r9
   1053 	movq	%r13,%r10
   1054 	movq	%r14,%r11
   1055 	movq	%r15,%r12
   1056 	andq	$3,%r12
   1057 	movq	%r15,%r13
   1058 	andq	$-4,%r13
   1059 	movq	%r9,%r14
   1060 	shrdq	$2,%r9,%r15
   1061 	shrq	$2,%r9
   1062 	addq	%r13,%r10
   1063 	adcq	%r14,%r11
   1064 	adcq	$0,%r12
   1065 	addq	%r15,%r10
   1066 	adcq	%r9,%r11
   1067 	adcq	$0,%r12
   1068 
   1069 2:
   1070 	addq	$16,%r8
   1071 	paddd	%xmm4,%xmm0
   1072 	pxor	%xmm0,%xmm12
   1073 	pshufb	.rol16(%rip),%xmm12
   1074 	paddd	%xmm12,%xmm8
   1075 	pxor	%xmm8,%xmm4
   1076 	movdqa	%xmm4,%xmm3
   1077 	pslld	$12,%xmm3
   1078 	psrld	$20,%xmm4
   1079 	pxor	%xmm3,%xmm4
   1080 	paddd	%xmm4,%xmm0
   1081 	pxor	%xmm0,%xmm12
   1082 	pshufb	.rol8(%rip),%xmm12
   1083 	paddd	%xmm12,%xmm8
   1084 	pxor	%xmm8,%xmm4
   1085 	movdqa	%xmm4,%xmm3
   1086 	pslld	$7,%xmm3
   1087 	psrld	$25,%xmm4
   1088 	pxor	%xmm3,%xmm4
   1089 .byte	102,15,58,15,228,4
   1090 .byte	102,69,15,58,15,192,8
   1091 .byte	102,69,15,58,15,228,12
   1092 	paddd	%xmm5,%xmm1
   1093 	pxor	%xmm1,%xmm13
   1094 	pshufb	.rol16(%rip),%xmm13
   1095 	paddd	%xmm13,%xmm9
   1096 	pxor	%xmm9,%xmm5
   1097 	movdqa	%xmm5,%xmm3
   1098 	pslld	$12,%xmm3
   1099 	psrld	$20,%xmm5
   1100 	pxor	%xmm3,%xmm5
   1101 	paddd	%xmm5,%xmm1
   1102 	pxor	%xmm1,%xmm13
   1103 	pshufb	.rol8(%rip),%xmm13
   1104 	paddd	%xmm13,%xmm9
   1105 	pxor	%xmm9,%xmm5
   1106 	movdqa	%xmm5,%xmm3
   1107 	pslld	$7,%xmm3
   1108 	psrld	$25,%xmm5
   1109 	pxor	%xmm3,%xmm5
   1110 .byte	102,15,58,15,237,4
   1111 .byte	102,69,15,58,15,201,8
   1112 .byte	102,69,15,58,15,237,12
   1113 	paddd	%xmm6,%xmm2
   1114 	pxor	%xmm2,%xmm14
   1115 	pshufb	.rol16(%rip),%xmm14
   1116 	paddd	%xmm14,%xmm10
   1117 	pxor	%xmm10,%xmm6
   1118 	movdqa	%xmm6,%xmm3
   1119 	pslld	$12,%xmm3
   1120 	psrld	$20,%xmm6
   1121 	pxor	%xmm3,%xmm6
   1122 	paddd	%xmm6,%xmm2
   1123 	pxor	%xmm2,%xmm14
   1124 	pshufb	.rol8(%rip),%xmm14
   1125 	paddd	%xmm14,%xmm10
   1126 	pxor	%xmm10,%xmm6
   1127 	movdqa	%xmm6,%xmm3
   1128 	pslld	$7,%xmm3
   1129 	psrld	$25,%xmm6
   1130 	pxor	%xmm3,%xmm6
   1131 .byte	102,15,58,15,246,4
   1132 .byte	102,69,15,58,15,210,8
   1133 .byte	102,69,15,58,15,246,12
   1134 	paddd	%xmm4,%xmm0
   1135 	pxor	%xmm0,%xmm12
   1136 	pshufb	.rol16(%rip),%xmm12
   1137 	paddd	%xmm12,%xmm8
   1138 	pxor	%xmm8,%xmm4
   1139 	movdqa	%xmm4,%xmm3
   1140 	pslld	$12,%xmm3
   1141 	psrld	$20,%xmm4
   1142 	pxor	%xmm3,%xmm4
   1143 	paddd	%xmm4,%xmm0
   1144 	pxor	%xmm0,%xmm12
   1145 	pshufb	.rol8(%rip),%xmm12
   1146 	paddd	%xmm12,%xmm8
   1147 	pxor	%xmm8,%xmm4
   1148 	movdqa	%xmm4,%xmm3
   1149 	pslld	$7,%xmm3
   1150 	psrld	$25,%xmm4
   1151 	pxor	%xmm3,%xmm4
   1152 .byte	102,15,58,15,228,12
   1153 .byte	102,69,15,58,15,192,8
   1154 .byte	102,69,15,58,15,228,4
   1155 	paddd	%xmm5,%xmm1
   1156 	pxor	%xmm1,%xmm13
   1157 	pshufb	.rol16(%rip),%xmm13
   1158 	paddd	%xmm13,%xmm9
   1159 	pxor	%xmm9,%xmm5
   1160 	movdqa	%xmm5,%xmm3
   1161 	pslld	$12,%xmm3
   1162 	psrld	$20,%xmm5
   1163 	pxor	%xmm3,%xmm5
   1164 	paddd	%xmm5,%xmm1
   1165 	pxor	%xmm1,%xmm13
   1166 	pshufb	.rol8(%rip),%xmm13
   1167 	paddd	%xmm13,%xmm9
   1168 	pxor	%xmm9,%xmm5
   1169 	movdqa	%xmm5,%xmm3
   1170 	pslld	$7,%xmm3
   1171 	psrld	$25,%xmm5
   1172 	pxor	%xmm3,%xmm5
   1173 .byte	102,15,58,15,237,12
   1174 .byte	102,69,15,58,15,201,8
   1175 .byte	102,69,15,58,15,237,4
   1176 	paddd	%xmm6,%xmm2
   1177 	pxor	%xmm2,%xmm14
   1178 	pshufb	.rol16(%rip),%xmm14
   1179 	paddd	%xmm14,%xmm10
   1180 	pxor	%xmm10,%xmm6
   1181 	movdqa	%xmm6,%xmm3
   1182 	pslld	$12,%xmm3
   1183 	psrld	$20,%xmm6
   1184 	pxor	%xmm3,%xmm6
   1185 	paddd	%xmm6,%xmm2
   1186 	pxor	%xmm2,%xmm14
   1187 	pshufb	.rol8(%rip),%xmm14
   1188 	paddd	%xmm14,%xmm10
   1189 	pxor	%xmm10,%xmm6
   1190 	movdqa	%xmm6,%xmm3
   1191 	pslld	$7,%xmm3
   1192 	psrld	$25,%xmm6
   1193 	pxor	%xmm3,%xmm6
   1194 .byte	102,15,58,15,246,12
   1195 .byte	102,69,15,58,15,210,8
   1196 .byte	102,69,15,58,15,246,4
   1197 
   1198 	cmpq	%rcx,%r8
   1199 	jb	1b
   1200 	cmpq	$160,%r8
   1201 	jne	2b
   1202 	cmpq	$176,%rbx
   1203 	jb	1f
   1204 	addq	160(%rsi),%r10
   1205 	adcq	8+160(%rsi),%r11
   1206 	adcq	$1,%r12
   1207 	movq	0+0(%rbp),%rax
   1208 	movq	%rax,%r15
   1209 	mulq	%r10
   1210 	movq	%rax,%r13
   1211 	movq	%rdx,%r14
   1212 	movq	0+0(%rbp),%rax
   1213 	mulq	%r11
   1214 	imulq	%r12,%r15
   1215 	addq	%rax,%r14
   1216 	adcq	%rdx,%r15
   1217 	movq	8+0(%rbp),%rax
   1218 	movq	%rax,%r9
   1219 	mulq	%r10
   1220 	addq	%rax,%r14
   1221 	adcq	$0,%rdx
   1222 	movq	%rdx,%r10
   1223 	movq	8+0(%rbp),%rax
   1224 	mulq	%r11
   1225 	addq	%rax,%r15
   1226 	adcq	$0,%rdx
   1227 	imulq	%r12,%r9
   1228 	addq	%r10,%r15
   1229 	adcq	%rdx,%r9
   1230 	movq	%r13,%r10
   1231 	movq	%r14,%r11
   1232 	movq	%r15,%r12
   1233 	andq	$3,%r12
   1234 	movq	%r15,%r13
   1235 	andq	$-4,%r13
   1236 	movq	%r9,%r14
   1237 	shrdq	$2,%r9,%r15
   1238 	shrq	$2,%r9
   1239 	addq	%r13,%r10
   1240 	adcq	%r14,%r11
   1241 	adcq	$0,%r12
   1242 	addq	%r15,%r10
   1243 	adcq	%r9,%r11
   1244 	adcq	$0,%r12
   1245 
   1246 	cmpq	$192,%rbx
   1247 	jb	1f
   1248 	addq	176(%rsi),%r10
   1249 	adcq	8+176(%rsi),%r11
   1250 	adcq	$1,%r12
   1251 	movq	0+0(%rbp),%rax
   1252 	movq	%rax,%r15
   1253 	mulq	%r10
   1254 	movq	%rax,%r13
   1255 	movq	%rdx,%r14
   1256 	movq	0+0(%rbp),%rax
   1257 	mulq	%r11
   1258 	imulq	%r12,%r15
   1259 	addq	%rax,%r14
   1260 	adcq	%rdx,%r15
   1261 	movq	8+0(%rbp),%rax
   1262 	movq	%rax,%r9
   1263 	mulq	%r10
   1264 	addq	%rax,%r14
   1265 	adcq	$0,%rdx
   1266 	movq	%rdx,%r10
   1267 	movq	8+0(%rbp),%rax
   1268 	mulq	%r11
   1269 	addq	%rax,%r15
   1270 	adcq	$0,%rdx
   1271 	imulq	%r12,%r9
   1272 	addq	%r10,%r15
   1273 	adcq	%rdx,%r9
   1274 	movq	%r13,%r10
   1275 	movq	%r14,%r11
   1276 	movq	%r15,%r12
   1277 	andq	$3,%r12
   1278 	movq	%r15,%r13
   1279 	andq	$-4,%r13
   1280 	movq	%r9,%r14
   1281 	shrdq	$2,%r9,%r15
   1282 	shrq	$2,%r9
   1283 	addq	%r13,%r10
   1284 	adcq	%r14,%r11
   1285 	adcq	$0,%r12
   1286 	addq	%r15,%r10
   1287 	adcq	%r9,%r11
   1288 	adcq	$0,%r12
   1289 
   1290 1:
   1291 	paddd	.chacha20_consts(%rip),%xmm2
   1292 	paddd	48(%rbp),%xmm6
   1293 	paddd	64(%rbp),%xmm10
   1294 	paddd	128(%rbp),%xmm14
   1295 	paddd	.chacha20_consts(%rip),%xmm1
   1296 	paddd	48(%rbp),%xmm5
   1297 	paddd	64(%rbp),%xmm9
   1298 	paddd	112(%rbp),%xmm13
   1299 	paddd	.chacha20_consts(%rip),%xmm0
   1300 	paddd	48(%rbp),%xmm4
   1301 	paddd	64(%rbp),%xmm8
   1302 	paddd	96(%rbp),%xmm12
   1303 	movdqu	0 + 0(%rsi),%xmm3
   1304 	movdqu	16 + 0(%rsi),%xmm7
   1305 	movdqu	32 + 0(%rsi),%xmm11
   1306 	movdqu	48 + 0(%rsi),%xmm15
   1307 	pxor	%xmm3,%xmm2
   1308 	pxor	%xmm7,%xmm6
   1309 	pxor	%xmm11,%xmm10
   1310 	pxor	%xmm14,%xmm15
   1311 	movdqu	%xmm2,0 + 0(%rdi)
   1312 	movdqu	%xmm6,16 + 0(%rdi)
   1313 	movdqu	%xmm10,32 + 0(%rdi)
   1314 	movdqu	%xmm15,48 + 0(%rdi)
   1315 	movdqu	0 + 64(%rsi),%xmm3
   1316 	movdqu	16 + 64(%rsi),%xmm7
   1317 	movdqu	32 + 64(%rsi),%xmm11
   1318 	movdqu	48 + 64(%rsi),%xmm15
   1319 	pxor	%xmm3,%xmm1
   1320 	pxor	%xmm7,%xmm5
   1321 	pxor	%xmm11,%xmm9
   1322 	pxor	%xmm13,%xmm15
   1323 	movdqu	%xmm1,0 + 64(%rdi)
   1324 	movdqu	%xmm5,16 + 64(%rdi)
   1325 	movdqu	%xmm9,32 + 64(%rdi)
   1326 	movdqu	%xmm15,48 + 64(%rdi)
   1327 
   1328 	subq	$128,%rbx
   1329 	leaq	128(%rsi),%rsi
   1330 	leaq	128(%rdi),%rdi
   1331 	jmp	open_sse_tail_64_dec_loop
   1332 3:
   1333 
   1334 	movdqa	.chacha20_consts(%rip),%xmm0
   1335 	movdqa	48(%rbp),%xmm4
   1336 	movdqa	64(%rbp),%xmm8
   1337 	movdqa	%xmm0,%xmm1
   1338 	movdqa	%xmm4,%xmm5
   1339 	movdqa	%xmm8,%xmm9
   1340 	movdqa	%xmm0,%xmm2
   1341 	movdqa	%xmm4,%xmm6
   1342 	movdqa	%xmm8,%xmm10
   1343 	movdqa	%xmm0,%xmm3
   1344 	movdqa	%xmm4,%xmm7
   1345 	movdqa	%xmm8,%xmm11
   1346 	movdqa	96(%rbp),%xmm15
   1347 	paddd	.sse_inc(%rip),%xmm15
   1348 	movdqa	%xmm15,%xmm14
   1349 	paddd	.sse_inc(%rip),%xmm14
   1350 	movdqa	%xmm14,%xmm13
   1351 	paddd	.sse_inc(%rip),%xmm13
   1352 	movdqa	%xmm13,%xmm12
   1353 	paddd	.sse_inc(%rip),%xmm12
   1354 	movdqa	%xmm12,96(%rbp)
   1355 	movdqa	%xmm13,112(%rbp)
   1356 	movdqa	%xmm14,128(%rbp)
   1357 	movdqa	%xmm15,144(%rbp)
   1358 
   1359 	xorq	%r8,%r8
   1360 1:
   1361 	addq	0(%rsi,%r8), %r10
   1362 	adcq	8+0(%rsi,%r8), %r11
   1363 	adcq	$1,%r12
   1364 	movdqa	%xmm11,80(%rbp)
   1365 	paddd	%xmm4,%xmm0
   1366 	pxor	%xmm0,%xmm12
   1367 	pshufb	.rol16(%rip),%xmm12
   1368 	paddd	%xmm12,%xmm8
   1369 	pxor	%xmm8,%xmm4
   1370 	movdqa	%xmm4,%xmm11
   1371 	pslld	$12,%xmm11
   1372 	psrld	$20,%xmm4
   1373 	pxor	%xmm11,%xmm4
   1374 	paddd	%xmm4,%xmm0
   1375 	pxor	%xmm0,%xmm12
   1376 	pshufb	.rol8(%rip),%xmm12
   1377 	paddd	%xmm12,%xmm8
   1378 	pxor	%xmm8,%xmm4
   1379 	movdqa	%xmm4,%xmm11
   1380 	pslld	$7,%xmm11
   1381 	psrld	$25,%xmm4
   1382 	pxor	%xmm11,%xmm4
   1383 .byte	102,15,58,15,228,4
   1384 .byte	102,69,15,58,15,192,8
   1385 .byte	102,69,15,58,15,228,12
   1386 	paddd	%xmm5,%xmm1
   1387 	pxor	%xmm1,%xmm13
   1388 	pshufb	.rol16(%rip),%xmm13
   1389 	paddd	%xmm13,%xmm9
   1390 	pxor	%xmm9,%xmm5
   1391 	movdqa	%xmm5,%xmm11
   1392 	pslld	$12,%xmm11
   1393 	psrld	$20,%xmm5
   1394 	pxor	%xmm11,%xmm5
   1395 	paddd	%xmm5,%xmm1
   1396 	pxor	%xmm1,%xmm13
   1397 	pshufb	.rol8(%rip),%xmm13
   1398 	paddd	%xmm13,%xmm9
   1399 	pxor	%xmm9,%xmm5
   1400 	movdqa	%xmm5,%xmm11
   1401 	pslld	$7,%xmm11
   1402 	psrld	$25,%xmm5
   1403 	pxor	%xmm11,%xmm5
   1404 .byte	102,15,58,15,237,4
   1405 .byte	102,69,15,58,15,201,8
   1406 .byte	102,69,15,58,15,237,12
   1407 	paddd	%xmm6,%xmm2
   1408 	pxor	%xmm2,%xmm14
   1409 	pshufb	.rol16(%rip),%xmm14
   1410 	paddd	%xmm14,%xmm10
   1411 	pxor	%xmm10,%xmm6
   1412 	movdqa	%xmm6,%xmm11
   1413 	pslld	$12,%xmm11
   1414 	psrld	$20,%xmm6
   1415 	pxor	%xmm11,%xmm6
   1416 	paddd	%xmm6,%xmm2
   1417 	pxor	%xmm2,%xmm14
   1418 	pshufb	.rol8(%rip),%xmm14
   1419 	paddd	%xmm14,%xmm10
   1420 	pxor	%xmm10,%xmm6
   1421 	movdqa	%xmm6,%xmm11
   1422 	pslld	$7,%xmm11
   1423 	psrld	$25,%xmm6
   1424 	pxor	%xmm11,%xmm6
   1425 .byte	102,15,58,15,246,4
   1426 .byte	102,69,15,58,15,210,8
   1427 .byte	102,69,15,58,15,246,12
   1428 	movdqa	80(%rbp),%xmm11
   1429 	movq	0+0(%rbp),%rax
   1430 	movq	%rax,%r15
   1431 	mulq	%r10
   1432 	movq	%rax,%r13
   1433 	movq	%rdx,%r14
   1434 	movq	0+0(%rbp),%rax
   1435 	mulq	%r11
   1436 	imulq	%r12,%r15
   1437 	addq	%rax,%r14
   1438 	adcq	%rdx,%r15
   1439 	movdqa	%xmm9,80(%rbp)
   1440 	paddd	%xmm7,%xmm3
   1441 	pxor	%xmm3,%xmm15
   1442 	pshufb	.rol16(%rip),%xmm15
   1443 	paddd	%xmm15,%xmm11
   1444 	pxor	%xmm11,%xmm7
   1445 	movdqa	%xmm7,%xmm9
   1446 	pslld	$12,%xmm9
   1447 	psrld	$20,%xmm7
   1448 	pxor	%xmm9,%xmm7
   1449 	paddd	%xmm7,%xmm3
   1450 	pxor	%xmm3,%xmm15
   1451 	pshufb	.rol8(%rip),%xmm15
   1452 	paddd	%xmm15,%xmm11
   1453 	pxor	%xmm11,%xmm7
   1454 	movdqa	%xmm7,%xmm9
   1455 	pslld	$7,%xmm9
   1456 	psrld	$25,%xmm7
   1457 	pxor	%xmm9,%xmm7
   1458 .byte	102,15,58,15,255,4
   1459 .byte	102,69,15,58,15,219,8
   1460 .byte	102,69,15,58,15,255,12
   1461 	movdqa	80(%rbp),%xmm9
   1462 	movq	8+0(%rbp),%rax
   1463 	movq	%rax,%r9
   1464 	mulq	%r10
   1465 	addq	%rax,%r14
   1466 	adcq	$0,%rdx
   1467 	movq	%rdx,%r10
   1468 	movq	8+0(%rbp),%rax
   1469 	mulq	%r11
   1470 	addq	%rax,%r15
   1471 	adcq	$0,%rdx
   1472 	movdqa	%xmm11,80(%rbp)
   1473 	paddd	%xmm4,%xmm0
   1474 	pxor	%xmm0,%xmm12
   1475 	pshufb	.rol16(%rip),%xmm12
   1476 	paddd	%xmm12,%xmm8
   1477 	pxor	%xmm8,%xmm4
   1478 	movdqa	%xmm4,%xmm11
   1479 	pslld	$12,%xmm11
   1480 	psrld	$20,%xmm4
   1481 	pxor	%xmm11,%xmm4
   1482 	paddd	%xmm4,%xmm0
   1483 	pxor	%xmm0,%xmm12
   1484 	pshufb	.rol8(%rip),%xmm12
   1485 	paddd	%xmm12,%xmm8
   1486 	pxor	%xmm8,%xmm4
   1487 	movdqa	%xmm4,%xmm11
   1488 	pslld	$7,%xmm11
   1489 	psrld	$25,%xmm4
   1490 	pxor	%xmm11,%xmm4
   1491 .byte	102,15,58,15,228,12
   1492 .byte	102,69,15,58,15,192,8
   1493 .byte	102,69,15,58,15,228,4
   1494 	paddd	%xmm5,%xmm1
   1495 	pxor	%xmm1,%xmm13
   1496 	pshufb	.rol16(%rip),%xmm13
   1497 	paddd	%xmm13,%xmm9
   1498 	pxor	%xmm9,%xmm5
   1499 	movdqa	%xmm5,%xmm11
   1500 	pslld	$12,%xmm11
   1501 	psrld	$20,%xmm5
   1502 	pxor	%xmm11,%xmm5
   1503 	paddd	%xmm5,%xmm1
   1504 	pxor	%xmm1,%xmm13
   1505 	pshufb	.rol8(%rip),%xmm13
   1506 	paddd	%xmm13,%xmm9
   1507 	pxor	%xmm9,%xmm5
   1508 	movdqa	%xmm5,%xmm11
   1509 	pslld	$7,%xmm11
   1510 	psrld	$25,%xmm5
   1511 	pxor	%xmm11,%xmm5
   1512 .byte	102,15,58,15,237,12
   1513 .byte	102,69,15,58,15,201,8
   1514 .byte	102,69,15,58,15,237,4
   1515 	imulq	%r12,%r9
   1516 	addq	%r10,%r15
   1517 	adcq	%rdx,%r9
   1518 	paddd	%xmm6,%xmm2
   1519 	pxor	%xmm2,%xmm14
   1520 	pshufb	.rol16(%rip),%xmm14
   1521 	paddd	%xmm14,%xmm10
   1522 	pxor	%xmm10,%xmm6
   1523 	movdqa	%xmm6,%xmm11
   1524 	pslld	$12,%xmm11
   1525 	psrld	$20,%xmm6
   1526 	pxor	%xmm11,%xmm6
   1527 	paddd	%xmm6,%xmm2
   1528 	pxor	%xmm2,%xmm14
   1529 	pshufb	.rol8(%rip),%xmm14
   1530 	paddd	%xmm14,%xmm10
   1531 	pxor	%xmm10,%xmm6
   1532 	movdqa	%xmm6,%xmm11
   1533 	pslld	$7,%xmm11
   1534 	psrld	$25,%xmm6
   1535 	pxor	%xmm11,%xmm6
   1536 .byte	102,15,58,15,246,12
   1537 .byte	102,69,15,58,15,210,8
   1538 .byte	102,69,15,58,15,246,4
   1539 	movdqa	80(%rbp),%xmm11
   1540 	movq	%r13,%r10
   1541 	movq	%r14,%r11
   1542 	movq	%r15,%r12
   1543 	andq	$3,%r12
   1544 	movq	%r15,%r13
   1545 	andq	$-4,%r13
   1546 	movq	%r9,%r14
   1547 	shrdq	$2,%r9,%r15
   1548 	shrq	$2,%r9
   1549 	addq	%r13,%r10
   1550 	adcq	%r14,%r11
   1551 	adcq	$0,%r12
   1552 	addq	%r15,%r10
   1553 	adcq	%r9,%r11
   1554 	adcq	$0,%r12
   1555 	movdqa	%xmm9,80(%rbp)
   1556 	paddd	%xmm7,%xmm3
   1557 	pxor	%xmm3,%xmm15
   1558 	pshufb	.rol16(%rip),%xmm15
   1559 	paddd	%xmm15,%xmm11
   1560 	pxor	%xmm11,%xmm7
   1561 	movdqa	%xmm7,%xmm9
   1562 	pslld	$12,%xmm9
   1563 	psrld	$20,%xmm7
   1564 	pxor	%xmm9,%xmm7
   1565 	paddd	%xmm7,%xmm3
   1566 	pxor	%xmm3,%xmm15
   1567 	pshufb	.rol8(%rip),%xmm15
   1568 	paddd	%xmm15,%xmm11
   1569 	pxor	%xmm11,%xmm7
   1570 	movdqa	%xmm7,%xmm9
   1571 	pslld	$7,%xmm9
   1572 	psrld	$25,%xmm7
   1573 	pxor	%xmm9,%xmm7
   1574 .byte	102,15,58,15,255,12
   1575 .byte	102,69,15,58,15,219,8
   1576 .byte	102,69,15,58,15,255,4
   1577 	movdqa	80(%rbp),%xmm9
   1578 
   1579 	addq	$16,%r8
   1580 	cmpq	$160,%r8
   1581 	jb	1b
   1582 	movq	%rbx,%rcx
   1583 	andq	$-16,%rcx
   1584 1:
   1585 	addq	0(%rsi,%r8), %r10
   1586 	adcq	8+0(%rsi,%r8), %r11
   1587 	adcq	$1,%r12
   1588 	movq	0+0(%rbp),%rax
   1589 	movq	%rax,%r15
   1590 	mulq	%r10
   1591 	movq	%rax,%r13
   1592 	movq	%rdx,%r14
   1593 	movq	0+0(%rbp),%rax
   1594 	mulq	%r11
   1595 	imulq	%r12,%r15
   1596 	addq	%rax,%r14
   1597 	adcq	%rdx,%r15
   1598 	movq	8+0(%rbp),%rax
   1599 	movq	%rax,%r9
   1600 	mulq	%r10
   1601 	addq	%rax,%r14
   1602 	adcq	$0,%rdx
   1603 	movq	%rdx,%r10
   1604 	movq	8+0(%rbp),%rax
   1605 	mulq	%r11
   1606 	addq	%rax,%r15
   1607 	adcq	$0,%rdx
   1608 	imulq	%r12,%r9
   1609 	addq	%r10,%r15
   1610 	adcq	%rdx,%r9
   1611 	movq	%r13,%r10
   1612 	movq	%r14,%r11
   1613 	movq	%r15,%r12
   1614 	andq	$3,%r12
   1615 	movq	%r15,%r13
   1616 	andq	$-4,%r13
   1617 	movq	%r9,%r14
   1618 	shrdq	$2,%r9,%r15
   1619 	shrq	$2,%r9
   1620 	addq	%r13,%r10
   1621 	adcq	%r14,%r11
   1622 	adcq	$0,%r12
   1623 	addq	%r15,%r10
   1624 	adcq	%r9,%r11
   1625 	adcq	$0,%r12
   1626 
   1627 	addq	$16,%r8
   1628 	cmpq	%rcx,%r8
   1629 	jb	1b
   1630 	paddd	.chacha20_consts(%rip),%xmm3
   1631 	paddd	48(%rbp),%xmm7
   1632 	paddd	64(%rbp),%xmm11
   1633 	paddd	144(%rbp),%xmm15
   1634 	paddd	.chacha20_consts(%rip),%xmm2
   1635 	paddd	48(%rbp),%xmm6
   1636 	paddd	64(%rbp),%xmm10
   1637 	paddd	128(%rbp),%xmm14
   1638 	paddd	.chacha20_consts(%rip),%xmm1
   1639 	paddd	48(%rbp),%xmm5
   1640 	paddd	64(%rbp),%xmm9
   1641 	paddd	112(%rbp),%xmm13
   1642 	paddd	.chacha20_consts(%rip),%xmm0
   1643 	paddd	48(%rbp),%xmm4
   1644 	paddd	64(%rbp),%xmm8
   1645 	paddd	96(%rbp),%xmm12
   1646 	movdqa	%xmm12,80(%rbp)
   1647 	movdqu	0 + 0(%rsi),%xmm12
   1648 	pxor	%xmm3,%xmm12
   1649 	movdqu	%xmm12,0 + 0(%rdi)
   1650 	movdqu	16 + 0(%rsi),%xmm12
   1651 	pxor	%xmm7,%xmm12
   1652 	movdqu	%xmm12,16 + 0(%rdi)
   1653 	movdqu	32 + 0(%rsi),%xmm12
   1654 	pxor	%xmm11,%xmm12
   1655 	movdqu	%xmm12,32 + 0(%rdi)
   1656 	movdqu	48 + 0(%rsi),%xmm12
   1657 	pxor	%xmm15,%xmm12
   1658 	movdqu	%xmm12,48 + 0(%rdi)
   1659 	movdqu	0 + 64(%rsi),%xmm3
   1660 	movdqu	16 + 64(%rsi),%xmm7
   1661 	movdqu	32 + 64(%rsi),%xmm11
   1662 	movdqu	48 + 64(%rsi),%xmm15
   1663 	pxor	%xmm3,%xmm2
   1664 	pxor	%xmm7,%xmm6
   1665 	pxor	%xmm11,%xmm10
   1666 	pxor	%xmm14,%xmm15
   1667 	movdqu	%xmm2,0 + 64(%rdi)
   1668 	movdqu	%xmm6,16 + 64(%rdi)
   1669 	movdqu	%xmm10,32 + 64(%rdi)
   1670 	movdqu	%xmm15,48 + 64(%rdi)
   1671 	movdqu	0 + 128(%rsi),%xmm3
   1672 	movdqu	16 + 128(%rsi),%xmm7
   1673 	movdqu	32 + 128(%rsi),%xmm11
   1674 	movdqu	48 + 128(%rsi),%xmm15
   1675 	pxor	%xmm3,%xmm1
   1676 	pxor	%xmm7,%xmm5
   1677 	pxor	%xmm11,%xmm9
   1678 	pxor	%xmm13,%xmm15
   1679 	movdqu	%xmm1,0 + 128(%rdi)
   1680 	movdqu	%xmm5,16 + 128(%rdi)
   1681 	movdqu	%xmm9,32 + 128(%rdi)
   1682 	movdqu	%xmm15,48 + 128(%rdi)
   1683 
   1684 	movdqa	80(%rbp),%xmm12
   1685 	subq	$192,%rbx
   1686 	leaq	192(%rsi),%rsi
   1687 	leaq	192(%rdi),%rdi
   1688 
   1689 
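// Decrypt whatever full 16-byte words remain, rotating the next keystream
// row into %xmm0 after each store.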
   1690 open_sse_tail_64_dec_loop:
   1691 	cmpq	$16,%rbx
   1692 	jb	1f
   1693 	subq	$16,%rbx
   1694 	movdqu	(%rsi),%xmm3
   1695 	pxor	%xmm3,%xmm0
   1696 	movdqu	%xmm0,(%rdi)
   1697 	leaq	16(%rsi),%rsi
   1698 	leaq	16(%rdi),%rdi
   1699 	movdqa	%xmm4,%xmm0
   1700 	movdqa	%xmm8,%xmm4
   1701 	movdqa	%xmm12,%xmm8
   1702 	jmp	open_sse_tail_64_dec_loop
   1703 1:
   1704 	movdqa	%xmm0,%xmm1
   1705 
   1706 
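// Final 1..15 bytes: gather them back-to-front into %xmm3, extract the
// zero-padded ciphertext block into %r13:%r14 for hashing (the .byte
// sequence below is an encoded movq %xmm3,%r13), then decrypt with %xmm1
// and emit the plaintext byte by byte.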
   1707 open_sse_tail_16:
   1708 	testq	%rbx,%rbx
   1709 	jz	open_sse_finalize
   1710 
   1711 
   1712 
   1713 	pxor	%xmm3,%xmm3
   1714 	leaq	-1(%rsi,%rbx), %rsi
   1715 	movq	%rbx,%r8
   1716 2:
   1717 	pslldq	$1,%xmm3
   1718 	pinsrb	$0,(%rsi),%xmm3
   1719 	subq	$1,%rsi
   1720 	subq	$1,%r8
   1721 	jnz	2b
   1722 
   1723 3:
   1724 .byte	102,73,15,126,221
   1725 	pextrq	$1,%xmm3,%r14
   1726 
   1727 	pxor	%xmm1,%xmm3
   1728 
   1729 
   1730 2:
   1731 	pextrb	$0,%xmm3,(%rdi)
   1732 	psrldq	$1,%xmm3
   1733 	addq	$1,%rdi
   1734 	subq	$1,%rbx
   1735 	jne	2b
   1736 
   1737 	addq	%r13,%r10
   1738 	adcq	%r14,%r11
   1739 	adcq	$1,%r12
   1740 	movq	0+0(%rbp),%rax
   1741 	movq	%rax,%r15
   1742 	mulq	%r10
   1743 	movq	%rax,%r13
   1744 	movq	%rdx,%r14
   1745 	movq	0+0(%rbp),%rax
   1746 	mulq	%r11
   1747 	imulq	%r12,%r15
   1748 	addq	%rax,%r14
   1749 	adcq	%rdx,%r15
   1750 	movq	8+0(%rbp),%rax
   1751 	movq	%rax,%r9
   1752 	mulq	%r10
   1753 	addq	%rax,%r14
   1754 	adcq	$0,%rdx
   1755 	movq	%rdx,%r10
   1756 	movq	8+0(%rbp),%rax
   1757 	mulq	%r11
   1758 	addq	%rax,%r15
   1759 	adcq	$0,%rdx
   1760 	imulq	%r12,%r9
   1761 	addq	%r10,%r15
   1762 	adcq	%rdx,%r9
   1763 	movq	%r13,%r10
   1764 	movq	%r14,%r11
   1765 	movq	%r15,%r12
   1766 	andq	$3,%r12
   1767 	movq	%r15,%r13
   1768 	andq	$-4,%r13
   1769 	movq	%r9,%r14
   1770 	shrdq	$2,%r9,%r15
   1771 	shrq	$2,%r9
   1772 	addq	%r13,%r10
   1773 	adcq	%r14,%r11
   1774 	adcq	$0,%r12
   1775 	addq	%r15,%r10
   1776 	adcq	%r9,%r11
   1777 	adcq	$0,%r12
   1778 
   1779 
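// Absorb the ad_len||msg_len block, perform the final reduction (conditional
// subtract of 2^130-5), add s, and store the 16-byte tag at (%r9).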
   1780 open_sse_finalize:
   1781 	addq	32(%rbp),%r10
   1782 	adcq	8+32(%rbp),%r11
   1783 	adcq	$1,%r12
   1784 	movq	0+0(%rbp),%rax
   1785 	movq	%rax,%r15
   1786 	mulq	%r10
   1787 	movq	%rax,%r13
   1788 	movq	%rdx,%r14
   1789 	movq	0+0(%rbp),%rax
   1790 	mulq	%r11
   1791 	imulq	%r12,%r15
   1792 	addq	%rax,%r14
   1793 	adcq	%rdx,%r15
   1794 	movq	8+0(%rbp),%rax
   1795 	movq	%rax,%r9
   1796 	mulq	%r10
   1797 	addq	%rax,%r14
   1798 	adcq	$0,%rdx
   1799 	movq	%rdx,%r10
   1800 	movq	8+0(%rbp),%rax
   1801 	mulq	%r11
   1802 	addq	%rax,%r15
   1803 	adcq	$0,%rdx
   1804 	imulq	%r12,%r9
   1805 	addq	%r10,%r15
   1806 	adcq	%rdx,%r9
   1807 	movq	%r13,%r10
   1808 	movq	%r14,%r11
   1809 	movq	%r15,%r12
   1810 	andq	$3,%r12
   1811 	movq	%r15,%r13
   1812 	andq	$-4,%r13
   1813 	movq	%r9,%r14
   1814 	shrdq	$2,%r9,%r15
   1815 	shrq	$2,%r9
   1816 	addq	%r13,%r10
   1817 	adcq	%r14,%r11
   1818 	adcq	$0,%r12
   1819 	addq	%r15,%r10
   1820 	adcq	%r9,%r11
   1821 	adcq	$0,%r12
   1822 
   1823 
   1824 	movq	%r10,%r13
   1825 	movq	%r11,%r14
   1826 	movq	%r12,%r15
   1827 	subq	$-5,%r10
   1828 	sbbq	$-1,%r11
   1829 	sbbq	$3,%r12
   1830 	cmovcq	%r13,%r10
   1831 	cmovcq	%r14,%r11
   1832 	cmovcq	%r15,%r12
   1833 
   1834 	addq	0+16(%rbp),%r10
   1835 	adcq	8+16(%rbp),%r11
   1836 
   1837 	addq	$288 + 32,%rsp
   1838 
   1839 	popq	%r9
   1840 
   1841 	movq	%r10,(%r9)
   1842 	movq	%r11,8(%r9)
   1843 
   1844 	popq	%r15
   1845 
   1846 	popq	%r14
   1847 
   1848 	popq	%r13
   1849 
   1850 	popq	%r12
   1851 
   1852 	popq	%rbx
   1853 
   1854 	popq	%rbp
   1855 
   1856 	.byte	0xf3,0xc3
   1857 
   1858 
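// Short-input path (at most 128 bytes): three ChaCha20 blocks computed in
// parallel; the counter-0 block supplies the Poly1305 key and the other two
// supply keystream.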
   1859 open_sse_128:
   1860 	movdqu	.chacha20_consts(%rip),%xmm0
   1861 	movdqa	%xmm0,%xmm1
   1862 	movdqa	%xmm0,%xmm2
   1863 	movdqu	0(%r9),%xmm4
   1864 	movdqa	%xmm4,%xmm5
   1865 	movdqa	%xmm4,%xmm6
   1866 	movdqu	16(%r9),%xmm8
   1867 	movdqa	%xmm8,%xmm9
   1868 	movdqa	%xmm8,%xmm10
   1869 	movdqu	32(%r9),%xmm12
   1870 	movdqa	%xmm12,%xmm13
   1871 	paddd	.sse_inc(%rip),%xmm13
   1872 	movdqa	%xmm13,%xmm14
   1873 	paddd	.sse_inc(%rip),%xmm14
   1874 	movdqa	%xmm4,%xmm7
   1875 	movdqa	%xmm8,%xmm11
   1876 	movdqa	%xmm13,%xmm15
   1877 	movq	$10,%r10
   1878 1:
   1879 	paddd	%xmm4,%xmm0
   1880 	pxor	%xmm0,%xmm12
   1881 	pshufb	.rol16(%rip),%xmm12
   1882 	paddd	%xmm12,%xmm8
   1883 	pxor	%xmm8,%xmm4
   1884 	movdqa	%xmm4,%xmm3
   1885 	pslld	$12,%xmm3
   1886 	psrld	$20,%xmm4
   1887 	pxor	%xmm3,%xmm4
   1888 	paddd	%xmm4,%xmm0
   1889 	pxor	%xmm0,%xmm12
   1890 	pshufb	.rol8(%rip),%xmm12
   1891 	paddd	%xmm12,%xmm8
   1892 	pxor	%xmm8,%xmm4
   1893 	movdqa	%xmm4,%xmm3
   1894 	pslld	$7,%xmm3
   1895 	psrld	$25,%xmm4
   1896 	pxor	%xmm3,%xmm4
   1897 .byte	102,15,58,15,228,4
   1898 .byte	102,69,15,58,15,192,8
   1899 .byte	102,69,15,58,15,228,12
   1900 	paddd	%xmm5,%xmm1
   1901 	pxor	%xmm1,%xmm13
   1902 	pshufb	.rol16(%rip),%xmm13
   1903 	paddd	%xmm13,%xmm9
   1904 	pxor	%xmm9,%xmm5
   1905 	movdqa	%xmm5,%xmm3
   1906 	pslld	$12,%xmm3
   1907 	psrld	$20,%xmm5
   1908 	pxor	%xmm3,%xmm5
   1909 	paddd	%xmm5,%xmm1
   1910 	pxor	%xmm1,%xmm13
   1911 	pshufb	.rol8(%rip),%xmm13
   1912 	paddd	%xmm13,%xmm9
   1913 	pxor	%xmm9,%xmm5
   1914 	movdqa	%xmm5,%xmm3
   1915 	pslld	$7,%xmm3
   1916 	psrld	$25,%xmm5
   1917 	pxor	%xmm3,%xmm5
   1918 .byte	102,15,58,15,237,4
   1919 .byte	102,69,15,58,15,201,8
   1920 .byte	102,69,15,58,15,237,12
   1921 	paddd	%xmm6,%xmm2
   1922 	pxor	%xmm2,%xmm14
   1923 	pshufb	.rol16(%rip),%xmm14
   1924 	paddd	%xmm14,%xmm10
   1925 	pxor	%xmm10,%xmm6
   1926 	movdqa	%xmm6,%xmm3
   1927 	pslld	$12,%xmm3
   1928 	psrld	$20,%xmm6
   1929 	pxor	%xmm3,%xmm6
   1930 	paddd	%xmm6,%xmm2
   1931 	pxor	%xmm2,%xmm14
   1932 	pshufb	.rol8(%rip),%xmm14
   1933 	paddd	%xmm14,%xmm10
   1934 	pxor	%xmm10,%xmm6
   1935 	movdqa	%xmm6,%xmm3
   1936 	pslld	$7,%xmm3
   1937 	psrld	$25,%xmm6
   1938 	pxor	%xmm3,%xmm6
   1939 .byte	102,15,58,15,246,4
   1940 .byte	102,69,15,58,15,210,8
   1941 .byte	102,69,15,58,15,246,12
   1942 	paddd	%xmm4,%xmm0
   1943 	pxor	%xmm0,%xmm12
   1944 	pshufb	.rol16(%rip),%xmm12
   1945 	paddd	%xmm12,%xmm8
   1946 	pxor	%xmm8,%xmm4
   1947 	movdqa	%xmm4,%xmm3
   1948 	pslld	$12,%xmm3
   1949 	psrld	$20,%xmm4
   1950 	pxor	%xmm3,%xmm4
   1951 	paddd	%xmm4,%xmm0
   1952 	pxor	%xmm0,%xmm12
   1953 	pshufb	.rol8(%rip),%xmm12
   1954 	paddd	%xmm12,%xmm8
   1955 	pxor	%xmm8,%xmm4
   1956 	movdqa	%xmm4,%xmm3
   1957 	pslld	$7,%xmm3
   1958 	psrld	$25,%xmm4
   1959 	pxor	%xmm3,%xmm4
   1960 .byte	102,15,58,15,228,12
   1961 .byte	102,69,15,58,15,192,8
   1962 .byte	102,69,15,58,15,228,4
   1963 	paddd	%xmm5,%xmm1
   1964 	pxor	%xmm1,%xmm13
   1965 	pshufb	.rol16(%rip),%xmm13
   1966 	paddd	%xmm13,%xmm9
   1967 	pxor	%xmm9,%xmm5
   1968 	movdqa	%xmm5,%xmm3
   1969 	pslld	$12,%xmm3
   1970 	psrld	$20,%xmm5
   1971 	pxor	%xmm3,%xmm5
   1972 	paddd	%xmm5,%xmm1
   1973 	pxor	%xmm1,%xmm13
   1974 	pshufb	.rol8(%rip),%xmm13
   1975 	paddd	%xmm13,%xmm9
   1976 	pxor	%xmm9,%xmm5
   1977 	movdqa	%xmm5,%xmm3
   1978 	pslld	$7,%xmm3
   1979 	psrld	$25,%xmm5
   1980 	pxor	%xmm3,%xmm5
   1981 .byte	102,15,58,15,237,12
   1982 .byte	102,69,15,58,15,201,8
   1983 .byte	102,69,15,58,15,237,4
   1984 	paddd	%xmm6,%xmm2
   1985 	pxor	%xmm2,%xmm14
   1986 	pshufb	.rol16(%rip),%xmm14
   1987 	paddd	%xmm14,%xmm10
   1988 	pxor	%xmm10,%xmm6
   1989 	movdqa	%xmm6,%xmm3
   1990 	pslld	$12,%xmm3
   1991 	psrld	$20,%xmm6
   1992 	pxor	%xmm3,%xmm6
   1993 	paddd	%xmm6,%xmm2
   1994 	pxor	%xmm2,%xmm14
   1995 	pshufb	.rol8(%rip),%xmm14
   1996 	paddd	%xmm14,%xmm10
   1997 	pxor	%xmm10,%xmm6
   1998 	movdqa	%xmm6,%xmm3
   1999 	pslld	$7,%xmm3
   2000 	psrld	$25,%xmm6
   2001 	pxor	%xmm3,%xmm6
   2002 .byte	102,15,58,15,246,12
   2003 .byte	102,69,15,58,15,210,8
   2004 .byte	102,69,15,58,15,246,4
   2005 
   2006 	decq	%r10
   2007 	jnz	1b
   2008 	paddd	.chacha20_consts(%rip),%xmm0
   2009 	paddd	.chacha20_consts(%rip),%xmm1
   2010 	paddd	.chacha20_consts(%rip),%xmm2
   2011 	paddd	%xmm7,%xmm4
   2012 	paddd	%xmm7,%xmm5
   2013 	paddd	%xmm7,%xmm6
   2014 	paddd	%xmm11,%xmm9
   2015 	paddd	%xmm11,%xmm10
   2016 	paddd	%xmm15,%xmm13
   2017 	paddd	.sse_inc(%rip),%xmm15
   2018 	paddd	%xmm15,%xmm14
   2019 
   2020 	pand	.clamp(%rip),%xmm0
   2021 	movdqa	%xmm0,0(%rbp)
   2022 	movdqa	%xmm4,16(%rbp)
   2023 
   2024 	movq	%r8,%r8
   2025 	call	poly_hash_ad_internal
   2026 1:
   2027 	cmpq	$16,%rbx
   2028 	jb	open_sse_tail_16
   2029 	subq	$16,%rbx
   2030 	addq	0(%rsi),%r10
   2031 	adcq	8+0(%rsi),%r11
   2032 	adcq	$1,%r12
   2033 
   2034 
   2035 	movdqu	0(%rsi),%xmm3
   2036 	pxor	%xmm3,%xmm1
   2037 	movdqu	%xmm1,0(%rdi)
   2038 	leaq	16(%rsi),%rsi
   2039 	leaq	16(%rdi),%rdi
   2040 	movq	0+0(%rbp),%rax
   2041 	movq	%rax,%r15
   2042 	mulq	%r10
   2043 	movq	%rax,%r13
   2044 	movq	%rdx,%r14
   2045 	movq	0+0(%rbp),%rax
   2046 	mulq	%r11
   2047 	imulq	%r12,%r15
   2048 	addq	%rax,%r14
   2049 	adcq	%rdx,%r15
   2050 	movq	8+0(%rbp),%rax
   2051 	movq	%rax,%r9
   2052 	mulq	%r10
   2053 	addq	%rax,%r14
   2054 	adcq	$0,%rdx
   2055 	movq	%rdx,%r10
   2056 	movq	8+0(%rbp),%rax
   2057 	mulq	%r11
   2058 	addq	%rax,%r15
   2059 	adcq	$0,%rdx
   2060 	imulq	%r12,%r9
   2061 	addq	%r10,%r15
   2062 	adcq	%rdx,%r9
   2063 	movq	%r13,%r10
   2064 	movq	%r14,%r11
   2065 	movq	%r15,%r12
   2066 	andq	$3,%r12
   2067 	movq	%r15,%r13
   2068 	andq	$-4,%r13
   2069 	movq	%r9,%r14
   2070 	shrdq	$2,%r9,%r15
   2071 	shrq	$2,%r9
   2072 	addq	%r13,%r10
   2073 	adcq	%r14,%r11
   2074 	adcq	$0,%r12
   2075 	addq	%r15,%r10
   2076 	adcq	%r9,%r11
   2077 	adcq	$0,%r12
   2078 
   2079 
   2080 	movdqa	%xmm5,%xmm1
   2081 	movdqa	%xmm9,%xmm5
   2082 	movdqa	%xmm13,%xmm9
   2083 	movdqa	%xmm2,%xmm13
   2084 	movdqa	%xmm6,%xmm2
   2085 	movdqa	%xmm10,%xmm6
   2086 	movdqa	%xmm14,%xmm10
   2087 	jmp	1b
   2088 	jmp	open_sse_tail_16
   2089 
   2090 
   2091 
   2092 
   2093 
   2094 
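// chacha20_poly1305_seal: one-pass encrypt-and-tag, same argument layout as
// open. 56(%r9) appears to hold the length of extra plaintext to be
// authenticated; it is added to %rdx before the length block is stored.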
   2095 .globl	_chacha20_poly1305_seal
   2096 .private_extern _chacha20_poly1305_seal
   2097 
   2098 .p2align	6
   2099 _chacha20_poly1305_seal:
   2100 
   2101 	pushq	%rbp
   2102 
   2103 	pushq	%rbx
   2104 
   2105 	pushq	%r12
   2106 
   2107 	pushq	%r13
   2108 
   2109 	pushq	%r14
   2110 
   2111 	pushq	%r15
   2112 
   2113 
   2114 
   2115 	pushq	%r9
   2116 
   2117 	subq	$288 + 32,%rsp
   2118 
   2119 
   2120 
   2121 
   2122 
   2123 
   2124 
   2125 	leaq	32(%rsp),%rbp
   2126 	andq	$-32,%rbp
   2127 	movq	56(%r9),%rbx
   2128 	addq	%rdx,%rbx
   2129 	movq	%rbx,8+32(%rbp)
   2130 	movq	%r8,0+32(%rbp)
   2131 	movq	%rdx,%rbx
   2132 
   2133 	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
   2134 	andl	$288,%eax
   2135 	xorl	$288,%eax
   2136 	jz	chacha20_poly1305_seal_avx2
   2137 
   2138 	cmpq	$128,%rbx
   2139 	jbe	seal_sse_128
   2140 
   2141 	movdqa	.chacha20_consts(%rip),%xmm0
   2142 	movdqu	0(%r9),%xmm4
   2143 	movdqu	16(%r9),%xmm8
   2144 	movdqu	32(%r9),%xmm12
   2145 	movdqa	%xmm0,%xmm1
   2146 	movdqa	%xmm0,%xmm2
   2147 	movdqa	%xmm0,%xmm3
   2148 	movdqa	%xmm4,%xmm5
   2149 	movdqa	%xmm4,%xmm6
   2150 	movdqa	%xmm4,%xmm7
   2151 	movdqa	%xmm8,%xmm9
   2152 	movdqa	%xmm8,%xmm10
   2153 	movdqa	%xmm8,%xmm11
   2154 	movdqa	%xmm12,%xmm15
   2155 	paddd	.sse_inc(%rip),%xmm12
   2156 	movdqa	%xmm12,%xmm14
   2157 	paddd	.sse_inc(%rip),%xmm12
   2158 	movdqa	%xmm12,%xmm13
   2159 	paddd	.sse_inc(%rip),%xmm12
   2160 
   2161 	movdqa	%xmm4,48(%rbp)
   2162 	movdqa	%xmm8,64(%rbp)
   2163 	movdqa	%xmm12,96(%rbp)
   2164 	movdqa	%xmm13,112(%rbp)
   2165 	movdqa	%xmm14,128(%rbp)
   2166 	movdqa	%xmm15,144(%rbp)
   2167 	movq	$10,%r10
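/* 10 double rounds over four interleaved states: xmm0-3 hold the constant
   rows, xmm4-7 and xmm8-11 the key rows, xmm12-15 the per-block counters. */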
   2168 1:
   2169 	movdqa	%xmm8,80(%rbp)
   2170 	movdqa	.rol16(%rip),%xmm8
   2171 	paddd	%xmm7,%xmm3
   2172 	paddd	%xmm6,%xmm2
   2173 	paddd	%xmm5,%xmm1
   2174 	paddd	%xmm4,%xmm0
   2175 	pxor	%xmm3,%xmm15
   2176 	pxor	%xmm2,%xmm14
   2177 	pxor	%xmm1,%xmm13
   2178 	pxor	%xmm0,%xmm12
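/* .byte group: pshufb %xmm8,%xmm15 ... pshufb %xmm8,%xmm12, with xmm8
   holding the rol16/rol8 table while its state row is parked at 80(%rbp). */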
   2179 .byte	102,69,15,56,0,248
   2180 .byte	102,69,15,56,0,240
   2181 .byte	102,69,15,56,0,232
   2182 .byte	102,69,15,56,0,224
   2183 	movdqa	80(%rbp),%xmm8
   2184 	paddd	%xmm15,%xmm11
   2185 	paddd	%xmm14,%xmm10
   2186 	paddd	%xmm13,%xmm9
   2187 	paddd	%xmm12,%xmm8
   2188 	pxor	%xmm11,%xmm7
   2189 	pxor	%xmm10,%xmm6
   2190 	pxor	%xmm9,%xmm5
   2191 	pxor	%xmm8,%xmm4
   2192 	movdqa	%xmm8,80(%rbp)
   2193 	movdqa	%xmm7,%xmm8
   2194 	psrld	$20,%xmm8
   2195 	pslld	$32-20,%xmm7
   2196 	pxor	%xmm8,%xmm7
   2197 	movdqa	%xmm6,%xmm8
   2198 	psrld	$20,%xmm8
   2199 	pslld	$32-20,%xmm6
   2200 	pxor	%xmm8,%xmm6
   2201 	movdqa	%xmm5,%xmm8
   2202 	psrld	$20,%xmm8
   2203 	pslld	$32-20,%xmm5
   2204 	pxor	%xmm8,%xmm5
   2205 	movdqa	%xmm4,%xmm8
   2206 	psrld	$20,%xmm8
   2207 	pslld	$32-20,%xmm4
   2208 	pxor	%xmm8,%xmm4
   2209 	movdqa	.rol8(%rip),%xmm8
   2210 	paddd	%xmm7,%xmm3
   2211 	paddd	%xmm6,%xmm2
   2212 	paddd	%xmm5,%xmm1
   2213 	paddd	%xmm4,%xmm0
   2214 	pxor	%xmm3,%xmm15
   2215 	pxor	%xmm2,%xmm14
   2216 	pxor	%xmm1,%xmm13
   2217 	pxor	%xmm0,%xmm12
   2218 .byte	102,69,15,56,0,248
   2219 .byte	102,69,15,56,0,240
   2220 .byte	102,69,15,56,0,232
   2221 .byte	102,69,15,56,0,224
   2222 	movdqa	80(%rbp),%xmm8
   2223 	paddd	%xmm15,%xmm11
   2224 	paddd	%xmm14,%xmm10
   2225 	paddd	%xmm13,%xmm9
   2226 	paddd	%xmm12,%xmm8
   2227 	pxor	%xmm11,%xmm7
   2228 	pxor	%xmm10,%xmm6
   2229 	pxor	%xmm9,%xmm5
   2230 	pxor	%xmm8,%xmm4
   2231 	movdqa	%xmm8,80(%rbp)
   2232 	movdqa	%xmm7,%xmm8
   2233 	psrld	$25,%xmm8
   2234 	pslld	$32-25,%xmm7
   2235 	pxor	%xmm8,%xmm7
   2236 	movdqa	%xmm6,%xmm8
   2237 	psrld	$25,%xmm8
   2238 	pslld	$32-25,%xmm6
   2239 	pxor	%xmm8,%xmm6
   2240 	movdqa	%xmm5,%xmm8
   2241 	psrld	$25,%xmm8
   2242 	pslld	$32-25,%xmm5
   2243 	pxor	%xmm8,%xmm5
   2244 	movdqa	%xmm4,%xmm8
   2245 	psrld	$25,%xmm8
   2246 	pslld	$32-25,%xmm4
   2247 	pxor	%xmm8,%xmm4
   2248 	movdqa	80(%rbp),%xmm8
   2249 .byte	102,15,58,15,255,4
   2250 .byte	102,69,15,58,15,219,8
   2251 .byte	102,69,15,58,15,255,12
   2252 .byte	102,15,58,15,246,4
   2253 .byte	102,69,15,58,15,210,8
   2254 .byte	102,69,15,58,15,246,12
   2255 .byte	102,15,58,15,237,4
   2256 .byte	102,69,15,58,15,201,8
   2257 .byte	102,69,15,58,15,237,12
   2258 .byte	102,15,58,15,228,4
   2259 .byte	102,69,15,58,15,192,8
   2260 .byte	102,69,15,58,15,228,12
   2261 	movdqa	%xmm8,80(%rbp)
   2262 	movdqa	.rol16(%rip),%xmm8
   2263 	paddd	%xmm7,%xmm3
   2264 	paddd	%xmm6,%xmm2
   2265 	paddd	%xmm5,%xmm1
   2266 	paddd	%xmm4,%xmm0
   2267 	pxor	%xmm3,%xmm15
   2268 	pxor	%xmm2,%xmm14
   2269 	pxor	%xmm1,%xmm13
   2270 	pxor	%xmm0,%xmm12
   2271 .byte	102,69,15,56,0,248
   2272 .byte	102,69,15,56,0,240
   2273 .byte	102,69,15,56,0,232
   2274 .byte	102,69,15,56,0,224
   2275 	movdqa	80(%rbp),%xmm8
   2276 	paddd	%xmm15,%xmm11
   2277 	paddd	%xmm14,%xmm10
   2278 	paddd	%xmm13,%xmm9
   2279 	paddd	%xmm12,%xmm8
   2280 	pxor	%xmm11,%xmm7
   2281 	pxor	%xmm10,%xmm6
   2282 	pxor	%xmm9,%xmm5
   2283 	pxor	%xmm8,%xmm4
   2284 	movdqa	%xmm8,80(%rbp)
   2285 	movdqa	%xmm7,%xmm8
   2286 	psrld	$20,%xmm8
   2287 	pslld	$32-20,%xmm7
   2288 	pxor	%xmm8,%xmm7
   2289 	movdqa	%xmm6,%xmm8
   2290 	psrld	$20,%xmm8
   2291 	pslld	$32-20,%xmm6
   2292 	pxor	%xmm8,%xmm6
   2293 	movdqa	%xmm5,%xmm8
   2294 	psrld	$20,%xmm8
   2295 	pslld	$32-20,%xmm5
   2296 	pxor	%xmm8,%xmm5
   2297 	movdqa	%xmm4,%xmm8
   2298 	psrld	$20,%xmm8
   2299 	pslld	$32-20,%xmm4
   2300 	pxor	%xmm8,%xmm4
   2301 	movdqa	.rol8(%rip),%xmm8
   2302 	paddd	%xmm7,%xmm3
   2303 	paddd	%xmm6,%xmm2
   2304 	paddd	%xmm5,%xmm1
   2305 	paddd	%xmm4,%xmm0
   2306 	pxor	%xmm3,%xmm15
   2307 	pxor	%xmm2,%xmm14
   2308 	pxor	%xmm1,%xmm13
   2309 	pxor	%xmm0,%xmm12
   2310 .byte	102,69,15,56,0,248
   2311 .byte	102,69,15,56,0,240
   2312 .byte	102,69,15,56,0,232
   2313 .byte	102,69,15,56,0,224
   2314 	movdqa	80(%rbp),%xmm8
   2315 	paddd	%xmm15,%xmm11
   2316 	paddd	%xmm14,%xmm10
   2317 	paddd	%xmm13,%xmm9
   2318 	paddd	%xmm12,%xmm8
   2319 	pxor	%xmm11,%xmm7
   2320 	pxor	%xmm10,%xmm6
   2321 	pxor	%xmm9,%xmm5
   2322 	pxor	%xmm8,%xmm4
   2323 	movdqa	%xmm8,80(%rbp)
   2324 	movdqa	%xmm7,%xmm8
   2325 	psrld	$25,%xmm8
   2326 	pslld	$32-25,%xmm7
   2327 	pxor	%xmm8,%xmm7
   2328 	movdqa	%xmm6,%xmm8
   2329 	psrld	$25,%xmm8
   2330 	pslld	$32-25,%xmm6
   2331 	pxor	%xmm8,%xmm6
   2332 	movdqa	%xmm5,%xmm8
   2333 	psrld	$25,%xmm8
   2334 	pslld	$32-25,%xmm5
   2335 	pxor	%xmm8,%xmm5
   2336 	movdqa	%xmm4,%xmm8
   2337 	psrld	$25,%xmm8
   2338 	pslld	$32-25,%xmm4
   2339 	pxor	%xmm8,%xmm4
   2340 	movdqa	80(%rbp),%xmm8
   2341 .byte	102,15,58,15,255,12
   2342 .byte	102,69,15,58,15,219,8
   2343 .byte	102,69,15,58,15,255,4
   2344 .byte	102,15,58,15,246,12
   2345 .byte	102,69,15,58,15,210,8
   2346 .byte	102,69,15,58,15,246,4
   2347 .byte	102,15,58,15,237,12
   2348 .byte	102,69,15,58,15,201,8
   2349 .byte	102,69,15,58,15,237,4
   2350 .byte	102,15,58,15,228,12
   2351 .byte	102,69,15,58,15,192,8
   2352 .byte	102,69,15,58,15,228,4
   2353 
   2354 	decq	%r10
   2355 	jnz	1b
   2356 	paddd	.chacha20_consts(%rip),%xmm3
   2357 	paddd	48(%rbp),%xmm7
   2358 	paddd	64(%rbp),%xmm11
   2359 	paddd	144(%rbp),%xmm15
   2360 	paddd	.chacha20_consts(%rip),%xmm2
   2361 	paddd	48(%rbp),%xmm6
   2362 	paddd	64(%rbp),%xmm10
   2363 	paddd	128(%rbp),%xmm14
   2364 	paddd	.chacha20_consts(%rip),%xmm1
   2365 	paddd	48(%rbp),%xmm5
   2366 	paddd	64(%rbp),%xmm9
   2367 	paddd	112(%rbp),%xmm13
   2368 	paddd	.chacha20_consts(%rip),%xmm0
   2369 	paddd	48(%rbp),%xmm4
   2370 	paddd	64(%rbp),%xmm8
   2371 	paddd	96(%rbp),%xmm12
   2372 
   2373 
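/* The lowest-counter keystream block (xmm3/xmm7) becomes the one-time
   Poly1305 key: clamped r at 0(%rbp), s at 16(%rbp).  The movq %r8,%r8
   below looks like a generator no-op; %r8 already holds ad_len for
   poly_hash_ad_internal. */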
   2374 	pand	.clamp(%rip),%xmm3
   2375 	movdqa	%xmm3,0(%rbp)
   2376 	movdqa	%xmm7,16(%rbp)
   2377 
   2378 	movq	%r8,%r8
   2379 	call	poly_hash_ad_internal
   2380 	movdqu	0 + 0(%rsi),%xmm3
   2381 	movdqu	16 + 0(%rsi),%xmm7
   2382 	movdqu	32 + 0(%rsi),%xmm11
   2383 	movdqu	48 + 0(%rsi),%xmm15
   2384 	pxor	%xmm3,%xmm2
   2385 	pxor	%xmm7,%xmm6
   2386 	pxor	%xmm11,%xmm10
   2387 	pxor	%xmm14,%xmm15
   2388 	movdqu	%xmm2,0 + 0(%rdi)
   2389 	movdqu	%xmm6,16 + 0(%rdi)
   2390 	movdqu	%xmm10,32 + 0(%rdi)
   2391 	movdqu	%xmm15,48 + 0(%rdi)
   2392 	movdqu	0 + 64(%rsi),%xmm3
   2393 	movdqu	16 + 64(%rsi),%xmm7
   2394 	movdqu	32 + 64(%rsi),%xmm11
   2395 	movdqu	48 + 64(%rsi),%xmm15
   2396 	pxor	%xmm3,%xmm1
   2397 	pxor	%xmm7,%xmm5
   2398 	pxor	%xmm11,%xmm9
   2399 	pxor	%xmm13,%xmm15
   2400 	movdqu	%xmm1,0 + 64(%rdi)
   2401 	movdqu	%xmm5,16 + 64(%rdi)
   2402 	movdqu	%xmm9,32 + 64(%rdi)
   2403 	movdqu	%xmm15,48 + 64(%rdi)
   2404 
   2405 	cmpq	$192,%rbx
   2406 	ja	1f
   2407 	movq	$128,%rcx
   2408 	subq	$128,%rbx
   2409 	leaq	128(%rsi),%rsi
   2410 	jmp	seal_sse_128_seal_hash
   2411 1:
   2412 	movdqu	0 + 128(%rsi),%xmm3
   2413 	movdqu	16 + 128(%rsi),%xmm7
   2414 	movdqu	32 + 128(%rsi),%xmm11
   2415 	movdqu	48 + 128(%rsi),%xmm15
   2416 	pxor	%xmm3,%xmm0
   2417 	pxor	%xmm7,%xmm4
   2418 	pxor	%xmm11,%xmm8
   2419 	pxor	%xmm12,%xmm15
   2420 	movdqu	%xmm0,0 + 128(%rdi)
   2421 	movdqu	%xmm4,16 + 128(%rdi)
   2422 	movdqu	%xmm8,32 + 128(%rdi)
   2423 	movdqu	%xmm15,48 + 128(%rdi)
   2424 
   2425 	movq	$192,%rcx
   2426 	subq	$192,%rbx
   2427 	leaq	192(%rsi),%rsi
   2428 	movq	$2,%rcx
   2429 	movq	$8,%r8
   2430 	cmpq	$64,%rbx
   2431 	jbe	seal_sse_tail_64
   2432 	cmpq	$128,%rbx
   2433 	jbe	seal_sse_tail_128
   2434 	cmpq	$192,%rbx
   2435 	jbe	seal_sse_tail_192
   2436 
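/* Bulk seal loop: each trip generates four fresh 64-byte blocks while the
   scalar unit hashes the 256 bytes of ciphertext written on the previous
   trip. */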
   2437 1:
   2438 	movdqa	.chacha20_consts(%rip),%xmm0
   2439 	movdqa	48(%rbp),%xmm4
   2440 	movdqa	64(%rbp),%xmm8
   2441 	movdqa	%xmm0,%xmm1
   2442 	movdqa	%xmm4,%xmm5
   2443 	movdqa	%xmm8,%xmm9
   2444 	movdqa	%xmm0,%xmm2
   2445 	movdqa	%xmm4,%xmm6
   2446 	movdqa	%xmm8,%xmm10
   2447 	movdqa	%xmm0,%xmm3
   2448 	movdqa	%xmm4,%xmm7
   2449 	movdqa	%xmm8,%xmm11
   2450 	movdqa	96(%rbp),%xmm15
   2451 	paddd	.sse_inc(%rip),%xmm15
   2452 	movdqa	%xmm15,%xmm14
   2453 	paddd	.sse_inc(%rip),%xmm14
   2454 	movdqa	%xmm14,%xmm13
   2455 	paddd	.sse_inc(%rip),%xmm13
   2456 	movdqa	%xmm13,%xmm12
   2457 	paddd	.sse_inc(%rip),%xmm12
   2458 	movdqa	%xmm12,96(%rbp)
   2459 	movdqa	%xmm13,112(%rbp)
   2460 	movdqa	%xmm14,128(%rbp)
   2461 	movdqa	%xmm15,144(%rbp)
   2462 
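/* Each pass through 2: is one ChaCha double round plus one 16-byte Poly1305
   absorb of earlier ciphertext at (%rdi); %r8 and %rcx apportion the hash
   blocks across the 10 double rounds so hashing roughly keeps pace with
   encryption. */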
   2463 2:
   2464 	movdqa	%xmm8,80(%rbp)
   2465 	movdqa	.rol16(%rip),%xmm8
   2466 	paddd	%xmm7,%xmm3
   2467 	paddd	%xmm6,%xmm2
   2468 	paddd	%xmm5,%xmm1
   2469 	paddd	%xmm4,%xmm0
   2470 	pxor	%xmm3,%xmm15
   2471 	pxor	%xmm2,%xmm14
   2472 	pxor	%xmm1,%xmm13
   2473 	pxor	%xmm0,%xmm12
   2474 .byte	102,69,15,56,0,248
   2475 .byte	102,69,15,56,0,240
   2476 .byte	102,69,15,56,0,232
   2477 .byte	102,69,15,56,0,224
   2478 	movdqa	80(%rbp),%xmm8
   2479 	paddd	%xmm15,%xmm11
   2480 	paddd	%xmm14,%xmm10
   2481 	paddd	%xmm13,%xmm9
   2482 	paddd	%xmm12,%xmm8
   2483 	pxor	%xmm11,%xmm7
   2484 	addq	0(%rdi),%r10
   2485 	adcq	8+0(%rdi),%r11
   2486 	adcq	$1,%r12
   2487 	pxor	%xmm10,%xmm6
   2488 	pxor	%xmm9,%xmm5
   2489 	pxor	%xmm8,%xmm4
   2490 	movdqa	%xmm8,80(%rbp)
   2491 	movdqa	%xmm7,%xmm8
   2492 	psrld	$20,%xmm8
   2493 	pslld	$32-20,%xmm7
   2494 	pxor	%xmm8,%xmm7
   2495 	movdqa	%xmm6,%xmm8
   2496 	psrld	$20,%xmm8
   2497 	pslld	$32-20,%xmm6
   2498 	pxor	%xmm8,%xmm6
   2499 	movdqa	%xmm5,%xmm8
   2500 	psrld	$20,%xmm8
   2501 	pslld	$32-20,%xmm5
   2502 	pxor	%xmm8,%xmm5
   2503 	movdqa	%xmm4,%xmm8
   2504 	psrld	$20,%xmm8
   2505 	pslld	$32-20,%xmm4
   2506 	pxor	%xmm8,%xmm4
   2507 	movq	0+0(%rbp),%rax
   2508 	movq	%rax,%r15
   2509 	mulq	%r10
   2510 	movq	%rax,%r13
   2511 	movq	%rdx,%r14
   2512 	movq	0+0(%rbp),%rax
   2513 	mulq	%r11
   2514 	imulq	%r12,%r15
   2515 	addq	%rax,%r14
   2516 	adcq	%rdx,%r15
   2517 	movdqa	.rol8(%rip),%xmm8
   2518 	paddd	%xmm7,%xmm3
   2519 	paddd	%xmm6,%xmm2
   2520 	paddd	%xmm5,%xmm1
   2521 	paddd	%xmm4,%xmm0
   2522 	pxor	%xmm3,%xmm15
   2523 	pxor	%xmm2,%xmm14
   2524 	pxor	%xmm1,%xmm13
   2525 	pxor	%xmm0,%xmm12
   2526 .byte	102,69,15,56,0,248
   2527 .byte	102,69,15,56,0,240
   2528 .byte	102,69,15,56,0,232
   2529 .byte	102,69,15,56,0,224
   2530 	movdqa	80(%rbp),%xmm8
   2531 	paddd	%xmm15,%xmm11
   2532 	paddd	%xmm14,%xmm10
   2533 	paddd	%xmm13,%xmm9
   2534 	paddd	%xmm12,%xmm8
   2535 	pxor	%xmm11,%xmm7
   2536 	pxor	%xmm10,%xmm6
   2537 	movq	8+0(%rbp),%rax
   2538 	movq	%rax,%r9
   2539 	mulq	%r10
   2540 	addq	%rax,%r14
   2541 	adcq	$0,%rdx
   2542 	movq	%rdx,%r10
   2543 	movq	8+0(%rbp),%rax
   2544 	mulq	%r11
   2545 	addq	%rax,%r15
   2546 	adcq	$0,%rdx
   2547 	pxor	%xmm9,%xmm5
   2548 	pxor	%xmm8,%xmm4
   2549 	movdqa	%xmm8,80(%rbp)
   2550 	movdqa	%xmm7,%xmm8
   2551 	psrld	$25,%xmm8
   2552 	pslld	$32-25,%xmm7
   2553 	pxor	%xmm8,%xmm7
   2554 	movdqa	%xmm6,%xmm8
   2555 	psrld	$25,%xmm8
   2556 	pslld	$32-25,%xmm6
   2557 	pxor	%xmm8,%xmm6
   2558 	movdqa	%xmm5,%xmm8
   2559 	psrld	$25,%xmm8
   2560 	pslld	$32-25,%xmm5
   2561 	pxor	%xmm8,%xmm5
   2562 	movdqa	%xmm4,%xmm8
   2563 	psrld	$25,%xmm8
   2564 	pslld	$32-25,%xmm4
   2565 	pxor	%xmm8,%xmm4
   2566 	movdqa	80(%rbp),%xmm8
   2567 	imulq	%r12,%r9
   2568 	addq	%r10,%r15
   2569 	adcq	%rdx,%r9
   2570 .byte	102,15,58,15,255,4
   2571 .byte	102,69,15,58,15,219,8
   2572 .byte	102,69,15,58,15,255,12
   2573 .byte	102,15,58,15,246,4
   2574 .byte	102,69,15,58,15,210,8
   2575 .byte	102,69,15,58,15,246,12
   2576 .byte	102,15,58,15,237,4
   2577 .byte	102,69,15,58,15,201,8
   2578 .byte	102,69,15,58,15,237,12
   2579 .byte	102,15,58,15,228,4
   2580 .byte	102,69,15,58,15,192,8
   2581 .byte	102,69,15,58,15,228,12
   2582 	movdqa	%xmm8,80(%rbp)
   2583 	movdqa	.rol16(%rip),%xmm8
   2584 	paddd	%xmm7,%xmm3
   2585 	paddd	%xmm6,%xmm2
   2586 	paddd	%xmm5,%xmm1
   2587 	paddd	%xmm4,%xmm0
   2588 	pxor	%xmm3,%xmm15
   2589 	pxor	%xmm2,%xmm14
   2590 	movq	%r13,%r10
   2591 	movq	%r14,%r11
   2592 	movq	%r15,%r12
   2593 	andq	$3,%r12
   2594 	movq	%r15,%r13
   2595 	andq	$-4,%r13
   2596 	movq	%r9,%r14
   2597 	shrdq	$2,%r9,%r15
   2598 	shrq	$2,%r9
   2599 	addq	%r13,%r10
   2600 	adcq	%r14,%r11
   2601 	adcq	$0,%r12
   2602 	addq	%r15,%r10
   2603 	adcq	%r9,%r11
   2604 	adcq	$0,%r12
   2605 	pxor	%xmm1,%xmm13
   2606 	pxor	%xmm0,%xmm12
   2607 .byte	102,69,15,56,0,248
   2608 .byte	102,69,15,56,0,240
   2609 .byte	102,69,15,56,0,232
   2610 .byte	102,69,15,56,0,224
   2611 	movdqa	80(%rbp),%xmm8
   2612 	paddd	%xmm15,%xmm11
   2613 	paddd	%xmm14,%xmm10
   2614 	paddd	%xmm13,%xmm9
   2615 	paddd	%xmm12,%xmm8
   2616 	pxor	%xmm11,%xmm7
   2617 	pxor	%xmm10,%xmm6
   2618 	pxor	%xmm9,%xmm5
   2619 	pxor	%xmm8,%xmm4
   2620 	movdqa	%xmm8,80(%rbp)
   2621 	movdqa	%xmm7,%xmm8
   2622 	psrld	$20,%xmm8
   2623 	pslld	$32-20,%xmm7
   2624 	pxor	%xmm8,%xmm7
   2625 	movdqa	%xmm6,%xmm8
   2626 	psrld	$20,%xmm8
   2627 	pslld	$32-20,%xmm6
   2628 	pxor	%xmm8,%xmm6
   2629 	movdqa	%xmm5,%xmm8
   2630 	psrld	$20,%xmm8
   2631 	pslld	$32-20,%xmm5
   2632 	pxor	%xmm8,%xmm5
   2633 	movdqa	%xmm4,%xmm8
   2634 	psrld	$20,%xmm8
   2635 	pslld	$32-20,%xmm4
   2636 	pxor	%xmm8,%xmm4
   2637 	movdqa	.rol8(%rip),%xmm8
   2638 	paddd	%xmm7,%xmm3
   2639 	paddd	%xmm6,%xmm2
   2640 	paddd	%xmm5,%xmm1
   2641 	paddd	%xmm4,%xmm0
   2642 	pxor	%xmm3,%xmm15
   2643 	pxor	%xmm2,%xmm14
   2644 	pxor	%xmm1,%xmm13
   2645 	pxor	%xmm0,%xmm12
   2646 .byte	102,69,15,56,0,248
   2647 .byte	102,69,15,56,0,240
   2648 .byte	102,69,15,56,0,232
   2649 .byte	102,69,15,56,0,224
   2650 	movdqa	80(%rbp),%xmm8
   2651 	paddd	%xmm15,%xmm11
   2652 	paddd	%xmm14,%xmm10
   2653 	paddd	%xmm13,%xmm9
   2654 	paddd	%xmm12,%xmm8
   2655 	pxor	%xmm11,%xmm7
   2656 	pxor	%xmm10,%xmm6
   2657 	pxor	%xmm9,%xmm5
   2658 	pxor	%xmm8,%xmm4
   2659 	movdqa	%xmm8,80(%rbp)
   2660 	movdqa	%xmm7,%xmm8
   2661 	psrld	$25,%xmm8
   2662 	pslld	$32-25,%xmm7
   2663 	pxor	%xmm8,%xmm7
   2664 	movdqa	%xmm6,%xmm8
   2665 	psrld	$25,%xmm8
   2666 	pslld	$32-25,%xmm6
   2667 	pxor	%xmm8,%xmm6
   2668 	movdqa	%xmm5,%xmm8
   2669 	psrld	$25,%xmm8
   2670 	pslld	$32-25,%xmm5
   2671 	pxor	%xmm8,%xmm5
   2672 	movdqa	%xmm4,%xmm8
   2673 	psrld	$25,%xmm8
   2674 	pslld	$32-25,%xmm4
   2675 	pxor	%xmm8,%xmm4
   2676 	movdqa	80(%rbp),%xmm8
   2677 .byte	102,15,58,15,255,12
   2678 .byte	102,69,15,58,15,219,8
   2679 .byte	102,69,15,58,15,255,4
   2680 .byte	102,15,58,15,246,12
   2681 .byte	102,69,15,58,15,210,8
   2682 .byte	102,69,15,58,15,246,4
   2683 .byte	102,15,58,15,237,12
   2684 .byte	102,69,15,58,15,201,8
   2685 .byte	102,69,15,58,15,237,4
   2686 .byte	102,15,58,15,228,12
   2687 .byte	102,69,15,58,15,192,8
   2688 .byte	102,69,15,58,15,228,4
   2689 
   2690 	leaq	16(%rdi),%rdi
   2691 	decq	%r8
   2692 	jge	2b
   2693 	addq	0(%rdi),%r10
   2694 	adcq	8+0(%rdi),%r11
   2695 	adcq	$1,%r12
   2696 	movq	0+0(%rbp),%rax
   2697 	movq	%rax,%r15
   2698 	mulq	%r10
   2699 	movq	%rax,%r13
   2700 	movq	%rdx,%r14
   2701 	movq	0+0(%rbp),%rax
   2702 	mulq	%r11
   2703 	imulq	%r12,%r15
   2704 	addq	%rax,%r14
   2705 	adcq	%rdx,%r15
   2706 	movq	8+0(%rbp),%rax
   2707 	movq	%rax,%r9
   2708 	mulq	%r10
   2709 	addq	%rax,%r14
   2710 	adcq	$0,%rdx
   2711 	movq	%rdx,%r10
   2712 	movq	8+0(%rbp),%rax
   2713 	mulq	%r11
   2714 	addq	%rax,%r15
   2715 	adcq	$0,%rdx
   2716 	imulq	%r12,%r9
   2717 	addq	%r10,%r15
   2718 	adcq	%rdx,%r9
   2719 	movq	%r13,%r10
   2720 	movq	%r14,%r11
   2721 	movq	%r15,%r12
   2722 	andq	$3,%r12
   2723 	movq	%r15,%r13
   2724 	andq	$-4,%r13
   2725 	movq	%r9,%r14
   2726 	shrdq	$2,%r9,%r15
   2727 	shrq	$2,%r9
   2728 	addq	%r13,%r10
   2729 	adcq	%r14,%r11
   2730 	adcq	$0,%r12
   2731 	addq	%r15,%r10
   2732 	adcq	%r9,%r11
   2733 	adcq	$0,%r12
   2734 
   2735 	leaq	16(%rdi),%rdi
   2736 	decq	%rcx
   2737 	jg	2b
   2738 	paddd	.chacha20_consts(%rip),%xmm3
   2739 	paddd	48(%rbp),%xmm7
   2740 	paddd	64(%rbp),%xmm11
   2741 	paddd	144(%rbp),%xmm15
   2742 	paddd	.chacha20_consts(%rip),%xmm2
   2743 	paddd	48(%rbp),%xmm6
   2744 	paddd	64(%rbp),%xmm10
   2745 	paddd	128(%rbp),%xmm14
   2746 	paddd	.chacha20_consts(%rip),%xmm1
   2747 	paddd	48(%rbp),%xmm5
   2748 	paddd	64(%rbp),%xmm9
   2749 	paddd	112(%rbp),%xmm13
   2750 	paddd	.chacha20_consts(%rip),%xmm0
   2751 	paddd	48(%rbp),%xmm4
   2752 	paddd	64(%rbp),%xmm8
   2753 	paddd	96(%rbp),%xmm12
   2754 
   2755 	movdqa	%xmm14,80(%rbp)
   2756 	movdqa	%xmm14,80(%rbp)
   2757 	movdqu	0 + 0(%rsi),%xmm14
   2758 	pxor	%xmm3,%xmm14
   2759 	movdqu	%xmm14,0 + 0(%rdi)
   2760 	movdqu	16 + 0(%rsi),%xmm14
   2761 	pxor	%xmm7,%xmm14
   2762 	movdqu	%xmm14,16 + 0(%rdi)
   2763 	movdqu	32 + 0(%rsi),%xmm14
   2764 	pxor	%xmm11,%xmm14
   2765 	movdqu	%xmm14,32 + 0(%rdi)
   2766 	movdqu	48 + 0(%rsi),%xmm14
   2767 	pxor	%xmm15,%xmm14
   2768 	movdqu	%xmm14,48 + 0(%rdi)
   2769 
   2770 	movdqa	80(%rbp),%xmm14
   2771 	movdqu	0 + 64(%rsi),%xmm3
   2772 	movdqu	16 + 64(%rsi),%xmm7
   2773 	movdqu	32 + 64(%rsi),%xmm11
   2774 	movdqu	48 + 64(%rsi),%xmm15
   2775 	pxor	%xmm3,%xmm2
   2776 	pxor	%xmm7,%xmm6
   2777 	pxor	%xmm11,%xmm10
   2778 	pxor	%xmm14,%xmm15
   2779 	movdqu	%xmm2,0 + 64(%rdi)
   2780 	movdqu	%xmm6,16 + 64(%rdi)
   2781 	movdqu	%xmm10,32 + 64(%rdi)
   2782 	movdqu	%xmm15,48 + 64(%rdi)
   2783 	movdqu	0 + 128(%rsi),%xmm3
   2784 	movdqu	16 + 128(%rsi),%xmm7
   2785 	movdqu	32 + 128(%rsi),%xmm11
   2786 	movdqu	48 + 128(%rsi),%xmm15
   2787 	pxor	%xmm3,%xmm1
   2788 	pxor	%xmm7,%xmm5
   2789 	pxor	%xmm11,%xmm9
   2790 	pxor	%xmm13,%xmm15
   2791 	movdqu	%xmm1,0 + 128(%rdi)
   2792 	movdqu	%xmm5,16 + 128(%rdi)
   2793 	movdqu	%xmm9,32 + 128(%rdi)
   2794 	movdqu	%xmm15,48 + 128(%rdi)
   2795 
   2796 	cmpq	$256,%rbx
   2797 	ja	3f
   2798 
   2799 	movq	$192,%rcx
   2800 	subq	$192,%rbx
   2801 	leaq	192(%rsi),%rsi
   2802 	jmp	seal_sse_128_seal_hash
   2803 3:
   2804 	movdqu	0 + 192(%rsi),%xmm3
   2805 	movdqu	16 + 192(%rsi),%xmm7
   2806 	movdqu	32 + 192(%rsi),%xmm11
   2807 	movdqu	48 + 192(%rsi),%xmm15
   2808 	pxor	%xmm3,%xmm0
   2809 	pxor	%xmm7,%xmm4
   2810 	pxor	%xmm11,%xmm8
   2811 	pxor	%xmm12,%xmm15
   2812 	movdqu	%xmm0,0 + 192(%rdi)
   2813 	movdqu	%xmm4,16 + 192(%rdi)
   2814 	movdqu	%xmm8,32 + 192(%rdi)
   2815 	movdqu	%xmm15,48 + 192(%rdi)
   2816 
   2817 	leaq	256(%rsi),%rsi
   2818 	subq	$256,%rbx
   2819 	movq	$6,%rcx
   2820 	movq	$4,%r8
   2821 	cmpq	$192,%rbx
   2822 	jg	1b
   2823 	movq	%rbx,%rcx
   2824 	testq	%rbx,%rbx
   2825 	je	seal_sse_128_seal_hash
   2826 	movq	$6,%rcx
   2827 	cmpq	$64,%rbx
   2828 	jg	3f
   2829 
   2830 seal_sse_tail_64:
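/* 1-64 bytes remain: a single extra block (counter bumped at 96(%rbp)),
   hashing outstanding ciphertext in the 1:/2: loops as it goes. */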
   2831 	movdqa	.chacha20_consts(%rip),%xmm0
   2832 	movdqa	48(%rbp),%xmm4
   2833 	movdqa	64(%rbp),%xmm8
   2834 	movdqa	96(%rbp),%xmm12
   2835 	paddd	.sse_inc(%rip),%xmm12
   2836 	movdqa	%xmm12,96(%rbp)
   2837 
   2838 1:
   2839 	addq	0(%rdi),%r10
   2840 	adcq	8+0(%rdi),%r11
   2841 	adcq	$1,%r12
   2842 	movq	0+0(%rbp),%rax
   2843 	movq	%rax,%r15
   2844 	mulq	%r10
   2845 	movq	%rax,%r13
   2846 	movq	%rdx,%r14
   2847 	movq	0+0(%rbp),%rax
   2848 	mulq	%r11
   2849 	imulq	%r12,%r15
   2850 	addq	%rax,%r14
   2851 	adcq	%rdx,%r15
   2852 	movq	8+0(%rbp),%rax
   2853 	movq	%rax,%r9
   2854 	mulq	%r10
   2855 	addq	%rax,%r14
   2856 	adcq	$0,%rdx
   2857 	movq	%rdx,%r10
   2858 	movq	8+0(%rbp),%rax
   2859 	mulq	%r11
   2860 	addq	%rax,%r15
   2861 	adcq	$0,%rdx
   2862 	imulq	%r12,%r9
   2863 	addq	%r10,%r15
   2864 	adcq	%rdx,%r9
   2865 	movq	%r13,%r10
   2866 	movq	%r14,%r11
   2867 	movq	%r15,%r12
   2868 	andq	$3,%r12
   2869 	movq	%r15,%r13
   2870 	andq	$-4,%r13
   2871 	movq	%r9,%r14
   2872 	shrdq	$2,%r9,%r15
   2873 	shrq	$2,%r9
   2874 	addq	%r13,%r10
   2875 	adcq	%r14,%r11
   2876 	adcq	$0,%r12
   2877 	addq	%r15,%r10
   2878 	adcq	%r9,%r11
   2879 	adcq	$0,%r12
   2880 
   2881 	leaq	16(%rdi),%rdi
   2882 2:
   2883 	paddd	%xmm4,%xmm0
   2884 	pxor	%xmm0,%xmm12
   2885 	pshufb	.rol16(%rip),%xmm12
   2886 	paddd	%xmm12,%xmm8
   2887 	pxor	%xmm8,%xmm4
   2888 	movdqa	%xmm4,%xmm3
   2889 	pslld	$12,%xmm3
   2890 	psrld	$20,%xmm4
   2891 	pxor	%xmm3,%xmm4
   2892 	paddd	%xmm4,%xmm0
   2893 	pxor	%xmm0,%xmm12
   2894 	pshufb	.rol8(%rip),%xmm12
   2895 	paddd	%xmm12,%xmm8
   2896 	pxor	%xmm8,%xmm4
   2897 	movdqa	%xmm4,%xmm3
   2898 	pslld	$7,%xmm3
   2899 	psrld	$25,%xmm4
   2900 	pxor	%xmm3,%xmm4
   2901 .byte	102,15,58,15,228,4
   2902 .byte	102,69,15,58,15,192,8
   2903 .byte	102,69,15,58,15,228,12
   2904 	paddd	%xmm4,%xmm0
   2905 	pxor	%xmm0,%xmm12
   2906 	pshufb	.rol16(%rip),%xmm12
   2907 	paddd	%xmm12,%xmm8
   2908 	pxor	%xmm8,%xmm4
   2909 	movdqa	%xmm4,%xmm3
   2910 	pslld	$12,%xmm3
   2911 	psrld	$20,%xmm4
   2912 	pxor	%xmm3,%xmm4
   2913 	paddd	%xmm4,%xmm0
   2914 	pxor	%xmm0,%xmm12
   2915 	pshufb	.rol8(%rip),%xmm12
   2916 	paddd	%xmm12,%xmm8
   2917 	pxor	%xmm8,%xmm4
   2918 	movdqa	%xmm4,%xmm3
   2919 	pslld	$7,%xmm3
   2920 	psrld	$25,%xmm4
   2921 	pxor	%xmm3,%xmm4
   2922 .byte	102,15,58,15,228,12
   2923 .byte	102,69,15,58,15,192,8
   2924 .byte	102,69,15,58,15,228,4
   2925 	addq	0(%rdi),%r10
   2926 	adcq	8+0(%rdi),%r11
   2927 	adcq	$1,%r12
   2928 	movq	0+0(%rbp),%rax
   2929 	movq	%rax,%r15
   2930 	mulq	%r10
   2931 	movq	%rax,%r13
   2932 	movq	%rdx,%r14
   2933 	movq	0+0(%rbp),%rax
   2934 	mulq	%r11
   2935 	imulq	%r12,%r15
   2936 	addq	%rax,%r14
   2937 	adcq	%rdx,%r15
   2938 	movq	8+0(%rbp),%rax
   2939 	movq	%rax,%r9
   2940 	mulq	%r10
   2941 	addq	%rax,%r14
   2942 	adcq	$0,%rdx
   2943 	movq	%rdx,%r10
   2944 	movq	8+0(%rbp),%rax
   2945 	mulq	%r11
   2946 	addq	%rax,%r15
   2947 	adcq	$0,%rdx
   2948 	imulq	%r12,%r9
   2949 	addq	%r10,%r15
   2950 	adcq	%rdx,%r9
   2951 	movq	%r13,%r10
   2952 	movq	%r14,%r11
   2953 	movq	%r15,%r12
   2954 	andq	$3,%r12
   2955 	movq	%r15,%r13
   2956 	andq	$-4,%r13
   2957 	movq	%r9,%r14
   2958 	shrdq	$2,%r9,%r15
   2959 	shrq	$2,%r9
   2960 	addq	%r13,%r10
   2961 	adcq	%r14,%r11
   2962 	adcq	$0,%r12
   2963 	addq	%r15,%r10
   2964 	adcq	%r9,%r11
   2965 	adcq	$0,%r12
   2966 
   2967 	leaq	16(%rdi),%rdi
   2968 	decq	%rcx
   2969 	jg	1b
   2970 	decq	%r8
   2971 	jge	2b
   2972 	paddd	.chacha20_consts(%rip),%xmm0
   2973 	paddd	48(%rbp),%xmm4
   2974 	paddd	64(%rbp),%xmm8
   2975 	paddd	96(%rbp),%xmm12
   2976 
   2977 	jmp	seal_sse_128_seal
   2978 3:
   2979 	cmpq	$128,%rbx
   2980 	jg	3f
   2981 
   2982 seal_sse_tail_128:
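/* 65-128 bytes remain: two extra blocks, same hash-while-encrypting shape. */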
   2983 	movdqa	.chacha20_consts(%rip),%xmm0
   2984 	movdqa	48(%rbp),%xmm4
   2985 	movdqa	64(%rbp),%xmm8
   2986 	movdqa	%xmm0,%xmm1
   2987 	movdqa	%xmm4,%xmm5
   2988 	movdqa	%xmm8,%xmm9
   2989 	movdqa	96(%rbp),%xmm13
   2990 	paddd	.sse_inc(%rip),%xmm13
   2991 	movdqa	%xmm13,%xmm12
   2992 	paddd	.sse_inc(%rip),%xmm12
   2993 	movdqa	%xmm12,96(%rbp)
   2994 	movdqa	%xmm13,112(%rbp)
   2995 
   2996 1:
   2997 	addq	0(%rdi),%r10
   2998 	adcq	8+0(%rdi),%r11
   2999 	adcq	$1,%r12
   3000 	movq	0+0(%rbp),%rax
   3001 	movq	%rax,%r15
   3002 	mulq	%r10
   3003 	movq	%rax,%r13
   3004 	movq	%rdx,%r14
   3005 	movq	0+0(%rbp),%rax
   3006 	mulq	%r11
   3007 	imulq	%r12,%r15
   3008 	addq	%rax,%r14
   3009 	adcq	%rdx,%r15
   3010 	movq	8+0(%rbp),%rax
   3011 	movq	%rax,%r9
   3012 	mulq	%r10
   3013 	addq	%rax,%r14
   3014 	adcq	$0,%rdx
   3015 	movq	%rdx,%r10
   3016 	movq	8+0(%rbp),%rax
   3017 	mulq	%r11
   3018 	addq	%rax,%r15
   3019 	adcq	$0,%rdx
   3020 	imulq	%r12,%r9
   3021 	addq	%r10,%r15
   3022 	adcq	%rdx,%r9
   3023 	movq	%r13,%r10
   3024 	movq	%r14,%r11
   3025 	movq	%r15,%r12
   3026 	andq	$3,%r12
   3027 	movq	%r15,%r13
   3028 	andq	$-4,%r13
   3029 	movq	%r9,%r14
   3030 	shrdq	$2,%r9,%r15
   3031 	shrq	$2,%r9
   3032 	addq	%r13,%r10
   3033 	adcq	%r14,%r11
   3034 	adcq	$0,%r12
   3035 	addq	%r15,%r10
   3036 	adcq	%r9,%r11
   3037 	adcq	$0,%r12
   3038 
   3039 	leaq	16(%rdi),%rdi
   3040 2:
   3041 	paddd	%xmm4,%xmm0
   3042 	pxor	%xmm0,%xmm12
   3043 	pshufb	.rol16(%rip),%xmm12
   3044 	paddd	%xmm12,%xmm8
   3045 	pxor	%xmm8,%xmm4
   3046 	movdqa	%xmm4,%xmm3
   3047 	pslld	$12,%xmm3
   3048 	psrld	$20,%xmm4
   3049 	pxor	%xmm3,%xmm4
   3050 	paddd	%xmm4,%xmm0
   3051 	pxor	%xmm0,%xmm12
   3052 	pshufb	.rol8(%rip),%xmm12
   3053 	paddd	%xmm12,%xmm8
   3054 	pxor	%xmm8,%xmm4
   3055 	movdqa	%xmm4,%xmm3
   3056 	pslld	$7,%xmm3
   3057 	psrld	$25,%xmm4
   3058 	pxor	%xmm3,%xmm4
   3059 .byte	102,15,58,15,228,4
   3060 .byte	102,69,15,58,15,192,8
   3061 .byte	102,69,15,58,15,228,12
   3062 	paddd	%xmm5,%xmm1
   3063 	pxor	%xmm1,%xmm13
   3064 	pshufb	.rol16(%rip),%xmm13
   3065 	paddd	%xmm13,%xmm9
   3066 	pxor	%xmm9,%xmm5
   3067 	movdqa	%xmm5,%xmm3
   3068 	pslld	$12,%xmm3
   3069 	psrld	$20,%xmm5
   3070 	pxor	%xmm3,%xmm5
   3071 	paddd	%xmm5,%xmm1
   3072 	pxor	%xmm1,%xmm13
   3073 	pshufb	.rol8(%rip),%xmm13
   3074 	paddd	%xmm13,%xmm9
   3075 	pxor	%xmm9,%xmm5
   3076 	movdqa	%xmm5,%xmm3
   3077 	pslld	$7,%xmm3
   3078 	psrld	$25,%xmm5
   3079 	pxor	%xmm3,%xmm5
   3080 .byte	102,15,58,15,237,4
   3081 .byte	102,69,15,58,15,201,8
   3082 .byte	102,69,15,58,15,237,12
   3083 	addq	0(%rdi),%r10
   3084 	adcq	8+0(%rdi),%r11
   3085 	adcq	$1,%r12
   3086 	movq	0+0(%rbp),%rax
   3087 	movq	%rax,%r15
   3088 	mulq	%r10
   3089 	movq	%rax,%r13
   3090 	movq	%rdx,%r14
   3091 	movq	0+0(%rbp),%rax
   3092 	mulq	%r11
   3093 	imulq	%r12,%r15
   3094 	addq	%rax,%r14
   3095 	adcq	%rdx,%r15
   3096 	movq	8+0(%rbp),%rax
   3097 	movq	%rax,%r9
   3098 	mulq	%r10
   3099 	addq	%rax,%r14
   3100 	adcq	$0,%rdx
   3101 	movq	%rdx,%r10
   3102 	movq	8+0(%rbp),%rax
   3103 	mulq	%r11
   3104 	addq	%rax,%r15
   3105 	adcq	$0,%rdx
   3106 	imulq	%r12,%r9
   3107 	addq	%r10,%r15
   3108 	adcq	%rdx,%r9
   3109 	movq	%r13,%r10
   3110 	movq	%r14,%r11
   3111 	movq	%r15,%r12
   3112 	andq	$3,%r12
   3113 	movq	%r15,%r13
   3114 	andq	$-4,%r13
   3115 	movq	%r9,%r14
   3116 	shrdq	$2,%r9,%r15
   3117 	shrq	$2,%r9
   3118 	addq	%r13,%r10
   3119 	adcq	%r14,%r11
   3120 	adcq	$0,%r12
   3121 	addq	%r15,%r10
   3122 	adcq	%r9,%r11
   3123 	adcq	$0,%r12
   3124 	paddd	%xmm4,%xmm0
   3125 	pxor	%xmm0,%xmm12
   3126 	pshufb	.rol16(%rip),%xmm12
   3127 	paddd	%xmm12,%xmm8
   3128 	pxor	%xmm8,%xmm4
   3129 	movdqa	%xmm4,%xmm3
   3130 	pslld	$12,%xmm3
   3131 	psrld	$20,%xmm4
   3132 	pxor	%xmm3,%xmm4
   3133 	paddd	%xmm4,%xmm0
   3134 	pxor	%xmm0,%xmm12
   3135 	pshufb	.rol8(%rip),%xmm12
   3136 	paddd	%xmm12,%xmm8
   3137 	pxor	%xmm8,%xmm4
   3138 	movdqa	%xmm4,%xmm3
   3139 	pslld	$7,%xmm3
   3140 	psrld	$25,%xmm4
   3141 	pxor	%xmm3,%xmm4
   3142 .byte	102,15,58,15,228,12
   3143 .byte	102,69,15,58,15,192,8
   3144 .byte	102,69,15,58,15,228,4
   3145 	paddd	%xmm5,%xmm1
   3146 	pxor	%xmm1,%xmm13
   3147 	pshufb	.rol16(%rip),%xmm13
   3148 	paddd	%xmm13,%xmm9
   3149 	pxor	%xmm9,%xmm5
   3150 	movdqa	%xmm5,%xmm3
   3151 	pslld	$12,%xmm3
   3152 	psrld	$20,%xmm5
   3153 	pxor	%xmm3,%xmm5
   3154 	paddd	%xmm5,%xmm1
   3155 	pxor	%xmm1,%xmm13
   3156 	pshufb	.rol8(%rip),%xmm13
   3157 	paddd	%xmm13,%xmm9
   3158 	pxor	%xmm9,%xmm5
   3159 	movdqa	%xmm5,%xmm3
   3160 	pslld	$7,%xmm3
   3161 	psrld	$25,%xmm5
   3162 	pxor	%xmm3,%xmm5
   3163 .byte	102,15,58,15,237,12
   3164 .byte	102,69,15,58,15,201,8
   3165 .byte	102,69,15,58,15,237,4
   3166 
   3167 	leaq	16(%rdi),%rdi
   3168 	decq	%rcx
   3169 	jg	1b
   3170 	decq	%r8
   3171 	jge	2b
   3172 	paddd	.chacha20_consts(%rip),%xmm1
   3173 	paddd	48(%rbp),%xmm5
   3174 	paddd	64(%rbp),%xmm9
   3175 	paddd	112(%rbp),%xmm13
   3176 	paddd	.chacha20_consts(%rip),%xmm0
   3177 	paddd	48(%rbp),%xmm4
   3178 	paddd	64(%rbp),%xmm8
   3179 	paddd	96(%rbp),%xmm12
   3180 	movdqu	0 + 0(%rsi),%xmm3
   3181 	movdqu	16 + 0(%rsi),%xmm7
   3182 	movdqu	32 + 0(%rsi),%xmm11
   3183 	movdqu	48 + 0(%rsi),%xmm15
   3184 	pxor	%xmm3,%xmm1
   3185 	pxor	%xmm7,%xmm5
   3186 	pxor	%xmm11,%xmm9
   3187 	pxor	%xmm13,%xmm15
   3188 	movdqu	%xmm1,0 + 0(%rdi)
   3189 	movdqu	%xmm5,16 + 0(%rdi)
   3190 	movdqu	%xmm9,32 + 0(%rdi)
   3191 	movdqu	%xmm15,48 + 0(%rdi)
   3192 
   3193 	movq	$64,%rcx
   3194 	subq	$64,%rbx
   3195 	leaq	64(%rsi),%rsi
   3196 	jmp	seal_sse_128_seal_hash
   3197 3:
   3198 
   3199 seal_sse_tail_192:
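/* 129-192 bytes remain: three extra blocks. */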
   3200 	movdqa	.chacha20_consts(%rip),%xmm0
   3201 	movdqa	48(%rbp),%xmm4
   3202 	movdqa	64(%rbp),%xmm8
   3203 	movdqa	%xmm0,%xmm1
   3204 	movdqa	%xmm4,%xmm5
   3205 	movdqa	%xmm8,%xmm9
   3206 	movdqa	%xmm0,%xmm2
   3207 	movdqa	%xmm4,%xmm6
   3208 	movdqa	%xmm8,%xmm10
   3209 	movdqa	96(%rbp),%xmm14
   3210 	paddd	.sse_inc(%rip),%xmm14
   3211 	movdqa	%xmm14,%xmm13
   3212 	paddd	.sse_inc(%rip),%xmm13
   3213 	movdqa	%xmm13,%xmm12
   3214 	paddd	.sse_inc(%rip),%xmm12
   3215 	movdqa	%xmm12,96(%rbp)
   3216 	movdqa	%xmm13,112(%rbp)
   3217 	movdqa	%xmm14,128(%rbp)
   3218 
   3219 1:
   3220 	addq	0(%rdi),%r10
   3221 	adcq	8+0(%rdi),%r11
   3222 	adcq	$1,%r12
   3223 	movq	0+0(%rbp),%rax
   3224 	movq	%rax,%r15
   3225 	mulq	%r10
   3226 	movq	%rax,%r13
   3227 	movq	%rdx,%r14
   3228 	movq	0+0(%rbp),%rax
   3229 	mulq	%r11
   3230 	imulq	%r12,%r15
   3231 	addq	%rax,%r14
   3232 	adcq	%rdx,%r15
   3233 	movq	8+0(%rbp),%rax
   3234 	movq	%rax,%r9
   3235 	mulq	%r10
   3236 	addq	%rax,%r14
   3237 	adcq	$0,%rdx
   3238 	movq	%rdx,%r10
   3239 	movq	8+0(%rbp),%rax
   3240 	mulq	%r11
   3241 	addq	%rax,%r15
   3242 	adcq	$0,%rdx
   3243 	imulq	%r12,%r9
   3244 	addq	%r10,%r15
   3245 	adcq	%rdx,%r9
   3246 	movq	%r13,%r10
   3247 	movq	%r14,%r11
   3248 	movq	%r15,%r12
   3249 	andq	$3,%r12
   3250 	movq	%r15,%r13
   3251 	andq	$-4,%r13
   3252 	movq	%r9,%r14
   3253 	shrdq	$2,%r9,%r15
   3254 	shrq	$2,%r9
   3255 	addq	%r13,%r10
   3256 	adcq	%r14,%r11
   3257 	adcq	$0,%r12
   3258 	addq	%r15,%r10
   3259 	adcq	%r9,%r11
   3260 	adcq	$0,%r12
   3261 
   3262 	leaq	16(%rdi),%rdi
   3263 2:
   3264 	paddd	%xmm4,%xmm0
   3265 	pxor	%xmm0,%xmm12
   3266 	pshufb	.rol16(%rip),%xmm12
   3267 	paddd	%xmm12,%xmm8
   3268 	pxor	%xmm8,%xmm4
   3269 	movdqa	%xmm4,%xmm3
   3270 	pslld	$12,%xmm3
   3271 	psrld	$20,%xmm4
   3272 	pxor	%xmm3,%xmm4
   3273 	paddd	%xmm4,%xmm0
   3274 	pxor	%xmm0,%xmm12
   3275 	pshufb	.rol8(%rip),%xmm12
   3276 	paddd	%xmm12,%xmm8
   3277 	pxor	%xmm8,%xmm4
   3278 	movdqa	%xmm4,%xmm3
   3279 	pslld	$7,%xmm3
   3280 	psrld	$25,%xmm4
   3281 	pxor	%xmm3,%xmm4
   3282 .byte	102,15,58,15,228,4
   3283 .byte	102,69,15,58,15,192,8
   3284 .byte	102,69,15,58,15,228,12
   3285 	paddd	%xmm5,%xmm1
   3286 	pxor	%xmm1,%xmm13
   3287 	pshufb	.rol16(%rip),%xmm13
   3288 	paddd	%xmm13,%xmm9
   3289 	pxor	%xmm9,%xmm5
   3290 	movdqa	%xmm5,%xmm3
   3291 	pslld	$12,%xmm3
   3292 	psrld	$20,%xmm5
   3293 	pxor	%xmm3,%xmm5
   3294 	paddd	%xmm5,%xmm1
   3295 	pxor	%xmm1,%xmm13
   3296 	pshufb	.rol8(%rip),%xmm13
   3297 	paddd	%xmm13,%xmm9
   3298 	pxor	%xmm9,%xmm5
   3299 	movdqa	%xmm5,%xmm3
   3300 	pslld	$7,%xmm3
   3301 	psrld	$25,%xmm5
   3302 	pxor	%xmm3,%xmm5
   3303 .byte	102,15,58,15,237,4
   3304 .byte	102,69,15,58,15,201,8
   3305 .byte	102,69,15,58,15,237,12
   3306 	paddd	%xmm6,%xmm2
   3307 	pxor	%xmm2,%xmm14
   3308 	pshufb	.rol16(%rip),%xmm14
   3309 	paddd	%xmm14,%xmm10
   3310 	pxor	%xmm10,%xmm6
   3311 	movdqa	%xmm6,%xmm3
   3312 	pslld	$12,%xmm3
   3313 	psrld	$20,%xmm6
   3314 	pxor	%xmm3,%xmm6
   3315 	paddd	%xmm6,%xmm2
   3316 	pxor	%xmm2,%xmm14
   3317 	pshufb	.rol8(%rip),%xmm14
   3318 	paddd	%xmm14,%xmm10
   3319 	pxor	%xmm10,%xmm6
   3320 	movdqa	%xmm6,%xmm3
   3321 	pslld	$7,%xmm3
   3322 	psrld	$25,%xmm6
   3323 	pxor	%xmm3,%xmm6
   3324 .byte	102,15,58,15,246,4
   3325 .byte	102,69,15,58,15,210,8
   3326 .byte	102,69,15,58,15,246,12
   3327 	addq	0(%rdi),%r10
   3328 	adcq	8+0(%rdi),%r11
   3329 	adcq	$1,%r12
   3330 	movq	0+0(%rbp),%rax
   3331 	movq	%rax,%r15
   3332 	mulq	%r10
   3333 	movq	%rax,%r13
   3334 	movq	%rdx,%r14
   3335 	movq	0+0(%rbp),%rax
   3336 	mulq	%r11
   3337 	imulq	%r12,%r15
   3338 	addq	%rax,%r14
   3339 	adcq	%rdx,%r15
   3340 	movq	8+0(%rbp),%rax
   3341 	movq	%rax,%r9
   3342 	mulq	%r10
   3343 	addq	%rax,%r14
   3344 	adcq	$0,%rdx
   3345 	movq	%rdx,%r10
   3346 	movq	8+0(%rbp),%rax
   3347 	mulq	%r11
   3348 	addq	%rax,%r15
   3349 	adcq	$0,%rdx
   3350 	imulq	%r12,%r9
   3351 	addq	%r10,%r15
   3352 	adcq	%rdx,%r9
   3353 	movq	%r13,%r10
   3354 	movq	%r14,%r11
   3355 	movq	%r15,%r12
   3356 	andq	$3,%r12
   3357 	movq	%r15,%r13
   3358 	andq	$-4,%r13
   3359 	movq	%r9,%r14
   3360 	shrdq	$2,%r9,%r15
   3361 	shrq	$2,%r9
   3362 	addq	%r13,%r10
   3363 	adcq	%r14,%r11
   3364 	adcq	$0,%r12
   3365 	addq	%r15,%r10
   3366 	adcq	%r9,%r11
   3367 	adcq	$0,%r12
   3368 	paddd	%xmm4,%xmm0
   3369 	pxor	%xmm0,%xmm12
   3370 	pshufb	.rol16(%rip),%xmm12
   3371 	paddd	%xmm12,%xmm8
   3372 	pxor	%xmm8,%xmm4
   3373 	movdqa	%xmm4,%xmm3
   3374 	pslld	$12,%xmm3
   3375 	psrld	$20,%xmm4
   3376 	pxor	%xmm3,%xmm4
   3377 	paddd	%xmm4,%xmm0
   3378 	pxor	%xmm0,%xmm12
   3379 	pshufb	.rol8(%rip),%xmm12
   3380 	paddd	%xmm12,%xmm8
   3381 	pxor	%xmm8,%xmm4
   3382 	movdqa	%xmm4,%xmm3
   3383 	pslld	$7,%xmm3
   3384 	psrld	$25,%xmm4
   3385 	pxor	%xmm3,%xmm4
   3386 .byte	102,15,58,15,228,12
   3387 .byte	102,69,15,58,15,192,8
   3388 .byte	102,69,15,58,15,228,4
   3389 	paddd	%xmm5,%xmm1
   3390 	pxor	%xmm1,%xmm13
   3391 	pshufb	.rol16(%rip),%xmm13
   3392 	paddd	%xmm13,%xmm9
   3393 	pxor	%xmm9,%xmm5
   3394 	movdqa	%xmm5,%xmm3
   3395 	pslld	$12,%xmm3
   3396 	psrld	$20,%xmm5
   3397 	pxor	%xmm3,%xmm5
   3398 	paddd	%xmm5,%xmm1
   3399 	pxor	%xmm1,%xmm13
   3400 	pshufb	.rol8(%rip),%xmm13
   3401 	paddd	%xmm13,%xmm9
   3402 	pxor	%xmm9,%xmm5
   3403 	movdqa	%xmm5,%xmm3
   3404 	pslld	$7,%xmm3
   3405 	psrld	$25,%xmm5
   3406 	pxor	%xmm3,%xmm5
   3407 .byte	102,15,58,15,237,12
   3408 .byte	102,69,15,58,15,201,8
   3409 .byte	102,69,15,58,15,237,4
   3410 	paddd	%xmm6,%xmm2
   3411 	pxor	%xmm2,%xmm14
   3412 	pshufb	.rol16(%rip),%xmm14
   3413 	paddd	%xmm14,%xmm10
   3414 	pxor	%xmm10,%xmm6
   3415 	movdqa	%xmm6,%xmm3
   3416 	pslld	$12,%xmm3
   3417 	psrld	$20,%xmm6
   3418 	pxor	%xmm3,%xmm6
   3419 	paddd	%xmm6,%xmm2
   3420 	pxor	%xmm2,%xmm14
   3421 	pshufb	.rol8(%rip),%xmm14
   3422 	paddd	%xmm14,%xmm10
   3423 	pxor	%xmm10,%xmm6
   3424 	movdqa	%xmm6,%xmm3
   3425 	pslld	$7,%xmm3
   3426 	psrld	$25,%xmm6
   3427 	pxor	%xmm3,%xmm6
   3428 .byte	102,15,58,15,246,12
   3429 .byte	102,69,15,58,15,210,8
   3430 .byte	102,69,15,58,15,246,4
   3431 
   3432 	leaq	16(%rdi),%rdi
   3433 	decq	%rcx
   3434 	jg	1b
   3435 	decq	%r8
   3436 	jge	2b
   3437 	paddd	.chacha20_consts(%rip),%xmm2
   3438 	paddd	48(%rbp),%xmm6
   3439 	paddd	64(%rbp),%xmm10
   3440 	paddd	128(%rbp),%xmm14
   3441 	paddd	.chacha20_consts(%rip),%xmm1
   3442 	paddd	48(%rbp),%xmm5
   3443 	paddd	64(%rbp),%xmm9
   3444 	paddd	112(%rbp),%xmm13
   3445 	paddd	.chacha20_consts(%rip),%xmm0
   3446 	paddd	48(%rbp),%xmm4
   3447 	paddd	64(%rbp),%xmm8
   3448 	paddd	96(%rbp),%xmm12
   3449 	movdqu	0 + 0(%rsi),%xmm3
   3450 	movdqu	16 + 0(%rsi),%xmm7
   3451 	movdqu	32 + 0(%rsi),%xmm11
   3452 	movdqu	48 + 0(%rsi),%xmm15
   3453 	pxor	%xmm3,%xmm2
   3454 	pxor	%xmm7,%xmm6
   3455 	pxor	%xmm11,%xmm10
   3456 	pxor	%xmm14,%xmm15
   3457 	movdqu	%xmm2,0 + 0(%rdi)
   3458 	movdqu	%xmm6,16 + 0(%rdi)
   3459 	movdqu	%xmm10,32 + 0(%rdi)
   3460 	movdqu	%xmm15,48 + 0(%rdi)
   3461 	movdqu	0 + 64(%rsi),%xmm3
   3462 	movdqu	16 + 64(%rsi),%xmm7
   3463 	movdqu	32 + 64(%rsi),%xmm11
   3464 	movdqu	48 + 64(%rsi),%xmm15
   3465 	pxor	%xmm3,%xmm1
   3466 	pxor	%xmm7,%xmm5
   3467 	pxor	%xmm11,%xmm9
   3468 	pxor	%xmm13,%xmm15
   3469 	movdqu	%xmm1,0 + 64(%rdi)
   3470 	movdqu	%xmm5,16 + 64(%rdi)
   3471 	movdqu	%xmm9,32 + 64(%rdi)
   3472 	movdqu	%xmm15,48 + 64(%rdi)
   3473 
   3474 	movq	$128,%rcx
   3475 	subq	$128,%rbx
   3476 	leaq	128(%rsi),%rsi
   3477 
   3478 seal_sse_128_seal_hash:
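/* Hash whatever whole 16-byte ciphertext blocks are still owed (%rcx bytes
   at %rdi), one Poly1305 mul-and-reduce per block. */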
   3479 	cmpq	$16,%rcx
   3480 	jb	seal_sse_128_seal
   3481 	addq	0(%rdi),%r10
   3482 	adcq	8+0(%rdi),%r11
   3483 	adcq	$1,%r12
   3484 	movq	0+0(%rbp),%rax
   3485 	movq	%rax,%r15
   3486 	mulq	%r10
   3487 	movq	%rax,%r13
   3488 	movq	%rdx,%r14
   3489 	movq	0+0(%rbp),%rax
   3490 	mulq	%r11
   3491 	imulq	%r12,%r15
   3492 	addq	%rax,%r14
   3493 	adcq	%rdx,%r15
   3494 	movq	8+0(%rbp),%rax
   3495 	movq	%rax,%r9
   3496 	mulq	%r10
   3497 	addq	%rax,%r14
   3498 	adcq	$0,%rdx
   3499 	movq	%rdx,%r10
   3500 	movq	8+0(%rbp),%rax
   3501 	mulq	%r11
   3502 	addq	%rax,%r15
   3503 	adcq	$0,%rdx
   3504 	imulq	%r12,%r9
   3505 	addq	%r10,%r15
   3506 	adcq	%rdx,%r9
   3507 	movq	%r13,%r10
   3508 	movq	%r14,%r11
   3509 	movq	%r15,%r12
   3510 	andq	$3,%r12
   3511 	movq	%r15,%r13
   3512 	andq	$-4,%r13
   3513 	movq	%r9,%r14
   3514 	shrdq	$2,%r9,%r15
   3515 	shrq	$2,%r9
   3516 	addq	%r13,%r10
   3517 	adcq	%r14,%r11
   3518 	adcq	$0,%r12
   3519 	addq	%r15,%r10
   3520 	adcq	%r9,%r11
   3521 	adcq	$0,%r12
   3522 
   3523 	subq	$16,%rcx
   3524 	leaq	16(%rdi),%rdi
   3525 	jmp	seal_sse_128_seal_hash
   3526 
   3527 seal_sse_128_seal:
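/* Encrypt-and-hash the remaining input 16 bytes at a time; the movdqa chain
   at the bottom rotates the next buffered keystream block into
   xmm0/xmm4/xmm8/xmm12. */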
   3528 	cmpq	$16,%rbx
   3529 	jb	seal_sse_tail_16
   3530 	subq	$16,%rbx
   3531 
   3532 	movdqu	0(%rsi),%xmm3
   3533 	pxor	%xmm3,%xmm0
   3534 	movdqu	%xmm0,0(%rdi)
   3535 
   3536 	addq	0(%rdi),%r10
   3537 	adcq	8(%rdi),%r11
   3538 	adcq	$1,%r12
   3539 	leaq	16(%rsi),%rsi
   3540 	leaq	16(%rdi),%rdi
   3541 	movq	0+0(%rbp),%rax
   3542 	movq	%rax,%r15
   3543 	mulq	%r10
   3544 	movq	%rax,%r13
   3545 	movq	%rdx,%r14
   3546 	movq	0+0(%rbp),%rax
   3547 	mulq	%r11
   3548 	imulq	%r12,%r15
   3549 	addq	%rax,%r14
   3550 	adcq	%rdx,%r15
   3551 	movq	8+0(%rbp),%rax
   3552 	movq	%rax,%r9
   3553 	mulq	%r10
   3554 	addq	%rax,%r14
   3555 	adcq	$0,%rdx
   3556 	movq	%rdx,%r10
   3557 	movq	8+0(%rbp),%rax
   3558 	mulq	%r11
   3559 	addq	%rax,%r15
   3560 	adcq	$0,%rdx
   3561 	imulq	%r12,%r9
   3562 	addq	%r10,%r15
   3563 	adcq	%rdx,%r9
   3564 	movq	%r13,%r10
   3565 	movq	%r14,%r11
   3566 	movq	%r15,%r12
   3567 	andq	$3,%r12
   3568 	movq	%r15,%r13
   3569 	andq	$-4,%r13
   3570 	movq	%r9,%r14
   3571 	shrdq	$2,%r9,%r15
   3572 	shrq	$2,%r9
   3573 	addq	%r13,%r10
   3574 	adcq	%r14,%r11
   3575 	adcq	$0,%r12
   3576 	addq	%r15,%r10
   3577 	adcq	%r9,%r11
   3578 	adcq	$0,%r12
   3579 
   3580 
   3581 	movdqa	%xmm4,%xmm0
   3582 	movdqa	%xmm8,%xmm4
   3583 	movdqa	%xmm12,%xmm8
   3584 	movdqa	%xmm1,%xmm12
   3585 	movdqa	%xmm5,%xmm1
   3586 	movdqa	%xmm9,%xmm5
   3587 	movdqa	%xmm13,%xmm9
   3588 	jmp	seal_sse_128_seal
   3589 
   3590 seal_sse_tail_16:
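/* Final partial block (< 16 bytes): gather the tail byte-by-byte into xmm15,
   xor with keystream xmm0, emit it, then splice in any extra_in bytes
   (masked via .and_masks) so a single padded block feeds Poly1305. */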
   3591 	testq	%rbx,%rbx
   3592 	jz	process_blocks_of_extra_in
   3593 
   3594 	movq	%rbx,%r8
   3595 	movq	%rbx,%rcx
   3596 	leaq	-1(%rsi,%rbx), %rsi
   3597 	pxor	%xmm15,%xmm15
   3598 1:
   3599 	pslldq	$1,%xmm15
   3600 	pinsrb	$0,(%rsi),%xmm15
   3601 	leaq	-1(%rsi),%rsi
   3602 	decq	%rcx
   3603 	jne	1b
   3604 
   3605 
   3606 	pxor	%xmm0,%xmm15
   3607 
   3608 
   3609 	movq	%rbx,%rcx
   3610 	movdqu	%xmm15,%xmm0
   3611 2:
   3612 	pextrb	$0,%xmm0,(%rdi)
   3613 	psrldq	$1,%xmm0
   3614 	addq	$1,%rdi
   3615 	subq	$1,%rcx
   3616 	jnz	2b
   3617 
   3618 
   3619 
   3620 
   3621 
   3622 
   3623 
   3624 
   3625 	movq	288+32(%rsp),%r9
   3626 	movq	56(%r9),%r14
   3627 	movq	48(%r9),%r13
   3628 	testq	%r14,%r14
   3629 	jz	process_partial_block
   3630 
   3631 	movq	$16,%r15
   3632 	subq	%rbx,%r15
   3633 	cmpq	%r15,%r14
   3634 
   3635 	jge	load_extra_in
   3636 	movq	%r14,%r15
   3637 
   3638 load_extra_in:
   3639 
   3640 
   3641 	leaq	-1(%r13,%r15), %rsi
   3642 
   3643 
   3644 	addq	%r15,%r13
   3645 	subq	%r15,%r14
   3646 	movq	%r13,48(%r9)
   3647 	movq	%r14,56(%r9)
   3648 
   3649 
   3650 
   3651 	addq	%r15,%r8
   3652 
   3653 
   3654 	pxor	%xmm11,%xmm11
   3655 3:
   3656 	pslldq	$1,%xmm11
   3657 	pinsrb	$0,(%rsi),%xmm11
   3658 	leaq	-1(%rsi),%rsi
   3659 	subq	$1,%r15
   3660 	jnz	3b
   3661 
   3662 
   3663 
   3664 
   3665 	movq	%rbx,%r15
   3666 
   3667 4:
   3668 	pslldq	$1,%xmm11
   3669 	subq	$1,%r15
   3670 	jnz	4b
   3671 
   3672 
   3673 
   3674 
   3675 	leaq	.and_masks(%rip),%r15
   3676 	shlq	$4,%rbx
   3677 	pand	-16(%r15,%rbx), %xmm15
   3678 
   3679 
   3680 	por	%xmm11,%xmm15
   3681 
   3682 
   3683 
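/* .byte 102,77,15,126,253 is movq %xmm15,%r13 (also used in
   process_partial_block): the assembled block moves into the scalar
   accumulator via %r13/%r14. */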
   3684 .byte	102,77,15,126,253
   3685 	pextrq	$1,%xmm15,%r14
   3686 	addq	%r13,%r10
   3687 	adcq	%r14,%r11
   3688 	adcq	$1,%r12
   3689 	movq	0+0(%rbp),%rax
   3690 	movq	%rax,%r15
   3691 	mulq	%r10
   3692 	movq	%rax,%r13
   3693 	movq	%rdx,%r14
   3694 	movq	0+0(%rbp),%rax
   3695 	mulq	%r11
   3696 	imulq	%r12,%r15
   3697 	addq	%rax,%r14
   3698 	adcq	%rdx,%r15
   3699 	movq	8+0(%rbp),%rax
   3700 	movq	%rax,%r9
   3701 	mulq	%r10
   3702 	addq	%rax,%r14
   3703 	adcq	$0,%rdx
   3704 	movq	%rdx,%r10
   3705 	movq	8+0(%rbp),%rax
   3706 	mulq	%r11
   3707 	addq	%rax,%r15
   3708 	adcq	$0,%rdx
   3709 	imulq	%r12,%r9
   3710 	addq	%r10,%r15
   3711 	adcq	%rdx,%r9
   3712 	movq	%r13,%r10
   3713 	movq	%r14,%r11
   3714 	movq	%r15,%r12
   3715 	andq	$3,%r12
   3716 	movq	%r15,%r13
   3717 	andq	$-4,%r13
   3718 	movq	%r9,%r14
   3719 	shrdq	$2,%r9,%r15
   3720 	shrq	$2,%r9
   3721 	addq	%r13,%r10
   3722 	adcq	%r14,%r11
   3723 	adcq	$0,%r12
   3724 	addq	%r15,%r10
   3725 	adcq	%r9,%r11
   3726 	adcq	$0,%r12
   3727 
   3728 
   3729 process_blocks_of_extra_in:
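/* Authenticate the extra ciphertext described by 48(%r9)/56(%r9): whole
   16-byte blocks here, the trailer via process_extra_in_trailer. */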
   3730 
   3731 	movq	288+32(%rsp),%r9
   3732 	movq	48(%r9),%rsi
   3733 	movq	56(%r9),%r8
   3734 	movq	%r8,%rcx
   3735 	shrq	$4,%r8
   3736 
   3737 5:
   3738 	jz	process_extra_in_trailer
   3739 	addq	0(%rsi),%r10
   3740 	adcq	8+0(%rsi),%r11
   3741 	adcq	$1,%r12
   3742 	movq	0+0(%rbp),%rax
   3743 	movq	%rax,%r15
   3744 	mulq	%r10
   3745 	movq	%rax,%r13
   3746 	movq	%rdx,%r14
   3747 	movq	0+0(%rbp),%rax
   3748 	mulq	%r11
   3749 	imulq	%r12,%r15
   3750 	addq	%rax,%r14
   3751 	adcq	%rdx,%r15
   3752 	movq	8+0(%rbp),%rax
   3753 	movq	%rax,%r9
   3754 	mulq	%r10
   3755 	addq	%rax,%r14
   3756 	adcq	$0,%rdx
   3757 	movq	%rdx,%r10
   3758 	movq	8+0(%rbp),%rax
   3759 	mulq	%r11
   3760 	addq	%rax,%r15
   3761 	adcq	$0,%rdx
   3762 	imulq	%r12,%r9
   3763 	addq	%r10,%r15
   3764 	adcq	%rdx,%r9
   3765 	movq	%r13,%r10
   3766 	movq	%r14,%r11
   3767 	movq	%r15,%r12
   3768 	andq	$3,%r12
   3769 	movq	%r15,%r13
   3770 	andq	$-4,%r13
   3771 	movq	%r9,%r14
   3772 	shrdq	$2,%r9,%r15
   3773 	shrq	$2,%r9
   3774 	addq	%r13,%r10
   3775 	adcq	%r14,%r11
   3776 	adcq	$0,%r12
   3777 	addq	%r15,%r10
   3778 	adcq	%r9,%r11
   3779 	adcq	$0,%r12
   3780 
   3781 	leaq	16(%rsi),%rsi
   3782 	subq	$1,%r8
   3783 	jmp	5b
   3784 
   3785 process_extra_in_trailer:
   3786 	andq	$15,%rcx
   3787 	movq	%rcx,%rbx
   3788 	jz	do_length_block
   3789 	leaq	-1(%rsi,%rcx), %rsi
   3790 
   3791 6:
   3792 	pslldq	$1,%xmm15
   3793 	pinsrb	$0,(%rsi),%xmm15
   3794 	leaq	-1(%rsi),%rsi
   3795 	subq	$1,%rcx
   3796 	jnz	6b
   3797 
   3798 process_partial_block:
   3799 
   3800 	leaq	.and_masks(%rip),%r15
   3801 	shlq	$4,%rbx
   3802 	pand	-16(%r15,%rbx), %xmm15
   3803 .byte	102,77,15,126,253
   3804 	pextrq	$1,%xmm15,%r14
   3805 	addq	%r13,%r10
   3806 	adcq	%r14,%r11
   3807 	adcq	$1,%r12
   3808 	movq	0+0(%rbp),%rax
   3809 	movq	%rax,%r15
   3810 	mulq	%r10
   3811 	movq	%rax,%r13
   3812 	movq	%rdx,%r14
   3813 	movq	0+0(%rbp),%rax
   3814 	mulq	%r11
   3815 	imulq	%r12,%r15
   3816 	addq	%rax,%r14
   3817 	adcq	%rdx,%r15
   3818 	movq	8+0(%rbp),%rax
   3819 	movq	%rax,%r9
   3820 	mulq	%r10
   3821 	addq	%rax,%r14
   3822 	adcq	$0,%rdx
   3823 	movq	%rdx,%r10
   3824 	movq	8+0(%rbp),%rax
   3825 	mulq	%r11
   3826 	addq	%rax,%r15
   3827 	adcq	$0,%rdx
   3828 	imulq	%r12,%r9
   3829 	addq	%r10,%r15
   3830 	adcq	%rdx,%r9
   3831 	movq	%r13,%r10
   3832 	movq	%r14,%r11
   3833 	movq	%r15,%r12
   3834 	andq	$3,%r12
   3835 	movq	%r15,%r13
   3836 	andq	$-4,%r13
   3837 	movq	%r9,%r14
   3838 	shrdq	$2,%r9,%r15
   3839 	shrq	$2,%r9
   3840 	addq	%r13,%r10
   3841 	adcq	%r14,%r11
   3842 	adcq	$0,%r12
   3843 	addq	%r15,%r10
   3844 	adcq	%r9,%r11
   3845 	adcq	$0,%r12
   3846 
   3847 
   3848 do_length_block:
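/* Final Poly1305 block: the ad_len || msg_len pair saved at 32(%rbp). */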
   3849 	addq	32(%rbp),%r10
   3850 	adcq	8+32(%rbp),%r11
   3851 	adcq	$1,%r12
   3852 	movq	0+0(%rbp),%rax
   3853 	movq	%rax,%r15
   3854 	mulq	%r10
   3855 	movq	%rax,%r13
   3856 	movq	%rdx,%r14
   3857 	movq	0+0(%rbp),%rax
   3858 	mulq	%r11
   3859 	imulq	%r12,%r15
   3860 	addq	%rax,%r14
   3861 	adcq	%rdx,%r15
   3862 	movq	8+0(%rbp),%rax
   3863 	movq	%rax,%r9
   3864 	mulq	%r10
   3865 	addq	%rax,%r14
   3866 	adcq	$0,%rdx
   3867 	movq	%rdx,%r10
   3868 	movq	8+0(%rbp),%rax
   3869 	mulq	%r11
   3870 	addq	%rax,%r15
   3871 	adcq	$0,%rdx
   3872 	imulq	%r12,%r9
   3873 	addq	%r10,%r15
   3874 	adcq	%rdx,%r9
   3875 	movq	%r13,%r10
   3876 	movq	%r14,%r11
   3877 	movq	%r15,%r12
   3878 	andq	$3,%r12
   3879 	movq	%r15,%r13
   3880 	andq	$-4,%r13
   3881 	movq	%r9,%r14
   3882 	shrdq	$2,%r9,%r15
   3883 	shrq	$2,%r9
   3884 	addq	%r13,%r10
   3885 	adcq	%r14,%r11
   3886 	adcq	$0,%r12
   3887 	addq	%r15,%r10
   3888 	adcq	%r9,%r11
   3889 	adcq	$0,%r12
   3890 
   3891 
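/* Full reduction: subtract p = 2^130 - 5 limb-wise (subq $-5 / sbbq $-1 /
   sbbq $3), keep the unreduced value via cmovc on borrow, then add the key
   half s from 16(%rbp) to form the tag. */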
   3892 	movq	%r10,%r13
   3893 	movq	%r11,%r14
   3894 	movq	%r12,%r15
   3895 	subq	$-5,%r10
   3896 	sbbq	$-1,%r11
   3897 	sbbq	$3,%r12
   3898 	cmovcq	%r13,%r10
   3899 	cmovcq	%r14,%r11
   3900 	cmovcq	%r15,%r12
   3901 
   3902 	addq	0+16(%rbp),%r10
   3903 	adcq	8+16(%rbp),%r11
   3904 
   3905 	addq	$288 + 32,%rsp
   3906 
   3907 	popq	%r9
   3908 
   3909 	movq	%r10,0(%r9)
   3910 	movq	%r11,8(%r9)
   3911 
   3912 	popq	%r15
   3913 
   3914 	popq	%r14
   3915 
   3916 	popq	%r13
   3917 
   3918 	popq	%r12
   3919 
   3920 	popq	%rbx
   3921 
   3922 	popq	%rbp
   3923 
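/* 0xf3,0xc3 is "rep ret", emitted as raw bytes presumably so the assembler
   keeps the prefix. */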
   3924 	.byte	0xf3,0xc3
   3925 
   3926 
   3927 seal_sse_128:
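/* Short-input seal (<= 128 bytes): three blocks generated up front; the
   block built on the initial counter (xmm2/xmm6) is clamped into the
   Poly1305 key, the others become keystream. */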
   3928 	movdqu	.chacha20_consts(%rip),%xmm0
   3929 	movdqa	%xmm0,%xmm1
   3930 	movdqa	%xmm0,%xmm2
   3931 	movdqu	0(%r9),%xmm4
   3932 	movdqa	%xmm4,%xmm5
   3933 	movdqa	%xmm4,%xmm6
   3934 	movdqu	16(%r9),%xmm8
   3935 	movdqa	%xmm8,%xmm9
   3936 	movdqa	%xmm8,%xmm10
   3937 	movdqu	32(%r9),%xmm14
   3938 	movdqa	%xmm14,%xmm12
   3939 	paddd	.sse_inc(%rip),%xmm12
   3940 	movdqa	%xmm12,%xmm13
   3941 	paddd	.sse_inc(%rip),%xmm13
   3942 	movdqa	%xmm4,%xmm7
   3943 	movdqa	%xmm8,%xmm11
   3944 	movdqa	%xmm12,%xmm15
   3945 	movq	$10,%r10
   3946 1:
   3947 	paddd	%xmm4,%xmm0
   3948 	pxor	%xmm0,%xmm12
   3949 	pshufb	.rol16(%rip),%xmm12
   3950 	paddd	%xmm12,%xmm8
   3951 	pxor	%xmm8,%xmm4
   3952 	movdqa	%xmm4,%xmm3
   3953 	pslld	$12,%xmm3
   3954 	psrld	$20,%xmm4
   3955 	pxor	%xmm3,%xmm4
   3956 	paddd	%xmm4,%xmm0
   3957 	pxor	%xmm0,%xmm12
   3958 	pshufb	.rol8(%rip),%xmm12
   3959 	paddd	%xmm12,%xmm8
   3960 	pxor	%xmm8,%xmm4
   3961 	movdqa	%xmm4,%xmm3
   3962 	pslld	$7,%xmm3
   3963 	psrld	$25,%xmm4
   3964 	pxor	%xmm3,%xmm4
   3965 .byte	102,15,58,15,228,4
   3966 .byte	102,69,15,58,15,192,8
   3967 .byte	102,69,15,58,15,228,12
   3968 	paddd	%xmm5,%xmm1
   3969 	pxor	%xmm1,%xmm13
   3970 	pshufb	.rol16(%rip),%xmm13
   3971 	paddd	%xmm13,%xmm9
   3972 	pxor	%xmm9,%xmm5
   3973 	movdqa	%xmm5,%xmm3
   3974 	pslld	$12,%xmm3
   3975 	psrld	$20,%xmm5
   3976 	pxor	%xmm3,%xmm5
   3977 	paddd	%xmm5,%xmm1
   3978 	pxor	%xmm1,%xmm13
   3979 	pshufb	.rol8(%rip),%xmm13
   3980 	paddd	%xmm13,%xmm9
   3981 	pxor	%xmm9,%xmm5
   3982 	movdqa	%xmm5,%xmm3
   3983 	pslld	$7,%xmm3
   3984 	psrld	$25,%xmm5
   3985 	pxor	%xmm3,%xmm5
   3986 .byte	102,15,58,15,237,4
   3987 .byte	102,69,15,58,15,201,8
   3988 .byte	102,69,15,58,15,237,12
   3989 	paddd	%xmm6,%xmm2
   3990 	pxor	%xmm2,%xmm14
   3991 	pshufb	.rol16(%rip),%xmm14
   3992 	paddd	%xmm14,%xmm10
   3993 	pxor	%xmm10,%xmm6
   3994 	movdqa	%xmm6,%xmm3
   3995 	pslld	$12,%xmm3
   3996 	psrld	$20,%xmm6
   3997 	pxor	%xmm3,%xmm6
   3998 	paddd	%xmm6,%xmm2
   3999 	pxor	%xmm2,%xmm14
   4000 	pshufb	.rol8(%rip),%xmm14
   4001 	paddd	%xmm14,%xmm10
   4002 	pxor	%xmm10,%xmm6
   4003 	movdqa	%xmm6,%xmm3
   4004 	pslld	$7,%xmm3
   4005 	psrld	$25,%xmm6
   4006 	pxor	%xmm3,%xmm6
   4007 .byte	102,15,58,15,246,4
   4008 .byte	102,69,15,58,15,210,8
   4009 .byte	102,69,15,58,15,246,12
   4010 	paddd	%xmm4,%xmm0
   4011 	pxor	%xmm0,%xmm12
   4012 	pshufb	.rol16(%rip),%xmm12
   4013 	paddd	%xmm12,%xmm8
   4014 	pxor	%xmm8,%xmm4
   4015 	movdqa	%xmm4,%xmm3
   4016 	pslld	$12,%xmm3
   4017 	psrld	$20,%xmm4
   4018 	pxor	%xmm3,%xmm4
   4019 	paddd	%xmm4,%xmm0
   4020 	pxor	%xmm0,%xmm12
   4021 	pshufb	.rol8(%rip),%xmm12
   4022 	paddd	%xmm12,%xmm8
   4023 	pxor	%xmm8,%xmm4
   4024 	movdqa	%xmm4,%xmm3
   4025 	pslld	$7,%xmm3
   4026 	psrld	$25,%xmm4
   4027 	pxor	%xmm3,%xmm4
   4028 .byte	102,15,58,15,228,12
   4029 .byte	102,69,15,58,15,192,8
   4030 .byte	102,69,15,58,15,228,4
   4031 	paddd	%xmm5,%xmm1
   4032 	pxor	%xmm1,%xmm13
   4033 	pshufb	.rol16(%rip),%xmm13
   4034 	paddd	%xmm13,%xmm9
   4035 	pxor	%xmm9,%xmm5
   4036 	movdqa	%xmm5,%xmm3
   4037 	pslld	$12,%xmm3
   4038 	psrld	$20,%xmm5
   4039 	pxor	%xmm3,%xmm5
   4040 	paddd	%xmm5,%xmm1
   4041 	pxor	%xmm1,%xmm13
   4042 	pshufb	.rol8(%rip),%xmm13
   4043 	paddd	%xmm13,%xmm9
   4044 	pxor	%xmm9,%xmm5
   4045 	movdqa	%xmm5,%xmm3
   4046 	pslld	$7,%xmm3
   4047 	psrld	$25,%xmm5
   4048 	pxor	%xmm3,%xmm5
   4049 .byte	102,15,58,15,237,12
   4050 .byte	102,69,15,58,15,201,8
   4051 .byte	102,69,15,58,15,237,4
   4052 	paddd	%xmm6,%xmm2
   4053 	pxor	%xmm2,%xmm14
   4054 	pshufb	.rol16(%rip),%xmm14
   4055 	paddd	%xmm14,%xmm10
   4056 	pxor	%xmm10,%xmm6
   4057 	movdqa	%xmm6,%xmm3
   4058 	pslld	$12,%xmm3
   4059 	psrld	$20,%xmm6
   4060 	pxor	%xmm3,%xmm6
   4061 	paddd	%xmm6,%xmm2
   4062 	pxor	%xmm2,%xmm14
   4063 	pshufb	.rol8(%rip),%xmm14
   4064 	paddd	%xmm14,%xmm10
   4065 	pxor	%xmm10,%xmm6
   4066 	movdqa	%xmm6,%xmm3
   4067 	pslld	$7,%xmm3
   4068 	psrld	$25,%xmm6
   4069 	pxor	%xmm3,%xmm6
   4070 .byte	102,15,58,15,246,12
   4071 .byte	102,69,15,58,15,210,8
   4072 .byte	102,69,15,58,15,246,4
   4073 
   4074 	decq	%r10
   4075 	jnz	1b
   4076 	paddd	.chacha20_consts(%rip),%xmm0
   4077 	paddd	.chacha20_consts(%rip),%xmm1
   4078 	paddd	.chacha20_consts(%rip),%xmm2
   4079 	paddd	%xmm7,%xmm4
   4080 	paddd	%xmm7,%xmm5
   4081 	paddd	%xmm7,%xmm6
   4082 	paddd	%xmm11,%xmm8
   4083 	paddd	%xmm11,%xmm9
   4084 	paddd	%xmm15,%xmm12
   4085 	paddd	.sse_inc(%rip),%xmm15
   4086 	paddd	%xmm15,%xmm13
   4087 
   4088 	pand	.clamp(%rip),%xmm2
   4089 	movdqa	%xmm2,0(%rbp)
   4090 	movdqa	%xmm6,16(%rbp)
   4091 
   4092 	movq	%r8,%r8
   4093 	call	poly_hash_ad_internal
   4094 	jmp	seal_sse_128_seal
   4095 
   4096 
   4097 
   4098 
   4099 .p2align	6
   4100 chacha20_poly1305_open_avx2:
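/* AVX2 open path: vbroadcasti128 puts two 64-byte blocks in each ymm
   register, counters split across lanes via .avx2_init/.avx2_inc, and the
   Poly1305 multiplies switch to mulx. */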
   4101 	vzeroupper
   4102 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4103 	vbroadcasti128	0(%r9),%ymm4
   4104 	vbroadcasti128	16(%r9),%ymm8
   4105 	vbroadcasti128	32(%r9),%ymm12
   4106 	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
   4107 	cmpq	$192,%rbx
   4108 	jbe	open_avx2_192
   4109 	cmpq	$320,%rbx
   4110 	jbe	open_avx2_320
   4111 
   4112 	vmovdqa	%ymm4,64(%rbp)
   4113 	vmovdqa	%ymm8,96(%rbp)
   4114 	vmovdqa	%ymm12,160(%rbp)
   4115 	movq	$10,%r10
   4116 1:
   4117 	vpaddd	%ymm4,%ymm0,%ymm0
   4118 	vpxor	%ymm0,%ymm12,%ymm12
   4119 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4120 	vpaddd	%ymm12,%ymm8,%ymm8
   4121 	vpxor	%ymm8,%ymm4,%ymm4
   4122 	vpsrld	$20,%ymm4,%ymm3
   4123 	vpslld	$12,%ymm4,%ymm4
   4124 	vpxor	%ymm3,%ymm4,%ymm4
   4125 	vpaddd	%ymm4,%ymm0,%ymm0
   4126 	vpxor	%ymm0,%ymm12,%ymm12
   4127 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4128 	vpaddd	%ymm12,%ymm8,%ymm8
   4129 	vpxor	%ymm8,%ymm4,%ymm4
   4130 	vpslld	$7,%ymm4,%ymm3
   4131 	vpsrld	$25,%ymm4,%ymm4
   4132 	vpxor	%ymm3,%ymm4,%ymm4
   4133 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4134 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4135 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4136 	vpaddd	%ymm4,%ymm0,%ymm0
   4137 	vpxor	%ymm0,%ymm12,%ymm12
   4138 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4139 	vpaddd	%ymm12,%ymm8,%ymm8
   4140 	vpxor	%ymm8,%ymm4,%ymm4
   4141 	vpsrld	$20,%ymm4,%ymm3
   4142 	vpslld	$12,%ymm4,%ymm4
   4143 	vpxor	%ymm3,%ymm4,%ymm4
   4144 	vpaddd	%ymm4,%ymm0,%ymm0
   4145 	vpxor	%ymm0,%ymm12,%ymm12
   4146 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4147 	vpaddd	%ymm12,%ymm8,%ymm8
   4148 	vpxor	%ymm8,%ymm4,%ymm4
   4149 	vpslld	$7,%ymm4,%ymm3
   4150 	vpsrld	$25,%ymm4,%ymm4
   4151 	vpxor	%ymm3,%ymm4,%ymm4
   4152 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4153 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4154 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4155 
   4156 	decq	%r10
   4157 	jne	1b
   4158 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4159 	vpaddd	64(%rbp),%ymm4,%ymm4
   4160 	vpaddd	96(%rbp),%ymm8,%ymm8
   4161 	vpaddd	160(%rbp),%ymm12,%ymm12
   4162 
   4163 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   4164 
   4165 	vpand	.clamp(%rip),%ymm3,%ymm3
   4166 	vmovdqa	%ymm3,0(%rbp)
   4167 
   4168 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   4169 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   4170 
   4171 	movq	%r8,%r8
   4172 	call	poly_hash_ad_internal
   4173 	xorq	%rcx,%rcx
   4174 
   4175 1:
   4176 	addq	0(%rsi,%rcx), %r10
   4177 	adcq	8+0(%rsi,%rcx), %r11
   4178 	adcq	$1,%r12
   4179 	movq	0+0(%rbp),%rax
   4180 	movq	%rax,%r15
   4181 	mulq	%r10
   4182 	movq	%rax,%r13
   4183 	movq	%rdx,%r14
   4184 	movq	0+0(%rbp),%rax
   4185 	mulq	%r11
   4186 	imulq	%r12,%r15
   4187 	addq	%rax,%r14
   4188 	adcq	%rdx,%r15
   4189 	movq	8+0(%rbp),%rax
   4190 	movq	%rax,%r9
   4191 	mulq	%r10
   4192 	addq	%rax,%r14
   4193 	adcq	$0,%rdx
   4194 	movq	%rdx,%r10
   4195 	movq	8+0(%rbp),%rax
   4196 	mulq	%r11
   4197 	addq	%rax,%r15
   4198 	adcq	$0,%rdx
   4199 	imulq	%r12,%r9
   4200 	addq	%r10,%r15
   4201 	adcq	%rdx,%r9
   4202 	movq	%r13,%r10
   4203 	movq	%r14,%r11
   4204 	movq	%r15,%r12
   4205 	andq	$3,%r12
   4206 	movq	%r15,%r13
   4207 	andq	$-4,%r13
   4208 	movq	%r9,%r14
   4209 	shrdq	$2,%r9,%r15
   4210 	shrq	$2,%r9
   4211 	addq	%r13,%r10
   4212 	adcq	%r14,%r11
   4213 	adcq	$0,%r12
   4214 	addq	%r15,%r10
   4215 	adcq	%r9,%r11
   4216 	adcq	$0,%r12
   4217 
   4218 	addq	$16,%rcx
   4219 	cmpq	$64,%rcx
   4220 	jne	1b
   4221 
   4222 	vpxor	0(%rsi),%ymm0,%ymm0
   4223 	vpxor	32(%rsi),%ymm4,%ymm4
   4224 	vmovdqu	%ymm0,0(%rdi)
   4225 	vmovdqu	%ymm4,32(%rdi)
   4226 	leaq	64(%rsi),%rsi
   4227 	leaq	64(%rdi),%rdi
   4228 	subq	$64,%rbx
   4229 1:
   4230 
   4231 	cmpq	$512,%rbx
   4232 	jb	3f
   4233 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4234 	vmovdqa	64(%rbp),%ymm4
   4235 	vmovdqa	96(%rbp),%ymm8
   4236 	vmovdqa	%ymm0,%ymm1
   4237 	vmovdqa	%ymm4,%ymm5
   4238 	vmovdqa	%ymm8,%ymm9
   4239 	vmovdqa	%ymm0,%ymm2
   4240 	vmovdqa	%ymm4,%ymm6
   4241 	vmovdqa	%ymm8,%ymm10
   4242 	vmovdqa	%ymm0,%ymm3
   4243 	vmovdqa	%ymm4,%ymm7
   4244 	vmovdqa	%ymm8,%ymm11
   4245 	vmovdqa	.avx2_inc(%rip),%ymm12
   4246 	vpaddd	160(%rbp),%ymm12,%ymm15
   4247 	vpaddd	%ymm15,%ymm12,%ymm14
   4248 	vpaddd	%ymm14,%ymm12,%ymm13
   4249 	vpaddd	%ymm13,%ymm12,%ymm12
   4250 	vmovdqa	%ymm15,256(%rbp)
   4251 	vmovdqa	%ymm14,224(%rbp)
   4252 	vmovdqa	%ymm13,192(%rbp)
   4253 	vmovdqa	%ymm12,160(%rbp)
   4254 
   4255 	xorq	%rcx,%rcx
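/* 512-byte AVX2 bulk loop: eight blocks of keystream per trip while the
   scalar unit absorbs 32 bytes of input per double round with mulx. */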
   4256 2:
   4257 	addq	0*8(%rsi,%rcx), %r10
   4258 	adcq	8+0*8(%rsi,%rcx), %r11
   4259 	adcq	$1,%r12
   4260 	vmovdqa	%ymm8,128(%rbp)
   4261 	vmovdqa	.rol16(%rip),%ymm8
   4262 	vpaddd	%ymm7,%ymm3,%ymm3
   4263 	vpaddd	%ymm6,%ymm2,%ymm2
   4264 	vpaddd	%ymm5,%ymm1,%ymm1
   4265 	vpaddd	%ymm4,%ymm0,%ymm0
   4266 	vpxor	%ymm3,%ymm15,%ymm15
   4267 	vpxor	%ymm2,%ymm14,%ymm14
   4268 	vpxor	%ymm1,%ymm13,%ymm13
   4269 	vpxor	%ymm0,%ymm12,%ymm12
   4270 	movq	0+0(%rbp),%rdx
   4271 	movq	%rdx,%r15
   4272 	mulxq	%r10,%r13,%r14
   4273 	mulxq	%r11,%rax,%rdx
   4274 	imulq	%r12,%r15
   4275 	addq	%rax,%r14
   4276 	adcq	%rdx,%r15
   4277 	vpshufb	%ymm8,%ymm15,%ymm15
   4278 	vpshufb	%ymm8,%ymm14,%ymm14
   4279 	vpshufb	%ymm8,%ymm13,%ymm13
   4280 	vpshufb	%ymm8,%ymm12,%ymm12
   4281 	vmovdqa	128(%rbp),%ymm8
   4282 	vpaddd	%ymm15,%ymm11,%ymm11
   4283 	vpaddd	%ymm14,%ymm10,%ymm10
   4284 	vpaddd	%ymm13,%ymm9,%ymm9
   4285 	vpaddd	%ymm12,%ymm8,%ymm8
   4286 	movq	8+0(%rbp),%rdx
   4287 	mulxq	%r10,%r10,%rax
   4288 	addq	%r10,%r14
   4289 	mulxq	%r11,%r11,%r9
   4290 	adcq	%r11,%r15
   4291 	adcq	$0,%r9
   4292 	imulq	%r12,%rdx
   4293 	vpxor	%ymm11,%ymm7,%ymm7
   4294 	vpxor	%ymm10,%ymm6,%ymm6
   4295 	vpxor	%ymm9,%ymm5,%ymm5
   4296 	vpxor	%ymm8,%ymm4,%ymm4
   4297 	vmovdqa	%ymm8,128(%rbp)
   4298 	vpsrld	$20,%ymm7,%ymm8
   4299 	vpslld	$32-20,%ymm7,%ymm7
   4300 	vpxor	%ymm8,%ymm7,%ymm7
   4301 	vpsrld	$20,%ymm6,%ymm8
   4302 	vpslld	$32-20,%ymm6,%ymm6
   4303 	vpxor	%ymm8,%ymm6,%ymm6
   4304 	vpsrld	$20,%ymm5,%ymm8
   4305 	addq	%rax,%r15
   4306 	adcq	%rdx,%r9
   4307 	vpslld	$32-20,%ymm5,%ymm5
   4308 	vpxor	%ymm8,%ymm5,%ymm5
   4309 	vpsrld	$20,%ymm4,%ymm8
   4310 	vpslld	$32-20,%ymm4,%ymm4
   4311 	vpxor	%ymm8,%ymm4,%ymm4
   4312 	vmovdqa	.rol8(%rip),%ymm8
   4313 	vpaddd	%ymm7,%ymm3,%ymm3
   4314 	vpaddd	%ymm6,%ymm2,%ymm2
   4315 	vpaddd	%ymm5,%ymm1,%ymm1
   4316 	vpaddd	%ymm4,%ymm0,%ymm0
   4317 	movq	%r13,%r10
   4318 	movq	%r14,%r11
   4319 	movq	%r15,%r12
   4320 	andq	$3,%r12
   4321 	movq	%r15,%r13
   4322 	andq	$-4,%r13
   4323 	movq	%r9,%r14
   4324 	shrdq	$2,%r9,%r15
   4325 	shrq	$2,%r9
   4326 	addq	%r13,%r10
   4327 	adcq	%r14,%r11
   4328 	adcq	$0,%r12
   4329 	addq	%r15,%r10
   4330 	adcq	%r9,%r11
   4331 	adcq	$0,%r12
   4332 	vpxor	%ymm3,%ymm15,%ymm15
   4333 	vpxor	%ymm2,%ymm14,%ymm14
   4334 	vpxor	%ymm1,%ymm13,%ymm13
   4335 	vpxor	%ymm0,%ymm12,%ymm12
   4336 	vpshufb	%ymm8,%ymm15,%ymm15
   4337 	vpshufb	%ymm8,%ymm14,%ymm14
   4338 	vpshufb	%ymm8,%ymm13,%ymm13
   4339 	vpshufb	%ymm8,%ymm12,%ymm12
   4340 	vmovdqa	128(%rbp),%ymm8
   4341 	addq	2*8(%rsi,%rcx), %r10
   4342 	adcq	8+2*8(%rsi,%rcx), %r11
   4343 	adcq	$1,%r12
   4344 	vpaddd	%ymm15,%ymm11,%ymm11
   4345 	vpaddd	%ymm14,%ymm10,%ymm10
   4346 	vpaddd	%ymm13,%ymm9,%ymm9
   4347 	vpaddd	%ymm12,%ymm8,%ymm8
   4348 	vpxor	%ymm11,%ymm7,%ymm7
   4349 	vpxor	%ymm10,%ymm6,%ymm6
   4350 	vpxor	%ymm9,%ymm5,%ymm5
   4351 	vpxor	%ymm8,%ymm4,%ymm4
   4352 	movq	0+0(%rbp),%rdx
   4353 	movq	%rdx,%r15
   4354 	mulxq	%r10,%r13,%r14
   4355 	mulxq	%r11,%rax,%rdx
   4356 	imulq	%r12,%r15
   4357 	addq	%rax,%r14
   4358 	adcq	%rdx,%r15
   4359 	vmovdqa	%ymm8,128(%rbp)
   4360 	vpsrld	$25,%ymm7,%ymm8
   4361 	vpslld	$32-25,%ymm7,%ymm7
   4362 	vpxor	%ymm8,%ymm7,%ymm7
   4363 	vpsrld	$25,%ymm6,%ymm8
   4364 	vpslld	$32-25,%ymm6,%ymm6
   4365 	vpxor	%ymm8,%ymm6,%ymm6
   4366 	vpsrld	$25,%ymm5,%ymm8
   4367 	vpslld	$32-25,%ymm5,%ymm5
   4368 	vpxor	%ymm8,%ymm5,%ymm5
   4369 	vpsrld	$25,%ymm4,%ymm8
   4370 	vpslld	$32-25,%ymm4,%ymm4
   4371 	vpxor	%ymm8,%ymm4,%ymm4
   4372 	vmovdqa	128(%rbp),%ymm8
   4373 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   4374 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   4375 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   4376 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   4377 	movq	8+0(%rbp),%rdx
   4378 	mulxq	%r10,%r10,%rax
   4379 	addq	%r10,%r14
   4380 	mulxq	%r11,%r11,%r9
   4381 	adcq	%r11,%r15
   4382 	adcq	$0,%r9
   4383 	imulq	%r12,%rdx
   4384 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4385 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   4386 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   4387 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4388 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   4389 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4390 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4391 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4392 	vmovdqa	%ymm8,128(%rbp)
   4393 	vmovdqa	.rol16(%rip),%ymm8
   4394 	vpaddd	%ymm7,%ymm3,%ymm3
   4395 	vpaddd	%ymm6,%ymm2,%ymm2
   4396 	vpaddd	%ymm5,%ymm1,%ymm1
   4397 	vpaddd	%ymm4,%ymm0,%ymm0
   4398 	vpxor	%ymm3,%ymm15,%ymm15
   4399 	vpxor	%ymm2,%ymm14,%ymm14
   4400 	vpxor	%ymm1,%ymm13,%ymm13
   4401 	vpxor	%ymm0,%ymm12,%ymm12
   4402 	addq	%rax,%r15
   4403 	adcq	%rdx,%r9
   4404 	vpshufb	%ymm8,%ymm15,%ymm15
   4405 	vpshufb	%ymm8,%ymm14,%ymm14
   4406 	vpshufb	%ymm8,%ymm13,%ymm13
   4407 	vpshufb	%ymm8,%ymm12,%ymm12
   4408 	vmovdqa	128(%rbp),%ymm8
   4409 	vpaddd	%ymm15,%ymm11,%ymm11
   4410 	vpaddd	%ymm14,%ymm10,%ymm10
   4411 	vpaddd	%ymm13,%ymm9,%ymm9
   4412 	vpaddd	%ymm12,%ymm8,%ymm8
   4413 	movq	%r13,%r10
   4414 	movq	%r14,%r11
   4415 	movq	%r15,%r12
   4416 	andq	$3,%r12
   4417 	movq	%r15,%r13
   4418 	andq	$-4,%r13
   4419 	movq	%r9,%r14
   4420 	shrdq	$2,%r9,%r15
   4421 	shrq	$2,%r9
   4422 	addq	%r13,%r10
   4423 	adcq	%r14,%r11
   4424 	adcq	$0,%r12
   4425 	addq	%r15,%r10
   4426 	adcq	%r9,%r11
   4427 	adcq	$0,%r12
   4428 	vpxor	%ymm11,%ymm7,%ymm7
   4429 	vpxor	%ymm10,%ymm6,%ymm6
   4430 	vpxor	%ymm9,%ymm5,%ymm5
   4431 	vpxor	%ymm8,%ymm4,%ymm4
   4432 	vmovdqa	%ymm8,128(%rbp)
   4433 	vpsrld	$20,%ymm7,%ymm8
   4434 	vpslld	$32-20,%ymm7,%ymm7
   4435 	vpxor	%ymm8,%ymm7,%ymm7
   4436 	addq	4*8(%rsi,%rcx), %r10
   4437 	adcq	8+4*8(%rsi,%rcx), %r11
   4438 	adcq	$1,%r12
   4439 
   4440 	leaq	48(%rcx),%rcx
   4441 	vpsrld	$20,%ymm6,%ymm8
   4442 	vpslld	$32-20,%ymm6,%ymm6
   4443 	vpxor	%ymm8,%ymm6,%ymm6
   4444 	vpsrld	$20,%ymm5,%ymm8
   4445 	vpslld	$32-20,%ymm5,%ymm5
   4446 	vpxor	%ymm8,%ymm5,%ymm5
   4447 	vpsrld	$20,%ymm4,%ymm8
   4448 	vpslld	$32-20,%ymm4,%ymm4
   4449 	vpxor	%ymm8,%ymm4,%ymm4
   4450 	vmovdqa	.rol8(%rip),%ymm8
   4451 	vpaddd	%ymm7,%ymm3,%ymm3
   4452 	vpaddd	%ymm6,%ymm2,%ymm2
   4453 	vpaddd	%ymm5,%ymm1,%ymm1
   4454 	vpaddd	%ymm4,%ymm0,%ymm0
   4455 	vpxor	%ymm3,%ymm15,%ymm15
   4456 	vpxor	%ymm2,%ymm14,%ymm14
   4457 	vpxor	%ymm1,%ymm13,%ymm13
   4458 	vpxor	%ymm0,%ymm12,%ymm12
   4459 	movq	0+0(%rbp),%rdx
   4460 	movq	%rdx,%r15
   4461 	mulxq	%r10,%r13,%r14
   4462 	mulxq	%r11,%rax,%rdx
   4463 	imulq	%r12,%r15
   4464 	addq	%rax,%r14
   4465 	adcq	%rdx,%r15
   4466 	vpshufb	%ymm8,%ymm15,%ymm15
   4467 	vpshufb	%ymm8,%ymm14,%ymm14
   4468 	vpshufb	%ymm8,%ymm13,%ymm13
   4469 	vpshufb	%ymm8,%ymm12,%ymm12
   4470 	vmovdqa	128(%rbp),%ymm8
   4471 	vpaddd	%ymm15,%ymm11,%ymm11
   4472 	vpaddd	%ymm14,%ymm10,%ymm10
   4473 	vpaddd	%ymm13,%ymm9,%ymm9
   4474 	movq	8+0(%rbp),%rdx
   4475 	mulxq	%r10,%r10,%rax
   4476 	addq	%r10,%r14
   4477 	mulxq	%r11,%r11,%r9
   4478 	adcq	%r11,%r15
   4479 	adcq	$0,%r9
   4480 	imulq	%r12,%rdx
   4481 	vpaddd	%ymm12,%ymm8,%ymm8
   4482 	vpxor	%ymm11,%ymm7,%ymm7
   4483 	vpxor	%ymm10,%ymm6,%ymm6
   4484 	vpxor	%ymm9,%ymm5,%ymm5
   4485 	vpxor	%ymm8,%ymm4,%ymm4
   4486 	vmovdqa	%ymm8,128(%rbp)
   4487 	vpsrld	$25,%ymm7,%ymm8
   4488 	vpslld	$32-25,%ymm7,%ymm7
   4489 	addq	%rax,%r15
   4490 	adcq	%rdx,%r9
   4491 	vpxor	%ymm8,%ymm7,%ymm7
   4492 	vpsrld	$25,%ymm6,%ymm8
   4493 	vpslld	$32-25,%ymm6,%ymm6
   4494 	vpxor	%ymm8,%ymm6,%ymm6
   4495 	vpsrld	$25,%ymm5,%ymm8
   4496 	vpslld	$32-25,%ymm5,%ymm5
   4497 	vpxor	%ymm8,%ymm5,%ymm5
   4498 	vpsrld	$25,%ymm4,%ymm8
   4499 	vpslld	$32-25,%ymm4,%ymm4
   4500 	vpxor	%ymm8,%ymm4,%ymm4
   4501 	vmovdqa	128(%rbp),%ymm8
   4502 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   4503 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   4504 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   4505 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   4506 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4507 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   4508 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   4509 	movq	%r13,%r10
   4510 	movq	%r14,%r11
   4511 	movq	%r15,%r12
   4512 	andq	$3,%r12
   4513 	movq	%r15,%r13
   4514 	andq	$-4,%r13
   4515 	movq	%r9,%r14
   4516 	shrdq	$2,%r9,%r15
   4517 	shrq	$2,%r9
   4518 	addq	%r13,%r10
   4519 	adcq	%r14,%r11
   4520 	adcq	$0,%r12
   4521 	addq	%r15,%r10
   4522 	adcq	%r9,%r11
   4523 	adcq	$0,%r12
   4524 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4525 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   4526 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4527 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4528 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4529 
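/* 48 bytes are hashed per double round, so %rcx reaches 60*8 = 480
   after the ten double rounds. */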
   4530 	cmpq	$60*8,%rcx
   4531 	jne	2b
   4532 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   4533 	vpaddd	64(%rbp),%ymm7,%ymm7
   4534 	vpaddd	96(%rbp),%ymm11,%ymm11
   4535 	vpaddd	256(%rbp),%ymm15,%ymm15
   4536 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   4537 	vpaddd	64(%rbp),%ymm6,%ymm6
   4538 	vpaddd	96(%rbp),%ymm10,%ymm10
   4539 	vpaddd	224(%rbp),%ymm14,%ymm14
   4540 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   4541 	vpaddd	64(%rbp),%ymm5,%ymm5
   4542 	vpaddd	96(%rbp),%ymm9,%ymm9
   4543 	vpaddd	192(%rbp),%ymm13,%ymm13
   4544 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4545 	vpaddd	64(%rbp),%ymm4,%ymm4
   4546 	vpaddd	96(%rbp),%ymm8,%ymm8
   4547 	vpaddd	160(%rbp),%ymm12,%ymm12
   4548 
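/* Re-pack each pair of blocks into byte order (vperm2i128 selects the
   low/high 128-bit lanes), XOR the 512 input bytes, and hash the last
   32 bytes of this chunk along the way. */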
   4549 	vmovdqa	%ymm0,128(%rbp)
   4550 	addq	60*8(%rsi),%r10
   4551 	adcq	8+60*8(%rsi),%r11
   4552 	adcq	$1,%r12
   4553 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   4554 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   4555 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   4556 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   4557 	vpxor	0+0(%rsi),%ymm0,%ymm0
   4558 	vpxor	32+0(%rsi),%ymm3,%ymm3
   4559 	vpxor	64+0(%rsi),%ymm7,%ymm7
   4560 	vpxor	96+0(%rsi),%ymm11,%ymm11
   4561 	vmovdqu	%ymm0,0+0(%rdi)
   4562 	vmovdqu	%ymm3,32+0(%rdi)
   4563 	vmovdqu	%ymm7,64+0(%rdi)
   4564 	vmovdqu	%ymm11,96+0(%rdi)
   4565 
   4566 	vmovdqa	128(%rbp),%ymm0
   4567 	movq	0+0(%rbp),%rax
   4568 	movq	%rax,%r15
   4569 	mulq	%r10
   4570 	movq	%rax,%r13
   4571 	movq	%rdx,%r14
   4572 	movq	0+0(%rbp),%rax
   4573 	mulq	%r11
   4574 	imulq	%r12,%r15
   4575 	addq	%rax,%r14
   4576 	adcq	%rdx,%r15
   4577 	movq	8+0(%rbp),%rax
   4578 	movq	%rax,%r9
   4579 	mulq	%r10
   4580 	addq	%rax,%r14
   4581 	adcq	$0,%rdx
   4582 	movq	%rdx,%r10
   4583 	movq	8+0(%rbp),%rax
   4584 	mulq	%r11
   4585 	addq	%rax,%r15
   4586 	adcq	$0,%rdx
   4587 	imulq	%r12,%r9
   4588 	addq	%r10,%r15
   4589 	adcq	%rdx,%r9
   4590 	movq	%r13,%r10
   4591 	movq	%r14,%r11
   4592 	movq	%r15,%r12
   4593 	andq	$3,%r12
   4594 	movq	%r15,%r13
   4595 	andq	$-4,%r13
   4596 	movq	%r9,%r14
   4597 	shrdq	$2,%r9,%r15
   4598 	shrq	$2,%r9
   4599 	addq	%r13,%r10
   4600 	adcq	%r14,%r11
   4601 	adcq	$0,%r12
   4602 	addq	%r15,%r10
   4603 	adcq	%r9,%r11
   4604 	adcq	$0,%r12
   4605 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   4606 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   4607 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   4608 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   4609 	vpxor	0+128(%rsi),%ymm3,%ymm3
   4610 	vpxor	32+128(%rsi),%ymm2,%ymm2
   4611 	vpxor	64+128(%rsi),%ymm6,%ymm6
   4612 	vpxor	96+128(%rsi),%ymm10,%ymm10
   4613 	vmovdqu	%ymm3,0+128(%rdi)
   4614 	vmovdqu	%ymm2,32+128(%rdi)
   4615 	vmovdqu	%ymm6,64+128(%rdi)
   4616 	vmovdqu	%ymm10,96+128(%rdi)
   4617 	addq	60*8+16(%rsi),%r10
   4618 	adcq	8+60*8+16(%rsi),%r11
   4619 	adcq	$1,%r12
   4620 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   4621 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   4622 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   4623 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   4624 	vpxor	0+256(%rsi),%ymm3,%ymm3
   4625 	vpxor	32+256(%rsi),%ymm1,%ymm1
   4626 	vpxor	64+256(%rsi),%ymm5,%ymm5
   4627 	vpxor	96+256(%rsi),%ymm9,%ymm9
   4628 	vmovdqu	%ymm3,0+256(%rdi)
   4629 	vmovdqu	%ymm1,32+256(%rdi)
   4630 	vmovdqu	%ymm5,64+256(%rdi)
   4631 	vmovdqu	%ymm9,96+256(%rdi)
   4632 	movq	0+0(%rbp),%rax
   4633 	movq	%rax,%r15
   4634 	mulq	%r10
   4635 	movq	%rax,%r13
   4636 	movq	%rdx,%r14
   4637 	movq	0+0(%rbp),%rax
   4638 	mulq	%r11
   4639 	imulq	%r12,%r15
   4640 	addq	%rax,%r14
   4641 	adcq	%rdx,%r15
   4642 	movq	8+0(%rbp),%rax
   4643 	movq	%rax,%r9
   4644 	mulq	%r10
   4645 	addq	%rax,%r14
   4646 	adcq	$0,%rdx
   4647 	movq	%rdx,%r10
   4648 	movq	8+0(%rbp),%rax
   4649 	mulq	%r11
   4650 	addq	%rax,%r15
   4651 	adcq	$0,%rdx
   4652 	imulq	%r12,%r9
   4653 	addq	%r10,%r15
   4654 	adcq	%rdx,%r9
   4655 	movq	%r13,%r10
   4656 	movq	%r14,%r11
   4657 	movq	%r15,%r12
   4658 	andq	$3,%r12
   4659 	movq	%r15,%r13
   4660 	andq	$-4,%r13
   4661 	movq	%r9,%r14
   4662 	shrdq	$2,%r9,%r15
   4663 	shrq	$2,%r9
   4664 	addq	%r13,%r10
   4665 	adcq	%r14,%r11
   4666 	adcq	$0,%r12
   4667 	addq	%r15,%r10
   4668 	adcq	%r9,%r11
   4669 	adcq	$0,%r12
   4670 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   4671 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
   4672 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
   4673 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
   4674 	vpxor	0+384(%rsi),%ymm3,%ymm3
   4675 	vpxor	32+384(%rsi),%ymm0,%ymm0
   4676 	vpxor	64+384(%rsi),%ymm4,%ymm4
   4677 	vpxor	96+384(%rsi),%ymm8,%ymm8
   4678 	vmovdqu	%ymm3,0+384(%rdi)
   4679 	vmovdqu	%ymm0,32+384(%rdi)
   4680 	vmovdqu	%ymm4,64+384(%rdi)
   4681 	vmovdqu	%ymm8,96+384(%rdi)
   4682 
   4683 	leaq	512(%rsi),%rsi
   4684 	leaq	512(%rdi),%rdi
   4685 	subq	$512,%rbx
   4686 	jmp	1b
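/* Tail dispatch: fewer than 512 bytes remain. */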
   4687 3:
   4688 	testq	%rbx,%rbx
   4689 	vzeroupper
   4690 	je	open_sse_finalize
   4691 3:
   4692 	cmpq	$128,%rbx
   4693 	ja	3f
   4694 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4695 	vmovdqa	64(%rbp),%ymm4
   4696 	vmovdqa	96(%rbp),%ymm8
   4697 	vmovdqa	.avx2_inc(%rip),%ymm12
   4698 	vpaddd	160(%rbp),%ymm12,%ymm12
   4699 	vmovdqa	%ymm12,160(%rbp)
   4700 
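/* Tail of at most 128 bytes: one 2-block state.  Loop 1 hashes any
   remaining full 16-byte words; loop 2 keeps running double rounds
   until %r8 reaches 160, i.e. exactly ten of them. */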
   4701 	xorq	%r8,%r8
   4702 	movq	%rbx,%rcx
   4703 	andq	$-16,%rcx
   4704 	testq	%rcx,%rcx
   4705 	je	2f
   4706 1:
   4707 	addq	0*8(%rsi,%r8), %r10
   4708 	adcq	8+0*8(%rsi,%r8), %r11
   4709 	adcq	$1,%r12
   4710 	movq	0+0(%rbp),%rax
   4711 	movq	%rax,%r15
   4712 	mulq	%r10
   4713 	movq	%rax,%r13
   4714 	movq	%rdx,%r14
   4715 	movq	0+0(%rbp),%rax
   4716 	mulq	%r11
   4717 	imulq	%r12,%r15
   4718 	addq	%rax,%r14
   4719 	adcq	%rdx,%r15
   4720 	movq	8+0(%rbp),%rax
   4721 	movq	%rax,%r9
   4722 	mulq	%r10
   4723 	addq	%rax,%r14
   4724 	adcq	$0,%rdx
   4725 	movq	%rdx,%r10
   4726 	movq	8+0(%rbp),%rax
   4727 	mulq	%r11
   4728 	addq	%rax,%r15
   4729 	adcq	$0,%rdx
   4730 	imulq	%r12,%r9
   4731 	addq	%r10,%r15
   4732 	adcq	%rdx,%r9
   4733 	movq	%r13,%r10
   4734 	movq	%r14,%r11
   4735 	movq	%r15,%r12
   4736 	andq	$3,%r12
   4737 	movq	%r15,%r13
   4738 	andq	$-4,%r13
   4739 	movq	%r9,%r14
   4740 	shrdq	$2,%r9,%r15
   4741 	shrq	$2,%r9
   4742 	addq	%r13,%r10
   4743 	adcq	%r14,%r11
   4744 	adcq	$0,%r12
   4745 	addq	%r15,%r10
   4746 	adcq	%r9,%r11
   4747 	adcq	$0,%r12
   4748 
   4749 2:
   4750 	addq	$16,%r8
   4751 	vpaddd	%ymm4,%ymm0,%ymm0
   4752 	vpxor	%ymm0,%ymm12,%ymm12
   4753 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4754 	vpaddd	%ymm12,%ymm8,%ymm8
   4755 	vpxor	%ymm8,%ymm4,%ymm4
   4756 	vpsrld	$20,%ymm4,%ymm3
   4757 	vpslld	$12,%ymm4,%ymm4
   4758 	vpxor	%ymm3,%ymm4,%ymm4
   4759 	vpaddd	%ymm4,%ymm0,%ymm0
   4760 	vpxor	%ymm0,%ymm12,%ymm12
   4761 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4762 	vpaddd	%ymm12,%ymm8,%ymm8
   4763 	vpxor	%ymm8,%ymm4,%ymm4
   4764 	vpslld	$7,%ymm4,%ymm3
   4765 	vpsrld	$25,%ymm4,%ymm4
   4766 	vpxor	%ymm3,%ymm4,%ymm4
   4767 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4768 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4769 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4770 	vpaddd	%ymm4,%ymm0,%ymm0
   4771 	vpxor	%ymm0,%ymm12,%ymm12
   4772 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4773 	vpaddd	%ymm12,%ymm8,%ymm8
   4774 	vpxor	%ymm8,%ymm4,%ymm4
   4775 	vpsrld	$20,%ymm4,%ymm3
   4776 	vpslld	$12,%ymm4,%ymm4
   4777 	vpxor	%ymm3,%ymm4,%ymm4
   4778 	vpaddd	%ymm4,%ymm0,%ymm0
   4779 	vpxor	%ymm0,%ymm12,%ymm12
   4780 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4781 	vpaddd	%ymm12,%ymm8,%ymm8
   4782 	vpxor	%ymm8,%ymm4,%ymm4
   4783 	vpslld	$7,%ymm4,%ymm3
   4784 	vpsrld	$25,%ymm4,%ymm4
   4785 	vpxor	%ymm3,%ymm4,%ymm4
   4786 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4787 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4788 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4789 
   4790 	cmpq	%rcx,%r8
   4791 	jb	1b
   4792 	cmpq	$160,%r8
   4793 	jne	2b
   4794 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4795 	vpaddd	64(%rbp),%ymm4,%ymm4
   4796 	vpaddd	96(%rbp),%ymm8,%ymm8
   4797 	vpaddd	160(%rbp),%ymm12,%ymm12
   4798 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   4799 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   4800 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   4801 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   4802 	vmovdqa	%ymm3,%ymm8
   4803 
   4804 	jmp	open_avx2_tail_loop
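/* Tail of at most 256 bytes: two states.  Only min(10, (len-128)/16)
   Poly1305 blocks are interleaved with the rounds; the rest are hashed
   by the catch-up loop that follows the rounds. */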
   4805 3:
   4806 	cmpq	$256,%rbx
   4807 	ja	3f
   4808 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4809 	vmovdqa	64(%rbp),%ymm4
   4810 	vmovdqa	96(%rbp),%ymm8
   4811 	vmovdqa	%ymm0,%ymm1
   4812 	vmovdqa	%ymm4,%ymm5
   4813 	vmovdqa	%ymm8,%ymm9
   4814 	vmovdqa	.avx2_inc(%rip),%ymm12
   4815 	vpaddd	160(%rbp),%ymm12,%ymm13
   4816 	vpaddd	%ymm13,%ymm12,%ymm12
   4817 	vmovdqa	%ymm12,160(%rbp)
   4818 	vmovdqa	%ymm13,192(%rbp)
   4819 
   4820 	movq	%rbx,128(%rbp)
   4821 	movq	%rbx,%rcx
   4822 	subq	$128,%rcx
   4823 	shrq	$4,%rcx
   4824 	movq	$10,%r8
   4825 	cmpq	$10,%rcx
   4826 	cmovgq	%r8,%rcx
   4827 	movq	%rsi,%rbx
   4828 	xorq	%r8,%r8
   4829 1:
   4830 	addq	0(%rbx),%r10
   4831 	adcq	8+0(%rbx),%r11
   4832 	adcq	$1,%r12
   4833 	movq	0+0(%rbp),%rdx
   4834 	movq	%rdx,%r15
   4835 	mulxq	%r10,%r13,%r14
   4836 	mulxq	%r11,%rax,%rdx
   4837 	imulq	%r12,%r15
   4838 	addq	%rax,%r14
   4839 	adcq	%rdx,%r15
   4840 	movq	8+0(%rbp),%rdx
   4841 	mulxq	%r10,%r10,%rax
   4842 	addq	%r10,%r14
   4843 	mulxq	%r11,%r11,%r9
   4844 	adcq	%r11,%r15
   4845 	adcq	$0,%r9
   4846 	imulq	%r12,%rdx
   4847 	addq	%rax,%r15
   4848 	adcq	%rdx,%r9
   4849 	movq	%r13,%r10
   4850 	movq	%r14,%r11
   4851 	movq	%r15,%r12
   4852 	andq	$3,%r12
   4853 	movq	%r15,%r13
   4854 	andq	$-4,%r13
   4855 	movq	%r9,%r14
   4856 	shrdq	$2,%r9,%r15
   4857 	shrq	$2,%r9
   4858 	addq	%r13,%r10
   4859 	adcq	%r14,%r11
   4860 	adcq	$0,%r12
   4861 	addq	%r15,%r10
   4862 	adcq	%r9,%r11
   4863 	adcq	$0,%r12
   4864 
   4865 	leaq	16(%rbx),%rbx
   4866 2:
   4867 	vpaddd	%ymm4,%ymm0,%ymm0
   4868 	vpxor	%ymm0,%ymm12,%ymm12
   4869 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4870 	vpaddd	%ymm12,%ymm8,%ymm8
   4871 	vpxor	%ymm8,%ymm4,%ymm4
   4872 	vpsrld	$20,%ymm4,%ymm3
   4873 	vpslld	$12,%ymm4,%ymm4
   4874 	vpxor	%ymm3,%ymm4,%ymm4
   4875 	vpaddd	%ymm4,%ymm0,%ymm0
   4876 	vpxor	%ymm0,%ymm12,%ymm12
   4877 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4878 	vpaddd	%ymm12,%ymm8,%ymm8
   4879 	vpxor	%ymm8,%ymm4,%ymm4
   4880 	vpslld	$7,%ymm4,%ymm3
   4881 	vpsrld	$25,%ymm4,%ymm4
   4882 	vpxor	%ymm3,%ymm4,%ymm4
   4883 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4884 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4885 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4886 	vpaddd	%ymm5,%ymm1,%ymm1
   4887 	vpxor	%ymm1,%ymm13,%ymm13
   4888 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   4889 	vpaddd	%ymm13,%ymm9,%ymm9
   4890 	vpxor	%ymm9,%ymm5,%ymm5
   4891 	vpsrld	$20,%ymm5,%ymm3
   4892 	vpslld	$12,%ymm5,%ymm5
   4893 	vpxor	%ymm3,%ymm5,%ymm5
   4894 	vpaddd	%ymm5,%ymm1,%ymm1
   4895 	vpxor	%ymm1,%ymm13,%ymm13
   4896 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   4897 	vpaddd	%ymm13,%ymm9,%ymm9
   4898 	vpxor	%ymm9,%ymm5,%ymm5
   4899 	vpslld	$7,%ymm5,%ymm3
   4900 	vpsrld	$25,%ymm5,%ymm5
   4901 	vpxor	%ymm3,%ymm5,%ymm5
   4902 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   4903 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4904 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   4905 
   4906 	incq	%r8
   4907 	vpaddd	%ymm4,%ymm0,%ymm0
   4908 	vpxor	%ymm0,%ymm12,%ymm12
   4909 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4910 	vpaddd	%ymm12,%ymm8,%ymm8
   4911 	vpxor	%ymm8,%ymm4,%ymm4
   4912 	vpsrld	$20,%ymm4,%ymm3
   4913 	vpslld	$12,%ymm4,%ymm4
   4914 	vpxor	%ymm3,%ymm4,%ymm4
   4915 	vpaddd	%ymm4,%ymm0,%ymm0
   4916 	vpxor	%ymm0,%ymm12,%ymm12
   4917 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4918 	vpaddd	%ymm12,%ymm8,%ymm8
   4919 	vpxor	%ymm8,%ymm4,%ymm4
   4920 	vpslld	$7,%ymm4,%ymm3
   4921 	vpsrld	$25,%ymm4,%ymm4
   4922 	vpxor	%ymm3,%ymm4,%ymm4
   4923 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4924 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4925 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4926 	vpaddd	%ymm5,%ymm1,%ymm1
   4927 	vpxor	%ymm1,%ymm13,%ymm13
   4928 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   4929 	vpaddd	%ymm13,%ymm9,%ymm9
   4930 	vpxor	%ymm9,%ymm5,%ymm5
   4931 	vpsrld	$20,%ymm5,%ymm3
   4932 	vpslld	$12,%ymm5,%ymm5
   4933 	vpxor	%ymm3,%ymm5,%ymm5
   4934 	vpaddd	%ymm5,%ymm1,%ymm1
   4935 	vpxor	%ymm1,%ymm13,%ymm13
   4936 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   4937 	vpaddd	%ymm13,%ymm9,%ymm9
   4938 	vpxor	%ymm9,%ymm5,%ymm5
   4939 	vpslld	$7,%ymm5,%ymm3
   4940 	vpsrld	$25,%ymm5,%ymm5
   4941 	vpxor	%ymm3,%ymm5,%ymm5
   4942 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   4943 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4944 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   4945 	vpaddd	%ymm6,%ymm2,%ymm2
   4946 	vpxor	%ymm2,%ymm14,%ymm14
   4947 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   4948 	vpaddd	%ymm14,%ymm10,%ymm10
   4949 	vpxor	%ymm10,%ymm6,%ymm6
   4950 	vpsrld	$20,%ymm6,%ymm3
   4951 	vpslld	$12,%ymm6,%ymm6
   4952 	vpxor	%ymm3,%ymm6,%ymm6
   4953 	vpaddd	%ymm6,%ymm2,%ymm2
   4954 	vpxor	%ymm2,%ymm14,%ymm14
   4955 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   4956 	vpaddd	%ymm14,%ymm10,%ymm10
   4957 	vpxor	%ymm10,%ymm6,%ymm6
   4958 	vpslld	$7,%ymm6,%ymm3
   4959 	vpsrld	$25,%ymm6,%ymm6
   4960 	vpxor	%ymm3,%ymm6,%ymm6
   4961 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   4962 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4963 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   4964 
   4965 	cmpq	%rcx,%r8
   4966 	jb	1b
   4967 	cmpq	$10,%r8
   4968 	jne	2b
   4969 	movq	%rbx,%r8
   4970 	subq	%rsi,%rbx
   4971 	movq	%rbx,%rcx
   4972 	movq	128(%rbp),%rbx
   4973 1:
   4974 	addq	$16,%rcx
   4975 	cmpq	%rbx,%rcx
   4976 	jg	1f
   4977 	addq	0(%r8),%r10
   4978 	adcq	8+0(%r8),%r11
   4979 	adcq	$1,%r12
   4980 	movq	0+0(%rbp),%rdx
   4981 	movq	%rdx,%r15
   4982 	mulxq	%r10,%r13,%r14
   4983 	mulxq	%r11,%rax,%rdx
   4984 	imulq	%r12,%r15
   4985 	addq	%rax,%r14
   4986 	adcq	%rdx,%r15
   4987 	movq	8+0(%rbp),%rdx
   4988 	mulxq	%r10,%r10,%rax
   4989 	addq	%r10,%r14
   4990 	mulxq	%r11,%r11,%r9
   4991 	adcq	%r11,%r15
   4992 	adcq	$0,%r9
   4993 	imulq	%r12,%rdx
   4994 	addq	%rax,%r15
   4995 	adcq	%rdx,%r9
   4996 	movq	%r13,%r10
   4997 	movq	%r14,%r11
   4998 	movq	%r15,%r12
   4999 	andq	$3,%r12
   5000 	movq	%r15,%r13
   5001 	andq	$-4,%r13
   5002 	movq	%r9,%r14
   5003 	shrdq	$2,%r9,%r15
   5004 	shrq	$2,%r9
   5005 	addq	%r13,%r10
   5006 	adcq	%r14,%r11
   5007 	adcq	$0,%r12
   5008 	addq	%r15,%r10
   5009 	adcq	%r9,%r11
   5010 	adcq	$0,%r12
   5011 
   5012 	leaq	16(%r8),%r8
   5013 	jmp	1b
   5014 1:
   5015 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5016 	vpaddd	64(%rbp),%ymm5,%ymm5
   5017 	vpaddd	96(%rbp),%ymm9,%ymm9
   5018 	vpaddd	192(%rbp),%ymm13,%ymm13
   5019 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5020 	vpaddd	64(%rbp),%ymm4,%ymm4
   5021 	vpaddd	96(%rbp),%ymm8,%ymm8
   5022 	vpaddd	160(%rbp),%ymm12,%ymm12
   5023 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5024 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5025 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5026 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5027 	vpxor	0+0(%rsi),%ymm3,%ymm3
   5028 	vpxor	32+0(%rsi),%ymm1,%ymm1
   5029 	vpxor	64+0(%rsi),%ymm5,%ymm5
   5030 	vpxor	96+0(%rsi),%ymm9,%ymm9
   5031 	vmovdqu	%ymm3,0+0(%rdi)
   5032 	vmovdqu	%ymm1,32+0(%rdi)
   5033 	vmovdqu	%ymm5,64+0(%rdi)
   5034 	vmovdqu	%ymm9,96+0(%rdi)
   5035 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5036 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5037 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5038 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5039 	vmovdqa	%ymm3,%ymm8
   5040 
   5041 	leaq	128(%rsi),%rsi
   5042 	leaq	128(%rdi),%rdi
   5043 	subq	$128,%rbx
   5044 	jmp	open_avx2_tail_loop
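/* Tail of at most 384 bytes: three states, same interleave-then-catch-up
   hashing scheme as above. */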
   5045 3:
   5046 	cmpq	$384,%rbx
   5047 	ja	3f
   5048 	vmovdqa	.chacha20_consts(%rip),%ymm0
   5049 	vmovdqa	64(%rbp),%ymm4
   5050 	vmovdqa	96(%rbp),%ymm8
   5051 	vmovdqa	%ymm0,%ymm1
   5052 	vmovdqa	%ymm4,%ymm5
   5053 	vmovdqa	%ymm8,%ymm9
   5054 	vmovdqa	%ymm0,%ymm2
   5055 	vmovdqa	%ymm4,%ymm6
   5056 	vmovdqa	%ymm8,%ymm10
   5057 	vmovdqa	.avx2_inc(%rip),%ymm12
   5058 	vpaddd	160(%rbp),%ymm12,%ymm14
   5059 	vpaddd	%ymm14,%ymm12,%ymm13
   5060 	vpaddd	%ymm13,%ymm12,%ymm12
   5061 	vmovdqa	%ymm12,160(%rbp)
   5062 	vmovdqa	%ymm13,192(%rbp)
   5063 	vmovdqa	%ymm14,224(%rbp)
   5064 
   5065 	movq	%rbx,128(%rbp)
   5066 	movq	%rbx,%rcx
   5067 	subq	$256,%rcx
   5068 	shrq	$4,%rcx
   5069 	addq	$6,%rcx
   5070 	movq	$10,%r8
   5071 	cmpq	$10,%rcx
   5072 	cmovgq	%r8,%rcx
   5073 	movq	%rsi,%rbx
   5074 	xorq	%r8,%r8
   5075 1:
   5076 	addq	0(%rbx),%r10
   5077 	adcq	8+0(%rbx),%r11
   5078 	adcq	$1,%r12
   5079 	movq	0+0(%rbp),%rdx
   5080 	movq	%rdx,%r15
   5081 	mulxq	%r10,%r13,%r14
   5082 	mulxq	%r11,%rax,%rdx
   5083 	imulq	%r12,%r15
   5084 	addq	%rax,%r14
   5085 	adcq	%rdx,%r15
   5086 	movq	8+0(%rbp),%rdx
   5087 	mulxq	%r10,%r10,%rax
   5088 	addq	%r10,%r14
   5089 	mulxq	%r11,%r11,%r9
   5090 	adcq	%r11,%r15
   5091 	adcq	$0,%r9
   5092 	imulq	%r12,%rdx
   5093 	addq	%rax,%r15
   5094 	adcq	%rdx,%r9
   5095 	movq	%r13,%r10
   5096 	movq	%r14,%r11
   5097 	movq	%r15,%r12
   5098 	andq	$3,%r12
   5099 	movq	%r15,%r13
   5100 	andq	$-4,%r13
   5101 	movq	%r9,%r14
   5102 	shrdq	$2,%r9,%r15
   5103 	shrq	$2,%r9
   5104 	addq	%r13,%r10
   5105 	adcq	%r14,%r11
   5106 	adcq	$0,%r12
   5107 	addq	%r15,%r10
   5108 	adcq	%r9,%r11
   5109 	adcq	$0,%r12
   5110 
   5111 	leaq	16(%rbx),%rbx
   5112 2:
   5113 	vpaddd	%ymm6,%ymm2,%ymm2
   5114 	vpxor	%ymm2,%ymm14,%ymm14
   5115 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   5116 	vpaddd	%ymm14,%ymm10,%ymm10
   5117 	vpxor	%ymm10,%ymm6,%ymm6
   5118 	vpsrld	$20,%ymm6,%ymm3
   5119 	vpslld	$12,%ymm6,%ymm6
   5120 	vpxor	%ymm3,%ymm6,%ymm6
   5121 	vpaddd	%ymm6,%ymm2,%ymm2
   5122 	vpxor	%ymm2,%ymm14,%ymm14
   5123 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   5124 	vpaddd	%ymm14,%ymm10,%ymm10
   5125 	vpxor	%ymm10,%ymm6,%ymm6
   5126 	vpslld	$7,%ymm6,%ymm3
   5127 	vpsrld	$25,%ymm6,%ymm6
   5128 	vpxor	%ymm3,%ymm6,%ymm6
   5129 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   5130 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5131 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   5132 	vpaddd	%ymm5,%ymm1,%ymm1
   5133 	vpxor	%ymm1,%ymm13,%ymm13
   5134 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5135 	vpaddd	%ymm13,%ymm9,%ymm9
   5136 	vpxor	%ymm9,%ymm5,%ymm5
   5137 	vpsrld	$20,%ymm5,%ymm3
   5138 	vpslld	$12,%ymm5,%ymm5
   5139 	vpxor	%ymm3,%ymm5,%ymm5
   5140 	vpaddd	%ymm5,%ymm1,%ymm1
   5141 	vpxor	%ymm1,%ymm13,%ymm13
   5142 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5143 	vpaddd	%ymm13,%ymm9,%ymm9
   5144 	vpxor	%ymm9,%ymm5,%ymm5
   5145 	vpslld	$7,%ymm5,%ymm3
   5146 	vpsrld	$25,%ymm5,%ymm5
   5147 	vpxor	%ymm3,%ymm5,%ymm5
   5148 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5149 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5150 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5151 	vpaddd	%ymm4,%ymm0,%ymm0
   5152 	vpxor	%ymm0,%ymm12,%ymm12
   5153 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5154 	vpaddd	%ymm12,%ymm8,%ymm8
   5155 	vpxor	%ymm8,%ymm4,%ymm4
   5156 	vpsrld	$20,%ymm4,%ymm3
   5157 	vpslld	$12,%ymm4,%ymm4
   5158 	vpxor	%ymm3,%ymm4,%ymm4
   5159 	vpaddd	%ymm4,%ymm0,%ymm0
   5160 	vpxor	%ymm0,%ymm12,%ymm12
   5161 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5162 	vpaddd	%ymm12,%ymm8,%ymm8
   5163 	vpxor	%ymm8,%ymm4,%ymm4
   5164 	vpslld	$7,%ymm4,%ymm3
   5165 	vpsrld	$25,%ymm4,%ymm4
   5166 	vpxor	%ymm3,%ymm4,%ymm4
   5167 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5168 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5169 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5170 	addq	0(%rbx),%r10
   5171 	adcq	8+0(%rbx),%r11
   5172 	adcq	$1,%r12
   5173 	movq	0+0(%rbp),%rax
   5174 	movq	%rax,%r15
   5175 	mulq	%r10
   5176 	movq	%rax,%r13
   5177 	movq	%rdx,%r14
   5178 	movq	0+0(%rbp),%rax
   5179 	mulq	%r11
   5180 	imulq	%r12,%r15
   5181 	addq	%rax,%r14
   5182 	adcq	%rdx,%r15
   5183 	movq	8+0(%rbp),%rax
   5184 	movq	%rax,%r9
   5185 	mulq	%r10
   5186 	addq	%rax,%r14
   5187 	adcq	$0,%rdx
   5188 	movq	%rdx,%r10
   5189 	movq	8+0(%rbp),%rax
   5190 	mulq	%r11
   5191 	addq	%rax,%r15
   5192 	adcq	$0,%rdx
   5193 	imulq	%r12,%r9
   5194 	addq	%r10,%r15
   5195 	adcq	%rdx,%r9
   5196 	movq	%r13,%r10
   5197 	movq	%r14,%r11
   5198 	movq	%r15,%r12
   5199 	andq	$3,%r12
   5200 	movq	%r15,%r13
   5201 	andq	$-4,%r13
   5202 	movq	%r9,%r14
   5203 	shrdq	$2,%r9,%r15
   5204 	shrq	$2,%r9
   5205 	addq	%r13,%r10
   5206 	adcq	%r14,%r11
   5207 	adcq	$0,%r12
   5208 	addq	%r15,%r10
   5209 	adcq	%r9,%r11
   5210 	adcq	$0,%r12
   5211 
   5212 	leaq	16(%rbx),%rbx
   5213 	incq	%r8
   5214 	vpaddd	%ymm6,%ymm2,%ymm2
   5215 	vpxor	%ymm2,%ymm14,%ymm14
   5216 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   5217 	vpaddd	%ymm14,%ymm10,%ymm10
   5218 	vpxor	%ymm10,%ymm6,%ymm6
   5219 	vpsrld	$20,%ymm6,%ymm3
   5220 	vpslld	$12,%ymm6,%ymm6
   5221 	vpxor	%ymm3,%ymm6,%ymm6
   5222 	vpaddd	%ymm6,%ymm2,%ymm2
   5223 	vpxor	%ymm2,%ymm14,%ymm14
   5224 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   5225 	vpaddd	%ymm14,%ymm10,%ymm10
   5226 	vpxor	%ymm10,%ymm6,%ymm6
   5227 	vpslld	$7,%ymm6,%ymm3
   5228 	vpsrld	$25,%ymm6,%ymm6
   5229 	vpxor	%ymm3,%ymm6,%ymm6
   5230 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   5231 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5232 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   5233 	vpaddd	%ymm5,%ymm1,%ymm1
   5234 	vpxor	%ymm1,%ymm13,%ymm13
   5235 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5236 	vpaddd	%ymm13,%ymm9,%ymm9
   5237 	vpxor	%ymm9,%ymm5,%ymm5
   5238 	vpsrld	$20,%ymm5,%ymm3
   5239 	vpslld	$12,%ymm5,%ymm5
   5240 	vpxor	%ymm3,%ymm5,%ymm5
   5241 	vpaddd	%ymm5,%ymm1,%ymm1
   5242 	vpxor	%ymm1,%ymm13,%ymm13
   5243 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5244 	vpaddd	%ymm13,%ymm9,%ymm9
   5245 	vpxor	%ymm9,%ymm5,%ymm5
   5246 	vpslld	$7,%ymm5,%ymm3
   5247 	vpsrld	$25,%ymm5,%ymm5
   5248 	vpxor	%ymm3,%ymm5,%ymm5
   5249 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5250 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5251 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5252 	vpaddd	%ymm4,%ymm0,%ymm0
   5253 	vpxor	%ymm0,%ymm12,%ymm12
   5254 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5255 	vpaddd	%ymm12,%ymm8,%ymm8
   5256 	vpxor	%ymm8,%ymm4,%ymm4
   5257 	vpsrld	$20,%ymm4,%ymm3
   5258 	vpslld	$12,%ymm4,%ymm4
   5259 	vpxor	%ymm3,%ymm4,%ymm4
   5260 	vpaddd	%ymm4,%ymm0,%ymm0
   5261 	vpxor	%ymm0,%ymm12,%ymm12
   5262 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5263 	vpaddd	%ymm12,%ymm8,%ymm8
   5264 	vpxor	%ymm8,%ymm4,%ymm4
   5265 	vpslld	$7,%ymm4,%ymm3
   5266 	vpsrld	$25,%ymm4,%ymm4
   5267 	vpxor	%ymm3,%ymm4,%ymm4
   5268 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5269 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5270 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5271 
   5272 	cmpq	%rcx,%r8
   5273 	jb	1b
   5274 	cmpq	$10,%r8
   5275 	jne	2b
   5276 	movq	%rbx,%r8
   5277 	subq	%rsi,%rbx
   5278 	movq	%rbx,%rcx
   5279 	movq	128(%rbp),%rbx
   5280 1:
   5281 	addq	$16,%rcx
   5282 	cmpq	%rbx,%rcx
   5283 	jg	1f
   5284 	addq	0(%r8),%r10
   5285 	adcq	8+0(%r8),%r11
   5286 	adcq	$1,%r12
   5287 	movq	0+0(%rbp),%rdx
   5288 	movq	%rdx,%r15
   5289 	mulxq	%r10,%r13,%r14
   5290 	mulxq	%r11,%rax,%rdx
   5291 	imulq	%r12,%r15
   5292 	addq	%rax,%r14
   5293 	adcq	%rdx,%r15
   5294 	movq	8+0(%rbp),%rdx
   5295 	mulxq	%r10,%r10,%rax
   5296 	addq	%r10,%r14
   5297 	mulxq	%r11,%r11,%r9
   5298 	adcq	%r11,%r15
   5299 	adcq	$0,%r9
   5300 	imulq	%r12,%rdx
   5301 	addq	%rax,%r15
   5302 	adcq	%rdx,%r9
   5303 	movq	%r13,%r10
   5304 	movq	%r14,%r11
   5305 	movq	%r15,%r12
   5306 	andq	$3,%r12
   5307 	movq	%r15,%r13
   5308 	andq	$-4,%r13
   5309 	movq	%r9,%r14
   5310 	shrdq	$2,%r9,%r15
   5311 	shrq	$2,%r9
   5312 	addq	%r13,%r10
   5313 	adcq	%r14,%r11
   5314 	adcq	$0,%r12
   5315 	addq	%r15,%r10
   5316 	adcq	%r9,%r11
   5317 	adcq	$0,%r12
   5318 
   5319 	leaq	16(%r8),%r8
   5320 	jmp	1b
   5321 1:
   5322 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   5323 	vpaddd	64(%rbp),%ymm6,%ymm6
   5324 	vpaddd	96(%rbp),%ymm10,%ymm10
   5325 	vpaddd	224(%rbp),%ymm14,%ymm14
   5326 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5327 	vpaddd	64(%rbp),%ymm5,%ymm5
   5328 	vpaddd	96(%rbp),%ymm9,%ymm9
   5329 	vpaddd	192(%rbp),%ymm13,%ymm13
   5330 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5331 	vpaddd	64(%rbp),%ymm4,%ymm4
   5332 	vpaddd	96(%rbp),%ymm8,%ymm8
   5333 	vpaddd	160(%rbp),%ymm12,%ymm12
   5334 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   5335 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   5336 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   5337 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   5338 	vpxor	0+0(%rsi),%ymm3,%ymm3
   5339 	vpxor	32+0(%rsi),%ymm2,%ymm2
   5340 	vpxor	64+0(%rsi),%ymm6,%ymm6
   5341 	vpxor	96+0(%rsi),%ymm10,%ymm10
   5342 	vmovdqu	%ymm3,0+0(%rdi)
   5343 	vmovdqu	%ymm2,32+0(%rdi)
   5344 	vmovdqu	%ymm6,64+0(%rdi)
   5345 	vmovdqu	%ymm10,96+0(%rdi)
   5346 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5347 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5348 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5349 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5350 	vpxor	0+128(%rsi),%ymm3,%ymm3
   5351 	vpxor	32+128(%rsi),%ymm1,%ymm1
   5352 	vpxor	64+128(%rsi),%ymm5,%ymm5
   5353 	vpxor	96+128(%rsi),%ymm9,%ymm9
   5354 	vmovdqu	%ymm3,0+128(%rdi)
   5355 	vmovdqu	%ymm1,32+128(%rdi)
   5356 	vmovdqu	%ymm5,64+128(%rdi)
   5357 	vmovdqu	%ymm9,96+128(%rdi)
   5358 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5359 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5360 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5361 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5362 	vmovdqa	%ymm3,%ymm8
   5363 
   5364 	leaq	256(%rsi),%rsi
   5365 	leaq	256(%rdi),%rdi
   5366 	subq	$256,%rbx
   5367 	jmp	open_avx2_tail_loop
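/* Tail of 385..511 bytes: regenerate all four states, hashing the
   remaining ciphertext during the rounds and catching up on any
   leftover 16-byte words afterwards. */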
   5368 3:
   5369 	vmovdqa	.chacha20_consts(%rip),%ymm0
   5370 	vmovdqa	64(%rbp),%ymm4
   5371 	vmovdqa	96(%rbp),%ymm8
   5372 	vmovdqa	%ymm0,%ymm1
   5373 	vmovdqa	%ymm4,%ymm5
   5374 	vmovdqa	%ymm8,%ymm9
   5375 	vmovdqa	%ymm0,%ymm2
   5376 	vmovdqa	%ymm4,%ymm6
   5377 	vmovdqa	%ymm8,%ymm10
   5378 	vmovdqa	%ymm0,%ymm3
   5379 	vmovdqa	%ymm4,%ymm7
   5380 	vmovdqa	%ymm8,%ymm11
   5381 	vmovdqa	.avx2_inc(%rip),%ymm12
   5382 	vpaddd	160(%rbp),%ymm12,%ymm15
   5383 	vpaddd	%ymm15,%ymm12,%ymm14
   5384 	vpaddd	%ymm14,%ymm12,%ymm13
   5385 	vpaddd	%ymm13,%ymm12,%ymm12
   5386 	vmovdqa	%ymm15,256(%rbp)
   5387 	vmovdqa	%ymm14,224(%rbp)
   5388 	vmovdqa	%ymm13,192(%rbp)
   5389 	vmovdqa	%ymm12,160(%rbp)
   5390 
   5391 	xorq	%rcx,%rcx
   5392 	movq	%rsi,%r8
   5393 1:
   5394 	addq	0(%r8),%r10
   5395 	adcq	8+0(%r8),%r11
   5396 	adcq	$1,%r12
   5397 	movq	0+0(%rbp),%rax
   5398 	movq	%rax,%r15
   5399 	mulq	%r10
   5400 	movq	%rax,%r13
   5401 	movq	%rdx,%r14
   5402 	movq	0+0(%rbp),%rax
   5403 	mulq	%r11
   5404 	imulq	%r12,%r15
   5405 	addq	%rax,%r14
   5406 	adcq	%rdx,%r15
   5407 	movq	8+0(%rbp),%rax
   5408 	movq	%rax,%r9
   5409 	mulq	%r10
   5410 	addq	%rax,%r14
   5411 	adcq	$0,%rdx
   5412 	movq	%rdx,%r10
   5413 	movq	8+0(%rbp),%rax
   5414 	mulq	%r11
   5415 	addq	%rax,%r15
   5416 	adcq	$0,%rdx
   5417 	imulq	%r12,%r9
   5418 	addq	%r10,%r15
   5419 	adcq	%rdx,%r9
   5420 	movq	%r13,%r10
   5421 	movq	%r14,%r11
   5422 	movq	%r15,%r12
   5423 	andq	$3,%r12
   5424 	movq	%r15,%r13
   5425 	andq	$-4,%r13
   5426 	movq	%r9,%r14
   5427 	shrdq	$2,%r9,%r15
   5428 	shrq	$2,%r9
   5429 	addq	%r13,%r10
   5430 	adcq	%r14,%r11
   5431 	adcq	$0,%r12
   5432 	addq	%r15,%r10
   5433 	adcq	%r9,%r11
   5434 	adcq	$0,%r12
   5435 
   5436 	leaq	16(%r8),%r8
   5437 2:
   5438 	vmovdqa	%ymm8,128(%rbp)
   5439 	vmovdqa	.rol16(%rip),%ymm8
   5440 	vpaddd	%ymm7,%ymm3,%ymm3
   5441 	vpaddd	%ymm6,%ymm2,%ymm2
   5442 	vpaddd	%ymm5,%ymm1,%ymm1
   5443 	vpaddd	%ymm4,%ymm0,%ymm0
   5444 	vpxor	%ymm3,%ymm15,%ymm15
   5445 	vpxor	%ymm2,%ymm14,%ymm14
   5446 	vpxor	%ymm1,%ymm13,%ymm13
   5447 	vpxor	%ymm0,%ymm12,%ymm12
   5448 	vpshufb	%ymm8,%ymm15,%ymm15
   5449 	vpshufb	%ymm8,%ymm14,%ymm14
   5450 	vpshufb	%ymm8,%ymm13,%ymm13
   5451 	vpshufb	%ymm8,%ymm12,%ymm12
   5452 	vmovdqa	128(%rbp),%ymm8
   5453 	vpaddd	%ymm15,%ymm11,%ymm11
   5454 	vpaddd	%ymm14,%ymm10,%ymm10
   5455 	vpaddd	%ymm13,%ymm9,%ymm9
   5456 	vpaddd	%ymm12,%ymm8,%ymm8
   5457 	vpxor	%ymm11,%ymm7,%ymm7
   5458 	vpxor	%ymm10,%ymm6,%ymm6
   5459 	vpxor	%ymm9,%ymm5,%ymm5
   5460 	vpxor	%ymm8,%ymm4,%ymm4
   5461 	vmovdqa	%ymm8,128(%rbp)
   5462 	vpsrld	$20,%ymm7,%ymm8
   5463 	vpslld	$32-20,%ymm7,%ymm7
   5464 	vpxor	%ymm8,%ymm7,%ymm7
   5465 	vpsrld	$20,%ymm6,%ymm8
   5466 	vpslld	$32-20,%ymm6,%ymm6
   5467 	vpxor	%ymm8,%ymm6,%ymm6
   5468 	vpsrld	$20,%ymm5,%ymm8
   5469 	vpslld	$32-20,%ymm5,%ymm5
   5470 	vpxor	%ymm8,%ymm5,%ymm5
   5471 	vpsrld	$20,%ymm4,%ymm8
   5472 	vpslld	$32-20,%ymm4,%ymm4
   5473 	vpxor	%ymm8,%ymm4,%ymm4
   5474 	vmovdqa	.rol8(%rip),%ymm8
   5475 	addq	0(%r8),%r10
   5476 	adcq	8+0(%r8),%r11
   5477 	adcq	$1,%r12
   5478 	movq	0+0(%rbp),%rdx
   5479 	movq	%rdx,%r15
   5480 	mulxq	%r10,%r13,%r14
   5481 	mulxq	%r11,%rax,%rdx
   5482 	imulq	%r12,%r15
   5483 	addq	%rax,%r14
   5484 	adcq	%rdx,%r15
   5485 	movq	8+0(%rbp),%rdx
   5486 	mulxq	%r10,%r10,%rax
   5487 	addq	%r10,%r14
   5488 	mulxq	%r11,%r11,%r9
   5489 	adcq	%r11,%r15
   5490 	adcq	$0,%r9
   5491 	imulq	%r12,%rdx
   5492 	addq	%rax,%r15
   5493 	adcq	%rdx,%r9
   5494 	movq	%r13,%r10
   5495 	movq	%r14,%r11
   5496 	movq	%r15,%r12
   5497 	andq	$3,%r12
   5498 	movq	%r15,%r13
   5499 	andq	$-4,%r13
   5500 	movq	%r9,%r14
   5501 	shrdq	$2,%r9,%r15
   5502 	shrq	$2,%r9
   5503 	addq	%r13,%r10
   5504 	adcq	%r14,%r11
   5505 	adcq	$0,%r12
   5506 	addq	%r15,%r10
   5507 	adcq	%r9,%r11
   5508 	adcq	$0,%r12
   5509 	vpaddd	%ymm7,%ymm3,%ymm3
   5510 	vpaddd	%ymm6,%ymm2,%ymm2
   5511 	vpaddd	%ymm5,%ymm1,%ymm1
   5512 	vpaddd	%ymm4,%ymm0,%ymm0
   5513 	vpxor	%ymm3,%ymm15,%ymm15
   5514 	vpxor	%ymm2,%ymm14,%ymm14
   5515 	vpxor	%ymm1,%ymm13,%ymm13
   5516 	vpxor	%ymm0,%ymm12,%ymm12
   5517 	vpshufb	%ymm8,%ymm15,%ymm15
   5518 	vpshufb	%ymm8,%ymm14,%ymm14
   5519 	vpshufb	%ymm8,%ymm13,%ymm13
   5520 	vpshufb	%ymm8,%ymm12,%ymm12
   5521 	vmovdqa	128(%rbp),%ymm8
   5522 	vpaddd	%ymm15,%ymm11,%ymm11
   5523 	vpaddd	%ymm14,%ymm10,%ymm10
   5524 	vpaddd	%ymm13,%ymm9,%ymm9
   5525 	vpaddd	%ymm12,%ymm8,%ymm8
   5526 	vpxor	%ymm11,%ymm7,%ymm7
   5527 	vpxor	%ymm10,%ymm6,%ymm6
   5528 	vpxor	%ymm9,%ymm5,%ymm5
   5529 	vpxor	%ymm8,%ymm4,%ymm4
   5530 	vmovdqa	%ymm8,128(%rbp)
   5531 	vpsrld	$25,%ymm7,%ymm8
   5532 	vpslld	$32-25,%ymm7,%ymm7
   5533 	vpxor	%ymm8,%ymm7,%ymm7
   5534 	vpsrld	$25,%ymm6,%ymm8
   5535 	vpslld	$32-25,%ymm6,%ymm6
   5536 	vpxor	%ymm8,%ymm6,%ymm6
   5537 	vpsrld	$25,%ymm5,%ymm8
   5538 	vpslld	$32-25,%ymm5,%ymm5
   5539 	vpxor	%ymm8,%ymm5,%ymm5
   5540 	vpsrld	$25,%ymm4,%ymm8
   5541 	vpslld	$32-25,%ymm4,%ymm4
   5542 	vpxor	%ymm8,%ymm4,%ymm4
   5543 	vmovdqa	128(%rbp),%ymm8
   5544 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   5545 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   5546 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   5547 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   5548 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5549 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   5550 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5551 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5552 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5553 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5554 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5555 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5556 	vmovdqa	%ymm8,128(%rbp)
   5557 	addq	16(%r8),%r10
   5558 	adcq	8+16(%r8),%r11
   5559 	adcq	$1,%r12
   5560 	movq	0+0(%rbp),%rdx
   5561 	movq	%rdx,%r15
   5562 	mulxq	%r10,%r13,%r14
   5563 	mulxq	%r11,%rax,%rdx
   5564 	imulq	%r12,%r15
   5565 	addq	%rax,%r14
   5566 	adcq	%rdx,%r15
   5567 	movq	8+0(%rbp),%rdx
   5568 	mulxq	%r10,%r10,%rax
   5569 	addq	%r10,%r14
   5570 	mulxq	%r11,%r11,%r9
   5571 	adcq	%r11,%r15
   5572 	adcq	$0,%r9
   5573 	imulq	%r12,%rdx
   5574 	addq	%rax,%r15
   5575 	adcq	%rdx,%r9
   5576 	movq	%r13,%r10
   5577 	movq	%r14,%r11
   5578 	movq	%r15,%r12
   5579 	andq	$3,%r12
   5580 	movq	%r15,%r13
   5581 	andq	$-4,%r13
   5582 	movq	%r9,%r14
   5583 	shrdq	$2,%r9,%r15
   5584 	shrq	$2,%r9
   5585 	addq	%r13,%r10
   5586 	adcq	%r14,%r11
   5587 	adcq	$0,%r12
   5588 	addq	%r15,%r10
   5589 	adcq	%r9,%r11
   5590 	adcq	$0,%r12
   5591 
   5592 	leaq	32(%r8),%r8
   5593 	vmovdqa	.rol16(%rip),%ymm8
   5594 	vpaddd	%ymm7,%ymm3,%ymm3
   5595 	vpaddd	%ymm6,%ymm2,%ymm2
   5596 	vpaddd	%ymm5,%ymm1,%ymm1
   5597 	vpaddd	%ymm4,%ymm0,%ymm0
   5598 	vpxor	%ymm3,%ymm15,%ymm15
   5599 	vpxor	%ymm2,%ymm14,%ymm14
   5600 	vpxor	%ymm1,%ymm13,%ymm13
   5601 	vpxor	%ymm0,%ymm12,%ymm12
   5602 	vpshufb	%ymm8,%ymm15,%ymm15
   5603 	vpshufb	%ymm8,%ymm14,%ymm14
   5604 	vpshufb	%ymm8,%ymm13,%ymm13
   5605 	vpshufb	%ymm8,%ymm12,%ymm12
   5606 	vmovdqa	128(%rbp),%ymm8
   5607 	vpaddd	%ymm15,%ymm11,%ymm11
   5608 	vpaddd	%ymm14,%ymm10,%ymm10
   5609 	vpaddd	%ymm13,%ymm9,%ymm9
   5610 	vpaddd	%ymm12,%ymm8,%ymm8
   5611 	vpxor	%ymm11,%ymm7,%ymm7
   5612 	vpxor	%ymm10,%ymm6,%ymm6
   5613 	vpxor	%ymm9,%ymm5,%ymm5
   5614 	vpxor	%ymm8,%ymm4,%ymm4
   5615 	vmovdqa	%ymm8,128(%rbp)
   5616 	vpsrld	$20,%ymm7,%ymm8
   5617 	vpslld	$32-20,%ymm7,%ymm7
   5618 	vpxor	%ymm8,%ymm7,%ymm7
   5619 	vpsrld	$20,%ymm6,%ymm8
   5620 	vpslld	$32-20,%ymm6,%ymm6
   5621 	vpxor	%ymm8,%ymm6,%ymm6
   5622 	vpsrld	$20,%ymm5,%ymm8
   5623 	vpslld	$32-20,%ymm5,%ymm5
   5624 	vpxor	%ymm8,%ymm5,%ymm5
   5625 	vpsrld	$20,%ymm4,%ymm8
   5626 	vpslld	$32-20,%ymm4,%ymm4
   5627 	vpxor	%ymm8,%ymm4,%ymm4
   5628 	vmovdqa	.rol8(%rip),%ymm8
   5629 	vpaddd	%ymm7,%ymm3,%ymm3
   5630 	vpaddd	%ymm6,%ymm2,%ymm2
   5631 	vpaddd	%ymm5,%ymm1,%ymm1
   5632 	vpaddd	%ymm4,%ymm0,%ymm0
   5633 	vpxor	%ymm3,%ymm15,%ymm15
   5634 	vpxor	%ymm2,%ymm14,%ymm14
   5635 	vpxor	%ymm1,%ymm13,%ymm13
   5636 	vpxor	%ymm0,%ymm12,%ymm12
   5637 	vpshufb	%ymm8,%ymm15,%ymm15
   5638 	vpshufb	%ymm8,%ymm14,%ymm14
   5639 	vpshufb	%ymm8,%ymm13,%ymm13
   5640 	vpshufb	%ymm8,%ymm12,%ymm12
   5641 	vmovdqa	128(%rbp),%ymm8
   5642 	vpaddd	%ymm15,%ymm11,%ymm11
   5643 	vpaddd	%ymm14,%ymm10,%ymm10
   5644 	vpaddd	%ymm13,%ymm9,%ymm9
   5645 	vpaddd	%ymm12,%ymm8,%ymm8
   5646 	vpxor	%ymm11,%ymm7,%ymm7
   5647 	vpxor	%ymm10,%ymm6,%ymm6
   5648 	vpxor	%ymm9,%ymm5,%ymm5
   5649 	vpxor	%ymm8,%ymm4,%ymm4
   5650 	vmovdqa	%ymm8,128(%rbp)
   5651 	vpsrld	$25,%ymm7,%ymm8
   5652 	vpslld	$32-25,%ymm7,%ymm7
   5653 	vpxor	%ymm8,%ymm7,%ymm7
   5654 	vpsrld	$25,%ymm6,%ymm8
   5655 	vpslld	$32-25,%ymm6,%ymm6
   5656 	vpxor	%ymm8,%ymm6,%ymm6
   5657 	vpsrld	$25,%ymm5,%ymm8
   5658 	vpslld	$32-25,%ymm5,%ymm5
   5659 	vpxor	%ymm8,%ymm5,%ymm5
   5660 	vpsrld	$25,%ymm4,%ymm8
   5661 	vpslld	$32-25,%ymm4,%ymm4
   5662 	vpxor	%ymm8,%ymm4,%ymm4
   5663 	vmovdqa	128(%rbp),%ymm8
   5664 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   5665 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   5666 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   5667 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   5668 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5669 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   5670 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5671 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5672 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5673 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5674 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5675 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5676 
   5677 	incq	%rcx
   5678 	cmpq	$4,%rcx
   5679 	jl	1b
   5680 	cmpq	$10,%rcx
   5681 	jne	2b
   5682 	movq	%rbx,%rcx
   5683 	subq	$384,%rcx
   5684 	andq	$-16,%rcx
   5685 1:
   5686 	testq	%rcx,%rcx
   5687 	je	1f
   5688 	addq	0(%r8),%r10
   5689 	adcq	8+0(%r8),%r11
   5690 	adcq	$1,%r12
   5691 	movq	0+0(%rbp),%rdx
   5692 	movq	%rdx,%r15
   5693 	mulxq	%r10,%r13,%r14
   5694 	mulxq	%r11,%rax,%rdx
   5695 	imulq	%r12,%r15
   5696 	addq	%rax,%r14
   5697 	adcq	%rdx,%r15
   5698 	movq	8+0(%rbp),%rdx
   5699 	mulxq	%r10,%r10,%rax
   5700 	addq	%r10,%r14
   5701 	mulxq	%r11,%r11,%r9
   5702 	adcq	%r11,%r15
   5703 	adcq	$0,%r9
   5704 	imulq	%r12,%rdx
   5705 	addq	%rax,%r15
   5706 	adcq	%rdx,%r9
   5707 	movq	%r13,%r10
   5708 	movq	%r14,%r11
   5709 	movq	%r15,%r12
   5710 	andq	$3,%r12
   5711 	movq	%r15,%r13
   5712 	andq	$-4,%r13
   5713 	movq	%r9,%r14
   5714 	shrdq	$2,%r9,%r15
   5715 	shrq	$2,%r9
   5716 	addq	%r13,%r10
   5717 	adcq	%r14,%r11
   5718 	adcq	$0,%r12
   5719 	addq	%r15,%r10
   5720 	adcq	%r9,%r11
   5721 	adcq	$0,%r12
   5722 
   5723 	leaq	16(%r8),%r8
   5724 	subq	$16,%rcx
   5725 	jmp	1b
   5726 1:
   5727 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   5728 	vpaddd	64(%rbp),%ymm7,%ymm7
   5729 	vpaddd	96(%rbp),%ymm11,%ymm11
   5730 	vpaddd	256(%rbp),%ymm15,%ymm15
   5731 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   5732 	vpaddd	64(%rbp),%ymm6,%ymm6
   5733 	vpaddd	96(%rbp),%ymm10,%ymm10
   5734 	vpaddd	224(%rbp),%ymm14,%ymm14
   5735 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5736 	vpaddd	64(%rbp),%ymm5,%ymm5
   5737 	vpaddd	96(%rbp),%ymm9,%ymm9
   5738 	vpaddd	192(%rbp),%ymm13,%ymm13
   5739 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5740 	vpaddd	64(%rbp),%ymm4,%ymm4
   5741 	vpaddd	96(%rbp),%ymm8,%ymm8
   5742 	vpaddd	160(%rbp),%ymm12,%ymm12
   5743 
   5744 	vmovdqa	%ymm0,128(%rbp)
   5745 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   5746 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   5747 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   5748 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   5749 	vpxor	0+0(%rsi),%ymm0,%ymm0
   5750 	vpxor	32+0(%rsi),%ymm3,%ymm3
   5751 	vpxor	64+0(%rsi),%ymm7,%ymm7
   5752 	vpxor	96+0(%rsi),%ymm11,%ymm11
   5753 	vmovdqu	%ymm0,0+0(%rdi)
   5754 	vmovdqu	%ymm3,32+0(%rdi)
   5755 	vmovdqu	%ymm7,64+0(%rdi)
   5756 	vmovdqu	%ymm11,96+0(%rdi)
   5757 
   5758 	vmovdqa	128(%rbp),%ymm0
   5759 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   5760 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   5761 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   5762 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   5763 	vpxor	0+128(%rsi),%ymm3,%ymm3
   5764 	vpxor	32+128(%rsi),%ymm2,%ymm2
   5765 	vpxor	64+128(%rsi),%ymm6,%ymm6
   5766 	vpxor	96+128(%rsi),%ymm10,%ymm10
   5767 	vmovdqu	%ymm3,0+128(%rdi)
   5768 	vmovdqu	%ymm2,32+128(%rdi)
   5769 	vmovdqu	%ymm6,64+128(%rdi)
   5770 	vmovdqu	%ymm10,96+128(%rdi)
   5771 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5772 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5773 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5774 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5775 	vpxor	0+256(%rsi),%ymm3,%ymm3
   5776 	vpxor	32+256(%rsi),%ymm1,%ymm1
   5777 	vpxor	64+256(%rsi),%ymm5,%ymm5
   5778 	vpxor	96+256(%rsi),%ymm9,%ymm9
   5779 	vmovdqu	%ymm3,0+256(%rdi)
   5780 	vmovdqu	%ymm1,32+256(%rdi)
   5781 	vmovdqu	%ymm5,64+256(%rdi)
   5782 	vmovdqu	%ymm9,96+256(%rdi)
   5783 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5784 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5785 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5786 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5787 	vmovdqa	%ymm3,%ymm8
   5788 
   5789 	leaq	384(%rsi),%rsi
   5790 	leaq	384(%rdi),%rdi
   5791 	subq	$384,%rbx
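/* Common tail: %ymm0, %ymm4, %ymm8, %ymm12 queue leftover keystream.
   XOR out 32 bytes at a time, rotating the queue, then a final 16-byte
   chunk; anything shorter falls through to the SSE 16-byte tail. */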
   5792 open_avx2_tail_loop:
   5793 	cmpq	$32,%rbx
   5794 	jb	open_avx2_tail
   5795 	subq	$32,%rbx
   5796 	vpxor	(%rsi),%ymm0,%ymm0
   5797 	vmovdqu	%ymm0,(%rdi)
   5798 	leaq	32(%rsi),%rsi
   5799 	leaq	32(%rdi),%rdi
   5800 	vmovdqa	%ymm4,%ymm0
   5801 	vmovdqa	%ymm8,%ymm4
   5802 	vmovdqa	%ymm12,%ymm8
   5803 	jmp	open_avx2_tail_loop
   5804 open_avx2_tail:
   5805 	cmpq	$16,%rbx
   5806 	vmovdqa	%xmm0,%xmm1
   5807 	jb	1f
   5808 	subq	$16,%rbx
   5809 
   5810 	vpxor	(%rsi),%xmm0,%xmm1
   5811 	vmovdqu	%xmm1,(%rdi)
   5812 	leaq	16(%rsi),%rsi
   5813 	leaq	16(%rdi),%rdi
   5814 	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
   5815 	vmovdqa	%xmm0,%xmm1
   5816 1:
   5817 	vzeroupper
   5818 	jmp	open_sse_tail_16
   5819 
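/* Short-message path for at most 192 bytes: two states yield 256 bytes
   of keystream, of which 32 are clamped into the Poly1305 key. */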
   5820 open_avx2_192:
   5821 	vmovdqa	%ymm0,%ymm1
   5822 	vmovdqa	%ymm0,%ymm2
   5823 	vmovdqa	%ymm4,%ymm5
   5824 	vmovdqa	%ymm4,%ymm6
   5825 	vmovdqa	%ymm8,%ymm9
   5826 	vmovdqa	%ymm8,%ymm10
   5827 	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
   5828 	vmovdqa	%ymm12,%ymm11
   5829 	vmovdqa	%ymm13,%ymm15
   5830 	movq	$10,%r10
   5831 1:
   5832 	vpaddd	%ymm4,%ymm0,%ymm0
   5833 	vpxor	%ymm0,%ymm12,%ymm12
   5834 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5835 	vpaddd	%ymm12,%ymm8,%ymm8
   5836 	vpxor	%ymm8,%ymm4,%ymm4
   5837 	vpsrld	$20,%ymm4,%ymm3
   5838 	vpslld	$12,%ymm4,%ymm4
   5839 	vpxor	%ymm3,%ymm4,%ymm4
   5840 	vpaddd	%ymm4,%ymm0,%ymm0
   5841 	vpxor	%ymm0,%ymm12,%ymm12
   5842 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5843 	vpaddd	%ymm12,%ymm8,%ymm8
   5844 	vpxor	%ymm8,%ymm4,%ymm4
   5845 	vpslld	$7,%ymm4,%ymm3
   5846 	vpsrld	$25,%ymm4,%ymm4
   5847 	vpxor	%ymm3,%ymm4,%ymm4
   5848 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5849 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5850 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5851 	vpaddd	%ymm5,%ymm1,%ymm1
   5852 	vpxor	%ymm1,%ymm13,%ymm13
   5853 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5854 	vpaddd	%ymm13,%ymm9,%ymm9
   5855 	vpxor	%ymm9,%ymm5,%ymm5
   5856 	vpsrld	$20,%ymm5,%ymm3
   5857 	vpslld	$12,%ymm5,%ymm5
   5858 	vpxor	%ymm3,%ymm5,%ymm5
   5859 	vpaddd	%ymm5,%ymm1,%ymm1
   5860 	vpxor	%ymm1,%ymm13,%ymm13
   5861 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5862 	vpaddd	%ymm13,%ymm9,%ymm9
   5863 	vpxor	%ymm9,%ymm5,%ymm5
   5864 	vpslld	$7,%ymm5,%ymm3
   5865 	vpsrld	$25,%ymm5,%ymm5
   5866 	vpxor	%ymm3,%ymm5,%ymm5
   5867 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5868 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5869 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5870 	vpaddd	%ymm4,%ymm0,%ymm0
   5871 	vpxor	%ymm0,%ymm12,%ymm12
   5872 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5873 	vpaddd	%ymm12,%ymm8,%ymm8
   5874 	vpxor	%ymm8,%ymm4,%ymm4
   5875 	vpsrld	$20,%ymm4,%ymm3
   5876 	vpslld	$12,%ymm4,%ymm4
   5877 	vpxor	%ymm3,%ymm4,%ymm4
   5878 	vpaddd	%ymm4,%ymm0,%ymm0
   5879 	vpxor	%ymm0,%ymm12,%ymm12
   5880 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5881 	vpaddd	%ymm12,%ymm8,%ymm8
   5882 	vpxor	%ymm8,%ymm4,%ymm4
   5883 	vpslld	$7,%ymm4,%ymm3
   5884 	vpsrld	$25,%ymm4,%ymm4
   5885 	vpxor	%ymm3,%ymm4,%ymm4
   5886 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5887 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5888 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5889 	vpaddd	%ymm5,%ymm1,%ymm1
   5890 	vpxor	%ymm1,%ymm13,%ymm13
   5891 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5892 	vpaddd	%ymm13,%ymm9,%ymm9
   5893 	vpxor	%ymm9,%ymm5,%ymm5
   5894 	vpsrld	$20,%ymm5,%ymm3
   5895 	vpslld	$12,%ymm5,%ymm5
   5896 	vpxor	%ymm3,%ymm5,%ymm5
   5897 	vpaddd	%ymm5,%ymm1,%ymm1
   5898 	vpxor	%ymm1,%ymm13,%ymm13
   5899 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5900 	vpaddd	%ymm13,%ymm9,%ymm9
   5901 	vpxor	%ymm9,%ymm5,%ymm5
   5902 	vpslld	$7,%ymm5,%ymm3
   5903 	vpsrld	$25,%ymm5,%ymm5
   5904 	vpxor	%ymm3,%ymm5,%ymm5
   5905 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5906 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5907 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5908 
   5909 	decq	%r10
   5910 	jne	1b
   5911 	vpaddd	%ymm2,%ymm0,%ymm0
   5912 	vpaddd	%ymm2,%ymm1,%ymm1
   5913 	vpaddd	%ymm6,%ymm4,%ymm4
   5914 	vpaddd	%ymm6,%ymm5,%ymm5
   5915 	vpaddd	%ymm10,%ymm8,%ymm8
   5916 	vpaddd	%ymm10,%ymm9,%ymm9
   5917 	vpaddd	%ymm11,%ymm12,%ymm12
   5918 	vpaddd	%ymm15,%ymm13,%ymm13
   5919 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   5920 
   5921 	vpand	.clamp(%rip),%ymm3,%ymm3
   5922 	vmovdqa	%ymm3,0(%rbp)
   5923 
   5924 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   5925 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   5926 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
   5927 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
   5928 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
   5929 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
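/* Hash-then-encrypt 32 bytes per iteration from the queued keystream
   registers, rotating the queue as in the tail loop above. */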
   5930 open_avx2_short:
5931 	movq	%r8,%r8 /* no-op: the AD length is already in %r8 for poly_hash_ad_internal */
   5932 	call	poly_hash_ad_internal
   5933 open_avx2_hash_and_xor_loop:
   5934 	cmpq	$32,%rbx
   5935 	jb	open_avx2_short_tail_32
   5936 	subq	$32,%rbx
   5937 	addq	0(%rsi),%r10
   5938 	adcq	8+0(%rsi),%r11
   5939 	adcq	$1,%r12
   5940 	movq	0+0(%rbp),%rax
   5941 	movq	%rax,%r15
   5942 	mulq	%r10
   5943 	movq	%rax,%r13
   5944 	movq	%rdx,%r14
   5945 	movq	0+0(%rbp),%rax
   5946 	mulq	%r11
   5947 	imulq	%r12,%r15
   5948 	addq	%rax,%r14
   5949 	adcq	%rdx,%r15
   5950 	movq	8+0(%rbp),%rax
   5951 	movq	%rax,%r9
   5952 	mulq	%r10
   5953 	addq	%rax,%r14
   5954 	adcq	$0,%rdx
   5955 	movq	%rdx,%r10
   5956 	movq	8+0(%rbp),%rax
   5957 	mulq	%r11
   5958 	addq	%rax,%r15
   5959 	adcq	$0,%rdx
   5960 	imulq	%r12,%r9
   5961 	addq	%r10,%r15
   5962 	adcq	%rdx,%r9
   5963 	movq	%r13,%r10
   5964 	movq	%r14,%r11
   5965 	movq	%r15,%r12
   5966 	andq	$3,%r12
   5967 	movq	%r15,%r13
   5968 	andq	$-4,%r13
   5969 	movq	%r9,%r14
   5970 	shrdq	$2,%r9,%r15
   5971 	shrq	$2,%r9
   5972 	addq	%r13,%r10
   5973 	adcq	%r14,%r11
   5974 	adcq	$0,%r12
   5975 	addq	%r15,%r10
   5976 	adcq	%r9,%r11
   5977 	adcq	$0,%r12
   5978 	addq	16(%rsi),%r10
   5979 	adcq	8+16(%rsi),%r11
   5980 	adcq	$1,%r12
   5981 	movq	0+0(%rbp),%rax
   5982 	movq	%rax,%r15
   5983 	mulq	%r10
   5984 	movq	%rax,%r13
   5985 	movq	%rdx,%r14
   5986 	movq	0+0(%rbp),%rax
   5987 	mulq	%r11
   5988 	imulq	%r12,%r15
   5989 	addq	%rax,%r14
   5990 	adcq	%rdx,%r15
   5991 	movq	8+0(%rbp),%rax
   5992 	movq	%rax,%r9
   5993 	mulq	%r10
   5994 	addq	%rax,%r14
   5995 	adcq	$0,%rdx
   5996 	movq	%rdx,%r10
   5997 	movq	8+0(%rbp),%rax
   5998 	mulq	%r11
   5999 	addq	%rax,%r15
   6000 	adcq	$0,%rdx
   6001 	imulq	%r12,%r9
   6002 	addq	%r10,%r15
   6003 	adcq	%rdx,%r9
   6004 	movq	%r13,%r10
   6005 	movq	%r14,%r11
   6006 	movq	%r15,%r12
   6007 	andq	$3,%r12
   6008 	movq	%r15,%r13
   6009 	andq	$-4,%r13
   6010 	movq	%r9,%r14
   6011 	shrdq	$2,%r9,%r15
   6012 	shrq	$2,%r9
   6013 	addq	%r13,%r10
   6014 	adcq	%r14,%r11
   6015 	adcq	$0,%r12
   6016 	addq	%r15,%r10
   6017 	adcq	%r9,%r11
   6018 	adcq	$0,%r12
   6019 
   6020 
   6021 	vpxor	(%rsi),%ymm0,%ymm0
   6022 	vmovdqu	%ymm0,(%rdi)
   6023 	leaq	32(%rsi),%rsi
   6024 	leaq	32(%rdi),%rdi
   6025 
   6026 	vmovdqa	%ymm4,%ymm0
   6027 	vmovdqa	%ymm8,%ymm4
   6028 	vmovdqa	%ymm12,%ymm8
   6029 	vmovdqa	%ymm1,%ymm12
   6030 	vmovdqa	%ymm5,%ymm1
   6031 	vmovdqa	%ymm9,%ymm5
   6032 	vmovdqa	%ymm13,%ymm9
   6033 	vmovdqa	%ymm2,%ymm13
   6034 	vmovdqa	%ymm6,%ymm2
   6035 	jmp	open_avx2_hash_and_xor_loop
   6036 open_avx2_short_tail_32:
   6037 	cmpq	$16,%rbx
   6038 	vmovdqa	%xmm0,%xmm1
   6039 	jb	1f
   6040 	subq	$16,%rbx
   6041 	addq	0(%rsi),%r10
   6042 	adcq	8+0(%rsi),%r11
   6043 	adcq	$1,%r12
   6044 	movq	0+0(%rbp),%rax
   6045 	movq	%rax,%r15
   6046 	mulq	%r10
   6047 	movq	%rax,%r13
   6048 	movq	%rdx,%r14
   6049 	movq	0+0(%rbp),%rax
   6050 	mulq	%r11
   6051 	imulq	%r12,%r15
   6052 	addq	%rax,%r14
   6053 	adcq	%rdx,%r15
   6054 	movq	8+0(%rbp),%rax
   6055 	movq	%rax,%r9
   6056 	mulq	%r10
   6057 	addq	%rax,%r14
   6058 	adcq	$0,%rdx
   6059 	movq	%rdx,%r10
   6060 	movq	8+0(%rbp),%rax
   6061 	mulq	%r11
   6062 	addq	%rax,%r15
   6063 	adcq	$0,%rdx
   6064 	imulq	%r12,%r9
   6065 	addq	%r10,%r15
   6066 	adcq	%rdx,%r9
   6067 	movq	%r13,%r10
   6068 	movq	%r14,%r11
   6069 	movq	%r15,%r12
   6070 	andq	$3,%r12
   6071 	movq	%r15,%r13
   6072 	andq	$-4,%r13
   6073 	movq	%r9,%r14
   6074 	shrdq	$2,%r9,%r15
   6075 	shrq	$2,%r9
   6076 	addq	%r13,%r10
   6077 	adcq	%r14,%r11
   6078 	adcq	$0,%r12
   6079 	addq	%r15,%r10
   6080 	adcq	%r9,%r11
   6081 	adcq	$0,%r12
   6082 
   6083 	vpxor	(%rsi),%xmm0,%xmm3
   6084 	vmovdqu	%xmm3,(%rdi)
   6085 	leaq	16(%rsi),%rsi
   6086 	leaq	16(%rdi),%rdi
   6087 	vextracti128	$1,%ymm0,%xmm1
   6088 1:
   6089 	vzeroupper
   6090 	jmp	open_sse_tail_16
   6091 
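/* Path for at most 320 bytes: three states yield 384 bytes of
   keystream, with 64 reserved for the Poly1305 key block. */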
   6092 open_avx2_320:
   6093 	vmovdqa	%ymm0,%ymm1
   6094 	vmovdqa	%ymm0,%ymm2
   6095 	vmovdqa	%ymm4,%ymm5
   6096 	vmovdqa	%ymm4,%ymm6
   6097 	vmovdqa	%ymm8,%ymm9
   6098 	vmovdqa	%ymm8,%ymm10
   6099 	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
   6100 	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
   6101 	vmovdqa	%ymm4,%ymm7
   6102 	vmovdqa	%ymm8,%ymm11
   6103 	vmovdqa	%ymm12,160(%rbp)
   6104 	vmovdqa	%ymm13,192(%rbp)
   6105 	vmovdqa	%ymm14,224(%rbp)
   6106 	movq	$10,%r10
   6107 1:
   6108 	vpaddd	%ymm4,%ymm0,%ymm0
   6109 	vpxor	%ymm0,%ymm12,%ymm12
   6110 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   6111 	vpaddd	%ymm12,%ymm8,%ymm8
   6112 	vpxor	%ymm8,%ymm4,%ymm4
   6113 	vpsrld	$20,%ymm4,%ymm3
   6114 	vpslld	$12,%ymm4,%ymm4
   6115 	vpxor	%ymm3,%ymm4,%ymm4
   6116 	vpaddd	%ymm4,%ymm0,%ymm0
   6117 	vpxor	%ymm0,%ymm12,%ymm12
   6118 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   6119 	vpaddd	%ymm12,%ymm8,%ymm8
   6120 	vpxor	%ymm8,%ymm4,%ymm4
   6121 	vpslld	$7,%ymm4,%ymm3
   6122 	vpsrld	$25,%ymm4,%ymm4
   6123 	vpxor	%ymm3,%ymm4,%ymm4
   6124 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   6125 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6126 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   6127 	vpaddd	%ymm5,%ymm1,%ymm1
   6128 	vpxor	%ymm1,%ymm13,%ymm13
   6129 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   6130 	vpaddd	%ymm13,%ymm9,%ymm9
   6131 	vpxor	%ymm9,%ymm5,%ymm5
   6132 	vpsrld	$20,%ymm5,%ymm3
   6133 	vpslld	$12,%ymm5,%ymm5
   6134 	vpxor	%ymm3,%ymm5,%ymm5
   6135 	vpaddd	%ymm5,%ymm1,%ymm1
   6136 	vpxor	%ymm1,%ymm13,%ymm13
   6137 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   6138 	vpaddd	%ymm13,%ymm9,%ymm9
   6139 	vpxor	%ymm9,%ymm5,%ymm5
   6140 	vpslld	$7,%ymm5,%ymm3
   6141 	vpsrld	$25,%ymm5,%ymm5
   6142 	vpxor	%ymm3,%ymm5,%ymm5
   6143 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   6144 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6145 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   6146 	vpaddd	%ymm6,%ymm2,%ymm2
   6147 	vpxor	%ymm2,%ymm14,%ymm14
   6148 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   6149 	vpaddd	%ymm14,%ymm10,%ymm10
   6150 	vpxor	%ymm10,%ymm6,%ymm6
   6151 	vpsrld	$20,%ymm6,%ymm3
   6152 	vpslld	$12,%ymm6,%ymm6
   6153 	vpxor	%ymm3,%ymm6,%ymm6
   6154 	vpaddd	%ymm6,%ymm2,%ymm2
   6155 	vpxor	%ymm2,%ymm14,%ymm14
   6156 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   6157 	vpaddd	%ymm14,%ymm10,%ymm10
   6158 	vpxor	%ymm10,%ymm6,%ymm6
   6159 	vpslld	$7,%ymm6,%ymm3
   6160 	vpsrld	$25,%ymm6,%ymm6
   6161 	vpxor	%ymm3,%ymm6,%ymm6
   6162 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   6163 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6164 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   6165 	vpaddd	%ymm4,%ymm0,%ymm0
   6166 	vpxor	%ymm0,%ymm12,%ymm12
   6167 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   6168 	vpaddd	%ymm12,%ymm8,%ymm8
   6169 	vpxor	%ymm8,%ymm4,%ymm4
   6170 	vpsrld	$20,%ymm4,%ymm3
   6171 	vpslld	$12,%ymm4,%ymm4
   6172 	vpxor	%ymm3,%ymm4,%ymm4
   6173 	vpaddd	%ymm4,%ymm0,%ymm0
   6174 	vpxor	%ymm0,%ymm12,%ymm12
   6175 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   6176 	vpaddd	%ymm12,%ymm8,%ymm8
   6177 	vpxor	%ymm8,%ymm4,%ymm4
   6178 	vpslld	$7,%ymm4,%ymm3
   6179 	vpsrld	$25,%ymm4,%ymm4
   6180 	vpxor	%ymm3,%ymm4,%ymm4
   6181 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   6182 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6183 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   6184 	vpaddd	%ymm5,%ymm1,%ymm1
   6185 	vpxor	%ymm1,%ymm13,%ymm13
   6186 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   6187 	vpaddd	%ymm13,%ymm9,%ymm9
   6188 	vpxor	%ymm9,%ymm5,%ymm5
   6189 	vpsrld	$20,%ymm5,%ymm3
   6190 	vpslld	$12,%ymm5,%ymm5
   6191 	vpxor	%ymm3,%ymm5,%ymm5
   6192 	vpaddd	%ymm5,%ymm1,%ymm1
   6193 	vpxor	%ymm1,%ymm13,%ymm13
   6194 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   6195 	vpaddd	%ymm13,%ymm9,%ymm9
   6196 	vpxor	%ymm9,%ymm5,%ymm5
   6197 	vpslld	$7,%ymm5,%ymm3
   6198 	vpsrld	$25,%ymm5,%ymm5
   6199 	vpxor	%ymm3,%ymm5,%ymm5
   6200 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   6201 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6202 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   6203 	vpaddd	%ymm6,%ymm2,%ymm2
   6204 	vpxor	%ymm2,%ymm14,%ymm14
   6205 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   6206 	vpaddd	%ymm14,%ymm10,%ymm10
   6207 	vpxor	%ymm10,%ymm6,%ymm6
   6208 	vpsrld	$20,%ymm6,%ymm3
   6209 	vpslld	$12,%ymm6,%ymm6
   6210 	vpxor	%ymm3,%ymm6,%ymm6
   6211 	vpaddd	%ymm6,%ymm2,%ymm2
   6212 	vpxor	%ymm2,%ymm14,%ymm14
   6213 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   6214 	vpaddd	%ymm14,%ymm10,%ymm10
   6215 	vpxor	%ymm10,%ymm6,%ymm6
   6216 	vpslld	$7,%ymm6,%ymm3
   6217 	vpsrld	$25,%ymm6,%ymm6
   6218 	vpxor	%ymm3,%ymm6,%ymm6
   6219 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   6220 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6221 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   6222 
   6223 	decq	%r10
   6224 	jne	1b
   6225 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   6226 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   6227 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   6228 	vpaddd	%ymm7,%ymm4,%ymm4
   6229 	vpaddd	%ymm7,%ymm5,%ymm5
   6230 	vpaddd	%ymm7,%ymm6,%ymm6
   6231 	vpaddd	%ymm11,%ymm8,%ymm8
   6232 	vpaddd	%ymm11,%ymm9,%ymm9
   6233 	vpaddd	%ymm11,%ymm10,%ymm10
   6234 	vpaddd	160(%rbp),%ymm12,%ymm12
   6235 	vpaddd	192(%rbp),%ymm13,%ymm13
   6236 	vpaddd	224(%rbp),%ymm14,%ymm14
   6237 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   6238 
   6239 	vpand	.clamp(%rip),%ymm3,%ymm3
   6240 	vmovdqa	%ymm3,0(%rbp)
   6241 
   6242 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   6243 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   6244 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
   6245 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
   6246 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
   6247 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
   6248 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
   6249 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
   6250 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
   6251 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
   6252 	jmp	open_avx2_short
   6253 
   6254 
   6255 
   6256 
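# chacha20_poly1305_seal_avx2: AVX2 seal (encrypt-then-tag) path. Here %r9
# points at the key/counter/nonce block, %rbx holds the plaintext length,
# and %rcx/%r8 carry the AAD pointer and length for poly_hash_ad_internal.
# Inputs of at most 192 or 320 bytes take the short paths; larger inputs
# run four two-block states, i.e. 512 bytes of key stream, per main-loop
# iteration.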
   6257 .p2align	6
   6258 chacha20_poly1305_seal_avx2:
   6259 	vzeroupper
   6260 	vmovdqa	.chacha20_consts(%rip),%ymm0
   6261 	vbroadcasti128	0(%r9),%ymm4
   6262 	vbroadcasti128	16(%r9),%ymm8
   6263 	vbroadcasti128	32(%r9),%ymm12
   6264 	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
   6265 	cmpq	$192,%rbx
   6266 	jbe	seal_avx2_192
   6267 	cmpq	$320,%rbx
   6268 	jbe	seal_avx2_320
   6269 	vmovdqa	%ymm0,%ymm1
   6270 	vmovdqa	%ymm0,%ymm2
   6271 	vmovdqa	%ymm0,%ymm3
   6272 	vmovdqa	%ymm4,%ymm5
   6273 	vmovdqa	%ymm4,%ymm6
   6274 	vmovdqa	%ymm4,%ymm7
   6275 	vmovdqa	%ymm4,64(%rbp)
   6276 	vmovdqa	%ymm8,%ymm9
   6277 	vmovdqa	%ymm8,%ymm10
   6278 	vmovdqa	%ymm8,%ymm11
   6279 	vmovdqa	%ymm8,96(%rbp)
   6280 	vmovdqa	%ymm12,%ymm15
   6281 	vpaddd	.avx2_inc(%rip),%ymm15,%ymm14
   6282 	vpaddd	.avx2_inc(%rip),%ymm14,%ymm13
   6283 	vpaddd	.avx2_inc(%rip),%ymm13,%ymm12
   6284 	vmovdqa	%ymm12,160(%rbp)
   6285 	vmovdqa	%ymm13,192(%rbp)
   6286 	vmovdqa	%ymm14,224(%rbp)
   6287 	vmovdqa	%ymm15,256(%rbp)
   6288 	movq	$10,%r10
   6289 1:
   6290 	vmovdqa	%ymm8,128(%rbp)
   6291 	vmovdqa	.rol16(%rip),%ymm8
   6292 	vpaddd	%ymm7,%ymm3,%ymm3
   6293 	vpaddd	%ymm6,%ymm2,%ymm2
   6294 	vpaddd	%ymm5,%ymm1,%ymm1
   6295 	vpaddd	%ymm4,%ymm0,%ymm0
   6296 	vpxor	%ymm3,%ymm15,%ymm15
   6297 	vpxor	%ymm2,%ymm14,%ymm14
   6298 	vpxor	%ymm1,%ymm13,%ymm13
   6299 	vpxor	%ymm0,%ymm12,%ymm12
   6300 	vpshufb	%ymm8,%ymm15,%ymm15
   6301 	vpshufb	%ymm8,%ymm14,%ymm14
   6302 	vpshufb	%ymm8,%ymm13,%ymm13
   6303 	vpshufb	%ymm8,%ymm12,%ymm12
   6304 	vmovdqa	128(%rbp),%ymm8
   6305 	vpaddd	%ymm15,%ymm11,%ymm11
   6306 	vpaddd	%ymm14,%ymm10,%ymm10
   6307 	vpaddd	%ymm13,%ymm9,%ymm9
   6308 	vpaddd	%ymm12,%ymm8,%ymm8
   6309 	vpxor	%ymm11,%ymm7,%ymm7
   6310 	vpxor	%ymm10,%ymm6,%ymm6
   6311 	vpxor	%ymm9,%ymm5,%ymm5
   6312 	vpxor	%ymm8,%ymm4,%ymm4
   6313 	vmovdqa	%ymm8,128(%rbp)
   6314 	vpsrld	$20,%ymm7,%ymm8
   6315 	vpslld	$32-20,%ymm7,%ymm7
   6316 	vpxor	%ymm8,%ymm7,%ymm7
   6317 	vpsrld	$20,%ymm6,%ymm8
   6318 	vpslld	$32-20,%ymm6,%ymm6
   6319 	vpxor	%ymm8,%ymm6,%ymm6
   6320 	vpsrld	$20,%ymm5,%ymm8
   6321 	vpslld	$32-20,%ymm5,%ymm5
   6322 	vpxor	%ymm8,%ymm5,%ymm5
   6323 	vpsrld	$20,%ymm4,%ymm8
   6324 	vpslld	$32-20,%ymm4,%ymm4
   6325 	vpxor	%ymm8,%ymm4,%ymm4
   6326 	vmovdqa	.rol8(%rip),%ymm8
   6327 	vpaddd	%ymm7,%ymm3,%ymm3
   6328 	vpaddd	%ymm6,%ymm2,%ymm2
   6329 	vpaddd	%ymm5,%ymm1,%ymm1
   6330 	vpaddd	%ymm4,%ymm0,%ymm0
   6331 	vpxor	%ymm3,%ymm15,%ymm15
   6332 	vpxor	%ymm2,%ymm14,%ymm14
   6333 	vpxor	%ymm1,%ymm13,%ymm13
   6334 	vpxor	%ymm0,%ymm12,%ymm12
   6335 	vpshufb	%ymm8,%ymm15,%ymm15
   6336 	vpshufb	%ymm8,%ymm14,%ymm14
   6337 	vpshufb	%ymm8,%ymm13,%ymm13
   6338 	vpshufb	%ymm8,%ymm12,%ymm12
   6339 	vmovdqa	128(%rbp),%ymm8
   6340 	vpaddd	%ymm15,%ymm11,%ymm11
   6341 	vpaddd	%ymm14,%ymm10,%ymm10
   6342 	vpaddd	%ymm13,%ymm9,%ymm9
   6343 	vpaddd	%ymm12,%ymm8,%ymm8
   6344 	vpxor	%ymm11,%ymm7,%ymm7
   6345 	vpxor	%ymm10,%ymm6,%ymm6
   6346 	vpxor	%ymm9,%ymm5,%ymm5
   6347 	vpxor	%ymm8,%ymm4,%ymm4
   6348 	vmovdqa	%ymm8,128(%rbp)
   6349 	vpsrld	$25,%ymm7,%ymm8
   6350 	vpslld	$32-25,%ymm7,%ymm7
   6351 	vpxor	%ymm8,%ymm7,%ymm7
   6352 	vpsrld	$25,%ymm6,%ymm8
   6353 	vpslld	$32-25,%ymm6,%ymm6
   6354 	vpxor	%ymm8,%ymm6,%ymm6
   6355 	vpsrld	$25,%ymm5,%ymm8
   6356 	vpslld	$32-25,%ymm5,%ymm5
   6357 	vpxor	%ymm8,%ymm5,%ymm5
   6358 	vpsrld	$25,%ymm4,%ymm8
   6359 	vpslld	$32-25,%ymm4,%ymm4
   6360 	vpxor	%ymm8,%ymm4,%ymm4
   6361 	vmovdqa	128(%rbp),%ymm8
   6362 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   6363 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6364 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   6365 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   6366 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6367 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   6368 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   6369 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6370 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   6371 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   6372 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6373 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   6374 	vmovdqa	%ymm8,128(%rbp)
   6375 	vmovdqa	.rol16(%rip),%ymm8
   6376 	vpaddd	%ymm7,%ymm3,%ymm3
   6377 	vpaddd	%ymm6,%ymm2,%ymm2
   6378 	vpaddd	%ymm5,%ymm1,%ymm1
   6379 	vpaddd	%ymm4,%ymm0,%ymm0
   6380 	vpxor	%ymm3,%ymm15,%ymm15
   6381 	vpxor	%ymm2,%ymm14,%ymm14
   6382 	vpxor	%ymm1,%ymm13,%ymm13
   6383 	vpxor	%ymm0,%ymm12,%ymm12
   6384 	vpshufb	%ymm8,%ymm15,%ymm15
   6385 	vpshufb	%ymm8,%ymm14,%ymm14
   6386 	vpshufb	%ymm8,%ymm13,%ymm13
   6387 	vpshufb	%ymm8,%ymm12,%ymm12
   6388 	vmovdqa	128(%rbp),%ymm8
   6389 	vpaddd	%ymm15,%ymm11,%ymm11
   6390 	vpaddd	%ymm14,%ymm10,%ymm10
   6391 	vpaddd	%ymm13,%ymm9,%ymm9
   6392 	vpaddd	%ymm12,%ymm8,%ymm8
   6393 	vpxor	%ymm11,%ymm7,%ymm7
   6394 	vpxor	%ymm10,%ymm6,%ymm6
   6395 	vpxor	%ymm9,%ymm5,%ymm5
   6396 	vpxor	%ymm8,%ymm4,%ymm4
   6397 	vmovdqa	%ymm8,128(%rbp)
   6398 	vpsrld	$20,%ymm7,%ymm8
   6399 	vpslld	$32-20,%ymm7,%ymm7
   6400 	vpxor	%ymm8,%ymm7,%ymm7
   6401 	vpsrld	$20,%ymm6,%ymm8
   6402 	vpslld	$32-20,%ymm6,%ymm6
   6403 	vpxor	%ymm8,%ymm6,%ymm6
   6404 	vpsrld	$20,%ymm5,%ymm8
   6405 	vpslld	$32-20,%ymm5,%ymm5
   6406 	vpxor	%ymm8,%ymm5,%ymm5
   6407 	vpsrld	$20,%ymm4,%ymm8
   6408 	vpslld	$32-20,%ymm4,%ymm4
   6409 	vpxor	%ymm8,%ymm4,%ymm4
   6410 	vmovdqa	.rol8(%rip),%ymm8
   6411 	vpaddd	%ymm7,%ymm3,%ymm3
   6412 	vpaddd	%ymm6,%ymm2,%ymm2
   6413 	vpaddd	%ymm5,%ymm1,%ymm1
   6414 	vpaddd	%ymm4,%ymm0,%ymm0
   6415 	vpxor	%ymm3,%ymm15,%ymm15
   6416 	vpxor	%ymm2,%ymm14,%ymm14
   6417 	vpxor	%ymm1,%ymm13,%ymm13
   6418 	vpxor	%ymm0,%ymm12,%ymm12
   6419 	vpshufb	%ymm8,%ymm15,%ymm15
   6420 	vpshufb	%ymm8,%ymm14,%ymm14
   6421 	vpshufb	%ymm8,%ymm13,%ymm13
   6422 	vpshufb	%ymm8,%ymm12,%ymm12
   6423 	vmovdqa	128(%rbp),%ymm8
   6424 	vpaddd	%ymm15,%ymm11,%ymm11
   6425 	vpaddd	%ymm14,%ymm10,%ymm10
   6426 	vpaddd	%ymm13,%ymm9,%ymm9
   6427 	vpaddd	%ymm12,%ymm8,%ymm8
   6428 	vpxor	%ymm11,%ymm7,%ymm7
   6429 	vpxor	%ymm10,%ymm6,%ymm6
   6430 	vpxor	%ymm9,%ymm5,%ymm5
   6431 	vpxor	%ymm8,%ymm4,%ymm4
   6432 	vmovdqa	%ymm8,128(%rbp)
   6433 	vpsrld	$25,%ymm7,%ymm8
   6434 	vpslld	$32-25,%ymm7,%ymm7
   6435 	vpxor	%ymm8,%ymm7,%ymm7
   6436 	vpsrld	$25,%ymm6,%ymm8
   6437 	vpslld	$32-25,%ymm6,%ymm6
   6438 	vpxor	%ymm8,%ymm6,%ymm6
   6439 	vpsrld	$25,%ymm5,%ymm8
   6440 	vpslld	$32-25,%ymm5,%ymm5
   6441 	vpxor	%ymm8,%ymm5,%ymm5
   6442 	vpsrld	$25,%ymm4,%ymm8
   6443 	vpslld	$32-25,%ymm4,%ymm4
   6444 	vpxor	%ymm8,%ymm4,%ymm4
   6445 	vmovdqa	128(%rbp),%ymm8
   6446 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   6447 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6448 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   6449 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   6450 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6451 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   6452 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   6453 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6454 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   6455 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   6456 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6457 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   6458 
   6459 	decq	%r10
   6460 	jnz	1b
   6461 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   6462 	vpaddd	64(%rbp),%ymm7,%ymm7
   6463 	vpaddd	96(%rbp),%ymm11,%ymm11
   6464 	vpaddd	256(%rbp),%ymm15,%ymm15
   6465 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   6466 	vpaddd	64(%rbp),%ymm6,%ymm6
   6467 	vpaddd	96(%rbp),%ymm10,%ymm10
   6468 	vpaddd	224(%rbp),%ymm14,%ymm14
   6469 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   6470 	vpaddd	64(%rbp),%ymm5,%ymm5
   6471 	vpaddd	96(%rbp),%ymm9,%ymm9
   6472 	vpaddd	192(%rbp),%ymm13,%ymm13
   6473 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   6474 	vpaddd	64(%rbp),%ymm4,%ymm4
   6475 	vpaddd	96(%rbp),%ymm8,%ymm8
   6476 	vpaddd	160(%rbp),%ymm12,%ymm12
   6477 
   6478 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   6479 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
   6480 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
   6481 	vpand	.clamp(%rip),%ymm15,%ymm15
   6482 	vmovdqa	%ymm15,0(%rbp)
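# Block 0's first 32 bytes, clamped above, now sit at 0(%rbp) as the
# Poly1305 key (r || s). The self-move of %r8 below is a harmless no-op:
# the AAD length is already where poly_hash_ad_internal expects it.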
   6483 	movq	%r8,%r8
   6484 	call	poly_hash_ad_internal
   6485 
   6486 	vpxor	0(%rsi),%ymm3,%ymm3
   6487 	vpxor	32(%rsi),%ymm11,%ymm11
   6488 	vmovdqu	%ymm3,0(%rdi)
   6489 	vmovdqu	%ymm11,32(%rdi)
   6490 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
   6491 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   6492 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   6493 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   6494 	vpxor	0+64(%rsi),%ymm15,%ymm15
   6495 	vpxor	32+64(%rsi),%ymm2,%ymm2
   6496 	vpxor	64+64(%rsi),%ymm6,%ymm6
   6497 	vpxor	96+64(%rsi),%ymm10,%ymm10
   6498 	vmovdqu	%ymm15,0+64(%rdi)
   6499 	vmovdqu	%ymm2,32+64(%rdi)
   6500 	vmovdqu	%ymm6,64+64(%rdi)
   6501 	vmovdqu	%ymm10,96+64(%rdi)
   6502 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
   6503 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   6504 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   6505 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   6506 	vpxor	0+192(%rsi),%ymm15,%ymm15
   6507 	vpxor	32+192(%rsi),%ymm1,%ymm1
   6508 	vpxor	64+192(%rsi),%ymm5,%ymm5
   6509 	vpxor	96+192(%rsi),%ymm9,%ymm9
   6510 	vmovdqu	%ymm15,0+192(%rdi)
   6511 	vmovdqu	%ymm1,32+192(%rdi)
   6512 	vmovdqu	%ymm5,64+192(%rdi)
   6513 	vmovdqu	%ymm9,96+192(%rdi)
   6514 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
   6515 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   6516 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   6517 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   6518 	vmovdqa	%ymm15,%ymm8
   6519 
   6520 	leaq	320(%rsi),%rsi
   6521 	subq	$320,%rbx
   6522 	movq	$320,%rcx
   6523 	cmpq	$128,%rbx
   6524 	jbe	seal_avx2_hash
   6525 	vpxor	0(%rsi),%ymm0,%ymm0
   6526 	vpxor	32(%rsi),%ymm4,%ymm4
   6527 	vpxor	64(%rsi),%ymm8,%ymm8
   6528 	vpxor	96(%rsi),%ymm12,%ymm12
   6529 	vmovdqu	%ymm0,320(%rdi)
   6530 	vmovdqu	%ymm4,352(%rdi)
   6531 	vmovdqu	%ymm8,384(%rdi)
   6532 	vmovdqu	%ymm12,416(%rdi)
   6533 	leaq	128(%rsi),%rsi
   6534 	subq	$128,%rbx
   6535 	movq	$8,%rcx
   6536 	movq	$2,%r8
   6537 	cmpq	$128,%rbx
   6538 	jbe	seal_avx2_tail_128
   6539 	cmpq	$256,%rbx
   6540 	jbe	seal_avx2_tail_256
   6541 	cmpq	$384,%rbx
   6542 	jbe	seal_avx2_tail_384
   6543 	cmpq	$512,%rbx
   6544 	jbe	seal_avx2_tail_512
   6545 	vmovdqa	.chacha20_consts(%rip),%ymm0
   6546 	vmovdqa	64(%rbp),%ymm4
   6547 	vmovdqa	96(%rbp),%ymm8
   6548 	vmovdqa	%ymm0,%ymm1
   6549 	vmovdqa	%ymm4,%ymm5
   6550 	vmovdqa	%ymm8,%ymm9
   6551 	vmovdqa	%ymm0,%ymm2
   6552 	vmovdqa	%ymm4,%ymm6
   6553 	vmovdqa	%ymm8,%ymm10
   6554 	vmovdqa	%ymm0,%ymm3
   6555 	vmovdqa	%ymm4,%ymm7
   6556 	vmovdqa	%ymm8,%ymm11
   6557 	vmovdqa	.avx2_inc(%rip),%ymm12
   6558 	vpaddd	160(%rbp),%ymm12,%ymm15
   6559 	vpaddd	%ymm15,%ymm12,%ymm14
   6560 	vpaddd	%ymm14,%ymm12,%ymm13
   6561 	vpaddd	%ymm13,%ymm12,%ymm12
   6562 	vmovdqa	%ymm15,256(%rbp)
   6563 	vmovdqa	%ymm14,224(%rbp)
   6564 	vmovdqa	%ymm13,192(%rbp)
   6565 	vmovdqa	%ymm12,160(%rbp)
   6566 	vmovdqa	%ymm8,128(%rbp)
   6567 	vmovdqa	.rol16(%rip),%ymm8
   6568 	vpaddd	%ymm7,%ymm3,%ymm3
   6569 	vpaddd	%ymm6,%ymm2,%ymm2
   6570 	vpaddd	%ymm5,%ymm1,%ymm1
   6571 	vpaddd	%ymm4,%ymm0,%ymm0
   6572 	vpxor	%ymm3,%ymm15,%ymm15
   6573 	vpxor	%ymm2,%ymm14,%ymm14
   6574 	vpxor	%ymm1,%ymm13,%ymm13
   6575 	vpxor	%ymm0,%ymm12,%ymm12
   6576 	vpshufb	%ymm8,%ymm15,%ymm15
   6577 	vpshufb	%ymm8,%ymm14,%ymm14
   6578 	vpshufb	%ymm8,%ymm13,%ymm13
   6579 	vpshufb	%ymm8,%ymm12,%ymm12
   6580 	vmovdqa	128(%rbp),%ymm8
   6581 	vpaddd	%ymm15,%ymm11,%ymm11
   6582 	vpaddd	%ymm14,%ymm10,%ymm10
   6583 	vpaddd	%ymm13,%ymm9,%ymm9
   6584 	vpaddd	%ymm12,%ymm8,%ymm8
   6585 	vpxor	%ymm11,%ymm7,%ymm7
   6586 	vpxor	%ymm10,%ymm6,%ymm6
   6587 	vpxor	%ymm9,%ymm5,%ymm5
   6588 	vpxor	%ymm8,%ymm4,%ymm4
   6589 	vmovdqa	%ymm8,128(%rbp)
   6590 	vpsrld	$20,%ymm7,%ymm8
   6591 	vpslld	$32-20,%ymm7,%ymm7
   6592 	vpxor	%ymm8,%ymm7,%ymm7
   6593 	vpsrld	$20,%ymm6,%ymm8
   6594 	vpslld	$32-20,%ymm6,%ymm6
   6595 	vpxor	%ymm8,%ymm6,%ymm6
   6596 	vpsrld	$20,%ymm5,%ymm8
   6597 	vpslld	$32-20,%ymm5,%ymm5
   6598 	vpxor	%ymm8,%ymm5,%ymm5
   6599 	vpsrld	$20,%ymm4,%ymm8
   6600 	vpslld	$32-20,%ymm4,%ymm4
   6601 	vpxor	%ymm8,%ymm4,%ymm4
   6602 	vmovdqa	.rol8(%rip),%ymm8
   6603 	vpaddd	%ymm7,%ymm3,%ymm3
   6604 	vpaddd	%ymm6,%ymm2,%ymm2
   6605 	vpaddd	%ymm5,%ymm1,%ymm1
   6606 	vpaddd	%ymm4,%ymm0,%ymm0
   6607 	vpxor	%ymm3,%ymm15,%ymm15
   6608 	vpxor	%ymm2,%ymm14,%ymm14
   6609 	vpxor	%ymm1,%ymm13,%ymm13
   6610 	vpxor	%ymm0,%ymm12,%ymm12
   6611 	vpshufb	%ymm8,%ymm15,%ymm15
   6612 	vpshufb	%ymm8,%ymm14,%ymm14
   6613 	vpshufb	%ymm8,%ymm13,%ymm13
   6614 	vpshufb	%ymm8,%ymm12,%ymm12
   6615 	vmovdqa	128(%rbp),%ymm8
   6616 	vpaddd	%ymm15,%ymm11,%ymm11
   6617 	vpaddd	%ymm14,%ymm10,%ymm10
   6618 	vpaddd	%ymm13,%ymm9,%ymm9
   6619 	vpaddd	%ymm12,%ymm8,%ymm8
   6620 	vpxor	%ymm11,%ymm7,%ymm7
   6621 	vpxor	%ymm10,%ymm6,%ymm6
   6622 	vpxor	%ymm9,%ymm5,%ymm5
   6623 	vpxor	%ymm8,%ymm4,%ymm4
   6624 	vmovdqa	%ymm8,128(%rbp)
   6625 	vpsrld	$25,%ymm7,%ymm8
   6626 	vpslld	$32-25,%ymm7,%ymm7
   6627 	vpxor	%ymm8,%ymm7,%ymm7
   6628 	vpsrld	$25,%ymm6,%ymm8
   6629 	vpslld	$32-25,%ymm6,%ymm6
   6630 	vpxor	%ymm8,%ymm6,%ymm6
   6631 	vpsrld	$25,%ymm5,%ymm8
   6632 	vpslld	$32-25,%ymm5,%ymm5
   6633 	vpxor	%ymm8,%ymm5,%ymm5
   6634 	vpsrld	$25,%ymm4,%ymm8
   6635 	vpslld	$32-25,%ymm4,%ymm4
   6636 	vpxor	%ymm8,%ymm4,%ymm4
   6637 	vmovdqa	128(%rbp),%ymm8
   6638 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   6639 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6640 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   6641 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   6642 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6643 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   6644 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   6645 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6646 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   6647 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   6648 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6649 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   6650 	vmovdqa	%ymm8,128(%rbp)
   6651 	vmovdqa	.rol16(%rip),%ymm8
   6652 	vpaddd	%ymm7,%ymm3,%ymm3
   6653 	vpaddd	%ymm6,%ymm2,%ymm2
   6654 	vpaddd	%ymm5,%ymm1,%ymm1
   6655 	vpaddd	%ymm4,%ymm0,%ymm0
   6656 	vpxor	%ymm3,%ymm15,%ymm15
   6657 	vpxor	%ymm2,%ymm14,%ymm14
   6658 	vpxor	%ymm1,%ymm13,%ymm13
   6659 	vpxor	%ymm0,%ymm12,%ymm12
   6660 	vpshufb	%ymm8,%ymm15,%ymm15
   6661 	vpshufb	%ymm8,%ymm14,%ymm14
   6662 	vpshufb	%ymm8,%ymm13,%ymm13
   6663 	vpshufb	%ymm8,%ymm12,%ymm12
   6664 	vmovdqa	128(%rbp),%ymm8
   6665 	vpaddd	%ymm15,%ymm11,%ymm11
   6666 	vpaddd	%ymm14,%ymm10,%ymm10
   6667 	vpaddd	%ymm13,%ymm9,%ymm9
   6668 	vpaddd	%ymm12,%ymm8,%ymm8
   6669 	vpxor	%ymm11,%ymm7,%ymm7
   6670 	vpxor	%ymm10,%ymm6,%ymm6
   6671 	vpxor	%ymm9,%ymm5,%ymm5
   6672 	vpxor	%ymm8,%ymm4,%ymm4
   6673 	vmovdqa	%ymm8,128(%rbp)
   6674 	vpsrld	$20,%ymm7,%ymm8
   6675 	vpslld	$32-20,%ymm7,%ymm7
   6676 	vpxor	%ymm8,%ymm7,%ymm7
   6677 	vpsrld	$20,%ymm6,%ymm8
   6678 	vpslld	$32-20,%ymm6,%ymm6
   6679 	vpxor	%ymm8,%ymm6,%ymm6
   6680 	vpsrld	$20,%ymm5,%ymm8
   6681 	vpslld	$32-20,%ymm5,%ymm5
   6682 	vpxor	%ymm8,%ymm5,%ymm5
   6683 	vpsrld	$20,%ymm4,%ymm8
   6684 	vpslld	$32-20,%ymm4,%ymm4
   6685 	vpxor	%ymm8,%ymm4,%ymm4
   6686 	vmovdqa	.rol8(%rip),%ymm8
   6687 	vpaddd	%ymm7,%ymm3,%ymm3
   6688 	vpaddd	%ymm6,%ymm2,%ymm2
   6689 	vpaddd	%ymm5,%ymm1,%ymm1
   6690 	vpaddd	%ymm4,%ymm0,%ymm0
   6691 	vpxor	%ymm3,%ymm15,%ymm15
   6692 	vpxor	%ymm2,%ymm14,%ymm14
   6693 	vpxor	%ymm1,%ymm13,%ymm13
   6694 	vpxor	%ymm0,%ymm12,%ymm12
   6695 	vpshufb	%ymm8,%ymm15,%ymm15
   6696 	vpshufb	%ymm8,%ymm14,%ymm14
   6697 	vpshufb	%ymm8,%ymm13,%ymm13
   6698 	vpshufb	%ymm8,%ymm12,%ymm12
   6699 	vmovdqa	128(%rbp),%ymm8
   6700 	vpaddd	%ymm15,%ymm11,%ymm11
   6701 	vpaddd	%ymm14,%ymm10,%ymm10
   6702 	vpaddd	%ymm13,%ymm9,%ymm9
   6703 	vpaddd	%ymm12,%ymm8,%ymm8
   6704 	vpxor	%ymm11,%ymm7,%ymm7
   6705 	vpxor	%ymm10,%ymm6,%ymm6
   6706 	vpxor	%ymm9,%ymm5,%ymm5
   6707 	vpxor	%ymm8,%ymm4,%ymm4
   6708 	vmovdqa	%ymm8,128(%rbp)
   6709 	vpsrld	$25,%ymm7,%ymm8
   6710 	vpslld	$32-25,%ymm7,%ymm7
   6711 	vpxor	%ymm8,%ymm7,%ymm7
   6712 	vpsrld	$25,%ymm6,%ymm8
   6713 	vpslld	$32-25,%ymm6,%ymm6
   6714 	vpxor	%ymm8,%ymm6,%ymm6
   6715 	vpsrld	$25,%ymm5,%ymm8
   6716 	vpslld	$32-25,%ymm5,%ymm5
   6717 	vpxor	%ymm8,%ymm5,%ymm5
   6718 	vpsrld	$25,%ymm4,%ymm8
   6719 	vpslld	$32-25,%ymm4,%ymm4
   6720 	vpxor	%ymm8,%ymm4,%ymm4
   6721 	vmovdqa	128(%rbp),%ymm8
   6722 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   6723 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6724 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   6725 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   6726 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6727 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   6728 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   6729 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6730 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   6731 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   6732 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6733 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   6734 	vmovdqa	%ymm8,128(%rbp)
   6735 	vmovdqa	.rol16(%rip),%ymm8
   6736 	vpaddd	%ymm7,%ymm3,%ymm3
   6737 	vpaddd	%ymm6,%ymm2,%ymm2
   6738 	vpaddd	%ymm5,%ymm1,%ymm1
   6739 	vpaddd	%ymm4,%ymm0,%ymm0
   6740 	vpxor	%ymm3,%ymm15,%ymm15
   6741 	vpxor	%ymm2,%ymm14,%ymm14
   6742 	vpxor	%ymm1,%ymm13,%ymm13
   6743 	vpxor	%ymm0,%ymm12,%ymm12
   6744 	vpshufb	%ymm8,%ymm15,%ymm15
   6745 	vpshufb	%ymm8,%ymm14,%ymm14
   6746 	vpshufb	%ymm8,%ymm13,%ymm13
   6747 	vpshufb	%ymm8,%ymm12,%ymm12
   6748 	vmovdqa	128(%rbp),%ymm8
   6749 	vpaddd	%ymm15,%ymm11,%ymm11
   6750 	vpaddd	%ymm14,%ymm10,%ymm10
   6751 	vpaddd	%ymm13,%ymm9,%ymm9
   6752 	vpaddd	%ymm12,%ymm8,%ymm8
   6753 	vpxor	%ymm11,%ymm7,%ymm7
   6754 	vpxor	%ymm10,%ymm6,%ymm6
   6755 	vpxor	%ymm9,%ymm5,%ymm5
   6756 	vpxor	%ymm8,%ymm4,%ymm4
   6757 	vmovdqa	%ymm8,128(%rbp)
   6758 	vpsrld	$20,%ymm7,%ymm8
   6759 	vpslld	$32-20,%ymm7,%ymm7
   6760 	vpxor	%ymm8,%ymm7,%ymm7
   6761 	vpsrld	$20,%ymm6,%ymm8
   6762 	vpslld	$32-20,%ymm6,%ymm6
   6763 	vpxor	%ymm8,%ymm6,%ymm6
   6764 	vpsrld	$20,%ymm5,%ymm8
   6765 	vpslld	$32-20,%ymm5,%ymm5
   6766 	vpxor	%ymm8,%ymm5,%ymm5
   6767 	vpsrld	$20,%ymm4,%ymm8
   6768 	vpslld	$32-20,%ymm4,%ymm4
   6769 	vpxor	%ymm8,%ymm4,%ymm4
   6770 	vmovdqa	.rol8(%rip),%ymm8
   6771 	vpaddd	%ymm7,%ymm3,%ymm3
   6772 	vpaddd	%ymm6,%ymm2,%ymm2
   6773 	vpaddd	%ymm5,%ymm1,%ymm1
   6774 	vpaddd	%ymm4,%ymm0,%ymm0
   6775 
   6776 	subq	$16,%rdi
   6777 	movq	$9,%rcx
   6778 	jmp	4f
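# The first chunk is peeled: the rounds above ran without interleaved
# hashing (no ciphertext has been produced for this chunk yet), so enter
# the loop at 4: with 9 double-rounds to go. %rdi is pulled back 16 bytes
# so the Poly1305 loads inside the loop line up with the pending data.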
   6779 1:
   6780 	vmovdqa	.chacha20_consts(%rip),%ymm0
   6781 	vmovdqa	64(%rbp),%ymm4
   6782 	vmovdqa	96(%rbp),%ymm8
   6783 	vmovdqa	%ymm0,%ymm1
   6784 	vmovdqa	%ymm4,%ymm5
   6785 	vmovdqa	%ymm8,%ymm9
   6786 	vmovdqa	%ymm0,%ymm2
   6787 	vmovdqa	%ymm4,%ymm6
   6788 	vmovdqa	%ymm8,%ymm10
   6789 	vmovdqa	%ymm0,%ymm3
   6790 	vmovdqa	%ymm4,%ymm7
   6791 	vmovdqa	%ymm8,%ymm11
   6792 	vmovdqa	.avx2_inc(%rip),%ymm12
   6793 	vpaddd	160(%rbp),%ymm12,%ymm15
   6794 	vpaddd	%ymm15,%ymm12,%ymm14
   6795 	vpaddd	%ymm14,%ymm12,%ymm13
   6796 	vpaddd	%ymm13,%ymm12,%ymm12
   6797 	vmovdqa	%ymm15,256(%rbp)
   6798 	vmovdqa	%ymm14,224(%rbp)
   6799 	vmovdqa	%ymm13,192(%rbp)
   6800 	vmovdqa	%ymm12,160(%rbp)
   6801 
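# Steady state: each pass of 2: is one ChaCha20 double round over all four
# states, interleaved with three 16-byte Poly1305 absorbs at 0, 16 and
# 32(%rdi). Ten passes hash 480 bytes; the remaining 32 bytes of the
# lagging chunk are absorbed after the loop.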
   6802 	movq	$10,%rcx
   6803 2:
   6804 	addq	0(%rdi),%r10
   6805 	adcq	8+0(%rdi),%r11
   6806 	adcq	$1,%r12
   6807 	vmovdqa	%ymm8,128(%rbp)
   6808 	vmovdqa	.rol16(%rip),%ymm8
   6809 	vpaddd	%ymm7,%ymm3,%ymm3
   6810 	vpaddd	%ymm6,%ymm2,%ymm2
   6811 	vpaddd	%ymm5,%ymm1,%ymm1
   6812 	vpaddd	%ymm4,%ymm0,%ymm0
   6813 	vpxor	%ymm3,%ymm15,%ymm15
   6814 	vpxor	%ymm2,%ymm14,%ymm14
   6815 	vpxor	%ymm1,%ymm13,%ymm13
   6816 	vpxor	%ymm0,%ymm12,%ymm12
   6817 	movq	0+0(%rbp),%rdx
   6818 	movq	%rdx,%r15
   6819 	mulxq	%r10,%r13,%r14
   6820 	mulxq	%r11,%rax,%rdx
   6821 	imulq	%r12,%r15
   6822 	addq	%rax,%r14
   6823 	adcq	%rdx,%r15
   6824 	vpshufb	%ymm8,%ymm15,%ymm15
   6825 	vpshufb	%ymm8,%ymm14,%ymm14
   6826 	vpshufb	%ymm8,%ymm13,%ymm13
   6827 	vpshufb	%ymm8,%ymm12,%ymm12
   6828 	vmovdqa	128(%rbp),%ymm8
   6829 	vpaddd	%ymm15,%ymm11,%ymm11
   6830 	vpaddd	%ymm14,%ymm10,%ymm10
   6831 	vpaddd	%ymm13,%ymm9,%ymm9
   6832 	vpaddd	%ymm12,%ymm8,%ymm8
   6833 	movq	8+0(%rbp),%rdx
   6834 	mulxq	%r10,%r10,%rax
   6835 	addq	%r10,%r14
   6836 	mulxq	%r11,%r11,%r9
   6837 	adcq	%r11,%r15
   6838 	adcq	$0,%r9
   6839 	imulq	%r12,%rdx
   6840 	vpxor	%ymm11,%ymm7,%ymm7
   6841 	vpxor	%ymm10,%ymm6,%ymm6
   6842 	vpxor	%ymm9,%ymm5,%ymm5
   6843 	vpxor	%ymm8,%ymm4,%ymm4
   6844 	vmovdqa	%ymm8,128(%rbp)
   6845 	vpsrld	$20,%ymm7,%ymm8
   6846 	vpslld	$32-20,%ymm7,%ymm7
   6847 	vpxor	%ymm8,%ymm7,%ymm7
   6848 	vpsrld	$20,%ymm6,%ymm8
   6849 	vpslld	$32-20,%ymm6,%ymm6
   6850 	vpxor	%ymm8,%ymm6,%ymm6
   6851 	vpsrld	$20,%ymm5,%ymm8
   6852 	addq	%rax,%r15
   6853 	adcq	%rdx,%r9
   6854 	vpslld	$32-20,%ymm5,%ymm5
   6855 	vpxor	%ymm8,%ymm5,%ymm5
   6856 	vpsrld	$20,%ymm4,%ymm8
   6857 	vpslld	$32-20,%ymm4,%ymm4
   6858 	vpxor	%ymm8,%ymm4,%ymm4
   6859 	vmovdqa	.rol8(%rip),%ymm8
   6860 	vpaddd	%ymm7,%ymm3,%ymm3
   6861 	vpaddd	%ymm6,%ymm2,%ymm2
   6862 	vpaddd	%ymm5,%ymm1,%ymm1
   6863 	vpaddd	%ymm4,%ymm0,%ymm0
   6864 	movq	%r13,%r10
   6865 	movq	%r14,%r11
   6866 	movq	%r15,%r12
   6867 	andq	$3,%r12
   6868 	movq	%r15,%r13
   6869 	andq	$-4,%r13
   6870 	movq	%r9,%r14
   6871 	shrdq	$2,%r9,%r15
   6872 	shrq	$2,%r9
   6873 	addq	%r13,%r10
   6874 	adcq	%r14,%r11
   6875 	adcq	$0,%r12
   6876 	addq	%r15,%r10
   6877 	adcq	%r9,%r11
   6878 	adcq	$0,%r12
   6879 
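# 4: joins the loop mid double round for the peeled first pass, skipping
# the leading 16-byte Poly1305 absorb at 2:.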
   6880 4:
   6881 	vpxor	%ymm3,%ymm15,%ymm15
   6882 	vpxor	%ymm2,%ymm14,%ymm14
   6883 	vpxor	%ymm1,%ymm13,%ymm13
   6884 	vpxor	%ymm0,%ymm12,%ymm12
   6885 	vpshufb	%ymm8,%ymm15,%ymm15
   6886 	vpshufb	%ymm8,%ymm14,%ymm14
   6887 	vpshufb	%ymm8,%ymm13,%ymm13
   6888 	vpshufb	%ymm8,%ymm12,%ymm12
   6889 	vmovdqa	128(%rbp),%ymm8
   6890 	addq	16(%rdi),%r10
   6891 	adcq	8+16(%rdi),%r11
   6892 	adcq	$1,%r12
   6893 	vpaddd	%ymm15,%ymm11,%ymm11
   6894 	vpaddd	%ymm14,%ymm10,%ymm10
   6895 	vpaddd	%ymm13,%ymm9,%ymm9
   6896 	vpaddd	%ymm12,%ymm8,%ymm8
   6897 	vpxor	%ymm11,%ymm7,%ymm7
   6898 	vpxor	%ymm10,%ymm6,%ymm6
   6899 	vpxor	%ymm9,%ymm5,%ymm5
   6900 	vpxor	%ymm8,%ymm4,%ymm4
   6901 	movq	0+0(%rbp),%rdx
   6902 	movq	%rdx,%r15
   6903 	mulxq	%r10,%r13,%r14
   6904 	mulxq	%r11,%rax,%rdx
   6905 	imulq	%r12,%r15
   6906 	addq	%rax,%r14
   6907 	adcq	%rdx,%r15
   6908 	vmovdqa	%ymm8,128(%rbp)
   6909 	vpsrld	$25,%ymm7,%ymm8
   6910 	vpslld	$32-25,%ymm7,%ymm7
   6911 	vpxor	%ymm8,%ymm7,%ymm7
   6912 	vpsrld	$25,%ymm6,%ymm8
   6913 	vpslld	$32-25,%ymm6,%ymm6
   6914 	vpxor	%ymm8,%ymm6,%ymm6
   6915 	vpsrld	$25,%ymm5,%ymm8
   6916 	vpslld	$32-25,%ymm5,%ymm5
   6917 	vpxor	%ymm8,%ymm5,%ymm5
   6918 	vpsrld	$25,%ymm4,%ymm8
   6919 	vpslld	$32-25,%ymm4,%ymm4
   6920 	vpxor	%ymm8,%ymm4,%ymm4
   6921 	vmovdqa	128(%rbp),%ymm8
   6922 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   6923 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6924 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   6925 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   6926 	movq	8+0(%rbp),%rdx
   6927 	mulxq	%r10,%r10,%rax
   6928 	addq	%r10,%r14
   6929 	mulxq	%r11,%r11,%r9
   6930 	adcq	%r11,%r15
   6931 	adcq	$0,%r9
   6932 	imulq	%r12,%rdx
   6933 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6934 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   6935 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   6936 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6937 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   6938 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   6939 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6940 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   6941 	vmovdqa	%ymm8,128(%rbp)
   6942 	vmovdqa	.rol16(%rip),%ymm8
   6943 	vpaddd	%ymm7,%ymm3,%ymm3
   6944 	vpaddd	%ymm6,%ymm2,%ymm2
   6945 	vpaddd	%ymm5,%ymm1,%ymm1
   6946 	vpaddd	%ymm4,%ymm0,%ymm0
   6947 	vpxor	%ymm3,%ymm15,%ymm15
   6948 	vpxor	%ymm2,%ymm14,%ymm14
   6949 	vpxor	%ymm1,%ymm13,%ymm13
   6950 	vpxor	%ymm0,%ymm12,%ymm12
   6951 	addq	%rax,%r15
   6952 	adcq	%rdx,%r9
   6953 	vpshufb	%ymm8,%ymm15,%ymm15
   6954 	vpshufb	%ymm8,%ymm14,%ymm14
   6955 	vpshufb	%ymm8,%ymm13,%ymm13
   6956 	vpshufb	%ymm8,%ymm12,%ymm12
   6957 	vmovdqa	128(%rbp),%ymm8
   6958 	vpaddd	%ymm15,%ymm11,%ymm11
   6959 	vpaddd	%ymm14,%ymm10,%ymm10
   6960 	vpaddd	%ymm13,%ymm9,%ymm9
   6961 	vpaddd	%ymm12,%ymm8,%ymm8
   6962 	movq	%r13,%r10
   6963 	movq	%r14,%r11
   6964 	movq	%r15,%r12
   6965 	andq	$3,%r12
   6966 	movq	%r15,%r13
   6967 	andq	$-4,%r13
   6968 	movq	%r9,%r14
   6969 	shrdq	$2,%r9,%r15
   6970 	shrq	$2,%r9
   6971 	addq	%r13,%r10
   6972 	adcq	%r14,%r11
   6973 	adcq	$0,%r12
   6974 	addq	%r15,%r10
   6975 	adcq	%r9,%r11
   6976 	adcq	$0,%r12
   6977 	vpxor	%ymm11,%ymm7,%ymm7
   6978 	vpxor	%ymm10,%ymm6,%ymm6
   6979 	vpxor	%ymm9,%ymm5,%ymm5
   6980 	vpxor	%ymm8,%ymm4,%ymm4
   6981 	vmovdqa	%ymm8,128(%rbp)
   6982 	vpsrld	$20,%ymm7,%ymm8
   6983 	vpslld	$32-20,%ymm7,%ymm7
   6984 	vpxor	%ymm8,%ymm7,%ymm7
   6985 	addq	32(%rdi),%r10
   6986 	adcq	8+32(%rdi),%r11
   6987 	adcq	$1,%r12
   6988 
   6989 	leaq	48(%rdi),%rdi
   6990 	vpsrld	$20,%ymm6,%ymm8
   6991 	vpslld	$32-20,%ymm6,%ymm6
   6992 	vpxor	%ymm8,%ymm6,%ymm6
   6993 	vpsrld	$20,%ymm5,%ymm8
   6994 	vpslld	$32-20,%ymm5,%ymm5
   6995 	vpxor	%ymm8,%ymm5,%ymm5
   6996 	vpsrld	$20,%ymm4,%ymm8
   6997 	vpslld	$32-20,%ymm4,%ymm4
   6998 	vpxor	%ymm8,%ymm4,%ymm4
   6999 	vmovdqa	.rol8(%rip),%ymm8
   7000 	vpaddd	%ymm7,%ymm3,%ymm3
   7001 	vpaddd	%ymm6,%ymm2,%ymm2
   7002 	vpaddd	%ymm5,%ymm1,%ymm1
   7003 	vpaddd	%ymm4,%ymm0,%ymm0
   7004 	vpxor	%ymm3,%ymm15,%ymm15
   7005 	vpxor	%ymm2,%ymm14,%ymm14
   7006 	vpxor	%ymm1,%ymm13,%ymm13
   7007 	vpxor	%ymm0,%ymm12,%ymm12
   7008 	movq	0+0(%rbp),%rdx
   7009 	movq	%rdx,%r15
   7010 	mulxq	%r10,%r13,%r14
   7011 	mulxq	%r11,%rax,%rdx
   7012 	imulq	%r12,%r15
   7013 	addq	%rax,%r14
   7014 	adcq	%rdx,%r15
   7015 	vpshufb	%ymm8,%ymm15,%ymm15
   7016 	vpshufb	%ymm8,%ymm14,%ymm14
   7017 	vpshufb	%ymm8,%ymm13,%ymm13
   7018 	vpshufb	%ymm8,%ymm12,%ymm12
   7019 	vmovdqa	128(%rbp),%ymm8
   7020 	vpaddd	%ymm15,%ymm11,%ymm11
   7021 	vpaddd	%ymm14,%ymm10,%ymm10
   7022 	vpaddd	%ymm13,%ymm9,%ymm9
   7023 	movq	8+0(%rbp),%rdx
   7024 	mulxq	%r10,%r10,%rax
   7025 	addq	%r10,%r14
   7026 	mulxq	%r11,%r11,%r9
   7027 	adcq	%r11,%r15
   7028 	adcq	$0,%r9
   7029 	imulq	%r12,%rdx
   7030 	vpaddd	%ymm12,%ymm8,%ymm8
   7031 	vpxor	%ymm11,%ymm7,%ymm7
   7032 	vpxor	%ymm10,%ymm6,%ymm6
   7033 	vpxor	%ymm9,%ymm5,%ymm5
   7034 	vpxor	%ymm8,%ymm4,%ymm4
   7035 	vmovdqa	%ymm8,128(%rbp)
   7036 	vpsrld	$25,%ymm7,%ymm8
   7037 	vpslld	$32-25,%ymm7,%ymm7
   7038 	addq	%rax,%r15
   7039 	adcq	%rdx,%r9
   7040 	vpxor	%ymm8,%ymm7,%ymm7
   7041 	vpsrld	$25,%ymm6,%ymm8
   7042 	vpslld	$32-25,%ymm6,%ymm6
   7043 	vpxor	%ymm8,%ymm6,%ymm6
   7044 	vpsrld	$25,%ymm5,%ymm8
   7045 	vpslld	$32-25,%ymm5,%ymm5
   7046 	vpxor	%ymm8,%ymm5,%ymm5
   7047 	vpsrld	$25,%ymm4,%ymm8
   7048 	vpslld	$32-25,%ymm4,%ymm4
   7049 	vpxor	%ymm8,%ymm4,%ymm4
   7050 	vmovdqa	128(%rbp),%ymm8
   7051 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   7052 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   7053 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   7054 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   7055 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   7056 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   7057 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   7058 	movq	%r13,%r10
   7059 	movq	%r14,%r11
   7060 	movq	%r15,%r12
   7061 	andq	$3,%r12
   7062 	movq	%r15,%r13
   7063 	andq	$-4,%r13
   7064 	movq	%r9,%r14
   7065 	shrdq	$2,%r9,%r15
   7066 	shrq	$2,%r9
   7067 	addq	%r13,%r10
   7068 	adcq	%r14,%r11
   7069 	adcq	$0,%r12
   7070 	addq	%r15,%r10
   7071 	adcq	%r9,%r11
   7072 	adcq	$0,%r12
   7073 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7074 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   7075 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   7076 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7077 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   7078 
   7079 	decq	%rcx
   7080 	jne	2b
   7081 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   7082 	vpaddd	64(%rbp),%ymm7,%ymm7
   7083 	vpaddd	96(%rbp),%ymm11,%ymm11
   7084 	vpaddd	256(%rbp),%ymm15,%ymm15
   7085 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   7086 	vpaddd	64(%rbp),%ymm6,%ymm6
   7087 	vpaddd	96(%rbp),%ymm10,%ymm10
   7088 	vpaddd	224(%rbp),%ymm14,%ymm14
   7089 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   7090 	vpaddd	64(%rbp),%ymm5,%ymm5
   7091 	vpaddd	96(%rbp),%ymm9,%ymm9
   7092 	vpaddd	192(%rbp),%ymm13,%ymm13
   7093 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   7094 	vpaddd	64(%rbp),%ymm4,%ymm4
   7095 	vpaddd	96(%rbp),%ymm8,%ymm8
   7096 	vpaddd	160(%rbp),%ymm12,%ymm12
   7097 
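# One 512-byte chunk of key stream is complete: the saved initial state
# (key words at 64/96(%rbp), counters at 160..256(%rbp)) was added back
# above. Now de-interleave each state's two 64-byte blocks with
# vperm2i128, XOR with the plaintext and store, absorbing the last 32
# lagging bytes of the previous chunk's ciphertext along the way.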
   7098 	leaq	32(%rdi),%rdi
   7099 	vmovdqa	%ymm0,128(%rbp)
   7100 	addq	-32(%rdi),%r10
   7101 	adcq	8+-32(%rdi),%r11
   7102 	adcq	$1,%r12
   7103 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   7104 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   7105 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   7106 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   7107 	vpxor	0+0(%rsi),%ymm0,%ymm0
   7108 	vpxor	32+0(%rsi),%ymm3,%ymm3
   7109 	vpxor	64+0(%rsi),%ymm7,%ymm7
   7110 	vpxor	96+0(%rsi),%ymm11,%ymm11
   7111 	vmovdqu	%ymm0,0+0(%rdi)
   7112 	vmovdqu	%ymm3,32+0(%rdi)
   7113 	vmovdqu	%ymm7,64+0(%rdi)
   7114 	vmovdqu	%ymm11,96+0(%rdi)
   7115 
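# Scalar Poly1305 block multiply: with the accumulator h in %r10:%r11:%r12
# and the clamped key r at 0(%rbp)/8(%rbp), compute h*r by schoolbook
# mulq, then reduce modulo 2^130-5 using 2^130 == 5: the bits above 2^130
# (call them t) are folded back in as 4*t + t via the andq $-4 / shrdq $2
# sequence.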
   7116 	vmovdqa	128(%rbp),%ymm0
   7117 	movq	0+0(%rbp),%rax
   7118 	movq	%rax,%r15
   7119 	mulq	%r10
   7120 	movq	%rax,%r13
   7121 	movq	%rdx,%r14
   7122 	movq	0+0(%rbp),%rax
   7123 	mulq	%r11
   7124 	imulq	%r12,%r15
   7125 	addq	%rax,%r14
   7126 	adcq	%rdx,%r15
   7127 	movq	8+0(%rbp),%rax
   7128 	movq	%rax,%r9
   7129 	mulq	%r10
   7130 	addq	%rax,%r14
   7131 	adcq	$0,%rdx
   7132 	movq	%rdx,%r10
   7133 	movq	8+0(%rbp),%rax
   7134 	mulq	%r11
   7135 	addq	%rax,%r15
   7136 	adcq	$0,%rdx
   7137 	imulq	%r12,%r9
   7138 	addq	%r10,%r15
   7139 	adcq	%rdx,%r9
   7140 	movq	%r13,%r10
   7141 	movq	%r14,%r11
   7142 	movq	%r15,%r12
   7143 	andq	$3,%r12
   7144 	movq	%r15,%r13
   7145 	andq	$-4,%r13
   7146 	movq	%r9,%r14
   7147 	shrdq	$2,%r9,%r15
   7148 	shrq	$2,%r9
   7149 	addq	%r13,%r10
   7150 	adcq	%r14,%r11
   7151 	adcq	$0,%r12
   7152 	addq	%r15,%r10
   7153 	adcq	%r9,%r11
   7154 	adcq	$0,%r12
   7155 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   7156 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   7157 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   7158 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   7159 	vpxor	0+128(%rsi),%ymm3,%ymm3
   7160 	vpxor	32+128(%rsi),%ymm2,%ymm2
   7161 	vpxor	64+128(%rsi),%ymm6,%ymm6
   7162 	vpxor	96+128(%rsi),%ymm10,%ymm10
   7163 	vmovdqu	%ymm3,0+128(%rdi)
   7164 	vmovdqu	%ymm2,32+128(%rdi)
   7165 	vmovdqu	%ymm6,64+128(%rdi)
   7166 	vmovdqu	%ymm10,96+128(%rdi)
   7167 	addq	-16(%rdi),%r10
   7168 	adcq	8+-16(%rdi),%r11
   7169 	adcq	$1,%r12
   7170 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   7171 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   7172 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   7173 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   7174 	vpxor	0+256(%rsi),%ymm3,%ymm3
   7175 	vpxor	32+256(%rsi),%ymm1,%ymm1
   7176 	vpxor	64+256(%rsi),%ymm5,%ymm5
   7177 	vpxor	96+256(%rsi),%ymm9,%ymm9
   7178 	vmovdqu	%ymm3,0+256(%rdi)
   7179 	vmovdqu	%ymm1,32+256(%rdi)
   7180 	vmovdqu	%ymm5,64+256(%rdi)
   7181 	vmovdqu	%ymm9,96+256(%rdi)
   7182 	movq	0+0(%rbp),%rax
   7183 	movq	%rax,%r15
   7184 	mulq	%r10
   7185 	movq	%rax,%r13
   7186 	movq	%rdx,%r14
   7187 	movq	0+0(%rbp),%rax
   7188 	mulq	%r11
   7189 	imulq	%r12,%r15
   7190 	addq	%rax,%r14
   7191 	adcq	%rdx,%r15
   7192 	movq	8+0(%rbp),%rax
   7193 	movq	%rax,%r9
   7194 	mulq	%r10
   7195 	addq	%rax,%r14
   7196 	adcq	$0,%rdx
   7197 	movq	%rdx,%r10
   7198 	movq	8+0(%rbp),%rax
   7199 	mulq	%r11
   7200 	addq	%rax,%r15
   7201 	adcq	$0,%rdx
   7202 	imulq	%r12,%r9
   7203 	addq	%r10,%r15
   7204 	adcq	%rdx,%r9
   7205 	movq	%r13,%r10
   7206 	movq	%r14,%r11
   7207 	movq	%r15,%r12
   7208 	andq	$3,%r12
   7209 	movq	%r15,%r13
   7210 	andq	$-4,%r13
   7211 	movq	%r9,%r14
   7212 	shrdq	$2,%r9,%r15
   7213 	shrq	$2,%r9
   7214 	addq	%r13,%r10
   7215 	adcq	%r14,%r11
   7216 	adcq	$0,%r12
   7217 	addq	%r15,%r10
   7218 	adcq	%r9,%r11
   7219 	adcq	$0,%r12
   7220 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   7221 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
   7222 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
   7223 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
   7224 	vpxor	0+384(%rsi),%ymm3,%ymm3
   7225 	vpxor	32+384(%rsi),%ymm0,%ymm0
   7226 	vpxor	64+384(%rsi),%ymm4,%ymm4
   7227 	vpxor	96+384(%rsi),%ymm8,%ymm8
   7228 	vmovdqu	%ymm3,0+384(%rdi)
   7229 	vmovdqu	%ymm0,32+384(%rdi)
   7230 	vmovdqu	%ymm4,64+384(%rdi)
   7231 	vmovdqu	%ymm8,96+384(%rdi)
   7232 
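# Advance past the 512 bytes just written and loop while more than 512
# bytes remain. On exit, absorb 32 more bytes of the lagging ciphertext
# before choosing a tail routine for the final <= 512 bytes.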
   7233 	leaq	512(%rsi),%rsi
   7234 	subq	$512,%rbx
   7235 	cmpq	$512,%rbx
   7236 	jg	1b
   7237 	addq	0(%rdi),%r10
   7238 	adcq	8+0(%rdi),%r11
   7239 	adcq	$1,%r12
   7240 	movq	0+0(%rbp),%rax
   7241 	movq	%rax,%r15
   7242 	mulq	%r10
   7243 	movq	%rax,%r13
   7244 	movq	%rdx,%r14
   7245 	movq	0+0(%rbp),%rax
   7246 	mulq	%r11
   7247 	imulq	%r12,%r15
   7248 	addq	%rax,%r14
   7249 	adcq	%rdx,%r15
   7250 	movq	8+0(%rbp),%rax
   7251 	movq	%rax,%r9
   7252 	mulq	%r10
   7253 	addq	%rax,%r14
   7254 	adcq	$0,%rdx
   7255 	movq	%rdx,%r10
   7256 	movq	8+0(%rbp),%rax
   7257 	mulq	%r11
   7258 	addq	%rax,%r15
   7259 	adcq	$0,%rdx
   7260 	imulq	%r12,%r9
   7261 	addq	%r10,%r15
   7262 	adcq	%rdx,%r9
   7263 	movq	%r13,%r10
   7264 	movq	%r14,%r11
   7265 	movq	%r15,%r12
   7266 	andq	$3,%r12
   7267 	movq	%r15,%r13
   7268 	andq	$-4,%r13
   7269 	movq	%r9,%r14
   7270 	shrdq	$2,%r9,%r15
   7271 	shrq	$2,%r9
   7272 	addq	%r13,%r10
   7273 	adcq	%r14,%r11
   7274 	adcq	$0,%r12
   7275 	addq	%r15,%r10
   7276 	adcq	%r9,%r11
   7277 	adcq	$0,%r12
   7278 	addq	16(%rdi),%r10
   7279 	adcq	8+16(%rdi),%r11
   7280 	adcq	$1,%r12
   7281 	movq	0+0(%rbp),%rax
   7282 	movq	%rax,%r15
   7283 	mulq	%r10
   7284 	movq	%rax,%r13
   7285 	movq	%rdx,%r14
   7286 	movq	0+0(%rbp),%rax
   7287 	mulq	%r11
   7288 	imulq	%r12,%r15
   7289 	addq	%rax,%r14
   7290 	adcq	%rdx,%r15
   7291 	movq	8+0(%rbp),%rax
   7292 	movq	%rax,%r9
   7293 	mulq	%r10
   7294 	addq	%rax,%r14
   7295 	adcq	$0,%rdx
   7296 	movq	%rdx,%r10
   7297 	movq	8+0(%rbp),%rax
   7298 	mulq	%r11
   7299 	addq	%rax,%r15
   7300 	adcq	$0,%rdx
   7301 	imulq	%r12,%r9
   7302 	addq	%r10,%r15
   7303 	adcq	%rdx,%r9
   7304 	movq	%r13,%r10
   7305 	movq	%r14,%r11
   7306 	movq	%r15,%r12
   7307 	andq	$3,%r12
   7308 	movq	%r15,%r13
   7309 	andq	$-4,%r13
   7310 	movq	%r9,%r14
   7311 	shrdq	$2,%r9,%r15
   7312 	shrq	$2,%r9
   7313 	addq	%r13,%r10
   7314 	adcq	%r14,%r11
   7315 	adcq	$0,%r12
   7316 	addq	%r15,%r10
   7317 	adcq	%r9,%r11
   7318 	adcq	$0,%r12
   7319 
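# Tail dispatch. In the tail loops %rcx counts double-rounds that each
# absorb 48 bytes of pending ciphertext (one block at 1:, two inside 2:)
# and %r8 counts extra double-rounds that absorb 32; the two are set so
# every tail runs exactly 10 double-rounds while draining the backlog.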
   7320 	leaq	32(%rdi),%rdi
   7321 	movq	$10,%rcx
   7322 	xorq	%r8,%r8
   7323 	cmpq	$128,%rbx
   7324 	ja	3f
   7325 
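# seal_avx2_tail_128: at most 128 trailing bytes, so one two-block state
# suffices; the result is transposed and finished in seal_avx2_short_loop.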
   7326 seal_avx2_tail_128:
   7327 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7328 	vmovdqa	64(%rbp),%ymm4
   7329 	vmovdqa	96(%rbp),%ymm8
   7330 	vmovdqa	.avx2_inc(%rip),%ymm12
   7331 	vpaddd	160(%rbp),%ymm12,%ymm12
   7332 	vmovdqa	%ymm12,160(%rbp)
   7333 
   7334 1:
   7335 	addq	0(%rdi),%r10
   7336 	adcq	8+0(%rdi),%r11
   7337 	adcq	$1,%r12
   7338 	movq	0+0(%rbp),%rax
   7339 	movq	%rax,%r15
   7340 	mulq	%r10
   7341 	movq	%rax,%r13
   7342 	movq	%rdx,%r14
   7343 	movq	0+0(%rbp),%rax
   7344 	mulq	%r11
   7345 	imulq	%r12,%r15
   7346 	addq	%rax,%r14
   7347 	adcq	%rdx,%r15
   7348 	movq	8+0(%rbp),%rax
   7349 	movq	%rax,%r9
   7350 	mulq	%r10
   7351 	addq	%rax,%r14
   7352 	adcq	$0,%rdx
   7353 	movq	%rdx,%r10
   7354 	movq	8+0(%rbp),%rax
   7355 	mulq	%r11
   7356 	addq	%rax,%r15
   7357 	adcq	$0,%rdx
   7358 	imulq	%r12,%r9
   7359 	addq	%r10,%r15
   7360 	adcq	%rdx,%r9
   7361 	movq	%r13,%r10
   7362 	movq	%r14,%r11
   7363 	movq	%r15,%r12
   7364 	andq	$3,%r12
   7365 	movq	%r15,%r13
   7366 	andq	$-4,%r13
   7367 	movq	%r9,%r14
   7368 	shrdq	$2,%r9,%r15
   7369 	shrq	$2,%r9
   7370 	addq	%r13,%r10
   7371 	adcq	%r14,%r11
   7372 	adcq	$0,%r12
   7373 	addq	%r15,%r10
   7374 	adcq	%r9,%r11
   7375 	adcq	$0,%r12
   7376 
   7377 	leaq	16(%rdi),%rdi
   7378 2:
   7379 	vpaddd	%ymm4,%ymm0,%ymm0
   7380 	vpxor	%ymm0,%ymm12,%ymm12
   7381 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7382 	vpaddd	%ymm12,%ymm8,%ymm8
   7383 	vpxor	%ymm8,%ymm4,%ymm4
   7384 	vpsrld	$20,%ymm4,%ymm3
   7385 	vpslld	$12,%ymm4,%ymm4
   7386 	vpxor	%ymm3,%ymm4,%ymm4
   7387 	vpaddd	%ymm4,%ymm0,%ymm0
   7388 	vpxor	%ymm0,%ymm12,%ymm12
   7389 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7390 	vpaddd	%ymm12,%ymm8,%ymm8
   7391 	vpxor	%ymm8,%ymm4,%ymm4
   7392 	vpslld	$7,%ymm4,%ymm3
   7393 	vpsrld	$25,%ymm4,%ymm4
   7394 	vpxor	%ymm3,%ymm4,%ymm4
   7395 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   7396 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7397 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   7398 	addq	0(%rdi),%r10
   7399 	adcq	8+0(%rdi),%r11
   7400 	adcq	$1,%r12
   7401 	movq	0+0(%rbp),%rax
   7402 	movq	%rax,%r15
   7403 	mulq	%r10
   7404 	movq	%rax,%r13
   7405 	movq	%rdx,%r14
   7406 	movq	0+0(%rbp),%rax
   7407 	mulq	%r11
   7408 	imulq	%r12,%r15
   7409 	addq	%rax,%r14
   7410 	adcq	%rdx,%r15
   7411 	movq	8+0(%rbp),%rax
   7412 	movq	%rax,%r9
   7413 	mulq	%r10
   7414 	addq	%rax,%r14
   7415 	adcq	$0,%rdx
   7416 	movq	%rdx,%r10
   7417 	movq	8+0(%rbp),%rax
   7418 	mulq	%r11
   7419 	addq	%rax,%r15
   7420 	adcq	$0,%rdx
   7421 	imulq	%r12,%r9
   7422 	addq	%r10,%r15
   7423 	adcq	%rdx,%r9
   7424 	movq	%r13,%r10
   7425 	movq	%r14,%r11
   7426 	movq	%r15,%r12
   7427 	andq	$3,%r12
   7428 	movq	%r15,%r13
   7429 	andq	$-4,%r13
   7430 	movq	%r9,%r14
   7431 	shrdq	$2,%r9,%r15
   7432 	shrq	$2,%r9
   7433 	addq	%r13,%r10
   7434 	adcq	%r14,%r11
   7435 	adcq	$0,%r12
   7436 	addq	%r15,%r10
   7437 	adcq	%r9,%r11
   7438 	adcq	$0,%r12
   7439 	vpaddd	%ymm4,%ymm0,%ymm0
   7440 	vpxor	%ymm0,%ymm12,%ymm12
   7441 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7442 	vpaddd	%ymm12,%ymm8,%ymm8
   7443 	vpxor	%ymm8,%ymm4,%ymm4
   7444 	vpsrld	$20,%ymm4,%ymm3
   7445 	vpslld	$12,%ymm4,%ymm4
   7446 	vpxor	%ymm3,%ymm4,%ymm4
   7447 	vpaddd	%ymm4,%ymm0,%ymm0
   7448 	vpxor	%ymm0,%ymm12,%ymm12
   7449 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7450 	vpaddd	%ymm12,%ymm8,%ymm8
   7451 	vpxor	%ymm8,%ymm4,%ymm4
   7452 	vpslld	$7,%ymm4,%ymm3
   7453 	vpsrld	$25,%ymm4,%ymm4
   7454 	vpxor	%ymm3,%ymm4,%ymm4
   7455 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   7456 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7457 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   7458 	addq	16(%rdi),%r10
   7459 	adcq	8+16(%rdi),%r11
   7460 	adcq	$1,%r12
   7461 	movq	0+0(%rbp),%rax
   7462 	movq	%rax,%r15
   7463 	mulq	%r10
   7464 	movq	%rax,%r13
   7465 	movq	%rdx,%r14
   7466 	movq	0+0(%rbp),%rax
   7467 	mulq	%r11
   7468 	imulq	%r12,%r15
   7469 	addq	%rax,%r14
   7470 	adcq	%rdx,%r15
   7471 	movq	8+0(%rbp),%rax
   7472 	movq	%rax,%r9
   7473 	mulq	%r10
   7474 	addq	%rax,%r14
   7475 	adcq	$0,%rdx
   7476 	movq	%rdx,%r10
   7477 	movq	8+0(%rbp),%rax
   7478 	mulq	%r11
   7479 	addq	%rax,%r15
   7480 	adcq	$0,%rdx
   7481 	imulq	%r12,%r9
   7482 	addq	%r10,%r15
   7483 	adcq	%rdx,%r9
   7484 	movq	%r13,%r10
   7485 	movq	%r14,%r11
   7486 	movq	%r15,%r12
   7487 	andq	$3,%r12
   7488 	movq	%r15,%r13
   7489 	andq	$-4,%r13
   7490 	movq	%r9,%r14
   7491 	shrdq	$2,%r9,%r15
   7492 	shrq	$2,%r9
   7493 	addq	%r13,%r10
   7494 	adcq	%r14,%r11
   7495 	adcq	$0,%r12
   7496 	addq	%r15,%r10
   7497 	adcq	%r9,%r11
   7498 	adcq	$0,%r12
   7499 
   7500 	leaq	32(%rdi),%rdi
   7501 	decq	%rcx
   7502 	jg	1b
   7503 	decq	%r8
   7504 	jge	2b
   7505 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   7506 	vpaddd	64(%rbp),%ymm4,%ymm4
   7507 	vpaddd	96(%rbp),%ymm8,%ymm8
   7508 	vpaddd	160(%rbp),%ymm12,%ymm12
   7509 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   7510 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   7511 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   7512 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   7513 	vmovdqa	%ymm3,%ymm8
   7514 
   7515 	jmp	seal_avx2_short_loop
   7516 3:
   7517 	cmpq	$256,%rbx
   7518 	ja	3f
   7519 
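# seal_avx2_tail_256: at most 256 trailing bytes; two states (four
# blocks). The first 128 bytes are XORed and stored here, the remainder is
# handled by seal_avx2_hash.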
   7520 seal_avx2_tail_256:
   7521 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7522 	vmovdqa	64(%rbp),%ymm4
   7523 	vmovdqa	96(%rbp),%ymm8
   7524 	vmovdqa	%ymm0,%ymm1
   7525 	vmovdqa	%ymm4,%ymm5
   7526 	vmovdqa	%ymm8,%ymm9
   7527 	vmovdqa	.avx2_inc(%rip),%ymm12
   7528 	vpaddd	160(%rbp),%ymm12,%ymm13
   7529 	vpaddd	%ymm13,%ymm12,%ymm12
   7530 	vmovdqa	%ymm12,160(%rbp)
   7531 	vmovdqa	%ymm13,192(%rbp)
   7532 
   7533 1:
   7534 	addq	0(%rdi),%r10
   7535 	adcq	8+0(%rdi),%r11
   7536 	adcq	$1,%r12
   7537 	movq	0+0(%rbp),%rax
   7538 	movq	%rax,%r15
   7539 	mulq	%r10
   7540 	movq	%rax,%r13
   7541 	movq	%rdx,%r14
   7542 	movq	0+0(%rbp),%rax
   7543 	mulq	%r11
   7544 	imulq	%r12,%r15
   7545 	addq	%rax,%r14
   7546 	adcq	%rdx,%r15
   7547 	movq	8+0(%rbp),%rax
   7548 	movq	%rax,%r9
   7549 	mulq	%r10
   7550 	addq	%rax,%r14
   7551 	adcq	$0,%rdx
   7552 	movq	%rdx,%r10
   7553 	movq	8+0(%rbp),%rax
   7554 	mulq	%r11
   7555 	addq	%rax,%r15
   7556 	adcq	$0,%rdx
   7557 	imulq	%r12,%r9
   7558 	addq	%r10,%r15
   7559 	adcq	%rdx,%r9
   7560 	movq	%r13,%r10
   7561 	movq	%r14,%r11
   7562 	movq	%r15,%r12
   7563 	andq	$3,%r12
   7564 	movq	%r15,%r13
   7565 	andq	$-4,%r13
   7566 	movq	%r9,%r14
   7567 	shrdq	$2,%r9,%r15
   7568 	shrq	$2,%r9
   7569 	addq	%r13,%r10
   7570 	adcq	%r14,%r11
   7571 	adcq	$0,%r12
   7572 	addq	%r15,%r10
   7573 	adcq	%r9,%r11
   7574 	adcq	$0,%r12
   7575 
   7576 	leaq	16(%rdi),%rdi
   7577 2:
   7578 	vpaddd	%ymm4,%ymm0,%ymm0
   7579 	vpxor	%ymm0,%ymm12,%ymm12
   7580 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7581 	vpaddd	%ymm12,%ymm8,%ymm8
   7582 	vpxor	%ymm8,%ymm4,%ymm4
   7583 	vpsrld	$20,%ymm4,%ymm3
   7584 	vpslld	$12,%ymm4,%ymm4
   7585 	vpxor	%ymm3,%ymm4,%ymm4
   7586 	vpaddd	%ymm4,%ymm0,%ymm0
   7587 	vpxor	%ymm0,%ymm12,%ymm12
   7588 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7589 	vpaddd	%ymm12,%ymm8,%ymm8
   7590 	vpxor	%ymm8,%ymm4,%ymm4
   7591 	vpslld	$7,%ymm4,%ymm3
   7592 	vpsrld	$25,%ymm4,%ymm4
   7593 	vpxor	%ymm3,%ymm4,%ymm4
   7594 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   7595 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7596 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   7597 	vpaddd	%ymm5,%ymm1,%ymm1
   7598 	vpxor	%ymm1,%ymm13,%ymm13
   7599 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   7600 	vpaddd	%ymm13,%ymm9,%ymm9
   7601 	vpxor	%ymm9,%ymm5,%ymm5
   7602 	vpsrld	$20,%ymm5,%ymm3
   7603 	vpslld	$12,%ymm5,%ymm5
   7604 	vpxor	%ymm3,%ymm5,%ymm5
   7605 	vpaddd	%ymm5,%ymm1,%ymm1
   7606 	vpxor	%ymm1,%ymm13,%ymm13
   7607 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   7608 	vpaddd	%ymm13,%ymm9,%ymm9
   7609 	vpxor	%ymm9,%ymm5,%ymm5
   7610 	vpslld	$7,%ymm5,%ymm3
   7611 	vpsrld	$25,%ymm5,%ymm5
   7612 	vpxor	%ymm3,%ymm5,%ymm5
   7613 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   7614 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7615 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   7616 	addq	0(%rdi),%r10
   7617 	adcq	8+0(%rdi),%r11
   7618 	adcq	$1,%r12
   7619 	movq	0+0(%rbp),%rax
   7620 	movq	%rax,%r15
   7621 	mulq	%r10
   7622 	movq	%rax,%r13
   7623 	movq	%rdx,%r14
   7624 	movq	0+0(%rbp),%rax
   7625 	mulq	%r11
   7626 	imulq	%r12,%r15
   7627 	addq	%rax,%r14
   7628 	adcq	%rdx,%r15
   7629 	movq	8+0(%rbp),%rax
   7630 	movq	%rax,%r9
   7631 	mulq	%r10
   7632 	addq	%rax,%r14
   7633 	adcq	$0,%rdx
   7634 	movq	%rdx,%r10
   7635 	movq	8+0(%rbp),%rax
   7636 	mulq	%r11
   7637 	addq	%rax,%r15
   7638 	adcq	$0,%rdx
   7639 	imulq	%r12,%r9
   7640 	addq	%r10,%r15
   7641 	adcq	%rdx,%r9
   7642 	movq	%r13,%r10
   7643 	movq	%r14,%r11
   7644 	movq	%r15,%r12
   7645 	andq	$3,%r12
   7646 	movq	%r15,%r13
   7647 	andq	$-4,%r13
   7648 	movq	%r9,%r14
   7649 	shrdq	$2,%r9,%r15
   7650 	shrq	$2,%r9
   7651 	addq	%r13,%r10
   7652 	adcq	%r14,%r11
   7653 	adcq	$0,%r12
   7654 	addq	%r15,%r10
   7655 	adcq	%r9,%r11
   7656 	adcq	$0,%r12
   7657 	vpaddd	%ymm4,%ymm0,%ymm0
   7658 	vpxor	%ymm0,%ymm12,%ymm12
   7659 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7660 	vpaddd	%ymm12,%ymm8,%ymm8
   7661 	vpxor	%ymm8,%ymm4,%ymm4
   7662 	vpsrld	$20,%ymm4,%ymm3
   7663 	vpslld	$12,%ymm4,%ymm4
   7664 	vpxor	%ymm3,%ymm4,%ymm4
   7665 	vpaddd	%ymm4,%ymm0,%ymm0
   7666 	vpxor	%ymm0,%ymm12,%ymm12
   7667 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7668 	vpaddd	%ymm12,%ymm8,%ymm8
   7669 	vpxor	%ymm8,%ymm4,%ymm4
   7670 	vpslld	$7,%ymm4,%ymm3
   7671 	vpsrld	$25,%ymm4,%ymm4
   7672 	vpxor	%ymm3,%ymm4,%ymm4
   7673 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   7674 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7675 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   7676 	vpaddd	%ymm5,%ymm1,%ymm1
   7677 	vpxor	%ymm1,%ymm13,%ymm13
   7678 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   7679 	vpaddd	%ymm13,%ymm9,%ymm9
   7680 	vpxor	%ymm9,%ymm5,%ymm5
   7681 	vpsrld	$20,%ymm5,%ymm3
   7682 	vpslld	$12,%ymm5,%ymm5
   7683 	vpxor	%ymm3,%ymm5,%ymm5
   7684 	vpaddd	%ymm5,%ymm1,%ymm1
   7685 	vpxor	%ymm1,%ymm13,%ymm13
   7686 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   7687 	vpaddd	%ymm13,%ymm9,%ymm9
   7688 	vpxor	%ymm9,%ymm5,%ymm5
   7689 	vpslld	$7,%ymm5,%ymm3
   7690 	vpsrld	$25,%ymm5,%ymm5
   7691 	vpxor	%ymm3,%ymm5,%ymm5
   7692 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   7693 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7694 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   7695 	addq	16(%rdi),%r10
   7696 	adcq	8+16(%rdi),%r11
   7697 	adcq	$1,%r12
   7698 	movq	0+0(%rbp),%rax
   7699 	movq	%rax,%r15
   7700 	mulq	%r10
   7701 	movq	%rax,%r13
   7702 	movq	%rdx,%r14
   7703 	movq	0+0(%rbp),%rax
   7704 	mulq	%r11
   7705 	imulq	%r12,%r15
   7706 	addq	%rax,%r14
   7707 	adcq	%rdx,%r15
   7708 	movq	8+0(%rbp),%rax
   7709 	movq	%rax,%r9
   7710 	mulq	%r10
   7711 	addq	%rax,%r14
   7712 	adcq	$0,%rdx
   7713 	movq	%rdx,%r10
   7714 	movq	8+0(%rbp),%rax
   7715 	mulq	%r11
   7716 	addq	%rax,%r15
   7717 	adcq	$0,%rdx
   7718 	imulq	%r12,%r9
   7719 	addq	%r10,%r15
   7720 	adcq	%rdx,%r9
   7721 	movq	%r13,%r10
   7722 	movq	%r14,%r11
   7723 	movq	%r15,%r12
   7724 	andq	$3,%r12
   7725 	movq	%r15,%r13
   7726 	andq	$-4,%r13
   7727 	movq	%r9,%r14
   7728 	shrdq	$2,%r9,%r15
   7729 	shrq	$2,%r9
   7730 	addq	%r13,%r10
   7731 	adcq	%r14,%r11
   7732 	adcq	$0,%r12
   7733 	addq	%r15,%r10
   7734 	adcq	%r9,%r11
   7735 	adcq	$0,%r12
   7736 
   7737 	leaq	32(%rdi),%rdi
   7738 	decq	%rcx
   7739 	jg	1b
   7740 	decq	%r8
   7741 	jge	2b
   7742 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   7743 	vpaddd	64(%rbp),%ymm5,%ymm5
   7744 	vpaddd	96(%rbp),%ymm9,%ymm9
   7745 	vpaddd	192(%rbp),%ymm13,%ymm13
   7746 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   7747 	vpaddd	64(%rbp),%ymm4,%ymm4
   7748 	vpaddd	96(%rbp),%ymm8,%ymm8
   7749 	vpaddd	160(%rbp),%ymm12,%ymm12
   7750 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   7751 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   7752 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   7753 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   7754 	vpxor	0+0(%rsi),%ymm3,%ymm3
   7755 	vpxor	32+0(%rsi),%ymm1,%ymm1
   7756 	vpxor	64+0(%rsi),%ymm5,%ymm5
   7757 	vpxor	96+0(%rsi),%ymm9,%ymm9
   7758 	vmovdqu	%ymm3,0+0(%rdi)
   7759 	vmovdqu	%ymm1,32+0(%rdi)
   7760 	vmovdqu	%ymm5,64+0(%rdi)
   7761 	vmovdqu	%ymm9,96+0(%rdi)
   7762 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   7763 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   7764 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   7765 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   7766 	vmovdqa	%ymm3,%ymm8
   7767 
   7768 	movq	$128,%rcx
   7769 	leaq	128(%rsi),%rsi
   7770 	subq	$128,%rbx
   7771 	jmp	seal_avx2_hash
   7772 3:
   7773 	cmpq	$384,%rbx
   7774 	ja	seal_avx2_tail_512
   7775 
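# seal_avx2_tail_384: at most 384 trailing bytes; three states (six
# blocks). 256 bytes are XORed and stored here before seal_avx2_hash
# finishes the rest.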
   7776 seal_avx2_tail_384:
   7777 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7778 	vmovdqa	64(%rbp),%ymm4
   7779 	vmovdqa	96(%rbp),%ymm8
   7780 	vmovdqa	%ymm0,%ymm1
   7781 	vmovdqa	%ymm4,%ymm5
   7782 	vmovdqa	%ymm8,%ymm9
   7783 	vmovdqa	%ymm0,%ymm2
   7784 	vmovdqa	%ymm4,%ymm6
   7785 	vmovdqa	%ymm8,%ymm10
   7786 	vmovdqa	.avx2_inc(%rip),%ymm12
   7787 	vpaddd	160(%rbp),%ymm12,%ymm14
   7788 	vpaddd	%ymm14,%ymm12,%ymm13
   7789 	vpaddd	%ymm13,%ymm12,%ymm12
   7790 	vmovdqa	%ymm12,160(%rbp)
   7791 	vmovdqa	%ymm13,192(%rbp)
   7792 	vmovdqa	%ymm14,224(%rbp)
   7793 
   7794 1:
   7795 	addq	0(%rdi),%r10
   7796 	adcq	8+0(%rdi),%r11
   7797 	adcq	$1,%r12
   7798 	movq	0+0(%rbp),%rax
   7799 	movq	%rax,%r15
   7800 	mulq	%r10
   7801 	movq	%rax,%r13
   7802 	movq	%rdx,%r14
   7803 	movq	0+0(%rbp),%rax
   7804 	mulq	%r11
   7805 	imulq	%r12,%r15
   7806 	addq	%rax,%r14
   7807 	adcq	%rdx,%r15
   7808 	movq	8+0(%rbp),%rax
   7809 	movq	%rax,%r9
   7810 	mulq	%r10
   7811 	addq	%rax,%r14
   7812 	adcq	$0,%rdx
   7813 	movq	%rdx,%r10
   7814 	movq	8+0(%rbp),%rax
   7815 	mulq	%r11
   7816 	addq	%rax,%r15
   7817 	adcq	$0,%rdx
   7818 	imulq	%r12,%r9
   7819 	addq	%r10,%r15
   7820 	adcq	%rdx,%r9
   7821 	movq	%r13,%r10
   7822 	movq	%r14,%r11
   7823 	movq	%r15,%r12
   7824 	andq	$3,%r12
   7825 	movq	%r15,%r13
   7826 	andq	$-4,%r13
   7827 	movq	%r9,%r14
   7828 	shrdq	$2,%r9,%r15
   7829 	shrq	$2,%r9
   7830 	addq	%r13,%r10
   7831 	adcq	%r14,%r11
   7832 	adcq	$0,%r12
   7833 	addq	%r15,%r10
   7834 	adcq	%r9,%r11
   7835 	adcq	$0,%r12
   7836 
   7837 	leaq	16(%rdi),%rdi
   7838 2:
   7839 	vpaddd	%ymm4,%ymm0,%ymm0
   7840 	vpxor	%ymm0,%ymm12,%ymm12
   7841 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7842 	vpaddd	%ymm12,%ymm8,%ymm8
   7843 	vpxor	%ymm8,%ymm4,%ymm4
   7844 	vpsrld	$20,%ymm4,%ymm3
   7845 	vpslld	$12,%ymm4,%ymm4
   7846 	vpxor	%ymm3,%ymm4,%ymm4
   7847 	vpaddd	%ymm4,%ymm0,%ymm0
   7848 	vpxor	%ymm0,%ymm12,%ymm12
   7849 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7850 	vpaddd	%ymm12,%ymm8,%ymm8
   7851 	vpxor	%ymm8,%ymm4,%ymm4
   7852 	vpslld	$7,%ymm4,%ymm3
   7853 	vpsrld	$25,%ymm4,%ymm4
   7854 	vpxor	%ymm3,%ymm4,%ymm4
   7855 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   7856 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7857 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   7858 	vpaddd	%ymm5,%ymm1,%ymm1
   7859 	vpxor	%ymm1,%ymm13,%ymm13
   7860 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   7861 	vpaddd	%ymm13,%ymm9,%ymm9
   7862 	vpxor	%ymm9,%ymm5,%ymm5
   7863 	vpsrld	$20,%ymm5,%ymm3
   7864 	vpslld	$12,%ymm5,%ymm5
   7865 	vpxor	%ymm3,%ymm5,%ymm5
   7866 	vpaddd	%ymm5,%ymm1,%ymm1
   7867 	vpxor	%ymm1,%ymm13,%ymm13
   7868 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   7869 	vpaddd	%ymm13,%ymm9,%ymm9
   7870 	vpxor	%ymm9,%ymm5,%ymm5
   7871 	vpslld	$7,%ymm5,%ymm3
   7872 	vpsrld	$25,%ymm5,%ymm5
   7873 	vpxor	%ymm3,%ymm5,%ymm5
   7874 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   7875 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7876 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   7877 	addq	0(%rdi),%r10
   7878 	adcq	8+0(%rdi),%r11
   7879 	adcq	$1,%r12
   7880 	movq	0+0(%rbp),%rax
   7881 	movq	%rax,%r15
   7882 	mulq	%r10
   7883 	movq	%rax,%r13
   7884 	movq	%rdx,%r14
   7885 	movq	0+0(%rbp),%rax
   7886 	mulq	%r11
   7887 	imulq	%r12,%r15
   7888 	addq	%rax,%r14
   7889 	adcq	%rdx,%r15
   7890 	movq	8+0(%rbp),%rax
   7891 	movq	%rax,%r9
   7892 	mulq	%r10
   7893 	addq	%rax,%r14
   7894 	adcq	$0,%rdx
   7895 	movq	%rdx,%r10
   7896 	movq	8+0(%rbp),%rax
   7897 	mulq	%r11
   7898 	addq	%rax,%r15
   7899 	adcq	$0,%rdx
   7900 	imulq	%r12,%r9
   7901 	addq	%r10,%r15
   7902 	adcq	%rdx,%r9
   7903 	movq	%r13,%r10
   7904 	movq	%r14,%r11
   7905 	movq	%r15,%r12
   7906 	andq	$3,%r12
   7907 	movq	%r15,%r13
   7908 	andq	$-4,%r13
   7909 	movq	%r9,%r14
   7910 	shrdq	$2,%r9,%r15
   7911 	shrq	$2,%r9
   7912 	addq	%r13,%r10
   7913 	adcq	%r14,%r11
   7914 	adcq	$0,%r12
   7915 	addq	%r15,%r10
   7916 	adcq	%r9,%r11
   7917 	adcq	$0,%r12
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	1b
	decq	%r8
	jge	2b
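// Rounds and hashing done: add the saved initial state back into each block
// (the ChaCha20 feed-forward), then encrypt and write the next 256 bytes.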
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	64(%rbp),%ymm6,%ymm6
	vpaddd	96(%rbp),%ymm10,%ymm10
	vpaddd	224(%rbp),%ymm14,%ymm14
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	64(%rbp),%ymm5,%ymm5
	vpaddd	96(%rbp),%ymm9,%ymm9
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+0(%rsi),%ymm3,%ymm3
	vpxor	32+0(%rsi),%ymm2,%ymm2
	vpxor	64+0(%rsi),%ymm6,%ymm6
	vpxor	96+0(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+0(%rdi)
	vmovdqu	%ymm2,32+0(%rdi)
	vmovdqu	%ymm6,64+0(%rdi)
	vmovdqu	%ymm10,96+0(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm1,%ymm1
	vpxor	64+128(%rsi),%ymm5,%ymm5
	vpxor	96+128(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm1,32+128(%rdi)
	vmovdqu	%ymm5,64+128(%rdi)
	vmovdqu	%ymm9,96+128(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$256,%rcx
	leaq	256(%rsi),%rsi
	subq	$256,%rbx
	jmp	seal_avx2_hash

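// seal_avx2_tail_512: seal the final chunk. Four 2-block ChaCha20 states
// (512 bytes of keystream) are keyed up with fresh counters while Poly1305
// catches up on ciphertext that has already been written.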
seal_avx2_tail_512:
	vmovdqa	.chacha20_consts(%rip),%ymm0
	vmovdqa	64(%rbp),%ymm4
	vmovdqa	96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.avx2_inc(%rip),%ymm12
	vpaddd	160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,256(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm12,160(%rbp)

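// 1: hash one extra 16-byte ciphertext block per iteration. This Poly1305
// variant uses mulx (BMI2), which takes its second operand implicitly in %rdx.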
1:
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
2:
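// One ChaCha20 double round over all four states, interleaved with two
// Poly1305 block updates; %ymm8 is spilled to 128(%rbp) to free a temporary
// for the shift-based rotations.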
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	addq	%rax,%r15
	adcq	%rdx,%r9

	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	1b
	decq	%r8
	jge	2b
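// Feed-forward, then encrypt and write 384 bytes; the remaining keystream is
// left in %ymm0/%ymm4/%ymm8/%ymm12 and %rcx is set so seal_avx2_hash absorbs
// the bytes just written.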
	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	64(%rbp),%ymm7,%ymm7
	vpaddd	96(%rbp),%ymm11,%ymm11
	vpaddd	256(%rbp),%ymm15,%ymm15
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	64(%rbp),%ymm6,%ymm6
	vpaddd	96(%rbp),%ymm10,%ymm10
	vpaddd	224(%rbp),%ymm14,%ymm14
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	64(%rbp),%ymm5,%ymm5
	vpaddd	96(%rbp),%ymm9,%ymm9
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12

	vmovdqa	%ymm0,128(%rbp)
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vpxor	0+0(%rsi),%ymm0,%ymm0
	vpxor	32+0(%rsi),%ymm3,%ymm3
	vpxor	64+0(%rsi),%ymm7,%ymm7
	vpxor	96+0(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm0,0+0(%rdi)
	vmovdqu	%ymm3,32+0(%rdi)
	vmovdqu	%ymm7,64+0(%rdi)
	vmovdqu	%ymm11,96+0(%rdi)

	vmovdqa	128(%rbp),%ymm0
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm2,%ymm2
	vpxor	64+128(%rsi),%ymm6,%ymm6
	vpxor	96+128(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm2,32+128(%rdi)
	vmovdqu	%ymm6,64+128(%rdi)
	vmovdqu	%ymm10,96+128(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+256(%rsi),%ymm3,%ymm3
	vpxor	32+256(%rsi),%ymm1,%ymm1
	vpxor	64+256(%rsi),%ymm5,%ymm5
	vpxor	96+256(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+256(%rdi)
	vmovdqu	%ymm1,32+256(%rdi)
	vmovdqu	%ymm5,64+256(%rdi)
	vmovdqu	%ymm9,96+256(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$384,%rcx
	leaq	384(%rsi),%rsi
	subq	$384,%rbx
	jmp	seal_avx2_hash

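// seal_avx2_320: three 2-block ChaCha20 states for mid-sized inputs; the
// first 32 bytes of keystream will seed the Poly1305 key below.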
seal_avx2_320:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	%ymm12,160(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	movq	$10,%r10
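// 1: ten iterations of a column round plus a diagonal round, i.e. the full
// 20 ChaCha20 rounds, over all three states.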
1:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	decq	%r10
	jne	1b
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	%ymm7,%ymm4,%ymm4
	vpaddd	%ymm7,%ymm5,%ymm5
	vpaddd	%ymm7,%ymm6,%ymm6
	vpaddd	%ymm11,%ymm8,%ymm8
	vpaddd	%ymm11,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm10,%ymm10
	vpaddd	160(%rbp),%ymm12,%ymm12
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	224(%rbp),%ymm14,%ymm14
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
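// The first 32 bytes of keystream become the one-time Poly1305 key: clamp r
// (the low 16 bytes) with .clamp and store r||s at 0(%rbp).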

	vpand	.clamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
	jmp	seal_avx2_short

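// seal_avx2_192: the shortest AVX2 path, two 2-block states; same pattern of
// rounds, feed-forward, and Poly1305 key clamping as above.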
seal_avx2_192:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
	vmovdqa	%ymm12,%ymm11
	vmovdqa	%ymm13,%ymm15
	movq	$10,%r10
1:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5

	decq	%r10
	jne	1b
	vpaddd	%ymm2,%ymm0,%ymm0
	vpaddd	%ymm2,%ymm1,%ymm1
	vpaddd	%ymm6,%ymm4,%ymm4
	vpaddd	%ymm6,%ymm5,%ymm5
	vpaddd	%ymm10,%ymm8,%ymm8
	vpaddd	%ymm10,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm13,%ymm13
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.clamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
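// seal_avx2_short: hash the AAD (length in %r8), then encrypt-and-hash the
// remaining input 32 bytes at a time. The movq %r8,%r8 below appears to be a
// harmless no-op left by the code generator.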
seal_avx2_short:
	movq	%r8,%r8
	call	poly_hash_ad_internal
	xorq	%rcx,%rcx
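// seal_avx2_hash: absorb %rcx bytes of already-written ciphertext at %rdi
// into the Poly1305 state, 16 bytes per iteration.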
seal_avx2_hash:
	cmpq	$16,%rcx
	jb	seal_avx2_short_loop
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	subq	$16,%rcx
	addq	$16,%rdi
	jmp	seal_avx2_hash
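// seal_avx2_short_loop: xor 32 bytes of plaintext with the current keystream
// register, hash the two resulting 16-byte ciphertext blocks, then rotate the
// queued keystream registers down (ymm4 -> ymm0, ymm8 -> ymm4, ...).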
seal_avx2_short_loop:
	cmpq	$32,%rbx
	jb	seal_avx2_short_tail
	subq	$32,%rbx

	vpxor	(%rsi),%ymm0,%ymm0
	vmovdqu	%ymm0,(%rdi)
	leaq	32(%rsi),%rsi

	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi

	vmovdqa	%ymm4,%ymm0
	vmovdqa	%ymm8,%ymm4
	vmovdqa	%ymm12,%ymm8
	vmovdqa	%ymm1,%ymm12
	vmovdqa	%ymm5,%ymm1
	vmovdqa	%ymm9,%ymm5
	vmovdqa	%ymm13,%ymm9
	vmovdqa	%ymm2,%ymm13
	vmovdqa	%ymm6,%ymm2
	jmp	seal_avx2_short_loop
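// seal_avx2_short_tail: seal one last 16-byte block with the low half of
// %ymm0 if needed, then drop to the SSE path for any remainder under 16
// bytes.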
seal_avx2_short_tail:
	cmpq	$16,%rbx
	jb	1f
	subq	$16,%rbx
	vpxor	(%rsi),%xmm0,%xmm3
	vmovdqu	%xmm3,(%rdi)
	leaq	16(%rsi),%rsi
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
	vextracti128	$1,%ymm0,%xmm0
1:
	vzeroupper
	jmp	seal_sse_tail_16

#endif