#if defined(__x86_64__)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
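
# ChaCha20-Poly1305 AEAD primitives for x86-64 (AT&T syntax). This file
# carries SSE paths plus AVX2 entry points selected at run time via
# OPENSSL_ia32cap_P; the scalar Poly1305 arithmetic is interleaved with the
# vector ChaCha20 rounds throughout to hide its latency.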

chacha20_poly1305_constants:

.align	64
.chacha20_consts:
.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.rol8:
.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.rol16:
.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.avx2_init:
.long	0,0,0,0
.sse_inc:
.long	1,0,0,0
.avx2_inc:
.long	2,0,0,0,2,0,0,0
.clamp:
.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align	16
.and_masks:
.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
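
# Notes on the tables above: .chacha20_consts is the "expand 32-byte k"
# sigma constant; .rol16 and .rol8 are pshufb masks that rotate each 32-bit
# lane left by 16 and 8 bits; .sse_inc and .avx2_inc are per-block counter
# increments; .clamp holds the masks that clamp the Poly1305 key half r as
# the spec requires; .and_masks keeps only the first 1..15 bytes of a
# partial final block.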

.type	poly_hash_ad_internal,@function
.align	64
poly_hash_ad_internal:
.cfi_startproc
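# Absorbs the additional data into the Poly1305 state. On entry %rcx points
# at the AD, %r8 holds its length, and 0(%rbp) holds the clamped key r.
# The accumulator h lives in %r10:%r11:%r12 and is zeroed below.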
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r12,%r12
	cmpq	$13,%r8
	jne	hash_ad_loop
poly_fast_tls_ad:
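# Fast path: a 13-byte AD is the TLS AEAD case. The 13 bytes are loaded into
# %r10:%r11, implicitly zero-padded to 16 bytes (as RFC 8439 pads the AD),
# the 2^128 block bit is set via %r12, and a single Poly1305 step follows.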

	movq	(%rcx),%r10
	movq	5(%rcx),%r11
	shrq	$24,%r11
	movq	$1,%r12
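# The multiply/reduce block below recurs throughout the file: it computes
# h = (h * r) mod 2^130-5, with h in %r10:%r11:%r12 and r at 0(%rbp) and
# 8(%rbp). After the schoolbook 64x64->128 multiplies, the bits above 2^130
# (call them c) are folded back in as 5*c: once as 4*c (the limbs masked
# with $-4) and once as c (the same limbs shifted right by two).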
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

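# 0xf3,0xc3 encodes ret (the two-byte "repz ret" form).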
	.byte	0xf3,0xc3
hash_ad_loop:

	cmpq	$16,%r8
	jb	hash_ad_tail
	addq	0(%rcx),%r10
	adcq	8+0(%rcx),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rcx),%rcx
	subq	$16,%r8
	jmp	hash_ad_loop
hash_ad_tail:
	cmpq	$0,%r8
	je	1f

	xorq	%r13,%r13
	xorq	%r14,%r14
	xorq	%r15,%r15
	addq	%r8,%rcx
hash_ad_tail_loop:
	shldq	$8,%r13,%r14
	shlq	$8,%r13
	movzbq	-1(%rcx),%r15
	xorq	%r15,%r13
	decq	%rcx
	decq	%r8
	jne	hash_ad_tail_loop

	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


1:
	.byte	0xf3,0xc3
.cfi_endproc
.size	poly_hash_ad_internal, .-poly_hash_ad_internal

.globl	chacha20_poly1305_open
.hidden chacha20_poly1305_open
.type	chacha20_poly1305_open,@function
.align	64
chacha20_poly1305_open:
.cfi_startproc
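# SysV arguments (as used below): %rdi = plaintext out, %rsi = ciphertext
# in, %rdx = length, %rcx = AD, %r8 = AD length, %r9 = per-call data block
# that supplies the ChaCha20 key/counter/nonce and receives the 16-byte
# Poly1305 tag on exit.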
	pushq	%rbp
.cfi_adjust_cfa_offset	8
	pushq	%rbx
.cfi_adjust_cfa_offset	8
	pushq	%r12
.cfi_adjust_cfa_offset	8
	pushq	%r13
.cfi_adjust_cfa_offset	8
	pushq	%r14
.cfi_adjust_cfa_offset	8
	pushq	%r15
.cfi_adjust_cfa_offset	8


	pushq	%r9
.cfi_adjust_cfa_offset	8
	subq	$288 + 32,%rsp
.cfi_adjust_cfa_offset	288 + 32
.cfi_offset	rbp, -16
.cfi_offset	rbx, -24
.cfi_offset	r12, -32
.cfi_offset	r13, -40
.cfi_offset	r14, -48
.cfi_offset	r15, -56
	leaq	32(%rsp),%rbp
	andq	$-32,%rbp
	movq	%rdx,8+32(%rbp)
	movq	%r8,0+32(%rbp)
	movq	%rdx,%rbx

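# Dispatch: $288 = 0x120 tests bits 5 (AVX2) and 8 (BMI2) of the third word
# of OPENSSL_ia32cap_P; the AVX2 path is taken only when both are set.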
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	andl	$288,%eax
	xorl	$288,%eax
	jz	chacha20_poly1305_open_avx2

1:
	cmpq	$128,%rbx
	jbe	open_sse_128

	movdqa	.chacha20_consts(%rip),%xmm0
	movdqu	0(%r9),%xmm4
	movdqu	16(%r9),%xmm8
	movdqu	32(%r9),%xmm12
	movdqa	%xmm12,%xmm7

	movdqa	%xmm4,48(%rbp)
	movdqa	%xmm8,64(%rbp)
	movdqa	%xmm12,96(%rbp)
	movq	$10,%r10
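# Ten iterations of the ChaCha20 double round (20 rounds). pshufb against
# .rol16/.rol8 performs the 16- and 8-bit rotations, the pslld/psrld/pxor
# triples the 12- and 7-bit ones, and the palignr instructions between the
# half-rounds rotate the rows into and out of diagonal form.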
1:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
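# The .byte runs below are hand-encoded SSSE3 instructions, spelled out for
# old assemblers: 102,15,58,15,... encodes palignr (e.g. 102,15,58,15,228,4
# is palignr $4,%xmm4,%xmm4) and 102,69,15,56,0,... encodes pshufb with an
# xmm register as the shuffle mask.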
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	decq	%r10
	jne	1b

	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4

	pand	.clamp(%rip),%xmm0
	movdqa	%xmm0,0(%rbp)
	movdqa	%xmm4,16(%rbp)

	movq	%r8,%r8
	call	poly_hash_ad_internal
open_sse_main_loop:
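# Bulk open loop: each pass generates four 64-byte ChaCha20 blocks (256
# bytes of keystream) while hashing the ciphertext that is about to be
# decrypted, interleaving one Poly1305 step per 16 bytes with the vector
# rounds.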
	cmpq	$256,%rbx
	jb	2f

	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	96(%rbp),%xmm15
	paddd	.sse_inc(%rip),%xmm15
	movdqa	%xmm15,%xmm14
	paddd	.sse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)
	movdqa	%xmm14,128(%rbp)
	movdqa	%xmm15,144(%rbp)



	movq	$4,%rcx
	movq	%rsi,%r8
1:
	movdqa	%xmm8,80(%rbp)
	movdqa	.rol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	addq	0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12

	leaq	16(%r8),%r8
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movdqa	.rol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	80(%rbp),%xmm8
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	movdqa	%xmm8,80(%rbp)
	movdqa	.rol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	.rol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	80(%rbp),%xmm8
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	decq	%rcx
	jge	1b
	addq	0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%r8),%r8
	cmpq	$-6,%rcx
	jg	1b
	paddd	.chacha20_consts(%rip),%xmm3
	paddd	48(%rbp),%xmm7
	paddd	64(%rbp),%xmm11
	paddd	144(%rbp),%xmm15
	paddd	.chacha20_consts(%rip),%xmm2
	paddd	48(%rbp),%xmm6
	paddd	64(%rbp),%xmm10
	paddd	128(%rbp),%xmm14
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqa	%xmm12,80(%rbp)
	movdqu	0 + 0(%rsi),%xmm12
	pxor	%xmm3,%xmm12
	movdqu	%xmm12,0 + 0(%rdi)
	movdqu	16 + 0(%rsi),%xmm12
	pxor	%xmm7,%xmm12
	movdqu	%xmm12,16 + 0(%rdi)
	movdqu	32 + 0(%rsi),%xmm12
	pxor	%xmm11,%xmm12
	movdqu	%xmm12,32 + 0(%rdi)
	movdqu	48 + 0(%rsi),%xmm12
	pxor	%xmm15,%xmm12
	movdqu	%xmm12,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 64(%rdi)
	movdqu	%xmm6,16 + 64(%rdi)
	movdqu	%xmm10,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 128(%rdi)
	movdqu	%xmm5,16 + 128(%rdi)
	movdqu	%xmm9,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)
	movdqu	0 + 192(%rsi),%xmm3
	movdqu	16 + 192(%rsi),%xmm7
	movdqu	32 + 192(%rsi),%xmm11
	movdqu	48 + 192(%rsi),%xmm15
	pxor	%xmm3,%xmm0
	pxor	%xmm7,%xmm4
	pxor	%xmm11,%xmm8
	pxor	80(%rbp),%xmm15
	movdqu	%xmm0,0 + 192(%rdi)
	movdqu	%xmm4,16 + 192(%rdi)
	movdqu	%xmm8,32 + 192(%rdi)
	movdqu	%xmm15,48 + 192(%rdi)

	leaq	256(%rsi),%rsi
	leaq	256(%rdi),%rdi
	subq	$256,%rbx
	jmp	open_sse_main_loop
2:

	testq	%rbx,%rbx
	jz	open_sse_finalize
	cmpq	$64,%rbx
	ja	3f
	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	96(%rbp),%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)

	xorq	%r8,%r8
	movq	%rbx,%rcx
	cmpq	$16,%rcx
	jb	2f
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	subq	$16,%rcx
2:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	cmpq	$16,%rcx
	jae	1b
	cmpq	$160,%r8
	jne	2b
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12

	jmp	open_sse_tail_64_dec_loop
3:
	cmpq	$128,%rbx
	ja	3f
	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	96(%rbp),%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)

	movq	%rbx,%rcx
	andq	$-16,%rcx
	xorq	%r8,%r8
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

2:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4

	cmpq	%rcx,%r8
	jb	1b
	cmpq	$160,%r8
	jne	2b
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 0(%rdi)
	movdqu	%xmm5,16 + 0(%rdi)
	movdqu	%xmm9,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)

	subq	$64,%rbx
	leaq	64(%rsi),%rsi
	leaq	64(%rdi),%rdi
	jmp	open_sse_tail_64_dec_loop
3:
	cmpq	$192,%rbx
	ja	3f
	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	96(%rbp),%xmm14
	paddd	.sse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)
	movdqa	%xmm14,128(%rbp)

	movq	%rbx,%rcx
	movq	$160,%r8
	cmpq	$160,%rcx
	cmovgq	%r8,%rcx
	andq	$-16,%rcx
	xorq	%r8,%r8
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

2:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4

	cmpq	%rcx,%r8
	jb	1b
	cmpq	$160,%r8
	jne	2b
	cmpq	$176,%rbx
	jb	1f
	addq	160(%rsi),%r10
	adcq	8+160(%rsi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	cmpq	$192,%rbx
	jb	1f
	addq	176(%rsi),%r10
	adcq	8+176(%rsi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

1:
	paddd	.chacha20_consts(%rip),%xmm2
	paddd	48(%rbp),%xmm6
	paddd	64(%rbp),%xmm10
	paddd	128(%rbp),%xmm14
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 0(%rdi)
	movdqu	%xmm6,16 + 0(%rdi)
	movdqu	%xmm10,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 64(%rdi)
	movdqu	%xmm5,16 + 64(%rdi)
	movdqu	%xmm9,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)

	subq	$128,%rbx
	leaq	128(%rsi),%rsi
	leaq	128(%rdi),%rdi
	jmp	open_sse_tail_64_dec_loop
3:

	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	96(%rbp),%xmm15
	paddd	.sse_inc(%rip),%xmm15
	movdqa	%xmm15,%xmm14
	paddd	.sse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)
	movdqa	%xmm14,128(%rbp)
	movdqa	%xmm15,144(%rbp)

	xorq	%r8,%r8
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movdqa	%xmm11,80(%rbp)
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm4
	pxor	%xmm11,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm4
	pxor	%xmm11,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm5
	pxor	%xmm11,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm5
	pxor	%xmm11,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm6
	pxor	%xmm11,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm6
	pxor	%xmm11,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	movdqa	80(%rbp),%xmm11
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movdqa	%xmm9,80(%rbp)
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol16(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$12,%xmm9
	psrld	$20,%xmm7
	pxor	%xmm9,%xmm7
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol8(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$7,%xmm9
	psrld	$25,%xmm7
	pxor	%xmm9,%xmm7
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
	movdqa	80(%rbp),%xmm9
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	movdqa	%xmm11,80(%rbp)
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm4
	pxor	%xmm11,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm4
	pxor	%xmm11,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm5
	pxor	%xmm11,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm5
	pxor	%xmm11,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm6
	pxor	%xmm11,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm6
	pxor	%xmm11,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
	movdqa	80(%rbp),%xmm11
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	movdqa	%xmm9,80(%rbp)
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol16(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$12,%xmm9
	psrld	$20,%xmm7
	pxor	%xmm9,%xmm7
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol8(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$7,%xmm9
	psrld	$25,%xmm7
	pxor	%xmm9,%xmm7
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
	movdqa	80(%rbp),%xmm9

	addq	$16,%r8
	cmpq	$160,%r8
	jb	1b
	movq	%rbx,%rcx
	andq	$-16,%rcx
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	addq	$16,%r8
	cmpq	%rcx,%r8
	jb	1b
	paddd	.chacha20_consts(%rip),%xmm3
	paddd	48(%rbp),%xmm7
	paddd	64(%rbp),%xmm11
	paddd	144(%rbp),%xmm15
	paddd	.chacha20_consts(%rip),%xmm2
	paddd	48(%rbp),%xmm6
	paddd	64(%rbp),%xmm10
	paddd	128(%rbp),%xmm14
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqa	%xmm12,80(%rbp)
	movdqu	0 + 0(%rsi),%xmm12
	pxor	%xmm3,%xmm12
	movdqu	%xmm12,0 + 0(%rdi)
	movdqu	16 + 0(%rsi),%xmm12
	pxor	%xmm7,%xmm12
	movdqu	%xmm12,16 + 0(%rdi)
	movdqu	32 + 0(%rsi),%xmm12
	pxor	%xmm11,%xmm12
	movdqu	%xmm12,32 + 0(%rdi)
	movdqu	48 + 0(%rsi),%xmm12
	pxor	%xmm15,%xmm12
	movdqu	%xmm12,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 64(%rdi)
	movdqu	%xmm6,16 + 64(%rdi)
	movdqu	%xmm10,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 128(%rdi)
	movdqu	%xmm5,16 + 128(%rdi)
	movdqu	%xmm9,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)

	movdqa	80(%rbp),%xmm12
	subq	$192,%rbx
	leaq	192(%rsi),%rsi
	leaq	192(%rdi),%rdi


open_sse_tail_64_dec_loop:
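# Decrypt whatever full 16-byte words remain (their Poly1305 updates were
# already performed above), sliding the next keystream word into %xmm0
# after each store.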
	cmpq	$16,%rbx
	jb	1f
	subq	$16,%rbx
	movdqu	(%rsi),%xmm3
	pxor	%xmm3,%xmm0
	movdqu	%xmm0,(%rdi)
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
	movdqa	%xmm4,%xmm0
	movdqa	%xmm8,%xmm4
	movdqa	%xmm12,%xmm8
	jmp	open_sse_tail_64_dec_loop
1:
	movdqa	%xmm0,%xmm1


open_sse_tail_16:
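# Fewer than 16 bytes remain. They are gathered back to front into %xmm3,
# saved to %r13:%r14 for the final Poly1305 update (the .byte sequence
# 102,73,15,126,221 is movq %xmm3,%r13), xored with the keystream in %xmm1,
# and written out one byte at a time.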
	testq	%rbx,%rbx
	jz	open_sse_finalize



	pxor	%xmm3,%xmm3
	leaq	-1(%rsi,%rbx), %rsi
	movq	%rbx,%r8
2:
	pslldq	$1,%xmm3
	pinsrb	$0,(%rsi),%xmm3
	subq	$1,%rsi
	subq	$1,%r8
	jnz	2b

3:
.byte	102,73,15,126,221
	pextrq	$1,%xmm3,%r14

	pxor	%xmm1,%xmm3


2:
	pextrb	$0,%xmm3,(%rdi)
	psrldq	$1,%xmm3
	addq	$1,%rdi
	subq	$1,%rbx
	jne	2b

	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


open_sse_finalize:
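# Finalize: absorb the length block saved at 32(%rbp), perform the last
# Poly1305 step, reduce h fully (the subq/sbbq/cmovc sequence below is the
# conditional subtraction of 2^130-5), add the key's s half from 16(%rbp),
# and store the 16-byte tag through the saved %r9.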
	addq	32(%rbp),%r10
	adcq	8+32(%rbp),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


	movq	%r10,%r13
	movq	%r11,%r14
	movq	%r12,%r15
	subq	$-5,%r10
	sbbq	$-1,%r11
	sbbq	$3,%r12
	cmovcq	%r13,%r10
	cmovcq	%r14,%r11
	cmovcq	%r15,%r12

	addq	0+16(%rbp),%r10
	adcq	8+16(%rbp),%r11

	addq	$288 + 32,%rsp
.cfi_adjust_cfa_offset	-(288 + 32)
	popq	%r9
.cfi_adjust_cfa_offset	-8
	movq	%r10,(%r9)
	movq	%r11,8(%r9)

	popq	%r15
.cfi_adjust_cfa_offset	-8
	popq	%r14
.cfi_adjust_cfa_offset	-8
	popq	%r13
.cfi_adjust_cfa_offset	-8
	popq	%r12
.cfi_adjust_cfa_offset	-8
	popq	%rbx
.cfi_adjust_cfa_offset	-8
	popq	%rbp
.cfi_adjust_cfa_offset	-8
	.byte	0xf3,0xc3
.cfi_adjust_cfa_offset	(8 * 6) + 288 + 32

open_sse_128:
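# Short-input open path (at most 128 bytes): three ChaCha20 blocks are kept
# entirely in registers; block 0 supplies the clamped Poly1305 key and
# blocks 1-2 the keystream.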
   1860 	movdqu	.chacha20_consts(%rip),%xmm0
   1861 	movdqa	%xmm0,%xmm1
   1862 	movdqa	%xmm0,%xmm2
   1863 	movdqu	0(%r9),%xmm4
   1864 	movdqa	%xmm4,%xmm5
   1865 	movdqa	%xmm4,%xmm6
   1866 	movdqu	16(%r9),%xmm8
   1867 	movdqa	%xmm8,%xmm9
   1868 	movdqa	%xmm8,%xmm10
   1869 	movdqu	32(%r9),%xmm12
   1870 	movdqa	%xmm12,%xmm13
   1871 	paddd	.sse_inc(%rip),%xmm13
   1872 	movdqa	%xmm13,%xmm14
   1873 	paddd	.sse_inc(%rip),%xmm14
   1874 	movdqa	%xmm4,%xmm7
   1875 	movdqa	%xmm8,%xmm11
   1876 	movdqa	%xmm13,%xmm15
   1877 	movq	$10,%r10
   1878 1:
   1879 	paddd	%xmm4,%xmm0
   1880 	pxor	%xmm0,%xmm12
   1881 	pshufb	.rol16(%rip),%xmm12
   1882 	paddd	%xmm12,%xmm8
   1883 	pxor	%xmm8,%xmm4
   1884 	movdqa	%xmm4,%xmm3
   1885 	pslld	$12,%xmm3
   1886 	psrld	$20,%xmm4
   1887 	pxor	%xmm3,%xmm4
   1888 	paddd	%xmm4,%xmm0
   1889 	pxor	%xmm0,%xmm12
   1890 	pshufb	.rol8(%rip),%xmm12
   1891 	paddd	%xmm12,%xmm8
   1892 	pxor	%xmm8,%xmm4
   1893 	movdqa	%xmm4,%xmm3
   1894 	pslld	$7,%xmm3
   1895 	psrld	$25,%xmm4
   1896 	pxor	%xmm3,%xmm4
   1897 .byte	102,15,58,15,228,4
   1898 .byte	102,69,15,58,15,192,8
   1899 .byte	102,69,15,58,15,228,12
   1900 	paddd	%xmm5,%xmm1
   1901 	pxor	%xmm1,%xmm13
   1902 	pshufb	.rol16(%rip),%xmm13
   1903 	paddd	%xmm13,%xmm9
   1904 	pxor	%xmm9,%xmm5
   1905 	movdqa	%xmm5,%xmm3
   1906 	pslld	$12,%xmm3
   1907 	psrld	$20,%xmm5
   1908 	pxor	%xmm3,%xmm5
   1909 	paddd	%xmm5,%xmm1
   1910 	pxor	%xmm1,%xmm13
   1911 	pshufb	.rol8(%rip),%xmm13
   1912 	paddd	%xmm13,%xmm9
   1913 	pxor	%xmm9,%xmm5
   1914 	movdqa	%xmm5,%xmm3
   1915 	pslld	$7,%xmm3
   1916 	psrld	$25,%xmm5
   1917 	pxor	%xmm3,%xmm5
   1918 .byte	102,15,58,15,237,4
   1919 .byte	102,69,15,58,15,201,8
   1920 .byte	102,69,15,58,15,237,12
   1921 	paddd	%xmm6,%xmm2
   1922 	pxor	%xmm2,%xmm14
   1923 	pshufb	.rol16(%rip),%xmm14
   1924 	paddd	%xmm14,%xmm10
   1925 	pxor	%xmm10,%xmm6
   1926 	movdqa	%xmm6,%xmm3
   1927 	pslld	$12,%xmm3
   1928 	psrld	$20,%xmm6
   1929 	pxor	%xmm3,%xmm6
   1930 	paddd	%xmm6,%xmm2
   1931 	pxor	%xmm2,%xmm14
   1932 	pshufb	.rol8(%rip),%xmm14
   1933 	paddd	%xmm14,%xmm10
   1934 	pxor	%xmm10,%xmm6
   1935 	movdqa	%xmm6,%xmm3
   1936 	pslld	$7,%xmm3
   1937 	psrld	$25,%xmm6
   1938 	pxor	%xmm3,%xmm6
   1939 .byte	102,15,58,15,246,4
   1940 .byte	102,69,15,58,15,210,8
   1941 .byte	102,69,15,58,15,246,12
   1942 	paddd	%xmm4,%xmm0
   1943 	pxor	%xmm0,%xmm12
   1944 	pshufb	.rol16(%rip),%xmm12
   1945 	paddd	%xmm12,%xmm8
   1946 	pxor	%xmm8,%xmm4
   1947 	movdqa	%xmm4,%xmm3
   1948 	pslld	$12,%xmm3
   1949 	psrld	$20,%xmm4
   1950 	pxor	%xmm3,%xmm4
   1951 	paddd	%xmm4,%xmm0
   1952 	pxor	%xmm0,%xmm12
   1953 	pshufb	.rol8(%rip),%xmm12
   1954 	paddd	%xmm12,%xmm8
   1955 	pxor	%xmm8,%xmm4
   1956 	movdqa	%xmm4,%xmm3
   1957 	pslld	$7,%xmm3
   1958 	psrld	$25,%xmm4
   1959 	pxor	%xmm3,%xmm4
   1960 .byte	102,15,58,15,228,12
   1961 .byte	102,69,15,58,15,192,8
   1962 .byte	102,69,15,58,15,228,4
   1963 	paddd	%xmm5,%xmm1
   1964 	pxor	%xmm1,%xmm13
   1965 	pshufb	.rol16(%rip),%xmm13
   1966 	paddd	%xmm13,%xmm9
   1967 	pxor	%xmm9,%xmm5
   1968 	movdqa	%xmm5,%xmm3
   1969 	pslld	$12,%xmm3
   1970 	psrld	$20,%xmm5
   1971 	pxor	%xmm3,%xmm5
   1972 	paddd	%xmm5,%xmm1
   1973 	pxor	%xmm1,%xmm13
   1974 	pshufb	.rol8(%rip),%xmm13
   1975 	paddd	%xmm13,%xmm9
   1976 	pxor	%xmm9,%xmm5
   1977 	movdqa	%xmm5,%xmm3
   1978 	pslld	$7,%xmm3
   1979 	psrld	$25,%xmm5
   1980 	pxor	%xmm3,%xmm5
   1981 .byte	102,15,58,15,237,12
   1982 .byte	102,69,15,58,15,201,8
   1983 .byte	102,69,15,58,15,237,4
   1984 	paddd	%xmm6,%xmm2
   1985 	pxor	%xmm2,%xmm14
   1986 	pshufb	.rol16(%rip),%xmm14
   1987 	paddd	%xmm14,%xmm10
   1988 	pxor	%xmm10,%xmm6
   1989 	movdqa	%xmm6,%xmm3
   1990 	pslld	$12,%xmm3
   1991 	psrld	$20,%xmm6
   1992 	pxor	%xmm3,%xmm6
   1993 	paddd	%xmm6,%xmm2
   1994 	pxor	%xmm2,%xmm14
   1995 	pshufb	.rol8(%rip),%xmm14
   1996 	paddd	%xmm14,%xmm10
   1997 	pxor	%xmm10,%xmm6
   1998 	movdqa	%xmm6,%xmm3
   1999 	pslld	$7,%xmm3
   2000 	psrld	$25,%xmm6
   2001 	pxor	%xmm3,%xmm6
   2002 .byte	102,15,58,15,246,12
   2003 .byte	102,69,15,58,15,210,8
   2004 .byte	102,69,15,58,15,246,4
   2005 
   2006 	decq	%r10
   2007 	jnz	1b
   2008 	paddd	.chacha20_consts(%rip),%xmm0
   2009 	paddd	.chacha20_consts(%rip),%xmm1
   2010 	paddd	.chacha20_consts(%rip),%xmm2
   2011 	paddd	%xmm7,%xmm4
   2012 	paddd	%xmm7,%xmm5
   2013 	paddd	%xmm7,%xmm6
   2014 	paddd	%xmm11,%xmm9
   2015 	paddd	%xmm11,%xmm10
   2016 	paddd	%xmm15,%xmm13
   2017 	paddd	.sse_inc(%rip),%xmm15
   2018 	paddd	%xmm15,%xmm14
   2019 
   2020 	pand	.clamp(%rip),%xmm0
   2021 	movdqa	%xmm0,0(%rbp)
   2022 	movdqa	%xmm4,16(%rbp)
   2023 
   2024 	movq	%r8,%r8
   2025 	call	poly_hash_ad_internal
   2026 1:
   2027 	cmpq	$16,%rbx
   2028 	jb	open_sse_tail_16
   2029 	subq	$16,%rbx
   2030 	addq	0(%rsi),%r10
   2031 	adcq	8+0(%rsi),%r11
   2032 	adcq	$1,%r12
   2033 
   2034 
   2035 	movdqu	0(%rsi),%xmm3
   2036 	pxor	%xmm3,%xmm1
   2037 	movdqu	%xmm1,0(%rdi)
   2038 	leaq	16(%rsi),%rsi
   2039 	leaq	16(%rdi),%rdi
   2040 	movq	0+0(%rbp),%rax
   2041 	movq	%rax,%r15
   2042 	mulq	%r10
   2043 	movq	%rax,%r13
   2044 	movq	%rdx,%r14
   2045 	movq	0+0(%rbp),%rax
   2046 	mulq	%r11
   2047 	imulq	%r12,%r15
   2048 	addq	%rax,%r14
   2049 	adcq	%rdx,%r15
   2050 	movq	8+0(%rbp),%rax
   2051 	movq	%rax,%r9
   2052 	mulq	%r10
   2053 	addq	%rax,%r14
   2054 	adcq	$0,%rdx
   2055 	movq	%rdx,%r10
   2056 	movq	8+0(%rbp),%rax
   2057 	mulq	%r11
   2058 	addq	%rax,%r15
   2059 	adcq	$0,%rdx
   2060 	imulq	%r12,%r9
   2061 	addq	%r10,%r15
   2062 	adcq	%rdx,%r9
   2063 	movq	%r13,%r10
   2064 	movq	%r14,%r11
   2065 	movq	%r15,%r12
   2066 	andq	$3,%r12
   2067 	movq	%r15,%r13
   2068 	andq	$-4,%r13
   2069 	movq	%r9,%r14
   2070 	shrdq	$2,%r9,%r15
   2071 	shrq	$2,%r9
   2072 	addq	%r13,%r10
   2073 	adcq	%r14,%r11
   2074 	adcq	$0,%r12
   2075 	addq	%r15,%r10
   2076 	adcq	%r9,%r11
   2077 	adcq	$0,%r12
   2078 
   2079 
   2080 	movdqa	%xmm5,%xmm1
   2081 	movdqa	%xmm9,%xmm5
   2082 	movdqa	%xmm13,%xmm9
   2083 	movdqa	%xmm2,%xmm13
   2084 	movdqa	%xmm6,%xmm2
   2085 	movdqa	%xmm10,%xmm6
   2086 	movdqa	%xmm14,%xmm10
   2087 	jmp	1b
   2088 	jmp	open_sse_tail_16
   2089 .size	chacha20_poly1305_open, .-chacha20_poly1305_open
   2090 .cfi_endproc
   2091 
   2092 
   2093 
   2094 
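// chacha20_poly1305_seal: encrypts and authenticates a buffer.  Per the
// SysV AMD64 ABI the arguments arrive as: %rdi = ciphertext out,
// %rsi = plaintext in, %rdx = plaintext length, %rcx = additional data,
// %r8 = additional-data length, %r9 = 32-byte key followed by the
// 16-byte counter/nonce block.  %r9 is saved across the body and the
// 16-byte Poly1305 tag is written back through it on return.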
   2095 .globl	chacha20_poly1305_seal
   2096 .hidden chacha20_poly1305_seal
   2097 .type	chacha20_poly1305_seal,@function
   2098 .align	64
   2099 chacha20_poly1305_seal:
   2100 .cfi_startproc
   2101 	pushq	%rbp
   2102 .cfi_adjust_cfa_offset	8
   2103 	pushq	%rbx
   2104 .cfi_adjust_cfa_offset	8
   2105 	pushq	%r12
   2106 .cfi_adjust_cfa_offset	8
   2107 	pushq	%r13
   2108 .cfi_adjust_cfa_offset	8
   2109 	pushq	%r14
   2110 .cfi_adjust_cfa_offset	8
   2111 	pushq	%r15
   2112 .cfi_adjust_cfa_offset	8
   2113 
   2114 
   2115 	pushq	%r9
   2116 .cfi_adjust_cfa_offset	8
   2117 	subq	$288 + 32,%rsp
   2118 .cfi_adjust_cfa_offset	288 + 32
   2119 .cfi_offset	rbp, -16
   2120 .cfi_offset	rbx, -24
   2121 .cfi_offset	r12, -32
   2122 .cfi_offset	r13, -40
   2123 .cfi_offset	r14, -48
   2124 .cfi_offset	r15, -56
   2125 	leaq	32(%rsp),%rbp
   2126 	andq	$-32,%rbp
   2127 	movq	%rdx,8+32(%rbp)
   2128 	movq	%r8,0+32(%rbp)
   2129 	movq	%rdx,%rbx
   2130 
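// Feature dispatch: OPENSSL_ia32cap_P+8 holds CPUID.(EAX=7):EBX; 288 is
// bit 5 (AVX2) | bit 8 (BMI2), so the AVX2 path (which relies on mulx
// from BMI2) is taken only when both are present.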
   2131 	movl	OPENSSL_ia32cap_P+8(%rip),%eax
   2132 	andl	$288,%eax
   2133 	xorl	$288,%eax
   2134 	jz	chacha20_poly1305_seal_avx2
   2135 
   2136 	cmpq	$128,%rbx
   2137 	jbe	seal_sse_128
   2138 
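// SSE path: build four ChaCha20 states sharing the key rows
// (%xmm4/%xmm8) with counters ctr..ctr+3, and spill the initial rows to
// the frame at 48/64/96/112/128/144(%rbp) for the final feed-forward
// additions.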
   2139 	movdqa	.chacha20_consts(%rip),%xmm0
   2140 	movdqu	0(%r9),%xmm4
   2141 	movdqu	16(%r9),%xmm8
   2142 	movdqu	32(%r9),%xmm12
   2143 	movdqa	%xmm0,%xmm1
   2144 	movdqa	%xmm0,%xmm2
   2145 	movdqa	%xmm0,%xmm3
   2146 	movdqa	%xmm4,%xmm5
   2147 	movdqa	%xmm4,%xmm6
   2148 	movdqa	%xmm4,%xmm7
   2149 	movdqa	%xmm8,%xmm9
   2150 	movdqa	%xmm8,%xmm10
   2151 	movdqa	%xmm8,%xmm11
   2152 	movdqa	%xmm12,%xmm15
   2153 	paddd	.sse_inc(%rip),%xmm12
   2154 	movdqa	%xmm12,%xmm14
   2155 	paddd	.sse_inc(%rip),%xmm12
   2156 	movdqa	%xmm12,%xmm13
   2157 	paddd	.sse_inc(%rip),%xmm12
   2158 
   2159 	movdqa	%xmm4,48(%rbp)
   2160 	movdqa	%xmm8,64(%rbp)
   2161 	movdqa	%xmm12,96(%rbp)
   2162 	movdqa	%xmm13,112(%rbp)
   2163 	movdqa	%xmm14,128(%rbp)
   2164 	movdqa	%xmm15,144(%rbp)
   2165 	movq	$10,%r10
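// 10 iterations of a column round followed by a diagonal round (20
// rounds total).  Each quarter round computes, on state words (a,b,c,d):
//   a += b; d ^= a; d = rol32(d,16)
//   c += d; b ^= c; b = rol32(b,12)
//   a += b; d ^= a; d = rol32(d,8)
//   c += d; b ^= c; b = rol32(b,7)
// The 16- and 8-bit rotations use pshufb with .rol16/.rol8; the 12- and
// 7-bit ones use psrld/pslld pairs.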
   2166 1:
   2167 	movdqa	%xmm8,80(%rbp)
   2168 	movdqa	.rol16(%rip),%xmm8
   2169 	paddd	%xmm7,%xmm3
   2170 	paddd	%xmm6,%xmm2
   2171 	paddd	%xmm5,%xmm1
   2172 	paddd	%xmm4,%xmm0
   2173 	pxor	%xmm3,%xmm15
   2174 	pxor	%xmm2,%xmm14
   2175 	pxor	%xmm1,%xmm13
   2176 	pxor	%xmm0,%xmm12
   2177 .byte	102,69,15,56,0,248
   2178 .byte	102,69,15,56,0,240
   2179 .byte	102,69,15,56,0,232
   2180 .byte	102,69,15,56,0,224
   2181 	movdqa	80(%rbp),%xmm8
   2182 	paddd	%xmm15,%xmm11
   2183 	paddd	%xmm14,%xmm10
   2184 	paddd	%xmm13,%xmm9
   2185 	paddd	%xmm12,%xmm8
   2186 	pxor	%xmm11,%xmm7
   2187 	pxor	%xmm10,%xmm6
   2188 	pxor	%xmm9,%xmm5
   2189 	pxor	%xmm8,%xmm4
   2190 	movdqa	%xmm8,80(%rbp)
   2191 	movdqa	%xmm7,%xmm8
   2192 	psrld	$20,%xmm8
   2193 	pslld	$32-20,%xmm7
   2194 	pxor	%xmm8,%xmm7
   2195 	movdqa	%xmm6,%xmm8
   2196 	psrld	$20,%xmm8
   2197 	pslld	$32-20,%xmm6
   2198 	pxor	%xmm8,%xmm6
   2199 	movdqa	%xmm5,%xmm8
   2200 	psrld	$20,%xmm8
   2201 	pslld	$32-20,%xmm5
   2202 	pxor	%xmm8,%xmm5
   2203 	movdqa	%xmm4,%xmm8
   2204 	psrld	$20,%xmm8
   2205 	pslld	$32-20,%xmm4
   2206 	pxor	%xmm8,%xmm4
   2207 	movdqa	.rol8(%rip),%xmm8
   2208 	paddd	%xmm7,%xmm3
   2209 	paddd	%xmm6,%xmm2
   2210 	paddd	%xmm5,%xmm1
   2211 	paddd	%xmm4,%xmm0
   2212 	pxor	%xmm3,%xmm15
   2213 	pxor	%xmm2,%xmm14
   2214 	pxor	%xmm1,%xmm13
   2215 	pxor	%xmm0,%xmm12
   2216 .byte	102,69,15,56,0,248
   2217 .byte	102,69,15,56,0,240
   2218 .byte	102,69,15,56,0,232
   2219 .byte	102,69,15,56,0,224
   2220 	movdqa	80(%rbp),%xmm8
   2221 	paddd	%xmm15,%xmm11
   2222 	paddd	%xmm14,%xmm10
   2223 	paddd	%xmm13,%xmm9
   2224 	paddd	%xmm12,%xmm8
   2225 	pxor	%xmm11,%xmm7
   2226 	pxor	%xmm10,%xmm6
   2227 	pxor	%xmm9,%xmm5
   2228 	pxor	%xmm8,%xmm4
   2229 	movdqa	%xmm8,80(%rbp)
   2230 	movdqa	%xmm7,%xmm8
   2231 	psrld	$25,%xmm8
   2232 	pslld	$32-25,%xmm7
   2233 	pxor	%xmm8,%xmm7
   2234 	movdqa	%xmm6,%xmm8
   2235 	psrld	$25,%xmm8
   2236 	pslld	$32-25,%xmm6
   2237 	pxor	%xmm8,%xmm6
   2238 	movdqa	%xmm5,%xmm8
   2239 	psrld	$25,%xmm8
   2240 	pslld	$32-25,%xmm5
   2241 	pxor	%xmm8,%xmm5
   2242 	movdqa	%xmm4,%xmm8
   2243 	psrld	$25,%xmm8
   2244 	pslld	$32-25,%xmm4
   2245 	pxor	%xmm8,%xmm4
   2246 	movdqa	80(%rbp),%xmm8
   2247 .byte	102,15,58,15,255,4
   2248 .byte	102,69,15,58,15,219,8
   2249 .byte	102,69,15,58,15,255,12
   2250 .byte	102,15,58,15,246,4
   2251 .byte	102,69,15,58,15,210,8
   2252 .byte	102,69,15,58,15,246,12
   2253 .byte	102,15,58,15,237,4
   2254 .byte	102,69,15,58,15,201,8
   2255 .byte	102,69,15,58,15,237,12
   2256 .byte	102,15,58,15,228,4
   2257 .byte	102,69,15,58,15,192,8
   2258 .byte	102,69,15,58,15,228,12
   2259 	movdqa	%xmm8,80(%rbp)
   2260 	movdqa	.rol16(%rip),%xmm8
   2261 	paddd	%xmm7,%xmm3
   2262 	paddd	%xmm6,%xmm2
   2263 	paddd	%xmm5,%xmm1
   2264 	paddd	%xmm4,%xmm0
   2265 	pxor	%xmm3,%xmm15
   2266 	pxor	%xmm2,%xmm14
   2267 	pxor	%xmm1,%xmm13
   2268 	pxor	%xmm0,%xmm12
   2269 .byte	102,69,15,56,0,248
   2270 .byte	102,69,15,56,0,240
   2271 .byte	102,69,15,56,0,232
   2272 .byte	102,69,15,56,0,224
   2273 	movdqa	80(%rbp),%xmm8
   2274 	paddd	%xmm15,%xmm11
   2275 	paddd	%xmm14,%xmm10
   2276 	paddd	%xmm13,%xmm9
   2277 	paddd	%xmm12,%xmm8
   2278 	pxor	%xmm11,%xmm7
   2279 	pxor	%xmm10,%xmm6
   2280 	pxor	%xmm9,%xmm5
   2281 	pxor	%xmm8,%xmm4
   2282 	movdqa	%xmm8,80(%rbp)
   2283 	movdqa	%xmm7,%xmm8
   2284 	psrld	$20,%xmm8
   2285 	pslld	$32-20,%xmm7
   2286 	pxor	%xmm8,%xmm7
   2287 	movdqa	%xmm6,%xmm8
   2288 	psrld	$20,%xmm8
   2289 	pslld	$32-20,%xmm6
   2290 	pxor	%xmm8,%xmm6
   2291 	movdqa	%xmm5,%xmm8
   2292 	psrld	$20,%xmm8
   2293 	pslld	$32-20,%xmm5
   2294 	pxor	%xmm8,%xmm5
   2295 	movdqa	%xmm4,%xmm8
   2296 	psrld	$20,%xmm8
   2297 	pslld	$32-20,%xmm4
   2298 	pxor	%xmm8,%xmm4
   2299 	movdqa	.rol8(%rip),%xmm8
   2300 	paddd	%xmm7,%xmm3
   2301 	paddd	%xmm6,%xmm2
   2302 	paddd	%xmm5,%xmm1
   2303 	paddd	%xmm4,%xmm0
   2304 	pxor	%xmm3,%xmm15
   2305 	pxor	%xmm2,%xmm14
   2306 	pxor	%xmm1,%xmm13
   2307 	pxor	%xmm0,%xmm12
   2308 .byte	102,69,15,56,0,248
   2309 .byte	102,69,15,56,0,240
   2310 .byte	102,69,15,56,0,232
   2311 .byte	102,69,15,56,0,224
   2312 	movdqa	80(%rbp),%xmm8
   2313 	paddd	%xmm15,%xmm11
   2314 	paddd	%xmm14,%xmm10
   2315 	paddd	%xmm13,%xmm9
   2316 	paddd	%xmm12,%xmm8
   2317 	pxor	%xmm11,%xmm7
   2318 	pxor	%xmm10,%xmm6
   2319 	pxor	%xmm9,%xmm5
   2320 	pxor	%xmm8,%xmm4
   2321 	movdqa	%xmm8,80(%rbp)
   2322 	movdqa	%xmm7,%xmm8
   2323 	psrld	$25,%xmm8
   2324 	pslld	$32-25,%xmm7
   2325 	pxor	%xmm8,%xmm7
   2326 	movdqa	%xmm6,%xmm8
   2327 	psrld	$25,%xmm8
   2328 	pslld	$32-25,%xmm6
   2329 	pxor	%xmm8,%xmm6
   2330 	movdqa	%xmm5,%xmm8
   2331 	psrld	$25,%xmm8
   2332 	pslld	$32-25,%xmm5
   2333 	pxor	%xmm8,%xmm5
   2334 	movdqa	%xmm4,%xmm8
   2335 	psrld	$25,%xmm8
   2336 	pslld	$32-25,%xmm4
   2337 	pxor	%xmm8,%xmm4
   2338 	movdqa	80(%rbp),%xmm8
   2339 .byte	102,15,58,15,255,12
   2340 .byte	102,69,15,58,15,219,8
   2341 .byte	102,69,15,58,15,255,4
   2342 .byte	102,15,58,15,246,12
   2343 .byte	102,69,15,58,15,210,8
   2344 .byte	102,69,15,58,15,246,4
   2345 .byte	102,15,58,15,237,12
   2346 .byte	102,69,15,58,15,201,8
   2347 .byte	102,69,15,58,15,237,4
   2348 .byte	102,15,58,15,228,12
   2349 .byte	102,69,15,58,15,192,8
   2350 .byte	102,69,15,58,15,228,4
   2351 
   2352 	decq	%r10
   2353 	jnz	1b
   2354 	paddd	.chacha20_consts(%rip),%xmm3
   2355 	paddd	48(%rbp),%xmm7
   2356 	paddd	64(%rbp),%xmm11
   2357 	paddd	144(%rbp),%xmm15
   2358 	paddd	.chacha20_consts(%rip),%xmm2
   2359 	paddd	48(%rbp),%xmm6
   2360 	paddd	64(%rbp),%xmm10
   2361 	paddd	128(%rbp),%xmm14
   2362 	paddd	.chacha20_consts(%rip),%xmm1
   2363 	paddd	48(%rbp),%xmm5
   2364 	paddd	64(%rbp),%xmm9
   2365 	paddd	112(%rbp),%xmm13
   2366 	paddd	.chacha20_consts(%rip),%xmm0
   2367 	paddd	48(%rbp),%xmm4
   2368 	paddd	64(%rbp),%xmm8
   2369 	paddd	96(%rbp),%xmm12
   2370 
   2371 
   2372 	pand	.clamp(%rip),%xmm3
   2373 	movdqa	%xmm3,0(%rbp)
   2374 	movdqa	%xmm7,16(%rbp)
   2375 
   2376 	movq	%r8,%r8
   2377 	call	poly_hash_ad_internal
   2378 	movdqu	0 + 0(%rsi),%xmm3
   2379 	movdqu	16 + 0(%rsi),%xmm7
   2380 	movdqu	32 + 0(%rsi),%xmm11
   2381 	movdqu	48 + 0(%rsi),%xmm15
   2382 	pxor	%xmm3,%xmm2
   2383 	pxor	%xmm7,%xmm6
   2384 	pxor	%xmm11,%xmm10
   2385 	pxor	%xmm14,%xmm15
   2386 	movdqu	%xmm2,0 + 0(%rdi)
   2387 	movdqu	%xmm6,16 + 0(%rdi)
   2388 	movdqu	%xmm10,32 + 0(%rdi)
   2389 	movdqu	%xmm15,48 + 0(%rdi)
   2390 	movdqu	0 + 64(%rsi),%xmm3
   2391 	movdqu	16 + 64(%rsi),%xmm7
   2392 	movdqu	32 + 64(%rsi),%xmm11
   2393 	movdqu	48 + 64(%rsi),%xmm15
   2394 	pxor	%xmm3,%xmm1
   2395 	pxor	%xmm7,%xmm5
   2396 	pxor	%xmm11,%xmm9
   2397 	pxor	%xmm13,%xmm15
   2398 	movdqu	%xmm1,0 + 64(%rdi)
   2399 	movdqu	%xmm5,16 + 64(%rdi)
   2400 	movdqu	%xmm9,32 + 64(%rdi)
   2401 	movdqu	%xmm15,48 + 64(%rdi)
   2402 
   2403 	cmpq	$192,%rbx
   2404 	ja	1f
   2405 	movq	$128,%rcx
   2406 	subq	$128,%rbx
   2407 	leaq	128(%rsi),%rsi
   2408 	jmp	seal_sse_128_seal_hash
   2409 1:
   2410 	movdqu	0 + 128(%rsi),%xmm3
   2411 	movdqu	16 + 128(%rsi),%xmm7
   2412 	movdqu	32 + 128(%rsi),%xmm11
   2413 	movdqu	48 + 128(%rsi),%xmm15
   2414 	pxor	%xmm3,%xmm0
   2415 	pxor	%xmm7,%xmm4
   2416 	pxor	%xmm11,%xmm8
   2417 	pxor	%xmm12,%xmm15
   2418 	movdqu	%xmm0,0 + 128(%rdi)
   2419 	movdqu	%xmm4,16 + 128(%rdi)
   2420 	movdqu	%xmm8,32 + 128(%rdi)
   2421 	movdqu	%xmm15,48 + 128(%rdi)
   2422 
2423 	movq	$192,%rcx	// dead store: %rcx is reloaded with $2 just below
   2424 	subq	$192,%rbx
   2425 	leaq	192(%rsi),%rsi
   2426 	movq	$2,%rcx
   2427 	movq	$8,%r8
   2428 	cmpq	$64,%rbx
   2429 	jbe	seal_sse_tail_64
   2430 	cmpq	$128,%rbx
   2431 	jbe	seal_sse_tail_128
   2432 	cmpq	$192,%rbx
   2433 	jbe	seal_sse_tail_192
   2434 
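// Main seal loop: generate 4 x 64 bytes of keystream per trip while
// absorbing already-written ciphertext into the Poly1305 state.  Each
// absorbed 16-byte block computes acc = (acc + block + 2^128) * r
// mod 2^130 - 5; the mulq chain for that multiply is interleaved with
// the vector rounds to hide latency.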
   2435 1:
   2436 	movdqa	.chacha20_consts(%rip),%xmm0
   2437 	movdqa	48(%rbp),%xmm4
   2438 	movdqa	64(%rbp),%xmm8
   2439 	movdqa	%xmm0,%xmm1
   2440 	movdqa	%xmm4,%xmm5
   2441 	movdqa	%xmm8,%xmm9
   2442 	movdqa	%xmm0,%xmm2
   2443 	movdqa	%xmm4,%xmm6
   2444 	movdqa	%xmm8,%xmm10
   2445 	movdqa	%xmm0,%xmm3
   2446 	movdqa	%xmm4,%xmm7
   2447 	movdqa	%xmm8,%xmm11
   2448 	movdqa	96(%rbp),%xmm15
   2449 	paddd	.sse_inc(%rip),%xmm15
   2450 	movdqa	%xmm15,%xmm14
   2451 	paddd	.sse_inc(%rip),%xmm14
   2452 	movdqa	%xmm14,%xmm13
   2453 	paddd	.sse_inc(%rip),%xmm13
   2454 	movdqa	%xmm13,%xmm12
   2455 	paddd	.sse_inc(%rip),%xmm12
   2456 	movdqa	%xmm12,96(%rbp)
   2457 	movdqa	%xmm13,112(%rbp)
   2458 	movdqa	%xmm14,128(%rbp)
   2459 	movdqa	%xmm15,144(%rbp)
   2460 
   2461 2:
   2462 	movdqa	%xmm8,80(%rbp)
   2463 	movdqa	.rol16(%rip),%xmm8
   2464 	paddd	%xmm7,%xmm3
   2465 	paddd	%xmm6,%xmm2
   2466 	paddd	%xmm5,%xmm1
   2467 	paddd	%xmm4,%xmm0
   2468 	pxor	%xmm3,%xmm15
   2469 	pxor	%xmm2,%xmm14
   2470 	pxor	%xmm1,%xmm13
   2471 	pxor	%xmm0,%xmm12
   2472 .byte	102,69,15,56,0,248
   2473 .byte	102,69,15,56,0,240
   2474 .byte	102,69,15,56,0,232
   2475 .byte	102,69,15,56,0,224
   2476 	movdqa	80(%rbp),%xmm8
   2477 	paddd	%xmm15,%xmm11
   2478 	paddd	%xmm14,%xmm10
   2479 	paddd	%xmm13,%xmm9
   2480 	paddd	%xmm12,%xmm8
   2481 	pxor	%xmm11,%xmm7
   2482 	addq	0(%rdi),%r10
   2483 	adcq	8+0(%rdi),%r11
   2484 	adcq	$1,%r12
   2485 	pxor	%xmm10,%xmm6
   2486 	pxor	%xmm9,%xmm5
   2487 	pxor	%xmm8,%xmm4
   2488 	movdqa	%xmm8,80(%rbp)
   2489 	movdqa	%xmm7,%xmm8
   2490 	psrld	$20,%xmm8
   2491 	pslld	$32-20,%xmm7
   2492 	pxor	%xmm8,%xmm7
   2493 	movdqa	%xmm6,%xmm8
   2494 	psrld	$20,%xmm8
   2495 	pslld	$32-20,%xmm6
   2496 	pxor	%xmm8,%xmm6
   2497 	movdqa	%xmm5,%xmm8
   2498 	psrld	$20,%xmm8
   2499 	pslld	$32-20,%xmm5
   2500 	pxor	%xmm8,%xmm5
   2501 	movdqa	%xmm4,%xmm8
   2502 	psrld	$20,%xmm8
   2503 	pslld	$32-20,%xmm4
   2504 	pxor	%xmm8,%xmm4
   2505 	movq	0+0(%rbp),%rax
   2506 	movq	%rax,%r15
   2507 	mulq	%r10
   2508 	movq	%rax,%r13
   2509 	movq	%rdx,%r14
   2510 	movq	0+0(%rbp),%rax
   2511 	mulq	%r11
   2512 	imulq	%r12,%r15
   2513 	addq	%rax,%r14
   2514 	adcq	%rdx,%r15
   2515 	movdqa	.rol8(%rip),%xmm8
   2516 	paddd	%xmm7,%xmm3
   2517 	paddd	%xmm6,%xmm2
   2518 	paddd	%xmm5,%xmm1
   2519 	paddd	%xmm4,%xmm0
   2520 	pxor	%xmm3,%xmm15
   2521 	pxor	%xmm2,%xmm14
   2522 	pxor	%xmm1,%xmm13
   2523 	pxor	%xmm0,%xmm12
   2524 .byte	102,69,15,56,0,248
   2525 .byte	102,69,15,56,0,240
   2526 .byte	102,69,15,56,0,232
   2527 .byte	102,69,15,56,0,224
   2528 	movdqa	80(%rbp),%xmm8
   2529 	paddd	%xmm15,%xmm11
   2530 	paddd	%xmm14,%xmm10
   2531 	paddd	%xmm13,%xmm9
   2532 	paddd	%xmm12,%xmm8
   2533 	pxor	%xmm11,%xmm7
   2534 	pxor	%xmm10,%xmm6
   2535 	movq	8+0(%rbp),%rax
   2536 	movq	%rax,%r9
   2537 	mulq	%r10
   2538 	addq	%rax,%r14
   2539 	adcq	$0,%rdx
   2540 	movq	%rdx,%r10
   2541 	movq	8+0(%rbp),%rax
   2542 	mulq	%r11
   2543 	addq	%rax,%r15
   2544 	adcq	$0,%rdx
   2545 	pxor	%xmm9,%xmm5
   2546 	pxor	%xmm8,%xmm4
   2547 	movdqa	%xmm8,80(%rbp)
   2548 	movdqa	%xmm7,%xmm8
   2549 	psrld	$25,%xmm8
   2550 	pslld	$32-25,%xmm7
   2551 	pxor	%xmm8,%xmm7
   2552 	movdqa	%xmm6,%xmm8
   2553 	psrld	$25,%xmm8
   2554 	pslld	$32-25,%xmm6
   2555 	pxor	%xmm8,%xmm6
   2556 	movdqa	%xmm5,%xmm8
   2557 	psrld	$25,%xmm8
   2558 	pslld	$32-25,%xmm5
   2559 	pxor	%xmm8,%xmm5
   2560 	movdqa	%xmm4,%xmm8
   2561 	psrld	$25,%xmm8
   2562 	pslld	$32-25,%xmm4
   2563 	pxor	%xmm8,%xmm4
   2564 	movdqa	80(%rbp),%xmm8
   2565 	imulq	%r12,%r9
   2566 	addq	%r10,%r15
   2567 	adcq	%rdx,%r9
   2568 .byte	102,15,58,15,255,4
   2569 .byte	102,69,15,58,15,219,8
   2570 .byte	102,69,15,58,15,255,12
   2571 .byte	102,15,58,15,246,4
   2572 .byte	102,69,15,58,15,210,8
   2573 .byte	102,69,15,58,15,246,12
   2574 .byte	102,15,58,15,237,4
   2575 .byte	102,69,15,58,15,201,8
   2576 .byte	102,69,15,58,15,237,12
   2577 .byte	102,15,58,15,228,4
   2578 .byte	102,69,15,58,15,192,8
   2579 .byte	102,69,15,58,15,228,12
   2580 	movdqa	%xmm8,80(%rbp)
   2581 	movdqa	.rol16(%rip),%xmm8
   2582 	paddd	%xmm7,%xmm3
   2583 	paddd	%xmm6,%xmm2
   2584 	paddd	%xmm5,%xmm1
   2585 	paddd	%xmm4,%xmm0
   2586 	pxor	%xmm3,%xmm15
   2587 	pxor	%xmm2,%xmm14
   2588 	movq	%r13,%r10
   2589 	movq	%r14,%r11
   2590 	movq	%r15,%r12
   2591 	andq	$3,%r12
   2592 	movq	%r15,%r13
   2593 	andq	$-4,%r13
   2594 	movq	%r9,%r14
   2595 	shrdq	$2,%r9,%r15
   2596 	shrq	$2,%r9
   2597 	addq	%r13,%r10
   2598 	adcq	%r14,%r11
   2599 	adcq	$0,%r12
   2600 	addq	%r15,%r10
   2601 	adcq	%r9,%r11
   2602 	adcq	$0,%r12
   2603 	pxor	%xmm1,%xmm13
   2604 	pxor	%xmm0,%xmm12
   2605 .byte	102,69,15,56,0,248
   2606 .byte	102,69,15,56,0,240
   2607 .byte	102,69,15,56,0,232
   2608 .byte	102,69,15,56,0,224
   2609 	movdqa	80(%rbp),%xmm8
   2610 	paddd	%xmm15,%xmm11
   2611 	paddd	%xmm14,%xmm10
   2612 	paddd	%xmm13,%xmm9
   2613 	paddd	%xmm12,%xmm8
   2614 	pxor	%xmm11,%xmm7
   2615 	pxor	%xmm10,%xmm6
   2616 	pxor	%xmm9,%xmm5
   2617 	pxor	%xmm8,%xmm4
   2618 	movdqa	%xmm8,80(%rbp)
   2619 	movdqa	%xmm7,%xmm8
   2620 	psrld	$20,%xmm8
   2621 	pslld	$32-20,%xmm7
   2622 	pxor	%xmm8,%xmm7
   2623 	movdqa	%xmm6,%xmm8
   2624 	psrld	$20,%xmm8
   2625 	pslld	$32-20,%xmm6
   2626 	pxor	%xmm8,%xmm6
   2627 	movdqa	%xmm5,%xmm8
   2628 	psrld	$20,%xmm8
   2629 	pslld	$32-20,%xmm5
   2630 	pxor	%xmm8,%xmm5
   2631 	movdqa	%xmm4,%xmm8
   2632 	psrld	$20,%xmm8
   2633 	pslld	$32-20,%xmm4
   2634 	pxor	%xmm8,%xmm4
   2635 	movdqa	.rol8(%rip),%xmm8
   2636 	paddd	%xmm7,%xmm3
   2637 	paddd	%xmm6,%xmm2
   2638 	paddd	%xmm5,%xmm1
   2639 	paddd	%xmm4,%xmm0
   2640 	pxor	%xmm3,%xmm15
   2641 	pxor	%xmm2,%xmm14
   2642 	pxor	%xmm1,%xmm13
   2643 	pxor	%xmm0,%xmm12
   2644 .byte	102,69,15,56,0,248
   2645 .byte	102,69,15,56,0,240
   2646 .byte	102,69,15,56,0,232
   2647 .byte	102,69,15,56,0,224
   2648 	movdqa	80(%rbp),%xmm8
   2649 	paddd	%xmm15,%xmm11
   2650 	paddd	%xmm14,%xmm10
   2651 	paddd	%xmm13,%xmm9
   2652 	paddd	%xmm12,%xmm8
   2653 	pxor	%xmm11,%xmm7
   2654 	pxor	%xmm10,%xmm6
   2655 	pxor	%xmm9,%xmm5
   2656 	pxor	%xmm8,%xmm4
   2657 	movdqa	%xmm8,80(%rbp)
   2658 	movdqa	%xmm7,%xmm8
   2659 	psrld	$25,%xmm8
   2660 	pslld	$32-25,%xmm7
   2661 	pxor	%xmm8,%xmm7
   2662 	movdqa	%xmm6,%xmm8
   2663 	psrld	$25,%xmm8
   2664 	pslld	$32-25,%xmm6
   2665 	pxor	%xmm8,%xmm6
   2666 	movdqa	%xmm5,%xmm8
   2667 	psrld	$25,%xmm8
   2668 	pslld	$32-25,%xmm5
   2669 	pxor	%xmm8,%xmm5
   2670 	movdqa	%xmm4,%xmm8
   2671 	psrld	$25,%xmm8
   2672 	pslld	$32-25,%xmm4
   2673 	pxor	%xmm8,%xmm4
   2674 	movdqa	80(%rbp),%xmm8
   2675 .byte	102,15,58,15,255,12
   2676 .byte	102,69,15,58,15,219,8
   2677 .byte	102,69,15,58,15,255,4
   2678 .byte	102,15,58,15,246,12
   2679 .byte	102,69,15,58,15,210,8
   2680 .byte	102,69,15,58,15,246,4
   2681 .byte	102,15,58,15,237,12
   2682 .byte	102,69,15,58,15,201,8
   2683 .byte	102,69,15,58,15,237,4
   2684 .byte	102,15,58,15,228,12
   2685 .byte	102,69,15,58,15,192,8
   2686 .byte	102,69,15,58,15,228,4
   2687 
   2688 	leaq	16(%rdi),%rdi
   2689 	decq	%r8
   2690 	jge	2b
   2691 	addq	0(%rdi),%r10
   2692 	adcq	8+0(%rdi),%r11
   2693 	adcq	$1,%r12
   2694 	movq	0+0(%rbp),%rax
   2695 	movq	%rax,%r15
   2696 	mulq	%r10
   2697 	movq	%rax,%r13
   2698 	movq	%rdx,%r14
   2699 	movq	0+0(%rbp),%rax
   2700 	mulq	%r11
   2701 	imulq	%r12,%r15
   2702 	addq	%rax,%r14
   2703 	adcq	%rdx,%r15
   2704 	movq	8+0(%rbp),%rax
   2705 	movq	%rax,%r9
   2706 	mulq	%r10
   2707 	addq	%rax,%r14
   2708 	adcq	$0,%rdx
   2709 	movq	%rdx,%r10
   2710 	movq	8+0(%rbp),%rax
   2711 	mulq	%r11
   2712 	addq	%rax,%r15
   2713 	adcq	$0,%rdx
   2714 	imulq	%r12,%r9
   2715 	addq	%r10,%r15
   2716 	adcq	%rdx,%r9
   2717 	movq	%r13,%r10
   2718 	movq	%r14,%r11
   2719 	movq	%r15,%r12
   2720 	andq	$3,%r12
   2721 	movq	%r15,%r13
   2722 	andq	$-4,%r13
   2723 	movq	%r9,%r14
   2724 	shrdq	$2,%r9,%r15
   2725 	shrq	$2,%r9
   2726 	addq	%r13,%r10
   2727 	adcq	%r14,%r11
   2728 	adcq	$0,%r12
   2729 	addq	%r15,%r10
   2730 	adcq	%r9,%r11
   2731 	adcq	$0,%r12
   2732 
   2733 	leaq	16(%rdi),%rdi
   2734 	decq	%rcx
   2735 	jg	2b
   2736 	paddd	.chacha20_consts(%rip),%xmm3
   2737 	paddd	48(%rbp),%xmm7
   2738 	paddd	64(%rbp),%xmm11
   2739 	paddd	144(%rbp),%xmm15
   2740 	paddd	.chacha20_consts(%rip),%xmm2
   2741 	paddd	48(%rbp),%xmm6
   2742 	paddd	64(%rbp),%xmm10
   2743 	paddd	128(%rbp),%xmm14
   2744 	paddd	.chacha20_consts(%rip),%xmm1
   2745 	paddd	48(%rbp),%xmm5
   2746 	paddd	64(%rbp),%xmm9
   2747 	paddd	112(%rbp),%xmm13
   2748 	paddd	.chacha20_consts(%rip),%xmm0
   2749 	paddd	48(%rbp),%xmm4
   2750 	paddd	64(%rbp),%xmm8
   2751 	paddd	96(%rbp),%xmm12
   2752 
2753 	movdqa	%xmm14,80(%rbp)
   2755 	movdqu	0 + 0(%rsi),%xmm14
   2756 	pxor	%xmm3,%xmm14
   2757 	movdqu	%xmm14,0 + 0(%rdi)
   2758 	movdqu	16 + 0(%rsi),%xmm14
   2759 	pxor	%xmm7,%xmm14
   2760 	movdqu	%xmm14,16 + 0(%rdi)
   2761 	movdqu	32 + 0(%rsi),%xmm14
   2762 	pxor	%xmm11,%xmm14
   2763 	movdqu	%xmm14,32 + 0(%rdi)
   2764 	movdqu	48 + 0(%rsi),%xmm14
   2765 	pxor	%xmm15,%xmm14
   2766 	movdqu	%xmm14,48 + 0(%rdi)
   2767 
   2768 	movdqa	80(%rbp),%xmm14
   2769 	movdqu	0 + 64(%rsi),%xmm3
   2770 	movdqu	16 + 64(%rsi),%xmm7
   2771 	movdqu	32 + 64(%rsi),%xmm11
   2772 	movdqu	48 + 64(%rsi),%xmm15
   2773 	pxor	%xmm3,%xmm2
   2774 	pxor	%xmm7,%xmm6
   2775 	pxor	%xmm11,%xmm10
   2776 	pxor	%xmm14,%xmm15
   2777 	movdqu	%xmm2,0 + 64(%rdi)
   2778 	movdqu	%xmm6,16 + 64(%rdi)
   2779 	movdqu	%xmm10,32 + 64(%rdi)
   2780 	movdqu	%xmm15,48 + 64(%rdi)
   2781 	movdqu	0 + 128(%rsi),%xmm3
   2782 	movdqu	16 + 128(%rsi),%xmm7
   2783 	movdqu	32 + 128(%rsi),%xmm11
   2784 	movdqu	48 + 128(%rsi),%xmm15
   2785 	pxor	%xmm3,%xmm1
   2786 	pxor	%xmm7,%xmm5
   2787 	pxor	%xmm11,%xmm9
   2788 	pxor	%xmm13,%xmm15
   2789 	movdqu	%xmm1,0 + 128(%rdi)
   2790 	movdqu	%xmm5,16 + 128(%rdi)
   2791 	movdqu	%xmm9,32 + 128(%rdi)
   2792 	movdqu	%xmm15,48 + 128(%rdi)
   2793 
   2794 	cmpq	$256,%rbx
   2795 	ja	3f
   2796 
   2797 	movq	$192,%rcx
   2798 	subq	$192,%rbx
   2799 	leaq	192(%rsi),%rsi
   2800 	jmp	seal_sse_128_seal_hash
   2801 3:
   2802 	movdqu	0 + 192(%rsi),%xmm3
   2803 	movdqu	16 + 192(%rsi),%xmm7
   2804 	movdqu	32 + 192(%rsi),%xmm11
   2805 	movdqu	48 + 192(%rsi),%xmm15
   2806 	pxor	%xmm3,%xmm0
   2807 	pxor	%xmm7,%xmm4
   2808 	pxor	%xmm11,%xmm8
   2809 	pxor	%xmm12,%xmm15
   2810 	movdqu	%xmm0,0 + 192(%rdi)
   2811 	movdqu	%xmm4,16 + 192(%rdi)
   2812 	movdqu	%xmm8,32 + 192(%rdi)
   2813 	movdqu	%xmm15,48 + 192(%rdi)
   2814 
   2815 	leaq	256(%rsi),%rsi
   2816 	subq	$256,%rbx
   2817 	movq	$6,%rcx
   2818 	movq	$4,%r8
   2819 	cmpq	$192,%rbx
   2820 	jg	1b
   2821 	movq	%rbx,%rcx
   2822 	testq	%rbx,%rbx
   2823 	je	seal_sse_128_seal_hash
   2824 	movq	$6,%rcx
   2825 	cmpq	$64,%rbx
   2826 	jg	3f
   2827 
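// At most 64 bytes remain: one more ChaCha20 block.  Label 1 hashes one
// 16-byte ciphertext block per iteration before the round pair in label
// 2; once %rcx runs out, label 2 finishes the remaining rounds alone.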
   2828 seal_sse_tail_64:
   2829 	movdqa	.chacha20_consts(%rip),%xmm0
   2830 	movdqa	48(%rbp),%xmm4
   2831 	movdqa	64(%rbp),%xmm8
   2832 	movdqa	96(%rbp),%xmm12
   2833 	paddd	.sse_inc(%rip),%xmm12
   2834 	movdqa	%xmm12,96(%rbp)
   2835 
   2836 1:
   2837 	addq	0(%rdi),%r10
   2838 	adcq	8+0(%rdi),%r11
   2839 	adcq	$1,%r12
   2840 	movq	0+0(%rbp),%rax
   2841 	movq	%rax,%r15
   2842 	mulq	%r10
   2843 	movq	%rax,%r13
   2844 	movq	%rdx,%r14
   2845 	movq	0+0(%rbp),%rax
   2846 	mulq	%r11
   2847 	imulq	%r12,%r15
   2848 	addq	%rax,%r14
   2849 	adcq	%rdx,%r15
   2850 	movq	8+0(%rbp),%rax
   2851 	movq	%rax,%r9
   2852 	mulq	%r10
   2853 	addq	%rax,%r14
   2854 	adcq	$0,%rdx
   2855 	movq	%rdx,%r10
   2856 	movq	8+0(%rbp),%rax
   2857 	mulq	%r11
   2858 	addq	%rax,%r15
   2859 	adcq	$0,%rdx
   2860 	imulq	%r12,%r9
   2861 	addq	%r10,%r15
   2862 	adcq	%rdx,%r9
   2863 	movq	%r13,%r10
   2864 	movq	%r14,%r11
   2865 	movq	%r15,%r12
   2866 	andq	$3,%r12
   2867 	movq	%r15,%r13
   2868 	andq	$-4,%r13
   2869 	movq	%r9,%r14
   2870 	shrdq	$2,%r9,%r15
   2871 	shrq	$2,%r9
   2872 	addq	%r13,%r10
   2873 	adcq	%r14,%r11
   2874 	adcq	$0,%r12
   2875 	addq	%r15,%r10
   2876 	adcq	%r9,%r11
   2877 	adcq	$0,%r12
   2878 
   2879 	leaq	16(%rdi),%rdi
   2880 2:
   2881 	paddd	%xmm4,%xmm0
   2882 	pxor	%xmm0,%xmm12
   2883 	pshufb	.rol16(%rip),%xmm12
   2884 	paddd	%xmm12,%xmm8
   2885 	pxor	%xmm8,%xmm4
   2886 	movdqa	%xmm4,%xmm3
   2887 	pslld	$12,%xmm3
   2888 	psrld	$20,%xmm4
   2889 	pxor	%xmm3,%xmm4
   2890 	paddd	%xmm4,%xmm0
   2891 	pxor	%xmm0,%xmm12
   2892 	pshufb	.rol8(%rip),%xmm12
   2893 	paddd	%xmm12,%xmm8
   2894 	pxor	%xmm8,%xmm4
   2895 	movdqa	%xmm4,%xmm3
   2896 	pslld	$7,%xmm3
   2897 	psrld	$25,%xmm4
   2898 	pxor	%xmm3,%xmm4
   2899 .byte	102,15,58,15,228,4
   2900 .byte	102,69,15,58,15,192,8
   2901 .byte	102,69,15,58,15,228,12
   2902 	paddd	%xmm4,%xmm0
   2903 	pxor	%xmm0,%xmm12
   2904 	pshufb	.rol16(%rip),%xmm12
   2905 	paddd	%xmm12,%xmm8
   2906 	pxor	%xmm8,%xmm4
   2907 	movdqa	%xmm4,%xmm3
   2908 	pslld	$12,%xmm3
   2909 	psrld	$20,%xmm4
   2910 	pxor	%xmm3,%xmm4
   2911 	paddd	%xmm4,%xmm0
   2912 	pxor	%xmm0,%xmm12
   2913 	pshufb	.rol8(%rip),%xmm12
   2914 	paddd	%xmm12,%xmm8
   2915 	pxor	%xmm8,%xmm4
   2916 	movdqa	%xmm4,%xmm3
   2917 	pslld	$7,%xmm3
   2918 	psrld	$25,%xmm4
   2919 	pxor	%xmm3,%xmm4
   2920 .byte	102,15,58,15,228,12
   2921 .byte	102,69,15,58,15,192,8
   2922 .byte	102,69,15,58,15,228,4
   2923 	addq	0(%rdi),%r10
   2924 	adcq	8+0(%rdi),%r11
   2925 	adcq	$1,%r12
   2926 	movq	0+0(%rbp),%rax
   2927 	movq	%rax,%r15
   2928 	mulq	%r10
   2929 	movq	%rax,%r13
   2930 	movq	%rdx,%r14
   2931 	movq	0+0(%rbp),%rax
   2932 	mulq	%r11
   2933 	imulq	%r12,%r15
   2934 	addq	%rax,%r14
   2935 	adcq	%rdx,%r15
   2936 	movq	8+0(%rbp),%rax
   2937 	movq	%rax,%r9
   2938 	mulq	%r10
   2939 	addq	%rax,%r14
   2940 	adcq	$0,%rdx
   2941 	movq	%rdx,%r10
   2942 	movq	8+0(%rbp),%rax
   2943 	mulq	%r11
   2944 	addq	%rax,%r15
   2945 	adcq	$0,%rdx
   2946 	imulq	%r12,%r9
   2947 	addq	%r10,%r15
   2948 	adcq	%rdx,%r9
   2949 	movq	%r13,%r10
   2950 	movq	%r14,%r11
   2951 	movq	%r15,%r12
   2952 	andq	$3,%r12
   2953 	movq	%r15,%r13
   2954 	andq	$-4,%r13
   2955 	movq	%r9,%r14
   2956 	shrdq	$2,%r9,%r15
   2957 	shrq	$2,%r9
   2958 	addq	%r13,%r10
   2959 	adcq	%r14,%r11
   2960 	adcq	$0,%r12
   2961 	addq	%r15,%r10
   2962 	adcq	%r9,%r11
   2963 	adcq	$0,%r12
   2964 
   2965 	leaq	16(%rdi),%rdi
   2966 	decq	%rcx
   2967 	jg	1b
   2968 	decq	%r8
   2969 	jge	2b
   2970 	paddd	.chacha20_consts(%rip),%xmm0
   2971 	paddd	48(%rbp),%xmm4
   2972 	paddd	64(%rbp),%xmm8
   2973 	paddd	96(%rbp),%xmm12
   2974 
   2975 	jmp	seal_sse_128_seal
   2976 3:
   2977 	cmpq	$128,%rbx
   2978 	jg	3f
   2979 
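// 65..128 bytes remain: two ChaCha20 blocks (counters from 96(%rbp)),
// again interleaving Poly1305 over ciphertext that is already out.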
   2980 seal_sse_tail_128:
   2981 	movdqa	.chacha20_consts(%rip),%xmm0
   2982 	movdqa	48(%rbp),%xmm4
   2983 	movdqa	64(%rbp),%xmm8
   2984 	movdqa	%xmm0,%xmm1
   2985 	movdqa	%xmm4,%xmm5
   2986 	movdqa	%xmm8,%xmm9
   2987 	movdqa	96(%rbp),%xmm13
   2988 	paddd	.sse_inc(%rip),%xmm13
   2989 	movdqa	%xmm13,%xmm12
   2990 	paddd	.sse_inc(%rip),%xmm12
   2991 	movdqa	%xmm12,96(%rbp)
   2992 	movdqa	%xmm13,112(%rbp)
   2993 
   2994 1:
   2995 	addq	0(%rdi),%r10
   2996 	adcq	8+0(%rdi),%r11
   2997 	adcq	$1,%r12
   2998 	movq	0+0(%rbp),%rax
   2999 	movq	%rax,%r15
   3000 	mulq	%r10
   3001 	movq	%rax,%r13
   3002 	movq	%rdx,%r14
   3003 	movq	0+0(%rbp),%rax
   3004 	mulq	%r11
   3005 	imulq	%r12,%r15
   3006 	addq	%rax,%r14
   3007 	adcq	%rdx,%r15
   3008 	movq	8+0(%rbp),%rax
   3009 	movq	%rax,%r9
   3010 	mulq	%r10
   3011 	addq	%rax,%r14
   3012 	adcq	$0,%rdx
   3013 	movq	%rdx,%r10
   3014 	movq	8+0(%rbp),%rax
   3015 	mulq	%r11
   3016 	addq	%rax,%r15
   3017 	adcq	$0,%rdx
   3018 	imulq	%r12,%r9
   3019 	addq	%r10,%r15
   3020 	adcq	%rdx,%r9
   3021 	movq	%r13,%r10
   3022 	movq	%r14,%r11
   3023 	movq	%r15,%r12
   3024 	andq	$3,%r12
   3025 	movq	%r15,%r13
   3026 	andq	$-4,%r13
   3027 	movq	%r9,%r14
   3028 	shrdq	$2,%r9,%r15
   3029 	shrq	$2,%r9
   3030 	addq	%r13,%r10
   3031 	adcq	%r14,%r11
   3032 	adcq	$0,%r12
   3033 	addq	%r15,%r10
   3034 	adcq	%r9,%r11
   3035 	adcq	$0,%r12
   3036 
   3037 	leaq	16(%rdi),%rdi
   3038 2:
   3039 	paddd	%xmm4,%xmm0
   3040 	pxor	%xmm0,%xmm12
   3041 	pshufb	.rol16(%rip),%xmm12
   3042 	paddd	%xmm12,%xmm8
   3043 	pxor	%xmm8,%xmm4
   3044 	movdqa	%xmm4,%xmm3
   3045 	pslld	$12,%xmm3
   3046 	psrld	$20,%xmm4
   3047 	pxor	%xmm3,%xmm4
   3048 	paddd	%xmm4,%xmm0
   3049 	pxor	%xmm0,%xmm12
   3050 	pshufb	.rol8(%rip),%xmm12
   3051 	paddd	%xmm12,%xmm8
   3052 	pxor	%xmm8,%xmm4
   3053 	movdqa	%xmm4,%xmm3
   3054 	pslld	$7,%xmm3
   3055 	psrld	$25,%xmm4
   3056 	pxor	%xmm3,%xmm4
   3057 .byte	102,15,58,15,228,4
   3058 .byte	102,69,15,58,15,192,8
   3059 .byte	102,69,15,58,15,228,12
   3060 	paddd	%xmm5,%xmm1
   3061 	pxor	%xmm1,%xmm13
   3062 	pshufb	.rol16(%rip),%xmm13
   3063 	paddd	%xmm13,%xmm9
   3064 	pxor	%xmm9,%xmm5
   3065 	movdqa	%xmm5,%xmm3
   3066 	pslld	$12,%xmm3
   3067 	psrld	$20,%xmm5
   3068 	pxor	%xmm3,%xmm5
   3069 	paddd	%xmm5,%xmm1
   3070 	pxor	%xmm1,%xmm13
   3071 	pshufb	.rol8(%rip),%xmm13
   3072 	paddd	%xmm13,%xmm9
   3073 	pxor	%xmm9,%xmm5
   3074 	movdqa	%xmm5,%xmm3
   3075 	pslld	$7,%xmm3
   3076 	psrld	$25,%xmm5
   3077 	pxor	%xmm3,%xmm5
   3078 .byte	102,15,58,15,237,4
   3079 .byte	102,69,15,58,15,201,8
   3080 .byte	102,69,15,58,15,237,12
   3081 	addq	0(%rdi),%r10
   3082 	adcq	8+0(%rdi),%r11
   3083 	adcq	$1,%r12
   3084 	movq	0+0(%rbp),%rax
   3085 	movq	%rax,%r15
   3086 	mulq	%r10
   3087 	movq	%rax,%r13
   3088 	movq	%rdx,%r14
   3089 	movq	0+0(%rbp),%rax
   3090 	mulq	%r11
   3091 	imulq	%r12,%r15
   3092 	addq	%rax,%r14
   3093 	adcq	%rdx,%r15
   3094 	movq	8+0(%rbp),%rax
   3095 	movq	%rax,%r9
   3096 	mulq	%r10
   3097 	addq	%rax,%r14
   3098 	adcq	$0,%rdx
   3099 	movq	%rdx,%r10
   3100 	movq	8+0(%rbp),%rax
   3101 	mulq	%r11
   3102 	addq	%rax,%r15
   3103 	adcq	$0,%rdx
   3104 	imulq	%r12,%r9
   3105 	addq	%r10,%r15
   3106 	adcq	%rdx,%r9
   3107 	movq	%r13,%r10
   3108 	movq	%r14,%r11
   3109 	movq	%r15,%r12
   3110 	andq	$3,%r12
   3111 	movq	%r15,%r13
   3112 	andq	$-4,%r13
   3113 	movq	%r9,%r14
   3114 	shrdq	$2,%r9,%r15
   3115 	shrq	$2,%r9
   3116 	addq	%r13,%r10
   3117 	adcq	%r14,%r11
   3118 	adcq	$0,%r12
   3119 	addq	%r15,%r10
   3120 	adcq	%r9,%r11
   3121 	adcq	$0,%r12
   3122 	paddd	%xmm4,%xmm0
   3123 	pxor	%xmm0,%xmm12
   3124 	pshufb	.rol16(%rip),%xmm12
   3125 	paddd	%xmm12,%xmm8
   3126 	pxor	%xmm8,%xmm4
   3127 	movdqa	%xmm4,%xmm3
   3128 	pslld	$12,%xmm3
   3129 	psrld	$20,%xmm4
   3130 	pxor	%xmm3,%xmm4
   3131 	paddd	%xmm4,%xmm0
   3132 	pxor	%xmm0,%xmm12
   3133 	pshufb	.rol8(%rip),%xmm12
   3134 	paddd	%xmm12,%xmm8
   3135 	pxor	%xmm8,%xmm4
   3136 	movdqa	%xmm4,%xmm3
   3137 	pslld	$7,%xmm3
   3138 	psrld	$25,%xmm4
   3139 	pxor	%xmm3,%xmm4
   3140 .byte	102,15,58,15,228,12
   3141 .byte	102,69,15,58,15,192,8
   3142 .byte	102,69,15,58,15,228,4
   3143 	paddd	%xmm5,%xmm1
   3144 	pxor	%xmm1,%xmm13
   3145 	pshufb	.rol16(%rip),%xmm13
   3146 	paddd	%xmm13,%xmm9
   3147 	pxor	%xmm9,%xmm5
   3148 	movdqa	%xmm5,%xmm3
   3149 	pslld	$12,%xmm3
   3150 	psrld	$20,%xmm5
   3151 	pxor	%xmm3,%xmm5
   3152 	paddd	%xmm5,%xmm1
   3153 	pxor	%xmm1,%xmm13
   3154 	pshufb	.rol8(%rip),%xmm13
   3155 	paddd	%xmm13,%xmm9
   3156 	pxor	%xmm9,%xmm5
   3157 	movdqa	%xmm5,%xmm3
   3158 	pslld	$7,%xmm3
   3159 	psrld	$25,%xmm5
   3160 	pxor	%xmm3,%xmm5
   3161 .byte	102,15,58,15,237,12
   3162 .byte	102,69,15,58,15,201,8
   3163 .byte	102,69,15,58,15,237,4
   3164 
   3165 	leaq	16(%rdi),%rdi
   3166 	decq	%rcx
   3167 	jg	1b
   3168 	decq	%r8
   3169 	jge	2b
   3170 	paddd	.chacha20_consts(%rip),%xmm1
   3171 	paddd	48(%rbp),%xmm5
   3172 	paddd	64(%rbp),%xmm9
   3173 	paddd	112(%rbp),%xmm13
   3174 	paddd	.chacha20_consts(%rip),%xmm0
   3175 	paddd	48(%rbp),%xmm4
   3176 	paddd	64(%rbp),%xmm8
   3177 	paddd	96(%rbp),%xmm12
   3178 	movdqu	0 + 0(%rsi),%xmm3
   3179 	movdqu	16 + 0(%rsi),%xmm7
   3180 	movdqu	32 + 0(%rsi),%xmm11
   3181 	movdqu	48 + 0(%rsi),%xmm15
   3182 	pxor	%xmm3,%xmm1
   3183 	pxor	%xmm7,%xmm5
   3184 	pxor	%xmm11,%xmm9
   3185 	pxor	%xmm13,%xmm15
   3186 	movdqu	%xmm1,0 + 0(%rdi)
   3187 	movdqu	%xmm5,16 + 0(%rdi)
   3188 	movdqu	%xmm9,32 + 0(%rdi)
   3189 	movdqu	%xmm15,48 + 0(%rdi)
   3190 
   3191 	movq	$64,%rcx
   3192 	subq	$64,%rbx
   3193 	leaq	64(%rsi),%rsi
   3194 	jmp	seal_sse_128_seal_hash
   3195 3:
   3196 
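// 129..192 bytes remain: three ChaCha20 blocks, with the same
// hash/round interleaving as the 64- and 128-byte tails.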
   3197 seal_sse_tail_192:
   3198 	movdqa	.chacha20_consts(%rip),%xmm0
   3199 	movdqa	48(%rbp),%xmm4
   3200 	movdqa	64(%rbp),%xmm8
   3201 	movdqa	%xmm0,%xmm1
   3202 	movdqa	%xmm4,%xmm5
   3203 	movdqa	%xmm8,%xmm9
   3204 	movdqa	%xmm0,%xmm2
   3205 	movdqa	%xmm4,%xmm6
   3206 	movdqa	%xmm8,%xmm10
   3207 	movdqa	96(%rbp),%xmm14
   3208 	paddd	.sse_inc(%rip),%xmm14
   3209 	movdqa	%xmm14,%xmm13
   3210 	paddd	.sse_inc(%rip),%xmm13
   3211 	movdqa	%xmm13,%xmm12
   3212 	paddd	.sse_inc(%rip),%xmm12
   3213 	movdqa	%xmm12,96(%rbp)
   3214 	movdqa	%xmm13,112(%rbp)
   3215 	movdqa	%xmm14,128(%rbp)
   3216 
   3217 1:
   3218 	addq	0(%rdi),%r10
   3219 	adcq	8+0(%rdi),%r11
   3220 	adcq	$1,%r12
   3221 	movq	0+0(%rbp),%rax
   3222 	movq	%rax,%r15
   3223 	mulq	%r10
   3224 	movq	%rax,%r13
   3225 	movq	%rdx,%r14
   3226 	movq	0+0(%rbp),%rax
   3227 	mulq	%r11
   3228 	imulq	%r12,%r15
   3229 	addq	%rax,%r14
   3230 	adcq	%rdx,%r15
   3231 	movq	8+0(%rbp),%rax
   3232 	movq	%rax,%r9
   3233 	mulq	%r10
   3234 	addq	%rax,%r14
   3235 	adcq	$0,%rdx
   3236 	movq	%rdx,%r10
   3237 	movq	8+0(%rbp),%rax
   3238 	mulq	%r11
   3239 	addq	%rax,%r15
   3240 	adcq	$0,%rdx
   3241 	imulq	%r12,%r9
   3242 	addq	%r10,%r15
   3243 	adcq	%rdx,%r9
   3244 	movq	%r13,%r10
   3245 	movq	%r14,%r11
   3246 	movq	%r15,%r12
   3247 	andq	$3,%r12
   3248 	movq	%r15,%r13
   3249 	andq	$-4,%r13
   3250 	movq	%r9,%r14
   3251 	shrdq	$2,%r9,%r15
   3252 	shrq	$2,%r9
   3253 	addq	%r13,%r10
   3254 	adcq	%r14,%r11
   3255 	adcq	$0,%r12
   3256 	addq	%r15,%r10
   3257 	adcq	%r9,%r11
   3258 	adcq	$0,%r12
   3259 
   3260 	leaq	16(%rdi),%rdi
   3261 2:
   3262 	paddd	%xmm4,%xmm0
   3263 	pxor	%xmm0,%xmm12
   3264 	pshufb	.rol16(%rip),%xmm12
   3265 	paddd	%xmm12,%xmm8
   3266 	pxor	%xmm8,%xmm4
   3267 	movdqa	%xmm4,%xmm3
   3268 	pslld	$12,%xmm3
   3269 	psrld	$20,%xmm4
   3270 	pxor	%xmm3,%xmm4
   3271 	paddd	%xmm4,%xmm0
   3272 	pxor	%xmm0,%xmm12
   3273 	pshufb	.rol8(%rip),%xmm12
   3274 	paddd	%xmm12,%xmm8
   3275 	pxor	%xmm8,%xmm4
   3276 	movdqa	%xmm4,%xmm3
   3277 	pslld	$7,%xmm3
   3278 	psrld	$25,%xmm4
   3279 	pxor	%xmm3,%xmm4
   3280 .byte	102,15,58,15,228,4
   3281 .byte	102,69,15,58,15,192,8
   3282 .byte	102,69,15,58,15,228,12
   3283 	paddd	%xmm5,%xmm1
   3284 	pxor	%xmm1,%xmm13
   3285 	pshufb	.rol16(%rip),%xmm13
   3286 	paddd	%xmm13,%xmm9
   3287 	pxor	%xmm9,%xmm5
   3288 	movdqa	%xmm5,%xmm3
   3289 	pslld	$12,%xmm3
   3290 	psrld	$20,%xmm5
   3291 	pxor	%xmm3,%xmm5
   3292 	paddd	%xmm5,%xmm1
   3293 	pxor	%xmm1,%xmm13
   3294 	pshufb	.rol8(%rip),%xmm13
   3295 	paddd	%xmm13,%xmm9
   3296 	pxor	%xmm9,%xmm5
   3297 	movdqa	%xmm5,%xmm3
   3298 	pslld	$7,%xmm3
   3299 	psrld	$25,%xmm5
   3300 	pxor	%xmm3,%xmm5
   3301 .byte	102,15,58,15,237,4
   3302 .byte	102,69,15,58,15,201,8
   3303 .byte	102,69,15,58,15,237,12
   3304 	paddd	%xmm6,%xmm2
   3305 	pxor	%xmm2,%xmm14
   3306 	pshufb	.rol16(%rip),%xmm14
   3307 	paddd	%xmm14,%xmm10
   3308 	pxor	%xmm10,%xmm6
   3309 	movdqa	%xmm6,%xmm3
   3310 	pslld	$12,%xmm3
   3311 	psrld	$20,%xmm6
   3312 	pxor	%xmm3,%xmm6
   3313 	paddd	%xmm6,%xmm2
   3314 	pxor	%xmm2,%xmm14
   3315 	pshufb	.rol8(%rip),%xmm14
   3316 	paddd	%xmm14,%xmm10
   3317 	pxor	%xmm10,%xmm6
   3318 	movdqa	%xmm6,%xmm3
   3319 	pslld	$7,%xmm3
   3320 	psrld	$25,%xmm6
   3321 	pxor	%xmm3,%xmm6
   3322 .byte	102,15,58,15,246,4
   3323 .byte	102,69,15,58,15,210,8
   3324 .byte	102,69,15,58,15,246,12
   3325 	addq	0(%rdi),%r10
   3326 	adcq	8+0(%rdi),%r11
   3327 	adcq	$1,%r12
   3328 	movq	0+0(%rbp),%rax
   3329 	movq	%rax,%r15
   3330 	mulq	%r10
   3331 	movq	%rax,%r13
   3332 	movq	%rdx,%r14
   3333 	movq	0+0(%rbp),%rax
   3334 	mulq	%r11
   3335 	imulq	%r12,%r15
   3336 	addq	%rax,%r14
   3337 	adcq	%rdx,%r15
   3338 	movq	8+0(%rbp),%rax
   3339 	movq	%rax,%r9
   3340 	mulq	%r10
   3341 	addq	%rax,%r14
   3342 	adcq	$0,%rdx
   3343 	movq	%rdx,%r10
   3344 	movq	8+0(%rbp),%rax
   3345 	mulq	%r11
   3346 	addq	%rax,%r15
   3347 	adcq	$0,%rdx
   3348 	imulq	%r12,%r9
   3349 	addq	%r10,%r15
   3350 	adcq	%rdx,%r9
   3351 	movq	%r13,%r10
   3352 	movq	%r14,%r11
   3353 	movq	%r15,%r12
   3354 	andq	$3,%r12
   3355 	movq	%r15,%r13
   3356 	andq	$-4,%r13
   3357 	movq	%r9,%r14
   3358 	shrdq	$2,%r9,%r15
   3359 	shrq	$2,%r9
   3360 	addq	%r13,%r10
   3361 	adcq	%r14,%r11
   3362 	adcq	$0,%r12
   3363 	addq	%r15,%r10
   3364 	adcq	%r9,%r11
   3365 	adcq	$0,%r12
   3366 	paddd	%xmm4,%xmm0
   3367 	pxor	%xmm0,%xmm12
   3368 	pshufb	.rol16(%rip),%xmm12
   3369 	paddd	%xmm12,%xmm8
   3370 	pxor	%xmm8,%xmm4
   3371 	movdqa	%xmm4,%xmm3
   3372 	pslld	$12,%xmm3
   3373 	psrld	$20,%xmm4
   3374 	pxor	%xmm3,%xmm4
   3375 	paddd	%xmm4,%xmm0
   3376 	pxor	%xmm0,%xmm12
   3377 	pshufb	.rol8(%rip),%xmm12
   3378 	paddd	%xmm12,%xmm8
   3379 	pxor	%xmm8,%xmm4
   3380 	movdqa	%xmm4,%xmm3
   3381 	pslld	$7,%xmm3
   3382 	psrld	$25,%xmm4
   3383 	pxor	%xmm3,%xmm4
   3384 .byte	102,15,58,15,228,12
   3385 .byte	102,69,15,58,15,192,8
   3386 .byte	102,69,15,58,15,228,4
   3387 	paddd	%xmm5,%xmm1
   3388 	pxor	%xmm1,%xmm13
   3389 	pshufb	.rol16(%rip),%xmm13
   3390 	paddd	%xmm13,%xmm9
   3391 	pxor	%xmm9,%xmm5
   3392 	movdqa	%xmm5,%xmm3
   3393 	pslld	$12,%xmm3
   3394 	psrld	$20,%xmm5
   3395 	pxor	%xmm3,%xmm5
   3396 	paddd	%xmm5,%xmm1
   3397 	pxor	%xmm1,%xmm13
   3398 	pshufb	.rol8(%rip),%xmm13
   3399 	paddd	%xmm13,%xmm9
   3400 	pxor	%xmm9,%xmm5
   3401 	movdqa	%xmm5,%xmm3
   3402 	pslld	$7,%xmm3
   3403 	psrld	$25,%xmm5
   3404 	pxor	%xmm3,%xmm5
   3405 .byte	102,15,58,15,237,12
   3406 .byte	102,69,15,58,15,201,8
   3407 .byte	102,69,15,58,15,237,4
   3408 	paddd	%xmm6,%xmm2
   3409 	pxor	%xmm2,%xmm14
   3410 	pshufb	.rol16(%rip),%xmm14
   3411 	paddd	%xmm14,%xmm10
   3412 	pxor	%xmm10,%xmm6
   3413 	movdqa	%xmm6,%xmm3
   3414 	pslld	$12,%xmm3
   3415 	psrld	$20,%xmm6
   3416 	pxor	%xmm3,%xmm6
   3417 	paddd	%xmm6,%xmm2
   3418 	pxor	%xmm2,%xmm14
   3419 	pshufb	.rol8(%rip),%xmm14
   3420 	paddd	%xmm14,%xmm10
   3421 	pxor	%xmm10,%xmm6
   3422 	movdqa	%xmm6,%xmm3
   3423 	pslld	$7,%xmm3
   3424 	psrld	$25,%xmm6
   3425 	pxor	%xmm3,%xmm6
   3426 .byte	102,15,58,15,246,12
   3427 .byte	102,69,15,58,15,210,8
   3428 .byte	102,69,15,58,15,246,4
   3429 
   3430 	leaq	16(%rdi),%rdi
   3431 	decq	%rcx
   3432 	jg	1b
   3433 	decq	%r8
   3434 	jge	2b
   3435 	paddd	.chacha20_consts(%rip),%xmm2
   3436 	paddd	48(%rbp),%xmm6
   3437 	paddd	64(%rbp),%xmm10
   3438 	paddd	128(%rbp),%xmm14
   3439 	paddd	.chacha20_consts(%rip),%xmm1
   3440 	paddd	48(%rbp),%xmm5
   3441 	paddd	64(%rbp),%xmm9
   3442 	paddd	112(%rbp),%xmm13
   3443 	paddd	.chacha20_consts(%rip),%xmm0
   3444 	paddd	48(%rbp),%xmm4
   3445 	paddd	64(%rbp),%xmm8
   3446 	paddd	96(%rbp),%xmm12
   3447 	movdqu	0 + 0(%rsi),%xmm3
   3448 	movdqu	16 + 0(%rsi),%xmm7
   3449 	movdqu	32 + 0(%rsi),%xmm11
   3450 	movdqu	48 + 0(%rsi),%xmm15
   3451 	pxor	%xmm3,%xmm2
   3452 	pxor	%xmm7,%xmm6
   3453 	pxor	%xmm11,%xmm10
   3454 	pxor	%xmm14,%xmm15
   3455 	movdqu	%xmm2,0 + 0(%rdi)
   3456 	movdqu	%xmm6,16 + 0(%rdi)
   3457 	movdqu	%xmm10,32 + 0(%rdi)
   3458 	movdqu	%xmm15,48 + 0(%rdi)
   3459 	movdqu	0 + 64(%rsi),%xmm3
   3460 	movdqu	16 + 64(%rsi),%xmm7
   3461 	movdqu	32 + 64(%rsi),%xmm11
   3462 	movdqu	48 + 64(%rsi),%xmm15
   3463 	pxor	%xmm3,%xmm1
   3464 	pxor	%xmm7,%xmm5
   3465 	pxor	%xmm11,%xmm9
   3466 	pxor	%xmm13,%xmm15
   3467 	movdqu	%xmm1,0 + 64(%rdi)
   3468 	movdqu	%xmm5,16 + 64(%rdi)
   3469 	movdqu	%xmm9,32 + 64(%rdi)
   3470 	movdqu	%xmm15,48 + 64(%rdi)
   3471 
   3472 	movq	$128,%rcx
   3473 	subq	$128,%rbx
   3474 	leaq	128(%rsi),%rsi
   3475 
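// Absorb any written-but-unhashed ciphertext into Poly1305, 16 bytes at
// a time; %rcx holds how many bytes are still pending.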
   3476 seal_sse_128_seal_hash:
   3477 	cmpq	$16,%rcx
   3478 	jb	seal_sse_128_seal
   3479 	addq	0(%rdi),%r10
   3480 	adcq	8+0(%rdi),%r11
   3481 	adcq	$1,%r12
   3482 	movq	0+0(%rbp),%rax
   3483 	movq	%rax,%r15
   3484 	mulq	%r10
   3485 	movq	%rax,%r13
   3486 	movq	%rdx,%r14
   3487 	movq	0+0(%rbp),%rax
   3488 	mulq	%r11
   3489 	imulq	%r12,%r15
   3490 	addq	%rax,%r14
   3491 	adcq	%rdx,%r15
   3492 	movq	8+0(%rbp),%rax
   3493 	movq	%rax,%r9
   3494 	mulq	%r10
   3495 	addq	%rax,%r14
   3496 	adcq	$0,%rdx
   3497 	movq	%rdx,%r10
   3498 	movq	8+0(%rbp),%rax
   3499 	mulq	%r11
   3500 	addq	%rax,%r15
   3501 	adcq	$0,%rdx
   3502 	imulq	%r12,%r9
   3503 	addq	%r10,%r15
   3504 	adcq	%rdx,%r9
   3505 	movq	%r13,%r10
   3506 	movq	%r14,%r11
   3507 	movq	%r15,%r12
   3508 	andq	$3,%r12
   3509 	movq	%r15,%r13
   3510 	andq	$-4,%r13
   3511 	movq	%r9,%r14
   3512 	shrdq	$2,%r9,%r15
   3513 	shrq	$2,%r9
   3514 	addq	%r13,%r10
   3515 	adcq	%r14,%r11
   3516 	adcq	$0,%r12
   3517 	addq	%r15,%r10
   3518 	adcq	%r9,%r11
   3519 	adcq	$0,%r12
   3520 
   3521 	subq	$16,%rcx
   3522 	leaq	16(%rdi),%rdi
   3523 	jmp	seal_sse_128_seal_hash
   3524 
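// Encrypt the remaining full 16-byte blocks with buffered keystream and
// hash each ciphertext block as it is written.  The movdqa shuffle at
// the bottom rotates the register file so %xmm0 always holds the next
// 16 bytes of keystream.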
   3525 seal_sse_128_seal:
   3526 	cmpq	$16,%rbx
   3527 	jb	seal_sse_tail_16
   3528 	subq	$16,%rbx
   3529 
   3530 	movdqu	0(%rsi),%xmm3
   3531 	pxor	%xmm3,%xmm0
   3532 	movdqu	%xmm0,0(%rdi)
   3533 
   3534 	addq	0(%rdi),%r10
   3535 	adcq	8(%rdi),%r11
   3536 	adcq	$1,%r12
   3537 	leaq	16(%rsi),%rsi
   3538 	leaq	16(%rdi),%rdi
   3539 	movq	0+0(%rbp),%rax
   3540 	movq	%rax,%r15
   3541 	mulq	%r10
   3542 	movq	%rax,%r13
   3543 	movq	%rdx,%r14
   3544 	movq	0+0(%rbp),%rax
   3545 	mulq	%r11
   3546 	imulq	%r12,%r15
   3547 	addq	%rax,%r14
   3548 	adcq	%rdx,%r15
   3549 	movq	8+0(%rbp),%rax
   3550 	movq	%rax,%r9
   3551 	mulq	%r10
   3552 	addq	%rax,%r14
   3553 	adcq	$0,%rdx
   3554 	movq	%rdx,%r10
   3555 	movq	8+0(%rbp),%rax
   3556 	mulq	%r11
   3557 	addq	%rax,%r15
   3558 	adcq	$0,%rdx
   3559 	imulq	%r12,%r9
   3560 	addq	%r10,%r15
   3561 	adcq	%rdx,%r9
   3562 	movq	%r13,%r10
   3563 	movq	%r14,%r11
   3564 	movq	%r15,%r12
   3565 	andq	$3,%r12
   3566 	movq	%r15,%r13
   3567 	andq	$-4,%r13
   3568 	movq	%r9,%r14
   3569 	shrdq	$2,%r9,%r15
   3570 	shrq	$2,%r9
   3571 	addq	%r13,%r10
   3572 	adcq	%r14,%r11
   3573 	adcq	$0,%r12
   3574 	addq	%r15,%r10
   3575 	adcq	%r9,%r11
   3576 	adcq	$0,%r12
   3577 
   3578 
   3579 	movdqa	%xmm4,%xmm0
   3580 	movdqa	%xmm8,%xmm4
   3581 	movdqa	%xmm12,%xmm8
   3582 	movdqa	%xmm1,%xmm12
   3583 	movdqa	%xmm5,%xmm1
   3584 	movdqa	%xmm9,%xmm5
   3585 	movdqa	%xmm13,%xmm9
   3586 	jmp	seal_sse_128_seal
   3587 
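// Final partial block (< 16 bytes).  Gather the trailing bytes into
// %xmm15 back to front, xor with keystream, emit them byte by byte, then
// mask the block with .and_masks and absorb the zero-padded block into
// Poly1305 (the adcq $1 below supplies the 2^128 pad bit).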
   3588 seal_sse_tail_16:
   3589 	testq	%rbx,%rbx
   3590 	jz	seal_sse_finalize
   3591 
   3592 	movq	%rbx,%r8
   3593 	shlq	$4,%r8
   3594 	leaq	.and_masks(%rip),%r13
   3595 	movq	%rbx,%rcx
   3596 	leaq	-1(%rsi,%rbx), %rsi
   3597 	pxor	%xmm15,%xmm15
   3598 1:
   3599 	pslldq	$1,%xmm15
   3600 	pinsrb	$0,(%rsi),%xmm15
   3601 	leaq	-1(%rsi),%rsi
   3602 	decq	%rcx
   3603 	jne	1b
   3604 
   3605 
   3606 	pxor	%xmm0,%xmm15
   3607 
   3608 
   3609 	movq	%rbx,%rcx
   3610 	movdqu	%xmm15,%xmm0
   3611 2:
   3612 	pextrb	$0,%xmm0,(%rdi)
   3613 	psrldq	$1,%xmm0
   3614 	addq	$1,%rdi
   3615 	subq	$1,%rcx
   3616 	jnz	2b
   3617 
   3618 	pand	-16(%r13,%r8), %xmm15
3619 .byte	102,77,15,126,253	// movq %xmm15,%r13
   3620 	pextrq	$1,%xmm15,%r14
   3621 	addq	%r13,%r10
   3622 	adcq	%r14,%r11
   3623 	adcq	$1,%r12
   3624 	movq	0+0(%rbp),%rax
   3625 	movq	%rax,%r15
   3626 	mulq	%r10
   3627 	movq	%rax,%r13
   3628 	movq	%rdx,%r14
   3629 	movq	0+0(%rbp),%rax
   3630 	mulq	%r11
   3631 	imulq	%r12,%r15
   3632 	addq	%rax,%r14
   3633 	adcq	%rdx,%r15
   3634 	movq	8+0(%rbp),%rax
   3635 	movq	%rax,%r9
   3636 	mulq	%r10
   3637 	addq	%rax,%r14
   3638 	adcq	$0,%rdx
   3639 	movq	%rdx,%r10
   3640 	movq	8+0(%rbp),%rax
   3641 	mulq	%r11
   3642 	addq	%rax,%r15
   3643 	adcq	$0,%rdx
   3644 	imulq	%r12,%r9
   3645 	addq	%r10,%r15
   3646 	adcq	%rdx,%r9
   3647 	movq	%r13,%r10
   3648 	movq	%r14,%r11
   3649 	movq	%r15,%r12
   3650 	andq	$3,%r12
   3651 	movq	%r15,%r13
   3652 	andq	$-4,%r13
   3653 	movq	%r9,%r14
   3654 	shrdq	$2,%r9,%r15
   3655 	shrq	$2,%r9
   3656 	addq	%r13,%r10
   3657 	adcq	%r14,%r11
   3658 	adcq	$0,%r12
   3659 	addq	%r15,%r10
   3660 	adcq	%r9,%r11
   3661 	adcq	$0,%r12
   3662 
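// Finalize: absorb the length block (ad_len || msg_len, stashed at
// 32(%rbp) in the prologue), reduce the accumulator mod 2^130 - 5, then
// tag = (acc + s) mod 2^128, written through the saved %r9.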
   3663 seal_sse_finalize:
   3664 	addq	32(%rbp),%r10
   3665 	adcq	8+32(%rbp),%r11
   3666 	adcq	$1,%r12
   3667 	movq	0+0(%rbp),%rax
   3668 	movq	%rax,%r15
   3669 	mulq	%r10
   3670 	movq	%rax,%r13
   3671 	movq	%rdx,%r14
   3672 	movq	0+0(%rbp),%rax
   3673 	mulq	%r11
   3674 	imulq	%r12,%r15
   3675 	addq	%rax,%r14
   3676 	adcq	%rdx,%r15
   3677 	movq	8+0(%rbp),%rax
   3678 	movq	%rax,%r9
   3679 	mulq	%r10
   3680 	addq	%rax,%r14
   3681 	adcq	$0,%rdx
   3682 	movq	%rdx,%r10
   3683 	movq	8+0(%rbp),%rax
   3684 	mulq	%r11
   3685 	addq	%rax,%r15
   3686 	adcq	$0,%rdx
   3687 	imulq	%r12,%r9
   3688 	addq	%r10,%r15
   3689 	adcq	%rdx,%r9
   3690 	movq	%r13,%r10
   3691 	movq	%r14,%r11
   3692 	movq	%r15,%r12
   3693 	andq	$3,%r12
   3694 	movq	%r15,%r13
   3695 	andq	$-4,%r13
   3696 	movq	%r9,%r14
   3697 	shrdq	$2,%r9,%r15
   3698 	shrq	$2,%r9
   3699 	addq	%r13,%r10
   3700 	adcq	%r14,%r11
   3701 	adcq	$0,%r12
   3702 	addq	%r15,%r10
   3703 	adcq	%r9,%r11
   3704 	adcq	$0,%r12
   3705 
   3706 
   3707 	movq	%r10,%r13
   3708 	movq	%r11,%r14
   3709 	movq	%r12,%r15
   3710 	subq	$-5,%r10
   3711 	sbbq	$-1,%r11
   3712 	sbbq	$3,%r12
   3713 	cmovcq	%r13,%r10
   3714 	cmovcq	%r14,%r11
   3715 	cmovcq	%r15,%r12
   3716 
   3717 	addq	0+16(%rbp),%r10
   3718 	adcq	8+16(%rbp),%r11
   3719 
   3720 	addq	$288 + 32,%rsp
   3721 .cfi_adjust_cfa_offset	-(288 + 32)
   3722 	popq	%r9
   3723 .cfi_adjust_cfa_offset	-8
   3724 	movq	%r10,0(%r9)
   3725 	movq	%r11,8(%r9)
   3726 
   3727 	popq	%r15
   3728 .cfi_adjust_cfa_offset	-8
   3729 	popq	%r14
   3730 .cfi_adjust_cfa_offset	-8
   3731 	popq	%r13
   3732 .cfi_adjust_cfa_offset	-8
   3733 	popq	%r12
   3734 .cfi_adjust_cfa_offset	-8
   3735 	popq	%rbx
   3736 .cfi_adjust_cfa_offset	-8
   3737 	popq	%rbp
   3738 .cfi_adjust_cfa_offset	-8
3739 	.byte	0xf3,0xc3	// rep ret
3740 .cfi_adjust_cfa_offset	(8 * 6) + 288 + 32	// re-establish the CFA for seal_sse_128 below, still inside this function
   3741 
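// Short input (<= 128 bytes): three ChaCha20 blocks, no interleaved
// hashing.  The block keyed by the initial counter (%xmm2/%xmm6)
// supplies the clamped Poly1305 (r,s) pair.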
   3742 seal_sse_128:
   3743 	movdqu	.chacha20_consts(%rip),%xmm0
   3744 	movdqa	%xmm0,%xmm1
   3745 	movdqa	%xmm0,%xmm2
   3746 	movdqu	0(%r9),%xmm4
   3747 	movdqa	%xmm4,%xmm5
   3748 	movdqa	%xmm4,%xmm6
   3749 	movdqu	16(%r9),%xmm8
   3750 	movdqa	%xmm8,%xmm9
   3751 	movdqa	%xmm8,%xmm10
   3752 	movdqu	32(%r9),%xmm14
   3753 	movdqa	%xmm14,%xmm12
   3754 	paddd	.sse_inc(%rip),%xmm12
   3755 	movdqa	%xmm12,%xmm13
   3756 	paddd	.sse_inc(%rip),%xmm13
   3757 	movdqa	%xmm4,%xmm7
   3758 	movdqa	%xmm8,%xmm11
   3759 	movdqa	%xmm12,%xmm15
   3760 	movq	$10,%r10
   3761 1:
   3762 	paddd	%xmm4,%xmm0
   3763 	pxor	%xmm0,%xmm12
   3764 	pshufb	.rol16(%rip),%xmm12
   3765 	paddd	%xmm12,%xmm8
   3766 	pxor	%xmm8,%xmm4
   3767 	movdqa	%xmm4,%xmm3
   3768 	pslld	$12,%xmm3
   3769 	psrld	$20,%xmm4
   3770 	pxor	%xmm3,%xmm4
   3771 	paddd	%xmm4,%xmm0
   3772 	pxor	%xmm0,%xmm12
   3773 	pshufb	.rol8(%rip),%xmm12
   3774 	paddd	%xmm12,%xmm8
   3775 	pxor	%xmm8,%xmm4
   3776 	movdqa	%xmm4,%xmm3
   3777 	pslld	$7,%xmm3
   3778 	psrld	$25,%xmm4
   3779 	pxor	%xmm3,%xmm4
   3780 .byte	102,15,58,15,228,4
   3781 .byte	102,69,15,58,15,192,8
   3782 .byte	102,69,15,58,15,228,12
   3783 	paddd	%xmm5,%xmm1
   3784 	pxor	%xmm1,%xmm13
   3785 	pshufb	.rol16(%rip),%xmm13
   3786 	paddd	%xmm13,%xmm9
   3787 	pxor	%xmm9,%xmm5
   3788 	movdqa	%xmm5,%xmm3
   3789 	pslld	$12,%xmm3
   3790 	psrld	$20,%xmm5
   3791 	pxor	%xmm3,%xmm5
   3792 	paddd	%xmm5,%xmm1
   3793 	pxor	%xmm1,%xmm13
   3794 	pshufb	.rol8(%rip),%xmm13
   3795 	paddd	%xmm13,%xmm9
   3796 	pxor	%xmm9,%xmm5
   3797 	movdqa	%xmm5,%xmm3
   3798 	pslld	$7,%xmm3
   3799 	psrld	$25,%xmm5
   3800 	pxor	%xmm3,%xmm5
   3801 .byte	102,15,58,15,237,4
   3802 .byte	102,69,15,58,15,201,8
   3803 .byte	102,69,15,58,15,237,12
   3804 	paddd	%xmm6,%xmm2
   3805 	pxor	%xmm2,%xmm14
   3806 	pshufb	.rol16(%rip),%xmm14
   3807 	paddd	%xmm14,%xmm10
   3808 	pxor	%xmm10,%xmm6
   3809 	movdqa	%xmm6,%xmm3
   3810 	pslld	$12,%xmm3
   3811 	psrld	$20,%xmm6
   3812 	pxor	%xmm3,%xmm6
   3813 	paddd	%xmm6,%xmm2
   3814 	pxor	%xmm2,%xmm14
   3815 	pshufb	.rol8(%rip),%xmm14
   3816 	paddd	%xmm14,%xmm10
   3817 	pxor	%xmm10,%xmm6
   3818 	movdqa	%xmm6,%xmm3
   3819 	pslld	$7,%xmm3
   3820 	psrld	$25,%xmm6
   3821 	pxor	%xmm3,%xmm6
   3822 .byte	102,15,58,15,246,4
   3823 .byte	102,69,15,58,15,210,8
   3824 .byte	102,69,15,58,15,246,12
   3825 	paddd	%xmm4,%xmm0
   3826 	pxor	%xmm0,%xmm12
   3827 	pshufb	.rol16(%rip),%xmm12
   3828 	paddd	%xmm12,%xmm8
   3829 	pxor	%xmm8,%xmm4
   3830 	movdqa	%xmm4,%xmm3
   3831 	pslld	$12,%xmm3
   3832 	psrld	$20,%xmm4
   3833 	pxor	%xmm3,%xmm4
   3834 	paddd	%xmm4,%xmm0
   3835 	pxor	%xmm0,%xmm12
   3836 	pshufb	.rol8(%rip),%xmm12
   3837 	paddd	%xmm12,%xmm8
   3838 	pxor	%xmm8,%xmm4
   3839 	movdqa	%xmm4,%xmm3
   3840 	pslld	$7,%xmm3
   3841 	psrld	$25,%xmm4
   3842 	pxor	%xmm3,%xmm4
   3843 .byte	102,15,58,15,228,12
   3844 .byte	102,69,15,58,15,192,8
   3845 .byte	102,69,15,58,15,228,4
   3846 	paddd	%xmm5,%xmm1
   3847 	pxor	%xmm1,%xmm13
   3848 	pshufb	.rol16(%rip),%xmm13
   3849 	paddd	%xmm13,%xmm9
   3850 	pxor	%xmm9,%xmm5
   3851 	movdqa	%xmm5,%xmm3
   3852 	pslld	$12,%xmm3
   3853 	psrld	$20,%xmm5
   3854 	pxor	%xmm3,%xmm5
   3855 	paddd	%xmm5,%xmm1
   3856 	pxor	%xmm1,%xmm13
   3857 	pshufb	.rol8(%rip),%xmm13
   3858 	paddd	%xmm13,%xmm9
   3859 	pxor	%xmm9,%xmm5
   3860 	movdqa	%xmm5,%xmm3
   3861 	pslld	$7,%xmm3
   3862 	psrld	$25,%xmm5
   3863 	pxor	%xmm3,%xmm5
   3864 .byte	102,15,58,15,237,12
   3865 .byte	102,69,15,58,15,201,8
   3866 .byte	102,69,15,58,15,237,4
   3867 	paddd	%xmm6,%xmm2
   3868 	pxor	%xmm2,%xmm14
   3869 	pshufb	.rol16(%rip),%xmm14
   3870 	paddd	%xmm14,%xmm10
   3871 	pxor	%xmm10,%xmm6
   3872 	movdqa	%xmm6,%xmm3
   3873 	pslld	$12,%xmm3
   3874 	psrld	$20,%xmm6
   3875 	pxor	%xmm3,%xmm6
   3876 	paddd	%xmm6,%xmm2
   3877 	pxor	%xmm2,%xmm14
   3878 	pshufb	.rol8(%rip),%xmm14
   3879 	paddd	%xmm14,%xmm10
   3880 	pxor	%xmm10,%xmm6
   3881 	movdqa	%xmm6,%xmm3
   3882 	pslld	$7,%xmm3
   3883 	psrld	$25,%xmm6
   3884 	pxor	%xmm3,%xmm6
   3885 .byte	102,15,58,15,246,12
   3886 .byte	102,69,15,58,15,210,8
   3887 .byte	102,69,15,58,15,246,4
   3888 
   3889 	decq	%r10
   3890 	jnz	1b
   3891 	paddd	.chacha20_consts(%rip),%xmm0
   3892 	paddd	.chacha20_consts(%rip),%xmm1
   3893 	paddd	.chacha20_consts(%rip),%xmm2
   3894 	paddd	%xmm7,%xmm4
   3895 	paddd	%xmm7,%xmm5
   3896 	paddd	%xmm7,%xmm6
   3897 	paddd	%xmm11,%xmm8
   3898 	paddd	%xmm11,%xmm9
   3899 	paddd	%xmm15,%xmm12
   3900 	paddd	.sse_inc(%rip),%xmm15
   3901 	paddd	%xmm15,%xmm13
   3902 
   3903 	pand	.clamp(%rip),%xmm2
   3904 	movdqa	%xmm2,0(%rbp)
   3905 	movdqa	%xmm6,16(%rbp)
   3906 
   3907 	movq	%r8,%r8
   3908 	call	poly_hash_ad_internal
   3909 	jmp	seal_sse_128_seal
   3910 .size	chacha20_poly1305_seal, .-chacha20_poly1305_seal
   3911 
   3912 
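// AVX2 open path: each ymm register carries two ChaCha20 blocks
// (vbroadcasti128 duplicates the key rows; .avx2_init/.avx2_inc stagger
// the counters), and Poly1305 switches to mulx, which the BMI2 check at
// dispatch guarantees.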
   3913 .type	chacha20_poly1305_open_avx2,@function
   3914 .align	64
   3915 chacha20_poly1305_open_avx2:
   3916 	vzeroupper
   3917 	vmovdqa	.chacha20_consts(%rip),%ymm0
   3918 	vbroadcasti128	0(%r9),%ymm4
   3919 	vbroadcasti128	16(%r9),%ymm8
   3920 	vbroadcasti128	32(%r9),%ymm12
   3921 	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
   3922 	cmpq	$192,%rbx
   3923 	jbe	open_avx2_192
   3924 	cmpq	$320,%rbx
   3925 	jbe	open_avx2_320
   3926 
   3927 	vmovdqa	%ymm4,64(%rbp)
   3928 	vmovdqa	%ymm8,96(%rbp)
   3929 	vmovdqa	%ymm12,160(%rbp)
   3930 	movq	$10,%r10
   3931 1:
   3932 	vpaddd	%ymm4,%ymm0,%ymm0
   3933 	vpxor	%ymm0,%ymm12,%ymm12
   3934 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   3935 	vpaddd	%ymm12,%ymm8,%ymm8
   3936 	vpxor	%ymm8,%ymm4,%ymm4
   3937 	vpsrld	$20,%ymm4,%ymm3
   3938 	vpslld	$12,%ymm4,%ymm4
   3939 	vpxor	%ymm3,%ymm4,%ymm4
   3940 	vpaddd	%ymm4,%ymm0,%ymm0
   3941 	vpxor	%ymm0,%ymm12,%ymm12
   3942 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   3943 	vpaddd	%ymm12,%ymm8,%ymm8
   3944 	vpxor	%ymm8,%ymm4,%ymm4
   3945 	vpslld	$7,%ymm4,%ymm3
   3946 	vpsrld	$25,%ymm4,%ymm4
   3947 	vpxor	%ymm3,%ymm4,%ymm4
   3948 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   3949 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   3950 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   3951 	vpaddd	%ymm4,%ymm0,%ymm0
   3952 	vpxor	%ymm0,%ymm12,%ymm12
   3953 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   3954 	vpaddd	%ymm12,%ymm8,%ymm8
   3955 	vpxor	%ymm8,%ymm4,%ymm4
   3956 	vpsrld	$20,%ymm4,%ymm3
   3957 	vpslld	$12,%ymm4,%ymm4
   3958 	vpxor	%ymm3,%ymm4,%ymm4
   3959 	vpaddd	%ymm4,%ymm0,%ymm0
   3960 	vpxor	%ymm0,%ymm12,%ymm12
   3961 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   3962 	vpaddd	%ymm12,%ymm8,%ymm8
   3963 	vpxor	%ymm8,%ymm4,%ymm4
   3964 	vpslld	$7,%ymm4,%ymm3
   3965 	vpsrld	$25,%ymm4,%ymm4
   3966 	vpxor	%ymm3,%ymm4,%ymm4
   3967 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   3968 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   3969 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   3970 
   3971 	decq	%r10
   3972 	jne	1b
   3973 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   3974 	vpaddd	64(%rbp),%ymm4,%ymm4
   3975 	vpaddd	96(%rbp),%ymm8,%ymm8
   3976 	vpaddd	160(%rbp),%ymm12,%ymm12
   3977 
   3978 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   3979 
   3980 	vpand	.clamp(%rip),%ymm3,%ymm3
   3981 	vmovdqa	%ymm3,0(%rbp)
   3982 
   3983 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   3984 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   3985 
   3986 	movq	%r8,%r8
   3987 	call	poly_hash_ad_internal
   3988 	xorq	%rcx,%rcx
   3989 
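// Hash the first 64 bytes of ciphertext, then decrypt them with the
// first keystream block computed above.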
   3990 1:
   3991 	addq	0(%rsi,%rcx), %r10
   3992 	adcq	8+0(%rsi,%rcx), %r11
   3993 	adcq	$1,%r12
   3994 	movq	0+0(%rbp),%rax
   3995 	movq	%rax,%r15
   3996 	mulq	%r10
   3997 	movq	%rax,%r13
   3998 	movq	%rdx,%r14
   3999 	movq	0+0(%rbp),%rax
   4000 	mulq	%r11
   4001 	imulq	%r12,%r15
   4002 	addq	%rax,%r14
   4003 	adcq	%rdx,%r15
   4004 	movq	8+0(%rbp),%rax
   4005 	movq	%rax,%r9
   4006 	mulq	%r10
   4007 	addq	%rax,%r14
   4008 	adcq	$0,%rdx
   4009 	movq	%rdx,%r10
   4010 	movq	8+0(%rbp),%rax
   4011 	mulq	%r11
   4012 	addq	%rax,%r15
   4013 	adcq	$0,%rdx
   4014 	imulq	%r12,%r9
   4015 	addq	%r10,%r15
   4016 	adcq	%rdx,%r9
   4017 	movq	%r13,%r10
   4018 	movq	%r14,%r11
   4019 	movq	%r15,%r12
   4020 	andq	$3,%r12
   4021 	movq	%r15,%r13
   4022 	andq	$-4,%r13
   4023 	movq	%r9,%r14
   4024 	shrdq	$2,%r9,%r15
   4025 	shrq	$2,%r9
   4026 	addq	%r13,%r10
   4027 	adcq	%r14,%r11
   4028 	adcq	$0,%r12
   4029 	addq	%r15,%r10
   4030 	adcq	%r9,%r11
   4031 	adcq	$0,%r12
   4032 
   4033 	addq	$16,%rcx
   4034 	cmpq	$64,%rcx
   4035 	jne	1b
   4036 
   4037 	vpxor	0(%rsi),%ymm0,%ymm0
   4038 	vpxor	32(%rsi),%ymm4,%ymm4
   4039 	vmovdqu	%ymm0,0(%rdi)
   4040 	vmovdqu	%ymm4,32(%rdi)
   4041 	leaq	64(%rsi),%rsi
   4042 	leaq	64(%rdi),%rdi
   4043 	subq	$64,%rbx
   4044 1:
   4045 
   4046 	cmpq	$512,%rbx
   4047 	jb	3f
   4048 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4049 	vmovdqa	64(%rbp),%ymm4
   4050 	vmovdqa	96(%rbp),%ymm8
   4051 	vmovdqa	%ymm0,%ymm1
   4052 	vmovdqa	%ymm4,%ymm5
   4053 	vmovdqa	%ymm8,%ymm9
   4054 	vmovdqa	%ymm0,%ymm2
   4055 	vmovdqa	%ymm4,%ymm6
   4056 	vmovdqa	%ymm8,%ymm10
   4057 	vmovdqa	%ymm0,%ymm3
   4058 	vmovdqa	%ymm4,%ymm7
   4059 	vmovdqa	%ymm8,%ymm11
   4060 	vmovdqa	.avx2_inc(%rip),%ymm12
   4061 	vpaddd	160(%rbp),%ymm12,%ymm15
   4062 	vpaddd	%ymm15,%ymm12,%ymm14
   4063 	vpaddd	%ymm14,%ymm12,%ymm13
   4064 	vpaddd	%ymm13,%ymm12,%ymm12
   4065 	vmovdqa	%ymm15,256(%rbp)
   4066 	vmovdqa	%ymm14,224(%rbp)
   4067 	vmovdqa	%ymm13,192(%rbp)
   4068 	vmovdqa	%ymm12,160(%rbp)
   4069 
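// 512-byte main open loop: four two-block ymm states.  Each trip through
// label 2 hashes 48 bytes of ciphertext (three Poly1305 blocks) while
// the vector rounds run.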
   4070 	xorq	%rcx,%rcx
   4071 2:
   4072 	addq	0*8(%rsi,%rcx), %r10
   4073 	adcq	8+0*8(%rsi,%rcx), %r11
   4074 	adcq	$1,%r12
   4075 	vmovdqa	%ymm8,128(%rbp)
   4076 	vmovdqa	.rol16(%rip),%ymm8
   4077 	vpaddd	%ymm7,%ymm3,%ymm3
   4078 	vpaddd	%ymm6,%ymm2,%ymm2
   4079 	vpaddd	%ymm5,%ymm1,%ymm1
   4080 	vpaddd	%ymm4,%ymm0,%ymm0
   4081 	vpxor	%ymm3,%ymm15,%ymm15
   4082 	vpxor	%ymm2,%ymm14,%ymm14
   4083 	vpxor	%ymm1,%ymm13,%ymm13
   4084 	vpxor	%ymm0,%ymm12,%ymm12
   4085 	movq	0+0(%rbp),%rdx
   4086 	movq	%rdx,%r15
   4087 	mulxq	%r10,%r13,%r14
   4088 	mulxq	%r11,%rax,%rdx
   4089 	imulq	%r12,%r15
   4090 	addq	%rax,%r14
   4091 	adcq	%rdx,%r15
   4092 	vpshufb	%ymm8,%ymm15,%ymm15
   4093 	vpshufb	%ymm8,%ymm14,%ymm14
   4094 	vpshufb	%ymm8,%ymm13,%ymm13
   4095 	vpshufb	%ymm8,%ymm12,%ymm12
   4096 	vmovdqa	128(%rbp),%ymm8
   4097 	vpaddd	%ymm15,%ymm11,%ymm11
   4098 	vpaddd	%ymm14,%ymm10,%ymm10
   4099 	vpaddd	%ymm13,%ymm9,%ymm9
   4100 	vpaddd	%ymm12,%ymm8,%ymm8
   4101 	movq	8+0(%rbp),%rdx
   4102 	mulxq	%r10,%r10,%rax
   4103 	addq	%r10,%r14
   4104 	mulxq	%r11,%r11,%r9
   4105 	adcq	%r11,%r15
   4106 	adcq	$0,%r9
   4107 	imulq	%r12,%rdx
   4108 	vpxor	%ymm11,%ymm7,%ymm7
   4109 	vpxor	%ymm10,%ymm6,%ymm6
   4110 	vpxor	%ymm9,%ymm5,%ymm5
   4111 	vpxor	%ymm8,%ymm4,%ymm4
   4112 	vmovdqa	%ymm8,128(%rbp)
   4113 	vpsrld	$20,%ymm7,%ymm8
   4114 	vpslld	$32-20,%ymm7,%ymm7
   4115 	vpxor	%ymm8,%ymm7,%ymm7
   4116 	vpsrld	$20,%ymm6,%ymm8
   4117 	vpslld	$32-20,%ymm6,%ymm6
   4118 	vpxor	%ymm8,%ymm6,%ymm6
   4119 	vpsrld	$20,%ymm5,%ymm8
   4120 	addq	%rax,%r15
   4121 	adcq	%rdx,%r9
   4122 	vpslld	$32-20,%ymm5,%ymm5
   4123 	vpxor	%ymm8,%ymm5,%ymm5
   4124 	vpsrld	$20,%ymm4,%ymm8
   4125 	vpslld	$32-20,%ymm4,%ymm4
   4126 	vpxor	%ymm8,%ymm4,%ymm4
   4127 	vmovdqa	.rol8(%rip),%ymm8
   4128 	vpaddd	%ymm7,%ymm3,%ymm3
   4129 	vpaddd	%ymm6,%ymm2,%ymm2
   4130 	vpaddd	%ymm5,%ymm1,%ymm1
   4131 	vpaddd	%ymm4,%ymm0,%ymm0
   4132 	movq	%r13,%r10
   4133 	movq	%r14,%r11
   4134 	movq	%r15,%r12
   4135 	andq	$3,%r12
   4136 	movq	%r15,%r13
   4137 	andq	$-4,%r13
   4138 	movq	%r9,%r14
   4139 	shrdq	$2,%r9,%r15
   4140 	shrq	$2,%r9
   4141 	addq	%r13,%r10
   4142 	adcq	%r14,%r11
   4143 	adcq	$0,%r12
   4144 	addq	%r15,%r10
   4145 	adcq	%r9,%r11
   4146 	adcq	$0,%r12
   4147 	vpxor	%ymm3,%ymm15,%ymm15
   4148 	vpxor	%ymm2,%ymm14,%ymm14
   4149 	vpxor	%ymm1,%ymm13,%ymm13
   4150 	vpxor	%ymm0,%ymm12,%ymm12
   4151 	vpshufb	%ymm8,%ymm15,%ymm15
   4152 	vpshufb	%ymm8,%ymm14,%ymm14
   4153 	vpshufb	%ymm8,%ymm13,%ymm13
   4154 	vpshufb	%ymm8,%ymm12,%ymm12
   4155 	vmovdqa	128(%rbp),%ymm8
   4156 	addq	2*8(%rsi,%rcx), %r10
   4157 	adcq	8+2*8(%rsi,%rcx), %r11
   4158 	adcq	$1,%r12
   4159 	vpaddd	%ymm15,%ymm11,%ymm11
   4160 	vpaddd	%ymm14,%ymm10,%ymm10
   4161 	vpaddd	%ymm13,%ymm9,%ymm9
   4162 	vpaddd	%ymm12,%ymm8,%ymm8
   4163 	vpxor	%ymm11,%ymm7,%ymm7
   4164 	vpxor	%ymm10,%ymm6,%ymm6
   4165 	vpxor	%ymm9,%ymm5,%ymm5
   4166 	vpxor	%ymm8,%ymm4,%ymm4
   4167 	movq	0+0(%rbp),%rdx
   4168 	movq	%rdx,%r15
   4169 	mulxq	%r10,%r13,%r14
   4170 	mulxq	%r11,%rax,%rdx
   4171 	imulq	%r12,%r15
   4172 	addq	%rax,%r14
   4173 	adcq	%rdx,%r15
   4174 	vmovdqa	%ymm8,128(%rbp)
   4175 	vpsrld	$25,%ymm7,%ymm8
   4176 	vpslld	$32-25,%ymm7,%ymm7
   4177 	vpxor	%ymm8,%ymm7,%ymm7
   4178 	vpsrld	$25,%ymm6,%ymm8
   4179 	vpslld	$32-25,%ymm6,%ymm6
   4180 	vpxor	%ymm8,%ymm6,%ymm6
   4181 	vpsrld	$25,%ymm5,%ymm8
   4182 	vpslld	$32-25,%ymm5,%ymm5
   4183 	vpxor	%ymm8,%ymm5,%ymm5
   4184 	vpsrld	$25,%ymm4,%ymm8
   4185 	vpslld	$32-25,%ymm4,%ymm4
   4186 	vpxor	%ymm8,%ymm4,%ymm4
   4187 	vmovdqa	128(%rbp),%ymm8
   4188 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   4189 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   4190 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   4191 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   4192 	movq	8+0(%rbp),%rdx
   4193 	mulxq	%r10,%r10,%rax
   4194 	addq	%r10,%r14
   4195 	mulxq	%r11,%r11,%r9
   4196 	adcq	%r11,%r15
   4197 	adcq	$0,%r9
   4198 	imulq	%r12,%rdx
   4199 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4200 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   4201 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   4202 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4203 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   4204 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4205 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4206 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4207 	vmovdqa	%ymm8,128(%rbp)
   4208 	vmovdqa	.rol16(%rip),%ymm8
   4209 	vpaddd	%ymm7,%ymm3,%ymm3
   4210 	vpaddd	%ymm6,%ymm2,%ymm2
   4211 	vpaddd	%ymm5,%ymm1,%ymm1
   4212 	vpaddd	%ymm4,%ymm0,%ymm0
   4213 	vpxor	%ymm3,%ymm15,%ymm15
   4214 	vpxor	%ymm2,%ymm14,%ymm14
   4215 	vpxor	%ymm1,%ymm13,%ymm13
   4216 	vpxor	%ymm0,%ymm12,%ymm12
   4217 	addq	%rax,%r15
   4218 	adcq	%rdx,%r9
   4219 	vpshufb	%ymm8,%ymm15,%ymm15
   4220 	vpshufb	%ymm8,%ymm14,%ymm14
   4221 	vpshufb	%ymm8,%ymm13,%ymm13
   4222 	vpshufb	%ymm8,%ymm12,%ymm12
   4223 	vmovdqa	128(%rbp),%ymm8
   4224 	vpaddd	%ymm15,%ymm11,%ymm11
   4225 	vpaddd	%ymm14,%ymm10,%ymm10
   4226 	vpaddd	%ymm13,%ymm9,%ymm9
   4227 	vpaddd	%ymm12,%ymm8,%ymm8
   4228 	movq	%r13,%r10
   4229 	movq	%r14,%r11
   4230 	movq	%r15,%r12
   4231 	andq	$3,%r12
   4232 	movq	%r15,%r13
   4233 	andq	$-4,%r13
   4234 	movq	%r9,%r14
   4235 	shrdq	$2,%r9,%r15
   4236 	shrq	$2,%r9
   4237 	addq	%r13,%r10
   4238 	adcq	%r14,%r11
   4239 	adcq	$0,%r12
   4240 	addq	%r15,%r10
   4241 	adcq	%r9,%r11
   4242 	adcq	$0,%r12
   4243 	vpxor	%ymm11,%ymm7,%ymm7
   4244 	vpxor	%ymm10,%ymm6,%ymm6
   4245 	vpxor	%ymm9,%ymm5,%ymm5
   4246 	vpxor	%ymm8,%ymm4,%ymm4
   4247 	vmovdqa	%ymm8,128(%rbp)
   4248 	vpsrld	$20,%ymm7,%ymm8
   4249 	vpslld	$32-20,%ymm7,%ymm7
   4250 	vpxor	%ymm8,%ymm7,%ymm7
   4251 	addq	4*8(%rsi,%rcx), %r10
   4252 	adcq	8+4*8(%rsi,%rcx), %r11
   4253 	adcq	$1,%r12
   4254 
   4255 	leaq	48(%rcx),%rcx
   4256 	vpsrld	$20,%ymm6,%ymm8
   4257 	vpslld	$32-20,%ymm6,%ymm6
   4258 	vpxor	%ymm8,%ymm6,%ymm6
   4259 	vpsrld	$20,%ymm5,%ymm8
   4260 	vpslld	$32-20,%ymm5,%ymm5
   4261 	vpxor	%ymm8,%ymm5,%ymm5
   4262 	vpsrld	$20,%ymm4,%ymm8
   4263 	vpslld	$32-20,%ymm4,%ymm4
   4264 	vpxor	%ymm8,%ymm4,%ymm4
   4265 	vmovdqa	.rol8(%rip),%ymm8
   4266 	vpaddd	%ymm7,%ymm3,%ymm3
   4267 	vpaddd	%ymm6,%ymm2,%ymm2
   4268 	vpaddd	%ymm5,%ymm1,%ymm1
   4269 	vpaddd	%ymm4,%ymm0,%ymm0
   4270 	vpxor	%ymm3,%ymm15,%ymm15
   4271 	vpxor	%ymm2,%ymm14,%ymm14
   4272 	vpxor	%ymm1,%ymm13,%ymm13
   4273 	vpxor	%ymm0,%ymm12,%ymm12
   4274 	movq	0+0(%rbp),%rdx
   4275 	movq	%rdx,%r15
   4276 	mulxq	%r10,%r13,%r14
   4277 	mulxq	%r11,%rax,%rdx
   4278 	imulq	%r12,%r15
   4279 	addq	%rax,%r14
   4280 	adcq	%rdx,%r15
   4281 	vpshufb	%ymm8,%ymm15,%ymm15
   4282 	vpshufb	%ymm8,%ymm14,%ymm14
   4283 	vpshufb	%ymm8,%ymm13,%ymm13
   4284 	vpshufb	%ymm8,%ymm12,%ymm12
   4285 	vmovdqa	128(%rbp),%ymm8
   4286 	vpaddd	%ymm15,%ymm11,%ymm11
   4287 	vpaddd	%ymm14,%ymm10,%ymm10
   4288 	vpaddd	%ymm13,%ymm9,%ymm9
   4289 	movq	8+0(%rbp),%rdx
   4290 	mulxq	%r10,%r10,%rax
   4291 	addq	%r10,%r14
   4292 	mulxq	%r11,%r11,%r9
   4293 	adcq	%r11,%r15
   4294 	adcq	$0,%r9
   4295 	imulq	%r12,%rdx
   4296 	vpaddd	%ymm12,%ymm8,%ymm8
   4297 	vpxor	%ymm11,%ymm7,%ymm7
   4298 	vpxor	%ymm10,%ymm6,%ymm6
   4299 	vpxor	%ymm9,%ymm5,%ymm5
   4300 	vpxor	%ymm8,%ymm4,%ymm4
   4301 	vmovdqa	%ymm8,128(%rbp)
   4302 	vpsrld	$25,%ymm7,%ymm8
   4303 	vpslld	$32-25,%ymm7,%ymm7
   4304 	addq	%rax,%r15
   4305 	adcq	%rdx,%r9
   4306 	vpxor	%ymm8,%ymm7,%ymm7
   4307 	vpsrld	$25,%ymm6,%ymm8
   4308 	vpslld	$32-25,%ymm6,%ymm6
   4309 	vpxor	%ymm8,%ymm6,%ymm6
   4310 	vpsrld	$25,%ymm5,%ymm8
   4311 	vpslld	$32-25,%ymm5,%ymm5
   4312 	vpxor	%ymm8,%ymm5,%ymm5
   4313 	vpsrld	$25,%ymm4,%ymm8
   4314 	vpslld	$32-25,%ymm4,%ymm4
   4315 	vpxor	%ymm8,%ymm4,%ymm4
   4316 	vmovdqa	128(%rbp),%ymm8
   4317 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   4318 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   4319 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   4320 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   4321 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4322 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   4323 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   4324 	movq	%r13,%r10
   4325 	movq	%r14,%r11
   4326 	movq	%r15,%r12
   4327 	andq	$3,%r12
   4328 	movq	%r15,%r13
   4329 	andq	$-4,%r13
   4330 	movq	%r9,%r14
   4331 	shrdq	$2,%r9,%r15
   4332 	shrq	$2,%r9
   4333 	addq	%r13,%r10
   4334 	adcq	%r14,%r11
   4335 	adcq	$0,%r12
   4336 	addq	%r15,%r10
   4337 	adcq	%r9,%r11
   4338 	adcq	$0,%r12
   4339 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4340 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   4341 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4342 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4343 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4344 
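/* 60*8 = 480: ten double rounds are done and 480 of this chunk's 512
   ciphertext bytes have been absorbed.  Add back the saved initial
   state (constants, key rows at 64/96(%rbp), per-state counters at
   160..256(%rbp)) to finalize the keystream. */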
   4345 	cmpq	$60*8,%rcx
   4346 	jne	2b
   4347 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   4348 	vpaddd	64(%rbp),%ymm7,%ymm7
   4349 	vpaddd	96(%rbp),%ymm11,%ymm11
   4350 	vpaddd	256(%rbp),%ymm15,%ymm15
   4351 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   4352 	vpaddd	64(%rbp),%ymm6,%ymm6
   4353 	vpaddd	96(%rbp),%ymm10,%ymm10
   4354 	vpaddd	224(%rbp),%ymm14,%ymm14
   4355 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   4356 	vpaddd	64(%rbp),%ymm5,%ymm5
   4357 	vpaddd	96(%rbp),%ymm9,%ymm9
   4358 	vpaddd	192(%rbp),%ymm13,%ymm13
   4359 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4360 	vpaddd	64(%rbp),%ymm4,%ymm4
   4361 	vpaddd	96(%rbp),%ymm8,%ymm8
   4362 	vpaddd	160(%rbp),%ymm12,%ymm12
   4363 
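/* vperm2i128 swaps the 128-bit lanes back into byte order; XOR the
   512 bytes of ciphertext and store the plaintext.  The last two
   16-byte Poly1305 blocks of the chunk (at 60*8 and 60*8+16) are
   folded in along the way. */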
   4364 	vmovdqa	%ymm0,128(%rbp)
   4365 	addq	60*8(%rsi),%r10
   4366 	adcq	8+60*8(%rsi),%r11
   4367 	adcq	$1,%r12
   4368 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   4369 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   4370 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   4371 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   4372 	vpxor	0+0(%rsi),%ymm0,%ymm0
   4373 	vpxor	32+0(%rsi),%ymm3,%ymm3
   4374 	vpxor	64+0(%rsi),%ymm7,%ymm7
   4375 	vpxor	96+0(%rsi),%ymm11,%ymm11
   4376 	vmovdqu	%ymm0,0+0(%rdi)
   4377 	vmovdqu	%ymm3,32+0(%rdi)
   4378 	vmovdqu	%ymm7,64+0(%rdi)
   4379 	vmovdqu	%ymm11,96+0(%rdi)
   4380 
   4381 	vmovdqa	128(%rbp),%ymm0
   4382 	movq	0+0(%rbp),%rax
   4383 	movq	%rax,%r15
   4384 	mulq	%r10
   4385 	movq	%rax,%r13
   4386 	movq	%rdx,%r14
   4387 	movq	0+0(%rbp),%rax
   4388 	mulq	%r11
   4389 	imulq	%r12,%r15
   4390 	addq	%rax,%r14
   4391 	adcq	%rdx,%r15
   4392 	movq	8+0(%rbp),%rax
   4393 	movq	%rax,%r9
   4394 	mulq	%r10
   4395 	addq	%rax,%r14
   4396 	adcq	$0,%rdx
   4397 	movq	%rdx,%r10
   4398 	movq	8+0(%rbp),%rax
   4399 	mulq	%r11
   4400 	addq	%rax,%r15
   4401 	adcq	$0,%rdx
   4402 	imulq	%r12,%r9
   4403 	addq	%r10,%r15
   4404 	adcq	%rdx,%r9
   4405 	movq	%r13,%r10
   4406 	movq	%r14,%r11
   4407 	movq	%r15,%r12
   4408 	andq	$3,%r12
   4409 	movq	%r15,%r13
   4410 	andq	$-4,%r13
   4411 	movq	%r9,%r14
   4412 	shrdq	$2,%r9,%r15
   4413 	shrq	$2,%r9
   4414 	addq	%r13,%r10
   4415 	adcq	%r14,%r11
   4416 	adcq	$0,%r12
   4417 	addq	%r15,%r10
   4418 	adcq	%r9,%r11
   4419 	adcq	$0,%r12
   4420 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   4421 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   4422 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   4423 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   4424 	vpxor	0+128(%rsi),%ymm3,%ymm3
   4425 	vpxor	32+128(%rsi),%ymm2,%ymm2
   4426 	vpxor	64+128(%rsi),%ymm6,%ymm6
   4427 	vpxor	96+128(%rsi),%ymm10,%ymm10
   4428 	vmovdqu	%ymm3,0+128(%rdi)
   4429 	vmovdqu	%ymm2,32+128(%rdi)
   4430 	vmovdqu	%ymm6,64+128(%rdi)
   4431 	vmovdqu	%ymm10,96+128(%rdi)
   4432 	addq	60*8+16(%rsi),%r10
   4433 	adcq	8+60*8+16(%rsi),%r11
   4434 	adcq	$1,%r12
   4435 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   4436 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   4437 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   4438 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   4439 	vpxor	0+256(%rsi),%ymm3,%ymm3
   4440 	vpxor	32+256(%rsi),%ymm1,%ymm1
   4441 	vpxor	64+256(%rsi),%ymm5,%ymm5
   4442 	vpxor	96+256(%rsi),%ymm9,%ymm9
   4443 	vmovdqu	%ymm3,0+256(%rdi)
   4444 	vmovdqu	%ymm1,32+256(%rdi)
   4445 	vmovdqu	%ymm5,64+256(%rdi)
   4446 	vmovdqu	%ymm9,96+256(%rdi)
   4447 	movq	0+0(%rbp),%rax
   4448 	movq	%rax,%r15
   4449 	mulq	%r10
   4450 	movq	%rax,%r13
   4451 	movq	%rdx,%r14
   4452 	movq	0+0(%rbp),%rax
   4453 	mulq	%r11
   4454 	imulq	%r12,%r15
   4455 	addq	%rax,%r14
   4456 	adcq	%rdx,%r15
   4457 	movq	8+0(%rbp),%rax
   4458 	movq	%rax,%r9
   4459 	mulq	%r10
   4460 	addq	%rax,%r14
   4461 	adcq	$0,%rdx
   4462 	movq	%rdx,%r10
   4463 	movq	8+0(%rbp),%rax
   4464 	mulq	%r11
   4465 	addq	%rax,%r15
   4466 	adcq	$0,%rdx
   4467 	imulq	%r12,%r9
   4468 	addq	%r10,%r15
   4469 	adcq	%rdx,%r9
   4470 	movq	%r13,%r10
   4471 	movq	%r14,%r11
   4472 	movq	%r15,%r12
   4473 	andq	$3,%r12
   4474 	movq	%r15,%r13
   4475 	andq	$-4,%r13
   4476 	movq	%r9,%r14
   4477 	shrdq	$2,%r9,%r15
   4478 	shrq	$2,%r9
   4479 	addq	%r13,%r10
   4480 	adcq	%r14,%r11
   4481 	adcq	$0,%r12
   4482 	addq	%r15,%r10
   4483 	adcq	%r9,%r11
   4484 	adcq	$0,%r12
   4485 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   4486 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
   4487 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
   4488 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
   4489 	vpxor	0+384(%rsi),%ymm3,%ymm3
   4490 	vpxor	32+384(%rsi),%ymm0,%ymm0
   4491 	vpxor	64+384(%rsi),%ymm4,%ymm4
   4492 	vpxor	96+384(%rsi),%ymm8,%ymm8
   4493 	vmovdqu	%ymm3,0+384(%rdi)
   4494 	vmovdqu	%ymm0,32+384(%rdi)
   4495 	vmovdqu	%ymm4,64+384(%rdi)
   4496 	vmovdqu	%ymm8,96+384(%rdi)
   4497 
   4498 	leaq	512(%rsi),%rsi
   4499 	leaq	512(%rdi),%rdi
   4500 	subq	$512,%rbx
   4501 	jmp	1b
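/* Fewer than 512 bytes remain: dispatch on the leftover length.
   At most 128 bytes uses one AVX2 state, 256 two, 384 three, and
   anything larger falls through to the four-state tail. */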
   4502 3:
   4503 	testq	%rbx,%rbx
   4504 	vzeroupper
   4505 	je	open_sse_finalize
   4506 3:
   4507 	cmpq	$128,%rbx
   4508 	ja	3f
   4509 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4510 	vmovdqa	64(%rbp),%ymm4
   4511 	vmovdqa	96(%rbp),%ymm8
   4512 	vmovdqa	.avx2_inc(%rip),%ymm12
   4513 	vpaddd	160(%rbp),%ymm12,%ymm12
   4514 	vmovdqa	%ymm12,160(%rbp)
   4515 
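/* Tail of at most 128 bytes: a single state, i.e. two 64-byte blocks
   of keystream.  %rcx is the remaining length rounded down to 16;
   the 1: loop absorbs one 16-byte block per double round until that
   much is hashed, then 2: keeps rounding without hashing until %r8
   reaches 160, i.e. ten double rounds in total. */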
   4516 	xorq	%r8,%r8
   4517 	movq	%rbx,%rcx
   4518 	andq	$-16,%rcx
   4519 	testq	%rcx,%rcx
   4520 	je	2f
   4521 1:
   4522 	addq	0*8(%rsi,%r8), %r10
   4523 	adcq	8+0*8(%rsi,%r8), %r11
   4524 	adcq	$1,%r12
   4525 	movq	0+0(%rbp),%rax
   4526 	movq	%rax,%r15
   4527 	mulq	%r10
   4528 	movq	%rax,%r13
   4529 	movq	%rdx,%r14
   4530 	movq	0+0(%rbp),%rax
   4531 	mulq	%r11
   4532 	imulq	%r12,%r15
   4533 	addq	%rax,%r14
   4534 	adcq	%rdx,%r15
   4535 	movq	8+0(%rbp),%rax
   4536 	movq	%rax,%r9
   4537 	mulq	%r10
   4538 	addq	%rax,%r14
   4539 	adcq	$0,%rdx
   4540 	movq	%rdx,%r10
   4541 	movq	8+0(%rbp),%rax
   4542 	mulq	%r11
   4543 	addq	%rax,%r15
   4544 	adcq	$0,%rdx
   4545 	imulq	%r12,%r9
   4546 	addq	%r10,%r15
   4547 	adcq	%rdx,%r9
   4548 	movq	%r13,%r10
   4549 	movq	%r14,%r11
   4550 	movq	%r15,%r12
   4551 	andq	$3,%r12
   4552 	movq	%r15,%r13
   4553 	andq	$-4,%r13
   4554 	movq	%r9,%r14
   4555 	shrdq	$2,%r9,%r15
   4556 	shrq	$2,%r9
   4557 	addq	%r13,%r10
   4558 	adcq	%r14,%r11
   4559 	adcq	$0,%r12
   4560 	addq	%r15,%r10
   4561 	adcq	%r9,%r11
   4562 	adcq	$0,%r12
   4563 
   4564 2:
   4565 	addq	$16,%r8
   4566 	vpaddd	%ymm4,%ymm0,%ymm0
   4567 	vpxor	%ymm0,%ymm12,%ymm12
   4568 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4569 	vpaddd	%ymm12,%ymm8,%ymm8
   4570 	vpxor	%ymm8,%ymm4,%ymm4
   4571 	vpsrld	$20,%ymm4,%ymm3
   4572 	vpslld	$12,%ymm4,%ymm4
   4573 	vpxor	%ymm3,%ymm4,%ymm4
   4574 	vpaddd	%ymm4,%ymm0,%ymm0
   4575 	vpxor	%ymm0,%ymm12,%ymm12
   4576 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4577 	vpaddd	%ymm12,%ymm8,%ymm8
   4578 	vpxor	%ymm8,%ymm4,%ymm4
   4579 	vpslld	$7,%ymm4,%ymm3
   4580 	vpsrld	$25,%ymm4,%ymm4
   4581 	vpxor	%ymm3,%ymm4,%ymm4
   4582 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4583 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4584 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4585 	vpaddd	%ymm4,%ymm0,%ymm0
   4586 	vpxor	%ymm0,%ymm12,%ymm12
   4587 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4588 	vpaddd	%ymm12,%ymm8,%ymm8
   4589 	vpxor	%ymm8,%ymm4,%ymm4
   4590 	vpsrld	$20,%ymm4,%ymm3
   4591 	vpslld	$12,%ymm4,%ymm4
   4592 	vpxor	%ymm3,%ymm4,%ymm4
   4593 	vpaddd	%ymm4,%ymm0,%ymm0
   4594 	vpxor	%ymm0,%ymm12,%ymm12
   4595 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4596 	vpaddd	%ymm12,%ymm8,%ymm8
   4597 	vpxor	%ymm8,%ymm4,%ymm4
   4598 	vpslld	$7,%ymm4,%ymm3
   4599 	vpsrld	$25,%ymm4,%ymm4
   4600 	vpxor	%ymm3,%ymm4,%ymm4
   4601 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4602 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4603 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4604 
   4605 	cmpq	%rcx,%r8
   4606 	jb	1b
   4607 	cmpq	$160,%r8
   4608 	jne	2b
   4609 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4610 	vpaddd	64(%rbp),%ymm4,%ymm4
   4611 	vpaddd	96(%rbp),%ymm8,%ymm8
   4612 	vpaddd	160(%rbp),%ymm12,%ymm12
   4613 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   4614 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   4615 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   4616 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   4617 	vmovdqa	%ymm3,%ymm8
   4618 
   4619 	jmp	open_avx2_tail_loop
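/* Tail of at most 256 bytes: two parallel states.  The cmov below
   caps the number of hash-carrying round iterations at ten; blocks
   that do not fit are absorbed by the plain Poly1305 loop that runs
   after the rounds. */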
   4620 3:
   4621 	cmpq	$256,%rbx
   4622 	ja	3f
   4623 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4624 	vmovdqa	64(%rbp),%ymm4
   4625 	vmovdqa	96(%rbp),%ymm8
   4626 	vmovdqa	%ymm0,%ymm1
   4627 	vmovdqa	%ymm4,%ymm5
   4628 	vmovdqa	%ymm8,%ymm9
   4629 	vmovdqa	.avx2_inc(%rip),%ymm12
   4630 	vpaddd	160(%rbp),%ymm12,%ymm13
   4631 	vpaddd	%ymm13,%ymm12,%ymm12
   4632 	vmovdqa	%ymm12,160(%rbp)
   4633 	vmovdqa	%ymm13,192(%rbp)
   4634 
   4635 	movq	%rbx,128(%rbp)
   4636 	movq	%rbx,%rcx
   4637 	subq	$128,%rcx
   4638 	shrq	$4,%rcx
   4639 	movq	$10,%r8
   4640 	cmpq	$10,%rcx
   4641 	cmovgq	%r8,%rcx
   4642 	movq	%rsi,%rbx
   4643 	xorq	%r8,%r8
   4644 1:
   4645 	addq	0(%rbx),%r10
   4646 	adcq	8+0(%rbx),%r11
   4647 	adcq	$1,%r12
   4648 	movq	0+0(%rbp),%rdx
   4649 	movq	%rdx,%r15
   4650 	mulxq	%r10,%r13,%r14
   4651 	mulxq	%r11,%rax,%rdx
   4652 	imulq	%r12,%r15
   4653 	addq	%rax,%r14
   4654 	adcq	%rdx,%r15
   4655 	movq	8+0(%rbp),%rdx
   4656 	mulxq	%r10,%r10,%rax
   4657 	addq	%r10,%r14
   4658 	mulxq	%r11,%r11,%r9
   4659 	adcq	%r11,%r15
   4660 	adcq	$0,%r9
   4661 	imulq	%r12,%rdx
   4662 	addq	%rax,%r15
   4663 	adcq	%rdx,%r9
   4664 	movq	%r13,%r10
   4665 	movq	%r14,%r11
   4666 	movq	%r15,%r12
   4667 	andq	$3,%r12
   4668 	movq	%r15,%r13
   4669 	andq	$-4,%r13
   4670 	movq	%r9,%r14
   4671 	shrdq	$2,%r9,%r15
   4672 	shrq	$2,%r9
   4673 	addq	%r13,%r10
   4674 	adcq	%r14,%r11
   4675 	adcq	$0,%r12
   4676 	addq	%r15,%r10
   4677 	adcq	%r9,%r11
   4678 	adcq	$0,%r12
   4679 
   4680 	leaq	16(%rbx),%rbx
   4681 2:
   4682 	vpaddd	%ymm4,%ymm0,%ymm0
   4683 	vpxor	%ymm0,%ymm12,%ymm12
   4684 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4685 	vpaddd	%ymm12,%ymm8,%ymm8
   4686 	vpxor	%ymm8,%ymm4,%ymm4
   4687 	vpsrld	$20,%ymm4,%ymm3
   4688 	vpslld	$12,%ymm4,%ymm4
   4689 	vpxor	%ymm3,%ymm4,%ymm4
   4690 	vpaddd	%ymm4,%ymm0,%ymm0
   4691 	vpxor	%ymm0,%ymm12,%ymm12
   4692 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4693 	vpaddd	%ymm12,%ymm8,%ymm8
   4694 	vpxor	%ymm8,%ymm4,%ymm4
   4695 	vpslld	$7,%ymm4,%ymm3
   4696 	vpsrld	$25,%ymm4,%ymm4
   4697 	vpxor	%ymm3,%ymm4,%ymm4
   4698 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4699 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4700 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4701 	vpaddd	%ymm5,%ymm1,%ymm1
   4702 	vpxor	%ymm1,%ymm13,%ymm13
   4703 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   4704 	vpaddd	%ymm13,%ymm9,%ymm9
   4705 	vpxor	%ymm9,%ymm5,%ymm5
   4706 	vpsrld	$20,%ymm5,%ymm3
   4707 	vpslld	$12,%ymm5,%ymm5
   4708 	vpxor	%ymm3,%ymm5,%ymm5
   4709 	vpaddd	%ymm5,%ymm1,%ymm1
   4710 	vpxor	%ymm1,%ymm13,%ymm13
   4711 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   4712 	vpaddd	%ymm13,%ymm9,%ymm9
   4713 	vpxor	%ymm9,%ymm5,%ymm5
   4714 	vpslld	$7,%ymm5,%ymm3
   4715 	vpsrld	$25,%ymm5,%ymm5
   4716 	vpxor	%ymm3,%ymm5,%ymm5
   4717 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   4718 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4719 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   4720 
   4721 	incq	%r8
   4722 	vpaddd	%ymm4,%ymm0,%ymm0
   4723 	vpxor	%ymm0,%ymm12,%ymm12
   4724 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4725 	vpaddd	%ymm12,%ymm8,%ymm8
   4726 	vpxor	%ymm8,%ymm4,%ymm4
   4727 	vpsrld	$20,%ymm4,%ymm3
   4728 	vpslld	$12,%ymm4,%ymm4
   4729 	vpxor	%ymm3,%ymm4,%ymm4
   4730 	vpaddd	%ymm4,%ymm0,%ymm0
   4731 	vpxor	%ymm0,%ymm12,%ymm12
   4732 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4733 	vpaddd	%ymm12,%ymm8,%ymm8
   4734 	vpxor	%ymm8,%ymm4,%ymm4
   4735 	vpslld	$7,%ymm4,%ymm3
   4736 	vpsrld	$25,%ymm4,%ymm4
   4737 	vpxor	%ymm3,%ymm4,%ymm4
   4738 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4739 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4740 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4741 	vpaddd	%ymm5,%ymm1,%ymm1
   4742 	vpxor	%ymm1,%ymm13,%ymm13
   4743 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   4744 	vpaddd	%ymm13,%ymm9,%ymm9
   4745 	vpxor	%ymm9,%ymm5,%ymm5
   4746 	vpsrld	$20,%ymm5,%ymm3
   4747 	vpslld	$12,%ymm5,%ymm5
   4748 	vpxor	%ymm3,%ymm5,%ymm5
   4749 	vpaddd	%ymm5,%ymm1,%ymm1
   4750 	vpxor	%ymm1,%ymm13,%ymm13
   4751 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   4752 	vpaddd	%ymm13,%ymm9,%ymm9
   4753 	vpxor	%ymm9,%ymm5,%ymm5
   4754 	vpslld	$7,%ymm5,%ymm3
   4755 	vpsrld	$25,%ymm5,%ymm5
   4756 	vpxor	%ymm3,%ymm5,%ymm5
   4757 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   4758 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4759 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   4760 	vpaddd	%ymm6,%ymm2,%ymm2
   4761 	vpxor	%ymm2,%ymm14,%ymm14
   4762 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   4763 	vpaddd	%ymm14,%ymm10,%ymm10
   4764 	vpxor	%ymm10,%ymm6,%ymm6
   4765 	vpsrld	$20,%ymm6,%ymm3
   4766 	vpslld	$12,%ymm6,%ymm6
   4767 	vpxor	%ymm3,%ymm6,%ymm6
   4768 	vpaddd	%ymm6,%ymm2,%ymm2
   4769 	vpxor	%ymm2,%ymm14,%ymm14
   4770 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   4771 	vpaddd	%ymm14,%ymm10,%ymm10
   4772 	vpxor	%ymm10,%ymm6,%ymm6
   4773 	vpslld	$7,%ymm6,%ymm3
   4774 	vpsrld	$25,%ymm6,%ymm6
   4775 	vpxor	%ymm3,%ymm6,%ymm6
   4776 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   4777 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4778 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   4779 
   4780 	cmpq	%rcx,%r8
   4781 	jb	1b
   4782 	cmpq	$10,%r8
   4783 	jne	2b
   4784 	movq	%rbx,%r8
   4785 	subq	%rsi,%rbx
   4786 	movq	%rbx,%rcx
   4787 	movq	128(%rbp),%rbx
   4788 1:
   4789 	addq	$16,%rcx
   4790 	cmpq	%rbx,%rcx
   4791 	jg	1f
   4792 	addq	0(%r8),%r10
   4793 	adcq	8+0(%r8),%r11
   4794 	adcq	$1,%r12
   4795 	movq	0+0(%rbp),%rdx
   4796 	movq	%rdx,%r15
   4797 	mulxq	%r10,%r13,%r14
   4798 	mulxq	%r11,%rax,%rdx
   4799 	imulq	%r12,%r15
   4800 	addq	%rax,%r14
   4801 	adcq	%rdx,%r15
   4802 	movq	8+0(%rbp),%rdx
   4803 	mulxq	%r10,%r10,%rax
   4804 	addq	%r10,%r14
   4805 	mulxq	%r11,%r11,%r9
   4806 	adcq	%r11,%r15
   4807 	adcq	$0,%r9
   4808 	imulq	%r12,%rdx
   4809 	addq	%rax,%r15
   4810 	adcq	%rdx,%r9
   4811 	movq	%r13,%r10
   4812 	movq	%r14,%r11
   4813 	movq	%r15,%r12
   4814 	andq	$3,%r12
   4815 	movq	%r15,%r13
   4816 	andq	$-4,%r13
   4817 	movq	%r9,%r14
   4818 	shrdq	$2,%r9,%r15
   4819 	shrq	$2,%r9
   4820 	addq	%r13,%r10
   4821 	adcq	%r14,%r11
   4822 	adcq	$0,%r12
   4823 	addq	%r15,%r10
   4824 	adcq	%r9,%r11
   4825 	adcq	$0,%r12
   4826 
   4827 	leaq	16(%r8),%r8
   4828 	jmp	1b
   4829 1:
   4830 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   4831 	vpaddd	64(%rbp),%ymm5,%ymm5
   4832 	vpaddd	96(%rbp),%ymm9,%ymm9
   4833 	vpaddd	192(%rbp),%ymm13,%ymm13
   4834 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4835 	vpaddd	64(%rbp),%ymm4,%ymm4
   4836 	vpaddd	96(%rbp),%ymm8,%ymm8
   4837 	vpaddd	160(%rbp),%ymm12,%ymm12
   4838 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   4839 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   4840 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   4841 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   4842 	vpxor	0+0(%rsi),%ymm3,%ymm3
   4843 	vpxor	32+0(%rsi),%ymm1,%ymm1
   4844 	vpxor	64+0(%rsi),%ymm5,%ymm5
   4845 	vpxor	96+0(%rsi),%ymm9,%ymm9
   4846 	vmovdqu	%ymm3,0+0(%rdi)
   4847 	vmovdqu	%ymm1,32+0(%rdi)
   4848 	vmovdqu	%ymm5,64+0(%rdi)
   4849 	vmovdqu	%ymm9,96+0(%rdi)
   4850 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   4851 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   4852 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   4853 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   4854 	vmovdqa	%ymm3,%ymm8
   4855 
   4856 	leaq	128(%rsi),%rsi
   4857 	leaq	128(%rdi),%rdi
   4858 	subq	$128,%rbx
   4859 	jmp	open_avx2_tail_loop
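/* Tail of at most 384 bytes: three parallel states, same
   hash-while-rounding scheme as the 256-byte case. */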
   4860 3:
   4861 	cmpq	$384,%rbx
   4862 	ja	3f
   4863 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4864 	vmovdqa	64(%rbp),%ymm4
   4865 	vmovdqa	96(%rbp),%ymm8
   4866 	vmovdqa	%ymm0,%ymm1
   4867 	vmovdqa	%ymm4,%ymm5
   4868 	vmovdqa	%ymm8,%ymm9
   4869 	vmovdqa	%ymm0,%ymm2
   4870 	vmovdqa	%ymm4,%ymm6
   4871 	vmovdqa	%ymm8,%ymm10
   4872 	vmovdqa	.avx2_inc(%rip),%ymm12
   4873 	vpaddd	160(%rbp),%ymm12,%ymm14
   4874 	vpaddd	%ymm14,%ymm12,%ymm13
   4875 	vpaddd	%ymm13,%ymm12,%ymm12
   4876 	vmovdqa	%ymm12,160(%rbp)
   4877 	vmovdqa	%ymm13,192(%rbp)
   4878 	vmovdqa	%ymm14,224(%rbp)
   4879 
   4880 	movq	%rbx,128(%rbp)
   4881 	movq	%rbx,%rcx
   4882 	subq	$256,%rcx
   4883 	shrq	$4,%rcx
   4884 	addq	$6,%rcx
   4885 	movq	$10,%r8
   4886 	cmpq	$10,%rcx
   4887 	cmovgq	%r8,%rcx
   4888 	movq	%rsi,%rbx
   4889 	xorq	%r8,%r8
   4890 1:
   4891 	addq	0(%rbx),%r10
   4892 	adcq	8+0(%rbx),%r11
   4893 	adcq	$1,%r12
   4894 	movq	0+0(%rbp),%rdx
   4895 	movq	%rdx,%r15
   4896 	mulxq	%r10,%r13,%r14
   4897 	mulxq	%r11,%rax,%rdx
   4898 	imulq	%r12,%r15
   4899 	addq	%rax,%r14
   4900 	adcq	%rdx,%r15
   4901 	movq	8+0(%rbp),%rdx
   4902 	mulxq	%r10,%r10,%rax
   4903 	addq	%r10,%r14
   4904 	mulxq	%r11,%r11,%r9
   4905 	adcq	%r11,%r15
   4906 	adcq	$0,%r9
   4907 	imulq	%r12,%rdx
   4908 	addq	%rax,%r15
   4909 	adcq	%rdx,%r9
   4910 	movq	%r13,%r10
   4911 	movq	%r14,%r11
   4912 	movq	%r15,%r12
   4913 	andq	$3,%r12
   4914 	movq	%r15,%r13
   4915 	andq	$-4,%r13
   4916 	movq	%r9,%r14
   4917 	shrdq	$2,%r9,%r15
   4918 	shrq	$2,%r9
   4919 	addq	%r13,%r10
   4920 	adcq	%r14,%r11
   4921 	adcq	$0,%r12
   4922 	addq	%r15,%r10
   4923 	adcq	%r9,%r11
   4924 	adcq	$0,%r12
   4925 
   4926 	leaq	16(%rbx),%rbx
   4927 2:
   4928 	vpaddd	%ymm6,%ymm2,%ymm2
   4929 	vpxor	%ymm2,%ymm14,%ymm14
   4930 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   4931 	vpaddd	%ymm14,%ymm10,%ymm10
   4932 	vpxor	%ymm10,%ymm6,%ymm6
   4933 	vpsrld	$20,%ymm6,%ymm3
   4934 	vpslld	$12,%ymm6,%ymm6
   4935 	vpxor	%ymm3,%ymm6,%ymm6
   4936 	vpaddd	%ymm6,%ymm2,%ymm2
   4937 	vpxor	%ymm2,%ymm14,%ymm14
   4938 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   4939 	vpaddd	%ymm14,%ymm10,%ymm10
   4940 	vpxor	%ymm10,%ymm6,%ymm6
   4941 	vpslld	$7,%ymm6,%ymm3
   4942 	vpsrld	$25,%ymm6,%ymm6
   4943 	vpxor	%ymm3,%ymm6,%ymm6
   4944 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   4945 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4946 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   4947 	vpaddd	%ymm5,%ymm1,%ymm1
   4948 	vpxor	%ymm1,%ymm13,%ymm13
   4949 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   4950 	vpaddd	%ymm13,%ymm9,%ymm9
   4951 	vpxor	%ymm9,%ymm5,%ymm5
   4952 	vpsrld	$20,%ymm5,%ymm3
   4953 	vpslld	$12,%ymm5,%ymm5
   4954 	vpxor	%ymm3,%ymm5,%ymm5
   4955 	vpaddd	%ymm5,%ymm1,%ymm1
   4956 	vpxor	%ymm1,%ymm13,%ymm13
   4957 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   4958 	vpaddd	%ymm13,%ymm9,%ymm9
   4959 	vpxor	%ymm9,%ymm5,%ymm5
   4960 	vpslld	$7,%ymm5,%ymm3
   4961 	vpsrld	$25,%ymm5,%ymm5
   4962 	vpxor	%ymm3,%ymm5,%ymm5
   4963 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   4964 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4965 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   4966 	vpaddd	%ymm4,%ymm0,%ymm0
   4967 	vpxor	%ymm0,%ymm12,%ymm12
   4968 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4969 	vpaddd	%ymm12,%ymm8,%ymm8
   4970 	vpxor	%ymm8,%ymm4,%ymm4
   4971 	vpsrld	$20,%ymm4,%ymm3
   4972 	vpslld	$12,%ymm4,%ymm4
   4973 	vpxor	%ymm3,%ymm4,%ymm4
   4974 	vpaddd	%ymm4,%ymm0,%ymm0
   4975 	vpxor	%ymm0,%ymm12,%ymm12
   4976 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4977 	vpaddd	%ymm12,%ymm8,%ymm8
   4978 	vpxor	%ymm8,%ymm4,%ymm4
   4979 	vpslld	$7,%ymm4,%ymm3
   4980 	vpsrld	$25,%ymm4,%ymm4
   4981 	vpxor	%ymm3,%ymm4,%ymm4
   4982 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4983 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4984 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4985 	addq	0(%rbx),%r10
   4986 	adcq	8+0(%rbx),%r11
   4987 	adcq	$1,%r12
   4988 	movq	0+0(%rbp),%rax
   4989 	movq	%rax,%r15
   4990 	mulq	%r10
   4991 	movq	%rax,%r13
   4992 	movq	%rdx,%r14
   4993 	movq	0+0(%rbp),%rax
   4994 	mulq	%r11
   4995 	imulq	%r12,%r15
   4996 	addq	%rax,%r14
   4997 	adcq	%rdx,%r15
   4998 	movq	8+0(%rbp),%rax
   4999 	movq	%rax,%r9
   5000 	mulq	%r10
   5001 	addq	%rax,%r14
   5002 	adcq	$0,%rdx
   5003 	movq	%rdx,%r10
   5004 	movq	8+0(%rbp),%rax
   5005 	mulq	%r11
   5006 	addq	%rax,%r15
   5007 	adcq	$0,%rdx
   5008 	imulq	%r12,%r9
   5009 	addq	%r10,%r15
   5010 	adcq	%rdx,%r9
   5011 	movq	%r13,%r10
   5012 	movq	%r14,%r11
   5013 	movq	%r15,%r12
   5014 	andq	$3,%r12
   5015 	movq	%r15,%r13
   5016 	andq	$-4,%r13
   5017 	movq	%r9,%r14
   5018 	shrdq	$2,%r9,%r15
   5019 	shrq	$2,%r9
   5020 	addq	%r13,%r10
   5021 	adcq	%r14,%r11
   5022 	adcq	$0,%r12
   5023 	addq	%r15,%r10
   5024 	adcq	%r9,%r11
   5025 	adcq	$0,%r12
   5026 
   5027 	leaq	16(%rbx),%rbx
   5028 	incq	%r8
   5029 	vpaddd	%ymm6,%ymm2,%ymm2
   5030 	vpxor	%ymm2,%ymm14,%ymm14
   5031 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   5032 	vpaddd	%ymm14,%ymm10,%ymm10
   5033 	vpxor	%ymm10,%ymm6,%ymm6
   5034 	vpsrld	$20,%ymm6,%ymm3
   5035 	vpslld	$12,%ymm6,%ymm6
   5036 	vpxor	%ymm3,%ymm6,%ymm6
   5037 	vpaddd	%ymm6,%ymm2,%ymm2
   5038 	vpxor	%ymm2,%ymm14,%ymm14
   5039 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   5040 	vpaddd	%ymm14,%ymm10,%ymm10
   5041 	vpxor	%ymm10,%ymm6,%ymm6
   5042 	vpslld	$7,%ymm6,%ymm3
   5043 	vpsrld	$25,%ymm6,%ymm6
   5044 	vpxor	%ymm3,%ymm6,%ymm6
   5045 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   5046 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5047 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   5048 	vpaddd	%ymm5,%ymm1,%ymm1
   5049 	vpxor	%ymm1,%ymm13,%ymm13
   5050 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5051 	vpaddd	%ymm13,%ymm9,%ymm9
   5052 	vpxor	%ymm9,%ymm5,%ymm5
   5053 	vpsrld	$20,%ymm5,%ymm3
   5054 	vpslld	$12,%ymm5,%ymm5
   5055 	vpxor	%ymm3,%ymm5,%ymm5
   5056 	vpaddd	%ymm5,%ymm1,%ymm1
   5057 	vpxor	%ymm1,%ymm13,%ymm13
   5058 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5059 	vpaddd	%ymm13,%ymm9,%ymm9
   5060 	vpxor	%ymm9,%ymm5,%ymm5
   5061 	vpslld	$7,%ymm5,%ymm3
   5062 	vpsrld	$25,%ymm5,%ymm5
   5063 	vpxor	%ymm3,%ymm5,%ymm5
   5064 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5065 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5066 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5067 	vpaddd	%ymm4,%ymm0,%ymm0
   5068 	vpxor	%ymm0,%ymm12,%ymm12
   5069 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5070 	vpaddd	%ymm12,%ymm8,%ymm8
   5071 	vpxor	%ymm8,%ymm4,%ymm4
   5072 	vpsrld	$20,%ymm4,%ymm3
   5073 	vpslld	$12,%ymm4,%ymm4
   5074 	vpxor	%ymm3,%ymm4,%ymm4
   5075 	vpaddd	%ymm4,%ymm0,%ymm0
   5076 	vpxor	%ymm0,%ymm12,%ymm12
   5077 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5078 	vpaddd	%ymm12,%ymm8,%ymm8
   5079 	vpxor	%ymm8,%ymm4,%ymm4
   5080 	vpslld	$7,%ymm4,%ymm3
   5081 	vpsrld	$25,%ymm4,%ymm4
   5082 	vpxor	%ymm3,%ymm4,%ymm4
   5083 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5084 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5085 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5086 
   5087 	cmpq	%rcx,%r8
   5088 	jb	1b
   5089 	cmpq	$10,%r8
   5090 	jne	2b
   5091 	movq	%rbx,%r8
   5092 	subq	%rsi,%rbx
   5093 	movq	%rbx,%rcx
   5094 	movq	128(%rbp),%rbx
   5095 1:
   5096 	addq	$16,%rcx
   5097 	cmpq	%rbx,%rcx
   5098 	jg	1f
   5099 	addq	0(%r8),%r10
   5100 	adcq	8+0(%r8),%r11
   5101 	adcq	$1,%r12
   5102 	movq	0+0(%rbp),%rdx
   5103 	movq	%rdx,%r15
   5104 	mulxq	%r10,%r13,%r14
   5105 	mulxq	%r11,%rax,%rdx
   5106 	imulq	%r12,%r15
   5107 	addq	%rax,%r14
   5108 	adcq	%rdx,%r15
   5109 	movq	8+0(%rbp),%rdx
   5110 	mulxq	%r10,%r10,%rax
   5111 	addq	%r10,%r14
   5112 	mulxq	%r11,%r11,%r9
   5113 	adcq	%r11,%r15
   5114 	adcq	$0,%r9
   5115 	imulq	%r12,%rdx
   5116 	addq	%rax,%r15
   5117 	adcq	%rdx,%r9
   5118 	movq	%r13,%r10
   5119 	movq	%r14,%r11
   5120 	movq	%r15,%r12
   5121 	andq	$3,%r12
   5122 	movq	%r15,%r13
   5123 	andq	$-4,%r13
   5124 	movq	%r9,%r14
   5125 	shrdq	$2,%r9,%r15
   5126 	shrq	$2,%r9
   5127 	addq	%r13,%r10
   5128 	adcq	%r14,%r11
   5129 	adcq	$0,%r12
   5130 	addq	%r15,%r10
   5131 	adcq	%r9,%r11
   5132 	adcq	$0,%r12
   5133 
   5134 	leaq	16(%r8),%r8
   5135 	jmp	1b
   5136 1:
   5137 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   5138 	vpaddd	64(%rbp),%ymm6,%ymm6
   5139 	vpaddd	96(%rbp),%ymm10,%ymm10
   5140 	vpaddd	224(%rbp),%ymm14,%ymm14
   5141 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5142 	vpaddd	64(%rbp),%ymm5,%ymm5
   5143 	vpaddd	96(%rbp),%ymm9,%ymm9
   5144 	vpaddd	192(%rbp),%ymm13,%ymm13
   5145 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5146 	vpaddd	64(%rbp),%ymm4,%ymm4
   5147 	vpaddd	96(%rbp),%ymm8,%ymm8
   5148 	vpaddd	160(%rbp),%ymm12,%ymm12
   5149 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   5150 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   5151 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   5152 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   5153 	vpxor	0+0(%rsi),%ymm3,%ymm3
   5154 	vpxor	32+0(%rsi),%ymm2,%ymm2
   5155 	vpxor	64+0(%rsi),%ymm6,%ymm6
   5156 	vpxor	96+0(%rsi),%ymm10,%ymm10
   5157 	vmovdqu	%ymm3,0+0(%rdi)
   5158 	vmovdqu	%ymm2,32+0(%rdi)
   5159 	vmovdqu	%ymm6,64+0(%rdi)
   5160 	vmovdqu	%ymm10,96+0(%rdi)
   5161 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5162 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5163 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5164 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5165 	vpxor	0+128(%rsi),%ymm3,%ymm3
   5166 	vpxor	32+128(%rsi),%ymm1,%ymm1
   5167 	vpxor	64+128(%rsi),%ymm5,%ymm5
   5168 	vpxor	96+128(%rsi),%ymm9,%ymm9
   5169 	vmovdqu	%ymm3,0+128(%rdi)
   5170 	vmovdqu	%ymm1,32+128(%rdi)
   5171 	vmovdqu	%ymm5,64+128(%rdi)
   5172 	vmovdqu	%ymm9,96+128(%rdi)
   5173 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5174 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5175 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5176 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5177 	vmovdqa	%ymm3,%ymm8
   5178 
   5179 	leaq	256(%rsi),%rsi
   5180 	leaq	256(%rdi),%rdi
   5181 	subq	$256,%rbx
   5182 	jmp	open_avx2_tail_loop
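/* Largest tail: four parallel states, as in the main loop.  384
   bytes are decrypted and written here; the remaining keystream is
   left in ymm0/ymm4/ymm8/ymm12 for the 32-byte loop below. */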
   5183 3:
   5184 	vmovdqa	.chacha20_consts(%rip),%ymm0
   5185 	vmovdqa	64(%rbp),%ymm4
   5186 	vmovdqa	96(%rbp),%ymm8
   5187 	vmovdqa	%ymm0,%ymm1
   5188 	vmovdqa	%ymm4,%ymm5
   5189 	vmovdqa	%ymm8,%ymm9
   5190 	vmovdqa	%ymm0,%ymm2
   5191 	vmovdqa	%ymm4,%ymm6
   5192 	vmovdqa	%ymm8,%ymm10
   5193 	vmovdqa	%ymm0,%ymm3
   5194 	vmovdqa	%ymm4,%ymm7
   5195 	vmovdqa	%ymm8,%ymm11
   5196 	vmovdqa	.avx2_inc(%rip),%ymm12
   5197 	vpaddd	160(%rbp),%ymm12,%ymm15
   5198 	vpaddd	%ymm15,%ymm12,%ymm14
   5199 	vpaddd	%ymm14,%ymm12,%ymm13
   5200 	vpaddd	%ymm13,%ymm12,%ymm12
   5201 	vmovdqa	%ymm15,256(%rbp)
   5202 	vmovdqa	%ymm14,224(%rbp)
   5203 	vmovdqa	%ymm13,192(%rbp)
   5204 	vmovdqa	%ymm12,160(%rbp)
   5205 
   5206 	xorq	%rcx,%rcx
   5207 	movq	%rsi,%r8
   5208 1:
   5209 	addq	0(%r8),%r10
   5210 	adcq	8+0(%r8),%r11
   5211 	adcq	$1,%r12
   5212 	movq	0+0(%rbp),%rax
   5213 	movq	%rax,%r15
   5214 	mulq	%r10
   5215 	movq	%rax,%r13
   5216 	movq	%rdx,%r14
   5217 	movq	0+0(%rbp),%rax
   5218 	mulq	%r11
   5219 	imulq	%r12,%r15
   5220 	addq	%rax,%r14
   5221 	adcq	%rdx,%r15
   5222 	movq	8+0(%rbp),%rax
   5223 	movq	%rax,%r9
   5224 	mulq	%r10
   5225 	addq	%rax,%r14
   5226 	adcq	$0,%rdx
   5227 	movq	%rdx,%r10
   5228 	movq	8+0(%rbp),%rax
   5229 	mulq	%r11
   5230 	addq	%rax,%r15
   5231 	adcq	$0,%rdx
   5232 	imulq	%r12,%r9
   5233 	addq	%r10,%r15
   5234 	adcq	%rdx,%r9
   5235 	movq	%r13,%r10
   5236 	movq	%r14,%r11
   5237 	movq	%r15,%r12
   5238 	andq	$3,%r12
   5239 	movq	%r15,%r13
   5240 	andq	$-4,%r13
   5241 	movq	%r9,%r14
   5242 	shrdq	$2,%r9,%r15
   5243 	shrq	$2,%r9
   5244 	addq	%r13,%r10
   5245 	adcq	%r14,%r11
   5246 	adcq	$0,%r12
   5247 	addq	%r15,%r10
   5248 	adcq	%r9,%r11
   5249 	adcq	$0,%r12
   5250 
   5251 	leaq	16(%r8),%r8
   5252 2:
   5253 	vmovdqa	%ymm8,128(%rbp)
   5254 	vmovdqa	.rol16(%rip),%ymm8
   5255 	vpaddd	%ymm7,%ymm3,%ymm3
   5256 	vpaddd	%ymm6,%ymm2,%ymm2
   5257 	vpaddd	%ymm5,%ymm1,%ymm1
   5258 	vpaddd	%ymm4,%ymm0,%ymm0
   5259 	vpxor	%ymm3,%ymm15,%ymm15
   5260 	vpxor	%ymm2,%ymm14,%ymm14
   5261 	vpxor	%ymm1,%ymm13,%ymm13
   5262 	vpxor	%ymm0,%ymm12,%ymm12
   5263 	vpshufb	%ymm8,%ymm15,%ymm15
   5264 	vpshufb	%ymm8,%ymm14,%ymm14
   5265 	vpshufb	%ymm8,%ymm13,%ymm13
   5266 	vpshufb	%ymm8,%ymm12,%ymm12
   5267 	vmovdqa	128(%rbp),%ymm8
   5268 	vpaddd	%ymm15,%ymm11,%ymm11
   5269 	vpaddd	%ymm14,%ymm10,%ymm10
   5270 	vpaddd	%ymm13,%ymm9,%ymm9
   5271 	vpaddd	%ymm12,%ymm8,%ymm8
   5272 	vpxor	%ymm11,%ymm7,%ymm7
   5273 	vpxor	%ymm10,%ymm6,%ymm6
   5274 	vpxor	%ymm9,%ymm5,%ymm5
   5275 	vpxor	%ymm8,%ymm4,%ymm4
   5276 	vmovdqa	%ymm8,128(%rbp)
   5277 	vpsrld	$20,%ymm7,%ymm8
   5278 	vpslld	$32-20,%ymm7,%ymm7
   5279 	vpxor	%ymm8,%ymm7,%ymm7
   5280 	vpsrld	$20,%ymm6,%ymm8
   5281 	vpslld	$32-20,%ymm6,%ymm6
   5282 	vpxor	%ymm8,%ymm6,%ymm6
   5283 	vpsrld	$20,%ymm5,%ymm8
   5284 	vpslld	$32-20,%ymm5,%ymm5
   5285 	vpxor	%ymm8,%ymm5,%ymm5
   5286 	vpsrld	$20,%ymm4,%ymm8
   5287 	vpslld	$32-20,%ymm4,%ymm4
   5288 	vpxor	%ymm8,%ymm4,%ymm4
   5289 	vmovdqa	.rol8(%rip),%ymm8
   5290 	addq	0(%r8),%r10
   5291 	adcq	8+0(%r8),%r11
   5292 	adcq	$1,%r12
   5293 	movq	0+0(%rbp),%rdx
   5294 	movq	%rdx,%r15
   5295 	mulxq	%r10,%r13,%r14
   5296 	mulxq	%r11,%rax,%rdx
   5297 	imulq	%r12,%r15
   5298 	addq	%rax,%r14
   5299 	adcq	%rdx,%r15
   5300 	movq	8+0(%rbp),%rdx
   5301 	mulxq	%r10,%r10,%rax
   5302 	addq	%r10,%r14
   5303 	mulxq	%r11,%r11,%r9
   5304 	adcq	%r11,%r15
   5305 	adcq	$0,%r9
   5306 	imulq	%r12,%rdx
   5307 	addq	%rax,%r15
   5308 	adcq	%rdx,%r9
   5309 	movq	%r13,%r10
   5310 	movq	%r14,%r11
   5311 	movq	%r15,%r12
   5312 	andq	$3,%r12
   5313 	movq	%r15,%r13
   5314 	andq	$-4,%r13
   5315 	movq	%r9,%r14
   5316 	shrdq	$2,%r9,%r15
   5317 	shrq	$2,%r9
   5318 	addq	%r13,%r10
   5319 	adcq	%r14,%r11
   5320 	adcq	$0,%r12
   5321 	addq	%r15,%r10
   5322 	adcq	%r9,%r11
   5323 	adcq	$0,%r12
   5324 	vpaddd	%ymm7,%ymm3,%ymm3
   5325 	vpaddd	%ymm6,%ymm2,%ymm2
   5326 	vpaddd	%ymm5,%ymm1,%ymm1
   5327 	vpaddd	%ymm4,%ymm0,%ymm0
   5328 	vpxor	%ymm3,%ymm15,%ymm15
   5329 	vpxor	%ymm2,%ymm14,%ymm14
   5330 	vpxor	%ymm1,%ymm13,%ymm13
   5331 	vpxor	%ymm0,%ymm12,%ymm12
   5332 	vpshufb	%ymm8,%ymm15,%ymm15
   5333 	vpshufb	%ymm8,%ymm14,%ymm14
   5334 	vpshufb	%ymm8,%ymm13,%ymm13
   5335 	vpshufb	%ymm8,%ymm12,%ymm12
   5336 	vmovdqa	128(%rbp),%ymm8
   5337 	vpaddd	%ymm15,%ymm11,%ymm11
   5338 	vpaddd	%ymm14,%ymm10,%ymm10
   5339 	vpaddd	%ymm13,%ymm9,%ymm9
   5340 	vpaddd	%ymm12,%ymm8,%ymm8
   5341 	vpxor	%ymm11,%ymm7,%ymm7
   5342 	vpxor	%ymm10,%ymm6,%ymm6
   5343 	vpxor	%ymm9,%ymm5,%ymm5
   5344 	vpxor	%ymm8,%ymm4,%ymm4
   5345 	vmovdqa	%ymm8,128(%rbp)
   5346 	vpsrld	$25,%ymm7,%ymm8
   5347 	vpslld	$32-25,%ymm7,%ymm7
   5348 	vpxor	%ymm8,%ymm7,%ymm7
   5349 	vpsrld	$25,%ymm6,%ymm8
   5350 	vpslld	$32-25,%ymm6,%ymm6
   5351 	vpxor	%ymm8,%ymm6,%ymm6
   5352 	vpsrld	$25,%ymm5,%ymm8
   5353 	vpslld	$32-25,%ymm5,%ymm5
   5354 	vpxor	%ymm8,%ymm5,%ymm5
   5355 	vpsrld	$25,%ymm4,%ymm8
   5356 	vpslld	$32-25,%ymm4,%ymm4
   5357 	vpxor	%ymm8,%ymm4,%ymm4
   5358 	vmovdqa	128(%rbp),%ymm8
   5359 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   5360 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   5361 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   5362 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   5363 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5364 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   5365 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5366 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5367 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5368 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5369 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5370 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5371 	vmovdqa	%ymm8,128(%rbp)
   5372 	addq	16(%r8),%r10
   5373 	adcq	8+16(%r8),%r11
   5374 	adcq	$1,%r12
   5375 	movq	0+0(%rbp),%rdx
   5376 	movq	%rdx,%r15
   5377 	mulxq	%r10,%r13,%r14
   5378 	mulxq	%r11,%rax,%rdx
   5379 	imulq	%r12,%r15
   5380 	addq	%rax,%r14
   5381 	adcq	%rdx,%r15
   5382 	movq	8+0(%rbp),%rdx
   5383 	mulxq	%r10,%r10,%rax
   5384 	addq	%r10,%r14
   5385 	mulxq	%r11,%r11,%r9
   5386 	adcq	%r11,%r15
   5387 	adcq	$0,%r9
   5388 	imulq	%r12,%rdx
   5389 	addq	%rax,%r15
   5390 	adcq	%rdx,%r9
   5391 	movq	%r13,%r10
   5392 	movq	%r14,%r11
   5393 	movq	%r15,%r12
   5394 	andq	$3,%r12
   5395 	movq	%r15,%r13
   5396 	andq	$-4,%r13
   5397 	movq	%r9,%r14
   5398 	shrdq	$2,%r9,%r15
   5399 	shrq	$2,%r9
   5400 	addq	%r13,%r10
   5401 	adcq	%r14,%r11
   5402 	adcq	$0,%r12
   5403 	addq	%r15,%r10
   5404 	adcq	%r9,%r11
   5405 	adcq	$0,%r12
   5406 
   5407 	leaq	32(%r8),%r8
   5408 	vmovdqa	.rol16(%rip),%ymm8
   5409 	vpaddd	%ymm7,%ymm3,%ymm3
   5410 	vpaddd	%ymm6,%ymm2,%ymm2
   5411 	vpaddd	%ymm5,%ymm1,%ymm1
   5412 	vpaddd	%ymm4,%ymm0,%ymm0
   5413 	vpxor	%ymm3,%ymm15,%ymm15
   5414 	vpxor	%ymm2,%ymm14,%ymm14
   5415 	vpxor	%ymm1,%ymm13,%ymm13
   5416 	vpxor	%ymm0,%ymm12,%ymm12
   5417 	vpshufb	%ymm8,%ymm15,%ymm15
   5418 	vpshufb	%ymm8,%ymm14,%ymm14
   5419 	vpshufb	%ymm8,%ymm13,%ymm13
   5420 	vpshufb	%ymm8,%ymm12,%ymm12
   5421 	vmovdqa	128(%rbp),%ymm8
   5422 	vpaddd	%ymm15,%ymm11,%ymm11
   5423 	vpaddd	%ymm14,%ymm10,%ymm10
   5424 	vpaddd	%ymm13,%ymm9,%ymm9
   5425 	vpaddd	%ymm12,%ymm8,%ymm8
   5426 	vpxor	%ymm11,%ymm7,%ymm7
   5427 	vpxor	%ymm10,%ymm6,%ymm6
   5428 	vpxor	%ymm9,%ymm5,%ymm5
   5429 	vpxor	%ymm8,%ymm4,%ymm4
   5430 	vmovdqa	%ymm8,128(%rbp)
   5431 	vpsrld	$20,%ymm7,%ymm8
   5432 	vpslld	$32-20,%ymm7,%ymm7
   5433 	vpxor	%ymm8,%ymm7,%ymm7
   5434 	vpsrld	$20,%ymm6,%ymm8
   5435 	vpslld	$32-20,%ymm6,%ymm6
   5436 	vpxor	%ymm8,%ymm6,%ymm6
   5437 	vpsrld	$20,%ymm5,%ymm8
   5438 	vpslld	$32-20,%ymm5,%ymm5
   5439 	vpxor	%ymm8,%ymm5,%ymm5
   5440 	vpsrld	$20,%ymm4,%ymm8
   5441 	vpslld	$32-20,%ymm4,%ymm4
   5442 	vpxor	%ymm8,%ymm4,%ymm4
   5443 	vmovdqa	.rol8(%rip),%ymm8
   5444 	vpaddd	%ymm7,%ymm3,%ymm3
   5445 	vpaddd	%ymm6,%ymm2,%ymm2
   5446 	vpaddd	%ymm5,%ymm1,%ymm1
   5447 	vpaddd	%ymm4,%ymm0,%ymm0
   5448 	vpxor	%ymm3,%ymm15,%ymm15
   5449 	vpxor	%ymm2,%ymm14,%ymm14
   5450 	vpxor	%ymm1,%ymm13,%ymm13
   5451 	vpxor	%ymm0,%ymm12,%ymm12
   5452 	vpshufb	%ymm8,%ymm15,%ymm15
   5453 	vpshufb	%ymm8,%ymm14,%ymm14
   5454 	vpshufb	%ymm8,%ymm13,%ymm13
   5455 	vpshufb	%ymm8,%ymm12,%ymm12
   5456 	vmovdqa	128(%rbp),%ymm8
   5457 	vpaddd	%ymm15,%ymm11,%ymm11
   5458 	vpaddd	%ymm14,%ymm10,%ymm10
   5459 	vpaddd	%ymm13,%ymm9,%ymm9
   5460 	vpaddd	%ymm12,%ymm8,%ymm8
   5461 	vpxor	%ymm11,%ymm7,%ymm7
   5462 	vpxor	%ymm10,%ymm6,%ymm6
   5463 	vpxor	%ymm9,%ymm5,%ymm5
   5464 	vpxor	%ymm8,%ymm4,%ymm4
   5465 	vmovdqa	%ymm8,128(%rbp)
   5466 	vpsrld	$25,%ymm7,%ymm8
   5467 	vpslld	$32-25,%ymm7,%ymm7
   5468 	vpxor	%ymm8,%ymm7,%ymm7
   5469 	vpsrld	$25,%ymm6,%ymm8
   5470 	vpslld	$32-25,%ymm6,%ymm6
   5471 	vpxor	%ymm8,%ymm6,%ymm6
   5472 	vpsrld	$25,%ymm5,%ymm8
   5473 	vpslld	$32-25,%ymm5,%ymm5
   5474 	vpxor	%ymm8,%ymm5,%ymm5
   5475 	vpsrld	$25,%ymm4,%ymm8
   5476 	vpslld	$32-25,%ymm4,%ymm4
   5477 	vpxor	%ymm8,%ymm4,%ymm4
   5478 	vmovdqa	128(%rbp),%ymm8
   5479 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   5480 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   5481 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   5482 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   5483 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5484 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   5485 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5486 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5487 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5488 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5489 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5490 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5491 
   5492 	incq	%rcx
   5493 	cmpq	$4,%rcx
   5494 	jl	1b
   5495 	cmpq	$10,%rcx
   5496 	jne	2b
   5497 	movq	%rbx,%rcx
   5498 	subq	$384,%rcx
   5499 	andq	$-16,%rcx
   5500 1:
   5501 	testq	%rcx,%rcx
   5502 	je	1f
   5503 	addq	0(%r8),%r10
   5504 	adcq	8+0(%r8),%r11
   5505 	adcq	$1,%r12
   5506 	movq	0+0(%rbp),%rdx
   5507 	movq	%rdx,%r15
   5508 	mulxq	%r10,%r13,%r14
   5509 	mulxq	%r11,%rax,%rdx
   5510 	imulq	%r12,%r15
   5511 	addq	%rax,%r14
   5512 	adcq	%rdx,%r15
   5513 	movq	8+0(%rbp),%rdx
   5514 	mulxq	%r10,%r10,%rax
   5515 	addq	%r10,%r14
   5516 	mulxq	%r11,%r11,%r9
   5517 	adcq	%r11,%r15
   5518 	adcq	$0,%r9
   5519 	imulq	%r12,%rdx
   5520 	addq	%rax,%r15
   5521 	adcq	%rdx,%r9
   5522 	movq	%r13,%r10
   5523 	movq	%r14,%r11
   5524 	movq	%r15,%r12
   5525 	andq	$3,%r12
   5526 	movq	%r15,%r13
   5527 	andq	$-4,%r13
   5528 	movq	%r9,%r14
   5529 	shrdq	$2,%r9,%r15
   5530 	shrq	$2,%r9
   5531 	addq	%r13,%r10
   5532 	adcq	%r14,%r11
   5533 	adcq	$0,%r12
   5534 	addq	%r15,%r10
   5535 	adcq	%r9,%r11
   5536 	adcq	$0,%r12
   5537 
   5538 	leaq	16(%r8),%r8
   5539 	subq	$16,%rcx
   5540 	jmp	1b
   5541 1:
   5542 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   5543 	vpaddd	64(%rbp),%ymm7,%ymm7
   5544 	vpaddd	96(%rbp),%ymm11,%ymm11
   5545 	vpaddd	256(%rbp),%ymm15,%ymm15
   5546 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   5547 	vpaddd	64(%rbp),%ymm6,%ymm6
   5548 	vpaddd	96(%rbp),%ymm10,%ymm10
   5549 	vpaddd	224(%rbp),%ymm14,%ymm14
   5550 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5551 	vpaddd	64(%rbp),%ymm5,%ymm5
   5552 	vpaddd	96(%rbp),%ymm9,%ymm9
   5553 	vpaddd	192(%rbp),%ymm13,%ymm13
   5554 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5555 	vpaddd	64(%rbp),%ymm4,%ymm4
   5556 	vpaddd	96(%rbp),%ymm8,%ymm8
   5557 	vpaddd	160(%rbp),%ymm12,%ymm12
   5558 
   5559 	vmovdqa	%ymm0,128(%rbp)
   5560 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   5561 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   5562 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   5563 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   5564 	vpxor	0+0(%rsi),%ymm0,%ymm0
   5565 	vpxor	32+0(%rsi),%ymm3,%ymm3
   5566 	vpxor	64+0(%rsi),%ymm7,%ymm7
   5567 	vpxor	96+0(%rsi),%ymm11,%ymm11
   5568 	vmovdqu	%ymm0,0+0(%rdi)
   5569 	vmovdqu	%ymm3,32+0(%rdi)
   5570 	vmovdqu	%ymm7,64+0(%rdi)
   5571 	vmovdqu	%ymm11,96+0(%rdi)
   5572 
   5573 	vmovdqa	128(%rbp),%ymm0
   5574 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   5575 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   5576 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   5577 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   5578 	vpxor	0+128(%rsi),%ymm3,%ymm3
   5579 	vpxor	32+128(%rsi),%ymm2,%ymm2
   5580 	vpxor	64+128(%rsi),%ymm6,%ymm6
   5581 	vpxor	96+128(%rsi),%ymm10,%ymm10
   5582 	vmovdqu	%ymm3,0+128(%rdi)
   5583 	vmovdqu	%ymm2,32+128(%rdi)
   5584 	vmovdqu	%ymm6,64+128(%rdi)
   5585 	vmovdqu	%ymm10,96+128(%rdi)
   5586 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5587 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5588 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5589 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5590 	vpxor	0+256(%rsi),%ymm3,%ymm3
   5591 	vpxor	32+256(%rsi),%ymm1,%ymm1
   5592 	vpxor	64+256(%rsi),%ymm5,%ymm5
   5593 	vpxor	96+256(%rsi),%ymm9,%ymm9
   5594 	vmovdqu	%ymm3,0+256(%rdi)
   5595 	vmovdqu	%ymm1,32+256(%rdi)
   5596 	vmovdqu	%ymm5,64+256(%rdi)
   5597 	vmovdqu	%ymm9,96+256(%rdi)
   5598 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5599 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5600 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5601 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5602 	vmovdqa	%ymm3,%ymm8
   5603 
   5604 	leaq	384(%rsi),%rsi
   5605 	leaq	384(%rdi),%rdi
   5606 	subq	$384,%rbx
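/* Shared tail: XOR 32 bytes of leftover keystream at a time,
   rotating ymm4/ymm8/ymm12 down into ymm0 as they are consumed. */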
   5607 open_avx2_tail_loop:
   5608 	cmpq	$32,%rbx
   5609 	jb	open_avx2_tail
   5610 	subq	$32,%rbx
   5611 	vpxor	(%rsi),%ymm0,%ymm0
   5612 	vmovdqu	%ymm0,(%rdi)
   5613 	leaq	32(%rsi),%rsi
   5614 	leaq	32(%rdi),%rdi
   5615 	vmovdqa	%ymm4,%ymm0
   5616 	vmovdqa	%ymm8,%ymm4
   5617 	vmovdqa	%ymm12,%ymm8
   5618 	jmp	open_avx2_tail_loop
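/* Under 32 bytes left: emit one 16-byte chunk from the low lane if
   possible, shift the high lane down, and hand the final sub-16-byte
   piece (keystream in %xmm1) to the SSE tail code. */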
   5619 open_avx2_tail:
   5620 	cmpq	$16,%rbx
   5621 	vmovdqa	%xmm0,%xmm1
   5622 	jb	1f
   5623 	subq	$16,%rbx
   5624 
   5625 	vpxor	(%rsi),%xmm0,%xmm1
   5626 	vmovdqu	%xmm1,(%rdi)
   5627 	leaq	16(%rsi),%rsi
   5628 	leaq	16(%rdi),%rdi
   5629 	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
   5630 	vmovdqa	%xmm0,%xmm1
   5631 1:
   5632 	vzeroupper
   5633 	jmp	open_sse_tail_16
   5634 
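/* open_avx2_192: short-message path, presumably reached for inputs
   of at most 192 bytes, mirroring the seal_avx2_192 dispatch below.
   Two states are run through ten double rounds up front; the first
   32 bytes of keystream, clamped with .clamp, become the Poly1305
   key r||s stored at 0(%rbp). */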
   5635 open_avx2_192:
   5636 	vmovdqa	%ymm0,%ymm1
   5637 	vmovdqa	%ymm0,%ymm2
   5638 	vmovdqa	%ymm4,%ymm5
   5639 	vmovdqa	%ymm4,%ymm6
   5640 	vmovdqa	%ymm8,%ymm9
   5641 	vmovdqa	%ymm8,%ymm10
   5642 	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
   5643 	vmovdqa	%ymm12,%ymm11
   5644 	vmovdqa	%ymm13,%ymm15
   5645 	movq	$10,%r10
   5646 1:
   5647 	vpaddd	%ymm4,%ymm0,%ymm0
   5648 	vpxor	%ymm0,%ymm12,%ymm12
   5649 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5650 	vpaddd	%ymm12,%ymm8,%ymm8
   5651 	vpxor	%ymm8,%ymm4,%ymm4
   5652 	vpsrld	$20,%ymm4,%ymm3
   5653 	vpslld	$12,%ymm4,%ymm4
   5654 	vpxor	%ymm3,%ymm4,%ymm4
   5655 	vpaddd	%ymm4,%ymm0,%ymm0
   5656 	vpxor	%ymm0,%ymm12,%ymm12
   5657 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5658 	vpaddd	%ymm12,%ymm8,%ymm8
   5659 	vpxor	%ymm8,%ymm4,%ymm4
   5660 	vpslld	$7,%ymm4,%ymm3
   5661 	vpsrld	$25,%ymm4,%ymm4
   5662 	vpxor	%ymm3,%ymm4,%ymm4
   5663 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5664 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5665 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5666 	vpaddd	%ymm5,%ymm1,%ymm1
   5667 	vpxor	%ymm1,%ymm13,%ymm13
   5668 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5669 	vpaddd	%ymm13,%ymm9,%ymm9
   5670 	vpxor	%ymm9,%ymm5,%ymm5
   5671 	vpsrld	$20,%ymm5,%ymm3
   5672 	vpslld	$12,%ymm5,%ymm5
   5673 	vpxor	%ymm3,%ymm5,%ymm5
   5674 	vpaddd	%ymm5,%ymm1,%ymm1
   5675 	vpxor	%ymm1,%ymm13,%ymm13
   5676 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5677 	vpaddd	%ymm13,%ymm9,%ymm9
   5678 	vpxor	%ymm9,%ymm5,%ymm5
   5679 	vpslld	$7,%ymm5,%ymm3
   5680 	vpsrld	$25,%ymm5,%ymm5
   5681 	vpxor	%ymm3,%ymm5,%ymm5
   5682 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5683 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5684 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5685 	vpaddd	%ymm4,%ymm0,%ymm0
   5686 	vpxor	%ymm0,%ymm12,%ymm12
   5687 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5688 	vpaddd	%ymm12,%ymm8,%ymm8
   5689 	vpxor	%ymm8,%ymm4,%ymm4
   5690 	vpsrld	$20,%ymm4,%ymm3
   5691 	vpslld	$12,%ymm4,%ymm4
   5692 	vpxor	%ymm3,%ymm4,%ymm4
   5693 	vpaddd	%ymm4,%ymm0,%ymm0
   5694 	vpxor	%ymm0,%ymm12,%ymm12
   5695 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5696 	vpaddd	%ymm12,%ymm8,%ymm8
   5697 	vpxor	%ymm8,%ymm4,%ymm4
   5698 	vpslld	$7,%ymm4,%ymm3
   5699 	vpsrld	$25,%ymm4,%ymm4
   5700 	vpxor	%ymm3,%ymm4,%ymm4
   5701 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5702 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5703 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5704 	vpaddd	%ymm5,%ymm1,%ymm1
   5705 	vpxor	%ymm1,%ymm13,%ymm13
   5706 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5707 	vpaddd	%ymm13,%ymm9,%ymm9
   5708 	vpxor	%ymm9,%ymm5,%ymm5
   5709 	vpsrld	$20,%ymm5,%ymm3
   5710 	vpslld	$12,%ymm5,%ymm5
   5711 	vpxor	%ymm3,%ymm5,%ymm5
   5712 	vpaddd	%ymm5,%ymm1,%ymm1
   5713 	vpxor	%ymm1,%ymm13,%ymm13
   5714 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5715 	vpaddd	%ymm13,%ymm9,%ymm9
   5716 	vpxor	%ymm9,%ymm5,%ymm5
   5717 	vpslld	$7,%ymm5,%ymm3
   5718 	vpsrld	$25,%ymm5,%ymm5
   5719 	vpxor	%ymm3,%ymm5,%ymm5
   5720 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5721 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5722 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5723 
   5724 	decq	%r10
   5725 	jne	1b
   5726 	vpaddd	%ymm2,%ymm0,%ymm0
   5727 	vpaddd	%ymm2,%ymm1,%ymm1
   5728 	vpaddd	%ymm6,%ymm4,%ymm4
   5729 	vpaddd	%ymm6,%ymm5,%ymm5
   5730 	vpaddd	%ymm10,%ymm8,%ymm8
   5731 	vpaddd	%ymm10,%ymm9,%ymm9
   5732 	vpaddd	%ymm11,%ymm12,%ymm12
   5733 	vpaddd	%ymm15,%ymm13,%ymm13
   5734 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   5735 
   5736 	vpand	.clamp(%rip),%ymm3,%ymm3
   5737 	vmovdqa	%ymm3,0(%rbp)
   5738 
   5739 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   5740 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   5741 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
   5742 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
   5743 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
   5744 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
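/* open_avx2_short: hash the additional data, then hash and decrypt
   32 bytes per iteration, cycling the precomputed keystream through
   ymm0.  The movq %r8,%r8 below is a no-op, seemingly an artifact of
   the perlasm register assignment. */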
   5745 open_avx2_short:
   5746 	movq	%r8,%r8
   5747 	call	poly_hash_ad_internal
   5748 open_avx2_hash_and_xor_loop:
   5749 	cmpq	$32,%rbx
   5750 	jb	open_avx2_short_tail_32
   5751 	subq	$32,%rbx
   5752 	addq	0(%rsi),%r10
   5753 	adcq	8+0(%rsi),%r11
   5754 	adcq	$1,%r12
   5755 	movq	0+0(%rbp),%rax
   5756 	movq	%rax,%r15
   5757 	mulq	%r10
   5758 	movq	%rax,%r13
   5759 	movq	%rdx,%r14
   5760 	movq	0+0(%rbp),%rax
   5761 	mulq	%r11
   5762 	imulq	%r12,%r15
   5763 	addq	%rax,%r14
   5764 	adcq	%rdx,%r15
   5765 	movq	8+0(%rbp),%rax
   5766 	movq	%rax,%r9
   5767 	mulq	%r10
   5768 	addq	%rax,%r14
   5769 	adcq	$0,%rdx
   5770 	movq	%rdx,%r10
   5771 	movq	8+0(%rbp),%rax
   5772 	mulq	%r11
   5773 	addq	%rax,%r15
   5774 	adcq	$0,%rdx
   5775 	imulq	%r12,%r9
   5776 	addq	%r10,%r15
   5777 	adcq	%rdx,%r9
   5778 	movq	%r13,%r10
   5779 	movq	%r14,%r11
   5780 	movq	%r15,%r12
   5781 	andq	$3,%r12
   5782 	movq	%r15,%r13
   5783 	andq	$-4,%r13
   5784 	movq	%r9,%r14
   5785 	shrdq	$2,%r9,%r15
   5786 	shrq	$2,%r9
   5787 	addq	%r13,%r10
   5788 	adcq	%r14,%r11
   5789 	adcq	$0,%r12
   5790 	addq	%r15,%r10
   5791 	adcq	%r9,%r11
   5792 	adcq	$0,%r12
   5793 	addq	16(%rsi),%r10
   5794 	adcq	8+16(%rsi),%r11
   5795 	adcq	$1,%r12
   5796 	movq	0+0(%rbp),%rax
   5797 	movq	%rax,%r15
   5798 	mulq	%r10
   5799 	movq	%rax,%r13
   5800 	movq	%rdx,%r14
   5801 	movq	0+0(%rbp),%rax
   5802 	mulq	%r11
   5803 	imulq	%r12,%r15
   5804 	addq	%rax,%r14
   5805 	adcq	%rdx,%r15
   5806 	movq	8+0(%rbp),%rax
   5807 	movq	%rax,%r9
   5808 	mulq	%r10
   5809 	addq	%rax,%r14
   5810 	adcq	$0,%rdx
   5811 	movq	%rdx,%r10
   5812 	movq	8+0(%rbp),%rax
   5813 	mulq	%r11
   5814 	addq	%rax,%r15
   5815 	adcq	$0,%rdx
   5816 	imulq	%r12,%r9
   5817 	addq	%r10,%r15
   5818 	adcq	%rdx,%r9
   5819 	movq	%r13,%r10
   5820 	movq	%r14,%r11
   5821 	movq	%r15,%r12
   5822 	andq	$3,%r12
   5823 	movq	%r15,%r13
   5824 	andq	$-4,%r13
   5825 	movq	%r9,%r14
   5826 	shrdq	$2,%r9,%r15
   5827 	shrq	$2,%r9
   5828 	addq	%r13,%r10
   5829 	adcq	%r14,%r11
   5830 	adcq	$0,%r12
   5831 	addq	%r15,%r10
   5832 	adcq	%r9,%r11
   5833 	adcq	$0,%r12
   5834 
   5835 
   5836 	vpxor	(%rsi),%ymm0,%ymm0
   5837 	vmovdqu	%ymm0,(%rdi)
   5838 	leaq	32(%rsi),%rsi
   5839 	leaq	32(%rdi),%rdi
   5840 
   5841 	vmovdqa	%ymm4,%ymm0
   5842 	vmovdqa	%ymm8,%ymm4
   5843 	vmovdqa	%ymm12,%ymm8
   5844 	vmovdqa	%ymm1,%ymm12
   5845 	vmovdqa	%ymm5,%ymm1
   5846 	vmovdqa	%ymm9,%ymm5
   5847 	vmovdqa	%ymm13,%ymm9
   5848 	vmovdqa	%ymm2,%ymm13
   5849 	vmovdqa	%ymm6,%ymm2
   5850 	jmp	open_avx2_hash_and_xor_loop
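/* Under 32 bytes left on the short path: hash and XOR one final
   16-byte block if present, then fall through to the shared SSE tail
   for the last few bytes. */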
   5851 open_avx2_short_tail_32:
   5852 	cmpq	$16,%rbx
   5853 	vmovdqa	%xmm0,%xmm1
   5854 	jb	1f
   5855 	subq	$16,%rbx
   5856 	addq	0(%rsi),%r10
   5857 	adcq	8+0(%rsi),%r11
   5858 	adcq	$1,%r12
   5859 	movq	0+0(%rbp),%rax
   5860 	movq	%rax,%r15
   5861 	mulq	%r10
   5862 	movq	%rax,%r13
   5863 	movq	%rdx,%r14
   5864 	movq	0+0(%rbp),%rax
   5865 	mulq	%r11
   5866 	imulq	%r12,%r15
   5867 	addq	%rax,%r14
   5868 	adcq	%rdx,%r15
   5869 	movq	8+0(%rbp),%rax
   5870 	movq	%rax,%r9
   5871 	mulq	%r10
   5872 	addq	%rax,%r14
   5873 	adcq	$0,%rdx
   5874 	movq	%rdx,%r10
   5875 	movq	8+0(%rbp),%rax
   5876 	mulq	%r11
   5877 	addq	%rax,%r15
   5878 	adcq	$0,%rdx
   5879 	imulq	%r12,%r9
   5880 	addq	%r10,%r15
   5881 	adcq	%rdx,%r9
   5882 	movq	%r13,%r10
   5883 	movq	%r14,%r11
   5884 	movq	%r15,%r12
   5885 	andq	$3,%r12
   5886 	movq	%r15,%r13
   5887 	andq	$-4,%r13
   5888 	movq	%r9,%r14
   5889 	shrdq	$2,%r9,%r15
   5890 	shrq	$2,%r9
   5891 	addq	%r13,%r10
   5892 	adcq	%r14,%r11
   5893 	adcq	$0,%r12
   5894 	addq	%r15,%r10
   5895 	adcq	%r9,%r11
   5896 	adcq	$0,%r12
   5897 
   5898 	vpxor	(%rsi),%xmm0,%xmm3
   5899 	vmovdqu	%xmm3,(%rdi)
   5900 	leaq	16(%rsi),%rsi
   5901 	leaq	16(%rdi),%rdi
   5902 	vextracti128	$1,%ymm0,%xmm1
   5903 1:
   5904 	vzeroupper
   5905 	jmp	open_sse_tail_16
   5906 
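/* open_avx2_320: as the 192-byte path but with three states (384
   bytes of keystream) for messages of at most 320 bytes; again the
   first clamped 32 bytes become the Poly1305 key. */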
   5907 open_avx2_320:
   5908 	vmovdqa	%ymm0,%ymm1
   5909 	vmovdqa	%ymm0,%ymm2
   5910 	vmovdqa	%ymm4,%ymm5
   5911 	vmovdqa	%ymm4,%ymm6
   5912 	vmovdqa	%ymm8,%ymm9
   5913 	vmovdqa	%ymm8,%ymm10
   5914 	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
   5915 	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
   5916 	vmovdqa	%ymm4,%ymm7
   5917 	vmovdqa	%ymm8,%ymm11
   5918 	vmovdqa	%ymm12,160(%rbp)
   5919 	vmovdqa	%ymm13,192(%rbp)
   5920 	vmovdqa	%ymm14,224(%rbp)
   5921 	movq	$10,%r10
   5922 1:
   5923 	vpaddd	%ymm4,%ymm0,%ymm0
   5924 	vpxor	%ymm0,%ymm12,%ymm12
   5925 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5926 	vpaddd	%ymm12,%ymm8,%ymm8
   5927 	vpxor	%ymm8,%ymm4,%ymm4
   5928 	vpsrld	$20,%ymm4,%ymm3
   5929 	vpslld	$12,%ymm4,%ymm4
   5930 	vpxor	%ymm3,%ymm4,%ymm4
   5931 	vpaddd	%ymm4,%ymm0,%ymm0
   5932 	vpxor	%ymm0,%ymm12,%ymm12
   5933 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5934 	vpaddd	%ymm12,%ymm8,%ymm8
   5935 	vpxor	%ymm8,%ymm4,%ymm4
   5936 	vpslld	$7,%ymm4,%ymm3
   5937 	vpsrld	$25,%ymm4,%ymm4
   5938 	vpxor	%ymm3,%ymm4,%ymm4
   5939 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5940 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5941 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5942 	vpaddd	%ymm5,%ymm1,%ymm1
   5943 	vpxor	%ymm1,%ymm13,%ymm13
   5944 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5945 	vpaddd	%ymm13,%ymm9,%ymm9
   5946 	vpxor	%ymm9,%ymm5,%ymm5
   5947 	vpsrld	$20,%ymm5,%ymm3
   5948 	vpslld	$12,%ymm5,%ymm5
   5949 	vpxor	%ymm3,%ymm5,%ymm5
   5950 	vpaddd	%ymm5,%ymm1,%ymm1
   5951 	vpxor	%ymm1,%ymm13,%ymm13
   5952 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5953 	vpaddd	%ymm13,%ymm9,%ymm9
   5954 	vpxor	%ymm9,%ymm5,%ymm5
   5955 	vpslld	$7,%ymm5,%ymm3
   5956 	vpsrld	$25,%ymm5,%ymm5
   5957 	vpxor	%ymm3,%ymm5,%ymm5
   5958 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5959 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5960 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5961 	vpaddd	%ymm6,%ymm2,%ymm2
   5962 	vpxor	%ymm2,%ymm14,%ymm14
   5963 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   5964 	vpaddd	%ymm14,%ymm10,%ymm10
   5965 	vpxor	%ymm10,%ymm6,%ymm6
   5966 	vpsrld	$20,%ymm6,%ymm3
   5967 	vpslld	$12,%ymm6,%ymm6
   5968 	vpxor	%ymm3,%ymm6,%ymm6
   5969 	vpaddd	%ymm6,%ymm2,%ymm2
   5970 	vpxor	%ymm2,%ymm14,%ymm14
   5971 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   5972 	vpaddd	%ymm14,%ymm10,%ymm10
   5973 	vpxor	%ymm10,%ymm6,%ymm6
   5974 	vpslld	$7,%ymm6,%ymm3
   5975 	vpsrld	$25,%ymm6,%ymm6
   5976 	vpxor	%ymm3,%ymm6,%ymm6
   5977 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   5978 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5979 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   5980 	vpaddd	%ymm4,%ymm0,%ymm0
   5981 	vpxor	%ymm0,%ymm12,%ymm12
   5982 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5983 	vpaddd	%ymm12,%ymm8,%ymm8
   5984 	vpxor	%ymm8,%ymm4,%ymm4
   5985 	vpsrld	$20,%ymm4,%ymm3
   5986 	vpslld	$12,%ymm4,%ymm4
   5987 	vpxor	%ymm3,%ymm4,%ymm4
   5988 	vpaddd	%ymm4,%ymm0,%ymm0
   5989 	vpxor	%ymm0,%ymm12,%ymm12
   5990 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5991 	vpaddd	%ymm12,%ymm8,%ymm8
   5992 	vpxor	%ymm8,%ymm4,%ymm4
   5993 	vpslld	$7,%ymm4,%ymm3
   5994 	vpsrld	$25,%ymm4,%ymm4
   5995 	vpxor	%ymm3,%ymm4,%ymm4
   5996 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5997 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5998 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5999 	vpaddd	%ymm5,%ymm1,%ymm1
   6000 	vpxor	%ymm1,%ymm13,%ymm13
   6001 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   6002 	vpaddd	%ymm13,%ymm9,%ymm9
   6003 	vpxor	%ymm9,%ymm5,%ymm5
   6004 	vpsrld	$20,%ymm5,%ymm3
   6005 	vpslld	$12,%ymm5,%ymm5
   6006 	vpxor	%ymm3,%ymm5,%ymm5
   6007 	vpaddd	%ymm5,%ymm1,%ymm1
   6008 	vpxor	%ymm1,%ymm13,%ymm13
   6009 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   6010 	vpaddd	%ymm13,%ymm9,%ymm9
   6011 	vpxor	%ymm9,%ymm5,%ymm5
   6012 	vpslld	$7,%ymm5,%ymm3
   6013 	vpsrld	$25,%ymm5,%ymm5
   6014 	vpxor	%ymm3,%ymm5,%ymm5
   6015 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   6016 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6017 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   6018 	vpaddd	%ymm6,%ymm2,%ymm2
   6019 	vpxor	%ymm2,%ymm14,%ymm14
   6020 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   6021 	vpaddd	%ymm14,%ymm10,%ymm10
   6022 	vpxor	%ymm10,%ymm6,%ymm6
   6023 	vpsrld	$20,%ymm6,%ymm3
   6024 	vpslld	$12,%ymm6,%ymm6
   6025 	vpxor	%ymm3,%ymm6,%ymm6
   6026 	vpaddd	%ymm6,%ymm2,%ymm2
   6027 	vpxor	%ymm2,%ymm14,%ymm14
   6028 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   6029 	vpaddd	%ymm14,%ymm10,%ymm10
   6030 	vpxor	%ymm10,%ymm6,%ymm6
   6031 	vpslld	$7,%ymm6,%ymm3
   6032 	vpsrld	$25,%ymm6,%ymm6
   6033 	vpxor	%ymm3,%ymm6,%ymm6
   6034 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   6035 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6036 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   6037 
   6038 	decq	%r10
   6039 	jne	1b
   6040 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   6041 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   6042 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   6043 	vpaddd	%ymm7,%ymm4,%ymm4
   6044 	vpaddd	%ymm7,%ymm5,%ymm5
   6045 	vpaddd	%ymm7,%ymm6,%ymm6
   6046 	vpaddd	%ymm11,%ymm8,%ymm8
   6047 	vpaddd	%ymm11,%ymm9,%ymm9
   6048 	vpaddd	%ymm11,%ymm10,%ymm10
   6049 	vpaddd	160(%rbp),%ymm12,%ymm12
   6050 	vpaddd	192(%rbp),%ymm13,%ymm13
   6051 	vpaddd	224(%rbp),%ymm14,%ymm14
   6052 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   6053 
   6054 	vpand	.clamp(%rip),%ymm3,%ymm3
   6055 	vmovdqa	%ymm3,0(%rbp)
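	# Bytes 0-31 of block 0's keystream become the Poly1305 key: the low 16
	# bytes (r) are clamped by the .clamp mask, the high 16 bytes (s) pass
	# through unchanged; the result is saved at 0(%rbp) for the hash steps.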
   6056 
   6057 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   6058 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   6059 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
   6060 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
   6061 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
   6062 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
   6063 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
   6064 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
   6065 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
   6066 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
   6067 	jmp	open_avx2_short
   6068 .size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
   6069 
   6070 
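# chacha20_poly1305_seal_avx2: AVX2 encrypt-and-authenticate path.
# Register roles, as inferred from the uses below: %rdi = output, %rsi =
# input, %rbx = plaintext length, %r8 = AD length, %r9 = key/nonce setup
# block, %rbp = scratch frame holding key rows, counters, and spills.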
   6071 .type	chacha20_poly1305_seal_avx2,@function
   6072 .align	64
   6073 chacha20_poly1305_seal_avx2:
   6074 	vzeroupper
   6075 	vmovdqa	.chacha20_consts(%rip),%ymm0
   6076 	vbroadcasti128	0(%r9),%ymm4
   6077 	vbroadcasti128	16(%r9),%ymm8
   6078 	vbroadcasti128	32(%r9),%ymm12
   6079 	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
   6080 	cmpq	$192,%rbx
   6081 	jbe	seal_avx2_192
   6082 	cmpq	$320,%rbx
   6083 	jbe	seal_avx2_320
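	# More than 320 bytes: build four parallel two-block ChaCha20 states
	# (eight blocks per pass). The counters are staged so that the
	# lowest-counter state also supplies block 0 for the Poly1305 key.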
   6084 	vmovdqa	%ymm0,%ymm1
   6085 	vmovdqa	%ymm0,%ymm2
   6086 	vmovdqa	%ymm0,%ymm3
   6087 	vmovdqa	%ymm4,%ymm5
   6088 	vmovdqa	%ymm4,%ymm6
   6089 	vmovdqa	%ymm4,%ymm7
   6090 	vmovdqa	%ymm4,64(%rbp)
   6091 	vmovdqa	%ymm8,%ymm9
   6092 	vmovdqa	%ymm8,%ymm10
   6093 	vmovdqa	%ymm8,%ymm11
   6094 	vmovdqa	%ymm8,96(%rbp)
   6095 	vmovdqa	%ymm12,%ymm15
   6096 	vpaddd	.avx2_inc(%rip),%ymm15,%ymm14
   6097 	vpaddd	.avx2_inc(%rip),%ymm14,%ymm13
   6098 	vpaddd	.avx2_inc(%rip),%ymm13,%ymm12
   6099 	vmovdqa	%ymm12,160(%rbp)
   6100 	vmovdqa	%ymm13,192(%rbp)
   6101 	vmovdqa	%ymm14,224(%rbp)
   6102 	vmovdqa	%ymm15,256(%rbp)
   6103 	movq	$10,%r10
   6104 1:
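	# Round loop: one ChaCha20 double round (column pass + diagonal pass)
	# over all four states per iteration, 10 iterations total. %ymm8 is
	# spilled to 128(%rbp) so it can double as the rotate temporary and
	# hold the .rol16/.rol8 shuffle masks.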
   6105 	vmovdqa	%ymm8,128(%rbp)
   6106 	vmovdqa	.rol16(%rip),%ymm8
   6107 	vpaddd	%ymm7,%ymm3,%ymm3
   6108 	vpaddd	%ymm6,%ymm2,%ymm2
   6109 	vpaddd	%ymm5,%ymm1,%ymm1
   6110 	vpaddd	%ymm4,%ymm0,%ymm0
   6111 	vpxor	%ymm3,%ymm15,%ymm15
   6112 	vpxor	%ymm2,%ymm14,%ymm14
   6113 	vpxor	%ymm1,%ymm13,%ymm13
   6114 	vpxor	%ymm0,%ymm12,%ymm12
   6115 	vpshufb	%ymm8,%ymm15,%ymm15
   6116 	vpshufb	%ymm8,%ymm14,%ymm14
   6117 	vpshufb	%ymm8,%ymm13,%ymm13
   6118 	vpshufb	%ymm8,%ymm12,%ymm12
   6119 	vmovdqa	128(%rbp),%ymm8
   6120 	vpaddd	%ymm15,%ymm11,%ymm11
   6121 	vpaddd	%ymm14,%ymm10,%ymm10
   6122 	vpaddd	%ymm13,%ymm9,%ymm9
   6123 	vpaddd	%ymm12,%ymm8,%ymm8
   6124 	vpxor	%ymm11,%ymm7,%ymm7
   6125 	vpxor	%ymm10,%ymm6,%ymm6
   6126 	vpxor	%ymm9,%ymm5,%ymm5
   6127 	vpxor	%ymm8,%ymm4,%ymm4
   6128 	vmovdqa	%ymm8,128(%rbp)
   6129 	vpsrld	$20,%ymm7,%ymm8
   6130 	vpslld	$32-20,%ymm7,%ymm7
   6131 	vpxor	%ymm8,%ymm7,%ymm7
   6132 	vpsrld	$20,%ymm6,%ymm8
   6133 	vpslld	$32-20,%ymm6,%ymm6
   6134 	vpxor	%ymm8,%ymm6,%ymm6
   6135 	vpsrld	$20,%ymm5,%ymm8
   6136 	vpslld	$32-20,%ymm5,%ymm5
   6137 	vpxor	%ymm8,%ymm5,%ymm5
   6138 	vpsrld	$20,%ymm4,%ymm8
   6139 	vpslld	$32-20,%ymm4,%ymm4
   6140 	vpxor	%ymm8,%ymm4,%ymm4
   6141 	vmovdqa	.rol8(%rip),%ymm8
   6142 	vpaddd	%ymm7,%ymm3,%ymm3
   6143 	vpaddd	%ymm6,%ymm2,%ymm2
   6144 	vpaddd	%ymm5,%ymm1,%ymm1
   6145 	vpaddd	%ymm4,%ymm0,%ymm0
   6146 	vpxor	%ymm3,%ymm15,%ymm15
   6147 	vpxor	%ymm2,%ymm14,%ymm14
   6148 	vpxor	%ymm1,%ymm13,%ymm13
   6149 	vpxor	%ymm0,%ymm12,%ymm12
   6150 	vpshufb	%ymm8,%ymm15,%ymm15
   6151 	vpshufb	%ymm8,%ymm14,%ymm14
   6152 	vpshufb	%ymm8,%ymm13,%ymm13
   6153 	vpshufb	%ymm8,%ymm12,%ymm12
   6154 	vmovdqa	128(%rbp),%ymm8
   6155 	vpaddd	%ymm15,%ymm11,%ymm11
   6156 	vpaddd	%ymm14,%ymm10,%ymm10
   6157 	vpaddd	%ymm13,%ymm9,%ymm9
   6158 	vpaddd	%ymm12,%ymm8,%ymm8
   6159 	vpxor	%ymm11,%ymm7,%ymm7
   6160 	vpxor	%ymm10,%ymm6,%ymm6
   6161 	vpxor	%ymm9,%ymm5,%ymm5
   6162 	vpxor	%ymm8,%ymm4,%ymm4
   6163 	vmovdqa	%ymm8,128(%rbp)
   6164 	vpsrld	$25,%ymm7,%ymm8
   6165 	vpslld	$32-25,%ymm7,%ymm7
   6166 	vpxor	%ymm8,%ymm7,%ymm7
   6167 	vpsrld	$25,%ymm6,%ymm8
   6168 	vpslld	$32-25,%ymm6,%ymm6
   6169 	vpxor	%ymm8,%ymm6,%ymm6
   6170 	vpsrld	$25,%ymm5,%ymm8
   6171 	vpslld	$32-25,%ymm5,%ymm5
   6172 	vpxor	%ymm8,%ymm5,%ymm5
   6173 	vpsrld	$25,%ymm4,%ymm8
   6174 	vpslld	$32-25,%ymm4,%ymm4
   6175 	vpxor	%ymm8,%ymm4,%ymm4
   6176 	vmovdqa	128(%rbp),%ymm8
   6177 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   6178 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6179 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   6180 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   6181 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6182 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   6183 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   6184 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6185 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   6186 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   6187 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6188 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   6189 	vmovdqa	%ymm8,128(%rbp)
   6190 	vmovdqa	.rol16(%rip),%ymm8
   6191 	vpaddd	%ymm7,%ymm3,%ymm3
   6192 	vpaddd	%ymm6,%ymm2,%ymm2
   6193 	vpaddd	%ymm5,%ymm1,%ymm1
   6194 	vpaddd	%ymm4,%ymm0,%ymm0
   6195 	vpxor	%ymm3,%ymm15,%ymm15
   6196 	vpxor	%ymm2,%ymm14,%ymm14
   6197 	vpxor	%ymm1,%ymm13,%ymm13
   6198 	vpxor	%ymm0,%ymm12,%ymm12
   6199 	vpshufb	%ymm8,%ymm15,%ymm15
   6200 	vpshufb	%ymm8,%ymm14,%ymm14
   6201 	vpshufb	%ymm8,%ymm13,%ymm13
   6202 	vpshufb	%ymm8,%ymm12,%ymm12
   6203 	vmovdqa	128(%rbp),%ymm8
   6204 	vpaddd	%ymm15,%ymm11,%ymm11
   6205 	vpaddd	%ymm14,%ymm10,%ymm10
   6206 	vpaddd	%ymm13,%ymm9,%ymm9
   6207 	vpaddd	%ymm12,%ymm8,%ymm8
   6208 	vpxor	%ymm11,%ymm7,%ymm7
   6209 	vpxor	%ymm10,%ymm6,%ymm6
   6210 	vpxor	%ymm9,%ymm5,%ymm5
   6211 	vpxor	%ymm8,%ymm4,%ymm4
   6212 	vmovdqa	%ymm8,128(%rbp)
   6213 	vpsrld	$20,%ymm7,%ymm8
   6214 	vpslld	$32-20,%ymm7,%ymm7
   6215 	vpxor	%ymm8,%ymm7,%ymm7
   6216 	vpsrld	$20,%ymm6,%ymm8
   6217 	vpslld	$32-20,%ymm6,%ymm6
   6218 	vpxor	%ymm8,%ymm6,%ymm6
   6219 	vpsrld	$20,%ymm5,%ymm8
   6220 	vpslld	$32-20,%ymm5,%ymm5
   6221 	vpxor	%ymm8,%ymm5,%ymm5
   6222 	vpsrld	$20,%ymm4,%ymm8
   6223 	vpslld	$32-20,%ymm4,%ymm4
   6224 	vpxor	%ymm8,%ymm4,%ymm4
   6225 	vmovdqa	.rol8(%rip),%ymm8
   6226 	vpaddd	%ymm7,%ymm3,%ymm3
   6227 	vpaddd	%ymm6,%ymm2,%ymm2
   6228 	vpaddd	%ymm5,%ymm1,%ymm1
   6229 	vpaddd	%ymm4,%ymm0,%ymm0
   6230 	vpxor	%ymm3,%ymm15,%ymm15
   6231 	vpxor	%ymm2,%ymm14,%ymm14
   6232 	vpxor	%ymm1,%ymm13,%ymm13
   6233 	vpxor	%ymm0,%ymm12,%ymm12
   6234 	vpshufb	%ymm8,%ymm15,%ymm15
   6235 	vpshufb	%ymm8,%ymm14,%ymm14
   6236 	vpshufb	%ymm8,%ymm13,%ymm13
   6237 	vpshufb	%ymm8,%ymm12,%ymm12
   6238 	vmovdqa	128(%rbp),%ymm8
   6239 	vpaddd	%ymm15,%ymm11,%ymm11
   6240 	vpaddd	%ymm14,%ymm10,%ymm10
   6241 	vpaddd	%ymm13,%ymm9,%ymm9
   6242 	vpaddd	%ymm12,%ymm8,%ymm8
   6243 	vpxor	%ymm11,%ymm7,%ymm7
   6244 	vpxor	%ymm10,%ymm6,%ymm6
   6245 	vpxor	%ymm9,%ymm5,%ymm5
   6246 	vpxor	%ymm8,%ymm4,%ymm4
   6247 	vmovdqa	%ymm8,128(%rbp)
   6248 	vpsrld	$25,%ymm7,%ymm8
   6249 	vpslld	$32-25,%ymm7,%ymm7
   6250 	vpxor	%ymm8,%ymm7,%ymm7
   6251 	vpsrld	$25,%ymm6,%ymm8
   6252 	vpslld	$32-25,%ymm6,%ymm6
   6253 	vpxor	%ymm8,%ymm6,%ymm6
   6254 	vpsrld	$25,%ymm5,%ymm8
   6255 	vpslld	$32-25,%ymm5,%ymm5
   6256 	vpxor	%ymm8,%ymm5,%ymm5
   6257 	vpsrld	$25,%ymm4,%ymm8
   6258 	vpslld	$32-25,%ymm4,%ymm4
   6259 	vpxor	%ymm8,%ymm4,%ymm4
   6260 	vmovdqa	128(%rbp),%ymm8
   6261 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   6262 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6263 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   6264 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   6265 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6266 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   6267 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   6268 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6269 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   6270 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   6271 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6272 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   6273 
   6274 	decq	%r10
   6275 	jnz	1b
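	# Rounds complete: feed the initial state back in to finalize 512 bytes
	# of keystream across the four states.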
   6276 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   6277 	vpaddd	64(%rbp),%ymm7,%ymm7
   6278 	vpaddd	96(%rbp),%ymm11,%ymm11
   6279 	vpaddd	256(%rbp),%ymm15,%ymm15
   6280 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   6281 	vpaddd	64(%rbp),%ymm6,%ymm6
   6282 	vpaddd	96(%rbp),%ymm10,%ymm10
   6283 	vpaddd	224(%rbp),%ymm14,%ymm14
   6284 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   6285 	vpaddd	64(%rbp),%ymm5,%ymm5
   6286 	vpaddd	96(%rbp),%ymm9,%ymm9
   6287 	vpaddd	192(%rbp),%ymm13,%ymm13
   6288 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   6289 	vpaddd	64(%rbp),%ymm4,%ymm4
   6290 	vpaddd	96(%rbp),%ymm8,%ymm8
   6291 	vpaddd	160(%rbp),%ymm12,%ymm12
   6292 
   6293 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   6294 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
   6295 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
   6296 	vpand	.clamp(%rip),%ymm15,%ymm15
   6297 	vmovdqa	%ymm15,0(%rbp)
    6298 	movq	%r8,%r8	# self-move no-op left by the code generator; %r8 already holds the AD length expected by poly_hash_ad_internal
   6299 	call	poly_hash_ad_internal
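	# AD is now absorbed into the Poly1305 state; encrypt the first 320
	# bytes of plaintext and queue that ciphertext for hashing (%rcx below).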
   6300 
   6301 	vpxor	0(%rsi),%ymm3,%ymm3
   6302 	vpxor	32(%rsi),%ymm11,%ymm11
   6303 	vmovdqu	%ymm3,0(%rdi)
   6304 	vmovdqu	%ymm11,32(%rdi)
   6305 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
   6306 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   6307 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   6308 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   6309 	vpxor	0+64(%rsi),%ymm15,%ymm15
   6310 	vpxor	32+64(%rsi),%ymm2,%ymm2
   6311 	vpxor	64+64(%rsi),%ymm6,%ymm6
   6312 	vpxor	96+64(%rsi),%ymm10,%ymm10
   6313 	vmovdqu	%ymm15,0+64(%rdi)
   6314 	vmovdqu	%ymm2,32+64(%rdi)
   6315 	vmovdqu	%ymm6,64+64(%rdi)
   6316 	vmovdqu	%ymm10,96+64(%rdi)
   6317 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
   6318 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   6319 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   6320 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   6321 	vpxor	0+192(%rsi),%ymm15,%ymm15
   6322 	vpxor	32+192(%rsi),%ymm1,%ymm1
   6323 	vpxor	64+192(%rsi),%ymm5,%ymm5
   6324 	vpxor	96+192(%rsi),%ymm9,%ymm9
   6325 	vmovdqu	%ymm15,0+192(%rdi)
   6326 	vmovdqu	%ymm1,32+192(%rdi)
   6327 	vmovdqu	%ymm5,64+192(%rdi)
   6328 	vmovdqu	%ymm9,96+192(%rdi)
   6329 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
   6330 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   6331 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   6332 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   6333 	vmovdqa	%ymm15,%ymm8
   6334 
   6335 	leaq	320(%rsi),%rsi
   6336 	subq	$320,%rbx
   6337 	movq	$320,%rcx
   6338 	cmpq	$128,%rbx
   6339 	jbe	seal_avx2_hash
   6340 	vpxor	0(%rsi),%ymm0,%ymm0
   6341 	vpxor	32(%rsi),%ymm4,%ymm4
   6342 	vpxor	64(%rsi),%ymm8,%ymm8
   6343 	vpxor	96(%rsi),%ymm12,%ymm12
   6344 	vmovdqu	%ymm0,320(%rdi)
   6345 	vmovdqu	%ymm4,352(%rdi)
   6346 	vmovdqu	%ymm8,384(%rdi)
   6347 	vmovdqu	%ymm12,416(%rdi)
   6348 	leaq	128(%rsi),%rsi
   6349 	subq	$128,%rbx
   6350 	movq	$8,%rcx
   6351 	movq	$2,%r8
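	# Tail dispatch: %rcx counts round-plus-hash iterations and %r8 counts
	# extra hash-only iterations, chosen so the tail loops below also finish
	# absorbing the ciphertext already written above.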
   6352 	cmpq	$128,%rbx
   6353 	jbe	seal_avx2_tail_128
   6354 	cmpq	$256,%rbx
   6355 	jbe	seal_avx2_tail_256
   6356 	cmpq	$384,%rbx
   6357 	jbe	seal_avx2_tail_384
   6358 	cmpq	$512,%rbx
   6359 	jbe	seal_avx2_tail_512
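	# More than 512 bytes remain: set up four fresh states and fall into the
	# main loop, which fully interleaves ChaCha20 rounds with Poly1305
	# absorption of the previously written chunk.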
   6360 	vmovdqa	.chacha20_consts(%rip),%ymm0
   6361 	vmovdqa	64(%rbp),%ymm4
   6362 	vmovdqa	96(%rbp),%ymm8
   6363 	vmovdqa	%ymm0,%ymm1
   6364 	vmovdqa	%ymm4,%ymm5
   6365 	vmovdqa	%ymm8,%ymm9
   6366 	vmovdqa	%ymm0,%ymm2
   6367 	vmovdqa	%ymm4,%ymm6
   6368 	vmovdqa	%ymm8,%ymm10
   6369 	vmovdqa	%ymm0,%ymm3
   6370 	vmovdqa	%ymm4,%ymm7
   6371 	vmovdqa	%ymm8,%ymm11
   6372 	vmovdqa	.avx2_inc(%rip),%ymm12
   6373 	vpaddd	160(%rbp),%ymm12,%ymm15
   6374 	vpaddd	%ymm15,%ymm12,%ymm14
   6375 	vpaddd	%ymm14,%ymm12,%ymm13
   6376 	vpaddd	%ymm13,%ymm12,%ymm12
   6377 	vmovdqa	%ymm15,256(%rbp)
   6378 	vmovdqa	%ymm14,224(%rbp)
   6379 	vmovdqa	%ymm13,192(%rbp)
   6380 	vmovdqa	%ymm12,160(%rbp)
   6381 	vmovdqa	%ymm8,128(%rbp)
   6382 	vmovdqa	.rol16(%rip),%ymm8
   6383 	vpaddd	%ymm7,%ymm3,%ymm3
   6384 	vpaddd	%ymm6,%ymm2,%ymm2
   6385 	vpaddd	%ymm5,%ymm1,%ymm1
   6386 	vpaddd	%ymm4,%ymm0,%ymm0
   6387 	vpxor	%ymm3,%ymm15,%ymm15
   6388 	vpxor	%ymm2,%ymm14,%ymm14
   6389 	vpxor	%ymm1,%ymm13,%ymm13
   6390 	vpxor	%ymm0,%ymm12,%ymm12
   6391 	vpshufb	%ymm8,%ymm15,%ymm15
   6392 	vpshufb	%ymm8,%ymm14,%ymm14
   6393 	vpshufb	%ymm8,%ymm13,%ymm13
   6394 	vpshufb	%ymm8,%ymm12,%ymm12
   6395 	vmovdqa	128(%rbp),%ymm8
   6396 	vpaddd	%ymm15,%ymm11,%ymm11
   6397 	vpaddd	%ymm14,%ymm10,%ymm10
   6398 	vpaddd	%ymm13,%ymm9,%ymm9
   6399 	vpaddd	%ymm12,%ymm8,%ymm8
   6400 	vpxor	%ymm11,%ymm7,%ymm7
   6401 	vpxor	%ymm10,%ymm6,%ymm6
   6402 	vpxor	%ymm9,%ymm5,%ymm5
   6403 	vpxor	%ymm8,%ymm4,%ymm4
   6404 	vmovdqa	%ymm8,128(%rbp)
   6405 	vpsrld	$20,%ymm7,%ymm8
   6406 	vpslld	$32-20,%ymm7,%ymm7
   6407 	vpxor	%ymm8,%ymm7,%ymm7
   6408 	vpsrld	$20,%ymm6,%ymm8
   6409 	vpslld	$32-20,%ymm6,%ymm6
   6410 	vpxor	%ymm8,%ymm6,%ymm6
   6411 	vpsrld	$20,%ymm5,%ymm8
   6412 	vpslld	$32-20,%ymm5,%ymm5
   6413 	vpxor	%ymm8,%ymm5,%ymm5
   6414 	vpsrld	$20,%ymm4,%ymm8
   6415 	vpslld	$32-20,%ymm4,%ymm4
   6416 	vpxor	%ymm8,%ymm4,%ymm4
   6417 	vmovdqa	.rol8(%rip),%ymm8
   6418 	vpaddd	%ymm7,%ymm3,%ymm3
   6419 	vpaddd	%ymm6,%ymm2,%ymm2
   6420 	vpaddd	%ymm5,%ymm1,%ymm1
   6421 	vpaddd	%ymm4,%ymm0,%ymm0
   6422 	vpxor	%ymm3,%ymm15,%ymm15
   6423 	vpxor	%ymm2,%ymm14,%ymm14
   6424 	vpxor	%ymm1,%ymm13,%ymm13
   6425 	vpxor	%ymm0,%ymm12,%ymm12
   6426 	vpshufb	%ymm8,%ymm15,%ymm15
   6427 	vpshufb	%ymm8,%ymm14,%ymm14
   6428 	vpshufb	%ymm8,%ymm13,%ymm13
   6429 	vpshufb	%ymm8,%ymm12,%ymm12
   6430 	vmovdqa	128(%rbp),%ymm8
   6431 	vpaddd	%ymm15,%ymm11,%ymm11
   6432 	vpaddd	%ymm14,%ymm10,%ymm10
   6433 	vpaddd	%ymm13,%ymm9,%ymm9
   6434 	vpaddd	%ymm12,%ymm8,%ymm8
   6435 	vpxor	%ymm11,%ymm7,%ymm7
   6436 	vpxor	%ymm10,%ymm6,%ymm6
   6437 	vpxor	%ymm9,%ymm5,%ymm5
   6438 	vpxor	%ymm8,%ymm4,%ymm4
   6439 	vmovdqa	%ymm8,128(%rbp)
   6440 	vpsrld	$25,%ymm7,%ymm8
   6441 	vpslld	$32-25,%ymm7,%ymm7
   6442 	vpxor	%ymm8,%ymm7,%ymm7
   6443 	vpsrld	$25,%ymm6,%ymm8
   6444 	vpslld	$32-25,%ymm6,%ymm6
   6445 	vpxor	%ymm8,%ymm6,%ymm6
   6446 	vpsrld	$25,%ymm5,%ymm8
   6447 	vpslld	$32-25,%ymm5,%ymm5
   6448 	vpxor	%ymm8,%ymm5,%ymm5
   6449 	vpsrld	$25,%ymm4,%ymm8
   6450 	vpslld	$32-25,%ymm4,%ymm4
   6451 	vpxor	%ymm8,%ymm4,%ymm4
   6452 	vmovdqa	128(%rbp),%ymm8
   6453 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   6454 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6455 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   6456 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   6457 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6458 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   6459 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   6460 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6461 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   6462 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   6463 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6464 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   6465 	vmovdqa	%ymm8,128(%rbp)
   6466 	vmovdqa	.rol16(%rip),%ymm8
   6467 	vpaddd	%ymm7,%ymm3,%ymm3
   6468 	vpaddd	%ymm6,%ymm2,%ymm2
   6469 	vpaddd	%ymm5,%ymm1,%ymm1
   6470 	vpaddd	%ymm4,%ymm0,%ymm0
   6471 	vpxor	%ymm3,%ymm15,%ymm15
   6472 	vpxor	%ymm2,%ymm14,%ymm14
   6473 	vpxor	%ymm1,%ymm13,%ymm13
   6474 	vpxor	%ymm0,%ymm12,%ymm12
   6475 	vpshufb	%ymm8,%ymm15,%ymm15
   6476 	vpshufb	%ymm8,%ymm14,%ymm14
   6477 	vpshufb	%ymm8,%ymm13,%ymm13
   6478 	vpshufb	%ymm8,%ymm12,%ymm12
   6479 	vmovdqa	128(%rbp),%ymm8
   6480 	vpaddd	%ymm15,%ymm11,%ymm11
   6481 	vpaddd	%ymm14,%ymm10,%ymm10
   6482 	vpaddd	%ymm13,%ymm9,%ymm9
   6483 	vpaddd	%ymm12,%ymm8,%ymm8
   6484 	vpxor	%ymm11,%ymm7,%ymm7
   6485 	vpxor	%ymm10,%ymm6,%ymm6
   6486 	vpxor	%ymm9,%ymm5,%ymm5
   6487 	vpxor	%ymm8,%ymm4,%ymm4
   6488 	vmovdqa	%ymm8,128(%rbp)
   6489 	vpsrld	$20,%ymm7,%ymm8
   6490 	vpslld	$32-20,%ymm7,%ymm7
   6491 	vpxor	%ymm8,%ymm7,%ymm7
   6492 	vpsrld	$20,%ymm6,%ymm8
   6493 	vpslld	$32-20,%ymm6,%ymm6
   6494 	vpxor	%ymm8,%ymm6,%ymm6
   6495 	vpsrld	$20,%ymm5,%ymm8
   6496 	vpslld	$32-20,%ymm5,%ymm5
   6497 	vpxor	%ymm8,%ymm5,%ymm5
   6498 	vpsrld	$20,%ymm4,%ymm8
   6499 	vpslld	$32-20,%ymm4,%ymm4
   6500 	vpxor	%ymm8,%ymm4,%ymm4
   6501 	vmovdqa	.rol8(%rip),%ymm8
   6502 	vpaddd	%ymm7,%ymm3,%ymm3
   6503 	vpaddd	%ymm6,%ymm2,%ymm2
   6504 	vpaddd	%ymm5,%ymm1,%ymm1
   6505 	vpaddd	%ymm4,%ymm0,%ymm0
   6506 	vpxor	%ymm3,%ymm15,%ymm15
   6507 	vpxor	%ymm2,%ymm14,%ymm14
   6508 	vpxor	%ymm1,%ymm13,%ymm13
   6509 	vpxor	%ymm0,%ymm12,%ymm12
   6510 	vpshufb	%ymm8,%ymm15,%ymm15
   6511 	vpshufb	%ymm8,%ymm14,%ymm14
   6512 	vpshufb	%ymm8,%ymm13,%ymm13
   6513 	vpshufb	%ymm8,%ymm12,%ymm12
   6514 	vmovdqa	128(%rbp),%ymm8
   6515 	vpaddd	%ymm15,%ymm11,%ymm11
   6516 	vpaddd	%ymm14,%ymm10,%ymm10
   6517 	vpaddd	%ymm13,%ymm9,%ymm9
   6518 	vpaddd	%ymm12,%ymm8,%ymm8
   6519 	vpxor	%ymm11,%ymm7,%ymm7
   6520 	vpxor	%ymm10,%ymm6,%ymm6
   6521 	vpxor	%ymm9,%ymm5,%ymm5
   6522 	vpxor	%ymm8,%ymm4,%ymm4
   6523 	vmovdqa	%ymm8,128(%rbp)
   6524 	vpsrld	$25,%ymm7,%ymm8
   6525 	vpslld	$32-25,%ymm7,%ymm7
   6526 	vpxor	%ymm8,%ymm7,%ymm7
   6527 	vpsrld	$25,%ymm6,%ymm8
   6528 	vpslld	$32-25,%ymm6,%ymm6
   6529 	vpxor	%ymm8,%ymm6,%ymm6
   6530 	vpsrld	$25,%ymm5,%ymm8
   6531 	vpslld	$32-25,%ymm5,%ymm5
   6532 	vpxor	%ymm8,%ymm5,%ymm5
   6533 	vpsrld	$25,%ymm4,%ymm8
   6534 	vpslld	$32-25,%ymm4,%ymm4
   6535 	vpxor	%ymm8,%ymm4,%ymm4
   6536 	vmovdqa	128(%rbp),%ymm8
   6537 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   6538 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6539 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   6540 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   6541 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6542 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   6543 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   6544 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6545 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   6546 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   6547 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6548 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   6549 	vmovdqa	%ymm8,128(%rbp)
   6550 	vmovdqa	.rol16(%rip),%ymm8
   6551 	vpaddd	%ymm7,%ymm3,%ymm3
   6552 	vpaddd	%ymm6,%ymm2,%ymm2
   6553 	vpaddd	%ymm5,%ymm1,%ymm1
   6554 	vpaddd	%ymm4,%ymm0,%ymm0
   6555 	vpxor	%ymm3,%ymm15,%ymm15
   6556 	vpxor	%ymm2,%ymm14,%ymm14
   6557 	vpxor	%ymm1,%ymm13,%ymm13
   6558 	vpxor	%ymm0,%ymm12,%ymm12
   6559 	vpshufb	%ymm8,%ymm15,%ymm15
   6560 	vpshufb	%ymm8,%ymm14,%ymm14
   6561 	vpshufb	%ymm8,%ymm13,%ymm13
   6562 	vpshufb	%ymm8,%ymm12,%ymm12
   6563 	vmovdqa	128(%rbp),%ymm8
   6564 	vpaddd	%ymm15,%ymm11,%ymm11
   6565 	vpaddd	%ymm14,%ymm10,%ymm10
   6566 	vpaddd	%ymm13,%ymm9,%ymm9
   6567 	vpaddd	%ymm12,%ymm8,%ymm8
   6568 	vpxor	%ymm11,%ymm7,%ymm7
   6569 	vpxor	%ymm10,%ymm6,%ymm6
   6570 	vpxor	%ymm9,%ymm5,%ymm5
   6571 	vpxor	%ymm8,%ymm4,%ymm4
   6572 	vmovdqa	%ymm8,128(%rbp)
   6573 	vpsrld	$20,%ymm7,%ymm8
   6574 	vpslld	$32-20,%ymm7,%ymm7
   6575 	vpxor	%ymm8,%ymm7,%ymm7
   6576 	vpsrld	$20,%ymm6,%ymm8
   6577 	vpslld	$32-20,%ymm6,%ymm6
   6578 	vpxor	%ymm8,%ymm6,%ymm6
   6579 	vpsrld	$20,%ymm5,%ymm8
   6580 	vpslld	$32-20,%ymm5,%ymm5
   6581 	vpxor	%ymm8,%ymm5,%ymm5
   6582 	vpsrld	$20,%ymm4,%ymm8
   6583 	vpslld	$32-20,%ymm4,%ymm4
   6584 	vpxor	%ymm8,%ymm4,%ymm4
   6585 	vmovdqa	.rol8(%rip),%ymm8
   6586 	vpaddd	%ymm7,%ymm3,%ymm3
   6587 	vpaddd	%ymm6,%ymm2,%ymm2
   6588 	vpaddd	%ymm5,%ymm1,%ymm1
   6589 	vpaddd	%ymm4,%ymm0,%ymm0
   6590 
   6591 	subq	$16,%rdi
   6592 	movq	$9,%rcx
   6593 	jmp	4f
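	# First trip: one partial double round was unrolled above without any
	# Poly1305 work (no ciphertext exists yet for this chunk), so enter the
	# loop mid-round at 4: with 9 iterations left; the -16 bias on %rdi
	# appears to compensate for the hash offsets used inside the loop.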
   6594 1:
   6595 	vmovdqa	.chacha20_consts(%rip),%ymm0
   6596 	vmovdqa	64(%rbp),%ymm4
   6597 	vmovdqa	96(%rbp),%ymm8
   6598 	vmovdqa	%ymm0,%ymm1
   6599 	vmovdqa	%ymm4,%ymm5
   6600 	vmovdqa	%ymm8,%ymm9
   6601 	vmovdqa	%ymm0,%ymm2
   6602 	vmovdqa	%ymm4,%ymm6
   6603 	vmovdqa	%ymm8,%ymm10
   6604 	vmovdqa	%ymm0,%ymm3
   6605 	vmovdqa	%ymm4,%ymm7
   6606 	vmovdqa	%ymm8,%ymm11
   6607 	vmovdqa	.avx2_inc(%rip),%ymm12
   6608 	vpaddd	160(%rbp),%ymm12,%ymm15
   6609 	vpaddd	%ymm15,%ymm12,%ymm14
   6610 	vpaddd	%ymm14,%ymm12,%ymm13
   6611 	vpaddd	%ymm13,%ymm12,%ymm12
   6612 	vmovdqa	%ymm15,256(%rbp)
   6613 	vmovdqa	%ymm14,224(%rbp)
   6614 	vmovdqa	%ymm13,192(%rbp)
   6615 	vmovdqa	%ymm12,160(%rbp)
   6616 
   6617 	movq	$10,%rcx
   6618 2:
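	# Main seal loop: each iteration interleaves one ChaCha20 double round
	# over the four states with Poly1305 block steps, absorbing 48 bytes of
	# previously written ciphertext per pass using the mulx-based multiply.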
   6619 	addq	0(%rdi),%r10
   6620 	adcq	8+0(%rdi),%r11
   6621 	adcq	$1,%r12
   6622 	vmovdqa	%ymm8,128(%rbp)
   6623 	vmovdqa	.rol16(%rip),%ymm8
   6624 	vpaddd	%ymm7,%ymm3,%ymm3
   6625 	vpaddd	%ymm6,%ymm2,%ymm2
   6626 	vpaddd	%ymm5,%ymm1,%ymm1
   6627 	vpaddd	%ymm4,%ymm0,%ymm0
   6628 	vpxor	%ymm3,%ymm15,%ymm15
   6629 	vpxor	%ymm2,%ymm14,%ymm14
   6630 	vpxor	%ymm1,%ymm13,%ymm13
   6631 	vpxor	%ymm0,%ymm12,%ymm12
   6632 	movq	0+0(%rbp),%rdx
   6633 	movq	%rdx,%r15
   6634 	mulxq	%r10,%r13,%r14
   6635 	mulxq	%r11,%rax,%rdx
   6636 	imulq	%r12,%r15
   6637 	addq	%rax,%r14
   6638 	adcq	%rdx,%r15
   6639 	vpshufb	%ymm8,%ymm15,%ymm15
   6640 	vpshufb	%ymm8,%ymm14,%ymm14
   6641 	vpshufb	%ymm8,%ymm13,%ymm13
   6642 	vpshufb	%ymm8,%ymm12,%ymm12
   6643 	vmovdqa	128(%rbp),%ymm8
   6644 	vpaddd	%ymm15,%ymm11,%ymm11
   6645 	vpaddd	%ymm14,%ymm10,%ymm10
   6646 	vpaddd	%ymm13,%ymm9,%ymm9
   6647 	vpaddd	%ymm12,%ymm8,%ymm8
   6648 	movq	8+0(%rbp),%rdx
   6649 	mulxq	%r10,%r10,%rax
   6650 	addq	%r10,%r14
   6651 	mulxq	%r11,%r11,%r9
   6652 	adcq	%r11,%r15
   6653 	adcq	$0,%r9
   6654 	imulq	%r12,%rdx
   6655 	vpxor	%ymm11,%ymm7,%ymm7
   6656 	vpxor	%ymm10,%ymm6,%ymm6
   6657 	vpxor	%ymm9,%ymm5,%ymm5
   6658 	vpxor	%ymm8,%ymm4,%ymm4
   6659 	vmovdqa	%ymm8,128(%rbp)
   6660 	vpsrld	$20,%ymm7,%ymm8
   6661 	vpslld	$32-20,%ymm7,%ymm7
   6662 	vpxor	%ymm8,%ymm7,%ymm7
   6663 	vpsrld	$20,%ymm6,%ymm8
   6664 	vpslld	$32-20,%ymm6,%ymm6
   6665 	vpxor	%ymm8,%ymm6,%ymm6
   6666 	vpsrld	$20,%ymm5,%ymm8
   6667 	addq	%rax,%r15
   6668 	adcq	%rdx,%r9
   6669 	vpslld	$32-20,%ymm5,%ymm5
   6670 	vpxor	%ymm8,%ymm5,%ymm5
   6671 	vpsrld	$20,%ymm4,%ymm8
   6672 	vpslld	$32-20,%ymm4,%ymm4
   6673 	vpxor	%ymm8,%ymm4,%ymm4
   6674 	vmovdqa	.rol8(%rip),%ymm8
   6675 	vpaddd	%ymm7,%ymm3,%ymm3
   6676 	vpaddd	%ymm6,%ymm2,%ymm2
   6677 	vpaddd	%ymm5,%ymm1,%ymm1
   6678 	vpaddd	%ymm4,%ymm0,%ymm0
   6679 	movq	%r13,%r10
   6680 	movq	%r14,%r11
   6681 	movq	%r15,%r12
   6682 	andq	$3,%r12
   6683 	movq	%r15,%r13
   6684 	andq	$-4,%r13
   6685 	movq	%r9,%r14
   6686 	shrdq	$2,%r9,%r15
   6687 	shrq	$2,%r9
   6688 	addq	%r13,%r10
   6689 	adcq	%r14,%r11
   6690 	adcq	$0,%r12
   6691 	addq	%r15,%r10
   6692 	adcq	%r9,%r11
   6693 	adcq	$0,%r12
   6694 
   6695 4:
   6696 	vpxor	%ymm3,%ymm15,%ymm15
   6697 	vpxor	%ymm2,%ymm14,%ymm14
   6698 	vpxor	%ymm1,%ymm13,%ymm13
   6699 	vpxor	%ymm0,%ymm12,%ymm12
   6700 	vpshufb	%ymm8,%ymm15,%ymm15
   6701 	vpshufb	%ymm8,%ymm14,%ymm14
   6702 	vpshufb	%ymm8,%ymm13,%ymm13
   6703 	vpshufb	%ymm8,%ymm12,%ymm12
   6704 	vmovdqa	128(%rbp),%ymm8
   6705 	addq	16(%rdi),%r10
   6706 	adcq	8+16(%rdi),%r11
   6707 	adcq	$1,%r12
   6708 	vpaddd	%ymm15,%ymm11,%ymm11
   6709 	vpaddd	%ymm14,%ymm10,%ymm10
   6710 	vpaddd	%ymm13,%ymm9,%ymm9
   6711 	vpaddd	%ymm12,%ymm8,%ymm8
   6712 	vpxor	%ymm11,%ymm7,%ymm7
   6713 	vpxor	%ymm10,%ymm6,%ymm6
   6714 	vpxor	%ymm9,%ymm5,%ymm5
   6715 	vpxor	%ymm8,%ymm4,%ymm4
   6716 	movq	0+0(%rbp),%rdx
   6717 	movq	%rdx,%r15
   6718 	mulxq	%r10,%r13,%r14
   6719 	mulxq	%r11,%rax,%rdx
   6720 	imulq	%r12,%r15
   6721 	addq	%rax,%r14
   6722 	adcq	%rdx,%r15
   6723 	vmovdqa	%ymm8,128(%rbp)
   6724 	vpsrld	$25,%ymm7,%ymm8
   6725 	vpslld	$32-25,%ymm7,%ymm7
   6726 	vpxor	%ymm8,%ymm7,%ymm7
   6727 	vpsrld	$25,%ymm6,%ymm8
   6728 	vpslld	$32-25,%ymm6,%ymm6
   6729 	vpxor	%ymm8,%ymm6,%ymm6
   6730 	vpsrld	$25,%ymm5,%ymm8
   6731 	vpslld	$32-25,%ymm5,%ymm5
   6732 	vpxor	%ymm8,%ymm5,%ymm5
   6733 	vpsrld	$25,%ymm4,%ymm8
   6734 	vpslld	$32-25,%ymm4,%ymm4
   6735 	vpxor	%ymm8,%ymm4,%ymm4
   6736 	vmovdqa	128(%rbp),%ymm8
   6737 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   6738 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6739 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   6740 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   6741 	movq	8+0(%rbp),%rdx
   6742 	mulxq	%r10,%r10,%rax
   6743 	addq	%r10,%r14
   6744 	mulxq	%r11,%r11,%r9
   6745 	adcq	%r11,%r15
   6746 	adcq	$0,%r9
   6747 	imulq	%r12,%rdx
   6748 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6749 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   6750 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   6751 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6752 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   6753 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   6754 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6755 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   6756 	vmovdqa	%ymm8,128(%rbp)
   6757 	vmovdqa	.rol16(%rip),%ymm8
   6758 	vpaddd	%ymm7,%ymm3,%ymm3
   6759 	vpaddd	%ymm6,%ymm2,%ymm2
   6760 	vpaddd	%ymm5,%ymm1,%ymm1
   6761 	vpaddd	%ymm4,%ymm0,%ymm0
   6762 	vpxor	%ymm3,%ymm15,%ymm15
   6763 	vpxor	%ymm2,%ymm14,%ymm14
   6764 	vpxor	%ymm1,%ymm13,%ymm13
   6765 	vpxor	%ymm0,%ymm12,%ymm12
   6766 	addq	%rax,%r15
   6767 	adcq	%rdx,%r9
   6768 	vpshufb	%ymm8,%ymm15,%ymm15
   6769 	vpshufb	%ymm8,%ymm14,%ymm14
   6770 	vpshufb	%ymm8,%ymm13,%ymm13
   6771 	vpshufb	%ymm8,%ymm12,%ymm12
   6772 	vmovdqa	128(%rbp),%ymm8
   6773 	vpaddd	%ymm15,%ymm11,%ymm11
   6774 	vpaddd	%ymm14,%ymm10,%ymm10
   6775 	vpaddd	%ymm13,%ymm9,%ymm9
   6776 	vpaddd	%ymm12,%ymm8,%ymm8
   6777 	movq	%r13,%r10
   6778 	movq	%r14,%r11
   6779 	movq	%r15,%r12
   6780 	andq	$3,%r12
   6781 	movq	%r15,%r13
   6782 	andq	$-4,%r13
   6783 	movq	%r9,%r14
   6784 	shrdq	$2,%r9,%r15
   6785 	shrq	$2,%r9
   6786 	addq	%r13,%r10
   6787 	adcq	%r14,%r11
   6788 	adcq	$0,%r12
   6789 	addq	%r15,%r10
   6790 	adcq	%r9,%r11
   6791 	adcq	$0,%r12
   6792 	vpxor	%ymm11,%ymm7,%ymm7
   6793 	vpxor	%ymm10,%ymm6,%ymm6
   6794 	vpxor	%ymm9,%ymm5,%ymm5
   6795 	vpxor	%ymm8,%ymm4,%ymm4
   6796 	vmovdqa	%ymm8,128(%rbp)
   6797 	vpsrld	$20,%ymm7,%ymm8
   6798 	vpslld	$32-20,%ymm7,%ymm7
   6799 	vpxor	%ymm8,%ymm7,%ymm7
   6800 	addq	32(%rdi),%r10
   6801 	adcq	8+32(%rdi),%r11
   6802 	adcq	$1,%r12
   6803 
   6804 	leaq	48(%rdi),%rdi
   6805 	vpsrld	$20,%ymm6,%ymm8
   6806 	vpslld	$32-20,%ymm6,%ymm6
   6807 	vpxor	%ymm8,%ymm6,%ymm6
   6808 	vpsrld	$20,%ymm5,%ymm8
   6809 	vpslld	$32-20,%ymm5,%ymm5
   6810 	vpxor	%ymm8,%ymm5,%ymm5
   6811 	vpsrld	$20,%ymm4,%ymm8
   6812 	vpslld	$32-20,%ymm4,%ymm4
   6813 	vpxor	%ymm8,%ymm4,%ymm4
   6814 	vmovdqa	.rol8(%rip),%ymm8
   6815 	vpaddd	%ymm7,%ymm3,%ymm3
   6816 	vpaddd	%ymm6,%ymm2,%ymm2
   6817 	vpaddd	%ymm5,%ymm1,%ymm1
   6818 	vpaddd	%ymm4,%ymm0,%ymm0
   6819 	vpxor	%ymm3,%ymm15,%ymm15
   6820 	vpxor	%ymm2,%ymm14,%ymm14
   6821 	vpxor	%ymm1,%ymm13,%ymm13
   6822 	vpxor	%ymm0,%ymm12,%ymm12
   6823 	movq	0+0(%rbp),%rdx
   6824 	movq	%rdx,%r15
   6825 	mulxq	%r10,%r13,%r14
   6826 	mulxq	%r11,%rax,%rdx
   6827 	imulq	%r12,%r15
   6828 	addq	%rax,%r14
   6829 	adcq	%rdx,%r15
   6830 	vpshufb	%ymm8,%ymm15,%ymm15
   6831 	vpshufb	%ymm8,%ymm14,%ymm14
   6832 	vpshufb	%ymm8,%ymm13,%ymm13
   6833 	vpshufb	%ymm8,%ymm12,%ymm12
   6834 	vmovdqa	128(%rbp),%ymm8
   6835 	vpaddd	%ymm15,%ymm11,%ymm11
   6836 	vpaddd	%ymm14,%ymm10,%ymm10
   6837 	vpaddd	%ymm13,%ymm9,%ymm9
   6838 	movq	8+0(%rbp),%rdx
   6839 	mulxq	%r10,%r10,%rax
   6840 	addq	%r10,%r14
   6841 	mulxq	%r11,%r11,%r9
   6842 	adcq	%r11,%r15
   6843 	adcq	$0,%r9
   6844 	imulq	%r12,%rdx
   6845 	vpaddd	%ymm12,%ymm8,%ymm8
   6846 	vpxor	%ymm11,%ymm7,%ymm7
   6847 	vpxor	%ymm10,%ymm6,%ymm6
   6848 	vpxor	%ymm9,%ymm5,%ymm5
   6849 	vpxor	%ymm8,%ymm4,%ymm4
   6850 	vmovdqa	%ymm8,128(%rbp)
   6851 	vpsrld	$25,%ymm7,%ymm8
   6852 	vpslld	$32-25,%ymm7,%ymm7
   6853 	addq	%rax,%r15
   6854 	adcq	%rdx,%r9
   6855 	vpxor	%ymm8,%ymm7,%ymm7
   6856 	vpsrld	$25,%ymm6,%ymm8
   6857 	vpslld	$32-25,%ymm6,%ymm6
   6858 	vpxor	%ymm8,%ymm6,%ymm6
   6859 	vpsrld	$25,%ymm5,%ymm8
   6860 	vpslld	$32-25,%ymm5,%ymm5
   6861 	vpxor	%ymm8,%ymm5,%ymm5
   6862 	vpsrld	$25,%ymm4,%ymm8
   6863 	vpslld	$32-25,%ymm4,%ymm4
   6864 	vpxor	%ymm8,%ymm4,%ymm4
   6865 	vmovdqa	128(%rbp),%ymm8
   6866 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   6867 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   6868 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   6869 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   6870 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   6871 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   6872 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   6873 	movq	%r13,%r10
   6874 	movq	%r14,%r11
   6875 	movq	%r15,%r12
   6876 	andq	$3,%r12
   6877 	movq	%r15,%r13
   6878 	andq	$-4,%r13
   6879 	movq	%r9,%r14
   6880 	shrdq	$2,%r9,%r15
   6881 	shrq	$2,%r9
   6882 	addq	%r13,%r10
   6883 	adcq	%r14,%r11
   6884 	adcq	$0,%r12
   6885 	addq	%r15,%r10
   6886 	adcq	%r9,%r11
   6887 	adcq	$0,%r12
   6888 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   6889 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   6890 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   6891 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   6892 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   6893 
   6894 	decq	%rcx
   6895 	jne	2b
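	# Rounds done: feed-forward, then encrypt this 512-byte chunk while the
	# Poly1305 absorption of its ciphertext continues inline below.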
   6896 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   6897 	vpaddd	64(%rbp),%ymm7,%ymm7
   6898 	vpaddd	96(%rbp),%ymm11,%ymm11
   6899 	vpaddd	256(%rbp),%ymm15,%ymm15
   6900 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   6901 	vpaddd	64(%rbp),%ymm6,%ymm6
   6902 	vpaddd	96(%rbp),%ymm10,%ymm10
   6903 	vpaddd	224(%rbp),%ymm14,%ymm14
   6904 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   6905 	vpaddd	64(%rbp),%ymm5,%ymm5
   6906 	vpaddd	96(%rbp),%ymm9,%ymm9
   6907 	vpaddd	192(%rbp),%ymm13,%ymm13
   6908 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   6909 	vpaddd	64(%rbp),%ymm4,%ymm4
   6910 	vpaddd	96(%rbp),%ymm8,%ymm8
   6911 	vpaddd	160(%rbp),%ymm12,%ymm12
   6912 
   6913 	leaq	32(%rdi),%rdi
   6914 	vmovdqa	%ymm0,128(%rbp)
   6915 	addq	-32(%rdi),%r10
   6916 	adcq	8+-32(%rdi),%r11
   6917 	adcq	$1,%r12
   6918 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   6919 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   6920 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   6921 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   6922 	vpxor	0+0(%rsi),%ymm0,%ymm0
   6923 	vpxor	32+0(%rsi),%ymm3,%ymm3
   6924 	vpxor	64+0(%rsi),%ymm7,%ymm7
   6925 	vpxor	96+0(%rsi),%ymm11,%ymm11
   6926 	vmovdqu	%ymm0,0+0(%rdi)
   6927 	vmovdqu	%ymm3,32+0(%rdi)
   6928 	vmovdqu	%ymm7,64+0(%rdi)
   6929 	vmovdqu	%ymm11,96+0(%rdi)
   6930 
   6931 	vmovdqa	128(%rbp),%ymm0
   6932 	movq	0+0(%rbp),%rax
   6933 	movq	%rax,%r15
   6934 	mulq	%r10
   6935 	movq	%rax,%r13
   6936 	movq	%rdx,%r14
   6937 	movq	0+0(%rbp),%rax
   6938 	mulq	%r11
   6939 	imulq	%r12,%r15
   6940 	addq	%rax,%r14
   6941 	adcq	%rdx,%r15
   6942 	movq	8+0(%rbp),%rax
   6943 	movq	%rax,%r9
   6944 	mulq	%r10
   6945 	addq	%rax,%r14
   6946 	adcq	$0,%rdx
   6947 	movq	%rdx,%r10
   6948 	movq	8+0(%rbp),%rax
   6949 	mulq	%r11
   6950 	addq	%rax,%r15
   6951 	adcq	$0,%rdx
   6952 	imulq	%r12,%r9
   6953 	addq	%r10,%r15
   6954 	adcq	%rdx,%r9
   6955 	movq	%r13,%r10
   6956 	movq	%r14,%r11
   6957 	movq	%r15,%r12
   6958 	andq	$3,%r12
   6959 	movq	%r15,%r13
   6960 	andq	$-4,%r13
   6961 	movq	%r9,%r14
   6962 	shrdq	$2,%r9,%r15
   6963 	shrq	$2,%r9
   6964 	addq	%r13,%r10
   6965 	adcq	%r14,%r11
   6966 	adcq	$0,%r12
   6967 	addq	%r15,%r10
   6968 	adcq	%r9,%r11
   6969 	adcq	$0,%r12
   6970 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   6971 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   6972 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   6973 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   6974 	vpxor	0+128(%rsi),%ymm3,%ymm3
   6975 	vpxor	32+128(%rsi),%ymm2,%ymm2
   6976 	vpxor	64+128(%rsi),%ymm6,%ymm6
   6977 	vpxor	96+128(%rsi),%ymm10,%ymm10
   6978 	vmovdqu	%ymm3,0+128(%rdi)
   6979 	vmovdqu	%ymm2,32+128(%rdi)
   6980 	vmovdqu	%ymm6,64+128(%rdi)
   6981 	vmovdqu	%ymm10,96+128(%rdi)
   6982 	addq	-16(%rdi),%r10
   6983 	adcq	8+-16(%rdi),%r11
   6984 	adcq	$1,%r12
   6985 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   6986 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   6987 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   6988 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   6989 	vpxor	0+256(%rsi),%ymm3,%ymm3
   6990 	vpxor	32+256(%rsi),%ymm1,%ymm1
   6991 	vpxor	64+256(%rsi),%ymm5,%ymm5
   6992 	vpxor	96+256(%rsi),%ymm9,%ymm9
   6993 	vmovdqu	%ymm3,0+256(%rdi)
   6994 	vmovdqu	%ymm1,32+256(%rdi)
   6995 	vmovdqu	%ymm5,64+256(%rdi)
   6996 	vmovdqu	%ymm9,96+256(%rdi)
   6997 	movq	0+0(%rbp),%rax
   6998 	movq	%rax,%r15
   6999 	mulq	%r10
   7000 	movq	%rax,%r13
   7001 	movq	%rdx,%r14
   7002 	movq	0+0(%rbp),%rax
   7003 	mulq	%r11
   7004 	imulq	%r12,%r15
   7005 	addq	%rax,%r14
   7006 	adcq	%rdx,%r15
   7007 	movq	8+0(%rbp),%rax
   7008 	movq	%rax,%r9
   7009 	mulq	%r10
   7010 	addq	%rax,%r14
   7011 	adcq	$0,%rdx
   7012 	movq	%rdx,%r10
   7013 	movq	8+0(%rbp),%rax
   7014 	mulq	%r11
   7015 	addq	%rax,%r15
   7016 	adcq	$0,%rdx
   7017 	imulq	%r12,%r9
   7018 	addq	%r10,%r15
   7019 	adcq	%rdx,%r9
   7020 	movq	%r13,%r10
   7021 	movq	%r14,%r11
   7022 	movq	%r15,%r12
   7023 	andq	$3,%r12
   7024 	movq	%r15,%r13
   7025 	andq	$-4,%r13
   7026 	movq	%r9,%r14
   7027 	shrdq	$2,%r9,%r15
   7028 	shrq	$2,%r9
   7029 	addq	%r13,%r10
   7030 	adcq	%r14,%r11
   7031 	adcq	$0,%r12
   7032 	addq	%r15,%r10
   7033 	adcq	%r9,%r11
   7034 	adcq	$0,%r12
   7035 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   7036 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
   7037 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
   7038 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
   7039 	vpxor	0+384(%rsi),%ymm3,%ymm3
   7040 	vpxor	32+384(%rsi),%ymm0,%ymm0
   7041 	vpxor	64+384(%rsi),%ymm4,%ymm4
   7042 	vpxor	96+384(%rsi),%ymm8,%ymm8
   7043 	vmovdqu	%ymm3,0+384(%rdi)
   7044 	vmovdqu	%ymm0,32+384(%rdi)
   7045 	vmovdqu	%ymm4,64+384(%rdi)
   7046 	vmovdqu	%ymm8,96+384(%rdi)
   7047 
   7048 	leaq	512(%rsi),%rsi
   7049 	subq	$512,%rbx
   7050 	cmpq	$512,%rbx
   7051 	jg	1b
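	# At most 512 bytes left: absorb the final 32 hashed bytes of the last
	# chunk, then select the matching tail routine below.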
   7052 	addq	0(%rdi),%r10
   7053 	adcq	8+0(%rdi),%r11
   7054 	adcq	$1,%r12
   7055 	movq	0+0(%rbp),%rax
   7056 	movq	%rax,%r15
   7057 	mulq	%r10
   7058 	movq	%rax,%r13
   7059 	movq	%rdx,%r14
   7060 	movq	0+0(%rbp),%rax
   7061 	mulq	%r11
   7062 	imulq	%r12,%r15
   7063 	addq	%rax,%r14
   7064 	adcq	%rdx,%r15
   7065 	movq	8+0(%rbp),%rax
   7066 	movq	%rax,%r9
   7067 	mulq	%r10
   7068 	addq	%rax,%r14
   7069 	adcq	$0,%rdx
   7070 	movq	%rdx,%r10
   7071 	movq	8+0(%rbp),%rax
   7072 	mulq	%r11
   7073 	addq	%rax,%r15
   7074 	adcq	$0,%rdx
   7075 	imulq	%r12,%r9
   7076 	addq	%r10,%r15
   7077 	adcq	%rdx,%r9
   7078 	movq	%r13,%r10
   7079 	movq	%r14,%r11
   7080 	movq	%r15,%r12
   7081 	andq	$3,%r12
   7082 	movq	%r15,%r13
   7083 	andq	$-4,%r13
   7084 	movq	%r9,%r14
   7085 	shrdq	$2,%r9,%r15
   7086 	shrq	$2,%r9
   7087 	addq	%r13,%r10
   7088 	adcq	%r14,%r11
   7089 	adcq	$0,%r12
   7090 	addq	%r15,%r10
   7091 	adcq	%r9,%r11
   7092 	adcq	$0,%r12
   7093 	addq	16(%rdi),%r10
   7094 	adcq	8+16(%rdi),%r11
   7095 	adcq	$1,%r12
   7096 	movq	0+0(%rbp),%rax
   7097 	movq	%rax,%r15
   7098 	mulq	%r10
   7099 	movq	%rax,%r13
   7100 	movq	%rdx,%r14
   7101 	movq	0+0(%rbp),%rax
   7102 	mulq	%r11
   7103 	imulq	%r12,%r15
   7104 	addq	%rax,%r14
   7105 	adcq	%rdx,%r15
   7106 	movq	8+0(%rbp),%rax
   7107 	movq	%rax,%r9
   7108 	mulq	%r10
   7109 	addq	%rax,%r14
   7110 	adcq	$0,%rdx
   7111 	movq	%rdx,%r10
   7112 	movq	8+0(%rbp),%rax
   7113 	mulq	%r11
   7114 	addq	%rax,%r15
   7115 	adcq	$0,%rdx
   7116 	imulq	%r12,%r9
   7117 	addq	%r10,%r15
   7118 	adcq	%rdx,%r9
   7119 	movq	%r13,%r10
   7120 	movq	%r14,%r11
   7121 	movq	%r15,%r12
   7122 	andq	$3,%r12
   7123 	movq	%r15,%r13
   7124 	andq	$-4,%r13
   7125 	movq	%r9,%r14
   7126 	shrdq	$2,%r9,%r15
   7127 	shrq	$2,%r9
   7128 	addq	%r13,%r10
   7129 	adcq	%r14,%r11
   7130 	adcq	$0,%r12
   7131 	addq	%r15,%r10
   7132 	adcq	%r9,%r11
   7133 	adcq	$0,%r12
   7134 
   7135 	leaq	32(%rdi),%rdi
   7136 	movq	$10,%rcx
   7137 	xorq	%r8,%r8
   7138 	cmpq	$128,%rbx
   7139 	ja	3f
   7140 
   7141 seal_avx2_tail_128:
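	# <= 128 bytes remain: a single two-block state. The 1:/2: loop pair
	# interleaves the remaining Poly1305 absorption with the final rounds.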
   7142 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7143 	vmovdqa	64(%rbp),%ymm4
   7144 	vmovdqa	96(%rbp),%ymm8
   7145 	vmovdqa	.avx2_inc(%rip),%ymm12
   7146 	vpaddd	160(%rbp),%ymm12,%ymm12
   7147 	vmovdqa	%ymm12,160(%rbp)
   7148 
   7149 1:
   7150 	addq	0(%rdi),%r10
   7151 	adcq	8+0(%rdi),%r11
   7152 	adcq	$1,%r12
   7153 	movq	0+0(%rbp),%rax
   7154 	movq	%rax,%r15
   7155 	mulq	%r10
   7156 	movq	%rax,%r13
   7157 	movq	%rdx,%r14
   7158 	movq	0+0(%rbp),%rax
   7159 	mulq	%r11
   7160 	imulq	%r12,%r15
   7161 	addq	%rax,%r14
   7162 	adcq	%rdx,%r15
   7163 	movq	8+0(%rbp),%rax
   7164 	movq	%rax,%r9
   7165 	mulq	%r10
   7166 	addq	%rax,%r14
   7167 	adcq	$0,%rdx
   7168 	movq	%rdx,%r10
   7169 	movq	8+0(%rbp),%rax
   7170 	mulq	%r11
   7171 	addq	%rax,%r15
   7172 	adcq	$0,%rdx
   7173 	imulq	%r12,%r9
   7174 	addq	%r10,%r15
   7175 	adcq	%rdx,%r9
   7176 	movq	%r13,%r10
   7177 	movq	%r14,%r11
   7178 	movq	%r15,%r12
   7179 	andq	$3,%r12
   7180 	movq	%r15,%r13
   7181 	andq	$-4,%r13
   7182 	movq	%r9,%r14
   7183 	shrdq	$2,%r9,%r15
   7184 	shrq	$2,%r9
   7185 	addq	%r13,%r10
   7186 	adcq	%r14,%r11
   7187 	adcq	$0,%r12
   7188 	addq	%r15,%r10
   7189 	adcq	%r9,%r11
   7190 	adcq	$0,%r12
   7191 
   7192 	leaq	16(%rdi),%rdi
   7193 2:
   7194 	vpaddd	%ymm4,%ymm0,%ymm0
   7195 	vpxor	%ymm0,%ymm12,%ymm12
   7196 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7197 	vpaddd	%ymm12,%ymm8,%ymm8
   7198 	vpxor	%ymm8,%ymm4,%ymm4
   7199 	vpsrld	$20,%ymm4,%ymm3
   7200 	vpslld	$12,%ymm4,%ymm4
   7201 	vpxor	%ymm3,%ymm4,%ymm4
   7202 	vpaddd	%ymm4,%ymm0,%ymm0
   7203 	vpxor	%ymm0,%ymm12,%ymm12
   7204 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7205 	vpaddd	%ymm12,%ymm8,%ymm8
   7206 	vpxor	%ymm8,%ymm4,%ymm4
   7207 	vpslld	$7,%ymm4,%ymm3
   7208 	vpsrld	$25,%ymm4,%ymm4
   7209 	vpxor	%ymm3,%ymm4,%ymm4
   7210 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   7211 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7212 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   7213 	addq	0(%rdi),%r10
   7214 	adcq	8+0(%rdi),%r11
   7215 	adcq	$1,%r12
   7216 	movq	0+0(%rbp),%rax
   7217 	movq	%rax,%r15
   7218 	mulq	%r10
   7219 	movq	%rax,%r13
   7220 	movq	%rdx,%r14
   7221 	movq	0+0(%rbp),%rax
   7222 	mulq	%r11
   7223 	imulq	%r12,%r15
   7224 	addq	%rax,%r14
   7225 	adcq	%rdx,%r15
   7226 	movq	8+0(%rbp),%rax
   7227 	movq	%rax,%r9
   7228 	mulq	%r10
   7229 	addq	%rax,%r14
   7230 	adcq	$0,%rdx
   7231 	movq	%rdx,%r10
   7232 	movq	8+0(%rbp),%rax
   7233 	mulq	%r11
   7234 	addq	%rax,%r15
   7235 	adcq	$0,%rdx
   7236 	imulq	%r12,%r9
   7237 	addq	%r10,%r15
   7238 	adcq	%rdx,%r9
   7239 	movq	%r13,%r10
   7240 	movq	%r14,%r11
   7241 	movq	%r15,%r12
   7242 	andq	$3,%r12
   7243 	movq	%r15,%r13
   7244 	andq	$-4,%r13
   7245 	movq	%r9,%r14
   7246 	shrdq	$2,%r9,%r15
   7247 	shrq	$2,%r9
   7248 	addq	%r13,%r10
   7249 	adcq	%r14,%r11
   7250 	adcq	$0,%r12
   7251 	addq	%r15,%r10
   7252 	adcq	%r9,%r11
   7253 	adcq	$0,%r12
   7254 	vpaddd	%ymm4,%ymm0,%ymm0
   7255 	vpxor	%ymm0,%ymm12,%ymm12
   7256 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7257 	vpaddd	%ymm12,%ymm8,%ymm8
   7258 	vpxor	%ymm8,%ymm4,%ymm4
   7259 	vpsrld	$20,%ymm4,%ymm3
   7260 	vpslld	$12,%ymm4,%ymm4
   7261 	vpxor	%ymm3,%ymm4,%ymm4
   7262 	vpaddd	%ymm4,%ymm0,%ymm0
   7263 	vpxor	%ymm0,%ymm12,%ymm12
   7264 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7265 	vpaddd	%ymm12,%ymm8,%ymm8
   7266 	vpxor	%ymm8,%ymm4,%ymm4
   7267 	vpslld	$7,%ymm4,%ymm3
   7268 	vpsrld	$25,%ymm4,%ymm4
   7269 	vpxor	%ymm3,%ymm4,%ymm4
   7270 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   7271 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7272 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   7273 	addq	16(%rdi),%r10
   7274 	adcq	8+16(%rdi),%r11
   7275 	adcq	$1,%r12
   7276 	movq	0+0(%rbp),%rax
   7277 	movq	%rax,%r15
   7278 	mulq	%r10
   7279 	movq	%rax,%r13
   7280 	movq	%rdx,%r14
   7281 	movq	0+0(%rbp),%rax
   7282 	mulq	%r11
   7283 	imulq	%r12,%r15
   7284 	addq	%rax,%r14
   7285 	adcq	%rdx,%r15
   7286 	movq	8+0(%rbp),%rax
   7287 	movq	%rax,%r9
   7288 	mulq	%r10
   7289 	addq	%rax,%r14
   7290 	adcq	$0,%rdx
   7291 	movq	%rdx,%r10
   7292 	movq	8+0(%rbp),%rax
   7293 	mulq	%r11
   7294 	addq	%rax,%r15
   7295 	adcq	$0,%rdx
   7296 	imulq	%r12,%r9
   7297 	addq	%r10,%r15
   7298 	adcq	%rdx,%r9
   7299 	movq	%r13,%r10
   7300 	movq	%r14,%r11
   7301 	movq	%r15,%r12
   7302 	andq	$3,%r12
   7303 	movq	%r15,%r13
   7304 	andq	$-4,%r13
   7305 	movq	%r9,%r14
   7306 	shrdq	$2,%r9,%r15
   7307 	shrq	$2,%r9
   7308 	addq	%r13,%r10
   7309 	adcq	%r14,%r11
   7310 	adcq	$0,%r12
   7311 	addq	%r15,%r10
   7312 	adcq	%r9,%r11
   7313 	adcq	$0,%r12
   7314 
   7315 	leaq	32(%rdi),%rdi
   7316 	decq	%rcx
   7317 	jg	1b
   7318 	decq	%r8
   7319 	jge	2b
   7320 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   7321 	vpaddd	64(%rbp),%ymm4,%ymm4
   7322 	vpaddd	96(%rbp),%ymm8,%ymm8
   7323 	vpaddd	160(%rbp),%ymm12,%ymm12
   7324 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   7325 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   7326 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   7327 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   7328 	vmovdqa	%ymm3,%ymm8
   7329 
   7330 	jmp	seal_avx2_short_loop
   7331 3:
   7332 	cmpq	$256,%rbx
   7333 	ja	3f
   7334 
   7335 seal_avx2_tail_256:
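	# <= 256 bytes remain: two parallel states (four keystream blocks),
	# with the same interleaved round/hash loop structure as above.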
   7336 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7337 	vmovdqa	64(%rbp),%ymm4
   7338 	vmovdqa	96(%rbp),%ymm8
   7339 	vmovdqa	%ymm0,%ymm1
   7340 	vmovdqa	%ymm4,%ymm5
   7341 	vmovdqa	%ymm8,%ymm9
   7342 	vmovdqa	.avx2_inc(%rip),%ymm12
   7343 	vpaddd	160(%rbp),%ymm12,%ymm13
   7344 	vpaddd	%ymm13,%ymm12,%ymm12
   7345 	vmovdqa	%ymm12,160(%rbp)
   7346 	vmovdqa	%ymm13,192(%rbp)
   7347 
   7348 1:
   7349 	addq	0(%rdi),%r10
   7350 	adcq	8+0(%rdi),%r11
   7351 	adcq	$1,%r12
   7352 	movq	0+0(%rbp),%rax
   7353 	movq	%rax,%r15
   7354 	mulq	%r10
   7355 	movq	%rax,%r13
   7356 	movq	%rdx,%r14
   7357 	movq	0+0(%rbp),%rax
   7358 	mulq	%r11
   7359 	imulq	%r12,%r15
   7360 	addq	%rax,%r14
   7361 	adcq	%rdx,%r15
   7362 	movq	8+0(%rbp),%rax
   7363 	movq	%rax,%r9
   7364 	mulq	%r10
   7365 	addq	%rax,%r14
   7366 	adcq	$0,%rdx
   7367 	movq	%rdx,%r10
   7368 	movq	8+0(%rbp),%rax
   7369 	mulq	%r11
   7370 	addq	%rax,%r15
   7371 	adcq	$0,%rdx
   7372 	imulq	%r12,%r9
   7373 	addq	%r10,%r15
   7374 	adcq	%rdx,%r9
   7375 	movq	%r13,%r10
   7376 	movq	%r14,%r11
   7377 	movq	%r15,%r12
   7378 	andq	$3,%r12
   7379 	movq	%r15,%r13
   7380 	andq	$-4,%r13
   7381 	movq	%r9,%r14
   7382 	shrdq	$2,%r9,%r15
   7383 	shrq	$2,%r9
   7384 	addq	%r13,%r10
   7385 	adcq	%r14,%r11
   7386 	adcq	$0,%r12
   7387 	addq	%r15,%r10
   7388 	adcq	%r9,%r11
   7389 	adcq	$0,%r12
   7390 
   7391 	leaq	16(%rdi),%rdi
   7392 2:
   7393 	vpaddd	%ymm4,%ymm0,%ymm0
   7394 	vpxor	%ymm0,%ymm12,%ymm12
   7395 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7396 	vpaddd	%ymm12,%ymm8,%ymm8
   7397 	vpxor	%ymm8,%ymm4,%ymm4
   7398 	vpsrld	$20,%ymm4,%ymm3
   7399 	vpslld	$12,%ymm4,%ymm4
   7400 	vpxor	%ymm3,%ymm4,%ymm4
   7401 	vpaddd	%ymm4,%ymm0,%ymm0
   7402 	vpxor	%ymm0,%ymm12,%ymm12
   7403 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7404 	vpaddd	%ymm12,%ymm8,%ymm8
   7405 	vpxor	%ymm8,%ymm4,%ymm4
   7406 	vpslld	$7,%ymm4,%ymm3
   7407 	vpsrld	$25,%ymm4,%ymm4
   7408 	vpxor	%ymm3,%ymm4,%ymm4
   7409 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   7410 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7411 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   7412 	vpaddd	%ymm5,%ymm1,%ymm1
   7413 	vpxor	%ymm1,%ymm13,%ymm13
   7414 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   7415 	vpaddd	%ymm13,%ymm9,%ymm9
   7416 	vpxor	%ymm9,%ymm5,%ymm5
   7417 	vpsrld	$20,%ymm5,%ymm3
   7418 	vpslld	$12,%ymm5,%ymm5
   7419 	vpxor	%ymm3,%ymm5,%ymm5
   7420 	vpaddd	%ymm5,%ymm1,%ymm1
   7421 	vpxor	%ymm1,%ymm13,%ymm13
   7422 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   7423 	vpaddd	%ymm13,%ymm9,%ymm9
   7424 	vpxor	%ymm9,%ymm5,%ymm5
   7425 	vpslld	$7,%ymm5,%ymm3
   7426 	vpsrld	$25,%ymm5,%ymm5
   7427 	vpxor	%ymm3,%ymm5,%ymm5
   7428 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   7429 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7430 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   7431 	addq	0(%rdi),%r10
   7432 	adcq	8+0(%rdi),%r11
   7433 	adcq	$1,%r12
   7434 	movq	0+0(%rbp),%rax
   7435 	movq	%rax,%r15
   7436 	mulq	%r10
   7437 	movq	%rax,%r13
   7438 	movq	%rdx,%r14
   7439 	movq	0+0(%rbp),%rax
   7440 	mulq	%r11
   7441 	imulq	%r12,%r15
   7442 	addq	%rax,%r14
   7443 	adcq	%rdx,%r15
   7444 	movq	8+0(%rbp),%rax
   7445 	movq	%rax,%r9
   7446 	mulq	%r10
   7447 	addq	%rax,%r14
   7448 	adcq	$0,%rdx
   7449 	movq	%rdx,%r10
   7450 	movq	8+0(%rbp),%rax
   7451 	mulq	%r11
   7452 	addq	%rax,%r15
   7453 	adcq	$0,%rdx
   7454 	imulq	%r12,%r9
   7455 	addq	%r10,%r15
   7456 	adcq	%rdx,%r9
   7457 	movq	%r13,%r10
   7458 	movq	%r14,%r11
   7459 	movq	%r15,%r12
   7460 	andq	$3,%r12
   7461 	movq	%r15,%r13
   7462 	andq	$-4,%r13
   7463 	movq	%r9,%r14
   7464 	shrdq	$2,%r9,%r15
   7465 	shrq	$2,%r9
   7466 	addq	%r13,%r10
   7467 	adcq	%r14,%r11
   7468 	adcq	$0,%r12
   7469 	addq	%r15,%r10
   7470 	adcq	%r9,%r11
   7471 	adcq	$0,%r12
   7472 	vpaddd	%ymm4,%ymm0,%ymm0
   7473 	vpxor	%ymm0,%ymm12,%ymm12
   7474 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7475 	vpaddd	%ymm12,%ymm8,%ymm8
   7476 	vpxor	%ymm8,%ymm4,%ymm4
   7477 	vpsrld	$20,%ymm4,%ymm3
   7478 	vpslld	$12,%ymm4,%ymm4
   7479 	vpxor	%ymm3,%ymm4,%ymm4
   7480 	vpaddd	%ymm4,%ymm0,%ymm0
   7481 	vpxor	%ymm0,%ymm12,%ymm12
   7482 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7483 	vpaddd	%ymm12,%ymm8,%ymm8
   7484 	vpxor	%ymm8,%ymm4,%ymm4
   7485 	vpslld	$7,%ymm4,%ymm3
   7486 	vpsrld	$25,%ymm4,%ymm4
   7487 	vpxor	%ymm3,%ymm4,%ymm4
   7488 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   7489 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7490 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   7491 	vpaddd	%ymm5,%ymm1,%ymm1
   7492 	vpxor	%ymm1,%ymm13,%ymm13
   7493 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   7494 	vpaddd	%ymm13,%ymm9,%ymm9
   7495 	vpxor	%ymm9,%ymm5,%ymm5
   7496 	vpsrld	$20,%ymm5,%ymm3
   7497 	vpslld	$12,%ymm5,%ymm5
   7498 	vpxor	%ymm3,%ymm5,%ymm5
   7499 	vpaddd	%ymm5,%ymm1,%ymm1
   7500 	vpxor	%ymm1,%ymm13,%ymm13
   7501 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   7502 	vpaddd	%ymm13,%ymm9,%ymm9
   7503 	vpxor	%ymm9,%ymm5,%ymm5
   7504 	vpslld	$7,%ymm5,%ymm3
   7505 	vpsrld	$25,%ymm5,%ymm5
   7506 	vpxor	%ymm3,%ymm5,%ymm5
   7507 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   7508 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7509 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   7510 	addq	16(%rdi),%r10
   7511 	adcq	8+16(%rdi),%r11
   7512 	adcq	$1,%r12
   7513 	movq	0+0(%rbp),%rax
   7514 	movq	%rax,%r15
   7515 	mulq	%r10
   7516 	movq	%rax,%r13
   7517 	movq	%rdx,%r14
   7518 	movq	0+0(%rbp),%rax
   7519 	mulq	%r11
   7520 	imulq	%r12,%r15
   7521 	addq	%rax,%r14
   7522 	adcq	%rdx,%r15
   7523 	movq	8+0(%rbp),%rax
   7524 	movq	%rax,%r9
   7525 	mulq	%r10
   7526 	addq	%rax,%r14
   7527 	adcq	$0,%rdx
   7528 	movq	%rdx,%r10
   7529 	movq	8+0(%rbp),%rax
   7530 	mulq	%r11
   7531 	addq	%rax,%r15
   7532 	adcq	$0,%rdx
   7533 	imulq	%r12,%r9
   7534 	addq	%r10,%r15
   7535 	adcq	%rdx,%r9
   7536 	movq	%r13,%r10
   7537 	movq	%r14,%r11
   7538 	movq	%r15,%r12
   7539 	andq	$3,%r12
   7540 	movq	%r15,%r13
   7541 	andq	$-4,%r13
   7542 	movq	%r9,%r14
   7543 	shrdq	$2,%r9,%r15
   7544 	shrq	$2,%r9
   7545 	addq	%r13,%r10
   7546 	adcq	%r14,%r11
   7547 	adcq	$0,%r12
   7548 	addq	%r15,%r10
   7549 	adcq	%r9,%r11
   7550 	adcq	$0,%r12
   7551 
   7552 	leaq	32(%rdi),%rdi
   7553 	decq	%rcx
   7554 	jg	1b
   7555 	decq	%r8
   7556 	jge	2b
   7557 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   7558 	vpaddd	64(%rbp),%ymm5,%ymm5
   7559 	vpaddd	96(%rbp),%ymm9,%ymm9
   7560 	vpaddd	192(%rbp),%ymm13,%ymm13
   7561 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   7562 	vpaddd	64(%rbp),%ymm4,%ymm4
   7563 	vpaddd	96(%rbp),%ymm8,%ymm8
   7564 	vpaddd	160(%rbp),%ymm12,%ymm12
   7565 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   7566 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   7567 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   7568 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   7569 	vpxor	0+0(%rsi),%ymm3,%ymm3
   7570 	vpxor	32+0(%rsi),%ymm1,%ymm1
   7571 	vpxor	64+0(%rsi),%ymm5,%ymm5
   7572 	vpxor	96+0(%rsi),%ymm9,%ymm9
   7573 	vmovdqu	%ymm3,0+0(%rdi)
   7574 	vmovdqu	%ymm1,32+0(%rdi)
   7575 	vmovdqu	%ymm5,64+0(%rdi)
   7576 	vmovdqu	%ymm9,96+0(%rdi)
   7577 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   7578 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   7579 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   7580 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   7581 	vmovdqa	%ymm3,%ymm8
   7582 
   7583 	movq	$128,%rcx
   7584 	leaq	128(%rsi),%rsi
   7585 	subq	$128,%rbx
   7586 	jmp	seal_avx2_hash
   7587 3:
   7588 	cmpq	$384,%rbx
   7589 	ja	seal_avx2_tail_512
   7590 
   7591 seal_avx2_tail_384:
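	# <= 384 bytes remain: three parallel states.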
   7592 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7593 	vmovdqa	64(%rbp),%ymm4
   7594 	vmovdqa	96(%rbp),%ymm8
   7595 	vmovdqa	%ymm0,%ymm1
   7596 	vmovdqa	%ymm4,%ymm5
   7597 	vmovdqa	%ymm8,%ymm9
   7598 	vmovdqa	%ymm0,%ymm2
   7599 	vmovdqa	%ymm4,%ymm6
   7600 	vmovdqa	%ymm8,%ymm10
   7601 	vmovdqa	.avx2_inc(%rip),%ymm12
   7602 	vpaddd	160(%rbp),%ymm12,%ymm14
   7603 	vpaddd	%ymm14,%ymm12,%ymm13
   7604 	vpaddd	%ymm13,%ymm12,%ymm12
   7605 	vmovdqa	%ymm12,160(%rbp)
   7606 	vmovdqa	%ymm13,192(%rbp)
   7607 	vmovdqa	%ymm14,224(%rbp)
   7608 
   7609 1:
   7610 	addq	0(%rdi),%r10
   7611 	adcq	8+0(%rdi),%r11
   7612 	adcq	$1,%r12
   7613 	movq	0+0(%rbp),%rax
   7614 	movq	%rax,%r15
   7615 	mulq	%r10
   7616 	movq	%rax,%r13
   7617 	movq	%rdx,%r14
   7618 	movq	0+0(%rbp),%rax
   7619 	mulq	%r11
   7620 	imulq	%r12,%r15
   7621 	addq	%rax,%r14
   7622 	adcq	%rdx,%r15
   7623 	movq	8+0(%rbp),%rax
   7624 	movq	%rax,%r9
   7625 	mulq	%r10
   7626 	addq	%rax,%r14
   7627 	adcq	$0,%rdx
   7628 	movq	%rdx,%r10
   7629 	movq	8+0(%rbp),%rax
   7630 	mulq	%r11
   7631 	addq	%rax,%r15
   7632 	adcq	$0,%rdx
   7633 	imulq	%r12,%r9
   7634 	addq	%r10,%r15
   7635 	adcq	%rdx,%r9
   7636 	movq	%r13,%r10
   7637 	movq	%r14,%r11
   7638 	movq	%r15,%r12
   7639 	andq	$3,%r12
   7640 	movq	%r15,%r13
   7641 	andq	$-4,%r13
   7642 	movq	%r9,%r14
   7643 	shrdq	$2,%r9,%r15
   7644 	shrq	$2,%r9
   7645 	addq	%r13,%r10
   7646 	adcq	%r14,%r11
   7647 	adcq	$0,%r12
   7648 	addq	%r15,%r10
   7649 	adcq	%r9,%r11
   7650 	adcq	$0,%r12
   7651 
   7652 	leaq	16(%rdi),%rdi
   7653 2:
   7654 	vpaddd	%ymm4,%ymm0,%ymm0
   7655 	vpxor	%ymm0,%ymm12,%ymm12
   7656 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7657 	vpaddd	%ymm12,%ymm8,%ymm8
   7658 	vpxor	%ymm8,%ymm4,%ymm4
   7659 	vpsrld	$20,%ymm4,%ymm3
   7660 	vpslld	$12,%ymm4,%ymm4
   7661 	vpxor	%ymm3,%ymm4,%ymm4
   7662 	vpaddd	%ymm4,%ymm0,%ymm0
   7663 	vpxor	%ymm0,%ymm12,%ymm12
   7664 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7665 	vpaddd	%ymm12,%ymm8,%ymm8
   7666 	vpxor	%ymm8,%ymm4,%ymm4
   7667 	vpslld	$7,%ymm4,%ymm3
   7668 	vpsrld	$25,%ymm4,%ymm4
   7669 	vpxor	%ymm3,%ymm4,%ymm4
   7670 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   7671 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7672 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   7673 	vpaddd	%ymm5,%ymm1,%ymm1
   7674 	vpxor	%ymm1,%ymm13,%ymm13
   7675 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   7676 	vpaddd	%ymm13,%ymm9,%ymm9
   7677 	vpxor	%ymm9,%ymm5,%ymm5
   7678 	vpsrld	$20,%ymm5,%ymm3
   7679 	vpslld	$12,%ymm5,%ymm5
   7680 	vpxor	%ymm3,%ymm5,%ymm5
   7681 	vpaddd	%ymm5,%ymm1,%ymm1
   7682 	vpxor	%ymm1,%ymm13,%ymm13
   7683 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   7684 	vpaddd	%ymm13,%ymm9,%ymm9
   7685 	vpxor	%ymm9,%ymm5,%ymm5
   7686 	vpslld	$7,%ymm5,%ymm3
   7687 	vpsrld	$25,%ymm5,%ymm5
   7688 	vpxor	%ymm3,%ymm5,%ymm5
   7689 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   7690 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7691 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   7692 	addq	0(%rdi),%r10
   7693 	adcq	8+0(%rdi),%r11
   7694 	adcq	$1,%r12
   7695 	movq	0+0(%rbp),%rax
   7696 	movq	%rax,%r15
   7697 	mulq	%r10
   7698 	movq	%rax,%r13
   7699 	movq	%rdx,%r14
   7700 	movq	0+0(%rbp),%rax
   7701 	mulq	%r11
   7702 	imulq	%r12,%r15
   7703 	addq	%rax,%r14
   7704 	adcq	%rdx,%r15
   7705 	movq	8+0(%rbp),%rax
   7706 	movq	%rax,%r9
   7707 	mulq	%r10
   7708 	addq	%rax,%r14
   7709 	adcq	$0,%rdx
   7710 	movq	%rdx,%r10
   7711 	movq	8+0(%rbp),%rax
   7712 	mulq	%r11
   7713 	addq	%rax,%r15
   7714 	adcq	$0,%rdx
   7715 	imulq	%r12,%r9
   7716 	addq	%r10,%r15
   7717 	adcq	%rdx,%r9
   7718 	movq	%r13,%r10
   7719 	movq	%r14,%r11
   7720 	movq	%r15,%r12
   7721 	andq	$3,%r12
   7722 	movq	%r15,%r13
   7723 	andq	$-4,%r13
   7724 	movq	%r9,%r14
   7725 	shrdq	$2,%r9,%r15
   7726 	shrq	$2,%r9
   7727 	addq	%r13,%r10
   7728 	adcq	%r14,%r11
   7729 	adcq	$0,%r12
   7730 	addq	%r15,%r10
   7731 	adcq	%r9,%r11
   7732 	adcq	$0,%r12
   7733 	vpaddd	%ymm6,%ymm2,%ymm2
   7734 	vpxor	%ymm2,%ymm14,%ymm14
   7735 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   7736 	vpaddd	%ymm14,%ymm10,%ymm10
   7737 	vpxor	%ymm10,%ymm6,%ymm6
   7738 	vpsrld	$20,%ymm6,%ymm3
   7739 	vpslld	$12,%ymm6,%ymm6
   7740 	vpxor	%ymm3,%ymm6,%ymm6
   7741 	vpaddd	%ymm6,%ymm2,%ymm2
   7742 	vpxor	%ymm2,%ymm14,%ymm14
   7743 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   7744 	vpaddd	%ymm14,%ymm10,%ymm10
   7745 	vpxor	%ymm10,%ymm6,%ymm6
   7746 	vpslld	$7,%ymm6,%ymm3
   7747 	vpsrld	$25,%ymm6,%ymm6
   7748 	vpxor	%ymm3,%ymm6,%ymm6
   7749 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   7750 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   7751 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   7752 	vpaddd	%ymm4,%ymm0,%ymm0
   7753 	vpxor	%ymm0,%ymm12,%ymm12
   7754 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   7755 	vpaddd	%ymm12,%ymm8,%ymm8
   7756 	vpxor	%ymm8,%ymm4,%ymm4
   7757 	vpsrld	$20,%ymm4,%ymm3
   7758 	vpslld	$12,%ymm4,%ymm4
   7759 	vpxor	%ymm3,%ymm4,%ymm4
   7760 	vpaddd	%ymm4,%ymm0,%ymm0
   7761 	vpxor	%ymm0,%ymm12,%ymm12
   7762 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   7763 	vpaddd	%ymm12,%ymm8,%ymm8
   7764 	vpxor	%ymm8,%ymm4,%ymm4
   7765 	vpslld	$7,%ymm4,%ymm3
   7766 	vpsrld	$25,%ymm4,%ymm4
   7767 	vpxor	%ymm3,%ymm4,%ymm4
   7768 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   7769 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   7770 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   7771 	addq	16(%rdi),%r10
   7772 	adcq	8+16(%rdi),%r11
   7773 	adcq	$1,%r12
   7774 	movq	0+0(%rbp),%rax
   7775 	movq	%rax,%r15
   7776 	mulq	%r10
   7777 	movq	%rax,%r13
   7778 	movq	%rdx,%r14
   7779 	movq	0+0(%rbp),%rax
   7780 	mulq	%r11
   7781 	imulq	%r12,%r15
   7782 	addq	%rax,%r14
   7783 	adcq	%rdx,%r15
   7784 	movq	8+0(%rbp),%rax
   7785 	movq	%rax,%r9
   7786 	mulq	%r10
   7787 	addq	%rax,%r14
   7788 	adcq	$0,%rdx
   7789 	movq	%rdx,%r10
   7790 	movq	8+0(%rbp),%rax
   7791 	mulq	%r11
   7792 	addq	%rax,%r15
   7793 	adcq	$0,%rdx
   7794 	imulq	%r12,%r9
   7795 	addq	%r10,%r15
   7796 	adcq	%rdx,%r9
   7797 	movq	%r13,%r10
   7798 	movq	%r14,%r11
   7799 	movq	%r15,%r12
   7800 	andq	$3,%r12
   7801 	movq	%r15,%r13
   7802 	andq	$-4,%r13
   7803 	movq	%r9,%r14
   7804 	shrdq	$2,%r9,%r15
   7805 	shrq	$2,%r9
   7806 	addq	%r13,%r10
   7807 	adcq	%r14,%r11
   7808 	adcq	$0,%r12
   7809 	addq	%r15,%r10
   7810 	adcq	%r9,%r11
   7811 	adcq	$0,%r12
   7812 	vpaddd	%ymm5,%ymm1,%ymm1
   7813 	vpxor	%ymm1,%ymm13,%ymm13
   7814 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   7815 	vpaddd	%ymm13,%ymm9,%ymm9
   7816 	vpxor	%ymm9,%ymm5,%ymm5
   7817 	vpsrld	$20,%ymm5,%ymm3
   7818 	vpslld	$12,%ymm5,%ymm5
   7819 	vpxor	%ymm3,%ymm5,%ymm5
   7820 	vpaddd	%ymm5,%ymm1,%ymm1
   7821 	vpxor	%ymm1,%ymm13,%ymm13
   7822 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   7823 	vpaddd	%ymm13,%ymm9,%ymm9
   7824 	vpxor	%ymm9,%ymm5,%ymm5
   7825 	vpslld	$7,%ymm5,%ymm3
   7826 	vpsrld	$25,%ymm5,%ymm5
   7827 	vpxor	%ymm3,%ymm5,%ymm5
   7828 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   7829 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   7830 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   7831 	vpaddd	%ymm6,%ymm2,%ymm2
   7832 	vpxor	%ymm2,%ymm14,%ymm14
   7833 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   7834 	vpaddd	%ymm14,%ymm10,%ymm10
   7835 	vpxor	%ymm10,%ymm6,%ymm6
   7836 	vpsrld	$20,%ymm6,%ymm3
   7837 	vpslld	$12,%ymm6,%ymm6
   7838 	vpxor	%ymm3,%ymm6,%ymm6
   7839 	vpaddd	%ymm6,%ymm2,%ymm2
   7840 	vpxor	%ymm2,%ymm14,%ymm14
   7841 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   7842 	vpaddd	%ymm14,%ymm10,%ymm10
   7843 	vpxor	%ymm10,%ymm6,%ymm6
   7844 	vpslld	$7,%ymm6,%ymm3
   7845 	vpsrld	$25,%ymm6,%ymm6
   7846 	vpxor	%ymm3,%ymm6,%ymm6
   7847 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   7848 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   7849 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   7850 
   7851 	leaq	32(%rdi),%rdi
   7852 	decq	%rcx
   7853 	jg	1b
   7854 	decq	%r8
   7855 	jge	2b
   7856 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   7857 	vpaddd	64(%rbp),%ymm6,%ymm6
   7858 	vpaddd	96(%rbp),%ymm10,%ymm10
   7859 	vpaddd	224(%rbp),%ymm14,%ymm14
   7860 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   7861 	vpaddd	64(%rbp),%ymm5,%ymm5
   7862 	vpaddd	96(%rbp),%ymm9,%ymm9
   7863 	vpaddd	192(%rbp),%ymm13,%ymm13
   7864 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   7865 	vpaddd	64(%rbp),%ymm4,%ymm4
   7866 	vpaddd	96(%rbp),%ymm8,%ymm8
   7867 	vpaddd	160(%rbp),%ymm12,%ymm12
   7868 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   7869 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   7870 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   7871 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   7872 	vpxor	0+0(%rsi),%ymm3,%ymm3
   7873 	vpxor	32+0(%rsi),%ymm2,%ymm2
   7874 	vpxor	64+0(%rsi),%ymm6,%ymm6
   7875 	vpxor	96+0(%rsi),%ymm10,%ymm10
   7876 	vmovdqu	%ymm3,0+0(%rdi)
   7877 	vmovdqu	%ymm2,32+0(%rdi)
   7878 	vmovdqu	%ymm6,64+0(%rdi)
   7879 	vmovdqu	%ymm10,96+0(%rdi)
   7880 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   7881 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   7882 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   7883 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   7884 	vpxor	0+128(%rsi),%ymm3,%ymm3
   7885 	vpxor	32+128(%rsi),%ymm1,%ymm1
   7886 	vpxor	64+128(%rsi),%ymm5,%ymm5
   7887 	vpxor	96+128(%rsi),%ymm9,%ymm9
   7888 	vmovdqu	%ymm3,0+128(%rdi)
   7889 	vmovdqu	%ymm1,32+128(%rdi)
   7890 	vmovdqu	%ymm5,64+128(%rdi)
   7891 	vmovdqu	%ymm9,96+128(%rdi)
   7892 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   7893 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   7894 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   7895 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   7896 	vmovdqa	%ymm3,%ymm8
   7897 
   7898 	movq	$256,%rcx
   7899 	leaq	256(%rsi),%rsi
   7900 	subq	$256,%rbx
   7901 	jmp	seal_avx2_hash
   7902 
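	/* seal_avx2_tail_512: seal a final chunk of at most 512 bytes. Four
	   two-block AVX2 ChaCha20 states are set up below (their block
	   counters are spilled to 160..256(%rbp)), and the rounds are
	   interleaved with Poly1305 over ciphertext that was already written.
	   (Editor annotation.) */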
   7903 seal_avx2_tail_512:
   7904 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7905 	vmovdqa	64(%rbp),%ymm4
   7906 	vmovdqa	96(%rbp),%ymm8
   7907 	vmovdqa	%ymm0,%ymm1
   7908 	vmovdqa	%ymm4,%ymm5
   7909 	vmovdqa	%ymm8,%ymm9
   7910 	vmovdqa	%ymm0,%ymm2
   7911 	vmovdqa	%ymm4,%ymm6
   7912 	vmovdqa	%ymm8,%ymm10
   7913 	vmovdqa	%ymm0,%ymm3
   7914 	vmovdqa	%ymm4,%ymm7
   7915 	vmovdqa	%ymm8,%ymm11
   7916 	vmovdqa	.avx2_inc(%rip),%ymm12
   7917 	vpaddd	160(%rbp),%ymm12,%ymm15
   7918 	vpaddd	%ymm15,%ymm12,%ymm14
   7919 	vpaddd	%ymm14,%ymm12,%ymm13
   7920 	vpaddd	%ymm13,%ymm12,%ymm12
   7921 	vmovdqa	%ymm15,256(%rbp)
   7922 	vmovdqa	%ymm14,224(%rbp)
   7923 	vmovdqa	%ymm13,192(%rbp)
   7924 	vmovdqa	%ymm12,160(%rbp)
   7925 
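	/* Label 1: absorb one extra 16-byte ciphertext block at (%rdi) into
	   the Poly1305 state, using the BMI2 mulx form of the multiply.
	   (Editor annotation.) */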
   7926 1:
   7927 	addq	0(%rdi),%r10
   7928 	adcq	8+0(%rdi),%r11
   7929 	adcq	$1,%r12
   7930 	movq	0+0(%rbp),%rdx
   7931 	movq	%rdx,%r15
   7932 	mulxq	%r10,%r13,%r14
   7933 	mulxq	%r11,%rax,%rdx
   7934 	imulq	%r12,%r15
   7935 	addq	%rax,%r14
   7936 	adcq	%rdx,%r15
   7937 	movq	8+0(%rbp),%rdx
   7938 	mulxq	%r10,%r10,%rax
   7939 	addq	%r10,%r14
   7940 	mulxq	%r11,%r11,%r9
   7941 	adcq	%r11,%r15
   7942 	adcq	$0,%r9
   7943 	imulq	%r12,%rdx
   7944 	addq	%rax,%r15
   7945 	adcq	%rdx,%r9
   7946 	movq	%r13,%r10
   7947 	movq	%r14,%r11
   7948 	movq	%r15,%r12
   7949 	andq	$3,%r12
   7950 	movq	%r15,%r13
   7951 	andq	$-4,%r13
   7952 	movq	%r9,%r14
   7953 	shrdq	$2,%r9,%r15
   7954 	shrq	$2,%r9
   7955 	addq	%r13,%r10
   7956 	adcq	%r14,%r11
   7957 	adcq	$0,%r12
   7958 	addq	%r15,%r10
   7959 	adcq	%r9,%r11
   7960 	adcq	$0,%r12
   7961 
   7962 	leaq	16(%rdi),%rdi
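	/* Label 2: one ChaCha20 double round (column round, then diagonal
	   round via vpalignr) across all four states, interleaved with two
	   more Poly1305 block updates at 0(%rdi)/16(%rdi). The 16- and 8-bit
	   rotations use the .rol16/.rol8 vpshufb masks; the 12- and 7-bit
	   rotations are shift pairs combined with vpxor. %ymm8 is spilled to
	   128(%rbp) so it can double as scratch. (Editor annotation.) */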
   7963 2:
   7964 	vmovdqa	%ymm8,128(%rbp)
   7965 	vmovdqa	.rol16(%rip),%ymm8
   7966 	vpaddd	%ymm7,%ymm3,%ymm3
   7967 	vpaddd	%ymm6,%ymm2,%ymm2
   7968 	vpaddd	%ymm5,%ymm1,%ymm1
   7969 	vpaddd	%ymm4,%ymm0,%ymm0
   7970 	vpxor	%ymm3,%ymm15,%ymm15
   7971 	vpxor	%ymm2,%ymm14,%ymm14
   7972 	vpxor	%ymm1,%ymm13,%ymm13
   7973 	vpxor	%ymm0,%ymm12,%ymm12
   7974 	vpshufb	%ymm8,%ymm15,%ymm15
   7975 	vpshufb	%ymm8,%ymm14,%ymm14
   7976 	vpshufb	%ymm8,%ymm13,%ymm13
   7977 	vpshufb	%ymm8,%ymm12,%ymm12
   7978 	vmovdqa	128(%rbp),%ymm8
   7979 	vpaddd	%ymm15,%ymm11,%ymm11
   7980 	vpaddd	%ymm14,%ymm10,%ymm10
   7981 	vpaddd	%ymm13,%ymm9,%ymm9
   7982 	vpaddd	%ymm12,%ymm8,%ymm8
   7983 	vpxor	%ymm11,%ymm7,%ymm7
   7984 	addq	0(%rdi),%r10
   7985 	adcq	8+0(%rdi),%r11
   7986 	adcq	$1,%r12
   7987 	vpxor	%ymm10,%ymm6,%ymm6
   7988 	vpxor	%ymm9,%ymm5,%ymm5
   7989 	vpxor	%ymm8,%ymm4,%ymm4
   7990 	vmovdqa	%ymm8,128(%rbp)
   7991 	vpsrld	$20,%ymm7,%ymm8
   7992 	vpslld	$32-20,%ymm7,%ymm7
   7993 	vpxor	%ymm8,%ymm7,%ymm7
   7994 	vpsrld	$20,%ymm6,%ymm8
   7995 	vpslld	$32-20,%ymm6,%ymm6
   7996 	vpxor	%ymm8,%ymm6,%ymm6
   7997 	vpsrld	$20,%ymm5,%ymm8
   7998 	vpslld	$32-20,%ymm5,%ymm5
   7999 	vpxor	%ymm8,%ymm5,%ymm5
   8000 	vpsrld	$20,%ymm4,%ymm8
   8001 	vpslld	$32-20,%ymm4,%ymm4
   8002 	vpxor	%ymm8,%ymm4,%ymm4
   8003 	vmovdqa	.rol8(%rip),%ymm8
   8004 	vpaddd	%ymm7,%ymm3,%ymm3
   8005 	vpaddd	%ymm6,%ymm2,%ymm2
   8006 	vpaddd	%ymm5,%ymm1,%ymm1
   8007 	movq	0+0(%rbp),%rdx
   8008 	movq	%rdx,%r15
   8009 	mulxq	%r10,%r13,%r14
   8010 	mulxq	%r11,%rax,%rdx
   8011 	imulq	%r12,%r15
   8012 	addq	%rax,%r14
   8013 	adcq	%rdx,%r15
   8014 	vpaddd	%ymm4,%ymm0,%ymm0
   8015 	vpxor	%ymm3,%ymm15,%ymm15
   8016 	vpxor	%ymm2,%ymm14,%ymm14
   8017 	vpxor	%ymm1,%ymm13,%ymm13
   8018 	vpxor	%ymm0,%ymm12,%ymm12
   8019 	vpshufb	%ymm8,%ymm15,%ymm15
   8020 	vpshufb	%ymm8,%ymm14,%ymm14
   8021 	vpshufb	%ymm8,%ymm13,%ymm13
   8022 	vpshufb	%ymm8,%ymm12,%ymm12
   8023 	vmovdqa	128(%rbp),%ymm8
   8024 	vpaddd	%ymm15,%ymm11,%ymm11
   8025 	vpaddd	%ymm14,%ymm10,%ymm10
   8026 	vpaddd	%ymm13,%ymm9,%ymm9
   8027 	vpaddd	%ymm12,%ymm8,%ymm8
   8028 	vpxor	%ymm11,%ymm7,%ymm7
   8029 	vpxor	%ymm10,%ymm6,%ymm6
   8030 	vpxor	%ymm9,%ymm5,%ymm5
   8031 	vpxor	%ymm8,%ymm4,%ymm4
   8032 	vmovdqa	%ymm8,128(%rbp)
   8033 	vpsrld	$25,%ymm7,%ymm8
   8034 	movq	8+0(%rbp),%rdx
   8035 	mulxq	%r10,%r10,%rax
   8036 	addq	%r10,%r14
   8037 	mulxq	%r11,%r11,%r9
   8038 	adcq	%r11,%r15
   8039 	adcq	$0,%r9
   8040 	imulq	%r12,%rdx
   8041 	vpslld	$32-25,%ymm7,%ymm7
   8042 	vpxor	%ymm8,%ymm7,%ymm7
   8043 	vpsrld	$25,%ymm6,%ymm8
   8044 	vpslld	$32-25,%ymm6,%ymm6
   8045 	vpxor	%ymm8,%ymm6,%ymm6
   8046 	vpsrld	$25,%ymm5,%ymm8
   8047 	vpslld	$32-25,%ymm5,%ymm5
   8048 	vpxor	%ymm8,%ymm5,%ymm5
   8049 	vpsrld	$25,%ymm4,%ymm8
   8050 	vpslld	$32-25,%ymm4,%ymm4
   8051 	vpxor	%ymm8,%ymm4,%ymm4
   8052 	vmovdqa	128(%rbp),%ymm8
   8053 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   8054 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   8055 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   8056 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   8057 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   8058 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   8059 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   8060 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   8061 	addq	%rax,%r15
   8062 	adcq	%rdx,%r9
   8063 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   8064 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   8065 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   8066 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   8067 	vmovdqa	%ymm8,128(%rbp)
   8068 	vmovdqa	.rol16(%rip),%ymm8
   8069 	vpaddd	%ymm7,%ymm3,%ymm3
   8070 	vpaddd	%ymm6,%ymm2,%ymm2
   8071 	vpaddd	%ymm5,%ymm1,%ymm1
   8072 	vpaddd	%ymm4,%ymm0,%ymm0
   8073 	vpxor	%ymm3,%ymm15,%ymm15
   8074 	vpxor	%ymm2,%ymm14,%ymm14
   8075 	vpxor	%ymm1,%ymm13,%ymm13
   8076 	vpxor	%ymm0,%ymm12,%ymm12
   8077 	vpshufb	%ymm8,%ymm15,%ymm15
   8078 	vpshufb	%ymm8,%ymm14,%ymm14
   8079 	vpshufb	%ymm8,%ymm13,%ymm13
   8080 	vpshufb	%ymm8,%ymm12,%ymm12
   8081 	vmovdqa	128(%rbp),%ymm8
   8082 	vpaddd	%ymm15,%ymm11,%ymm11
   8083 	movq	%r13,%r10
   8084 	movq	%r14,%r11
   8085 	movq	%r15,%r12
   8086 	andq	$3,%r12
   8087 	movq	%r15,%r13
   8088 	andq	$-4,%r13
   8089 	movq	%r9,%r14
   8090 	shrdq	$2,%r9,%r15
   8091 	shrq	$2,%r9
   8092 	addq	%r13,%r10
   8093 	adcq	%r14,%r11
   8094 	adcq	$0,%r12
   8095 	addq	%r15,%r10
   8096 	adcq	%r9,%r11
   8097 	adcq	$0,%r12
   8098 	vpaddd	%ymm14,%ymm10,%ymm10
   8099 	vpaddd	%ymm13,%ymm9,%ymm9
   8100 	vpaddd	%ymm12,%ymm8,%ymm8
   8101 	vpxor	%ymm11,%ymm7,%ymm7
   8102 	vpxor	%ymm10,%ymm6,%ymm6
   8103 	vpxor	%ymm9,%ymm5,%ymm5
   8104 	vpxor	%ymm8,%ymm4,%ymm4
   8105 	vmovdqa	%ymm8,128(%rbp)
   8106 	vpsrld	$20,%ymm7,%ymm8
   8107 	vpslld	$32-20,%ymm7,%ymm7
   8108 	vpxor	%ymm8,%ymm7,%ymm7
   8109 	vpsrld	$20,%ymm6,%ymm8
   8110 	vpslld	$32-20,%ymm6,%ymm6
   8111 	vpxor	%ymm8,%ymm6,%ymm6
   8112 	vpsrld	$20,%ymm5,%ymm8
   8113 	vpslld	$32-20,%ymm5,%ymm5
   8114 	vpxor	%ymm8,%ymm5,%ymm5
   8115 	vpsrld	$20,%ymm4,%ymm8
   8116 	vpslld	$32-20,%ymm4,%ymm4
   8117 	vpxor	%ymm8,%ymm4,%ymm4
   8118 	addq	16(%rdi),%r10
   8119 	adcq	8+16(%rdi),%r11
   8120 	adcq	$1,%r12
   8121 	vmovdqa	.rol8(%rip),%ymm8
   8122 	vpaddd	%ymm7,%ymm3,%ymm3
   8123 	vpaddd	%ymm6,%ymm2,%ymm2
   8124 	vpaddd	%ymm5,%ymm1,%ymm1
   8125 	vpaddd	%ymm4,%ymm0,%ymm0
   8126 	vpxor	%ymm3,%ymm15,%ymm15
   8127 	vpxor	%ymm2,%ymm14,%ymm14
   8128 	vpxor	%ymm1,%ymm13,%ymm13
   8129 	vpxor	%ymm0,%ymm12,%ymm12
   8130 	vpshufb	%ymm8,%ymm15,%ymm15
   8131 	vpshufb	%ymm8,%ymm14,%ymm14
   8132 	vpshufb	%ymm8,%ymm13,%ymm13
   8133 	vpshufb	%ymm8,%ymm12,%ymm12
   8134 	vmovdqa	128(%rbp),%ymm8
   8135 	vpaddd	%ymm15,%ymm11,%ymm11
   8136 	vpaddd	%ymm14,%ymm10,%ymm10
   8137 	vpaddd	%ymm13,%ymm9,%ymm9
   8138 	vpaddd	%ymm12,%ymm8,%ymm8
   8139 	vpxor	%ymm11,%ymm7,%ymm7
   8140 	vpxor	%ymm10,%ymm6,%ymm6
   8141 	movq	0+0(%rbp),%rdx
   8142 	movq	%rdx,%r15
   8143 	mulxq	%r10,%r13,%r14
   8144 	mulxq	%r11,%rax,%rdx
   8145 	imulq	%r12,%r15
   8146 	addq	%rax,%r14
   8147 	adcq	%rdx,%r15
   8148 	vpxor	%ymm9,%ymm5,%ymm5
   8149 	vpxor	%ymm8,%ymm4,%ymm4
   8150 	vmovdqa	%ymm8,128(%rbp)
   8151 	vpsrld	$25,%ymm7,%ymm8
   8152 	vpslld	$32-25,%ymm7,%ymm7
   8153 	vpxor	%ymm8,%ymm7,%ymm7
   8154 	vpsrld	$25,%ymm6,%ymm8
   8155 	vpslld	$32-25,%ymm6,%ymm6
   8156 	vpxor	%ymm8,%ymm6,%ymm6
   8157 	vpsrld	$25,%ymm5,%ymm8
   8158 	vpslld	$32-25,%ymm5,%ymm5
   8159 	vpxor	%ymm8,%ymm5,%ymm5
   8160 	vpsrld	$25,%ymm4,%ymm8
   8161 	vpslld	$32-25,%ymm4,%ymm4
   8162 	vpxor	%ymm8,%ymm4,%ymm4
   8163 	vmovdqa	128(%rbp),%ymm8
   8164 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   8165 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   8166 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   8167 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   8168 	movq	8+0(%rbp),%rdx
   8169 	mulxq	%r10,%r10,%rax
   8170 	addq	%r10,%r14
   8171 	mulxq	%r11,%r11,%r9
   8172 	adcq	%r11,%r15
   8173 	adcq	$0,%r9
   8174 	imulq	%r12,%rdx
   8175 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   8176 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   8177 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   8178 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   8179 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   8180 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   8181 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   8182 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   8195 	addq	%rax,%r15
   8196 	adcq	%rdx,%r9
   8217 	movq	%r13,%r10
   8218 	movq	%r14,%r11
   8219 	movq	%r15,%r12
   8220 	andq	$3,%r12
   8221 	movq	%r15,%r13
   8222 	andq	$-4,%r13
   8223 	movq	%r9,%r14
   8224 	shrdq	$2,%r9,%r15
   8225 	shrq	$2,%r9
   8226 	addq	%r13,%r10
   8227 	adcq	%r14,%r11
   8228 	adcq	$0,%r12
   8229 	addq	%r15,%r10
   8230 	adcq	%r9,%r11
   8231 	adcq	$0,%r12
   8232 
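	/* Loop control: run labels 1+2 while %rcx > 0 (hashing extra
	   ciphertext), then label 2 alone until %r8 goes negative, completing
	   the ChaCha20 rounds. (Editor annotation, inferred from the jumps.) */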
   8233 	leaq	32(%rdi),%rdi
   8234 	decq	%rcx
   8235 	jg	1b
   8236 	decq	%r8
   8237 	jge	2b
   8238 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   8239 	vpaddd	64(%rbp),%ymm7,%ymm7
   8240 	vpaddd	96(%rbp),%ymm11,%ymm11
   8241 	vpaddd	256(%rbp),%ymm15,%ymm15
   8242 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   8243 	vpaddd	64(%rbp),%ymm6,%ymm6
   8244 	vpaddd	96(%rbp),%ymm10,%ymm10
   8245 	vpaddd	224(%rbp),%ymm14,%ymm14
   8246 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   8247 	vpaddd	64(%rbp),%ymm5,%ymm5
   8248 	vpaddd	96(%rbp),%ymm9,%ymm9
   8249 	vpaddd	192(%rbp),%ymm13,%ymm13
   8250 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   8251 	vpaddd	64(%rbp),%ymm4,%ymm4
   8252 	vpaddd	96(%rbp),%ymm8,%ymm8
   8253 	vpaddd	160(%rbp),%ymm12,%ymm12
   8254 
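	/* The feed-forward additions above turn the state back into key
	   stream. vperm2i128 below re-interleaves the two 64-byte blocks held
	   in each register group; the first 384 bytes are XORed with plaintext
	   and stored, while the last 128 bytes of key stream stay queued in
	   %ymm0/%ymm4/%ymm8/%ymm12 and the %rcx=384 bytes just written are
	   hashed by seal_avx2_hash. (Editor annotation.) */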
   8255 	vmovdqa	%ymm0,128(%rbp)
   8256 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   8257 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   8258 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   8259 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   8260 	vpxor	0+0(%rsi),%ymm0,%ymm0
   8261 	vpxor	32+0(%rsi),%ymm3,%ymm3
   8262 	vpxor	64+0(%rsi),%ymm7,%ymm7
   8263 	vpxor	96+0(%rsi),%ymm11,%ymm11
   8264 	vmovdqu	%ymm0,0+0(%rdi)
   8265 	vmovdqu	%ymm3,32+0(%rdi)
   8266 	vmovdqu	%ymm7,64+0(%rdi)
   8267 	vmovdqu	%ymm11,96+0(%rdi)
   8268 
   8269 	vmovdqa	128(%rbp),%ymm0
   8270 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   8271 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   8272 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   8273 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   8274 	vpxor	0+128(%rsi),%ymm3,%ymm3
   8275 	vpxor	32+128(%rsi),%ymm2,%ymm2
   8276 	vpxor	64+128(%rsi),%ymm6,%ymm6
   8277 	vpxor	96+128(%rsi),%ymm10,%ymm10
   8278 	vmovdqu	%ymm3,0+128(%rdi)
   8279 	vmovdqu	%ymm2,32+128(%rdi)
   8280 	vmovdqu	%ymm6,64+128(%rdi)
   8281 	vmovdqu	%ymm10,96+128(%rdi)
   8282 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   8283 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   8284 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   8285 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   8286 	vpxor	0+256(%rsi),%ymm3,%ymm3
   8287 	vpxor	32+256(%rsi),%ymm1,%ymm1
   8288 	vpxor	64+256(%rsi),%ymm5,%ymm5
   8289 	vpxor	96+256(%rsi),%ymm9,%ymm9
   8290 	vmovdqu	%ymm3,0+256(%rdi)
   8291 	vmovdqu	%ymm1,32+256(%rdi)
   8292 	vmovdqu	%ymm5,64+256(%rdi)
   8293 	vmovdqu	%ymm9,96+256(%rdi)
   8294 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   8295 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   8296 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   8297 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   8298 	vmovdqa	%ymm3,%ymm8
   8299 
   8300 	movq	$384,%rcx
   8301 	leaq	384(%rsi),%rsi
   8302 	subq	$384,%rbx
   8303 	jmp	seal_avx2_hash
   8304 
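	/* seal_avx2_320: plaintexts of at most 320 bytes. Three two-block
	   states give six ChaCha20 blocks of key stream; the first 32 bytes
	   are clamped into the Poly1305 key below, and the rest encrypts via
	   seal_avx2_short. (Editor annotation.) */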
   8305 seal_avx2_320:
   8306 	vmovdqa	%ymm0,%ymm1
   8307 	vmovdqa	%ymm0,%ymm2
   8308 	vmovdqa	%ymm4,%ymm5
   8309 	vmovdqa	%ymm4,%ymm6
   8310 	vmovdqa	%ymm8,%ymm9
   8311 	vmovdqa	%ymm8,%ymm10
   8312 	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
   8313 	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
   8314 	vmovdqa	%ymm4,%ymm7
   8315 	vmovdqa	%ymm8,%ymm11
   8316 	vmovdqa	%ymm12,160(%rbp)
   8317 	vmovdqa	%ymm13,192(%rbp)
   8318 	vmovdqa	%ymm14,224(%rbp)
   8319 	movq	$10,%r10
   8320 1:
   8321 	vpaddd	%ymm4,%ymm0,%ymm0
   8322 	vpxor	%ymm0,%ymm12,%ymm12
   8323 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   8324 	vpaddd	%ymm12,%ymm8,%ymm8
   8325 	vpxor	%ymm8,%ymm4,%ymm4
   8326 	vpsrld	$20,%ymm4,%ymm3
   8327 	vpslld	$12,%ymm4,%ymm4
   8328 	vpxor	%ymm3,%ymm4,%ymm4
   8329 	vpaddd	%ymm4,%ymm0,%ymm0
   8330 	vpxor	%ymm0,%ymm12,%ymm12
   8331 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   8332 	vpaddd	%ymm12,%ymm8,%ymm8
   8333 	vpxor	%ymm8,%ymm4,%ymm4
   8334 	vpslld	$7,%ymm4,%ymm3
   8335 	vpsrld	$25,%ymm4,%ymm4
   8336 	vpxor	%ymm3,%ymm4,%ymm4
   8337 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   8338 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   8339 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   8340 	vpaddd	%ymm5,%ymm1,%ymm1
   8341 	vpxor	%ymm1,%ymm13,%ymm13
   8342 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   8343 	vpaddd	%ymm13,%ymm9,%ymm9
   8344 	vpxor	%ymm9,%ymm5,%ymm5
   8345 	vpsrld	$20,%ymm5,%ymm3
   8346 	vpslld	$12,%ymm5,%ymm5
   8347 	vpxor	%ymm3,%ymm5,%ymm5
   8348 	vpaddd	%ymm5,%ymm1,%ymm1
   8349 	vpxor	%ymm1,%ymm13,%ymm13
   8350 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   8351 	vpaddd	%ymm13,%ymm9,%ymm9
   8352 	vpxor	%ymm9,%ymm5,%ymm5
   8353 	vpslld	$7,%ymm5,%ymm3
   8354 	vpsrld	$25,%ymm5,%ymm5
   8355 	vpxor	%ymm3,%ymm5,%ymm5
   8356 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   8357 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   8358 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   8359 	vpaddd	%ymm6,%ymm2,%ymm2
   8360 	vpxor	%ymm2,%ymm14,%ymm14
   8361 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   8362 	vpaddd	%ymm14,%ymm10,%ymm10
   8363 	vpxor	%ymm10,%ymm6,%ymm6
   8364 	vpsrld	$20,%ymm6,%ymm3
   8365 	vpslld	$12,%ymm6,%ymm6
   8366 	vpxor	%ymm3,%ymm6,%ymm6
   8367 	vpaddd	%ymm6,%ymm2,%ymm2
   8368 	vpxor	%ymm2,%ymm14,%ymm14
   8369 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   8370 	vpaddd	%ymm14,%ymm10,%ymm10
   8371 	vpxor	%ymm10,%ymm6,%ymm6
   8372 	vpslld	$7,%ymm6,%ymm3
   8373 	vpsrld	$25,%ymm6,%ymm6
   8374 	vpxor	%ymm3,%ymm6,%ymm6
   8375 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   8376 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   8377 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   8378 	vpaddd	%ymm4,%ymm0,%ymm0
   8379 	vpxor	%ymm0,%ymm12,%ymm12
   8380 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   8381 	vpaddd	%ymm12,%ymm8,%ymm8
   8382 	vpxor	%ymm8,%ymm4,%ymm4
   8383 	vpsrld	$20,%ymm4,%ymm3
   8384 	vpslld	$12,%ymm4,%ymm4
   8385 	vpxor	%ymm3,%ymm4,%ymm4
   8386 	vpaddd	%ymm4,%ymm0,%ymm0
   8387 	vpxor	%ymm0,%ymm12,%ymm12
   8388 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   8389 	vpaddd	%ymm12,%ymm8,%ymm8
   8390 	vpxor	%ymm8,%ymm4,%ymm4
   8391 	vpslld	$7,%ymm4,%ymm3
   8392 	vpsrld	$25,%ymm4,%ymm4
   8393 	vpxor	%ymm3,%ymm4,%ymm4
   8394 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   8395 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   8396 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   8397 	vpaddd	%ymm5,%ymm1,%ymm1
   8398 	vpxor	%ymm1,%ymm13,%ymm13
   8399 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   8400 	vpaddd	%ymm13,%ymm9,%ymm9
   8401 	vpxor	%ymm9,%ymm5,%ymm5
   8402 	vpsrld	$20,%ymm5,%ymm3
   8403 	vpslld	$12,%ymm5,%ymm5
   8404 	vpxor	%ymm3,%ymm5,%ymm5
   8405 	vpaddd	%ymm5,%ymm1,%ymm1
   8406 	vpxor	%ymm1,%ymm13,%ymm13
   8407 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   8408 	vpaddd	%ymm13,%ymm9,%ymm9
   8409 	vpxor	%ymm9,%ymm5,%ymm5
   8410 	vpslld	$7,%ymm5,%ymm3
   8411 	vpsrld	$25,%ymm5,%ymm5
   8412 	vpxor	%ymm3,%ymm5,%ymm5
   8413 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   8414 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   8415 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   8416 	vpaddd	%ymm6,%ymm2,%ymm2
   8417 	vpxor	%ymm2,%ymm14,%ymm14
   8418 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   8419 	vpaddd	%ymm14,%ymm10,%ymm10
   8420 	vpxor	%ymm10,%ymm6,%ymm6
   8421 	vpsrld	$20,%ymm6,%ymm3
   8422 	vpslld	$12,%ymm6,%ymm6
   8423 	vpxor	%ymm3,%ymm6,%ymm6
   8424 	vpaddd	%ymm6,%ymm2,%ymm2
   8425 	vpxor	%ymm2,%ymm14,%ymm14
   8426 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   8427 	vpaddd	%ymm14,%ymm10,%ymm10
   8428 	vpxor	%ymm10,%ymm6,%ymm6
   8429 	vpslld	$7,%ymm6,%ymm3
   8430 	vpsrld	$25,%ymm6,%ymm6
   8431 	vpxor	%ymm3,%ymm6,%ymm6
   8432 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   8433 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   8434 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   8435 
   8436 	decq	%r10
   8437 	jne	1b
   8438 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   8439 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   8440 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   8441 	vpaddd	%ymm7,%ymm4,%ymm4
   8442 	vpaddd	%ymm7,%ymm5,%ymm5
   8443 	vpaddd	%ymm7,%ymm6,%ymm6
   8444 	vpaddd	%ymm11,%ymm8,%ymm8
   8445 	vpaddd	%ymm11,%ymm9,%ymm9
   8446 	vpaddd	%ymm11,%ymm10,%ymm10
   8447 	vpaddd	160(%rbp),%ymm12,%ymm12
   8448 	vpaddd	192(%rbp),%ymm13,%ymm13
   8449 	vpaddd	224(%rbp),%ymm14,%ymm14
   8450 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   8451 
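	/* Clamp r per RFC 7539 and store the Poly1305 key pair (r,s), taken
	   from the first 32 bytes of key stream, at 0(%rbp). (Editor
	   annotation.) */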
   8452 	vpand	.clamp(%rip),%ymm3,%ymm3
   8453 	vmovdqa	%ymm3,0(%rbp)
   8454 
   8455 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   8456 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   8457 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
   8458 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
   8459 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
   8460 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
   8461 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
   8462 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
   8463 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
   8464 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
   8465 	jmp	seal_avx2_short
   8466 
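	/* seal_avx2_192: plaintexts of at most 192 bytes; two two-block
	   states give four ChaCha20 blocks (256 bytes) of key stream.
	   (Editor annotation.) */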
   8467 seal_avx2_192:
   8468 	vmovdqa	%ymm0,%ymm1
   8469 	vmovdqa	%ymm0,%ymm2
   8470 	vmovdqa	%ymm4,%ymm5
   8471 	vmovdqa	%ymm4,%ymm6
   8472 	vmovdqa	%ymm8,%ymm9
   8473 	vmovdqa	%ymm8,%ymm10
   8474 	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
   8475 	vmovdqa	%ymm12,%ymm11
   8476 	vmovdqa	%ymm13,%ymm15
   8477 	movq	$10,%r10
   8478 1:
   8479 	vpaddd	%ymm4,%ymm0,%ymm0
   8480 	vpxor	%ymm0,%ymm12,%ymm12
   8481 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   8482 	vpaddd	%ymm12,%ymm8,%ymm8
   8483 	vpxor	%ymm8,%ymm4,%ymm4
   8484 	vpsrld	$20,%ymm4,%ymm3
   8485 	vpslld	$12,%ymm4,%ymm4
   8486 	vpxor	%ymm3,%ymm4,%ymm4
   8487 	vpaddd	%ymm4,%ymm0,%ymm0
   8488 	vpxor	%ymm0,%ymm12,%ymm12
   8489 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   8490 	vpaddd	%ymm12,%ymm8,%ymm8
   8491 	vpxor	%ymm8,%ymm4,%ymm4
   8492 	vpslld	$7,%ymm4,%ymm3
   8493 	vpsrld	$25,%ymm4,%ymm4
   8494 	vpxor	%ymm3,%ymm4,%ymm4
   8495 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   8496 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   8497 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   8498 	vpaddd	%ymm5,%ymm1,%ymm1
   8499 	vpxor	%ymm1,%ymm13,%ymm13
   8500 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   8501 	vpaddd	%ymm13,%ymm9,%ymm9
   8502 	vpxor	%ymm9,%ymm5,%ymm5
   8503 	vpsrld	$20,%ymm5,%ymm3
   8504 	vpslld	$12,%ymm5,%ymm5
   8505 	vpxor	%ymm3,%ymm5,%ymm5
   8506 	vpaddd	%ymm5,%ymm1,%ymm1
   8507 	vpxor	%ymm1,%ymm13,%ymm13
   8508 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   8509 	vpaddd	%ymm13,%ymm9,%ymm9
   8510 	vpxor	%ymm9,%ymm5,%ymm5
   8511 	vpslld	$7,%ymm5,%ymm3
   8512 	vpsrld	$25,%ymm5,%ymm5
   8513 	vpxor	%ymm3,%ymm5,%ymm5
   8514 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   8515 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   8516 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   8517 	vpaddd	%ymm4,%ymm0,%ymm0
   8518 	vpxor	%ymm0,%ymm12,%ymm12
   8519 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   8520 	vpaddd	%ymm12,%ymm8,%ymm8
   8521 	vpxor	%ymm8,%ymm4,%ymm4
   8522 	vpsrld	$20,%ymm4,%ymm3
   8523 	vpslld	$12,%ymm4,%ymm4
   8524 	vpxor	%ymm3,%ymm4,%ymm4
   8525 	vpaddd	%ymm4,%ymm0,%ymm0
   8526 	vpxor	%ymm0,%ymm12,%ymm12
   8527 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   8528 	vpaddd	%ymm12,%ymm8,%ymm8
   8529 	vpxor	%ymm8,%ymm4,%ymm4
   8530 	vpslld	$7,%ymm4,%ymm3
   8531 	vpsrld	$25,%ymm4,%ymm4
   8532 	vpxor	%ymm3,%ymm4,%ymm4
   8533 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   8534 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   8535 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   8536 	vpaddd	%ymm5,%ymm1,%ymm1
   8537 	vpxor	%ymm1,%ymm13,%ymm13
   8538 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   8539 	vpaddd	%ymm13,%ymm9,%ymm9
   8540 	vpxor	%ymm9,%ymm5,%ymm5
   8541 	vpsrld	$20,%ymm5,%ymm3
   8542 	vpslld	$12,%ymm5,%ymm5
   8543 	vpxor	%ymm3,%ymm5,%ymm5
   8544 	vpaddd	%ymm5,%ymm1,%ymm1
   8545 	vpxor	%ymm1,%ymm13,%ymm13
   8546 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   8547 	vpaddd	%ymm13,%ymm9,%ymm9
   8548 	vpxor	%ymm9,%ymm5,%ymm5
   8549 	vpslld	$7,%ymm5,%ymm3
   8550 	vpsrld	$25,%ymm5,%ymm5
   8551 	vpxor	%ymm3,%ymm5,%ymm5
   8552 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   8553 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   8554 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   8555 
   8556 	decq	%r10
   8557 	jne	1b
   8558 	vpaddd	%ymm2,%ymm0,%ymm0
   8559 	vpaddd	%ymm2,%ymm1,%ymm1
   8560 	vpaddd	%ymm6,%ymm4,%ymm4
   8561 	vpaddd	%ymm6,%ymm5,%ymm5
   8562 	vpaddd	%ymm10,%ymm8,%ymm8
   8563 	vpaddd	%ymm10,%ymm9,%ymm9
   8564 	vpaddd	%ymm11,%ymm12,%ymm12
   8565 	vpaddd	%ymm15,%ymm13,%ymm13
   8566 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   8567 
   8568 	vpand	.clamp(%rip),%ymm3,%ymm3
   8569 	vmovdqa	%ymm3,0(%rbp)
   8570 
   8571 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   8572 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   8573 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
   8574 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
   8575 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
   8576 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
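	/* seal_avx2_short: hash the AAD (length in %r8), then alternate
	   between seal_avx2_hash, which absorbs %rcx bytes of already-written
	   ciphertext 16 bytes at a time, and seal_avx2_short_loop, which
	   encrypts from the key-stream register queue. (Editor annotation.) */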
   8577 seal_avx2_short:
    8578 	movq	%r8,%r8	/* no-op left by the generator; the AAD length is already in %r8 for poly_hash_ad_internal (editor annotation) */
   8579 	call	poly_hash_ad_internal
   8580 	xorq	%rcx,%rcx
   8581 seal_avx2_hash:
   8582 	cmpq	$16,%rcx
   8583 	jb	seal_avx2_short_loop
   8584 	addq	0(%rdi),%r10
   8585 	adcq	8+0(%rdi),%r11
   8586 	adcq	$1,%r12
   8587 	movq	0+0(%rbp),%rax
   8588 	movq	%rax,%r15
   8589 	mulq	%r10
   8590 	movq	%rax,%r13
   8591 	movq	%rdx,%r14
   8592 	movq	0+0(%rbp),%rax
   8593 	mulq	%r11
   8594 	imulq	%r12,%r15
   8595 	addq	%rax,%r14
   8596 	adcq	%rdx,%r15
   8597 	movq	8+0(%rbp),%rax
   8598 	movq	%rax,%r9
   8599 	mulq	%r10
   8600 	addq	%rax,%r14
   8601 	adcq	$0,%rdx
   8602 	movq	%rdx,%r10
   8603 	movq	8+0(%rbp),%rax
   8604 	mulq	%r11
   8605 	addq	%rax,%r15
   8606 	adcq	$0,%rdx
   8607 	imulq	%r12,%r9
   8608 	addq	%r10,%r15
   8609 	adcq	%rdx,%r9
   8610 	movq	%r13,%r10
   8611 	movq	%r14,%r11
   8612 	movq	%r15,%r12
   8613 	andq	$3,%r12
   8614 	movq	%r15,%r13
   8615 	andq	$-4,%r13
   8616 	movq	%r9,%r14
   8617 	shrdq	$2,%r9,%r15
   8618 	shrq	$2,%r9
   8619 	addq	%r13,%r10
   8620 	adcq	%r14,%r11
   8621 	adcq	$0,%r12
   8622 	addq	%r15,%r10
   8623 	adcq	%r9,%r11
   8624 	adcq	$0,%r12
   8625 
   8626 	subq	$16,%rcx
   8627 	addq	$16,%rdi
   8628 	jmp	seal_avx2_hash
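	/* seal_avx2_short_loop: XOR 32 bytes of plaintext with the key stream
	   in %ymm0, store the ciphertext, and absorb it with two Poly1305
	   block updates. (Editor annotation.) */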
   8629 seal_avx2_short_loop:
   8630 	cmpq	$32,%rbx
   8631 	jb	seal_avx2_short_tail
   8632 	subq	$32,%rbx
   8633 
   8634 	vpxor	(%rsi),%ymm0,%ymm0
   8635 	vmovdqu	%ymm0,(%rdi)
   8636 	leaq	32(%rsi),%rsi
   8637 
   8638 	addq	0(%rdi),%r10
   8639 	adcq	8+0(%rdi),%r11
   8640 	adcq	$1,%r12
   8641 	movq	0+0(%rbp),%rax
   8642 	movq	%rax,%r15
   8643 	mulq	%r10
   8644 	movq	%rax,%r13
   8645 	movq	%rdx,%r14
   8646 	movq	0+0(%rbp),%rax
   8647 	mulq	%r11
   8648 	imulq	%r12,%r15
   8649 	addq	%rax,%r14
   8650 	adcq	%rdx,%r15
   8651 	movq	8+0(%rbp),%rax
   8652 	movq	%rax,%r9
   8653 	mulq	%r10
   8654 	addq	%rax,%r14
   8655 	adcq	$0,%rdx
   8656 	movq	%rdx,%r10
   8657 	movq	8+0(%rbp),%rax
   8658 	mulq	%r11
   8659 	addq	%rax,%r15
   8660 	adcq	$0,%rdx
   8661 	imulq	%r12,%r9
   8662 	addq	%r10,%r15
   8663 	adcq	%rdx,%r9
   8664 	movq	%r13,%r10
   8665 	movq	%r14,%r11
   8666 	movq	%r15,%r12
   8667 	andq	$3,%r12
   8668 	movq	%r15,%r13
   8669 	andq	$-4,%r13
   8670 	movq	%r9,%r14
   8671 	shrdq	$2,%r9,%r15
   8672 	shrq	$2,%r9
   8673 	addq	%r13,%r10
   8674 	adcq	%r14,%r11
   8675 	adcq	$0,%r12
   8676 	addq	%r15,%r10
   8677 	adcq	%r9,%r11
   8678 	adcq	$0,%r12
   8679 	addq	16(%rdi),%r10
   8680 	adcq	8+16(%rdi),%r11
   8681 	adcq	$1,%r12
   8682 	movq	0+0(%rbp),%rax
   8683 	movq	%rax,%r15
   8684 	mulq	%r10
   8685 	movq	%rax,%r13
   8686 	movq	%rdx,%r14
   8687 	movq	0+0(%rbp),%rax
   8688 	mulq	%r11
   8689 	imulq	%r12,%r15
   8690 	addq	%rax,%r14
   8691 	adcq	%rdx,%r15
   8692 	movq	8+0(%rbp),%rax
   8693 	movq	%rax,%r9
   8694 	mulq	%r10
   8695 	addq	%rax,%r14
   8696 	adcq	$0,%rdx
   8697 	movq	%rdx,%r10
   8698 	movq	8+0(%rbp),%rax
   8699 	mulq	%r11
   8700 	addq	%rax,%r15
   8701 	adcq	$0,%rdx
   8702 	imulq	%r12,%r9
   8703 	addq	%r10,%r15
   8704 	adcq	%rdx,%r9
   8705 	movq	%r13,%r10
   8706 	movq	%r14,%r11
   8707 	movq	%r15,%r12
   8708 	andq	$3,%r12
   8709 	movq	%r15,%r13
   8710 	andq	$-4,%r13
   8711 	movq	%r9,%r14
   8712 	shrdq	$2,%r9,%r15
   8713 	shrq	$2,%r9
   8714 	addq	%r13,%r10
   8715 	adcq	%r14,%r11
   8716 	adcq	$0,%r12
   8717 	addq	%r15,%r10
   8718 	adcq	%r9,%r11
   8719 	adcq	$0,%r12
   8720 
   8721 	leaq	32(%rdi),%rdi
   8722 
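	/* %ymm0 is consumed; advance the key-stream queue:
	   %ymm4->%ymm0, %ymm8->%ymm4, %ymm12->%ymm8, %ymm1->%ymm12, and so on.
	   (Editor annotation.) */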
   8723 	vmovdqa	%ymm4,%ymm0
   8724 	vmovdqa	%ymm8,%ymm4
   8725 	vmovdqa	%ymm12,%ymm8
   8726 	vmovdqa	%ymm1,%ymm12
   8727 	vmovdqa	%ymm5,%ymm1
   8728 	vmovdqa	%ymm9,%ymm5
   8729 	vmovdqa	%ymm13,%ymm9
   8730 	vmovdqa	%ymm2,%ymm13
   8731 	vmovdqa	%ymm6,%ymm2
   8732 	jmp	seal_avx2_short_loop
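	/* seal_avx2_short_tail: at most 31 bytes remain. Handle one last full
	   16-byte block from %xmm0 if present, then expose the high lane with
	   vextracti128 for the sub-16-byte path. (Editor annotation.) */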
   8733 seal_avx2_short_tail:
   8734 	cmpq	$16,%rbx
   8735 	jb	1f
   8736 	subq	$16,%rbx
   8737 	vpxor	(%rsi),%xmm0,%xmm3
   8738 	vmovdqu	%xmm3,(%rdi)
   8739 	leaq	16(%rsi),%rsi
   8740 	addq	0(%rdi),%r10
   8741 	adcq	8+0(%rdi),%r11
   8742 	adcq	$1,%r12
   8743 	movq	0+0(%rbp),%rax
   8744 	movq	%rax,%r15
   8745 	mulq	%r10
   8746 	movq	%rax,%r13
   8747 	movq	%rdx,%r14
   8748 	movq	0+0(%rbp),%rax
   8749 	mulq	%r11
   8750 	imulq	%r12,%r15
   8751 	addq	%rax,%r14
   8752 	adcq	%rdx,%r15
   8753 	movq	8+0(%rbp),%rax
   8754 	movq	%rax,%r9
   8755 	mulq	%r10
   8756 	addq	%rax,%r14
   8757 	adcq	$0,%rdx
   8758 	movq	%rdx,%r10
   8759 	movq	8+0(%rbp),%rax
   8760 	mulq	%r11
   8761 	addq	%rax,%r15
   8762 	adcq	$0,%rdx
   8763 	imulq	%r12,%r9
   8764 	addq	%r10,%r15
   8765 	adcq	%rdx,%r9
   8766 	movq	%r13,%r10
   8767 	movq	%r14,%r11
   8768 	movq	%r15,%r12
   8769 	andq	$3,%r12
   8770 	movq	%r15,%r13
   8771 	andq	$-4,%r13
   8772 	movq	%r9,%r14
   8773 	shrdq	$2,%r9,%r15
   8774 	shrq	$2,%r9
   8775 	addq	%r13,%r10
   8776 	adcq	%r14,%r11
   8777 	adcq	$0,%r12
   8778 	addq	%r15,%r10
   8779 	adcq	%r9,%r11
   8780 	adcq	$0,%r12
   8781 
   8782 	leaq	16(%rdi),%rdi
   8783 	vextracti128	$1,%ymm0,%xmm0
   8784 1:
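	/* Fewer than 16 bytes remain: vzeroupper avoids AVX/SSE transition
	   penalties before finishing in the SSE partial-block tail. (Editor
	   annotation.) */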
   8785 	vzeroupper
   8786 	jmp	seal_sse_tail_16
   8787 .cfi_endproc
   8788 #endif
   8789