#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

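# Editorial annotations follow throughout this file; they describe what the
# generated code appears to do and do not alter any instruction or datum.
# .chacha20_consts below is the ChaCha20 "expand 32-byte k" constant,
# duplicated so a 32-byte (AVX2) load sees two copies.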
chacha20_poly1305_constants:

.align	64
.chacha20_consts:
.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
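# .rol8/.rol16: pshufb byte-shuffle masks that rotate each 32-bit lane left
# by 8 and 16 bits, standing in for a shift/shift/xor rotate.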
.rol8:
.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.rol16:
.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
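# Block-counter increments: .sse_inc advances one 128-bit state by 1;
# .avx2_inc advances a pair of interleaved states by 2 each.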
.avx2_init:
.long	0,0,0,0
.sse_inc:
.long	1,0,0,0
.avx2_inc:
.long	2,0,0,0,2,0,0,0
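# .clamp: the Poly1305 "r" clamp from RFC 8439; the all-ones quadwords
# after it presumably let a 32-byte vpand clamp r while passing the "s"
# half of the key through unchanged.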
.clamp:
.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align	16
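# .and_masks[i] keeps the first i+1 bytes of a 16-byte block; used when
# masking a final partial block before it is absorbed into the hash.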
.and_masks:
.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff

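# poly_hash_ad_internal: absorb the additional data into the Poly1305 state.
# Register roles, as inferred from the callers: %rcx = AD pointer, %r8 = AD
# length, 0(%rbp) = clamped key "r", %r10:%r11:%r12 = 130-bit accumulator h.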
.type	poly_hash_ad_internal,@function
.align	64
poly_hash_ad_internal:
.cfi_startproc
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r12,%r12
	cmpq	$13,%r8
	jne	hash_ad_loop
poly_fast_tls_ad:
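# Fast path for TLS's fixed 13-byte AAD: two overlapping 8-byte loads give
# the zero-padded block, and %r12 = 1 supplies the 2^128 block pad bit
# (the AEAD construction zero-pads the AD to a 16-byte boundary).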

	movq	(%rcx),%r10
	movq	5(%rcx),%r11
	shrq	$24,%r11
	movq	$1,%r12
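# The block below recurs, with varying interleave, throughout this file.
# It is one Poly1305 step, h = h * r mod 2^130 - 5: a 3x2-limb schoolbook
# multiply into %r13:%r14:%r15:%r9, after which the bits above 2^130 (call
# them c) are folded back in times 5, added once as c and once as 4c,
# using 2^130 = 5 (mod 2^130 - 5).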
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

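# 0xf3,0xc3 is "rep ret", a plain return spelled this way to sidestep an
# AMD branch-predictor penalty.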
	.byte	0xf3,0xc3
hash_ad_loop:
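# Absorb whole 16-byte AD blocks; the adcq $1 appends the 2^128 pad bit.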

	cmpq	$16,%r8
	jb	hash_ad_tail
	addq	0(%rcx),%r10
	adcq	8+0(%rcx),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rcx),%rcx
	subq	$16,%r8
	jmp	hash_ad_loop
hash_ad_tail:
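# 1-15 trailing AD bytes: assemble the final zero-padded block in
# %r13:%r14 by shifting bytes in from the end, then absorb it.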
	cmpq	$0,%r8
	je	1f

	xorq	%r13,%r13
	xorq	%r14,%r14
	xorq	%r15,%r15
	addq	%r8,%rcx
hash_ad_tail_loop:
	shldq	$8,%r13,%r14
	shlq	$8,%r13
	movzbq	-1(%rcx),%r15
	xorq	%r15,%r13
	decq	%rcx
	decq	%r8
	jne	hash_ad_tail_loop

	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


1:
	.byte	0xf3,0xc3
.cfi_endproc
.size	poly_hash_ad_internal, .-poly_hash_ad_internal

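# chacha20_poly1305_open(out /*%rdi*/, in /*%rsi*/, in_len /*%rdx*/,
#                        ad /*%rcx*/, ad_len /*%r8*/, data /*%r9*/)
# Decrypts and writes the expected 16-byte tag through the pointer saved
# from %r9 on exit. (Prototype inferred from BoringSSL's C glue; the exact
# layout of *data is not spelled out here.)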
.globl	chacha20_poly1305_open
.hidden chacha20_poly1305_open
.type	chacha20_poly1305_open,@function
.align	64
chacha20_poly1305_open:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
	pushq	%rbx
.cfi_adjust_cfa_offset	8
	pushq	%r12
.cfi_adjust_cfa_offset	8
	pushq	%r13
.cfi_adjust_cfa_offset	8
	pushq	%r14
.cfi_adjust_cfa_offset	8
	pushq	%r15
.cfi_adjust_cfa_offset	8


	pushq	%r9
.cfi_adjust_cfa_offset	8
	subq	$288 + 32,%rsp
.cfi_adjust_cfa_offset	288 + 32
.cfi_offset	rbp, -16
.cfi_offset	rbx, -24
.cfi_offset	r12, -32
.cfi_offset	r13, -40
.cfi_offset	r14, -48
.cfi_offset	r15, -56
	leaq	32(%rsp),%rbp
	andq	$-32,%rbp
	movq	%rdx,8+32(%rbp)
	movq	%r8,0+32(%rbp)
	movq	%rdx,%rbx

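# Dispatch on OPENSSL_ia32cap_P word 2: 0x120 (288) appears to test the
# AVX2 and BMI2 bits; if both are set, use the AVX2 implementation.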
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	andl	$288,%eax
	xorl	$288,%eax
	jz	chacha20_poly1305_open_avx2

1:
	cmpq	$128,%rbx
	jbe	open_sse_128

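# More than 128 bytes: run one ChaCha20 block on the key/nonce from *%r9;
# its first 32 bytes of keystream become the Poly1305 key (r clamped,
# s kept as-is), after which the AD is hashed.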
	movdqa	.chacha20_consts(%rip),%xmm0
	movdqu	0(%r9),%xmm4
	movdqu	16(%r9),%xmm8
	movdqu	32(%r9),%xmm12
	movdqa	%xmm12,%xmm7

	movdqa	%xmm4,48(%rbp)
	movdqa	%xmm8,64(%rbp)
	movdqa	%xmm12,96(%rbp)
	movq	$10,%r10
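# 10 double rounds (column round + diagonal round) = 20 ChaCha20 rounds.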
1:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
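# The .byte sequences encode palignr $4/$8/$12 (SSSE3), rotating the b/c/d
# rows so the next quarter-round pass works on the diagonals.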
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	decq	%r10
	jne	1b

	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4

	pand	.clamp(%rip),%xmm0
	movdqa	%xmm0,0(%rbp)
	movdqa	%xmm4,16(%rbp)

	movq	%r8,%r8
	call	poly_hash_ad_internal
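# Main decrypt loop: 4 ChaCha20 blocks (256 bytes) per trip, with Poly1305
# steps over the ciphertext interleaved into the vector code.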
open_sse_main_loop:
	cmpq	$256,%rbx
	jb	2f

	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	96(%rbp),%xmm15
	paddd	.sse_inc(%rip),%xmm15
	movdqa	%xmm15,%xmm14
	paddd	.sse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)
	movdqa	%xmm14,128(%rbp)
	movdqa	%xmm15,144(%rbp)



	movq	$4,%rcx
	movq	%rsi,%r8
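# %rcx counts loop trips: each trip through 1: runs one double round and
# absorbs one 16-byte ciphertext block; after the counted part (jge), the
# code keeps looping with extra absorbs until %rcx = -6, for 10 double
# rounds and 16 absorbed blocks per 256-byte chunk.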
1:
	movdqa	%xmm8,80(%rbp)
	movdqa	.rol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	addq	0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12

	leaq	16(%r8),%r8
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movdqa	.rol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	80(%rbp),%xmm8
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	movdqa	%xmm8,80(%rbp)
	movdqa	.rol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	.rol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	80(%rbp),%xmm8
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	decq	%rcx
	jge	1b
	addq	0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%r8),%r8
	cmpq	$-6,%rcx
	jg	1b
	paddd	.chacha20_consts(%rip),%xmm3
	paddd	48(%rbp),%xmm7
	paddd	64(%rbp),%xmm11
	paddd	144(%rbp),%xmm15
	paddd	.chacha20_consts(%rip),%xmm2
	paddd	48(%rbp),%xmm6
	paddd	64(%rbp),%xmm10
	paddd	128(%rbp),%xmm14
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqa	%xmm12,80(%rbp)
	movdqu	0 + 0(%rsi),%xmm12
	pxor	%xmm3,%xmm12
	movdqu	%xmm12,0 + 0(%rdi)
	movdqu	16 + 0(%rsi),%xmm12
	pxor	%xmm7,%xmm12
	movdqu	%xmm12,16 + 0(%rdi)
	movdqu	32 + 0(%rsi),%xmm12
	pxor	%xmm11,%xmm12
	movdqu	%xmm12,32 + 0(%rdi)
	movdqu	48 + 0(%rsi),%xmm12
	pxor	%xmm15,%xmm12
	movdqu	%xmm12,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 64(%rdi)
	movdqu	%xmm6,16 + 64(%rdi)
	movdqu	%xmm10,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 128(%rdi)
	movdqu	%xmm5,16 + 128(%rdi)
	movdqu	%xmm9,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)
	movdqu	0 + 192(%rsi),%xmm3
	movdqu	16 + 192(%rsi),%xmm7
	movdqu	32 + 192(%rsi),%xmm11
	movdqu	48 + 192(%rsi),%xmm15
	pxor	%xmm3,%xmm0
	pxor	%xmm7,%xmm4
	pxor	%xmm11,%xmm8
	pxor	80(%rbp),%xmm15
	movdqu	%xmm0,0 + 192(%rdi)
	movdqu	%xmm4,16 + 192(%rdi)
	movdqu	%xmm8,32 + 192(%rdi)
	movdqu	%xmm15,48 + 192(%rdi)

	leaq	256(%rsi),%rsi
	leaq	256(%rdi),%rdi
	subq	$256,%rbx
	jmp	open_sse_main_loop
2:
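# Under 256 bytes left: choose a tail path by size (<=64, <=128, <=192,
# else <256), hashing the remaining ciphertext as keystream is generated.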

	testq	%rbx,%rbx
	jz	open_sse_finalize
	cmpq	$64,%rbx
	ja	3f
	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	96(%rbp),%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)

	xorq	%r8,%r8
	movq	%rbx,%rcx
	cmpq	$16,%rcx
	jb	2f
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	subq	$16,%rcx
2:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	cmpq	$16,%rcx
	jae	1b
	cmpq	$160,%r8
	jne	2b
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12

	jmp	open_sse_tail_64_dec_loop
3:
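# 65-128 byte tail: two ChaCha20 blocks, absorbing one ciphertext block
# per double round until the data is exhausted.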
	cmpq	$128,%rbx
	ja	3f
	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	96(%rbp),%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)

	movq	%rbx,%rcx
	andq	$-16,%rcx
	xorq	%r8,%r8
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

2:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4

	cmpq	%rcx,%r8
	jb	1b
	cmpq	$160,%r8
	jne	2b
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 0(%rdi)
	movdqu	%xmm5,16 + 0(%rdi)
	movdqu	%xmm9,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)

	subq	$64,%rbx
	leaq	64(%rsi),%rsi
	leaq	64(%rdi),%rdi
	jmp	open_sse_tail_64_dec_loop
3:
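# 129-192 byte tail: three ChaCha20 blocks; at most 160 bytes are hashed
# inside the round loop, the 161st-192nd bytes are absorbed afterwards.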
	cmpq	$192,%rbx
	ja	3f
	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	96(%rbp),%xmm14
	paddd	.sse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)
	movdqa	%xmm14,128(%rbp)

	movq	%rbx,%rcx
	movq	$160,%r8
	cmpq	$160,%rcx
	cmovgq	%r8,%rcx
	andq	$-16,%rcx
	xorq	%r8,%r8
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

2:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4

	cmpq	%rcx,%r8
	jb	1b
	cmpq	$160,%r8
	jne	2b
	cmpq	$176,%rbx
	jb	1f
	addq	160(%rsi),%r10
	adcq	8+160(%rsi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	cmpq	$192,%rbx
	jb	1f
	addq	176(%rsi),%r10
	adcq	8+176(%rsi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

1:
	paddd	.chacha20_consts(%rip),%xmm2
	paddd	48(%rbp),%xmm6
	paddd	64(%rbp),%xmm10
	paddd	128(%rbp),%xmm14
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 0(%rdi)
	movdqu	%xmm6,16 + 0(%rdi)
	movdqu	%xmm10,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 64(%rdi)
	movdqu	%xmm5,16 + 64(%rdi)
	movdqu	%xmm9,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)

	subq	$128,%rbx
	leaq	128(%rsi),%rsi
	leaq	128(%rdi),%rdi
	jmp	open_sse_tail_64_dec_loop
3:
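# 193-255 byte tail: four ChaCha20 blocks; 160 bytes are hashed while the
# rounds run, then the loop below catches up on the remaining whole blocks.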

	movdqa	.chacha20_consts(%rip),%xmm0
	movdqa	48(%rbp),%xmm4
	movdqa	64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	96(%rbp),%xmm15
	paddd	.sse_inc(%rip),%xmm15
	movdqa	%xmm15,%xmm14
	paddd	.sse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.sse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.sse_inc(%rip),%xmm12
	movdqa	%xmm12,96(%rbp)
	movdqa	%xmm13,112(%rbp)
	movdqa	%xmm14,128(%rbp)
	movdqa	%xmm15,144(%rbp)

	xorq	%r8,%r8
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movdqa	%xmm11,80(%rbp)
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm4
	pxor	%xmm11,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm4
	pxor	%xmm11,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm5
	pxor	%xmm11,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm5
	pxor	%xmm11,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm6
	pxor	%xmm11,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm6
	pxor	%xmm11,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	movdqa	80(%rbp),%xmm11
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movdqa	%xmm9,80(%rbp)
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol16(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$12,%xmm9
	psrld	$20,%xmm7
	pxor	%xmm9,%xmm7
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol8(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$7,%xmm9
	psrld	$25,%xmm7
	pxor	%xmm9,%xmm7
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
	movdqa	80(%rbp),%xmm9
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	movdqa	%xmm11,80(%rbp)
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm4
	pxor	%xmm11,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.rol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm4
	pxor	%xmm11,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm5
	pxor	%xmm11,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.rol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm5
	pxor	%xmm11,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm6
	pxor	%xmm11,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.rol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm6
	pxor	%xmm11,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
	movdqa	80(%rbp),%xmm11
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	movdqa	%xmm9,80(%rbp)
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol16(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$12,%xmm9
	psrld	$20,%xmm7
	pxor	%xmm9,%xmm7
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.rol8(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$7,%xmm9
	psrld	$25,%xmm7
	pxor	%xmm9,%xmm7
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
	movdqa	80(%rbp),%xmm9

	addq	$16,%r8
	cmpq	$160,%r8
	jb	1b
	movq	%rbx,%rcx
	andq	$-16,%rcx
1:
	addq	0(%rsi,%r8), %r10
	adcq	8+0(%rsi,%r8), %r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	addq	$16,%r8
	cmpq	%rcx,%r8
	jb	1b
	paddd	.chacha20_consts(%rip),%xmm3
	paddd	48(%rbp),%xmm7
	paddd	64(%rbp),%xmm11
	paddd	144(%rbp),%xmm15
	paddd	.chacha20_consts(%rip),%xmm2
	paddd	48(%rbp),%xmm6
	paddd	64(%rbp),%xmm10
	paddd	128(%rbp),%xmm14
	paddd	.chacha20_consts(%rip),%xmm1
	paddd	48(%rbp),%xmm5
	paddd	64(%rbp),%xmm9
	paddd	112(%rbp),%xmm13
	paddd	.chacha20_consts(%rip),%xmm0
	paddd	48(%rbp),%xmm4
	paddd	64(%rbp),%xmm8
	paddd	96(%rbp),%xmm12
	movdqa	%xmm12,80(%rbp)
	movdqu	0 + 0(%rsi),%xmm12
	pxor	%xmm3,%xmm12
	movdqu	%xmm12,0 + 0(%rdi)
	movdqu	16 + 0(%rsi),%xmm12
	pxor	%xmm7,%xmm12
	movdqu	%xmm12,16 + 0(%rdi)
	movdqu	32 + 0(%rsi),%xmm12
	pxor	%xmm11,%xmm12
	movdqu	%xmm12,32 + 0(%rdi)
	movdqu	48 + 0(%rsi),%xmm12
	pxor	%xmm15,%xmm12
	movdqu	%xmm12,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 64(%rdi)
	movdqu	%xmm6,16 + 64(%rdi)
	movdqu	%xmm10,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 128(%rdi)
	movdqu	%xmm5,16 + 128(%rdi)
	movdqu	%xmm9,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)

	movdqa	80(%rbp),%xmm12
	subq	$192,%rbx
	leaq	192(%rsi),%rsi
	leaq	192(%rdi),%rdi


open_sse_tail_64_dec_loop:
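# Decrypt what is left 16 bytes at a time, rotating the next keystream
# word into %xmm0 from %xmm4/%xmm8/%xmm12.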
	cmpq	$16,%rbx
	jb	1f
	subq	$16,%rbx
	movdqu	(%rsi),%xmm3
	pxor	%xmm3,%xmm0
	movdqu	%xmm0,(%rdi)
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
	movdqa	%xmm4,%xmm0
	movdqa	%xmm8,%xmm4
	movdqa	%xmm12,%xmm8
	jmp	open_sse_tail_64_dec_loop
1:
	movdqa	%xmm0,%xmm1


open_sse_tail_16:
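# Last 1-15 bytes: gather them into %xmm3 back-to-front, keep the
# ciphertext in %r13:%r14 for the hash (extracted before decryption),
# then write the plaintext out a byte at a time.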
	testq	%rbx,%rbx
	jz	open_sse_finalize



	pxor	%xmm3,%xmm3
	leaq	-1(%rsi,%rbx), %rsi
	movq	%rbx,%r8
2:
	pslldq	$1,%xmm3
	pinsrb	$0,(%rsi),%xmm3
	subq	$1,%rsi
	subq	$1,%r8
	jnz	2b

3:
.byte	102,73,15,126,221
	pextrq	$1,%xmm3,%r14

	pxor	%xmm1,%xmm3


2:
	pextrb	$0,%xmm3,(%rdi)
	psrldq	$1,%xmm3
	addq	$1,%rdi
	subq	$1,%rbx
	jne	2b

	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


open_sse_finalize:
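# Finalize: absorb the length block (ad_len || msg_len, stored at 32(%rbp)
# in the prologue), reduce h fully, add "s" from 16(%rbp), and store the
# 16-byte tag through the saved %r9.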
	addq	32(%rbp),%r10
	adcq	8+32(%rbp),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

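# Final reduction: compute h - (2^130 - 5) via the subq/sbbq chain and
# keep the original h (cmovc) if the subtraction borrows.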
	movq	%r10,%r13
	movq	%r11,%r14
	movq	%r12,%r15
	subq	$-5,%r10
	sbbq	$-1,%r11
	sbbq	$3,%r12
	cmovcq	%r13,%r10
	cmovcq	%r14,%r11
	cmovcq	%r15,%r12

	addq	0+16(%rbp),%r10
	adcq	8+16(%rbp),%r11

	addq	$288 + 32,%rsp
.cfi_adjust_cfa_offset	-(288 + 32)
	popq	%r9
.cfi_adjust_cfa_offset	-8
	movq	%r10,(%r9)
	movq	%r11,8(%r9)

	popq	%r15
.cfi_adjust_cfa_offset	-8
	popq	%r14
.cfi_adjust_cfa_offset	-8
	popq	%r13
.cfi_adjust_cfa_offset	-8
	popq	%r12
.cfi_adjust_cfa_offset	-8
	popq	%rbx
.cfi_adjust_cfa_offset	-8
	popq	%rbp
.cfi_adjust_cfa_offset	-8
	.byte	0xf3,0xc3
.cfi_adjust_cfa_offset	(8 * 6) + 288 + 32

open_sse_128:
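# Whole-message path for <=128 bytes: three ChaCha20 blocks run in
# parallel; block 0 supplies the Poly1305 key, blocks 1-2 the keystream.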
   1861 	movdqu	.chacha20_consts(%rip),%xmm0
   1862 	movdqa	%xmm0,%xmm1
   1863 	movdqa	%xmm0,%xmm2
   1864 	movdqu	0(%r9),%xmm4
   1865 	movdqa	%xmm4,%xmm5
   1866 	movdqa	%xmm4,%xmm6
   1867 	movdqu	16(%r9),%xmm8
   1868 	movdqa	%xmm8,%xmm9
   1869 	movdqa	%xmm8,%xmm10
   1870 	movdqu	32(%r9),%xmm12
   1871 	movdqa	%xmm12,%xmm13
   1872 	paddd	.sse_inc(%rip),%xmm13
   1873 	movdqa	%xmm13,%xmm14
   1874 	paddd	.sse_inc(%rip),%xmm14
   1875 	movdqa	%xmm4,%xmm7
   1876 	movdqa	%xmm8,%xmm11
   1877 	movdqa	%xmm13,%xmm15
   1878 	movq	$10,%r10
   1879 1:
   1880 	paddd	%xmm4,%xmm0
   1881 	pxor	%xmm0,%xmm12
   1882 	pshufb	.rol16(%rip),%xmm12
   1883 	paddd	%xmm12,%xmm8
   1884 	pxor	%xmm8,%xmm4
   1885 	movdqa	%xmm4,%xmm3
   1886 	pslld	$12,%xmm3
   1887 	psrld	$20,%xmm4
   1888 	pxor	%xmm3,%xmm4
   1889 	paddd	%xmm4,%xmm0
   1890 	pxor	%xmm0,%xmm12
   1891 	pshufb	.rol8(%rip),%xmm12
   1892 	paddd	%xmm12,%xmm8
   1893 	pxor	%xmm8,%xmm4
   1894 	movdqa	%xmm4,%xmm3
   1895 	pslld	$7,%xmm3
   1896 	psrld	$25,%xmm4
   1897 	pxor	%xmm3,%xmm4
   1898 .byte	102,15,58,15,228,4
   1899 .byte	102,69,15,58,15,192,8
   1900 .byte	102,69,15,58,15,228,12
   1901 	paddd	%xmm5,%xmm1
   1902 	pxor	%xmm1,%xmm13
   1903 	pshufb	.rol16(%rip),%xmm13
   1904 	paddd	%xmm13,%xmm9
   1905 	pxor	%xmm9,%xmm5
   1906 	movdqa	%xmm5,%xmm3
   1907 	pslld	$12,%xmm3
   1908 	psrld	$20,%xmm5
   1909 	pxor	%xmm3,%xmm5
   1910 	paddd	%xmm5,%xmm1
   1911 	pxor	%xmm1,%xmm13
   1912 	pshufb	.rol8(%rip),%xmm13
   1913 	paddd	%xmm13,%xmm9
   1914 	pxor	%xmm9,%xmm5
   1915 	movdqa	%xmm5,%xmm3
   1916 	pslld	$7,%xmm3
   1917 	psrld	$25,%xmm5
   1918 	pxor	%xmm3,%xmm5
   1919 .byte	102,15,58,15,237,4
   1920 .byte	102,69,15,58,15,201,8
   1921 .byte	102,69,15,58,15,237,12
   1922 	paddd	%xmm6,%xmm2
   1923 	pxor	%xmm2,%xmm14
   1924 	pshufb	.rol16(%rip),%xmm14
   1925 	paddd	%xmm14,%xmm10
   1926 	pxor	%xmm10,%xmm6
   1927 	movdqa	%xmm6,%xmm3
   1928 	pslld	$12,%xmm3
   1929 	psrld	$20,%xmm6
   1930 	pxor	%xmm3,%xmm6
   1931 	paddd	%xmm6,%xmm2
   1932 	pxor	%xmm2,%xmm14
   1933 	pshufb	.rol8(%rip),%xmm14
   1934 	paddd	%xmm14,%xmm10
   1935 	pxor	%xmm10,%xmm6
   1936 	movdqa	%xmm6,%xmm3
   1937 	pslld	$7,%xmm3
   1938 	psrld	$25,%xmm6
   1939 	pxor	%xmm3,%xmm6
   1940 .byte	102,15,58,15,246,4
   1941 .byte	102,69,15,58,15,210,8
   1942 .byte	102,69,15,58,15,246,12
   1943 	paddd	%xmm4,%xmm0
   1944 	pxor	%xmm0,%xmm12
   1945 	pshufb	.rol16(%rip),%xmm12
   1946 	paddd	%xmm12,%xmm8
   1947 	pxor	%xmm8,%xmm4
   1948 	movdqa	%xmm4,%xmm3
   1949 	pslld	$12,%xmm3
   1950 	psrld	$20,%xmm4
   1951 	pxor	%xmm3,%xmm4
   1952 	paddd	%xmm4,%xmm0
   1953 	pxor	%xmm0,%xmm12
   1954 	pshufb	.rol8(%rip),%xmm12
   1955 	paddd	%xmm12,%xmm8
   1956 	pxor	%xmm8,%xmm4
   1957 	movdqa	%xmm4,%xmm3
   1958 	pslld	$7,%xmm3
   1959 	psrld	$25,%xmm4
   1960 	pxor	%xmm3,%xmm4
   1961 .byte	102,15,58,15,228,12
   1962 .byte	102,69,15,58,15,192,8
   1963 .byte	102,69,15,58,15,228,4
   1964 	paddd	%xmm5,%xmm1
   1965 	pxor	%xmm1,%xmm13
   1966 	pshufb	.rol16(%rip),%xmm13
   1967 	paddd	%xmm13,%xmm9
   1968 	pxor	%xmm9,%xmm5
   1969 	movdqa	%xmm5,%xmm3
   1970 	pslld	$12,%xmm3
   1971 	psrld	$20,%xmm5
   1972 	pxor	%xmm3,%xmm5
   1973 	paddd	%xmm5,%xmm1
   1974 	pxor	%xmm1,%xmm13
   1975 	pshufb	.rol8(%rip),%xmm13
   1976 	paddd	%xmm13,%xmm9
   1977 	pxor	%xmm9,%xmm5
   1978 	movdqa	%xmm5,%xmm3
   1979 	pslld	$7,%xmm3
   1980 	psrld	$25,%xmm5
   1981 	pxor	%xmm3,%xmm5
   1982 .byte	102,15,58,15,237,12
   1983 .byte	102,69,15,58,15,201,8
   1984 .byte	102,69,15,58,15,237,4
   1985 	paddd	%xmm6,%xmm2
   1986 	pxor	%xmm2,%xmm14
   1987 	pshufb	.rol16(%rip),%xmm14
   1988 	paddd	%xmm14,%xmm10
   1989 	pxor	%xmm10,%xmm6
   1990 	movdqa	%xmm6,%xmm3
   1991 	pslld	$12,%xmm3
   1992 	psrld	$20,%xmm6
   1993 	pxor	%xmm3,%xmm6
   1994 	paddd	%xmm6,%xmm2
   1995 	pxor	%xmm2,%xmm14
   1996 	pshufb	.rol8(%rip),%xmm14
   1997 	paddd	%xmm14,%xmm10
   1998 	pxor	%xmm10,%xmm6
   1999 	movdqa	%xmm6,%xmm3
   2000 	pslld	$7,%xmm3
   2001 	psrld	$25,%xmm6
   2002 	pxor	%xmm3,%xmm6
   2003 .byte	102,15,58,15,246,12
   2004 .byte	102,69,15,58,15,210,8
   2005 .byte	102,69,15,58,15,246,4
   2006 
   2007 	decq	%r10
   2008 	jnz	1b
   2009 	paddd	.chacha20_consts(%rip),%xmm0
   2010 	paddd	.chacha20_consts(%rip),%xmm1
   2011 	paddd	.chacha20_consts(%rip),%xmm2
   2012 	paddd	%xmm7,%xmm4
   2013 	paddd	%xmm7,%xmm5
   2014 	paddd	%xmm7,%xmm6
   2015 	paddd	%xmm11,%xmm9
   2016 	paddd	%xmm11,%xmm10
   2017 	paddd	%xmm15,%xmm13
   2018 	paddd	.sse_inc(%rip),%xmm15
   2019 	paddd	%xmm15,%xmm14
   2020 
   2021 	pand	.clamp(%rip),%xmm0
   2022 	movdqa	%xmm0,0(%rbp)
   2023 	movdqa	%xmm4,16(%rbp)
   2024 
   2025 	movq	%r8,%r8
   2026 	call	poly_hash_ad_internal
   2027 1:
   2028 	cmpq	$16,%rbx
   2029 	jb	open_sse_tail_16
   2030 	subq	$16,%rbx
   2031 	addq	0(%rsi),%r10
   2032 	adcq	8+0(%rsi),%r11
   2033 	adcq	$1,%r12
   2034 
   2035 
   2036 	movdqu	0(%rsi),%xmm3
   2037 	pxor	%xmm3,%xmm1
   2038 	movdqu	%xmm1,0(%rdi)
   2039 	leaq	16(%rsi),%rsi
   2040 	leaq	16(%rdi),%rdi
   2041 	movq	0+0(%rbp),%rax
   2042 	movq	%rax,%r15
   2043 	mulq	%r10
   2044 	movq	%rax,%r13
   2045 	movq	%rdx,%r14
   2046 	movq	0+0(%rbp),%rax
   2047 	mulq	%r11
   2048 	imulq	%r12,%r15
   2049 	addq	%rax,%r14
   2050 	adcq	%rdx,%r15
   2051 	movq	8+0(%rbp),%rax
   2052 	movq	%rax,%r9
   2053 	mulq	%r10
   2054 	addq	%rax,%r14
   2055 	adcq	$0,%rdx
   2056 	movq	%rdx,%r10
   2057 	movq	8+0(%rbp),%rax
   2058 	mulq	%r11
   2059 	addq	%rax,%r15
   2060 	adcq	$0,%rdx
   2061 	imulq	%r12,%r9
   2062 	addq	%r10,%r15
   2063 	adcq	%rdx,%r9
   2064 	movq	%r13,%r10
   2065 	movq	%r14,%r11
   2066 	movq	%r15,%r12
   2067 	andq	$3,%r12
   2068 	movq	%r15,%r13
   2069 	andq	$-4,%r13
   2070 	movq	%r9,%r14
   2071 	shrdq	$2,%r9,%r15
   2072 	shrq	$2,%r9
   2073 	addq	%r13,%r10
   2074 	adcq	%r14,%r11
   2075 	adcq	$0,%r12
   2076 	addq	%r15,%r10
   2077 	adcq	%r9,%r11
   2078 	adcq	$0,%r12
   2079 
   2080 
   2081 	movdqa	%xmm5,%xmm1
   2082 	movdqa	%xmm9,%xmm5
   2083 	movdqa	%xmm13,%xmm9
   2084 	movdqa	%xmm2,%xmm13
   2085 	movdqa	%xmm6,%xmm2
   2086 	movdqa	%xmm10,%xmm6
   2087 	movdqa	%xmm14,%xmm10
   2088 	jmp	1b
   2089 	jmp	open_sse_tail_16
   2090 .size	chacha20_poly1305_open, .-chacha20_poly1305_open
   2091 .cfi_endproc
   2092 
   2093 
   2094 
   2095 
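# chacha20_poly1305_seal: per the SysV ABI, rdi = ciphertext out, rsi =
# plaintext, rdx = plaintext length, rcx = AD, r8 = AD length, r9 = pointer to
# the key/nonce/extra-ciphertext block (the seal data union in the C glue).
# The 16-byte tag is written back through the saved r9 on return.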
   2096 .globl	chacha20_poly1305_seal
   2097 .hidden chacha20_poly1305_seal
   2098 .type	chacha20_poly1305_seal,@function
   2099 .align	64
   2100 chacha20_poly1305_seal:
   2101 .cfi_startproc
   2102 	pushq	%rbp
   2103 .cfi_adjust_cfa_offset	8
   2104 	pushq	%rbx
   2105 .cfi_adjust_cfa_offset	8
   2106 	pushq	%r12
   2107 .cfi_adjust_cfa_offset	8
   2108 	pushq	%r13
   2109 .cfi_adjust_cfa_offset	8
   2110 	pushq	%r14
   2111 .cfi_adjust_cfa_offset	8
   2112 	pushq	%r15
   2113 .cfi_adjust_cfa_offset	8
   2114 
   2115 
   2116 	pushq	%r9
   2117 .cfi_adjust_cfa_offset	8
   2118 	subq	$288 + 32,%rsp
   2119 .cfi_adjust_cfa_offset	288 + 32
   2120 .cfi_offset	rbp, -16
   2121 .cfi_offset	rbx, -24
   2122 .cfi_offset	r12, -32
   2123 .cfi_offset	r13, -40
   2124 .cfi_offset	r14, -48
   2125 .cfi_offset	r15, -56
   2126 	leaq	32(%rsp),%rbp
   2127 	andq	$-32,%rbp
   2128 	movq	56(%r9),%rbx
   2129 	addq	%rdx,%rbx
   2130 	movq	%rbx,8+32(%rbp)
   2131 	movq	%r8,0+32(%rbp)
   2132 	movq	%rdx,%rbx
   2133 
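# Take the AVX2 path only when CPUID.7:EBX reports both AVX2 (bit 5) and
# BMI2 (bit 8); 288 = 0x120 masks exactly those two bits.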
   2134 	movl	OPENSSL_ia32cap_P+8(%rip),%eax
   2135 	andl	$288,%eax
   2136 	xorl	$288,%eax
   2137 	jz	chacha20_poly1305_seal_avx2
   2138 
   2139 	cmpq	$128,%rbx
   2140 	jbe	seal_sse_128
   2141 
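# Four-way SSE state: xmm0-3 constant rows, xmm4-7 and xmm8-11 key rows,
# xmm12-15 counter/nonce rows with counters n+3, n+2, n+1, n. The
# lowest-counter block (xmm3/7/11/15) will supply the Poly1305 key.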
   2142 	movdqa	.chacha20_consts(%rip),%xmm0
   2143 	movdqu	0(%r9),%xmm4
   2144 	movdqu	16(%r9),%xmm8
   2145 	movdqu	32(%r9),%xmm12
   2146 	movdqa	%xmm0,%xmm1
   2147 	movdqa	%xmm0,%xmm2
   2148 	movdqa	%xmm0,%xmm3
   2149 	movdqa	%xmm4,%xmm5
   2150 	movdqa	%xmm4,%xmm6
   2151 	movdqa	%xmm4,%xmm7
   2152 	movdqa	%xmm8,%xmm9
   2153 	movdqa	%xmm8,%xmm10
   2154 	movdqa	%xmm8,%xmm11
   2155 	movdqa	%xmm12,%xmm15
   2156 	paddd	.sse_inc(%rip),%xmm12
   2157 	movdqa	%xmm12,%xmm14
   2158 	paddd	.sse_inc(%rip),%xmm12
   2159 	movdqa	%xmm12,%xmm13
   2160 	paddd	.sse_inc(%rip),%xmm12
   2161 
   2162 	movdqa	%xmm4,48(%rbp)
   2163 	movdqa	%xmm8,64(%rbp)
   2164 	movdqa	%xmm12,96(%rbp)
   2165 	movdqa	%xmm13,112(%rbp)
   2166 	movdqa	%xmm14,128(%rbp)
   2167 	movdqa	%xmm15,144(%rbp)
   2168 	movq	$10,%r10
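# 10 double rounds = 20 ChaCha20 rounds. xmm8 is spilled to 80(%rbp) so the
# register can hold the .rol16/.rol8 shuffle masks in turn.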
   2169 1:
   2170 	movdqa	%xmm8,80(%rbp)
   2171 	movdqa	.rol16(%rip),%xmm8
   2172 	paddd	%xmm7,%xmm3
   2173 	paddd	%xmm6,%xmm2
   2174 	paddd	%xmm5,%xmm1
   2175 	paddd	%xmm4,%xmm0
   2176 	pxor	%xmm3,%xmm15
   2177 	pxor	%xmm2,%xmm14
   2178 	pxor	%xmm1,%xmm13
   2179 	pxor	%xmm0,%xmm12
   2180 .byte	102,69,15,56,0,248
   2181 .byte	102,69,15,56,0,240
   2182 .byte	102,69,15,56,0,232
   2183 .byte	102,69,15,56,0,224
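# (each .byte run above encodes pshufb %xmm8,%xmm12..%xmm15: a 16-bit rotate
# of every 32-bit lane via the .rol16 mask)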
   2184 	movdqa	80(%rbp),%xmm8
   2185 	paddd	%xmm15,%xmm11
   2186 	paddd	%xmm14,%xmm10
   2187 	paddd	%xmm13,%xmm9
   2188 	paddd	%xmm12,%xmm8
   2189 	pxor	%xmm11,%xmm7
   2190 	pxor	%xmm10,%xmm6
   2191 	pxor	%xmm9,%xmm5
   2192 	pxor	%xmm8,%xmm4
   2193 	movdqa	%xmm8,80(%rbp)
   2194 	movdqa	%xmm7,%xmm8
   2195 	psrld	$20,%xmm8
   2196 	pslld	$32-20,%xmm7
   2197 	pxor	%xmm8,%xmm7
   2198 	movdqa	%xmm6,%xmm8
   2199 	psrld	$20,%xmm8
   2200 	pslld	$32-20,%xmm6
   2201 	pxor	%xmm8,%xmm6
   2202 	movdqa	%xmm5,%xmm8
   2203 	psrld	$20,%xmm8
   2204 	pslld	$32-20,%xmm5
   2205 	pxor	%xmm8,%xmm5
   2206 	movdqa	%xmm4,%xmm8
   2207 	psrld	$20,%xmm8
   2208 	pslld	$32-20,%xmm4
   2209 	pxor	%xmm8,%xmm4
   2210 	movdqa	.rol8(%rip),%xmm8
   2211 	paddd	%xmm7,%xmm3
   2212 	paddd	%xmm6,%xmm2
   2213 	paddd	%xmm5,%xmm1
   2214 	paddd	%xmm4,%xmm0
   2215 	pxor	%xmm3,%xmm15
   2216 	pxor	%xmm2,%xmm14
   2217 	pxor	%xmm1,%xmm13
   2218 	pxor	%xmm0,%xmm12
   2219 .byte	102,69,15,56,0,248
   2220 .byte	102,69,15,56,0,240
   2221 .byte	102,69,15,56,0,232
   2222 .byte	102,69,15,56,0,224
   2223 	movdqa	80(%rbp),%xmm8
   2224 	paddd	%xmm15,%xmm11
   2225 	paddd	%xmm14,%xmm10
   2226 	paddd	%xmm13,%xmm9
   2227 	paddd	%xmm12,%xmm8
   2228 	pxor	%xmm11,%xmm7
   2229 	pxor	%xmm10,%xmm6
   2230 	pxor	%xmm9,%xmm5
   2231 	pxor	%xmm8,%xmm4
   2232 	movdqa	%xmm8,80(%rbp)
   2233 	movdqa	%xmm7,%xmm8
   2234 	psrld	$25,%xmm8
   2235 	pslld	$32-25,%xmm7
   2236 	pxor	%xmm8,%xmm7
   2237 	movdqa	%xmm6,%xmm8
   2238 	psrld	$25,%xmm8
   2239 	pslld	$32-25,%xmm6
   2240 	pxor	%xmm8,%xmm6
   2241 	movdqa	%xmm5,%xmm8
   2242 	psrld	$25,%xmm8
   2243 	pslld	$32-25,%xmm5
   2244 	pxor	%xmm8,%xmm5
   2245 	movdqa	%xmm4,%xmm8
   2246 	psrld	$25,%xmm8
   2247 	pslld	$32-25,%xmm4
   2248 	pxor	%xmm8,%xmm4
   2249 	movdqa	80(%rbp),%xmm8
   2250 .byte	102,15,58,15,255,4
   2251 .byte	102,69,15,58,15,219,8
   2252 .byte	102,69,15,58,15,255,12
   2253 .byte	102,15,58,15,246,4
   2254 .byte	102,69,15,58,15,210,8
   2255 .byte	102,69,15,58,15,246,12
   2256 .byte	102,15,58,15,237,4
   2257 .byte	102,69,15,58,15,201,8
   2258 .byte	102,69,15,58,15,237,12
   2259 .byte	102,15,58,15,228,4
   2260 .byte	102,69,15,58,15,192,8
   2261 .byte	102,69,15,58,15,228,12
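# (palignr $4/$8/$12 of a register with itself rotates the b/c/d rows into
# diagonal position for the second half of the double round)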
   2262 	movdqa	%xmm8,80(%rbp)
   2263 	movdqa	.rol16(%rip),%xmm8
   2264 	paddd	%xmm7,%xmm3
   2265 	paddd	%xmm6,%xmm2
   2266 	paddd	%xmm5,%xmm1
   2267 	paddd	%xmm4,%xmm0
   2268 	pxor	%xmm3,%xmm15
   2269 	pxor	%xmm2,%xmm14
   2270 	pxor	%xmm1,%xmm13
   2271 	pxor	%xmm0,%xmm12
   2272 .byte	102,69,15,56,0,248
   2273 .byte	102,69,15,56,0,240
   2274 .byte	102,69,15,56,0,232
   2275 .byte	102,69,15,56,0,224
   2276 	movdqa	80(%rbp),%xmm8
   2277 	paddd	%xmm15,%xmm11
   2278 	paddd	%xmm14,%xmm10
   2279 	paddd	%xmm13,%xmm9
   2280 	paddd	%xmm12,%xmm8
   2281 	pxor	%xmm11,%xmm7
   2282 	pxor	%xmm10,%xmm6
   2283 	pxor	%xmm9,%xmm5
   2284 	pxor	%xmm8,%xmm4
   2285 	movdqa	%xmm8,80(%rbp)
   2286 	movdqa	%xmm7,%xmm8
   2287 	psrld	$20,%xmm8
   2288 	pslld	$32-20,%xmm7
   2289 	pxor	%xmm8,%xmm7
   2290 	movdqa	%xmm6,%xmm8
   2291 	psrld	$20,%xmm8
   2292 	pslld	$32-20,%xmm6
   2293 	pxor	%xmm8,%xmm6
   2294 	movdqa	%xmm5,%xmm8
   2295 	psrld	$20,%xmm8
   2296 	pslld	$32-20,%xmm5
   2297 	pxor	%xmm8,%xmm5
   2298 	movdqa	%xmm4,%xmm8
   2299 	psrld	$20,%xmm8
   2300 	pslld	$32-20,%xmm4
   2301 	pxor	%xmm8,%xmm4
   2302 	movdqa	.rol8(%rip),%xmm8
   2303 	paddd	%xmm7,%xmm3
   2304 	paddd	%xmm6,%xmm2
   2305 	paddd	%xmm5,%xmm1
   2306 	paddd	%xmm4,%xmm0
   2307 	pxor	%xmm3,%xmm15
   2308 	pxor	%xmm2,%xmm14
   2309 	pxor	%xmm1,%xmm13
   2310 	pxor	%xmm0,%xmm12
   2311 .byte	102,69,15,56,0,248
   2312 .byte	102,69,15,56,0,240
   2313 .byte	102,69,15,56,0,232
   2314 .byte	102,69,15,56,0,224
   2315 	movdqa	80(%rbp),%xmm8
   2316 	paddd	%xmm15,%xmm11
   2317 	paddd	%xmm14,%xmm10
   2318 	paddd	%xmm13,%xmm9
   2319 	paddd	%xmm12,%xmm8
   2320 	pxor	%xmm11,%xmm7
   2321 	pxor	%xmm10,%xmm6
   2322 	pxor	%xmm9,%xmm5
   2323 	pxor	%xmm8,%xmm4
   2324 	movdqa	%xmm8,80(%rbp)
   2325 	movdqa	%xmm7,%xmm8
   2326 	psrld	$25,%xmm8
   2327 	pslld	$32-25,%xmm7
   2328 	pxor	%xmm8,%xmm7
   2329 	movdqa	%xmm6,%xmm8
   2330 	psrld	$25,%xmm8
   2331 	pslld	$32-25,%xmm6
   2332 	pxor	%xmm8,%xmm6
   2333 	movdqa	%xmm5,%xmm8
   2334 	psrld	$25,%xmm8
   2335 	pslld	$32-25,%xmm5
   2336 	pxor	%xmm8,%xmm5
   2337 	movdqa	%xmm4,%xmm8
   2338 	psrld	$25,%xmm8
   2339 	pslld	$32-25,%xmm4
   2340 	pxor	%xmm8,%xmm4
   2341 	movdqa	80(%rbp),%xmm8
   2342 .byte	102,15,58,15,255,12
   2343 .byte	102,69,15,58,15,219,8
   2344 .byte	102,69,15,58,15,255,4
   2345 .byte	102,15,58,15,246,12
   2346 .byte	102,69,15,58,15,210,8
   2347 .byte	102,69,15,58,15,246,4
   2348 .byte	102,15,58,15,237,12
   2349 .byte	102,69,15,58,15,201,8
   2350 .byte	102,69,15,58,15,237,4
   2351 .byte	102,15,58,15,228,12
   2352 .byte	102,69,15,58,15,192,8
   2353 .byte	102,69,15,58,15,228,4
   2354 
   2355 	decq	%r10
   2356 	jnz	1b
   2357 	paddd	.chacha20_consts(%rip),%xmm3
   2358 	paddd	48(%rbp),%xmm7
   2359 	paddd	64(%rbp),%xmm11
   2360 	paddd	144(%rbp),%xmm15
   2361 	paddd	.chacha20_consts(%rip),%xmm2
   2362 	paddd	48(%rbp),%xmm6
   2363 	paddd	64(%rbp),%xmm10
   2364 	paddd	128(%rbp),%xmm14
   2365 	paddd	.chacha20_consts(%rip),%xmm1
   2366 	paddd	48(%rbp),%xmm5
   2367 	paddd	64(%rbp),%xmm9
   2368 	paddd	112(%rbp),%xmm13
   2369 	paddd	.chacha20_consts(%rip),%xmm0
   2370 	paddd	48(%rbp),%xmm4
   2371 	paddd	64(%rbp),%xmm8
   2372 	paddd	96(%rbp),%xmm12
   2373 
   2374 
   2375 	pand	.clamp(%rip),%xmm3
   2376 	movdqa	%xmm3,0(%rbp)
   2377 	movdqa	%xmm7,16(%rbp)
   2378 
   2379 	movq	%r8,%r8
   2380 	call	poly_hash_ad_internal
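# AD hashed. The counter-n block was clamped into the key above, so the
# blocks with counters n+1..n+3 encrypt the first 128 (and, if enough input,
# 192) bytes of plaintext below.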
   2381 	movdqu	0 + 0(%rsi),%xmm3
   2382 	movdqu	16 + 0(%rsi),%xmm7
   2383 	movdqu	32 + 0(%rsi),%xmm11
   2384 	movdqu	48 + 0(%rsi),%xmm15
   2385 	pxor	%xmm3,%xmm2
   2386 	pxor	%xmm7,%xmm6
   2387 	pxor	%xmm11,%xmm10
   2388 	pxor	%xmm14,%xmm15
   2389 	movdqu	%xmm2,0 + 0(%rdi)
   2390 	movdqu	%xmm6,16 + 0(%rdi)
   2391 	movdqu	%xmm10,32 + 0(%rdi)
   2392 	movdqu	%xmm15,48 + 0(%rdi)
   2393 	movdqu	0 + 64(%rsi),%xmm3
   2394 	movdqu	16 + 64(%rsi),%xmm7
   2395 	movdqu	32 + 64(%rsi),%xmm11
   2396 	movdqu	48 + 64(%rsi),%xmm15
   2397 	pxor	%xmm3,%xmm1
   2398 	pxor	%xmm7,%xmm5
   2399 	pxor	%xmm11,%xmm9
   2400 	pxor	%xmm13,%xmm15
   2401 	movdqu	%xmm1,0 + 64(%rdi)
   2402 	movdqu	%xmm5,16 + 64(%rdi)
   2403 	movdqu	%xmm9,32 + 64(%rdi)
   2404 	movdqu	%xmm15,48 + 64(%rdi)
   2405 
   2406 	cmpq	$192,%rbx
   2407 	ja	1f
   2408 	movq	$128,%rcx
   2409 	subq	$128,%rbx
   2410 	leaq	128(%rsi),%rsi
   2411 	jmp	seal_sse_128_seal_hash
   2412 1:
   2413 	movdqu	0 + 128(%rsi),%xmm3
   2414 	movdqu	16 + 128(%rsi),%xmm7
   2415 	movdqu	32 + 128(%rsi),%xmm11
   2416 	movdqu	48 + 128(%rsi),%xmm15
   2417 	pxor	%xmm3,%xmm0
   2418 	pxor	%xmm7,%xmm4
   2419 	pxor	%xmm11,%xmm8
   2420 	pxor	%xmm12,%xmm15
   2421 	movdqu	%xmm0,0 + 128(%rdi)
   2422 	movdqu	%xmm4,16 + 128(%rdi)
   2423 	movdqu	%xmm8,32 + 128(%rdi)
   2424 	movdqu	%xmm15,48 + 128(%rdi)
   2425 
   2426 	movq	$192,%rcx
   2427 	subq	$192,%rbx
   2428 	leaq	192(%rsi),%rsi
   2429 	movq	$2,%rcx
   2430 	movq	$8,%r8
   2431 	cmpq	$64,%rbx
   2432 	jbe	seal_sse_tail_64
   2433 	cmpq	$128,%rbx
   2434 	jbe	seal_sse_tail_128
   2435 	cmpq	$192,%rbx
   2436 	jbe	seal_sse_tail_192
   2437 
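# Main seal loop: each pass generates four fresh keystream blocks (256 bytes)
# while Poly1305-hashing the ciphertext written on the previous pass; rdi
# trails as the hash pointer, hiding the multiply latency inside the rounds.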
   2438 1:
   2439 	movdqa	.chacha20_consts(%rip),%xmm0
   2440 	movdqa	48(%rbp),%xmm4
   2441 	movdqa	64(%rbp),%xmm8
   2442 	movdqa	%xmm0,%xmm1
   2443 	movdqa	%xmm4,%xmm5
   2444 	movdqa	%xmm8,%xmm9
   2445 	movdqa	%xmm0,%xmm2
   2446 	movdqa	%xmm4,%xmm6
   2447 	movdqa	%xmm8,%xmm10
   2448 	movdqa	%xmm0,%xmm3
   2449 	movdqa	%xmm4,%xmm7
   2450 	movdqa	%xmm8,%xmm11
   2451 	movdqa	96(%rbp),%xmm15
   2452 	paddd	.sse_inc(%rip),%xmm15
   2453 	movdqa	%xmm15,%xmm14
   2454 	paddd	.sse_inc(%rip),%xmm14
   2455 	movdqa	%xmm14,%xmm13
   2456 	paddd	.sse_inc(%rip),%xmm13
   2457 	movdqa	%xmm13,%xmm12
   2458 	paddd	.sse_inc(%rip),%xmm12
   2459 	movdqa	%xmm12,96(%rbp)
   2460 	movdqa	%xmm13,112(%rbp)
   2461 	movdqa	%xmm14,128(%rbp)
   2462 	movdqa	%xmm15,144(%rbp)
   2463 
   2464 2:
   2465 	movdqa	%xmm8,80(%rbp)
   2466 	movdqa	.rol16(%rip),%xmm8
   2467 	paddd	%xmm7,%xmm3
   2468 	paddd	%xmm6,%xmm2
   2469 	paddd	%xmm5,%xmm1
   2470 	paddd	%xmm4,%xmm0
   2471 	pxor	%xmm3,%xmm15
   2472 	pxor	%xmm2,%xmm14
   2473 	pxor	%xmm1,%xmm13
   2474 	pxor	%xmm0,%xmm12
   2475 .byte	102,69,15,56,0,248
   2476 .byte	102,69,15,56,0,240
   2477 .byte	102,69,15,56,0,232
   2478 .byte	102,69,15,56,0,224
   2479 	movdqa	80(%rbp),%xmm8
   2480 	paddd	%xmm15,%xmm11
   2481 	paddd	%xmm14,%xmm10
   2482 	paddd	%xmm13,%xmm9
   2483 	paddd	%xmm12,%xmm8
   2484 	pxor	%xmm11,%xmm7
   2485 	addq	0(%rdi),%r10
   2486 	adcq	8+0(%rdi),%r11
   2487 	adcq	$1,%r12
   2488 	pxor	%xmm10,%xmm6
   2489 	pxor	%xmm9,%xmm5
   2490 	pxor	%xmm8,%xmm4
   2491 	movdqa	%xmm8,80(%rbp)
   2492 	movdqa	%xmm7,%xmm8
   2493 	psrld	$20,%xmm8
   2494 	pslld	$32-20,%xmm7
   2495 	pxor	%xmm8,%xmm7
   2496 	movdqa	%xmm6,%xmm8
   2497 	psrld	$20,%xmm8
   2498 	pslld	$32-20,%xmm6
   2499 	pxor	%xmm8,%xmm6
   2500 	movdqa	%xmm5,%xmm8
   2501 	psrld	$20,%xmm8
   2502 	pslld	$32-20,%xmm5
   2503 	pxor	%xmm8,%xmm5
   2504 	movdqa	%xmm4,%xmm8
   2505 	psrld	$20,%xmm8
   2506 	pslld	$32-20,%xmm4
   2507 	pxor	%xmm8,%xmm4
   2508 	movq	0+0(%rbp),%rax
   2509 	movq	%rax,%r15
   2510 	mulq	%r10
   2511 	movq	%rax,%r13
   2512 	movq	%rdx,%r14
   2513 	movq	0+0(%rbp),%rax
   2514 	mulq	%r11
   2515 	imulq	%r12,%r15
   2516 	addq	%rax,%r14
   2517 	adcq	%rdx,%r15
   2518 	movdqa	.rol8(%rip),%xmm8
   2519 	paddd	%xmm7,%xmm3
   2520 	paddd	%xmm6,%xmm2
   2521 	paddd	%xmm5,%xmm1
   2522 	paddd	%xmm4,%xmm0
   2523 	pxor	%xmm3,%xmm15
   2524 	pxor	%xmm2,%xmm14
   2525 	pxor	%xmm1,%xmm13
   2526 	pxor	%xmm0,%xmm12
   2527 .byte	102,69,15,56,0,248
   2528 .byte	102,69,15,56,0,240
   2529 .byte	102,69,15,56,0,232
   2530 .byte	102,69,15,56,0,224
   2531 	movdqa	80(%rbp),%xmm8
   2532 	paddd	%xmm15,%xmm11
   2533 	paddd	%xmm14,%xmm10
   2534 	paddd	%xmm13,%xmm9
   2535 	paddd	%xmm12,%xmm8
   2536 	pxor	%xmm11,%xmm7
   2537 	pxor	%xmm10,%xmm6
   2538 	movq	8+0(%rbp),%rax
   2539 	movq	%rax,%r9
   2540 	mulq	%r10
   2541 	addq	%rax,%r14
   2542 	adcq	$0,%rdx
   2543 	movq	%rdx,%r10
   2544 	movq	8+0(%rbp),%rax
   2545 	mulq	%r11
   2546 	addq	%rax,%r15
   2547 	adcq	$0,%rdx
   2548 	pxor	%xmm9,%xmm5
   2549 	pxor	%xmm8,%xmm4
   2550 	movdqa	%xmm8,80(%rbp)
   2551 	movdqa	%xmm7,%xmm8
   2552 	psrld	$25,%xmm8
   2553 	pslld	$32-25,%xmm7
   2554 	pxor	%xmm8,%xmm7
   2555 	movdqa	%xmm6,%xmm8
   2556 	psrld	$25,%xmm8
   2557 	pslld	$32-25,%xmm6
   2558 	pxor	%xmm8,%xmm6
   2559 	movdqa	%xmm5,%xmm8
   2560 	psrld	$25,%xmm8
   2561 	pslld	$32-25,%xmm5
   2562 	pxor	%xmm8,%xmm5
   2563 	movdqa	%xmm4,%xmm8
   2564 	psrld	$25,%xmm8
   2565 	pslld	$32-25,%xmm4
   2566 	pxor	%xmm8,%xmm4
   2567 	movdqa	80(%rbp),%xmm8
   2568 	imulq	%r12,%r9
   2569 	addq	%r10,%r15
   2570 	adcq	%rdx,%r9
   2571 .byte	102,15,58,15,255,4
   2572 .byte	102,69,15,58,15,219,8
   2573 .byte	102,69,15,58,15,255,12
   2574 .byte	102,15,58,15,246,4
   2575 .byte	102,69,15,58,15,210,8
   2576 .byte	102,69,15,58,15,246,12
   2577 .byte	102,15,58,15,237,4
   2578 .byte	102,69,15,58,15,201,8
   2579 .byte	102,69,15,58,15,237,12
   2580 .byte	102,15,58,15,228,4
   2581 .byte	102,69,15,58,15,192,8
   2582 .byte	102,69,15,58,15,228,12
   2583 	movdqa	%xmm8,80(%rbp)
   2584 	movdqa	.rol16(%rip),%xmm8
   2585 	paddd	%xmm7,%xmm3
   2586 	paddd	%xmm6,%xmm2
   2587 	paddd	%xmm5,%xmm1
   2588 	paddd	%xmm4,%xmm0
   2589 	pxor	%xmm3,%xmm15
   2590 	pxor	%xmm2,%xmm14
   2591 	movq	%r13,%r10
   2592 	movq	%r14,%r11
   2593 	movq	%r15,%r12
   2594 	andq	$3,%r12
   2595 	movq	%r15,%r13
   2596 	andq	$-4,%r13
   2597 	movq	%r9,%r14
   2598 	shrdq	$2,%r9,%r15
   2599 	shrq	$2,%r9
   2600 	addq	%r13,%r10
   2601 	adcq	%r14,%r11
   2602 	adcq	$0,%r12
   2603 	addq	%r15,%r10
   2604 	adcq	%r9,%r11
   2605 	adcq	$0,%r12
   2606 	pxor	%xmm1,%xmm13
   2607 	pxor	%xmm0,%xmm12
   2608 .byte	102,69,15,56,0,248
   2609 .byte	102,69,15,56,0,240
   2610 .byte	102,69,15,56,0,232
   2611 .byte	102,69,15,56,0,224
   2612 	movdqa	80(%rbp),%xmm8
   2613 	paddd	%xmm15,%xmm11
   2614 	paddd	%xmm14,%xmm10
   2615 	paddd	%xmm13,%xmm9
   2616 	paddd	%xmm12,%xmm8
   2617 	pxor	%xmm11,%xmm7
   2618 	pxor	%xmm10,%xmm6
   2619 	pxor	%xmm9,%xmm5
   2620 	pxor	%xmm8,%xmm4
   2621 	movdqa	%xmm8,80(%rbp)
   2622 	movdqa	%xmm7,%xmm8
   2623 	psrld	$20,%xmm8
   2624 	pslld	$32-20,%xmm7
   2625 	pxor	%xmm8,%xmm7
   2626 	movdqa	%xmm6,%xmm8
   2627 	psrld	$20,%xmm8
   2628 	pslld	$32-20,%xmm6
   2629 	pxor	%xmm8,%xmm6
   2630 	movdqa	%xmm5,%xmm8
   2631 	psrld	$20,%xmm8
   2632 	pslld	$32-20,%xmm5
   2633 	pxor	%xmm8,%xmm5
   2634 	movdqa	%xmm4,%xmm8
   2635 	psrld	$20,%xmm8
   2636 	pslld	$32-20,%xmm4
   2637 	pxor	%xmm8,%xmm4
   2638 	movdqa	.rol8(%rip),%xmm8
   2639 	paddd	%xmm7,%xmm3
   2640 	paddd	%xmm6,%xmm2
   2641 	paddd	%xmm5,%xmm1
   2642 	paddd	%xmm4,%xmm0
   2643 	pxor	%xmm3,%xmm15
   2644 	pxor	%xmm2,%xmm14
   2645 	pxor	%xmm1,%xmm13
   2646 	pxor	%xmm0,%xmm12
   2647 .byte	102,69,15,56,0,248
   2648 .byte	102,69,15,56,0,240
   2649 .byte	102,69,15,56,0,232
   2650 .byte	102,69,15,56,0,224
   2651 	movdqa	80(%rbp),%xmm8
   2652 	paddd	%xmm15,%xmm11
   2653 	paddd	%xmm14,%xmm10
   2654 	paddd	%xmm13,%xmm9
   2655 	paddd	%xmm12,%xmm8
   2656 	pxor	%xmm11,%xmm7
   2657 	pxor	%xmm10,%xmm6
   2658 	pxor	%xmm9,%xmm5
   2659 	pxor	%xmm8,%xmm4
   2660 	movdqa	%xmm8,80(%rbp)
   2661 	movdqa	%xmm7,%xmm8
   2662 	psrld	$25,%xmm8
   2663 	pslld	$32-25,%xmm7
   2664 	pxor	%xmm8,%xmm7
   2665 	movdqa	%xmm6,%xmm8
   2666 	psrld	$25,%xmm8
   2667 	pslld	$32-25,%xmm6
   2668 	pxor	%xmm8,%xmm6
   2669 	movdqa	%xmm5,%xmm8
   2670 	psrld	$25,%xmm8
   2671 	pslld	$32-25,%xmm5
   2672 	pxor	%xmm8,%xmm5
   2673 	movdqa	%xmm4,%xmm8
   2674 	psrld	$25,%xmm8
   2675 	pslld	$32-25,%xmm4
   2676 	pxor	%xmm8,%xmm4
   2677 	movdqa	80(%rbp),%xmm8
   2678 .byte	102,15,58,15,255,12
   2679 .byte	102,69,15,58,15,219,8
   2680 .byte	102,69,15,58,15,255,4
   2681 .byte	102,15,58,15,246,12
   2682 .byte	102,69,15,58,15,210,8
   2683 .byte	102,69,15,58,15,246,4
   2684 .byte	102,15,58,15,237,12
   2685 .byte	102,69,15,58,15,201,8
   2686 .byte	102,69,15,58,15,237,4
   2687 .byte	102,15,58,15,228,12
   2688 .byte	102,69,15,58,15,192,8
   2689 .byte	102,69,15,58,15,228,4
   2690 
   2691 	leaq	16(%rdi),%rdi
   2692 	decq	%r8
   2693 	jge	2b
   2694 	addq	0(%rdi),%r10
   2695 	adcq	8+0(%rdi),%r11
   2696 	adcq	$1,%r12
   2697 	movq	0+0(%rbp),%rax
   2698 	movq	%rax,%r15
   2699 	mulq	%r10
   2700 	movq	%rax,%r13
   2701 	movq	%rdx,%r14
   2702 	movq	0+0(%rbp),%rax
   2703 	mulq	%r11
   2704 	imulq	%r12,%r15
   2705 	addq	%rax,%r14
   2706 	adcq	%rdx,%r15
   2707 	movq	8+0(%rbp),%rax
   2708 	movq	%rax,%r9
   2709 	mulq	%r10
   2710 	addq	%rax,%r14
   2711 	adcq	$0,%rdx
   2712 	movq	%rdx,%r10
   2713 	movq	8+0(%rbp),%rax
   2714 	mulq	%r11
   2715 	addq	%rax,%r15
   2716 	adcq	$0,%rdx
   2717 	imulq	%r12,%r9
   2718 	addq	%r10,%r15
   2719 	adcq	%rdx,%r9
   2720 	movq	%r13,%r10
   2721 	movq	%r14,%r11
   2722 	movq	%r15,%r12
   2723 	andq	$3,%r12
   2724 	movq	%r15,%r13
   2725 	andq	$-4,%r13
   2726 	movq	%r9,%r14
   2727 	shrdq	$2,%r9,%r15
   2728 	shrq	$2,%r9
   2729 	addq	%r13,%r10
   2730 	adcq	%r14,%r11
   2731 	adcq	$0,%r12
   2732 	addq	%r15,%r10
   2733 	adcq	%r9,%r11
   2734 	adcq	$0,%r12
   2735 
   2736 	leaq	16(%rdi),%rdi
   2737 	decq	%rcx
   2738 	jg	2b
   2739 	paddd	.chacha20_consts(%rip),%xmm3
   2740 	paddd	48(%rbp),%xmm7
   2741 	paddd	64(%rbp),%xmm11
   2742 	paddd	144(%rbp),%xmm15
   2743 	paddd	.chacha20_consts(%rip),%xmm2
   2744 	paddd	48(%rbp),%xmm6
   2745 	paddd	64(%rbp),%xmm10
   2746 	paddd	128(%rbp),%xmm14
   2747 	paddd	.chacha20_consts(%rip),%xmm1
   2748 	paddd	48(%rbp),%xmm5
   2749 	paddd	64(%rbp),%xmm9
   2750 	paddd	112(%rbp),%xmm13
   2751 	paddd	.chacha20_consts(%rip),%xmm0
   2752 	paddd	48(%rbp),%xmm4
   2753 	paddd	64(%rbp),%xmm8
   2754 	paddd	96(%rbp),%xmm12
   2755 
    2756 	movdqa	%xmm14,80(%rbp)
    2757 	movdqa	%xmm14,80(%rbp)	# redundant: the generator emits this store twice
   2758 	movdqu	0 + 0(%rsi),%xmm14
   2759 	pxor	%xmm3,%xmm14
   2760 	movdqu	%xmm14,0 + 0(%rdi)
   2761 	movdqu	16 + 0(%rsi),%xmm14
   2762 	pxor	%xmm7,%xmm14
   2763 	movdqu	%xmm14,16 + 0(%rdi)
   2764 	movdqu	32 + 0(%rsi),%xmm14
   2765 	pxor	%xmm11,%xmm14
   2766 	movdqu	%xmm14,32 + 0(%rdi)
   2767 	movdqu	48 + 0(%rsi),%xmm14
   2768 	pxor	%xmm15,%xmm14
   2769 	movdqu	%xmm14,48 + 0(%rdi)
   2770 
   2771 	movdqa	80(%rbp),%xmm14
   2772 	movdqu	0 + 64(%rsi),%xmm3
   2773 	movdqu	16 + 64(%rsi),%xmm7
   2774 	movdqu	32 + 64(%rsi),%xmm11
   2775 	movdqu	48 + 64(%rsi),%xmm15
   2776 	pxor	%xmm3,%xmm2
   2777 	pxor	%xmm7,%xmm6
   2778 	pxor	%xmm11,%xmm10
   2779 	pxor	%xmm14,%xmm15
   2780 	movdqu	%xmm2,0 + 64(%rdi)
   2781 	movdqu	%xmm6,16 + 64(%rdi)
   2782 	movdqu	%xmm10,32 + 64(%rdi)
   2783 	movdqu	%xmm15,48 + 64(%rdi)
   2784 	movdqu	0 + 128(%rsi),%xmm3
   2785 	movdqu	16 + 128(%rsi),%xmm7
   2786 	movdqu	32 + 128(%rsi),%xmm11
   2787 	movdqu	48 + 128(%rsi),%xmm15
   2788 	pxor	%xmm3,%xmm1
   2789 	pxor	%xmm7,%xmm5
   2790 	pxor	%xmm11,%xmm9
   2791 	pxor	%xmm13,%xmm15
   2792 	movdqu	%xmm1,0 + 128(%rdi)
   2793 	movdqu	%xmm5,16 + 128(%rdi)
   2794 	movdqu	%xmm9,32 + 128(%rdi)
   2795 	movdqu	%xmm15,48 + 128(%rdi)
   2796 
   2797 	cmpq	$256,%rbx
   2798 	ja	3f
   2799 
   2800 	movq	$192,%rcx
   2801 	subq	$192,%rbx
   2802 	leaq	192(%rsi),%rsi
   2803 	jmp	seal_sse_128_seal_hash
   2804 3:
   2805 	movdqu	0 + 192(%rsi),%xmm3
   2806 	movdqu	16 + 192(%rsi),%xmm7
   2807 	movdqu	32 + 192(%rsi),%xmm11
   2808 	movdqu	48 + 192(%rsi),%xmm15
   2809 	pxor	%xmm3,%xmm0
   2810 	pxor	%xmm7,%xmm4
   2811 	pxor	%xmm11,%xmm8
   2812 	pxor	%xmm12,%xmm15
   2813 	movdqu	%xmm0,0 + 192(%rdi)
   2814 	movdqu	%xmm4,16 + 192(%rdi)
   2815 	movdqu	%xmm8,32 + 192(%rdi)
   2816 	movdqu	%xmm15,48 + 192(%rdi)
   2817 
   2818 	leaq	256(%rsi),%rsi
   2819 	subq	$256,%rbx
   2820 	movq	$6,%rcx
   2821 	movq	$4,%r8
   2822 	cmpq	$192,%rbx
   2823 	jg	1b
   2824 	movq	%rbx,%rcx
   2825 	testq	%rbx,%rbx
   2826 	je	seal_sse_128_seal_hash
   2827 	movq	$6,%rcx
   2828 	cmpq	$64,%rbx
   2829 	jg	3f
   2830 
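# Tail, at most 64 bytes left: one more keystream block; rcx counts
# hash-plus-round iterations, r8 rounds-only iterations to finish 20 rounds.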
   2831 seal_sse_tail_64:
   2832 	movdqa	.chacha20_consts(%rip),%xmm0
   2833 	movdqa	48(%rbp),%xmm4
   2834 	movdqa	64(%rbp),%xmm8
   2835 	movdqa	96(%rbp),%xmm12
   2836 	paddd	.sse_inc(%rip),%xmm12
   2837 	movdqa	%xmm12,96(%rbp)
   2838 
   2839 1:
   2840 	addq	0(%rdi),%r10
   2841 	adcq	8+0(%rdi),%r11
   2842 	adcq	$1,%r12
   2843 	movq	0+0(%rbp),%rax
   2844 	movq	%rax,%r15
   2845 	mulq	%r10
   2846 	movq	%rax,%r13
   2847 	movq	%rdx,%r14
   2848 	movq	0+0(%rbp),%rax
   2849 	mulq	%r11
   2850 	imulq	%r12,%r15
   2851 	addq	%rax,%r14
   2852 	adcq	%rdx,%r15
   2853 	movq	8+0(%rbp),%rax
   2854 	movq	%rax,%r9
   2855 	mulq	%r10
   2856 	addq	%rax,%r14
   2857 	adcq	$0,%rdx
   2858 	movq	%rdx,%r10
   2859 	movq	8+0(%rbp),%rax
   2860 	mulq	%r11
   2861 	addq	%rax,%r15
   2862 	adcq	$0,%rdx
   2863 	imulq	%r12,%r9
   2864 	addq	%r10,%r15
   2865 	adcq	%rdx,%r9
   2866 	movq	%r13,%r10
   2867 	movq	%r14,%r11
   2868 	movq	%r15,%r12
   2869 	andq	$3,%r12
   2870 	movq	%r15,%r13
   2871 	andq	$-4,%r13
   2872 	movq	%r9,%r14
   2873 	shrdq	$2,%r9,%r15
   2874 	shrq	$2,%r9
   2875 	addq	%r13,%r10
   2876 	adcq	%r14,%r11
   2877 	adcq	$0,%r12
   2878 	addq	%r15,%r10
   2879 	adcq	%r9,%r11
   2880 	adcq	$0,%r12
   2881 
   2882 	leaq	16(%rdi),%rdi
   2883 2:
   2884 	paddd	%xmm4,%xmm0
   2885 	pxor	%xmm0,%xmm12
   2886 	pshufb	.rol16(%rip),%xmm12
   2887 	paddd	%xmm12,%xmm8
   2888 	pxor	%xmm8,%xmm4
   2889 	movdqa	%xmm4,%xmm3
   2890 	pslld	$12,%xmm3
   2891 	psrld	$20,%xmm4
   2892 	pxor	%xmm3,%xmm4
   2893 	paddd	%xmm4,%xmm0
   2894 	pxor	%xmm0,%xmm12
   2895 	pshufb	.rol8(%rip),%xmm12
   2896 	paddd	%xmm12,%xmm8
   2897 	pxor	%xmm8,%xmm4
   2898 	movdqa	%xmm4,%xmm3
   2899 	pslld	$7,%xmm3
   2900 	psrld	$25,%xmm4
   2901 	pxor	%xmm3,%xmm4
   2902 .byte	102,15,58,15,228,4
   2903 .byte	102,69,15,58,15,192,8
   2904 .byte	102,69,15,58,15,228,12
   2905 	paddd	%xmm4,%xmm0
   2906 	pxor	%xmm0,%xmm12
   2907 	pshufb	.rol16(%rip),%xmm12
   2908 	paddd	%xmm12,%xmm8
   2909 	pxor	%xmm8,%xmm4
   2910 	movdqa	%xmm4,%xmm3
   2911 	pslld	$12,%xmm3
   2912 	psrld	$20,%xmm4
   2913 	pxor	%xmm3,%xmm4
   2914 	paddd	%xmm4,%xmm0
   2915 	pxor	%xmm0,%xmm12
   2916 	pshufb	.rol8(%rip),%xmm12
   2917 	paddd	%xmm12,%xmm8
   2918 	pxor	%xmm8,%xmm4
   2919 	movdqa	%xmm4,%xmm3
   2920 	pslld	$7,%xmm3
   2921 	psrld	$25,%xmm4
   2922 	pxor	%xmm3,%xmm4
   2923 .byte	102,15,58,15,228,12
   2924 .byte	102,69,15,58,15,192,8
   2925 .byte	102,69,15,58,15,228,4
   2926 	addq	0(%rdi),%r10
   2927 	adcq	8+0(%rdi),%r11
   2928 	adcq	$1,%r12
   2929 	movq	0+0(%rbp),%rax
   2930 	movq	%rax,%r15
   2931 	mulq	%r10
   2932 	movq	%rax,%r13
   2933 	movq	%rdx,%r14
   2934 	movq	0+0(%rbp),%rax
   2935 	mulq	%r11
   2936 	imulq	%r12,%r15
   2937 	addq	%rax,%r14
   2938 	adcq	%rdx,%r15
   2939 	movq	8+0(%rbp),%rax
   2940 	movq	%rax,%r9
   2941 	mulq	%r10
   2942 	addq	%rax,%r14
   2943 	adcq	$0,%rdx
   2944 	movq	%rdx,%r10
   2945 	movq	8+0(%rbp),%rax
   2946 	mulq	%r11
   2947 	addq	%rax,%r15
   2948 	adcq	$0,%rdx
   2949 	imulq	%r12,%r9
   2950 	addq	%r10,%r15
   2951 	adcq	%rdx,%r9
   2952 	movq	%r13,%r10
   2953 	movq	%r14,%r11
   2954 	movq	%r15,%r12
   2955 	andq	$3,%r12
   2956 	movq	%r15,%r13
   2957 	andq	$-4,%r13
   2958 	movq	%r9,%r14
   2959 	shrdq	$2,%r9,%r15
   2960 	shrq	$2,%r9
   2961 	addq	%r13,%r10
   2962 	adcq	%r14,%r11
   2963 	adcq	$0,%r12
   2964 	addq	%r15,%r10
   2965 	adcq	%r9,%r11
   2966 	adcq	$0,%r12
   2967 
   2968 	leaq	16(%rdi),%rdi
   2969 	decq	%rcx
   2970 	jg	1b
   2971 	decq	%r8
   2972 	jge	2b
   2973 	paddd	.chacha20_consts(%rip),%xmm0
   2974 	paddd	48(%rbp),%xmm4
   2975 	paddd	64(%rbp),%xmm8
   2976 	paddd	96(%rbp),%xmm12
   2977 
   2978 	jmp	seal_sse_128_seal
   2979 3:
   2980 	cmpq	$128,%rbx
   2981 	jg	3f
   2982 
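# Tail, 65..128 bytes left: two keystream blocks, same interleaved hashing.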
   2983 seal_sse_tail_128:
   2984 	movdqa	.chacha20_consts(%rip),%xmm0
   2985 	movdqa	48(%rbp),%xmm4
   2986 	movdqa	64(%rbp),%xmm8
   2987 	movdqa	%xmm0,%xmm1
   2988 	movdqa	%xmm4,%xmm5
   2989 	movdqa	%xmm8,%xmm9
   2990 	movdqa	96(%rbp),%xmm13
   2991 	paddd	.sse_inc(%rip),%xmm13
   2992 	movdqa	%xmm13,%xmm12
   2993 	paddd	.sse_inc(%rip),%xmm12
   2994 	movdqa	%xmm12,96(%rbp)
   2995 	movdqa	%xmm13,112(%rbp)
   2996 
   2997 1:
   2998 	addq	0(%rdi),%r10
   2999 	adcq	8+0(%rdi),%r11
   3000 	adcq	$1,%r12
   3001 	movq	0+0(%rbp),%rax
   3002 	movq	%rax,%r15
   3003 	mulq	%r10
   3004 	movq	%rax,%r13
   3005 	movq	%rdx,%r14
   3006 	movq	0+0(%rbp),%rax
   3007 	mulq	%r11
   3008 	imulq	%r12,%r15
   3009 	addq	%rax,%r14
   3010 	adcq	%rdx,%r15
   3011 	movq	8+0(%rbp),%rax
   3012 	movq	%rax,%r9
   3013 	mulq	%r10
   3014 	addq	%rax,%r14
   3015 	adcq	$0,%rdx
   3016 	movq	%rdx,%r10
   3017 	movq	8+0(%rbp),%rax
   3018 	mulq	%r11
   3019 	addq	%rax,%r15
   3020 	adcq	$0,%rdx
   3021 	imulq	%r12,%r9
   3022 	addq	%r10,%r15
   3023 	adcq	%rdx,%r9
   3024 	movq	%r13,%r10
   3025 	movq	%r14,%r11
   3026 	movq	%r15,%r12
   3027 	andq	$3,%r12
   3028 	movq	%r15,%r13
   3029 	andq	$-4,%r13
   3030 	movq	%r9,%r14
   3031 	shrdq	$2,%r9,%r15
   3032 	shrq	$2,%r9
   3033 	addq	%r13,%r10
   3034 	adcq	%r14,%r11
   3035 	adcq	$0,%r12
   3036 	addq	%r15,%r10
   3037 	adcq	%r9,%r11
   3038 	adcq	$0,%r12
   3039 
   3040 	leaq	16(%rdi),%rdi
   3041 2:
   3042 	paddd	%xmm4,%xmm0
   3043 	pxor	%xmm0,%xmm12
   3044 	pshufb	.rol16(%rip),%xmm12
   3045 	paddd	%xmm12,%xmm8
   3046 	pxor	%xmm8,%xmm4
   3047 	movdqa	%xmm4,%xmm3
   3048 	pslld	$12,%xmm3
   3049 	psrld	$20,%xmm4
   3050 	pxor	%xmm3,%xmm4
   3051 	paddd	%xmm4,%xmm0
   3052 	pxor	%xmm0,%xmm12
   3053 	pshufb	.rol8(%rip),%xmm12
   3054 	paddd	%xmm12,%xmm8
   3055 	pxor	%xmm8,%xmm4
   3056 	movdqa	%xmm4,%xmm3
   3057 	pslld	$7,%xmm3
   3058 	psrld	$25,%xmm4
   3059 	pxor	%xmm3,%xmm4
   3060 .byte	102,15,58,15,228,4
   3061 .byte	102,69,15,58,15,192,8
   3062 .byte	102,69,15,58,15,228,12
   3063 	paddd	%xmm5,%xmm1
   3064 	pxor	%xmm1,%xmm13
   3065 	pshufb	.rol16(%rip),%xmm13
   3066 	paddd	%xmm13,%xmm9
   3067 	pxor	%xmm9,%xmm5
   3068 	movdqa	%xmm5,%xmm3
   3069 	pslld	$12,%xmm3
   3070 	psrld	$20,%xmm5
   3071 	pxor	%xmm3,%xmm5
   3072 	paddd	%xmm5,%xmm1
   3073 	pxor	%xmm1,%xmm13
   3074 	pshufb	.rol8(%rip),%xmm13
   3075 	paddd	%xmm13,%xmm9
   3076 	pxor	%xmm9,%xmm5
   3077 	movdqa	%xmm5,%xmm3
   3078 	pslld	$7,%xmm3
   3079 	psrld	$25,%xmm5
   3080 	pxor	%xmm3,%xmm5
   3081 .byte	102,15,58,15,237,4
   3082 .byte	102,69,15,58,15,201,8
   3083 .byte	102,69,15,58,15,237,12
   3084 	addq	0(%rdi),%r10
   3085 	adcq	8+0(%rdi),%r11
   3086 	adcq	$1,%r12
   3087 	movq	0+0(%rbp),%rax
   3088 	movq	%rax,%r15
   3089 	mulq	%r10
   3090 	movq	%rax,%r13
   3091 	movq	%rdx,%r14
   3092 	movq	0+0(%rbp),%rax
   3093 	mulq	%r11
   3094 	imulq	%r12,%r15
   3095 	addq	%rax,%r14
   3096 	adcq	%rdx,%r15
   3097 	movq	8+0(%rbp),%rax
   3098 	movq	%rax,%r9
   3099 	mulq	%r10
   3100 	addq	%rax,%r14
   3101 	adcq	$0,%rdx
   3102 	movq	%rdx,%r10
   3103 	movq	8+0(%rbp),%rax
   3104 	mulq	%r11
   3105 	addq	%rax,%r15
   3106 	adcq	$0,%rdx
   3107 	imulq	%r12,%r9
   3108 	addq	%r10,%r15
   3109 	adcq	%rdx,%r9
   3110 	movq	%r13,%r10
   3111 	movq	%r14,%r11
   3112 	movq	%r15,%r12
   3113 	andq	$3,%r12
   3114 	movq	%r15,%r13
   3115 	andq	$-4,%r13
   3116 	movq	%r9,%r14
   3117 	shrdq	$2,%r9,%r15
   3118 	shrq	$2,%r9
   3119 	addq	%r13,%r10
   3120 	adcq	%r14,%r11
   3121 	adcq	$0,%r12
   3122 	addq	%r15,%r10
   3123 	adcq	%r9,%r11
   3124 	adcq	$0,%r12
   3125 	paddd	%xmm4,%xmm0
   3126 	pxor	%xmm0,%xmm12
   3127 	pshufb	.rol16(%rip),%xmm12
   3128 	paddd	%xmm12,%xmm8
   3129 	pxor	%xmm8,%xmm4
   3130 	movdqa	%xmm4,%xmm3
   3131 	pslld	$12,%xmm3
   3132 	psrld	$20,%xmm4
   3133 	pxor	%xmm3,%xmm4
   3134 	paddd	%xmm4,%xmm0
   3135 	pxor	%xmm0,%xmm12
   3136 	pshufb	.rol8(%rip),%xmm12
   3137 	paddd	%xmm12,%xmm8
   3138 	pxor	%xmm8,%xmm4
   3139 	movdqa	%xmm4,%xmm3
   3140 	pslld	$7,%xmm3
   3141 	psrld	$25,%xmm4
   3142 	pxor	%xmm3,%xmm4
   3143 .byte	102,15,58,15,228,12
   3144 .byte	102,69,15,58,15,192,8
   3145 .byte	102,69,15,58,15,228,4
   3146 	paddd	%xmm5,%xmm1
   3147 	pxor	%xmm1,%xmm13
   3148 	pshufb	.rol16(%rip),%xmm13
   3149 	paddd	%xmm13,%xmm9
   3150 	pxor	%xmm9,%xmm5
   3151 	movdqa	%xmm5,%xmm3
   3152 	pslld	$12,%xmm3
   3153 	psrld	$20,%xmm5
   3154 	pxor	%xmm3,%xmm5
   3155 	paddd	%xmm5,%xmm1
   3156 	pxor	%xmm1,%xmm13
   3157 	pshufb	.rol8(%rip),%xmm13
   3158 	paddd	%xmm13,%xmm9
   3159 	pxor	%xmm9,%xmm5
   3160 	movdqa	%xmm5,%xmm3
   3161 	pslld	$7,%xmm3
   3162 	psrld	$25,%xmm5
   3163 	pxor	%xmm3,%xmm5
   3164 .byte	102,15,58,15,237,12
   3165 .byte	102,69,15,58,15,201,8
   3166 .byte	102,69,15,58,15,237,4
   3167 
   3168 	leaq	16(%rdi),%rdi
   3169 	decq	%rcx
   3170 	jg	1b
   3171 	decq	%r8
   3172 	jge	2b
   3173 	paddd	.chacha20_consts(%rip),%xmm1
   3174 	paddd	48(%rbp),%xmm5
   3175 	paddd	64(%rbp),%xmm9
   3176 	paddd	112(%rbp),%xmm13
   3177 	paddd	.chacha20_consts(%rip),%xmm0
   3178 	paddd	48(%rbp),%xmm4
   3179 	paddd	64(%rbp),%xmm8
   3180 	paddd	96(%rbp),%xmm12
   3181 	movdqu	0 + 0(%rsi),%xmm3
   3182 	movdqu	16 + 0(%rsi),%xmm7
   3183 	movdqu	32 + 0(%rsi),%xmm11
   3184 	movdqu	48 + 0(%rsi),%xmm15
   3185 	pxor	%xmm3,%xmm1
   3186 	pxor	%xmm7,%xmm5
   3187 	pxor	%xmm11,%xmm9
   3188 	pxor	%xmm13,%xmm15
   3189 	movdqu	%xmm1,0 + 0(%rdi)
   3190 	movdqu	%xmm5,16 + 0(%rdi)
   3191 	movdqu	%xmm9,32 + 0(%rdi)
   3192 	movdqu	%xmm15,48 + 0(%rdi)
   3193 
   3194 	movq	$64,%rcx
   3195 	subq	$64,%rbx
   3196 	leaq	64(%rsi),%rsi
   3197 	jmp	seal_sse_128_seal_hash
   3198 3:
   3199 
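# Tail, 129..192 bytes left: three keystream blocks.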
   3200 seal_sse_tail_192:
   3201 	movdqa	.chacha20_consts(%rip),%xmm0
   3202 	movdqa	48(%rbp),%xmm4
   3203 	movdqa	64(%rbp),%xmm8
   3204 	movdqa	%xmm0,%xmm1
   3205 	movdqa	%xmm4,%xmm5
   3206 	movdqa	%xmm8,%xmm9
   3207 	movdqa	%xmm0,%xmm2
   3208 	movdqa	%xmm4,%xmm6
   3209 	movdqa	%xmm8,%xmm10
   3210 	movdqa	96(%rbp),%xmm14
   3211 	paddd	.sse_inc(%rip),%xmm14
   3212 	movdqa	%xmm14,%xmm13
   3213 	paddd	.sse_inc(%rip),%xmm13
   3214 	movdqa	%xmm13,%xmm12
   3215 	paddd	.sse_inc(%rip),%xmm12
   3216 	movdqa	%xmm12,96(%rbp)
   3217 	movdqa	%xmm13,112(%rbp)
   3218 	movdqa	%xmm14,128(%rbp)
   3219 
   3220 1:
   3221 	addq	0(%rdi),%r10
   3222 	adcq	8+0(%rdi),%r11
   3223 	adcq	$1,%r12
   3224 	movq	0+0(%rbp),%rax
   3225 	movq	%rax,%r15
   3226 	mulq	%r10
   3227 	movq	%rax,%r13
   3228 	movq	%rdx,%r14
   3229 	movq	0+0(%rbp),%rax
   3230 	mulq	%r11
   3231 	imulq	%r12,%r15
   3232 	addq	%rax,%r14
   3233 	adcq	%rdx,%r15
   3234 	movq	8+0(%rbp),%rax
   3235 	movq	%rax,%r9
   3236 	mulq	%r10
   3237 	addq	%rax,%r14
   3238 	adcq	$0,%rdx
   3239 	movq	%rdx,%r10
   3240 	movq	8+0(%rbp),%rax
   3241 	mulq	%r11
   3242 	addq	%rax,%r15
   3243 	adcq	$0,%rdx
   3244 	imulq	%r12,%r9
   3245 	addq	%r10,%r15
   3246 	adcq	%rdx,%r9
   3247 	movq	%r13,%r10
   3248 	movq	%r14,%r11
   3249 	movq	%r15,%r12
   3250 	andq	$3,%r12
   3251 	movq	%r15,%r13
   3252 	andq	$-4,%r13
   3253 	movq	%r9,%r14
   3254 	shrdq	$2,%r9,%r15
   3255 	shrq	$2,%r9
   3256 	addq	%r13,%r10
   3257 	adcq	%r14,%r11
   3258 	adcq	$0,%r12
   3259 	addq	%r15,%r10
   3260 	adcq	%r9,%r11
   3261 	adcq	$0,%r12
   3262 
   3263 	leaq	16(%rdi),%rdi
   3264 2:
   3265 	paddd	%xmm4,%xmm0
   3266 	pxor	%xmm0,%xmm12
   3267 	pshufb	.rol16(%rip),%xmm12
   3268 	paddd	%xmm12,%xmm8
   3269 	pxor	%xmm8,%xmm4
   3270 	movdqa	%xmm4,%xmm3
   3271 	pslld	$12,%xmm3
   3272 	psrld	$20,%xmm4
   3273 	pxor	%xmm3,%xmm4
   3274 	paddd	%xmm4,%xmm0
   3275 	pxor	%xmm0,%xmm12
   3276 	pshufb	.rol8(%rip),%xmm12
   3277 	paddd	%xmm12,%xmm8
   3278 	pxor	%xmm8,%xmm4
   3279 	movdqa	%xmm4,%xmm3
   3280 	pslld	$7,%xmm3
   3281 	psrld	$25,%xmm4
   3282 	pxor	%xmm3,%xmm4
   3283 .byte	102,15,58,15,228,4
   3284 .byte	102,69,15,58,15,192,8
   3285 .byte	102,69,15,58,15,228,12
   3286 	paddd	%xmm5,%xmm1
   3287 	pxor	%xmm1,%xmm13
   3288 	pshufb	.rol16(%rip),%xmm13
   3289 	paddd	%xmm13,%xmm9
   3290 	pxor	%xmm9,%xmm5
   3291 	movdqa	%xmm5,%xmm3
   3292 	pslld	$12,%xmm3
   3293 	psrld	$20,%xmm5
   3294 	pxor	%xmm3,%xmm5
   3295 	paddd	%xmm5,%xmm1
   3296 	pxor	%xmm1,%xmm13
   3297 	pshufb	.rol8(%rip),%xmm13
   3298 	paddd	%xmm13,%xmm9
   3299 	pxor	%xmm9,%xmm5
   3300 	movdqa	%xmm5,%xmm3
   3301 	pslld	$7,%xmm3
   3302 	psrld	$25,%xmm5
   3303 	pxor	%xmm3,%xmm5
   3304 .byte	102,15,58,15,237,4
   3305 .byte	102,69,15,58,15,201,8
   3306 .byte	102,69,15,58,15,237,12
   3307 	paddd	%xmm6,%xmm2
   3308 	pxor	%xmm2,%xmm14
   3309 	pshufb	.rol16(%rip),%xmm14
   3310 	paddd	%xmm14,%xmm10
   3311 	pxor	%xmm10,%xmm6
   3312 	movdqa	%xmm6,%xmm3
   3313 	pslld	$12,%xmm3
   3314 	psrld	$20,%xmm6
   3315 	pxor	%xmm3,%xmm6
   3316 	paddd	%xmm6,%xmm2
   3317 	pxor	%xmm2,%xmm14
   3318 	pshufb	.rol8(%rip),%xmm14
   3319 	paddd	%xmm14,%xmm10
   3320 	pxor	%xmm10,%xmm6
   3321 	movdqa	%xmm6,%xmm3
   3322 	pslld	$7,%xmm3
   3323 	psrld	$25,%xmm6
   3324 	pxor	%xmm3,%xmm6
   3325 .byte	102,15,58,15,246,4
   3326 .byte	102,69,15,58,15,210,8
   3327 .byte	102,69,15,58,15,246,12
   3328 	addq	0(%rdi),%r10
   3329 	adcq	8+0(%rdi),%r11
   3330 	adcq	$1,%r12
   3331 	movq	0+0(%rbp),%rax
   3332 	movq	%rax,%r15
   3333 	mulq	%r10
   3334 	movq	%rax,%r13
   3335 	movq	%rdx,%r14
   3336 	movq	0+0(%rbp),%rax
   3337 	mulq	%r11
   3338 	imulq	%r12,%r15
   3339 	addq	%rax,%r14
   3340 	adcq	%rdx,%r15
   3341 	movq	8+0(%rbp),%rax
   3342 	movq	%rax,%r9
   3343 	mulq	%r10
   3344 	addq	%rax,%r14
   3345 	adcq	$0,%rdx
   3346 	movq	%rdx,%r10
   3347 	movq	8+0(%rbp),%rax
   3348 	mulq	%r11
   3349 	addq	%rax,%r15
   3350 	adcq	$0,%rdx
   3351 	imulq	%r12,%r9
   3352 	addq	%r10,%r15
   3353 	adcq	%rdx,%r9
   3354 	movq	%r13,%r10
   3355 	movq	%r14,%r11
   3356 	movq	%r15,%r12
   3357 	andq	$3,%r12
   3358 	movq	%r15,%r13
   3359 	andq	$-4,%r13
   3360 	movq	%r9,%r14
   3361 	shrdq	$2,%r9,%r15
   3362 	shrq	$2,%r9
   3363 	addq	%r13,%r10
   3364 	adcq	%r14,%r11
   3365 	adcq	$0,%r12
   3366 	addq	%r15,%r10
   3367 	adcq	%r9,%r11
   3368 	adcq	$0,%r12
   3369 	paddd	%xmm4,%xmm0
   3370 	pxor	%xmm0,%xmm12
   3371 	pshufb	.rol16(%rip),%xmm12
   3372 	paddd	%xmm12,%xmm8
   3373 	pxor	%xmm8,%xmm4
   3374 	movdqa	%xmm4,%xmm3
   3375 	pslld	$12,%xmm3
   3376 	psrld	$20,%xmm4
   3377 	pxor	%xmm3,%xmm4
   3378 	paddd	%xmm4,%xmm0
   3379 	pxor	%xmm0,%xmm12
   3380 	pshufb	.rol8(%rip),%xmm12
   3381 	paddd	%xmm12,%xmm8
   3382 	pxor	%xmm8,%xmm4
   3383 	movdqa	%xmm4,%xmm3
   3384 	pslld	$7,%xmm3
   3385 	psrld	$25,%xmm4
   3386 	pxor	%xmm3,%xmm4
   3387 .byte	102,15,58,15,228,12
   3388 .byte	102,69,15,58,15,192,8
   3389 .byte	102,69,15,58,15,228,4
   3390 	paddd	%xmm5,%xmm1
   3391 	pxor	%xmm1,%xmm13
   3392 	pshufb	.rol16(%rip),%xmm13
   3393 	paddd	%xmm13,%xmm9
   3394 	pxor	%xmm9,%xmm5
   3395 	movdqa	%xmm5,%xmm3
   3396 	pslld	$12,%xmm3
   3397 	psrld	$20,%xmm5
   3398 	pxor	%xmm3,%xmm5
   3399 	paddd	%xmm5,%xmm1
   3400 	pxor	%xmm1,%xmm13
   3401 	pshufb	.rol8(%rip),%xmm13
   3402 	paddd	%xmm13,%xmm9
   3403 	pxor	%xmm9,%xmm5
   3404 	movdqa	%xmm5,%xmm3
   3405 	pslld	$7,%xmm3
   3406 	psrld	$25,%xmm5
   3407 	pxor	%xmm3,%xmm5
   3408 .byte	102,15,58,15,237,12
   3409 .byte	102,69,15,58,15,201,8
   3410 .byte	102,69,15,58,15,237,4
   3411 	paddd	%xmm6,%xmm2
   3412 	pxor	%xmm2,%xmm14
   3413 	pshufb	.rol16(%rip),%xmm14
   3414 	paddd	%xmm14,%xmm10
   3415 	pxor	%xmm10,%xmm6
   3416 	movdqa	%xmm6,%xmm3
   3417 	pslld	$12,%xmm3
   3418 	psrld	$20,%xmm6
   3419 	pxor	%xmm3,%xmm6
   3420 	paddd	%xmm6,%xmm2
   3421 	pxor	%xmm2,%xmm14
   3422 	pshufb	.rol8(%rip),%xmm14
   3423 	paddd	%xmm14,%xmm10
   3424 	pxor	%xmm10,%xmm6
   3425 	movdqa	%xmm6,%xmm3
   3426 	pslld	$7,%xmm3
   3427 	psrld	$25,%xmm6
   3428 	pxor	%xmm3,%xmm6
   3429 .byte	102,15,58,15,246,12
   3430 .byte	102,69,15,58,15,210,8
   3431 .byte	102,69,15,58,15,246,4
   3432 
   3433 	leaq	16(%rdi),%rdi
   3434 	decq	%rcx
   3435 	jg	1b
   3436 	decq	%r8
   3437 	jge	2b
   3438 	paddd	.chacha20_consts(%rip),%xmm2
   3439 	paddd	48(%rbp),%xmm6
   3440 	paddd	64(%rbp),%xmm10
   3441 	paddd	128(%rbp),%xmm14
   3442 	paddd	.chacha20_consts(%rip),%xmm1
   3443 	paddd	48(%rbp),%xmm5
   3444 	paddd	64(%rbp),%xmm9
   3445 	paddd	112(%rbp),%xmm13
   3446 	paddd	.chacha20_consts(%rip),%xmm0
   3447 	paddd	48(%rbp),%xmm4
   3448 	paddd	64(%rbp),%xmm8
   3449 	paddd	96(%rbp),%xmm12
   3450 	movdqu	0 + 0(%rsi),%xmm3
   3451 	movdqu	16 + 0(%rsi),%xmm7
   3452 	movdqu	32 + 0(%rsi),%xmm11
   3453 	movdqu	48 + 0(%rsi),%xmm15
   3454 	pxor	%xmm3,%xmm2
   3455 	pxor	%xmm7,%xmm6
   3456 	pxor	%xmm11,%xmm10
   3457 	pxor	%xmm14,%xmm15
   3458 	movdqu	%xmm2,0 + 0(%rdi)
   3459 	movdqu	%xmm6,16 + 0(%rdi)
   3460 	movdqu	%xmm10,32 + 0(%rdi)
   3461 	movdqu	%xmm15,48 + 0(%rdi)
   3462 	movdqu	0 + 64(%rsi),%xmm3
   3463 	movdqu	16 + 64(%rsi),%xmm7
   3464 	movdqu	32 + 64(%rsi),%xmm11
   3465 	movdqu	48 + 64(%rsi),%xmm15
   3466 	pxor	%xmm3,%xmm1
   3467 	pxor	%xmm7,%xmm5
   3468 	pxor	%xmm11,%xmm9
   3469 	pxor	%xmm13,%xmm15
   3470 	movdqu	%xmm1,0 + 64(%rdi)
   3471 	movdqu	%xmm5,16 + 64(%rdi)
   3472 	movdqu	%xmm9,32 + 64(%rdi)
   3473 	movdqu	%xmm15,48 + 64(%rdi)
   3474 
   3475 	movq	$128,%rcx
   3476 	subq	$128,%rbx
   3477 	leaq	128(%rsi),%rsi
   3478 
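# Catch the hash up over any remaining whole 16-byte blocks of ciphertext
# (rcx bytes) before the final partial block is handled.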
   3479 seal_sse_128_seal_hash:
   3480 	cmpq	$16,%rcx
   3481 	jb	seal_sse_128_seal
   3482 	addq	0(%rdi),%r10
   3483 	adcq	8+0(%rdi),%r11
   3484 	adcq	$1,%r12
   3485 	movq	0+0(%rbp),%rax
   3486 	movq	%rax,%r15
   3487 	mulq	%r10
   3488 	movq	%rax,%r13
   3489 	movq	%rdx,%r14
   3490 	movq	0+0(%rbp),%rax
   3491 	mulq	%r11
   3492 	imulq	%r12,%r15
   3493 	addq	%rax,%r14
   3494 	adcq	%rdx,%r15
   3495 	movq	8+0(%rbp),%rax
   3496 	movq	%rax,%r9
   3497 	mulq	%r10
   3498 	addq	%rax,%r14
   3499 	adcq	$0,%rdx
   3500 	movq	%rdx,%r10
   3501 	movq	8+0(%rbp),%rax
   3502 	mulq	%r11
   3503 	addq	%rax,%r15
   3504 	adcq	$0,%rdx
   3505 	imulq	%r12,%r9
   3506 	addq	%r10,%r15
   3507 	adcq	%rdx,%r9
   3508 	movq	%r13,%r10
   3509 	movq	%r14,%r11
   3510 	movq	%r15,%r12
   3511 	andq	$3,%r12
   3512 	movq	%r15,%r13
   3513 	andq	$-4,%r13
   3514 	movq	%r9,%r14
   3515 	shrdq	$2,%r9,%r15
   3516 	shrq	$2,%r9
   3517 	addq	%r13,%r10
   3518 	adcq	%r14,%r11
   3519 	adcq	$0,%r12
   3520 	addq	%r15,%r10
   3521 	adcq	%r9,%r11
   3522 	adcq	$0,%r12
   3523 
   3524 	subq	$16,%rcx
   3525 	leaq	16(%rdi),%rdi
   3526 	jmp	seal_sse_128_seal_hash
   3527 
   3528 seal_sse_128_seal:
   3529 	cmpq	$16,%rbx
   3530 	jb	seal_sse_tail_16
   3531 	subq	$16,%rbx
   3532 
   3533 	movdqu	0(%rsi),%xmm3
   3534 	pxor	%xmm3,%xmm0
   3535 	movdqu	%xmm0,0(%rdi)
   3536 
   3537 	addq	0(%rdi),%r10
   3538 	adcq	8(%rdi),%r11
   3539 	adcq	$1,%r12
   3540 	leaq	16(%rsi),%rsi
   3541 	leaq	16(%rdi),%rdi
   3542 	movq	0+0(%rbp),%rax
   3543 	movq	%rax,%r15
   3544 	mulq	%r10
   3545 	movq	%rax,%r13
   3546 	movq	%rdx,%r14
   3547 	movq	0+0(%rbp),%rax
   3548 	mulq	%r11
   3549 	imulq	%r12,%r15
   3550 	addq	%rax,%r14
   3551 	adcq	%rdx,%r15
   3552 	movq	8+0(%rbp),%rax
   3553 	movq	%rax,%r9
   3554 	mulq	%r10
   3555 	addq	%rax,%r14
   3556 	adcq	$0,%rdx
   3557 	movq	%rdx,%r10
   3558 	movq	8+0(%rbp),%rax
   3559 	mulq	%r11
   3560 	addq	%rax,%r15
   3561 	adcq	$0,%rdx
   3562 	imulq	%r12,%r9
   3563 	addq	%r10,%r15
   3564 	adcq	%rdx,%r9
   3565 	movq	%r13,%r10
   3566 	movq	%r14,%r11
   3567 	movq	%r15,%r12
   3568 	andq	$3,%r12
   3569 	movq	%r15,%r13
   3570 	andq	$-4,%r13
   3571 	movq	%r9,%r14
   3572 	shrdq	$2,%r9,%r15
   3573 	shrq	$2,%r9
   3574 	addq	%r13,%r10
   3575 	adcq	%r14,%r11
   3576 	adcq	$0,%r12
   3577 	addq	%r15,%r10
   3578 	adcq	%r9,%r11
   3579 	adcq	$0,%r12
   3580 
   3581 
   3582 	movdqa	%xmm4,%xmm0
   3583 	movdqa	%xmm8,%xmm4
   3584 	movdqa	%xmm12,%xmm8
   3585 	movdqa	%xmm1,%xmm12
   3586 	movdqa	%xmm5,%xmm1
   3587 	movdqa	%xmm9,%xmm5
   3588 	movdqa	%xmm13,%xmm9
   3589 	jmp	seal_sse_128_seal
   3590 
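# Fewer than 16 bytes remain: build the block byte-by-byte in xmm15, XOR with
# keystream and store; the partial block is then padded with extra ciphertext
# (if any) and masked via .and_masks before it is hashed.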
   3591 seal_sse_tail_16:
   3592 	testq	%rbx,%rbx
   3593 	jz	process_blocks_of_extra_in
   3594 
   3595 	movq	%rbx,%r8
   3596 	movq	%rbx,%rcx
   3597 	leaq	-1(%rsi,%rbx), %rsi
   3598 	pxor	%xmm15,%xmm15
   3599 1:
   3600 	pslldq	$1,%xmm15
   3601 	pinsrb	$0,(%rsi),%xmm15
   3602 	leaq	-1(%rsi),%rsi
   3603 	decq	%rcx
   3604 	jne	1b
   3605 
   3606 
   3607 	pxor	%xmm0,%xmm15
   3608 
   3609 
   3610 	movq	%rbx,%rcx
   3611 	movdqu	%xmm15,%xmm0
   3612 2:
   3613 	pextrb	$0,%xmm0,(%rdi)
   3614 	psrldq	$1,%xmm0
   3615 	addq	$1,%rdi
   3616 	subq	$1,%rcx
   3617 	jnz	2b
   3618 
   3619 
   3620 
   3621 
   3622 
   3623 
   3624 
   3625 
   3626 	movq	288+32(%rsp),%r9
   3627 	movq	56(%r9),%r14
   3628 	movq	48(%r9),%r13
   3629 	testq	%r14,%r14
   3630 	jz	process_partial_block
   3631 
   3632 	movq	$16,%r15
   3633 	subq	%rbx,%r15
   3634 	cmpq	%r15,%r14
   3635 
   3636 	jge	load_extra_in
   3637 	movq	%r14,%r15
   3638 
   3639 load_extra_in:
   3640 
   3641 
   3642 	leaq	-1(%r13,%r15), %rsi
   3643 
   3644 
   3645 	addq	%r15,%r13
   3646 	subq	%r15,%r14
   3647 	movq	%r13,48(%r9)
   3648 	movq	%r14,56(%r9)
   3649 
   3650 
   3651 
   3652 	addq	%r15,%r8
   3653 
   3654 
   3655 	pxor	%xmm11,%xmm11
   3656 3:
   3657 	pslldq	$1,%xmm11
   3658 	pinsrb	$0,(%rsi),%xmm11
   3659 	leaq	-1(%rsi),%rsi
   3660 	subq	$1,%r15
   3661 	jnz	3b
   3662 
   3663 
   3664 
   3665 
   3666 	movq	%rbx,%r15
   3667 
   3668 4:
   3669 	pslldq	$1,%xmm11
   3670 	subq	$1,%r15
   3671 	jnz	4b
   3672 
   3673 
   3674 
   3675 
   3676 	leaq	.and_masks(%rip),%r15
   3677 	shlq	$4,%rbx
   3678 	pand	-16(%r15,%rbx), %xmm15
   3679 
   3680 
   3681 	por	%xmm11,%xmm15
   3682 
   3683 
   3684 
   3685 .byte	102,77,15,126,253
   3686 	pextrq	$1,%xmm15,%r14
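# (the .byte above encodes movq %xmm15,%r13; together with pextrq, the padded
# 16-byte block lands in r13:r14 for one more Poly1305 step)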
   3687 	addq	%r13,%r10
   3688 	adcq	%r14,%r11
   3689 	adcq	$1,%r12
   3690 	movq	0+0(%rbp),%rax
   3691 	movq	%rax,%r15
   3692 	mulq	%r10
   3693 	movq	%rax,%r13
   3694 	movq	%rdx,%r14
   3695 	movq	0+0(%rbp),%rax
   3696 	mulq	%r11
   3697 	imulq	%r12,%r15
   3698 	addq	%rax,%r14
   3699 	adcq	%rdx,%r15
   3700 	movq	8+0(%rbp),%rax
   3701 	movq	%rax,%r9
   3702 	mulq	%r10
   3703 	addq	%rax,%r14
   3704 	adcq	$0,%rdx
   3705 	movq	%rdx,%r10
   3706 	movq	8+0(%rbp),%rax
   3707 	mulq	%r11
   3708 	addq	%rax,%r15
   3709 	adcq	$0,%rdx
   3710 	imulq	%r12,%r9
   3711 	addq	%r10,%r15
   3712 	adcq	%rdx,%r9
   3713 	movq	%r13,%r10
   3714 	movq	%r14,%r11
   3715 	movq	%r15,%r12
   3716 	andq	$3,%r12
   3717 	movq	%r15,%r13
   3718 	andq	$-4,%r13
   3719 	movq	%r9,%r14
   3720 	shrdq	$2,%r9,%r15
   3721 	shrq	$2,%r9
   3722 	addq	%r13,%r10
   3723 	adcq	%r14,%r11
   3724 	adcq	$0,%r12
   3725 	addq	%r15,%r10
   3726 	adcq	%r9,%r11
   3727 	adcq	$0,%r12
   3728 
   3729 
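# Hash the caller-supplied extra ciphertext (pointer/length at offsets 48/56
# of the data block): whole 16-byte blocks here, trailer bytes just below.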
   3730 process_blocks_of_extra_in:
   3731 
   3732 	movq	288+32(%rsp),%r9
   3733 	movq	48(%r9),%rsi
   3734 	movq	56(%r9),%r8
   3735 	movq	%r8,%rcx
   3736 	shrq	$4,%r8
   3737 
   3738 5:
   3739 	jz	process_extra_in_trailer
   3740 	addq	0(%rsi),%r10
   3741 	adcq	8+0(%rsi),%r11
   3742 	adcq	$1,%r12
   3743 	movq	0+0(%rbp),%rax
   3744 	movq	%rax,%r15
   3745 	mulq	%r10
   3746 	movq	%rax,%r13
   3747 	movq	%rdx,%r14
   3748 	movq	0+0(%rbp),%rax
   3749 	mulq	%r11
   3750 	imulq	%r12,%r15
   3751 	addq	%rax,%r14
   3752 	adcq	%rdx,%r15
   3753 	movq	8+0(%rbp),%rax
   3754 	movq	%rax,%r9
   3755 	mulq	%r10
   3756 	addq	%rax,%r14
   3757 	adcq	$0,%rdx
   3758 	movq	%rdx,%r10
   3759 	movq	8+0(%rbp),%rax
   3760 	mulq	%r11
   3761 	addq	%rax,%r15
   3762 	adcq	$0,%rdx
   3763 	imulq	%r12,%r9
   3764 	addq	%r10,%r15
   3765 	adcq	%rdx,%r9
   3766 	movq	%r13,%r10
   3767 	movq	%r14,%r11
   3768 	movq	%r15,%r12
   3769 	andq	$3,%r12
   3770 	movq	%r15,%r13
   3771 	andq	$-4,%r13
   3772 	movq	%r9,%r14
   3773 	shrdq	$2,%r9,%r15
   3774 	shrq	$2,%r9
   3775 	addq	%r13,%r10
   3776 	adcq	%r14,%r11
   3777 	adcq	$0,%r12
   3778 	addq	%r15,%r10
   3779 	adcq	%r9,%r11
   3780 	adcq	$0,%r12
   3781 
   3782 	leaq	16(%rsi),%rsi
   3783 	subq	$1,%r8
   3784 	jmp	5b
   3785 
   3786 process_extra_in_trailer:
   3787 	andq	$15,%rcx
   3788 	movq	%rcx,%rbx
   3789 	jz	do_length_block
   3790 	leaq	-1(%rsi,%rcx), %rsi
   3791 
   3792 6:
   3793 	pslldq	$1,%xmm15
   3794 	pinsrb	$0,(%rsi),%xmm15
   3795 	leaq	-1(%rsi),%rsi
   3796 	subq	$1,%rcx
   3797 	jnz	6b
   3798 
   3799 process_partial_block:
   3800 
   3801 	leaq	.and_masks(%rip),%r15
   3802 	shlq	$4,%rbx
   3803 	pand	-16(%r15,%rbx), %xmm15
   3804 .byte	102,77,15,126,253
   3805 	pextrq	$1,%xmm15,%r14
   3806 	addq	%r13,%r10
   3807 	adcq	%r14,%r11
   3808 	adcq	$1,%r12
   3809 	movq	0+0(%rbp),%rax
   3810 	movq	%rax,%r15
   3811 	mulq	%r10
   3812 	movq	%rax,%r13
   3813 	movq	%rdx,%r14
   3814 	movq	0+0(%rbp),%rax
   3815 	mulq	%r11
   3816 	imulq	%r12,%r15
   3817 	addq	%rax,%r14
   3818 	adcq	%rdx,%r15
   3819 	movq	8+0(%rbp),%rax
   3820 	movq	%rax,%r9
   3821 	mulq	%r10
   3822 	addq	%rax,%r14
   3823 	adcq	$0,%rdx
   3824 	movq	%rdx,%r10
   3825 	movq	8+0(%rbp),%rax
   3826 	mulq	%r11
   3827 	addq	%rax,%r15
   3828 	adcq	$0,%rdx
   3829 	imulq	%r12,%r9
   3830 	addq	%r10,%r15
   3831 	adcq	%rdx,%r9
   3832 	movq	%r13,%r10
   3833 	movq	%r14,%r11
   3834 	movq	%r15,%r12
   3835 	andq	$3,%r12
   3836 	movq	%r15,%r13
   3837 	andq	$-4,%r13
   3838 	movq	%r9,%r14
   3839 	shrdq	$2,%r9,%r15
   3840 	shrq	$2,%r9
   3841 	addq	%r13,%r10
   3842 	adcq	%r14,%r11
   3843 	adcq	$0,%r12
   3844 	addq	%r15,%r10
   3845 	adcq	%r9,%r11
   3846 	adcq	$0,%r12
   3847 
   3848 
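# Length block: ad_len (32(%rbp)) and the total ciphertext length (40(%rbp))
# are hashed as the final 16-byte Poly1305 block.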
   3849 do_length_block:
   3850 	addq	32(%rbp),%r10
   3851 	adcq	8+32(%rbp),%r11
   3852 	adcq	$1,%r12
   3853 	movq	0+0(%rbp),%rax
   3854 	movq	%rax,%r15
   3855 	mulq	%r10
   3856 	movq	%rax,%r13
   3857 	movq	%rdx,%r14
   3858 	movq	0+0(%rbp),%rax
   3859 	mulq	%r11
   3860 	imulq	%r12,%r15
   3861 	addq	%rax,%r14
   3862 	adcq	%rdx,%r15
   3863 	movq	8+0(%rbp),%rax
   3864 	movq	%rax,%r9
   3865 	mulq	%r10
   3866 	addq	%rax,%r14
   3867 	adcq	$0,%rdx
   3868 	movq	%rdx,%r10
   3869 	movq	8+0(%rbp),%rax
   3870 	mulq	%r11
   3871 	addq	%rax,%r15
   3872 	adcq	$0,%rdx
   3873 	imulq	%r12,%r9
   3874 	addq	%r10,%r15
   3875 	adcq	%rdx,%r9
   3876 	movq	%r13,%r10
   3877 	movq	%r14,%r11
   3878 	movq	%r15,%r12
   3879 	andq	$3,%r12
   3880 	movq	%r15,%r13
   3881 	andq	$-4,%r13
   3882 	movq	%r9,%r14
   3883 	shrdq	$2,%r9,%r15
   3884 	shrq	$2,%r9
   3885 	addq	%r13,%r10
   3886 	adcq	%r14,%r11
   3887 	adcq	$0,%r12
   3888 	addq	%r15,%r10
   3889 	adcq	%r9,%r11
   3890 	adcq	$0,%r12
   3891 
   3892 
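# Final reduction: compute h + 5 - 2^130 via the sub/sbb chain of negated
# constants and keep it only if it did not go negative (i.e. h >= 2^130 - 5);
# then add s (16(%rbp)) mod 2^128 to produce the tag.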
   3893 	movq	%r10,%r13
   3894 	movq	%r11,%r14
   3895 	movq	%r12,%r15
   3896 	subq	$-5,%r10
   3897 	sbbq	$-1,%r11
   3898 	sbbq	$3,%r12
   3899 	cmovcq	%r13,%r10
   3900 	cmovcq	%r14,%r11
   3901 	cmovcq	%r15,%r12
   3902 
   3903 	addq	0+16(%rbp),%r10
   3904 	adcq	8+16(%rbp),%r11
   3905 
   3906 	addq	$288 + 32,%rsp
   3907 .cfi_adjust_cfa_offset	-(288 + 32)
   3908 	popq	%r9
   3909 .cfi_adjust_cfa_offset	-8
   3910 	movq	%r10,0(%r9)
   3911 	movq	%r11,8(%r9)
   3912 
   3913 	popq	%r15
   3914 .cfi_adjust_cfa_offset	-8
   3915 	popq	%r14
   3916 .cfi_adjust_cfa_offset	-8
   3917 	popq	%r13
   3918 .cfi_adjust_cfa_offset	-8
   3919 	popq	%r12
   3920 .cfi_adjust_cfa_offset	-8
   3921 	popq	%rbx
   3922 .cfi_adjust_cfa_offset	-8
   3923 	popq	%rbp
   3924 .cfi_adjust_cfa_offset	-8
    3925 	.byte	0xf3,0xc3	# rep ret
   3926 .cfi_adjust_cfa_offset	(8 * 6) + 288 + 32
   3927 
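# Short-input seal (at most 128 bytes): three keystream blocks, no
# interleaved hashing; the counter-n block (xmm2/xmm6) is clamped for the
# Poly1305 key.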
   3928 seal_sse_128:
   3929 	movdqu	.chacha20_consts(%rip),%xmm0
   3930 	movdqa	%xmm0,%xmm1
   3931 	movdqa	%xmm0,%xmm2
   3932 	movdqu	0(%r9),%xmm4
   3933 	movdqa	%xmm4,%xmm5
   3934 	movdqa	%xmm4,%xmm6
   3935 	movdqu	16(%r9),%xmm8
   3936 	movdqa	%xmm8,%xmm9
   3937 	movdqa	%xmm8,%xmm10
   3938 	movdqu	32(%r9),%xmm14
   3939 	movdqa	%xmm14,%xmm12
   3940 	paddd	.sse_inc(%rip),%xmm12
   3941 	movdqa	%xmm12,%xmm13
   3942 	paddd	.sse_inc(%rip),%xmm13
   3943 	movdqa	%xmm4,%xmm7
   3944 	movdqa	%xmm8,%xmm11
   3945 	movdqa	%xmm12,%xmm15
   3946 	movq	$10,%r10
   3947 1:
   3948 	paddd	%xmm4,%xmm0
   3949 	pxor	%xmm0,%xmm12
   3950 	pshufb	.rol16(%rip),%xmm12
   3951 	paddd	%xmm12,%xmm8
   3952 	pxor	%xmm8,%xmm4
   3953 	movdqa	%xmm4,%xmm3
   3954 	pslld	$12,%xmm3
   3955 	psrld	$20,%xmm4
   3956 	pxor	%xmm3,%xmm4
   3957 	paddd	%xmm4,%xmm0
   3958 	pxor	%xmm0,%xmm12
   3959 	pshufb	.rol8(%rip),%xmm12
   3960 	paddd	%xmm12,%xmm8
   3961 	pxor	%xmm8,%xmm4
   3962 	movdqa	%xmm4,%xmm3
   3963 	pslld	$7,%xmm3
   3964 	psrld	$25,%xmm4
   3965 	pxor	%xmm3,%xmm4
   3966 .byte	102,15,58,15,228,4
   3967 .byte	102,69,15,58,15,192,8
   3968 .byte	102,69,15,58,15,228,12
   3969 	paddd	%xmm5,%xmm1
   3970 	pxor	%xmm1,%xmm13
   3971 	pshufb	.rol16(%rip),%xmm13
   3972 	paddd	%xmm13,%xmm9
   3973 	pxor	%xmm9,%xmm5
   3974 	movdqa	%xmm5,%xmm3
   3975 	pslld	$12,%xmm3
   3976 	psrld	$20,%xmm5
   3977 	pxor	%xmm3,%xmm5
   3978 	paddd	%xmm5,%xmm1
   3979 	pxor	%xmm1,%xmm13
   3980 	pshufb	.rol8(%rip),%xmm13
   3981 	paddd	%xmm13,%xmm9
   3982 	pxor	%xmm9,%xmm5
   3983 	movdqa	%xmm5,%xmm3
   3984 	pslld	$7,%xmm3
   3985 	psrld	$25,%xmm5
   3986 	pxor	%xmm3,%xmm5
   3987 .byte	102,15,58,15,237,4
   3988 .byte	102,69,15,58,15,201,8
   3989 .byte	102,69,15,58,15,237,12
   3990 	paddd	%xmm6,%xmm2
   3991 	pxor	%xmm2,%xmm14
   3992 	pshufb	.rol16(%rip),%xmm14
   3993 	paddd	%xmm14,%xmm10
   3994 	pxor	%xmm10,%xmm6
   3995 	movdqa	%xmm6,%xmm3
   3996 	pslld	$12,%xmm3
   3997 	psrld	$20,%xmm6
   3998 	pxor	%xmm3,%xmm6
   3999 	paddd	%xmm6,%xmm2
   4000 	pxor	%xmm2,%xmm14
   4001 	pshufb	.rol8(%rip),%xmm14
   4002 	paddd	%xmm14,%xmm10
   4003 	pxor	%xmm10,%xmm6
   4004 	movdqa	%xmm6,%xmm3
   4005 	pslld	$7,%xmm3
   4006 	psrld	$25,%xmm6
   4007 	pxor	%xmm3,%xmm6
   4008 .byte	102,15,58,15,246,4
   4009 .byte	102,69,15,58,15,210,8
   4010 .byte	102,69,15,58,15,246,12
   4011 	paddd	%xmm4,%xmm0
   4012 	pxor	%xmm0,%xmm12
   4013 	pshufb	.rol16(%rip),%xmm12
   4014 	paddd	%xmm12,%xmm8
   4015 	pxor	%xmm8,%xmm4
   4016 	movdqa	%xmm4,%xmm3
   4017 	pslld	$12,%xmm3
   4018 	psrld	$20,%xmm4
   4019 	pxor	%xmm3,%xmm4
   4020 	paddd	%xmm4,%xmm0
   4021 	pxor	%xmm0,%xmm12
   4022 	pshufb	.rol8(%rip),%xmm12
   4023 	paddd	%xmm12,%xmm8
   4024 	pxor	%xmm8,%xmm4
   4025 	movdqa	%xmm4,%xmm3
   4026 	pslld	$7,%xmm3
   4027 	psrld	$25,%xmm4
   4028 	pxor	%xmm3,%xmm4
   4029 .byte	102,15,58,15,228,12
   4030 .byte	102,69,15,58,15,192,8
   4031 .byte	102,69,15,58,15,228,4
   4032 	paddd	%xmm5,%xmm1
   4033 	pxor	%xmm1,%xmm13
   4034 	pshufb	.rol16(%rip),%xmm13
   4035 	paddd	%xmm13,%xmm9
   4036 	pxor	%xmm9,%xmm5
   4037 	movdqa	%xmm5,%xmm3
   4038 	pslld	$12,%xmm3
   4039 	psrld	$20,%xmm5
   4040 	pxor	%xmm3,%xmm5
   4041 	paddd	%xmm5,%xmm1
   4042 	pxor	%xmm1,%xmm13
   4043 	pshufb	.rol8(%rip),%xmm13
   4044 	paddd	%xmm13,%xmm9
   4045 	pxor	%xmm9,%xmm5
   4046 	movdqa	%xmm5,%xmm3
   4047 	pslld	$7,%xmm3
   4048 	psrld	$25,%xmm5
   4049 	pxor	%xmm3,%xmm5
   4050 .byte	102,15,58,15,237,12
   4051 .byte	102,69,15,58,15,201,8
   4052 .byte	102,69,15,58,15,237,4
   4053 	paddd	%xmm6,%xmm2
   4054 	pxor	%xmm2,%xmm14
   4055 	pshufb	.rol16(%rip),%xmm14
   4056 	paddd	%xmm14,%xmm10
   4057 	pxor	%xmm10,%xmm6
   4058 	movdqa	%xmm6,%xmm3
   4059 	pslld	$12,%xmm3
   4060 	psrld	$20,%xmm6
   4061 	pxor	%xmm3,%xmm6
   4062 	paddd	%xmm6,%xmm2
   4063 	pxor	%xmm2,%xmm14
   4064 	pshufb	.rol8(%rip),%xmm14
   4065 	paddd	%xmm14,%xmm10
   4066 	pxor	%xmm10,%xmm6
   4067 	movdqa	%xmm6,%xmm3
   4068 	pslld	$7,%xmm3
   4069 	psrld	$25,%xmm6
   4070 	pxor	%xmm3,%xmm6
   4071 .byte	102,15,58,15,246,12
   4072 .byte	102,69,15,58,15,210,8
   4073 .byte	102,69,15,58,15,246,4
   4074 
   4075 	decq	%r10
   4076 	jnz	1b
   4077 	paddd	.chacha20_consts(%rip),%xmm0
   4078 	paddd	.chacha20_consts(%rip),%xmm1
   4079 	paddd	.chacha20_consts(%rip),%xmm2
   4080 	paddd	%xmm7,%xmm4
   4081 	paddd	%xmm7,%xmm5
   4082 	paddd	%xmm7,%xmm6
   4083 	paddd	%xmm11,%xmm8
   4084 	paddd	%xmm11,%xmm9
   4085 	paddd	%xmm15,%xmm12
   4086 	paddd	.sse_inc(%rip),%xmm15
   4087 	paddd	%xmm15,%xmm13
   4088 
   4089 	pand	.clamp(%rip),%xmm2
   4090 	movdqa	%xmm2,0(%rbp)
   4091 	movdqa	%xmm6,16(%rbp)
   4092 
   4093 	movq	%r8,%r8
   4094 	call	poly_hash_ad_internal
   4095 	jmp	seal_sse_128_seal
   4096 .size	chacha20_poly1305_seal, .-chacha20_poly1305_seal
   4097 
   4098 
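# AVX2 open: vbroadcasti128 mirrors each state row across both 128-bit lanes,
# so one ymm register carries two blocks; the 32-byte constant at .avx2_init
# gives the low lane +0 and the high lane +1, yielding counters n and n+1.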
   4099 .type	chacha20_poly1305_open_avx2,@function
   4100 .align	64
   4101 chacha20_poly1305_open_avx2:
   4102 	vzeroupper
   4103 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4104 	vbroadcasti128	0(%r9),%ymm4
   4105 	vbroadcasti128	16(%r9),%ymm8
   4106 	vbroadcasti128	32(%r9),%ymm12
   4107 	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
   4108 	cmpq	$192,%rbx
   4109 	jbe	open_avx2_192
   4110 	cmpq	$320,%rbx
   4111 	jbe	open_avx2_320
   4112 
   4113 	vmovdqa	%ymm4,64(%rbp)
   4114 	vmovdqa	%ymm8,96(%rbp)
   4115 	vmovdqa	%ymm12,160(%rbp)
   4116 	movq	$10,%r10
   4117 1:
   4118 	vpaddd	%ymm4,%ymm0,%ymm0
   4119 	vpxor	%ymm0,%ymm12,%ymm12
   4120 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4121 	vpaddd	%ymm12,%ymm8,%ymm8
   4122 	vpxor	%ymm8,%ymm4,%ymm4
   4123 	vpsrld	$20,%ymm4,%ymm3
   4124 	vpslld	$12,%ymm4,%ymm4
   4125 	vpxor	%ymm3,%ymm4,%ymm4
   4126 	vpaddd	%ymm4,%ymm0,%ymm0
   4127 	vpxor	%ymm0,%ymm12,%ymm12
   4128 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4129 	vpaddd	%ymm12,%ymm8,%ymm8
   4130 	vpxor	%ymm8,%ymm4,%ymm4
   4131 	vpslld	$7,%ymm4,%ymm3
   4132 	vpsrld	$25,%ymm4,%ymm4
   4133 	vpxor	%ymm3,%ymm4,%ymm4
   4134 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4135 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4136 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4137 	vpaddd	%ymm4,%ymm0,%ymm0
   4138 	vpxor	%ymm0,%ymm12,%ymm12
   4139 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4140 	vpaddd	%ymm12,%ymm8,%ymm8
   4141 	vpxor	%ymm8,%ymm4,%ymm4
   4142 	vpsrld	$20,%ymm4,%ymm3
   4143 	vpslld	$12,%ymm4,%ymm4
   4144 	vpxor	%ymm3,%ymm4,%ymm4
   4145 	vpaddd	%ymm4,%ymm0,%ymm0
   4146 	vpxor	%ymm0,%ymm12,%ymm12
   4147 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4148 	vpaddd	%ymm12,%ymm8,%ymm8
   4149 	vpxor	%ymm8,%ymm4,%ymm4
   4150 	vpslld	$7,%ymm4,%ymm3
   4151 	vpsrld	$25,%ymm4,%ymm4
   4152 	vpxor	%ymm3,%ymm4,%ymm4
   4153 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4154 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4155 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4156 
   4157 	decq	%r10
   4158 	jne	1b
   4159 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4160 	vpaddd	64(%rbp),%ymm4,%ymm4
   4161 	vpaddd	96(%rbp),%ymm8,%ymm8
   4162 	vpaddd	160(%rbp),%ymm12,%ymm12
   4163 
   4164 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   4165 
   4166 	vpand	.clamp(%rip),%ymm3,%ymm3
   4167 	vmovdqa	%ymm3,0(%rbp)
   4168 
   4169 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   4170 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   4171 
   4172 	movq	%r8,%r8
   4173 	call	poly_hash_ad_internal
   4174 	xorq	%rcx,%rcx
   4175 
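# Hash the first 64 bytes of ciphertext (four Poly1305 blocks, rcx = 0..48),
# then decrypt them with the 64 bytes of keystream assembled in ymm0/ymm4.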
   4176 1:
   4177 	addq	0(%rsi,%rcx), %r10
   4178 	adcq	8+0(%rsi,%rcx), %r11
   4179 	adcq	$1,%r12
   4180 	movq	0+0(%rbp),%rax
   4181 	movq	%rax,%r15
   4182 	mulq	%r10
   4183 	movq	%rax,%r13
   4184 	movq	%rdx,%r14
   4185 	movq	0+0(%rbp),%rax
   4186 	mulq	%r11
   4187 	imulq	%r12,%r15
   4188 	addq	%rax,%r14
   4189 	adcq	%rdx,%r15
   4190 	movq	8+0(%rbp),%rax
   4191 	movq	%rax,%r9
   4192 	mulq	%r10
   4193 	addq	%rax,%r14
   4194 	adcq	$0,%rdx
   4195 	movq	%rdx,%r10
   4196 	movq	8+0(%rbp),%rax
   4197 	mulq	%r11
   4198 	addq	%rax,%r15
   4199 	adcq	$0,%rdx
   4200 	imulq	%r12,%r9
   4201 	addq	%r10,%r15
   4202 	adcq	%rdx,%r9
   4203 	movq	%r13,%r10
   4204 	movq	%r14,%r11
   4205 	movq	%r15,%r12
   4206 	andq	$3,%r12
   4207 	movq	%r15,%r13
   4208 	andq	$-4,%r13
   4209 	movq	%r9,%r14
   4210 	shrdq	$2,%r9,%r15
   4211 	shrq	$2,%r9
   4212 	addq	%r13,%r10
   4213 	adcq	%r14,%r11
   4214 	adcq	$0,%r12
   4215 	addq	%r15,%r10
   4216 	adcq	%r9,%r11
   4217 	adcq	$0,%r12
   4218 
   4219 	addq	$16,%rcx
   4220 	cmpq	$64,%rcx
   4221 	jne	1b
   4222 
   4223 	vpxor	0(%rsi),%ymm0,%ymm0
   4224 	vpxor	32(%rsi),%ymm4,%ymm4
   4225 	vmovdqu	%ymm0,0(%rdi)
   4226 	vmovdqu	%ymm4,32(%rdi)
   4227 	leaq	64(%rsi),%rsi
   4228 	leaq	64(%rdi),%rdi
   4229 	subq	$64,%rbx
   4230 1:
   4231 
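# Main AVX2 loop: with at least 512 bytes left, run eight blocks per pass.
# Poly1305 switches to BMI2 mulx here, which leaves the flags untouched and
# so interleaves freely with the adc chains.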
   4232 	cmpq	$512,%rbx
   4233 	jb	3f
   4234 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4235 	vmovdqa	64(%rbp),%ymm4
   4236 	vmovdqa	96(%rbp),%ymm8
   4237 	vmovdqa	%ymm0,%ymm1
   4238 	vmovdqa	%ymm4,%ymm5
   4239 	vmovdqa	%ymm8,%ymm9
   4240 	vmovdqa	%ymm0,%ymm2
   4241 	vmovdqa	%ymm4,%ymm6
   4242 	vmovdqa	%ymm8,%ymm10
   4243 	vmovdqa	%ymm0,%ymm3
   4244 	vmovdqa	%ymm4,%ymm7
   4245 	vmovdqa	%ymm8,%ymm11
   4246 	vmovdqa	.avx2_inc(%rip),%ymm12
   4247 	vpaddd	160(%rbp),%ymm12,%ymm15
   4248 	vpaddd	%ymm15,%ymm12,%ymm14
   4249 	vpaddd	%ymm14,%ymm12,%ymm13
   4250 	vpaddd	%ymm13,%ymm12,%ymm12
   4251 	vmovdqa	%ymm15,256(%rbp)
   4252 	vmovdqa	%ymm14,224(%rbp)
   4253 	vmovdqa	%ymm13,192(%rbp)
   4254 	vmovdqa	%ymm12,160(%rbp)
   4255 
   4256 	xorq	%rcx,%rcx
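// One column round plus one diagonal round of all four states per
// pass, interleaved with three 16-byte Poly1305 blocks (mulx form).
// ymm8 spills to 128(%rbp) so the .rol16/.rol8 masks get a register.
// %rcx advances 48 bytes per pass; ten passes hash 480 bytes and the
// last 32 bytes are absorbed during the output stage below.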
   4257 2:
   4258 	addq	0*8(%rsi,%rcx), %r10
   4259 	adcq	8+0*8(%rsi,%rcx), %r11
   4260 	adcq	$1,%r12
   4261 	vmovdqa	%ymm8,128(%rbp)
   4262 	vmovdqa	.rol16(%rip),%ymm8
   4263 	vpaddd	%ymm7,%ymm3,%ymm3
   4264 	vpaddd	%ymm6,%ymm2,%ymm2
   4265 	vpaddd	%ymm5,%ymm1,%ymm1
   4266 	vpaddd	%ymm4,%ymm0,%ymm0
   4267 	vpxor	%ymm3,%ymm15,%ymm15
   4268 	vpxor	%ymm2,%ymm14,%ymm14
   4269 	vpxor	%ymm1,%ymm13,%ymm13
   4270 	vpxor	%ymm0,%ymm12,%ymm12
   4271 	movq	0+0(%rbp),%rdx
   4272 	movq	%rdx,%r15
   4273 	mulxq	%r10,%r13,%r14
   4274 	mulxq	%r11,%rax,%rdx
   4275 	imulq	%r12,%r15
   4276 	addq	%rax,%r14
   4277 	adcq	%rdx,%r15
   4278 	vpshufb	%ymm8,%ymm15,%ymm15
   4279 	vpshufb	%ymm8,%ymm14,%ymm14
   4280 	vpshufb	%ymm8,%ymm13,%ymm13
   4281 	vpshufb	%ymm8,%ymm12,%ymm12
   4282 	vmovdqa	128(%rbp),%ymm8
   4283 	vpaddd	%ymm15,%ymm11,%ymm11
   4284 	vpaddd	%ymm14,%ymm10,%ymm10
   4285 	vpaddd	%ymm13,%ymm9,%ymm9
   4286 	vpaddd	%ymm12,%ymm8,%ymm8
   4287 	movq	8+0(%rbp),%rdx
   4288 	mulxq	%r10,%r10,%rax
   4289 	addq	%r10,%r14
   4290 	mulxq	%r11,%r11,%r9
   4291 	adcq	%r11,%r15
   4292 	adcq	$0,%r9
   4293 	imulq	%r12,%rdx
   4294 	vpxor	%ymm11,%ymm7,%ymm7
   4295 	vpxor	%ymm10,%ymm6,%ymm6
   4296 	vpxor	%ymm9,%ymm5,%ymm5
   4297 	vpxor	%ymm8,%ymm4,%ymm4
   4298 	vmovdqa	%ymm8,128(%rbp)
   4299 	vpsrld	$20,%ymm7,%ymm8
   4300 	vpslld	$32-20,%ymm7,%ymm7
   4301 	vpxor	%ymm8,%ymm7,%ymm7
   4302 	vpsrld	$20,%ymm6,%ymm8
   4303 	vpslld	$32-20,%ymm6,%ymm6
   4304 	vpxor	%ymm8,%ymm6,%ymm6
   4305 	vpsrld	$20,%ymm5,%ymm8
   4306 	addq	%rax,%r15
   4307 	adcq	%rdx,%r9
   4308 	vpslld	$32-20,%ymm5,%ymm5
   4309 	vpxor	%ymm8,%ymm5,%ymm5
   4310 	vpsrld	$20,%ymm4,%ymm8
   4311 	vpslld	$32-20,%ymm4,%ymm4
   4312 	vpxor	%ymm8,%ymm4,%ymm4
   4313 	vmovdqa	.rol8(%rip),%ymm8
   4314 	vpaddd	%ymm7,%ymm3,%ymm3
   4315 	vpaddd	%ymm6,%ymm2,%ymm2
   4316 	vpaddd	%ymm5,%ymm1,%ymm1
   4317 	vpaddd	%ymm4,%ymm0,%ymm0
   4318 	movq	%r13,%r10
   4319 	movq	%r14,%r11
   4320 	movq	%r15,%r12
   4321 	andq	$3,%r12
   4322 	movq	%r15,%r13
   4323 	andq	$-4,%r13
   4324 	movq	%r9,%r14
   4325 	shrdq	$2,%r9,%r15
   4326 	shrq	$2,%r9
   4327 	addq	%r13,%r10
   4328 	adcq	%r14,%r11
   4329 	adcq	$0,%r12
   4330 	addq	%r15,%r10
   4331 	adcq	%r9,%r11
   4332 	adcq	$0,%r12
   4333 	vpxor	%ymm3,%ymm15,%ymm15
   4334 	vpxor	%ymm2,%ymm14,%ymm14
   4335 	vpxor	%ymm1,%ymm13,%ymm13
   4336 	vpxor	%ymm0,%ymm12,%ymm12
   4337 	vpshufb	%ymm8,%ymm15,%ymm15
   4338 	vpshufb	%ymm8,%ymm14,%ymm14
   4339 	vpshufb	%ymm8,%ymm13,%ymm13
   4340 	vpshufb	%ymm8,%ymm12,%ymm12
   4341 	vmovdqa	128(%rbp),%ymm8
   4342 	addq	2*8(%rsi,%rcx), %r10
   4343 	adcq	8+2*8(%rsi,%rcx), %r11
   4344 	adcq	$1,%r12
   4345 	vpaddd	%ymm15,%ymm11,%ymm11
   4346 	vpaddd	%ymm14,%ymm10,%ymm10
   4347 	vpaddd	%ymm13,%ymm9,%ymm9
   4348 	vpaddd	%ymm12,%ymm8,%ymm8
   4349 	vpxor	%ymm11,%ymm7,%ymm7
   4350 	vpxor	%ymm10,%ymm6,%ymm6
   4351 	vpxor	%ymm9,%ymm5,%ymm5
   4352 	vpxor	%ymm8,%ymm4,%ymm4
   4353 	movq	0+0(%rbp),%rdx
   4354 	movq	%rdx,%r15
   4355 	mulxq	%r10,%r13,%r14
   4356 	mulxq	%r11,%rax,%rdx
   4357 	imulq	%r12,%r15
   4358 	addq	%rax,%r14
   4359 	adcq	%rdx,%r15
   4360 	vmovdqa	%ymm8,128(%rbp)
   4361 	vpsrld	$25,%ymm7,%ymm8
   4362 	vpslld	$32-25,%ymm7,%ymm7
   4363 	vpxor	%ymm8,%ymm7,%ymm7
   4364 	vpsrld	$25,%ymm6,%ymm8
   4365 	vpslld	$32-25,%ymm6,%ymm6
   4366 	vpxor	%ymm8,%ymm6,%ymm6
   4367 	vpsrld	$25,%ymm5,%ymm8
   4368 	vpslld	$32-25,%ymm5,%ymm5
   4369 	vpxor	%ymm8,%ymm5,%ymm5
   4370 	vpsrld	$25,%ymm4,%ymm8
   4371 	vpslld	$32-25,%ymm4,%ymm4
   4372 	vpxor	%ymm8,%ymm4,%ymm4
   4373 	vmovdqa	128(%rbp),%ymm8
   4374 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   4375 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   4376 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   4377 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   4378 	movq	8+0(%rbp),%rdx
   4379 	mulxq	%r10,%r10,%rax
   4380 	addq	%r10,%r14
   4381 	mulxq	%r11,%r11,%r9
   4382 	adcq	%r11,%r15
   4383 	adcq	$0,%r9
   4384 	imulq	%r12,%rdx
   4385 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4386 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   4387 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   4388 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4389 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   4390 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4391 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4392 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4393 	vmovdqa	%ymm8,128(%rbp)
   4394 	vmovdqa	.rol16(%rip),%ymm8
   4395 	vpaddd	%ymm7,%ymm3,%ymm3
   4396 	vpaddd	%ymm6,%ymm2,%ymm2
   4397 	vpaddd	%ymm5,%ymm1,%ymm1
   4398 	vpaddd	%ymm4,%ymm0,%ymm0
   4399 	vpxor	%ymm3,%ymm15,%ymm15
   4400 	vpxor	%ymm2,%ymm14,%ymm14
   4401 	vpxor	%ymm1,%ymm13,%ymm13
   4402 	vpxor	%ymm0,%ymm12,%ymm12
   4403 	addq	%rax,%r15
   4404 	adcq	%rdx,%r9
   4405 	vpshufb	%ymm8,%ymm15,%ymm15
   4406 	vpshufb	%ymm8,%ymm14,%ymm14
   4407 	vpshufb	%ymm8,%ymm13,%ymm13
   4408 	vpshufb	%ymm8,%ymm12,%ymm12
   4409 	vmovdqa	128(%rbp),%ymm8
   4410 	vpaddd	%ymm15,%ymm11,%ymm11
   4411 	vpaddd	%ymm14,%ymm10,%ymm10
   4412 	vpaddd	%ymm13,%ymm9,%ymm9
   4413 	vpaddd	%ymm12,%ymm8,%ymm8
   4414 	movq	%r13,%r10
   4415 	movq	%r14,%r11
   4416 	movq	%r15,%r12
   4417 	andq	$3,%r12
   4418 	movq	%r15,%r13
   4419 	andq	$-4,%r13
   4420 	movq	%r9,%r14
   4421 	shrdq	$2,%r9,%r15
   4422 	shrq	$2,%r9
   4423 	addq	%r13,%r10
   4424 	adcq	%r14,%r11
   4425 	adcq	$0,%r12
   4426 	addq	%r15,%r10
   4427 	adcq	%r9,%r11
   4428 	adcq	$0,%r12
   4429 	vpxor	%ymm11,%ymm7,%ymm7
   4430 	vpxor	%ymm10,%ymm6,%ymm6
   4431 	vpxor	%ymm9,%ymm5,%ymm5
   4432 	vpxor	%ymm8,%ymm4,%ymm4
   4433 	vmovdqa	%ymm8,128(%rbp)
   4434 	vpsrld	$20,%ymm7,%ymm8
   4435 	vpslld	$32-20,%ymm7,%ymm7
   4436 	vpxor	%ymm8,%ymm7,%ymm7
   4437 	addq	4*8(%rsi,%rcx), %r10
   4438 	adcq	8+4*8(%rsi,%rcx), %r11
   4439 	adcq	$1,%r12
   4440 
   4441 	leaq	48(%rcx),%rcx
   4442 	vpsrld	$20,%ymm6,%ymm8
   4443 	vpslld	$32-20,%ymm6,%ymm6
   4444 	vpxor	%ymm8,%ymm6,%ymm6
   4445 	vpsrld	$20,%ymm5,%ymm8
   4446 	vpslld	$32-20,%ymm5,%ymm5
   4447 	vpxor	%ymm8,%ymm5,%ymm5
   4448 	vpsrld	$20,%ymm4,%ymm8
   4449 	vpslld	$32-20,%ymm4,%ymm4
   4450 	vpxor	%ymm8,%ymm4,%ymm4
   4451 	vmovdqa	.rol8(%rip),%ymm8
   4452 	vpaddd	%ymm7,%ymm3,%ymm3
   4453 	vpaddd	%ymm6,%ymm2,%ymm2
   4454 	vpaddd	%ymm5,%ymm1,%ymm1
   4455 	vpaddd	%ymm4,%ymm0,%ymm0
   4456 	vpxor	%ymm3,%ymm15,%ymm15
   4457 	vpxor	%ymm2,%ymm14,%ymm14
   4458 	vpxor	%ymm1,%ymm13,%ymm13
   4459 	vpxor	%ymm0,%ymm12,%ymm12
   4460 	movq	0+0(%rbp),%rdx
   4461 	movq	%rdx,%r15
   4462 	mulxq	%r10,%r13,%r14
   4463 	mulxq	%r11,%rax,%rdx
   4464 	imulq	%r12,%r15
   4465 	addq	%rax,%r14
   4466 	adcq	%rdx,%r15
   4467 	vpshufb	%ymm8,%ymm15,%ymm15
   4468 	vpshufb	%ymm8,%ymm14,%ymm14
   4469 	vpshufb	%ymm8,%ymm13,%ymm13
   4470 	vpshufb	%ymm8,%ymm12,%ymm12
   4471 	vmovdqa	128(%rbp),%ymm8
   4472 	vpaddd	%ymm15,%ymm11,%ymm11
   4473 	vpaddd	%ymm14,%ymm10,%ymm10
   4474 	vpaddd	%ymm13,%ymm9,%ymm9
   4475 	movq	8+0(%rbp),%rdx
   4476 	mulxq	%r10,%r10,%rax
   4477 	addq	%r10,%r14
   4478 	mulxq	%r11,%r11,%r9
   4479 	adcq	%r11,%r15
   4480 	adcq	$0,%r9
   4481 	imulq	%r12,%rdx
   4482 	vpaddd	%ymm12,%ymm8,%ymm8
   4483 	vpxor	%ymm11,%ymm7,%ymm7
   4484 	vpxor	%ymm10,%ymm6,%ymm6
   4485 	vpxor	%ymm9,%ymm5,%ymm5
   4486 	vpxor	%ymm8,%ymm4,%ymm4
   4487 	vmovdqa	%ymm8,128(%rbp)
   4488 	vpsrld	$25,%ymm7,%ymm8
   4489 	vpslld	$32-25,%ymm7,%ymm7
   4490 	addq	%rax,%r15
   4491 	adcq	%rdx,%r9
   4492 	vpxor	%ymm8,%ymm7,%ymm7
   4493 	vpsrld	$25,%ymm6,%ymm8
   4494 	vpslld	$32-25,%ymm6,%ymm6
   4495 	vpxor	%ymm8,%ymm6,%ymm6
   4496 	vpsrld	$25,%ymm5,%ymm8
   4497 	vpslld	$32-25,%ymm5,%ymm5
   4498 	vpxor	%ymm8,%ymm5,%ymm5
   4499 	vpsrld	$25,%ymm4,%ymm8
   4500 	vpslld	$32-25,%ymm4,%ymm4
   4501 	vpxor	%ymm8,%ymm4,%ymm4
   4502 	vmovdqa	128(%rbp),%ymm8
   4503 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   4504 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   4505 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   4506 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   4507 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4508 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   4509 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   4510 	movq	%r13,%r10
   4511 	movq	%r14,%r11
   4512 	movq	%r15,%r12
   4513 	andq	$3,%r12
   4514 	movq	%r15,%r13
   4515 	andq	$-4,%r13
   4516 	movq	%r9,%r14
   4517 	shrdq	$2,%r9,%r15
   4518 	shrq	$2,%r9
   4519 	addq	%r13,%r10
   4520 	adcq	%r14,%r11
   4521 	adcq	$0,%r12
   4522 	addq	%r15,%r10
   4523 	adcq	%r9,%r11
   4524 	adcq	$0,%r12
   4525 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4526 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   4527 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4528 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4529 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4530 
   4531 	cmpq	$60*8,%rcx
   4532 	jne	2b
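// Add the input state back into all four states, then recombine
// 128-bit halves with vperm2i128 into sequential blocks and XOR the
// keystream over 512 bytes, hashing the final two blocks en route.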
   4533 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   4534 	vpaddd	64(%rbp),%ymm7,%ymm7
   4535 	vpaddd	96(%rbp),%ymm11,%ymm11
   4536 	vpaddd	256(%rbp),%ymm15,%ymm15
   4537 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   4538 	vpaddd	64(%rbp),%ymm6,%ymm6
   4539 	vpaddd	96(%rbp),%ymm10,%ymm10
   4540 	vpaddd	224(%rbp),%ymm14,%ymm14
   4541 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   4542 	vpaddd	64(%rbp),%ymm5,%ymm5
   4543 	vpaddd	96(%rbp),%ymm9,%ymm9
   4544 	vpaddd	192(%rbp),%ymm13,%ymm13
   4545 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4546 	vpaddd	64(%rbp),%ymm4,%ymm4
   4547 	vpaddd	96(%rbp),%ymm8,%ymm8
   4548 	vpaddd	160(%rbp),%ymm12,%ymm12
   4549 
   4550 	vmovdqa	%ymm0,128(%rbp)
   4551 	addq	60*8(%rsi),%r10
   4552 	adcq	8+60*8(%rsi),%r11
   4553 	adcq	$1,%r12
   4554 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   4555 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   4556 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   4557 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   4558 	vpxor	0+0(%rsi),%ymm0,%ymm0
   4559 	vpxor	32+0(%rsi),%ymm3,%ymm3
   4560 	vpxor	64+0(%rsi),%ymm7,%ymm7
   4561 	vpxor	96+0(%rsi),%ymm11,%ymm11
   4562 	vmovdqu	%ymm0,0+0(%rdi)
   4563 	vmovdqu	%ymm3,32+0(%rdi)
   4564 	vmovdqu	%ymm7,64+0(%rdi)
   4565 	vmovdqu	%ymm11,96+0(%rdi)
   4566 
   4567 	vmovdqa	128(%rbp),%ymm0
   4568 	movq	0+0(%rbp),%rax
   4569 	movq	%rax,%r15
   4570 	mulq	%r10
   4571 	movq	%rax,%r13
   4572 	movq	%rdx,%r14
   4573 	movq	0+0(%rbp),%rax
   4574 	mulq	%r11
   4575 	imulq	%r12,%r15
   4576 	addq	%rax,%r14
   4577 	adcq	%rdx,%r15
   4578 	movq	8+0(%rbp),%rax
   4579 	movq	%rax,%r9
   4580 	mulq	%r10
   4581 	addq	%rax,%r14
   4582 	adcq	$0,%rdx
   4583 	movq	%rdx,%r10
   4584 	movq	8+0(%rbp),%rax
   4585 	mulq	%r11
   4586 	addq	%rax,%r15
   4587 	adcq	$0,%rdx
   4588 	imulq	%r12,%r9
   4589 	addq	%r10,%r15
   4590 	adcq	%rdx,%r9
   4591 	movq	%r13,%r10
   4592 	movq	%r14,%r11
   4593 	movq	%r15,%r12
   4594 	andq	$3,%r12
   4595 	movq	%r15,%r13
   4596 	andq	$-4,%r13
   4597 	movq	%r9,%r14
   4598 	shrdq	$2,%r9,%r15
   4599 	shrq	$2,%r9
   4600 	addq	%r13,%r10
   4601 	adcq	%r14,%r11
   4602 	adcq	$0,%r12
   4603 	addq	%r15,%r10
   4604 	adcq	%r9,%r11
   4605 	adcq	$0,%r12
   4606 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   4607 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   4608 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   4609 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   4610 	vpxor	0+128(%rsi),%ymm3,%ymm3
   4611 	vpxor	32+128(%rsi),%ymm2,%ymm2
   4612 	vpxor	64+128(%rsi),%ymm6,%ymm6
   4613 	vpxor	96+128(%rsi),%ymm10,%ymm10
   4614 	vmovdqu	%ymm3,0+128(%rdi)
   4615 	vmovdqu	%ymm2,32+128(%rdi)
   4616 	vmovdqu	%ymm6,64+128(%rdi)
   4617 	vmovdqu	%ymm10,96+128(%rdi)
   4618 	addq	60*8+16(%rsi),%r10
   4619 	adcq	8+60*8+16(%rsi),%r11
   4620 	adcq	$1,%r12
   4621 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   4622 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   4623 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   4624 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   4625 	vpxor	0+256(%rsi),%ymm3,%ymm3
   4626 	vpxor	32+256(%rsi),%ymm1,%ymm1
   4627 	vpxor	64+256(%rsi),%ymm5,%ymm5
   4628 	vpxor	96+256(%rsi),%ymm9,%ymm9
   4629 	vmovdqu	%ymm3,0+256(%rdi)
   4630 	vmovdqu	%ymm1,32+256(%rdi)
   4631 	vmovdqu	%ymm5,64+256(%rdi)
   4632 	vmovdqu	%ymm9,96+256(%rdi)
   4633 	movq	0+0(%rbp),%rax
   4634 	movq	%rax,%r15
   4635 	mulq	%r10
   4636 	movq	%rax,%r13
   4637 	movq	%rdx,%r14
   4638 	movq	0+0(%rbp),%rax
   4639 	mulq	%r11
   4640 	imulq	%r12,%r15
   4641 	addq	%rax,%r14
   4642 	adcq	%rdx,%r15
   4643 	movq	8+0(%rbp),%rax
   4644 	movq	%rax,%r9
   4645 	mulq	%r10
   4646 	addq	%rax,%r14
   4647 	adcq	$0,%rdx
   4648 	movq	%rdx,%r10
   4649 	movq	8+0(%rbp),%rax
   4650 	mulq	%r11
   4651 	addq	%rax,%r15
   4652 	adcq	$0,%rdx
   4653 	imulq	%r12,%r9
   4654 	addq	%r10,%r15
   4655 	adcq	%rdx,%r9
   4656 	movq	%r13,%r10
   4657 	movq	%r14,%r11
   4658 	movq	%r15,%r12
   4659 	andq	$3,%r12
   4660 	movq	%r15,%r13
   4661 	andq	$-4,%r13
   4662 	movq	%r9,%r14
   4663 	shrdq	$2,%r9,%r15
   4664 	shrq	$2,%r9
   4665 	addq	%r13,%r10
   4666 	adcq	%r14,%r11
   4667 	adcq	$0,%r12
   4668 	addq	%r15,%r10
   4669 	adcq	%r9,%r11
   4670 	adcq	$0,%r12
   4671 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   4672 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
   4673 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
   4674 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
   4675 	vpxor	0+384(%rsi),%ymm3,%ymm3
   4676 	vpxor	32+384(%rsi),%ymm0,%ymm0
   4677 	vpxor	64+384(%rsi),%ymm4,%ymm4
   4678 	vpxor	96+384(%rsi),%ymm8,%ymm8
   4679 	vmovdqu	%ymm3,0+384(%rdi)
   4680 	vmovdqu	%ymm0,32+384(%rdi)
   4681 	vmovdqu	%ymm4,64+384(%rdi)
   4682 	vmovdqu	%ymm8,96+384(%rdi)
   4683 
   4684 	leaq	512(%rsi),%rsi
   4685 	leaq	512(%rdi),%rdi
   4686 	subq	$512,%rbx
   4687 	jmp	1b
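// Fewer than 512 bytes remain. If nothing is left, finish (the
// vzeroupper avoids AVX->SSE transition penalties in the SSE
// finalization code); otherwise dispatch on the remaining length:
// <=128, <=256, <=384 or <=512 bytes.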
   4688 3:
   4689 	testq	%rbx,%rbx
   4690 	vzeroupper
   4691 	je	open_sse_finalize
   4692 3:
   4693 	cmpq	$128,%rbx
   4694 	ja	3f
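// At most 128 bytes remain: one two-block-wide state is enough.
// %rcx is the remainder rounded down to 16; each of the ten double
// rounds also absorbs one Poly1305 block until %r8 reaches %rcx.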
   4695 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4696 	vmovdqa	64(%rbp),%ymm4
   4697 	vmovdqa	96(%rbp),%ymm8
   4698 	vmovdqa	.avx2_inc(%rip),%ymm12
   4699 	vpaddd	160(%rbp),%ymm12,%ymm12
   4700 	vmovdqa	%ymm12,160(%rbp)
   4701 
   4702 	xorq	%r8,%r8
   4703 	movq	%rbx,%rcx
   4704 	andq	$-16,%rcx
   4705 	testq	%rcx,%rcx
   4706 	je	2f
   4707 1:
   4708 	addq	0*8(%rsi,%r8), %r10
   4709 	adcq	8+0*8(%rsi,%r8), %r11
   4710 	adcq	$1,%r12
   4711 	movq	0+0(%rbp),%rax
   4712 	movq	%rax,%r15
   4713 	mulq	%r10
   4714 	movq	%rax,%r13
   4715 	movq	%rdx,%r14
   4716 	movq	0+0(%rbp),%rax
   4717 	mulq	%r11
   4718 	imulq	%r12,%r15
   4719 	addq	%rax,%r14
   4720 	adcq	%rdx,%r15
   4721 	movq	8+0(%rbp),%rax
   4722 	movq	%rax,%r9
   4723 	mulq	%r10
   4724 	addq	%rax,%r14
   4725 	adcq	$0,%rdx
   4726 	movq	%rdx,%r10
   4727 	movq	8+0(%rbp),%rax
   4728 	mulq	%r11
   4729 	addq	%rax,%r15
   4730 	adcq	$0,%rdx
   4731 	imulq	%r12,%r9
   4732 	addq	%r10,%r15
   4733 	adcq	%rdx,%r9
   4734 	movq	%r13,%r10
   4735 	movq	%r14,%r11
   4736 	movq	%r15,%r12
   4737 	andq	$3,%r12
   4738 	movq	%r15,%r13
   4739 	andq	$-4,%r13
   4740 	movq	%r9,%r14
   4741 	shrdq	$2,%r9,%r15
   4742 	shrq	$2,%r9
   4743 	addq	%r13,%r10
   4744 	adcq	%r14,%r11
   4745 	adcq	$0,%r12
   4746 	addq	%r15,%r10
   4747 	adcq	%r9,%r11
   4748 	adcq	$0,%r12
   4749 
   4750 2:
   4751 	addq	$16,%r8
   4752 	vpaddd	%ymm4,%ymm0,%ymm0
   4753 	vpxor	%ymm0,%ymm12,%ymm12
   4754 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4755 	vpaddd	%ymm12,%ymm8,%ymm8
   4756 	vpxor	%ymm8,%ymm4,%ymm4
   4757 	vpsrld	$20,%ymm4,%ymm3
   4758 	vpslld	$12,%ymm4,%ymm4
   4759 	vpxor	%ymm3,%ymm4,%ymm4
   4760 	vpaddd	%ymm4,%ymm0,%ymm0
   4761 	vpxor	%ymm0,%ymm12,%ymm12
   4762 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4763 	vpaddd	%ymm12,%ymm8,%ymm8
   4764 	vpxor	%ymm8,%ymm4,%ymm4
   4765 	vpslld	$7,%ymm4,%ymm3
   4766 	vpsrld	$25,%ymm4,%ymm4
   4767 	vpxor	%ymm3,%ymm4,%ymm4
   4768 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4769 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4770 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4771 	vpaddd	%ymm4,%ymm0,%ymm0
   4772 	vpxor	%ymm0,%ymm12,%ymm12
   4773 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4774 	vpaddd	%ymm12,%ymm8,%ymm8
   4775 	vpxor	%ymm8,%ymm4,%ymm4
   4776 	vpsrld	$20,%ymm4,%ymm3
   4777 	vpslld	$12,%ymm4,%ymm4
   4778 	vpxor	%ymm3,%ymm4,%ymm4
   4779 	vpaddd	%ymm4,%ymm0,%ymm0
   4780 	vpxor	%ymm0,%ymm12,%ymm12
   4781 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4782 	vpaddd	%ymm12,%ymm8,%ymm8
   4783 	vpxor	%ymm8,%ymm4,%ymm4
   4784 	vpslld	$7,%ymm4,%ymm3
   4785 	vpsrld	$25,%ymm4,%ymm4
   4786 	vpxor	%ymm3,%ymm4,%ymm4
   4787 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4788 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4789 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4790 
   4791 	cmpq	%rcx,%r8
   4792 	jb	1b
   4793 	cmpq	$160,%r8
   4794 	jne	2b
   4795 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   4796 	vpaddd	64(%rbp),%ymm4,%ymm4
   4797 	vpaddd	96(%rbp),%ymm8,%ymm8
   4798 	vpaddd	160(%rbp),%ymm12,%ymm12
   4799 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   4800 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   4801 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   4802 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   4803 	vmovdqa	%ymm3,%ymm8
   4804 
   4805 	jmp	open_avx2_tail_loop
   4806 3:
   4807 	cmpq	$256,%rbx
   4808 	ja	3f
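// 129..256 bytes remain: two two-block-wide states; the state with
// the lower counters (ymm1/ymm5/ymm9/ymm13) is emitted first.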
   4809 	vmovdqa	.chacha20_consts(%rip),%ymm0
   4810 	vmovdqa	64(%rbp),%ymm4
   4811 	vmovdqa	96(%rbp),%ymm8
   4812 	vmovdqa	%ymm0,%ymm1
   4813 	vmovdqa	%ymm4,%ymm5
   4814 	vmovdqa	%ymm8,%ymm9
   4815 	vmovdqa	.avx2_inc(%rip),%ymm12
   4816 	vpaddd	160(%rbp),%ymm12,%ymm13
   4817 	vpaddd	%ymm13,%ymm12,%ymm12
   4818 	vmovdqa	%ymm12,160(%rbp)
   4819 	vmovdqa	%ymm13,192(%rbp)
   4820 
   4821 	movq	%rbx,128(%rbp)
   4822 	movq	%rbx,%rcx
   4823 	subq	$128,%rcx
   4824 	shrq	$4,%rcx
   4825 	movq	$10,%r8
   4826 	cmpq	$10,%rcx
   4827 	cmovgq	%r8,%rcx
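// Interleave min((remaining - 128) / 16, 10) Poly1305 blocks into
// the ten double rounds; any full blocks beyond that are absorbed by
// the catch-up loop that follows the rounds.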
   4828 	movq	%rsi,%rbx
   4829 	xorq	%r8,%r8
   4830 1:
   4831 	addq	0(%rbx),%r10
   4832 	adcq	8+0(%rbx),%r11
   4833 	adcq	$1,%r12
   4834 	movq	0+0(%rbp),%rdx
   4835 	movq	%rdx,%r15
   4836 	mulxq	%r10,%r13,%r14
   4837 	mulxq	%r11,%rax,%rdx
   4838 	imulq	%r12,%r15
   4839 	addq	%rax,%r14
   4840 	adcq	%rdx,%r15
   4841 	movq	8+0(%rbp),%rdx
   4842 	mulxq	%r10,%r10,%rax
   4843 	addq	%r10,%r14
   4844 	mulxq	%r11,%r11,%r9
   4845 	adcq	%r11,%r15
   4846 	adcq	$0,%r9
   4847 	imulq	%r12,%rdx
   4848 	addq	%rax,%r15
   4849 	adcq	%rdx,%r9
   4850 	movq	%r13,%r10
   4851 	movq	%r14,%r11
   4852 	movq	%r15,%r12
   4853 	andq	$3,%r12
   4854 	movq	%r15,%r13
   4855 	andq	$-4,%r13
   4856 	movq	%r9,%r14
   4857 	shrdq	$2,%r9,%r15
   4858 	shrq	$2,%r9
   4859 	addq	%r13,%r10
   4860 	adcq	%r14,%r11
   4861 	adcq	$0,%r12
   4862 	addq	%r15,%r10
   4863 	adcq	%r9,%r11
   4864 	adcq	$0,%r12
   4865 
   4866 	leaq	16(%rbx),%rbx
   4867 2:
   4868 	vpaddd	%ymm4,%ymm0,%ymm0
   4869 	vpxor	%ymm0,%ymm12,%ymm12
   4870 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4871 	vpaddd	%ymm12,%ymm8,%ymm8
   4872 	vpxor	%ymm8,%ymm4,%ymm4
   4873 	vpsrld	$20,%ymm4,%ymm3
   4874 	vpslld	$12,%ymm4,%ymm4
   4875 	vpxor	%ymm3,%ymm4,%ymm4
   4876 	vpaddd	%ymm4,%ymm0,%ymm0
   4877 	vpxor	%ymm0,%ymm12,%ymm12
   4878 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4879 	vpaddd	%ymm12,%ymm8,%ymm8
   4880 	vpxor	%ymm8,%ymm4,%ymm4
   4881 	vpslld	$7,%ymm4,%ymm3
   4882 	vpsrld	$25,%ymm4,%ymm4
   4883 	vpxor	%ymm3,%ymm4,%ymm4
   4884 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   4885 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4886 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   4887 	vpaddd	%ymm5,%ymm1,%ymm1
   4888 	vpxor	%ymm1,%ymm13,%ymm13
   4889 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   4890 	vpaddd	%ymm13,%ymm9,%ymm9
   4891 	vpxor	%ymm9,%ymm5,%ymm5
   4892 	vpsrld	$20,%ymm5,%ymm3
   4893 	vpslld	$12,%ymm5,%ymm5
   4894 	vpxor	%ymm3,%ymm5,%ymm5
   4895 	vpaddd	%ymm5,%ymm1,%ymm1
   4896 	vpxor	%ymm1,%ymm13,%ymm13
   4897 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   4898 	vpaddd	%ymm13,%ymm9,%ymm9
   4899 	vpxor	%ymm9,%ymm5,%ymm5
   4900 	vpslld	$7,%ymm5,%ymm3
   4901 	vpsrld	$25,%ymm5,%ymm5
   4902 	vpxor	%ymm3,%ymm5,%ymm5
   4903 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   4904 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4905 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   4906 
   4907 	incq	%r8
   4908 	vpaddd	%ymm4,%ymm0,%ymm0
   4909 	vpxor	%ymm0,%ymm12,%ymm12
   4910 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   4911 	vpaddd	%ymm12,%ymm8,%ymm8
   4912 	vpxor	%ymm8,%ymm4,%ymm4
   4913 	vpsrld	$20,%ymm4,%ymm3
   4914 	vpslld	$12,%ymm4,%ymm4
   4915 	vpxor	%ymm3,%ymm4,%ymm4
   4916 	vpaddd	%ymm4,%ymm0,%ymm0
   4917 	vpxor	%ymm0,%ymm12,%ymm12
   4918 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   4919 	vpaddd	%ymm12,%ymm8,%ymm8
   4920 	vpxor	%ymm8,%ymm4,%ymm4
   4921 	vpslld	$7,%ymm4,%ymm3
   4922 	vpsrld	$25,%ymm4,%ymm4
   4923 	vpxor	%ymm3,%ymm4,%ymm4
   4924 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   4925 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   4926 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   4927 	vpaddd	%ymm5,%ymm1,%ymm1
   4928 	vpxor	%ymm1,%ymm13,%ymm13
   4929 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   4930 	vpaddd	%ymm13,%ymm9,%ymm9
   4931 	vpxor	%ymm9,%ymm5,%ymm5
   4932 	vpsrld	$20,%ymm5,%ymm3
   4933 	vpslld	$12,%ymm5,%ymm5
   4934 	vpxor	%ymm3,%ymm5,%ymm5
   4935 	vpaddd	%ymm5,%ymm1,%ymm1
   4936 	vpxor	%ymm1,%ymm13,%ymm13
   4937 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   4938 	vpaddd	%ymm13,%ymm9,%ymm9
   4939 	vpxor	%ymm9,%ymm5,%ymm5
   4940 	vpslld	$7,%ymm5,%ymm3
   4941 	vpsrld	$25,%ymm5,%ymm5
   4942 	vpxor	%ymm3,%ymm5,%ymm5
   4943 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   4944 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   4945 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   4946 	vpaddd	%ymm6,%ymm2,%ymm2
   4947 	vpxor	%ymm2,%ymm14,%ymm14
   4948 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   4949 	vpaddd	%ymm14,%ymm10,%ymm10
   4950 	vpxor	%ymm10,%ymm6,%ymm6
   4951 	vpsrld	$20,%ymm6,%ymm3
   4952 	vpslld	$12,%ymm6,%ymm6
   4953 	vpxor	%ymm3,%ymm6,%ymm6
   4954 	vpaddd	%ymm6,%ymm2,%ymm2
   4955 	vpxor	%ymm2,%ymm14,%ymm14
   4956 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   4957 	vpaddd	%ymm14,%ymm10,%ymm10
   4958 	vpxor	%ymm10,%ymm6,%ymm6
   4959 	vpslld	$7,%ymm6,%ymm3
   4960 	vpsrld	$25,%ymm6,%ymm6
   4961 	vpxor	%ymm3,%ymm6,%ymm6
   4962 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   4963 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   4964 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   4965 
   4966 	cmpq	%rcx,%r8
   4967 	jb	1b
   4968 	cmpq	$10,%r8
   4969 	jne	2b
   4970 	movq	%rbx,%r8
   4971 	subq	%rsi,%rbx
   4972 	movq	%rbx,%rcx
   4973 	movq	128(%rbp),%rbx
   4974 1:
   4975 	addq	$16,%rcx
   4976 	cmpq	%rbx,%rcx
   4977 	jg	1f
   4978 	addq	0(%r8),%r10
   4979 	adcq	8+0(%r8),%r11
   4980 	adcq	$1,%r12
   4981 	movq	0+0(%rbp),%rdx
   4982 	movq	%rdx,%r15
   4983 	mulxq	%r10,%r13,%r14
   4984 	mulxq	%r11,%rax,%rdx
   4985 	imulq	%r12,%r15
   4986 	addq	%rax,%r14
   4987 	adcq	%rdx,%r15
   4988 	movq	8+0(%rbp),%rdx
   4989 	mulxq	%r10,%r10,%rax
   4990 	addq	%r10,%r14
   4991 	mulxq	%r11,%r11,%r9
   4992 	adcq	%r11,%r15
   4993 	adcq	$0,%r9
   4994 	imulq	%r12,%rdx
   4995 	addq	%rax,%r15
   4996 	adcq	%rdx,%r9
   4997 	movq	%r13,%r10
   4998 	movq	%r14,%r11
   4999 	movq	%r15,%r12
   5000 	andq	$3,%r12
   5001 	movq	%r15,%r13
   5002 	andq	$-4,%r13
   5003 	movq	%r9,%r14
   5004 	shrdq	$2,%r9,%r15
   5005 	shrq	$2,%r9
   5006 	addq	%r13,%r10
   5007 	adcq	%r14,%r11
   5008 	adcq	$0,%r12
   5009 	addq	%r15,%r10
   5010 	adcq	%r9,%r11
   5011 	adcq	$0,%r12
   5012 
   5013 	leaq	16(%r8),%r8
   5014 	jmp	1b
   5015 1:
   5016 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5017 	vpaddd	64(%rbp),%ymm5,%ymm5
   5018 	vpaddd	96(%rbp),%ymm9,%ymm9
   5019 	vpaddd	192(%rbp),%ymm13,%ymm13
   5020 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5021 	vpaddd	64(%rbp),%ymm4,%ymm4
   5022 	vpaddd	96(%rbp),%ymm8,%ymm8
   5023 	vpaddd	160(%rbp),%ymm12,%ymm12
   5024 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5025 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5026 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5027 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5028 	vpxor	0+0(%rsi),%ymm3,%ymm3
   5029 	vpxor	32+0(%rsi),%ymm1,%ymm1
   5030 	vpxor	64+0(%rsi),%ymm5,%ymm5
   5031 	vpxor	96+0(%rsi),%ymm9,%ymm9
   5032 	vmovdqu	%ymm3,0+0(%rdi)
   5033 	vmovdqu	%ymm1,32+0(%rdi)
   5034 	vmovdqu	%ymm5,64+0(%rdi)
   5035 	vmovdqu	%ymm9,96+0(%rdi)
   5036 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5037 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5038 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5039 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5040 	vmovdqa	%ymm3,%ymm8
   5041 
   5042 	leaq	128(%rsi),%rsi
   5043 	leaq	128(%rdi),%rdi
   5044 	subq	$128,%rbx
   5045 	jmp	open_avx2_tail_loop
   5046 3:
   5047 	cmpq	$384,%rbx
   5048 	ja	3f
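// 257..384 bytes remain: three two-block-wide states. Each pass of
// the round loop absorbs up to two Poly1305 blocks (the +6 on the
// block budget keeps the total within the data actually available);
// a catch-up loop handles the rest.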
   5049 	vmovdqa	.chacha20_consts(%rip),%ymm0
   5050 	vmovdqa	64(%rbp),%ymm4
   5051 	vmovdqa	96(%rbp),%ymm8
   5052 	vmovdqa	%ymm0,%ymm1
   5053 	vmovdqa	%ymm4,%ymm5
   5054 	vmovdqa	%ymm8,%ymm9
   5055 	vmovdqa	%ymm0,%ymm2
   5056 	vmovdqa	%ymm4,%ymm6
   5057 	vmovdqa	%ymm8,%ymm10
   5058 	vmovdqa	.avx2_inc(%rip),%ymm12
   5059 	vpaddd	160(%rbp),%ymm12,%ymm14
   5060 	vpaddd	%ymm14,%ymm12,%ymm13
   5061 	vpaddd	%ymm13,%ymm12,%ymm12
   5062 	vmovdqa	%ymm12,160(%rbp)
   5063 	vmovdqa	%ymm13,192(%rbp)
   5064 	vmovdqa	%ymm14,224(%rbp)
   5065 
   5066 	movq	%rbx,128(%rbp)
   5067 	movq	%rbx,%rcx
   5068 	subq	$256,%rcx
   5069 	shrq	$4,%rcx
   5070 	addq	$6,%rcx
   5071 	movq	$10,%r8
   5072 	cmpq	$10,%rcx
   5073 	cmovgq	%r8,%rcx
   5074 	movq	%rsi,%rbx
   5075 	xorq	%r8,%r8
   5076 1:
   5077 	addq	0(%rbx),%r10
   5078 	adcq	8+0(%rbx),%r11
   5079 	adcq	$1,%r12
   5080 	movq	0+0(%rbp),%rdx
   5081 	movq	%rdx,%r15
   5082 	mulxq	%r10,%r13,%r14
   5083 	mulxq	%r11,%rax,%rdx
   5084 	imulq	%r12,%r15
   5085 	addq	%rax,%r14
   5086 	adcq	%rdx,%r15
   5087 	movq	8+0(%rbp),%rdx
   5088 	mulxq	%r10,%r10,%rax
   5089 	addq	%r10,%r14
   5090 	mulxq	%r11,%r11,%r9
   5091 	adcq	%r11,%r15
   5092 	adcq	$0,%r9
   5093 	imulq	%r12,%rdx
   5094 	addq	%rax,%r15
   5095 	adcq	%rdx,%r9
   5096 	movq	%r13,%r10
   5097 	movq	%r14,%r11
   5098 	movq	%r15,%r12
   5099 	andq	$3,%r12
   5100 	movq	%r15,%r13
   5101 	andq	$-4,%r13
   5102 	movq	%r9,%r14
   5103 	shrdq	$2,%r9,%r15
   5104 	shrq	$2,%r9
   5105 	addq	%r13,%r10
   5106 	adcq	%r14,%r11
   5107 	adcq	$0,%r12
   5108 	addq	%r15,%r10
   5109 	adcq	%r9,%r11
   5110 	adcq	$0,%r12
   5111 
   5112 	leaq	16(%rbx),%rbx
   5113 2:
   5114 	vpaddd	%ymm6,%ymm2,%ymm2
   5115 	vpxor	%ymm2,%ymm14,%ymm14
   5116 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   5117 	vpaddd	%ymm14,%ymm10,%ymm10
   5118 	vpxor	%ymm10,%ymm6,%ymm6
   5119 	vpsrld	$20,%ymm6,%ymm3
   5120 	vpslld	$12,%ymm6,%ymm6
   5121 	vpxor	%ymm3,%ymm6,%ymm6
   5122 	vpaddd	%ymm6,%ymm2,%ymm2
   5123 	vpxor	%ymm2,%ymm14,%ymm14
   5124 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   5125 	vpaddd	%ymm14,%ymm10,%ymm10
   5126 	vpxor	%ymm10,%ymm6,%ymm6
   5127 	vpslld	$7,%ymm6,%ymm3
   5128 	vpsrld	$25,%ymm6,%ymm6
   5129 	vpxor	%ymm3,%ymm6,%ymm6
   5130 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   5131 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5132 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   5133 	vpaddd	%ymm5,%ymm1,%ymm1
   5134 	vpxor	%ymm1,%ymm13,%ymm13
   5135 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5136 	vpaddd	%ymm13,%ymm9,%ymm9
   5137 	vpxor	%ymm9,%ymm5,%ymm5
   5138 	vpsrld	$20,%ymm5,%ymm3
   5139 	vpslld	$12,%ymm5,%ymm5
   5140 	vpxor	%ymm3,%ymm5,%ymm5
   5141 	vpaddd	%ymm5,%ymm1,%ymm1
   5142 	vpxor	%ymm1,%ymm13,%ymm13
   5143 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5144 	vpaddd	%ymm13,%ymm9,%ymm9
   5145 	vpxor	%ymm9,%ymm5,%ymm5
   5146 	vpslld	$7,%ymm5,%ymm3
   5147 	vpsrld	$25,%ymm5,%ymm5
   5148 	vpxor	%ymm3,%ymm5,%ymm5
   5149 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5150 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5151 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5152 	vpaddd	%ymm4,%ymm0,%ymm0
   5153 	vpxor	%ymm0,%ymm12,%ymm12
   5154 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5155 	vpaddd	%ymm12,%ymm8,%ymm8
   5156 	vpxor	%ymm8,%ymm4,%ymm4
   5157 	vpsrld	$20,%ymm4,%ymm3
   5158 	vpslld	$12,%ymm4,%ymm4
   5159 	vpxor	%ymm3,%ymm4,%ymm4
   5160 	vpaddd	%ymm4,%ymm0,%ymm0
   5161 	vpxor	%ymm0,%ymm12,%ymm12
   5162 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5163 	vpaddd	%ymm12,%ymm8,%ymm8
   5164 	vpxor	%ymm8,%ymm4,%ymm4
   5165 	vpslld	$7,%ymm4,%ymm3
   5166 	vpsrld	$25,%ymm4,%ymm4
   5167 	vpxor	%ymm3,%ymm4,%ymm4
   5168 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5169 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5170 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5171 	addq	0(%rbx),%r10
   5172 	adcq	8+0(%rbx),%r11
   5173 	adcq	$1,%r12
   5174 	movq	0+0(%rbp),%rax
   5175 	movq	%rax,%r15
   5176 	mulq	%r10
   5177 	movq	%rax,%r13
   5178 	movq	%rdx,%r14
   5179 	movq	0+0(%rbp),%rax
   5180 	mulq	%r11
   5181 	imulq	%r12,%r15
   5182 	addq	%rax,%r14
   5183 	adcq	%rdx,%r15
   5184 	movq	8+0(%rbp),%rax
   5185 	movq	%rax,%r9
   5186 	mulq	%r10
   5187 	addq	%rax,%r14
   5188 	adcq	$0,%rdx
   5189 	movq	%rdx,%r10
   5190 	movq	8+0(%rbp),%rax
   5191 	mulq	%r11
   5192 	addq	%rax,%r15
   5193 	adcq	$0,%rdx
   5194 	imulq	%r12,%r9
   5195 	addq	%r10,%r15
   5196 	adcq	%rdx,%r9
   5197 	movq	%r13,%r10
   5198 	movq	%r14,%r11
   5199 	movq	%r15,%r12
   5200 	andq	$3,%r12
   5201 	movq	%r15,%r13
   5202 	andq	$-4,%r13
   5203 	movq	%r9,%r14
   5204 	shrdq	$2,%r9,%r15
   5205 	shrq	$2,%r9
   5206 	addq	%r13,%r10
   5207 	adcq	%r14,%r11
   5208 	adcq	$0,%r12
   5209 	addq	%r15,%r10
   5210 	adcq	%r9,%r11
   5211 	adcq	$0,%r12
   5212 
   5213 	leaq	16(%rbx),%rbx
   5214 	incq	%r8
   5215 	vpaddd	%ymm6,%ymm2,%ymm2
   5216 	vpxor	%ymm2,%ymm14,%ymm14
   5217 	vpshufb	.rol16(%rip),%ymm14,%ymm14
   5218 	vpaddd	%ymm14,%ymm10,%ymm10
   5219 	vpxor	%ymm10,%ymm6,%ymm6
   5220 	vpsrld	$20,%ymm6,%ymm3
   5221 	vpslld	$12,%ymm6,%ymm6
   5222 	vpxor	%ymm3,%ymm6,%ymm6
   5223 	vpaddd	%ymm6,%ymm2,%ymm2
   5224 	vpxor	%ymm2,%ymm14,%ymm14
   5225 	vpshufb	.rol8(%rip),%ymm14,%ymm14
   5226 	vpaddd	%ymm14,%ymm10,%ymm10
   5227 	vpxor	%ymm10,%ymm6,%ymm6
   5228 	vpslld	$7,%ymm6,%ymm3
   5229 	vpsrld	$25,%ymm6,%ymm6
   5230 	vpxor	%ymm3,%ymm6,%ymm6
   5231 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   5232 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5233 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   5234 	vpaddd	%ymm5,%ymm1,%ymm1
   5235 	vpxor	%ymm1,%ymm13,%ymm13
   5236 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5237 	vpaddd	%ymm13,%ymm9,%ymm9
   5238 	vpxor	%ymm9,%ymm5,%ymm5
   5239 	vpsrld	$20,%ymm5,%ymm3
   5240 	vpslld	$12,%ymm5,%ymm5
   5241 	vpxor	%ymm3,%ymm5,%ymm5
   5242 	vpaddd	%ymm5,%ymm1,%ymm1
   5243 	vpxor	%ymm1,%ymm13,%ymm13
   5244 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5245 	vpaddd	%ymm13,%ymm9,%ymm9
   5246 	vpxor	%ymm9,%ymm5,%ymm5
   5247 	vpslld	$7,%ymm5,%ymm3
   5248 	vpsrld	$25,%ymm5,%ymm5
   5249 	vpxor	%ymm3,%ymm5,%ymm5
   5250 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5251 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5252 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5253 	vpaddd	%ymm4,%ymm0,%ymm0
   5254 	vpxor	%ymm0,%ymm12,%ymm12
   5255 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5256 	vpaddd	%ymm12,%ymm8,%ymm8
   5257 	vpxor	%ymm8,%ymm4,%ymm4
   5258 	vpsrld	$20,%ymm4,%ymm3
   5259 	vpslld	$12,%ymm4,%ymm4
   5260 	vpxor	%ymm3,%ymm4,%ymm4
   5261 	vpaddd	%ymm4,%ymm0,%ymm0
   5262 	vpxor	%ymm0,%ymm12,%ymm12
   5263 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5264 	vpaddd	%ymm12,%ymm8,%ymm8
   5265 	vpxor	%ymm8,%ymm4,%ymm4
   5266 	vpslld	$7,%ymm4,%ymm3
   5267 	vpsrld	$25,%ymm4,%ymm4
   5268 	vpxor	%ymm3,%ymm4,%ymm4
   5269 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5270 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5271 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5272 
   5273 	cmpq	%rcx,%r8
   5274 	jb	1b
   5275 	cmpq	$10,%r8
   5276 	jne	2b
   5277 	movq	%rbx,%r8
   5278 	subq	%rsi,%rbx
   5279 	movq	%rbx,%rcx
   5280 	movq	128(%rbp),%rbx
   5281 1:
   5282 	addq	$16,%rcx
   5283 	cmpq	%rbx,%rcx
   5284 	jg	1f
   5285 	addq	0(%r8),%r10
   5286 	adcq	8+0(%r8),%r11
   5287 	adcq	$1,%r12
   5288 	movq	0+0(%rbp),%rdx
   5289 	movq	%rdx,%r15
   5290 	mulxq	%r10,%r13,%r14
   5291 	mulxq	%r11,%rax,%rdx
   5292 	imulq	%r12,%r15
   5293 	addq	%rax,%r14
   5294 	adcq	%rdx,%r15
   5295 	movq	8+0(%rbp),%rdx
   5296 	mulxq	%r10,%r10,%rax
   5297 	addq	%r10,%r14
   5298 	mulxq	%r11,%r11,%r9
   5299 	adcq	%r11,%r15
   5300 	adcq	$0,%r9
   5301 	imulq	%r12,%rdx
   5302 	addq	%rax,%r15
   5303 	adcq	%rdx,%r9
   5304 	movq	%r13,%r10
   5305 	movq	%r14,%r11
   5306 	movq	%r15,%r12
   5307 	andq	$3,%r12
   5308 	movq	%r15,%r13
   5309 	andq	$-4,%r13
   5310 	movq	%r9,%r14
   5311 	shrdq	$2,%r9,%r15
   5312 	shrq	$2,%r9
   5313 	addq	%r13,%r10
   5314 	adcq	%r14,%r11
   5315 	adcq	$0,%r12
   5316 	addq	%r15,%r10
   5317 	adcq	%r9,%r11
   5318 	adcq	$0,%r12
   5319 
   5320 	leaq	16(%r8),%r8
   5321 	jmp	1b
   5322 1:
   5323 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   5324 	vpaddd	64(%rbp),%ymm6,%ymm6
   5325 	vpaddd	96(%rbp),%ymm10,%ymm10
   5326 	vpaddd	224(%rbp),%ymm14,%ymm14
   5327 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5328 	vpaddd	64(%rbp),%ymm5,%ymm5
   5329 	vpaddd	96(%rbp),%ymm9,%ymm9
   5330 	vpaddd	192(%rbp),%ymm13,%ymm13
   5331 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5332 	vpaddd	64(%rbp),%ymm4,%ymm4
   5333 	vpaddd	96(%rbp),%ymm8,%ymm8
   5334 	vpaddd	160(%rbp),%ymm12,%ymm12
   5335 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   5336 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   5337 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   5338 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   5339 	vpxor	0+0(%rsi),%ymm3,%ymm3
   5340 	vpxor	32+0(%rsi),%ymm2,%ymm2
   5341 	vpxor	64+0(%rsi),%ymm6,%ymm6
   5342 	vpxor	96+0(%rsi),%ymm10,%ymm10
   5343 	vmovdqu	%ymm3,0+0(%rdi)
   5344 	vmovdqu	%ymm2,32+0(%rdi)
   5345 	vmovdqu	%ymm6,64+0(%rdi)
   5346 	vmovdqu	%ymm10,96+0(%rdi)
   5347 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5348 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5349 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5350 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5351 	vpxor	0+128(%rsi),%ymm3,%ymm3
   5352 	vpxor	32+128(%rsi),%ymm1,%ymm1
   5353 	vpxor	64+128(%rsi),%ymm5,%ymm5
   5354 	vpxor	96+128(%rsi),%ymm9,%ymm9
   5355 	vmovdqu	%ymm3,0+128(%rdi)
   5356 	vmovdqu	%ymm1,32+128(%rdi)
   5357 	vmovdqu	%ymm5,64+128(%rdi)
   5358 	vmovdqu	%ymm9,96+128(%rdi)
   5359 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5360 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5361 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5362 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5363 	vmovdqa	%ymm3,%ymm8
   5364 
   5365 	leaq	256(%rsi),%rsi
   5366 	leaq	256(%rdi),%rdi
   5367 	subq	$256,%rbx
   5368 	jmp	open_avx2_tail_loop
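// 385..512 bytes remain on the last pass: the full four-state setup,
// again with Poly1305 interleaved into the rounds plus a catch-up
// loop. Only 384 bytes are decrypted here; the fourth state's
// keystream stays queued in ymm0/ymm4/ymm8/ymm12 for the tail loop.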
   5369 3:
   5370 	vmovdqa	.chacha20_consts(%rip),%ymm0
   5371 	vmovdqa	64(%rbp),%ymm4
   5372 	vmovdqa	96(%rbp),%ymm8
   5373 	vmovdqa	%ymm0,%ymm1
   5374 	vmovdqa	%ymm4,%ymm5
   5375 	vmovdqa	%ymm8,%ymm9
   5376 	vmovdqa	%ymm0,%ymm2
   5377 	vmovdqa	%ymm4,%ymm6
   5378 	vmovdqa	%ymm8,%ymm10
   5379 	vmovdqa	%ymm0,%ymm3
   5380 	vmovdqa	%ymm4,%ymm7
   5381 	vmovdqa	%ymm8,%ymm11
   5382 	vmovdqa	.avx2_inc(%rip),%ymm12
   5383 	vpaddd	160(%rbp),%ymm12,%ymm15
   5384 	vpaddd	%ymm15,%ymm12,%ymm14
   5385 	vpaddd	%ymm14,%ymm12,%ymm13
   5386 	vpaddd	%ymm13,%ymm12,%ymm12
   5387 	vmovdqa	%ymm15,256(%rbp)
   5388 	vmovdqa	%ymm14,224(%rbp)
   5389 	vmovdqa	%ymm13,192(%rbp)
   5390 	vmovdqa	%ymm12,160(%rbp)
   5391 
   5392 	xorq	%rcx,%rcx
   5393 	movq	%rsi,%r8
   5394 1:
   5395 	addq	0(%r8),%r10
   5396 	adcq	8+0(%r8),%r11
   5397 	adcq	$1,%r12
   5398 	movq	0+0(%rbp),%rax
   5399 	movq	%rax,%r15
   5400 	mulq	%r10
   5401 	movq	%rax,%r13
   5402 	movq	%rdx,%r14
   5403 	movq	0+0(%rbp),%rax
   5404 	mulq	%r11
   5405 	imulq	%r12,%r15
   5406 	addq	%rax,%r14
   5407 	adcq	%rdx,%r15
   5408 	movq	8+0(%rbp),%rax
   5409 	movq	%rax,%r9
   5410 	mulq	%r10
   5411 	addq	%rax,%r14
   5412 	adcq	$0,%rdx
   5413 	movq	%rdx,%r10
   5414 	movq	8+0(%rbp),%rax
   5415 	mulq	%r11
   5416 	addq	%rax,%r15
   5417 	adcq	$0,%rdx
   5418 	imulq	%r12,%r9
   5419 	addq	%r10,%r15
   5420 	adcq	%rdx,%r9
   5421 	movq	%r13,%r10
   5422 	movq	%r14,%r11
   5423 	movq	%r15,%r12
   5424 	andq	$3,%r12
   5425 	movq	%r15,%r13
   5426 	andq	$-4,%r13
   5427 	movq	%r9,%r14
   5428 	shrdq	$2,%r9,%r15
   5429 	shrq	$2,%r9
   5430 	addq	%r13,%r10
   5431 	adcq	%r14,%r11
   5432 	adcq	$0,%r12
   5433 	addq	%r15,%r10
   5434 	adcq	%r9,%r11
   5435 	adcq	$0,%r12
   5436 
   5437 	leaq	16(%r8),%r8
   5438 2:
   5439 	vmovdqa	%ymm8,128(%rbp)
   5440 	vmovdqa	.rol16(%rip),%ymm8
   5441 	vpaddd	%ymm7,%ymm3,%ymm3
   5442 	vpaddd	%ymm6,%ymm2,%ymm2
   5443 	vpaddd	%ymm5,%ymm1,%ymm1
   5444 	vpaddd	%ymm4,%ymm0,%ymm0
   5445 	vpxor	%ymm3,%ymm15,%ymm15
   5446 	vpxor	%ymm2,%ymm14,%ymm14
   5447 	vpxor	%ymm1,%ymm13,%ymm13
   5448 	vpxor	%ymm0,%ymm12,%ymm12
   5449 	vpshufb	%ymm8,%ymm15,%ymm15
   5450 	vpshufb	%ymm8,%ymm14,%ymm14
   5451 	vpshufb	%ymm8,%ymm13,%ymm13
   5452 	vpshufb	%ymm8,%ymm12,%ymm12
   5453 	vmovdqa	128(%rbp),%ymm8
   5454 	vpaddd	%ymm15,%ymm11,%ymm11
   5455 	vpaddd	%ymm14,%ymm10,%ymm10
   5456 	vpaddd	%ymm13,%ymm9,%ymm9
   5457 	vpaddd	%ymm12,%ymm8,%ymm8
   5458 	vpxor	%ymm11,%ymm7,%ymm7
   5459 	vpxor	%ymm10,%ymm6,%ymm6
   5460 	vpxor	%ymm9,%ymm5,%ymm5
   5461 	vpxor	%ymm8,%ymm4,%ymm4
   5462 	vmovdqa	%ymm8,128(%rbp)
   5463 	vpsrld	$20,%ymm7,%ymm8
   5464 	vpslld	$32-20,%ymm7,%ymm7
   5465 	vpxor	%ymm8,%ymm7,%ymm7
   5466 	vpsrld	$20,%ymm6,%ymm8
   5467 	vpslld	$32-20,%ymm6,%ymm6
   5468 	vpxor	%ymm8,%ymm6,%ymm6
   5469 	vpsrld	$20,%ymm5,%ymm8
   5470 	vpslld	$32-20,%ymm5,%ymm5
   5471 	vpxor	%ymm8,%ymm5,%ymm5
   5472 	vpsrld	$20,%ymm4,%ymm8
   5473 	vpslld	$32-20,%ymm4,%ymm4
   5474 	vpxor	%ymm8,%ymm4,%ymm4
   5475 	vmovdqa	.rol8(%rip),%ymm8
   5476 	addq	0(%r8),%r10
   5477 	adcq	8+0(%r8),%r11
   5478 	adcq	$1,%r12
   5479 	movq	0+0(%rbp),%rdx
   5480 	movq	%rdx,%r15
   5481 	mulxq	%r10,%r13,%r14
   5482 	mulxq	%r11,%rax,%rdx
   5483 	imulq	%r12,%r15
   5484 	addq	%rax,%r14
   5485 	adcq	%rdx,%r15
   5486 	movq	8+0(%rbp),%rdx
   5487 	mulxq	%r10,%r10,%rax
   5488 	addq	%r10,%r14
   5489 	mulxq	%r11,%r11,%r9
   5490 	adcq	%r11,%r15
   5491 	adcq	$0,%r9
   5492 	imulq	%r12,%rdx
   5493 	addq	%rax,%r15
   5494 	adcq	%rdx,%r9
   5495 	movq	%r13,%r10
   5496 	movq	%r14,%r11
   5497 	movq	%r15,%r12
   5498 	andq	$3,%r12
   5499 	movq	%r15,%r13
   5500 	andq	$-4,%r13
   5501 	movq	%r9,%r14
   5502 	shrdq	$2,%r9,%r15
   5503 	shrq	$2,%r9
   5504 	addq	%r13,%r10
   5505 	adcq	%r14,%r11
   5506 	adcq	$0,%r12
   5507 	addq	%r15,%r10
   5508 	adcq	%r9,%r11
   5509 	adcq	$0,%r12
   5510 	vpaddd	%ymm7,%ymm3,%ymm3
   5511 	vpaddd	%ymm6,%ymm2,%ymm2
   5512 	vpaddd	%ymm5,%ymm1,%ymm1
   5513 	vpaddd	%ymm4,%ymm0,%ymm0
   5514 	vpxor	%ymm3,%ymm15,%ymm15
   5515 	vpxor	%ymm2,%ymm14,%ymm14
   5516 	vpxor	%ymm1,%ymm13,%ymm13
   5517 	vpxor	%ymm0,%ymm12,%ymm12
   5518 	vpshufb	%ymm8,%ymm15,%ymm15
   5519 	vpshufb	%ymm8,%ymm14,%ymm14
   5520 	vpshufb	%ymm8,%ymm13,%ymm13
   5521 	vpshufb	%ymm8,%ymm12,%ymm12
   5522 	vmovdqa	128(%rbp),%ymm8
   5523 	vpaddd	%ymm15,%ymm11,%ymm11
   5524 	vpaddd	%ymm14,%ymm10,%ymm10
   5525 	vpaddd	%ymm13,%ymm9,%ymm9
   5526 	vpaddd	%ymm12,%ymm8,%ymm8
   5527 	vpxor	%ymm11,%ymm7,%ymm7
   5528 	vpxor	%ymm10,%ymm6,%ymm6
   5529 	vpxor	%ymm9,%ymm5,%ymm5
   5530 	vpxor	%ymm8,%ymm4,%ymm4
   5531 	vmovdqa	%ymm8,128(%rbp)
   5532 	vpsrld	$25,%ymm7,%ymm8
   5533 	vpslld	$32-25,%ymm7,%ymm7
   5534 	vpxor	%ymm8,%ymm7,%ymm7
   5535 	vpsrld	$25,%ymm6,%ymm8
   5536 	vpslld	$32-25,%ymm6,%ymm6
   5537 	vpxor	%ymm8,%ymm6,%ymm6
   5538 	vpsrld	$25,%ymm5,%ymm8
   5539 	vpslld	$32-25,%ymm5,%ymm5
   5540 	vpxor	%ymm8,%ymm5,%ymm5
   5541 	vpsrld	$25,%ymm4,%ymm8
   5542 	vpslld	$32-25,%ymm4,%ymm4
   5543 	vpxor	%ymm8,%ymm4,%ymm4
   5544 	vmovdqa	128(%rbp),%ymm8
   5545 	vpalignr	$4,%ymm7,%ymm7,%ymm7
   5546 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   5547 	vpalignr	$12,%ymm15,%ymm15,%ymm15
   5548 	vpalignr	$4,%ymm6,%ymm6,%ymm6
   5549 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5550 	vpalignr	$12,%ymm14,%ymm14,%ymm14
   5551 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5552 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5553 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5554 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5555 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5556 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5557 	vmovdqa	%ymm8,128(%rbp)
   5558 	addq	16(%r8),%r10
   5559 	adcq	8+16(%r8),%r11
   5560 	adcq	$1,%r12
   5561 	movq	0+0(%rbp),%rdx
   5562 	movq	%rdx,%r15
   5563 	mulxq	%r10,%r13,%r14
   5564 	mulxq	%r11,%rax,%rdx
   5565 	imulq	%r12,%r15
   5566 	addq	%rax,%r14
   5567 	adcq	%rdx,%r15
   5568 	movq	8+0(%rbp),%rdx
   5569 	mulxq	%r10,%r10,%rax
   5570 	addq	%r10,%r14
   5571 	mulxq	%r11,%r11,%r9
   5572 	adcq	%r11,%r15
   5573 	adcq	$0,%r9
   5574 	imulq	%r12,%rdx
   5575 	addq	%rax,%r15
   5576 	adcq	%rdx,%r9
   5577 	movq	%r13,%r10
   5578 	movq	%r14,%r11
   5579 	movq	%r15,%r12
   5580 	andq	$3,%r12
   5581 	movq	%r15,%r13
   5582 	andq	$-4,%r13
   5583 	movq	%r9,%r14
   5584 	shrdq	$2,%r9,%r15
   5585 	shrq	$2,%r9
   5586 	addq	%r13,%r10
   5587 	adcq	%r14,%r11
   5588 	adcq	$0,%r12
   5589 	addq	%r15,%r10
   5590 	adcq	%r9,%r11
   5591 	adcq	$0,%r12
   5592 
   5593 	leaq	32(%r8),%r8
   5594 	vmovdqa	.rol16(%rip),%ymm8
   5595 	vpaddd	%ymm7,%ymm3,%ymm3
   5596 	vpaddd	%ymm6,%ymm2,%ymm2
   5597 	vpaddd	%ymm5,%ymm1,%ymm1
   5598 	vpaddd	%ymm4,%ymm0,%ymm0
   5599 	vpxor	%ymm3,%ymm15,%ymm15
   5600 	vpxor	%ymm2,%ymm14,%ymm14
   5601 	vpxor	%ymm1,%ymm13,%ymm13
   5602 	vpxor	%ymm0,%ymm12,%ymm12
   5603 	vpshufb	%ymm8,%ymm15,%ymm15
   5604 	vpshufb	%ymm8,%ymm14,%ymm14
   5605 	vpshufb	%ymm8,%ymm13,%ymm13
   5606 	vpshufb	%ymm8,%ymm12,%ymm12
   5607 	vmovdqa	128(%rbp),%ymm8
   5608 	vpaddd	%ymm15,%ymm11,%ymm11
   5609 	vpaddd	%ymm14,%ymm10,%ymm10
   5610 	vpaddd	%ymm13,%ymm9,%ymm9
   5611 	vpaddd	%ymm12,%ymm8,%ymm8
   5612 	vpxor	%ymm11,%ymm7,%ymm7
   5613 	vpxor	%ymm10,%ymm6,%ymm6
   5614 	vpxor	%ymm9,%ymm5,%ymm5
   5615 	vpxor	%ymm8,%ymm4,%ymm4
   5616 	vmovdqa	%ymm8,128(%rbp)
   5617 	vpsrld	$20,%ymm7,%ymm8
   5618 	vpslld	$32-20,%ymm7,%ymm7
   5619 	vpxor	%ymm8,%ymm7,%ymm7
   5620 	vpsrld	$20,%ymm6,%ymm8
   5621 	vpslld	$32-20,%ymm6,%ymm6
   5622 	vpxor	%ymm8,%ymm6,%ymm6
   5623 	vpsrld	$20,%ymm5,%ymm8
   5624 	vpslld	$32-20,%ymm5,%ymm5
   5625 	vpxor	%ymm8,%ymm5,%ymm5
   5626 	vpsrld	$20,%ymm4,%ymm8
   5627 	vpslld	$32-20,%ymm4,%ymm4
   5628 	vpxor	%ymm8,%ymm4,%ymm4
   5629 	vmovdqa	.rol8(%rip),%ymm8
   5630 	vpaddd	%ymm7,%ymm3,%ymm3
   5631 	vpaddd	%ymm6,%ymm2,%ymm2
   5632 	vpaddd	%ymm5,%ymm1,%ymm1
   5633 	vpaddd	%ymm4,%ymm0,%ymm0
   5634 	vpxor	%ymm3,%ymm15,%ymm15
   5635 	vpxor	%ymm2,%ymm14,%ymm14
   5636 	vpxor	%ymm1,%ymm13,%ymm13
   5637 	vpxor	%ymm0,%ymm12,%ymm12
   5638 	vpshufb	%ymm8,%ymm15,%ymm15
   5639 	vpshufb	%ymm8,%ymm14,%ymm14
   5640 	vpshufb	%ymm8,%ymm13,%ymm13
   5641 	vpshufb	%ymm8,%ymm12,%ymm12
   5642 	vmovdqa	128(%rbp),%ymm8
   5643 	vpaddd	%ymm15,%ymm11,%ymm11
   5644 	vpaddd	%ymm14,%ymm10,%ymm10
   5645 	vpaddd	%ymm13,%ymm9,%ymm9
   5646 	vpaddd	%ymm12,%ymm8,%ymm8
   5647 	vpxor	%ymm11,%ymm7,%ymm7
   5648 	vpxor	%ymm10,%ymm6,%ymm6
   5649 	vpxor	%ymm9,%ymm5,%ymm5
   5650 	vpxor	%ymm8,%ymm4,%ymm4
   5651 	vmovdqa	%ymm8,128(%rbp)
   5652 	vpsrld	$25,%ymm7,%ymm8
   5653 	vpslld	$32-25,%ymm7,%ymm7
   5654 	vpxor	%ymm8,%ymm7,%ymm7
   5655 	vpsrld	$25,%ymm6,%ymm8
   5656 	vpslld	$32-25,%ymm6,%ymm6
   5657 	vpxor	%ymm8,%ymm6,%ymm6
   5658 	vpsrld	$25,%ymm5,%ymm8
   5659 	vpslld	$32-25,%ymm5,%ymm5
   5660 	vpxor	%ymm8,%ymm5,%ymm5
   5661 	vpsrld	$25,%ymm4,%ymm8
   5662 	vpslld	$32-25,%ymm4,%ymm4
   5663 	vpxor	%ymm8,%ymm4,%ymm4
   5664 	vmovdqa	128(%rbp),%ymm8
   5665 	vpalignr	$12,%ymm7,%ymm7,%ymm7
   5666 	vpalignr	$8,%ymm11,%ymm11,%ymm11
   5667 	vpalignr	$4,%ymm15,%ymm15,%ymm15
   5668 	vpalignr	$12,%ymm6,%ymm6,%ymm6
   5669 	vpalignr	$8,%ymm10,%ymm10,%ymm10
   5670 	vpalignr	$4,%ymm14,%ymm14,%ymm14
   5671 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5672 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5673 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5674 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5675 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5676 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5677 
   5678 	incq	%rcx
   5679 	cmpq	$4,%rcx
   5680 	jl	1b
   5681 	cmpq	$10,%rcx
   5682 	jne	2b
   5683 	movq	%rbx,%rcx
   5684 	subq	$384,%rcx
   5685 	andq	$-16,%rcx
   5686 1:
   5687 	testq	%rcx,%rcx
   5688 	je	1f
   5689 	addq	0(%r8),%r10
   5690 	adcq	8+0(%r8),%r11
   5691 	adcq	$1,%r12
   5692 	movq	0+0(%rbp),%rdx
   5693 	movq	%rdx,%r15
   5694 	mulxq	%r10,%r13,%r14
   5695 	mulxq	%r11,%rax,%rdx
   5696 	imulq	%r12,%r15
   5697 	addq	%rax,%r14
   5698 	adcq	%rdx,%r15
   5699 	movq	8+0(%rbp),%rdx
   5700 	mulxq	%r10,%r10,%rax
   5701 	addq	%r10,%r14
   5702 	mulxq	%r11,%r11,%r9
   5703 	adcq	%r11,%r15
   5704 	adcq	$0,%r9
   5705 	imulq	%r12,%rdx
   5706 	addq	%rax,%r15
   5707 	adcq	%rdx,%r9
   5708 	movq	%r13,%r10
   5709 	movq	%r14,%r11
   5710 	movq	%r15,%r12
   5711 	andq	$3,%r12
   5712 	movq	%r15,%r13
   5713 	andq	$-4,%r13
   5714 	movq	%r9,%r14
   5715 	shrdq	$2,%r9,%r15
   5716 	shrq	$2,%r9
   5717 	addq	%r13,%r10
   5718 	adcq	%r14,%r11
   5719 	adcq	$0,%r12
   5720 	addq	%r15,%r10
   5721 	adcq	%r9,%r11
   5722 	adcq	$0,%r12
   5723 
   5724 	leaq	16(%r8),%r8
   5725 	subq	$16,%rcx
   5726 	jmp	1b
   5727 1:
   5728 	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
   5729 	vpaddd	64(%rbp),%ymm7,%ymm7
   5730 	vpaddd	96(%rbp),%ymm11,%ymm11
   5731 	vpaddd	256(%rbp),%ymm15,%ymm15
   5732 	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
   5733 	vpaddd	64(%rbp),%ymm6,%ymm6
   5734 	vpaddd	96(%rbp),%ymm10,%ymm10
   5735 	vpaddd	224(%rbp),%ymm14,%ymm14
   5736 	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
   5737 	vpaddd	64(%rbp),%ymm5,%ymm5
   5738 	vpaddd	96(%rbp),%ymm9,%ymm9
   5739 	vpaddd	192(%rbp),%ymm13,%ymm13
   5740 	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
   5741 	vpaddd	64(%rbp),%ymm4,%ymm4
   5742 	vpaddd	96(%rbp),%ymm8,%ymm8
   5743 	vpaddd	160(%rbp),%ymm12,%ymm12
   5744 
   5745 	vmovdqa	%ymm0,128(%rbp)
   5746 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
   5747 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
   5748 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
   5749 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
   5750 	vpxor	0+0(%rsi),%ymm0,%ymm0
   5751 	vpxor	32+0(%rsi),%ymm3,%ymm3
   5752 	vpxor	64+0(%rsi),%ymm7,%ymm7
   5753 	vpxor	96+0(%rsi),%ymm11,%ymm11
   5754 	vmovdqu	%ymm0,0+0(%rdi)
   5755 	vmovdqu	%ymm3,32+0(%rdi)
   5756 	vmovdqu	%ymm7,64+0(%rdi)
   5757 	vmovdqu	%ymm11,96+0(%rdi)
   5758 
   5759 	vmovdqa	128(%rbp),%ymm0
   5760 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
   5761 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
   5762 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
   5763 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
   5764 	vpxor	0+128(%rsi),%ymm3,%ymm3
   5765 	vpxor	32+128(%rsi),%ymm2,%ymm2
   5766 	vpxor	64+128(%rsi),%ymm6,%ymm6
   5767 	vpxor	96+128(%rsi),%ymm10,%ymm10
   5768 	vmovdqu	%ymm3,0+128(%rdi)
   5769 	vmovdqu	%ymm2,32+128(%rdi)
   5770 	vmovdqu	%ymm6,64+128(%rdi)
   5771 	vmovdqu	%ymm10,96+128(%rdi)
   5772 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
   5773 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
   5774 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
   5775 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
   5776 	vpxor	0+256(%rsi),%ymm3,%ymm3
   5777 	vpxor	32+256(%rsi),%ymm1,%ymm1
   5778 	vpxor	64+256(%rsi),%ymm5,%ymm5
   5779 	vpxor	96+256(%rsi),%ymm9,%ymm9
   5780 	vmovdqu	%ymm3,0+256(%rdi)
   5781 	vmovdqu	%ymm1,32+256(%rdi)
   5782 	vmovdqu	%ymm5,64+256(%rdi)
   5783 	vmovdqu	%ymm9,96+256(%rdi)
   5784 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
   5785 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
   5786 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
   5787 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
   5788 	vmovdqa	%ymm3,%ymm8
   5789 
   5790 	leaq	384(%rsi),%rsi
   5791 	leaq	384(%rdi),%rdi
   5792 	subq	$384,%rbx
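// Drain the queued keystream 32 bytes at a time, shifting
// ymm4/ymm8/ymm12 down into ymm0; anything under 32 bytes falls
// through to the 16-byte step and then the shared SSE tail.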
   5793 open_avx2_tail_loop:
   5794 	cmpq	$32,%rbx
   5795 	jb	open_avx2_tail
   5796 	subq	$32,%rbx
   5797 	vpxor	(%rsi),%ymm0,%ymm0
   5798 	vmovdqu	%ymm0,(%rdi)
   5799 	leaq	32(%rsi),%rsi
   5800 	leaq	32(%rdi),%rdi
   5801 	vmovdqa	%ymm4,%ymm0
   5802 	vmovdqa	%ymm8,%ymm4
   5803 	vmovdqa	%ymm12,%ymm8
   5804 	jmp	open_avx2_tail_loop
   5805 open_avx2_tail:
   5806 	cmpq	$16,%rbx
   5807 	vmovdqa	%xmm0,%xmm1
   5808 	jb	1f
   5809 	subq	$16,%rbx
   5810 
   5811 	vpxor	(%rsi),%xmm0,%xmm1
   5812 	vmovdqu	%xmm1,(%rdi)
   5813 	leaq	16(%rsi),%rsi
   5814 	leaq	16(%rdi),%rdi
   5815 	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
   5816 	vmovdqa	%xmm0,%xmm1
   5817 1:
   5818 	vzeroupper
   5819 	jmp	open_sse_tail_16
   5820 
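// Total length <=192 bytes: two two-block-wide states, kept entirely
// in registers, provide the Poly1305 key and all of the keystream
// for the short open loop.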
   5821 open_avx2_192:
   5822 	vmovdqa	%ymm0,%ymm1
   5823 	vmovdqa	%ymm0,%ymm2
   5824 	vmovdqa	%ymm4,%ymm5
   5825 	vmovdqa	%ymm4,%ymm6
   5826 	vmovdqa	%ymm8,%ymm9
   5827 	vmovdqa	%ymm8,%ymm10
   5828 	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
   5829 	vmovdqa	%ymm12,%ymm11
   5830 	vmovdqa	%ymm13,%ymm15
   5831 	movq	$10,%r10
   5832 1:
   5833 	vpaddd	%ymm4,%ymm0,%ymm0
   5834 	vpxor	%ymm0,%ymm12,%ymm12
   5835 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5836 	vpaddd	%ymm12,%ymm8,%ymm8
   5837 	vpxor	%ymm8,%ymm4,%ymm4
   5838 	vpsrld	$20,%ymm4,%ymm3
   5839 	vpslld	$12,%ymm4,%ymm4
   5840 	vpxor	%ymm3,%ymm4,%ymm4
   5841 	vpaddd	%ymm4,%ymm0,%ymm0
   5842 	vpxor	%ymm0,%ymm12,%ymm12
   5843 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5844 	vpaddd	%ymm12,%ymm8,%ymm8
   5845 	vpxor	%ymm8,%ymm4,%ymm4
   5846 	vpslld	$7,%ymm4,%ymm3
   5847 	vpsrld	$25,%ymm4,%ymm4
   5848 	vpxor	%ymm3,%ymm4,%ymm4
   5849 	vpalignr	$12,%ymm12,%ymm12,%ymm12
   5850 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5851 	vpalignr	$4,%ymm4,%ymm4,%ymm4
   5852 	vpaddd	%ymm5,%ymm1,%ymm1
   5853 	vpxor	%ymm1,%ymm13,%ymm13
   5854 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5855 	vpaddd	%ymm13,%ymm9,%ymm9
   5856 	vpxor	%ymm9,%ymm5,%ymm5
   5857 	vpsrld	$20,%ymm5,%ymm3
   5858 	vpslld	$12,%ymm5,%ymm5
   5859 	vpxor	%ymm3,%ymm5,%ymm5
   5860 	vpaddd	%ymm5,%ymm1,%ymm1
   5861 	vpxor	%ymm1,%ymm13,%ymm13
   5862 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5863 	vpaddd	%ymm13,%ymm9,%ymm9
   5864 	vpxor	%ymm9,%ymm5,%ymm5
   5865 	vpslld	$7,%ymm5,%ymm3
   5866 	vpsrld	$25,%ymm5,%ymm5
   5867 	vpxor	%ymm3,%ymm5,%ymm5
   5868 	vpalignr	$12,%ymm13,%ymm13,%ymm13
   5869 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5870 	vpalignr	$4,%ymm5,%ymm5,%ymm5
   5871 	vpaddd	%ymm4,%ymm0,%ymm0
   5872 	vpxor	%ymm0,%ymm12,%ymm12
   5873 	vpshufb	.rol16(%rip),%ymm12,%ymm12
   5874 	vpaddd	%ymm12,%ymm8,%ymm8
   5875 	vpxor	%ymm8,%ymm4,%ymm4
   5876 	vpsrld	$20,%ymm4,%ymm3
   5877 	vpslld	$12,%ymm4,%ymm4
   5878 	vpxor	%ymm3,%ymm4,%ymm4
   5879 	vpaddd	%ymm4,%ymm0,%ymm0
   5880 	vpxor	%ymm0,%ymm12,%ymm12
   5881 	vpshufb	.rol8(%rip),%ymm12,%ymm12
   5882 	vpaddd	%ymm12,%ymm8,%ymm8
   5883 	vpxor	%ymm8,%ymm4,%ymm4
   5884 	vpslld	$7,%ymm4,%ymm3
   5885 	vpsrld	$25,%ymm4,%ymm4
   5886 	vpxor	%ymm3,%ymm4,%ymm4
   5887 	vpalignr	$4,%ymm12,%ymm12,%ymm12
   5888 	vpalignr	$8,%ymm8,%ymm8,%ymm8
   5889 	vpalignr	$12,%ymm4,%ymm4,%ymm4
   5890 	vpaddd	%ymm5,%ymm1,%ymm1
   5891 	vpxor	%ymm1,%ymm13,%ymm13
   5892 	vpshufb	.rol16(%rip),%ymm13,%ymm13
   5893 	vpaddd	%ymm13,%ymm9,%ymm9
   5894 	vpxor	%ymm9,%ymm5,%ymm5
   5895 	vpsrld	$20,%ymm5,%ymm3
   5896 	vpslld	$12,%ymm5,%ymm5
   5897 	vpxor	%ymm3,%ymm5,%ymm5
   5898 	vpaddd	%ymm5,%ymm1,%ymm1
   5899 	vpxor	%ymm1,%ymm13,%ymm13
   5900 	vpshufb	.rol8(%rip),%ymm13,%ymm13
   5901 	vpaddd	%ymm13,%ymm9,%ymm9
   5902 	vpxor	%ymm9,%ymm5,%ymm5
   5903 	vpslld	$7,%ymm5,%ymm3
   5904 	vpsrld	$25,%ymm5,%ymm5
   5905 	vpxor	%ymm3,%ymm5,%ymm5
   5906 	vpalignr	$4,%ymm13,%ymm13,%ymm13
   5907 	vpalignr	$8,%ymm9,%ymm9,%ymm9
   5908 	vpalignr	$12,%ymm5,%ymm5,%ymm5
   5909 
   5910 	decq	%r10
   5911 	jne	1b
   5912 	vpaddd	%ymm2,%ymm0,%ymm0
   5913 	vpaddd	%ymm2,%ymm1,%ymm1
   5914 	vpaddd	%ymm6,%ymm4,%ymm4
   5915 	vpaddd	%ymm6,%ymm5,%ymm5
   5916 	vpaddd	%ymm10,%ymm8,%ymm8
   5917 	vpaddd	%ymm10,%ymm9,%ymm9
   5918 	vpaddd	%ymm11,%ymm12,%ymm12
   5919 	vpaddd	%ymm15,%ymm13,%ymm13
   5920 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
   5921 
   5922 	vpand	.clamp(%rip),%ymm3,%ymm3
   5923 	vmovdqa	%ymm3,0(%rbp)
   5924 
   5925 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
   5926 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
   5927 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
   5928 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
   5929 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
   5930 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
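// Short open path: the clamped key is already at 0(%rbp) and the
// keystream is queued in registers. Hash the AD, then hash and
// decrypt 32 bytes per pass, rotating the queue down into ymm0.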
   5931 open_avx2_short:
   5932 	movq	%r8,%r8
   5933 	call	poly_hash_ad_internal
   5934 open_avx2_hash_and_xor_loop:
   5935 	cmpq	$32,%rbx
   5936 	jb	open_avx2_short_tail_32
   5937 	subq	$32,%rbx
   5938 	addq	0(%rsi),%r10
   5939 	adcq	8+0(%rsi),%r11
   5940 	adcq	$1,%r12
   5941 	movq	0+0(%rbp),%rax
   5942 	movq	%rax,%r15
   5943 	mulq	%r10
   5944 	movq	%rax,%r13
   5945 	movq	%rdx,%r14
   5946 	movq	0+0(%rbp),%rax
   5947 	mulq	%r11
   5948 	imulq	%r12,%r15
   5949 	addq	%rax,%r14
   5950 	adcq	%rdx,%r15
   5951 	movq	8+0(%rbp),%rax
   5952 	movq	%rax,%r9
   5953 	mulq	%r10
   5954 	addq	%rax,%r14
   5955 	adcq	$0,%rdx
   5956 	movq	%rdx,%r10
   5957 	movq	8+0(%rbp),%rax
   5958 	mulq	%r11
   5959 	addq	%rax,%r15
   5960 	adcq	$0,%rdx
   5961 	imulq	%r12,%r9
   5962 	addq	%r10,%r15
   5963 	adcq	%rdx,%r9
   5964 	movq	%r13,%r10
   5965 	movq	%r14,%r11
   5966 	movq	%r15,%r12
   5967 	andq	$3,%r12
   5968 	movq	%r15,%r13
   5969 	andq	$-4,%r13
   5970 	movq	%r9,%r14
   5971 	shrdq	$2,%r9,%r15
   5972 	shrq	$2,%r9
   5973 	addq	%r13,%r10
   5974 	adcq	%r14,%r11
   5975 	adcq	$0,%r12
   5976 	addq	%r15,%r10
   5977 	adcq	%r9,%r11
   5978 	adcq	$0,%r12
   5979 	addq	16(%rsi),%r10
   5980 	adcq	8+16(%rsi),%r11
   5981 	adcq	$1,%r12
   5982 	movq	0+0(%rbp),%rax
   5983 	movq	%rax,%r15
   5984 	mulq	%r10
   5985 	movq	%rax,%r13
   5986 	movq	%rdx,%r14
   5987 	movq	0+0(%rbp),%rax
   5988 	mulq	%r11
   5989 	imulq	%r12,%r15
   5990 	addq	%rax,%r14
   5991 	adcq	%rdx,%r15
   5992 	movq	8+0(%rbp),%rax
   5993 	movq	%rax,%r9
   5994 	mulq	%r10
   5995 	addq	%rax,%r14
   5996 	adcq	$0,%rdx
   5997 	movq	%rdx,%r10
   5998 	movq	8+0(%rbp),%rax
   5999 	mulq	%r11
   6000 	addq	%rax,%r15
   6001 	adcq	$0,%rdx
   6002 	imulq	%r12,%r9
   6003 	addq	%r10,%r15
   6004 	adcq	%rdx,%r9
   6005 	movq	%r13,%r10
   6006 	movq	%r14,%r11
   6007 	movq	%r15,%r12
   6008 	andq	$3,%r12
   6009 	movq	%r15,%r13
   6010 	andq	$-4,%r13
   6011 	movq	%r9,%r14
   6012 	shrdq	$2,%r9,%r15
   6013 	shrq	$2,%r9
   6014 	addq	%r13,%r10
   6015 	adcq	%r14,%r11
   6016 	adcq	$0,%r12
   6017 	addq	%r15,%r10
   6018 	adcq	%r9,%r11
   6019 	adcq	$0,%r12
   6020 
   6021 
   6022 	vpxor	(%rsi),%ymm0,%ymm0
   6023 	vmovdqu	%ymm0,(%rdi)
   6024 	leaq	32(%rsi),%rsi
   6025 	leaq	32(%rdi),%rdi
   6026 
   6027 	vmovdqa	%ymm4,%ymm0
   6028 	vmovdqa	%ymm8,%ymm4
   6029 	vmovdqa	%ymm12,%ymm8
   6030 	vmovdqa	%ymm1,%ymm12
   6031 	vmovdqa	%ymm5,%ymm1
   6032 	vmovdqa	%ymm9,%ymm5
   6033 	vmovdqa	%ymm13,%ymm9
   6034 	vmovdqa	%ymm2,%ymm13
   6035 	vmovdqa	%ymm6,%ymm2
   6036 	jmp	open_avx2_hash_and_xor_loop
   6037 open_avx2_short_tail_32:
   6038 	cmpq	$16,%rbx
   6039 	vmovdqa	%xmm0,%xmm1
   6040 	jb	1f
   6041 	subq	$16,%rbx
   6042 	addq	0(%rsi),%r10
   6043 	adcq	8+0(%rsi),%r11
   6044 	adcq	$1,%r12
   6045 	movq	0+0(%rbp),%rax
   6046 	movq	%rax,%r15
   6047 	mulq	%r10
   6048 	movq	%rax,%r13
   6049 	movq	%rdx,%r14
   6050 	movq	0+0(%rbp),%rax
   6051 	mulq	%r11
   6052 	imulq	%r12,%r15
   6053 	addq	%rax,%r14
   6054 	adcq	%rdx,%r15
   6055 	movq	8+0(%rbp),%rax
   6056 	movq	%rax,%r9
   6057 	mulq	%r10
   6058 	addq	%rax,%r14
   6059 	adcq	$0,%rdx
   6060 	movq	%rdx,%r10
   6061 	movq	8+0(%rbp),%rax
   6062 	mulq	%r11
   6063 	addq	%rax,%r15
   6064 	adcq	$0,%rdx
   6065 	imulq	%r12,%r9
   6066 	addq	%r10,%r15
   6067 	adcq	%rdx,%r9
   6068 	movq	%r13,%r10
   6069 	movq	%r14,%r11
   6070 	movq	%r15,%r12
   6071 	andq	$3,%r12
   6072 	movq	%r15,%r13
   6073 	andq	$-4,%r13
   6074 	movq	%r9,%r14
   6075 	shrdq	$2,%r9,%r15
   6076 	shrq	$2,%r9
   6077 	addq	%r13,%r10
   6078 	adcq	%r14,%r11
   6079 	adcq	$0,%r12
   6080 	addq	%r15,%r10
   6081 	adcq	%r9,%r11
   6082 	adcq	$0,%r12
   6083 
   6084 	vpxor	(%rsi),%xmm0,%xmm3
   6085 	vmovdqu	%xmm3,(%rdi)
   6086 	leaq	16(%rsi),%rsi
   6087 	leaq	16(%rdi),%rdi
   6088 	vextracti128	$1,%ymm0,%xmm1
   6089 1:
   6090 	vzeroupper
   6091 	jmp	open_sse_tail_16
   6092 
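// Total length 193..320 bytes: three two-block-wide states (six
// keystream blocks) cover the Poly1305 key plus up to 320 bytes of
// data.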
open_avx2_320:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	%ymm12,160(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	movq	$10,%r10
1:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	decq	%r10
	jne	1b
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	%ymm7,%ymm4,%ymm4
	vpaddd	%ymm7,%ymm5,%ymm5
	vpaddd	%ymm7,%ymm6,%ymm6
	vpaddd	%ymm11,%ymm8,%ymm8
	vpaddd	%ymm11,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm10,%ymm10
	vpaddd	160(%rbp),%ymm12,%ymm12
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	224(%rbp),%ymm14,%ymm14
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.clamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
	jmp	open_avx2_short
.size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2


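/*
 * AVX2 seal (encrypt) path. As used below: %rdi = ciphertext out,
 * %rsi = plaintext in, %rbx = plaintext length, %rcx/%r8 = additional
 * data pointer/length, %r9 = key pointer, %rbp = scratch frame.
 * Inputs of at most 192 or 320 bytes take the narrower special cases;
 * longer inputs set up four parallel states for the 512-byte main loop.
 */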
.type	chacha20_poly1305_seal_avx2,@function
.align	64
chacha20_poly1305_seal_avx2:
	vzeroupper
	vmovdqa	.chacha20_consts(%rip),%ymm0
	vbroadcasti128	0(%r9),%ymm4
	vbroadcasti128	16(%r9),%ymm8
	vbroadcasti128	32(%r9),%ymm12
	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
	cmpq	$192,%rbx
	jbe	seal_avx2_192
	cmpq	$320,%rbx
	jbe	seal_avx2_320
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm4,64(%rbp)
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm8,%ymm11
	vmovdqa	%ymm8,96(%rbp)
	vmovdqa	%ymm12,%ymm15
	vpaddd	.avx2_inc(%rip),%ymm15,%ymm14
	vpaddd	.avx2_inc(%rip),%ymm14,%ymm13
	vpaddd	.avx2_inc(%rip),%ymm13,%ymm12
	vmovdqa	%ymm12,160(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	vmovdqa	%ymm15,256(%rbp)
	movq	$10,%r10
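/*
 * First 10 double-rounds, run before any ciphertext exists to hash.
 * One row is spilled to 128(%rbp) around each half-round because the
 * four parallel states occupy all sixteen %ymm registers.
 */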
1:
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	decq	%r10
	jnz	1b
	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	64(%rbp),%ymm7,%ymm7
	vpaddd	96(%rbp),%ymm11,%ymm11
	vpaddd	256(%rbp),%ymm15,%ymm15
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	64(%rbp),%ymm6,%ymm6
	vpaddd	96(%rbp),%ymm10,%ymm10
	vpaddd	224(%rbp),%ymm14,%ymm14
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	64(%rbp),%ymm5,%ymm5
	vpaddd	96(%rbp),%ymm9,%ymm9
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12

	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
	vpand	.clamp(%rip),%ymm15,%ymm15
	vmovdqa	%ymm15,0(%rbp)
	movq	%r8,%r8
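/*
 * Hash the additional data (pointer in %rcx, length in %r8) into the
 * Poly1305 state; the clamped r key was stored at 0(%rbp) just above.
 */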
	call	poly_hash_ad_internal

	vpxor	0(%rsi),%ymm3,%ymm3
	vpxor	32(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm3,0(%rdi)
	vmovdqu	%ymm11,32(%rdi)
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+64(%rsi),%ymm15,%ymm15
	vpxor	32+64(%rsi),%ymm2,%ymm2
	vpxor	64+64(%rsi),%ymm6,%ymm6
	vpxor	96+64(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm15,0+64(%rdi)
	vmovdqu	%ymm2,32+64(%rdi)
	vmovdqu	%ymm6,64+64(%rdi)
	vmovdqu	%ymm10,96+64(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+192(%rsi),%ymm15,%ymm15
	vpxor	32+192(%rsi),%ymm1,%ymm1
	vpxor	64+192(%rsi),%ymm5,%ymm5
	vpxor	96+192(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm15,0+192(%rdi)
	vmovdqu	%ymm1,32+192(%rdi)
	vmovdqu	%ymm5,64+192(%rdi)
	vmovdqu	%ymm9,96+192(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm15,%ymm8

	leaq	320(%rsi),%rsi
	subq	$320,%rbx
	movq	$320,%rcx
	cmpq	$128,%rbx
	jbe	seal_avx2_hash
	vpxor	0(%rsi),%ymm0,%ymm0
	vpxor	32(%rsi),%ymm4,%ymm4
	vpxor	64(%rsi),%ymm8,%ymm8
	vpxor	96(%rsi),%ymm12,%ymm12
	vmovdqu	%ymm0,320(%rdi)
	vmovdqu	%ymm4,352(%rdi)
	vmovdqu	%ymm8,384(%rdi)
	vmovdqu	%ymm12,416(%rdi)
	leaq	128(%rsi),%rsi
	subq	$128,%rbx
	movq	$8,%rcx
	movq	$2,%r8
	cmpq	$128,%rbx
	jbe	seal_avx2_tail_128
	cmpq	$256,%rbx
	jbe	seal_avx2_tail_256
	cmpq	$384,%rbx
	jbe	seal_avx2_tail_384
	cmpq	$512,%rbx
	jbe	seal_avx2_tail_512
	vmovdqa	.chacha20_consts(%rip),%ymm0
	vmovdqa	64(%rbp),%ymm4
	vmovdqa	96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.avx2_inc(%rip),%ymm12
	vpaddd	160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,256(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm12,160(%rbp)
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0

	subq	$16,%rdi
	movq	$9,%rcx
	jmp	4f
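/*
 * Main seal loop: each iteration generates 512 bytes of keystream
 * from four parallel ChaCha20 states while hashing the previous
 * iteration's ciphertext, interleaving the mulx-based Poly1305
 * multiply/reduce with the vector rounds.
 */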
1:
	vmovdqa	.chacha20_consts(%rip),%ymm0
	vmovdqa	64(%rbp),%ymm4
	vmovdqa	96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.avx2_inc(%rip),%ymm12
	vpaddd	160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,256(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm12,160(%rbp)

	movq	$10,%rcx
2:
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

4:
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	addq	32(%rdi),%r10
	adcq	8+32(%rdi),%r11
	adcq	$1,%r12

	leaq	48(%rdi),%rdi
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	decq	%rcx
	jne	2b
	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	64(%rbp),%ymm7,%ymm7
	vpaddd	96(%rbp),%ymm11,%ymm11
	vpaddd	256(%rbp),%ymm15,%ymm15
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	64(%rbp),%ymm6,%ymm6
	vpaddd	96(%rbp),%ymm10,%ymm10
	vpaddd	224(%rbp),%ymm14,%ymm14
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	64(%rbp),%ymm5,%ymm5
	vpaddd	96(%rbp),%ymm9,%ymm9
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12

	leaq	32(%rdi),%rdi
	vmovdqa	%ymm0,128(%rbp)
	addq	-32(%rdi),%r10
	adcq	8+-32(%rdi),%r11
	adcq	$1,%r12
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vpxor	0+0(%rsi),%ymm0,%ymm0
	vpxor	32+0(%rsi),%ymm3,%ymm3
	vpxor	64+0(%rsi),%ymm7,%ymm7
	vpxor	96+0(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm0,0+0(%rdi)
	vmovdqu	%ymm3,32+0(%rdi)
	vmovdqu	%ymm7,64+0(%rdi)
	vmovdqu	%ymm11,96+0(%rdi)

	vmovdqa	128(%rbp),%ymm0
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm2,%ymm2
	vpxor	64+128(%rsi),%ymm6,%ymm6
	vpxor	96+128(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm2,32+128(%rdi)
	vmovdqu	%ymm6,64+128(%rdi)
	vmovdqu	%ymm10,96+128(%rdi)
	addq	-16(%rdi),%r10
	adcq	8+-16(%rdi),%r11
	adcq	$1,%r12
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+256(%rsi),%ymm3,%ymm3
	vpxor	32+256(%rsi),%ymm1,%ymm1
	vpxor	64+256(%rsi),%ymm5,%ymm5
	vpxor	96+256(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+256(%rdi)
	vmovdqu	%ymm1,32+256(%rdi)
	vmovdqu	%ymm5,64+256(%rdi)
	vmovdqu	%ymm9,96+256(%rdi)
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
	vpxor	0+384(%rsi),%ymm3,%ymm3
	vpxor	32+384(%rsi),%ymm0,%ymm0
	vpxor	64+384(%rsi),%ymm4,%ymm4
	vpxor	96+384(%rsi),%ymm8,%ymm8
	vmovdqu	%ymm3,0+384(%rdi)
	vmovdqu	%ymm0,32+384(%rdi)
	vmovdqu	%ymm4,64+384(%rdi)
	vmovdqu	%ymm8,96+384(%rdi)

	leaq	512(%rsi),%rsi
	subq	$512,%rbx
	cmpq	$512,%rbx
	jg	1b
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	movq	$10,%rcx
	xorq	%r8,%r8
	cmpq	$128,%rbx
	ja	3f

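/*
 * seal_avx2_tail_128: at most 128 bytes of plaintext remain. A single
 * ChaCha20 state is computed while Poly1305 catches up on ciphertext
 * still pending from the main loop (%rcx and %r8 count hash blocks).
 */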
seal_avx2_tail_128:
	vmovdqa	.chacha20_consts(%rip),%ymm0
	vmovdqa	64(%rbp),%ymm4
	vmovdqa	96(%rbp),%ymm8
	vmovdqa	.avx2_inc(%rip),%ymm12
	vpaddd	160(%rbp),%ymm12,%ymm12
	vmovdqa	%ymm12,160(%rbp)

1:
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
2:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	1b
	decq	%r8
	jge	2b
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	jmp	seal_avx2_short_loop
3:
	cmpq	$256,%rbx
	ja	3f

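/*
 * seal_avx2_tail_256: as above, but with two parallel states for up
 * to 256 remaining bytes.
 */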
seal_avx2_tail_256:
	vmovdqa	.chacha20_consts(%rip),%ymm0
	vmovdqa	64(%rbp),%ymm4
	vmovdqa	96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	.avx2_inc(%rip),%ymm12
	vpaddd	160(%rbp),%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm12,160(%rbp)
	vmovdqa	%ymm13,192(%rbp)

1:
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
2:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	1b
	decq	%r8
	jge	2b
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	64(%rbp),%ymm5,%ymm5
	vpaddd	96(%rbp),%ymm9,%ymm9
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+0(%rsi),%ymm3,%ymm3
	vpxor	32+0(%rsi),%ymm1,%ymm1
	vpxor	64+0(%rsi),%ymm5,%ymm5
	vpxor	96+0(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+0(%rdi)
	vmovdqu	%ymm1,32+0(%rdi)
	vmovdqu	%ymm5,64+0(%rdi)
	vmovdqu	%ymm9,96+0(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$128,%rcx
	leaq	128(%rsi),%rsi
	subq	$128,%rbx
	jmp	seal_avx2_hash
3:
	cmpq	$384,%rbx
	ja	seal_avx2_tail_512

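/*
 * seal_avx2_tail_384: as above, with three parallel states for up to
 * 384 remaining bytes.
 */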
   7777 seal_avx2_tail_384:
   7778 	vmovdqa	.chacha20_consts(%rip),%ymm0
   7779 	vmovdqa	64(%rbp),%ymm4
   7780 	vmovdqa	96(%rbp),%ymm8
   7781 	vmovdqa	%ymm0,%ymm1
   7782 	vmovdqa	%ymm4,%ymm5
   7783 	vmovdqa	%ymm8,%ymm9
   7784 	vmovdqa	%ymm0,%ymm2
   7785 	vmovdqa	%ymm4,%ymm6
   7786 	vmovdqa	%ymm8,%ymm10
   7787 	vmovdqa	.avx2_inc(%rip),%ymm12
   7788 	vpaddd	160(%rbp),%ymm12,%ymm14
   7789 	vpaddd	%ymm14,%ymm12,%ymm13
   7790 	vpaddd	%ymm13,%ymm12,%ymm12
   7791 	vmovdqa	%ymm12,160(%rbp)
   7792 	vmovdqa	%ymm13,192(%rbp)
   7793 	vmovdqa	%ymm14,224(%rbp)
   7794 
   7795 1:
   7796 	addq	0(%rdi),%r10
   7797 	adcq	8+0(%rdi),%r11
   7798 	adcq	$1,%r12
   7799 	movq	0+0(%rbp),%rax
   7800 	movq	%rax,%r15
   7801 	mulq	%r10
   7802 	movq	%rax,%r13
   7803 	movq	%rdx,%r14
   7804 	movq	0+0(%rbp),%rax
   7805 	mulq	%r11
   7806 	imulq	%r12,%r15
   7807 	addq	%rax,%r14
   7808 	adcq	%rdx,%r15
   7809 	movq	8+0(%rbp),%rax
   7810 	movq	%rax,%r9
   7811 	mulq	%r10
   7812 	addq	%rax,%r14
   7813 	adcq	$0,%rdx
   7814 	movq	%rdx,%r10
   7815 	movq	8+0(%rbp),%rax
   7816 	mulq	%r11
   7817 	addq	%rax,%r15
   7818 	adcq	$0,%rdx
   7819 	imulq	%r12,%r9
   7820 	addq	%r10,%r15
   7821 	adcq	%rdx,%r9
   7822 	movq	%r13,%r10
   7823 	movq	%r14,%r11
   7824 	movq	%r15,%r12
   7825 	andq	$3,%r12
   7826 	movq	%r15,%r13
   7827 	andq	$-4,%r13
   7828 	movq	%r9,%r14
   7829 	shrdq	$2,%r9,%r15
   7830 	shrq	$2,%r9
   7831 	addq	%r13,%r10
   7832 	adcq	%r14,%r11
   7833 	adcq	$0,%r12
   7834 	addq	%r15,%r10
   7835 	adcq	%r9,%r11
   7836 	adcq	$0,%r12
   7837 
   7838 	leaq	16(%rdi),%rdi
2:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	1b
	decq	%r8
	jge	2b
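// Rounds complete: add the saved initial state back in, de-interleave
// the 128-bit lanes with vperm2i128, and write the first 256 bytes of
// ciphertext.  The last 128 bytes of keystream stay in ymm0/ymm4/ymm8/
// ymm12 for the short path, and %rcx = 256 below tells seal_avx2_hash
// how much fresh ciphertext still needs to be absorbed into the MAC.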
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	64(%rbp),%ymm6,%ymm6
	vpaddd	96(%rbp),%ymm10,%ymm10
	vpaddd	224(%rbp),%ymm14,%ymm14
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	64(%rbp),%ymm5,%ymm5
	vpaddd	96(%rbp),%ymm9,%ymm9
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+0(%rsi),%ymm3,%ymm3
	vpxor	32+0(%rsi),%ymm2,%ymm2
	vpxor	64+0(%rsi),%ymm6,%ymm6
	vpxor	96+0(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+0(%rdi)
	vmovdqu	%ymm2,32+0(%rdi)
	vmovdqu	%ymm6,64+0(%rdi)
	vmovdqu	%ymm10,96+0(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm1,%ymm1
	vpxor	64+128(%rsi),%ymm5,%ymm5
	vpxor	96+128(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm1,32+128(%rdi)
	vmovdqu	%ymm5,64+128(%rdi)
	vmovdqu	%ymm9,96+128(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$256,%rcx
	leaq	256(%rsi),%rsi
	subq	$256,%rbx
	jmp	seal_avx2_hash

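// seal_avx2_tail_512: seal a remaining tail of up to 512 bytes with four
// parallel ChaCha20 states.  The block counters are built from .avx2_inc
// and spilled to 160..256(%rbp); 128(%rbp) serves as scratch for ymm8
// during the rounds so the shuffle masks can be kept in a register.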
seal_avx2_tail_512:
	vmovdqa	.chacha20_consts(%rip),%ymm0
	vmovdqa	64(%rbp),%ymm4
	vmovdqa	96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.avx2_inc(%rip),%ymm12
	vpaddd	160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,256(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm12,160(%rbp)

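// 1: catch-up iteration: absorb one 16-byte block of already-written
// ciphertext into the Poly1305 accumulator (BMI2 mulx form of the
// multiply, followed by the usual mod 2^130-5 partial reduction).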
1:
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
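// 2: one ChaCha20 double round across all four states, with ymm8
// round-tripped through 128(%rbp) so the rol16/rol8 masks can live in a
// register.  Two Poly1305 blocks (at 0 and 16(%rdi)) are interleaved
// with the vector work to hide the scalar multiply latency.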
2:
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,128(%rbp)
	vmovdqa	.rol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	vmovdqa	.rol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vmovdqa	128(%rbp),%ymm8
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	movq	0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	movq	8+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	addq	%rax,%r15
	adcq	%rdx,%r9

	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	1b
	decq	%r8
	jge	2b
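// Rounds and catch-up hashing done: add the saved state back, transpose
// the lanes, and emit the first 384 bytes of ciphertext.  The remaining
// 128 bytes of keystream are left in ymm0/ymm4/ymm8/ymm12, and
// %rcx = 384 tells seal_avx2_hash how much output is still unhashed.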
	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	64(%rbp),%ymm7,%ymm7
	vpaddd	96(%rbp),%ymm11,%ymm11
	vpaddd	256(%rbp),%ymm15,%ymm15
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	64(%rbp),%ymm6,%ymm6
	vpaddd	96(%rbp),%ymm10,%ymm10
	vpaddd	224(%rbp),%ymm14,%ymm14
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	64(%rbp),%ymm5,%ymm5
	vpaddd	96(%rbp),%ymm9,%ymm9
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	64(%rbp),%ymm4,%ymm4
	vpaddd	96(%rbp),%ymm8,%ymm8
	vpaddd	160(%rbp),%ymm12,%ymm12

	vmovdqa	%ymm0,128(%rbp)
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vpxor	0+0(%rsi),%ymm0,%ymm0
	vpxor	32+0(%rsi),%ymm3,%ymm3
	vpxor	64+0(%rsi),%ymm7,%ymm7
	vpxor	96+0(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm0,0+0(%rdi)
	vmovdqu	%ymm3,32+0(%rdi)
	vmovdqu	%ymm7,64+0(%rdi)
	vmovdqu	%ymm11,96+0(%rdi)

	vmovdqa	128(%rbp),%ymm0
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm2,%ymm2
	vpxor	64+128(%rsi),%ymm6,%ymm6
	vpxor	96+128(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm2,32+128(%rdi)
	vmovdqu	%ymm6,64+128(%rdi)
	vmovdqu	%ymm10,96+128(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+256(%rsi),%ymm3,%ymm3
	vpxor	32+256(%rsi),%ymm1,%ymm1
	vpxor	64+256(%rsi),%ymm5,%ymm5
	vpxor	96+256(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+256(%rdi)
	vmovdqu	%ymm1,32+256(%rdi)
	vmovdqu	%ymm5,64+256(%rdi)
	vmovdqu	%ymm9,96+256(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$384,%rcx
	leaq	384(%rsi),%rsi
	subq	$384,%rbx
	jmp	seal_avx2_hash

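// seal_avx2_320: one-shot path for short plaintexts (three states, the
// standard 10 double rounds).  The first 32 bytes of keystream are
// clamped with .clamp and stored at 0(%rbp) as the Poly1305 key; the
// rest stays transposed in registers for seal_avx2_short to consume.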
seal_avx2_320:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	%ymm12,160(%rbp)
	vmovdqa	%ymm13,192(%rbp)
	vmovdqa	%ymm14,224(%rbp)
	movq	$10,%r10
1:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.rol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	decq	%r10
	jne	1b
	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	%ymm7,%ymm4,%ymm4
	vpaddd	%ymm7,%ymm5,%ymm5
	vpaddd	%ymm7,%ymm6,%ymm6
	vpaddd	%ymm11,%ymm8,%ymm8
	vpaddd	%ymm11,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm10,%ymm10
	vpaddd	160(%rbp),%ymm12,%ymm12
	vpaddd	192(%rbp),%ymm13,%ymm13
	vpaddd	224(%rbp),%ymm14,%ymm14
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.clamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
	jmp	seal_avx2_short

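// seal_avx2_192: the same construction with two states for even shorter
// inputs; ymm2/ymm6/ymm10 and ymm11/ymm15 hold pristine copies of the
// initial state for the final feed-forward addition.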
seal_avx2_192:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
	vmovdqa	%ymm12,%ymm11
	vmovdqa	%ymm13,%ymm15
	movq	$10,%r10
1:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.rol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.rol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5

	decq	%r10
	jne	1b
	vpaddd	%ymm2,%ymm0,%ymm0
	vpaddd	%ymm2,%ymm1,%ymm1
	vpaddd	%ymm6,%ymm4,%ymm4
	vpaddd	%ymm6,%ymm5,%ymm5
	vpaddd	%ymm10,%ymm8,%ymm8
	vpaddd	%ymm10,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm13,%ymm13
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.clamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
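// seal_avx2_short: MAC the additional data, then encrypt whatever
// plaintext remains using the keystream queued up in the ymm registers.
// (The movq %r8,%r8 below is a no-op left in by the code generator.)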
seal_avx2_short:
	movq	%r8,%r8
	call	poly_hash_ad_internal
	xorq	%rcx,%rcx
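// seal_avx2_hash: absorb %rcx bytes of ciphertext already written at
// (%rdi) into the Poly1305 state, one 16-byte block per pass.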
seal_avx2_hash:
	cmpq	$16,%rcx
	jb	seal_avx2_short_loop
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	subq	$16,%rcx
	addq	$16,%rdi
	jmp	seal_avx2_hash
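// seal_avx2_short_loop: encrypt 32 bytes with the next queued keystream
// register, then immediately MAC the two fresh ciphertext blocks.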
seal_avx2_short_loop:
	cmpq	$32,%rbx
	jb	seal_avx2_short_tail
	subq	$32,%rbx

	vpxor	(%rsi),%ymm0,%ymm0
	vmovdqu	%ymm0,(%rdi)
	leaq	32(%rsi),%rsi

	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	addq	16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi

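// Rotate the keystream queue (ymm4 -> ymm0, ymm8 -> ymm4, ...) so the
// next iteration consumes the following 32 bytes of keystream.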
	vmovdqa	%ymm4,%ymm0
	vmovdqa	%ymm8,%ymm4
	vmovdqa	%ymm12,%ymm8
	vmovdqa	%ymm1,%ymm12
	vmovdqa	%ymm5,%ymm1
	vmovdqa	%ymm9,%ymm5
	vmovdqa	%ymm13,%ymm9
	vmovdqa	%ymm2,%ymm13
	vmovdqa	%ymm6,%ymm2
	jmp	seal_avx2_short_loop
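// seal_avx2_short_tail: if 16..31 bytes remain, encrypt and MAC 16 of
// them with the low lane of ymm0, then expose the high lane for the
// final sub-16-byte handling.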
seal_avx2_short_tail:
	cmpq	$16,%rbx
	jb	1f
	subq	$16,%rbx
	vpxor	(%rsi),%xmm0,%xmm3
	vmovdqu	%xmm3,(%rdi)
	leaq	16(%rsi),%rsi
	addq	0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$0,%r12
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
	vextracti128	$1,%ymm0,%xmm0
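// Fewer than 16 bytes remain (possibly none): clear the AVX state and
// finish in the shared SSE tail path, which handles the last partial
// block and finalization.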
1:
	vzeroupper
	jmp	seal_sse_tail_16
.cfi_endproc
#endif