1 #if defined(__x86_64__) 2 .text 3 .extern OPENSSL_ia32cap_P 4 .hidden OPENSSL_ia32cap_P 5 6 chacha20_poly1305_constants: 7 8 .align 64 9 .chacha20_consts: 10 .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 11 .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 12 .rol8: 13 .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 14 .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 15 .rol16: 16 .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 17 .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 18 .avx2_init: 19 .long 0,0,0,0 20 .sse_inc: 21 .long 1,0,0,0 22 .avx2_inc: 23 .long 2,0,0,0,2,0,0,0 24 .clamp: 25 .quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC 26 .quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF 27 .align 16 28 .and_masks: 29 .byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 30 .byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 31 .byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 32 .byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 33 .byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 34 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 35 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 36 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 37 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 38 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 39 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 40 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 41 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 42 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 43 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 44 45 .type poly_hash_ad_internal,@function 46 .align 64 47 poly_hash_ad_internal: 48 .cfi_startproc 49 xorq %r10,%r10 50 xorq %r11,%r11 51 xorq %r12,%r12 52 cmpq $13,%r8 53 jne hash_ad_loop 54 poly_fast_tls_ad: 55 56 movq (%rcx),%r10 57 movq 5(%rcx),%r11 58 shrq $24,%r11 59 movq $1,%r12 60 movq 0+0(%rbp),%rax 61 movq %rax,%r15 62 mulq %r10 63 movq %rax,%r13 64 movq %rdx,%r14 65 movq 0+0(%rbp),%rax 66 mulq %r11 67 imulq %r12,%r15 68 addq %rax,%r14 69 adcq %rdx,%r15 70 movq 8+0(%rbp),%rax 71 movq %rax,%r9 72 mulq %r10 73 addq %rax,%r14 74 adcq $0,%rdx 75 movq %rdx,%r10 76 movq 8+0(%rbp),%rax 77 mulq %r11 78 addq %rax,%r15 79 adcq $0,%rdx 80 imulq %r12,%r9 81 addq %r10,%r15 82 adcq %rdx,%r9 83 movq %r13,%r10 84 movq %r14,%r11 85 movq %r15,%r12 86 andq $3,%r12 87 movq %r15,%r13 88 andq $-4,%r13 89 movq %r9,%r14 90 shrdq $2,%r9,%r15 91 shrq $2,%r9 92 addq %r13,%r10 93 adcq %r14,%r11 94 adcq $0,%r12 95 addq %r15,%r10 96 adcq %r9,%r11 97 adcq $0,%r12 98 99 .byte 0xf3,0xc3 100 hash_ad_loop: 101 102 cmpq $16,%r8 103 jb hash_ad_tail 104 addq 0(%rcx),%r10 105 adcq 8+0(%rcx),%r11 106 adcq $1,%r12 107 movq 0+0(%rbp),%rax 108 movq %rax,%r15 109 mulq %r10 110 movq %rax,%r13 111 movq %rdx,%r14 112 movq 0+0(%rbp),%rax 113 mulq %r11 114 imulq %r12,%r15 115 addq %rax,%r14 116 adcq %rdx,%r15 117 movq 8+0(%rbp),%rax 118 movq %rax,%r9 119 mulq %r10 120 addq %rax,%r14 121 adcq $0,%rdx 122 movq %rdx,%r10 123 movq 8+0(%rbp),%rax 124 mulq %r11 125 addq %rax,%r15 126 adcq $0,%rdx 127 imulq %r12,%r9 128 addq %r10,%r15 129 adcq %rdx,%r9 130 movq %r13,%r10 131 movq %r14,%r11 132 movq %r15,%r12 133 andq $3,%r12 134 movq %r15,%r13 135 andq $-4,%r13 136 movq %r9,%r14 137 shrdq $2,%r9,%r15 138 shrq $2,%r9 139 addq %r13,%r10 140 adcq %r14,%r11 141 adcq $0,%r12 142 addq %r15,%r10 143 adcq %r9,%r11 144 adcq $0,%r12 145 146 leaq 16(%rcx),%rcx 147 subq $16,%r8 148 jmp hash_ad_loop 149 hash_ad_tail: 150 cmpq $0,%r8 151 je 1f 152 153 xorq %r13,%r13 154 xorq %r14,%r14 155 xorq %r15,%r15 156 addq %r8,%rcx 157 hash_ad_tail_loop: 158 shldq $8,%r13,%r14 159 shlq $8,%r13 160 movzbq -1(%rcx),%r15 161 xorq %r15,%r13 162 decq %rcx 163 decq %r8 164 jne hash_ad_tail_loop 165 166 addq %r13,%r10 167 adcq %r14,%r11 168 adcq $1,%r12 169 movq 0+0(%rbp),%rax 170 movq %rax,%r15 171 mulq %r10 172 movq %rax,%r13 173 movq %rdx,%r14 174 movq 0+0(%rbp),%rax 175 mulq %r11 176 imulq %r12,%r15 177 addq %rax,%r14 178 adcq %rdx,%r15 179 movq 8+0(%rbp),%rax 180 movq %rax,%r9 181 mulq %r10 182 addq %rax,%r14 183 adcq $0,%rdx 184 movq %rdx,%r10 185 movq 8+0(%rbp),%rax 186 mulq %r11 187 addq %rax,%r15 188 adcq $0,%rdx 189 imulq %r12,%r9 190 addq %r10,%r15 191 adcq %rdx,%r9 192 movq %r13,%r10 193 movq %r14,%r11 194 movq %r15,%r12 195 andq $3,%r12 196 movq %r15,%r13 197 andq $-4,%r13 198 movq %r9,%r14 199 shrdq $2,%r9,%r15 200 shrq $2,%r9 201 addq %r13,%r10 202 adcq %r14,%r11 203 adcq $0,%r12 204 addq %r15,%r10 205 adcq %r9,%r11 206 adcq $0,%r12 207 208 209 1: 210 .byte 0xf3,0xc3 211 .cfi_endproc 212 .size poly_hash_ad_internal, .-poly_hash_ad_internal 213 214 .globl chacha20_poly1305_open 215 .hidden chacha20_poly1305_open 216 .type chacha20_poly1305_open,@function 217 .align 64 218 chacha20_poly1305_open: 219 .cfi_startproc 220 pushq %rbp 221 .cfi_adjust_cfa_offset 8 222 pushq %rbx 223 .cfi_adjust_cfa_offset 8 224 pushq %r12 225 .cfi_adjust_cfa_offset 8 226 pushq %r13 227 .cfi_adjust_cfa_offset 8 228 pushq %r14 229 .cfi_adjust_cfa_offset 8 230 pushq %r15 231 .cfi_adjust_cfa_offset 8 232 233 234 pushq %r9 235 .cfi_adjust_cfa_offset 8 236 subq $288 + 32,%rsp 237 .cfi_adjust_cfa_offset 288 + 32 238 .cfi_offset rbp, -16 239 .cfi_offset rbx, -24 240 .cfi_offset r12, -32 241 .cfi_offset r13, -40 242 .cfi_offset r14, -48 243 .cfi_offset r15, -56 244 leaq 32(%rsp),%rbp 245 andq $-32,%rbp 246 movq %rdx,8+32(%rbp) 247 movq %r8,0+32(%rbp) 248 movq %rdx,%rbx 249 250 movl OPENSSL_ia32cap_P+8(%rip),%eax 251 andl $288,%eax 252 xorl $288,%eax 253 jz chacha20_poly1305_open_avx2 254 255 1: 256 cmpq $128,%rbx 257 jbe open_sse_128 258 259 movdqa .chacha20_consts(%rip),%xmm0 260 movdqu 0(%r9),%xmm4 261 movdqu 16(%r9),%xmm8 262 movdqu 32(%r9),%xmm12 263 movdqa %xmm12,%xmm7 264 265 movdqa %xmm4,48(%rbp) 266 movdqa %xmm8,64(%rbp) 267 movdqa %xmm12,96(%rbp) 268 movq $10,%r10 269 1: 270 paddd %xmm4,%xmm0 271 pxor %xmm0,%xmm12 272 pshufb .rol16(%rip),%xmm12 273 paddd %xmm12,%xmm8 274 pxor %xmm8,%xmm4 275 movdqa %xmm4,%xmm3 276 pslld $12,%xmm3 277 psrld $20,%xmm4 278 pxor %xmm3,%xmm4 279 paddd %xmm4,%xmm0 280 pxor %xmm0,%xmm12 281 pshufb .rol8(%rip),%xmm12 282 paddd %xmm12,%xmm8 283 pxor %xmm8,%xmm4 284 movdqa %xmm4,%xmm3 285 pslld $7,%xmm3 286 psrld $25,%xmm4 287 pxor %xmm3,%xmm4 288 .byte 102,15,58,15,228,4 289 .byte 102,69,15,58,15,192,8 290 .byte 102,69,15,58,15,228,12 291 paddd %xmm4,%xmm0 292 pxor %xmm0,%xmm12 293 pshufb .rol16(%rip),%xmm12 294 paddd %xmm12,%xmm8 295 pxor %xmm8,%xmm4 296 movdqa %xmm4,%xmm3 297 pslld $12,%xmm3 298 psrld $20,%xmm4 299 pxor %xmm3,%xmm4 300 paddd %xmm4,%xmm0 301 pxor %xmm0,%xmm12 302 pshufb .rol8(%rip),%xmm12 303 paddd %xmm12,%xmm8 304 pxor %xmm8,%xmm4 305 movdqa %xmm4,%xmm3 306 pslld $7,%xmm3 307 psrld $25,%xmm4 308 pxor %xmm3,%xmm4 309 .byte 102,15,58,15,228,12 310 .byte 102,69,15,58,15,192,8 311 .byte 102,69,15,58,15,228,4 312 313 decq %r10 314 jne 1b 315 316 paddd .chacha20_consts(%rip),%xmm0 317 paddd 48(%rbp),%xmm4 318 319 pand .clamp(%rip),%xmm0 320 movdqa %xmm0,0(%rbp) 321 movdqa %xmm4,16(%rbp) 322 323 movq %r8,%r8 324 call poly_hash_ad_internal 325 open_sse_main_loop: 326 cmpq $256,%rbx 327 jb 2f 328 329 movdqa .chacha20_consts(%rip),%xmm0 330 movdqa 48(%rbp),%xmm4 331 movdqa 64(%rbp),%xmm8 332 movdqa %xmm0,%xmm1 333 movdqa %xmm4,%xmm5 334 movdqa %xmm8,%xmm9 335 movdqa %xmm0,%xmm2 336 movdqa %xmm4,%xmm6 337 movdqa %xmm8,%xmm10 338 movdqa %xmm0,%xmm3 339 movdqa %xmm4,%xmm7 340 movdqa %xmm8,%xmm11 341 movdqa 96(%rbp),%xmm15 342 paddd .sse_inc(%rip),%xmm15 343 movdqa %xmm15,%xmm14 344 paddd .sse_inc(%rip),%xmm14 345 movdqa %xmm14,%xmm13 346 paddd .sse_inc(%rip),%xmm13 347 movdqa %xmm13,%xmm12 348 paddd .sse_inc(%rip),%xmm12 349 movdqa %xmm12,96(%rbp) 350 movdqa %xmm13,112(%rbp) 351 movdqa %xmm14,128(%rbp) 352 movdqa %xmm15,144(%rbp) 353 354 355 356 movq $4,%rcx 357 movq %rsi,%r8 358 1: 359 movdqa %xmm8,80(%rbp) 360 movdqa .rol16(%rip),%xmm8 361 paddd %xmm7,%xmm3 362 paddd %xmm6,%xmm2 363 paddd %xmm5,%xmm1 364 paddd %xmm4,%xmm0 365 pxor %xmm3,%xmm15 366 pxor %xmm2,%xmm14 367 pxor %xmm1,%xmm13 368 pxor %xmm0,%xmm12 369 .byte 102,69,15,56,0,248 370 .byte 102,69,15,56,0,240 371 .byte 102,69,15,56,0,232 372 .byte 102,69,15,56,0,224 373 movdqa 80(%rbp),%xmm8 374 paddd %xmm15,%xmm11 375 paddd %xmm14,%xmm10 376 paddd %xmm13,%xmm9 377 paddd %xmm12,%xmm8 378 pxor %xmm11,%xmm7 379 addq 0(%r8),%r10 380 adcq 8+0(%r8),%r11 381 adcq $1,%r12 382 383 leaq 16(%r8),%r8 384 pxor %xmm10,%xmm6 385 pxor %xmm9,%xmm5 386 pxor %xmm8,%xmm4 387 movdqa %xmm8,80(%rbp) 388 movdqa %xmm7,%xmm8 389 psrld $20,%xmm8 390 pslld $32-20,%xmm7 391 pxor %xmm8,%xmm7 392 movdqa %xmm6,%xmm8 393 psrld $20,%xmm8 394 pslld $32-20,%xmm6 395 pxor %xmm8,%xmm6 396 movdqa %xmm5,%xmm8 397 psrld $20,%xmm8 398 pslld $32-20,%xmm5 399 pxor %xmm8,%xmm5 400 movdqa %xmm4,%xmm8 401 psrld $20,%xmm8 402 pslld $32-20,%xmm4 403 pxor %xmm8,%xmm4 404 movq 0+0(%rbp),%rax 405 movq %rax,%r15 406 mulq %r10 407 movq %rax,%r13 408 movq %rdx,%r14 409 movq 0+0(%rbp),%rax 410 mulq %r11 411 imulq %r12,%r15 412 addq %rax,%r14 413 adcq %rdx,%r15 414 movdqa .rol8(%rip),%xmm8 415 paddd %xmm7,%xmm3 416 paddd %xmm6,%xmm2 417 paddd %xmm5,%xmm1 418 paddd %xmm4,%xmm0 419 pxor %xmm3,%xmm15 420 pxor %xmm2,%xmm14 421 pxor %xmm1,%xmm13 422 pxor %xmm0,%xmm12 423 .byte 102,69,15,56,0,248 424 .byte 102,69,15,56,0,240 425 .byte 102,69,15,56,0,232 426 .byte 102,69,15,56,0,224 427 movdqa 80(%rbp),%xmm8 428 paddd %xmm15,%xmm11 429 paddd %xmm14,%xmm10 430 paddd %xmm13,%xmm9 431 paddd %xmm12,%xmm8 432 pxor %xmm11,%xmm7 433 pxor %xmm10,%xmm6 434 movq 8+0(%rbp),%rax 435 movq %rax,%r9 436 mulq %r10 437 addq %rax,%r14 438 adcq $0,%rdx 439 movq %rdx,%r10 440 movq 8+0(%rbp),%rax 441 mulq %r11 442 addq %rax,%r15 443 adcq $0,%rdx 444 pxor %xmm9,%xmm5 445 pxor %xmm8,%xmm4 446 movdqa %xmm8,80(%rbp) 447 movdqa %xmm7,%xmm8 448 psrld $25,%xmm8 449 pslld $32-25,%xmm7 450 pxor %xmm8,%xmm7 451 movdqa %xmm6,%xmm8 452 psrld $25,%xmm8 453 pslld $32-25,%xmm6 454 pxor %xmm8,%xmm6 455 movdqa %xmm5,%xmm8 456 psrld $25,%xmm8 457 pslld $32-25,%xmm5 458 pxor %xmm8,%xmm5 459 movdqa %xmm4,%xmm8 460 psrld $25,%xmm8 461 pslld $32-25,%xmm4 462 pxor %xmm8,%xmm4 463 movdqa 80(%rbp),%xmm8 464 imulq %r12,%r9 465 addq %r10,%r15 466 adcq %rdx,%r9 467 .byte 102,15,58,15,255,4 468 .byte 102,69,15,58,15,219,8 469 .byte 102,69,15,58,15,255,12 470 .byte 102,15,58,15,246,4 471 .byte 102,69,15,58,15,210,8 472 .byte 102,69,15,58,15,246,12 473 .byte 102,15,58,15,237,4 474 .byte 102,69,15,58,15,201,8 475 .byte 102,69,15,58,15,237,12 476 .byte 102,15,58,15,228,4 477 .byte 102,69,15,58,15,192,8 478 .byte 102,69,15,58,15,228,12 479 movdqa %xmm8,80(%rbp) 480 movdqa .rol16(%rip),%xmm8 481 paddd %xmm7,%xmm3 482 paddd %xmm6,%xmm2 483 paddd %xmm5,%xmm1 484 paddd %xmm4,%xmm0 485 pxor %xmm3,%xmm15 486 pxor %xmm2,%xmm14 487 movq %r13,%r10 488 movq %r14,%r11 489 movq %r15,%r12 490 andq $3,%r12 491 movq %r15,%r13 492 andq $-4,%r13 493 movq %r9,%r14 494 shrdq $2,%r9,%r15 495 shrq $2,%r9 496 addq %r13,%r10 497 adcq %r14,%r11 498 adcq $0,%r12 499 addq %r15,%r10 500 adcq %r9,%r11 501 adcq $0,%r12 502 pxor %xmm1,%xmm13 503 pxor %xmm0,%xmm12 504 .byte 102,69,15,56,0,248 505 .byte 102,69,15,56,0,240 506 .byte 102,69,15,56,0,232 507 .byte 102,69,15,56,0,224 508 movdqa 80(%rbp),%xmm8 509 paddd %xmm15,%xmm11 510 paddd %xmm14,%xmm10 511 paddd %xmm13,%xmm9 512 paddd %xmm12,%xmm8 513 pxor %xmm11,%xmm7 514 pxor %xmm10,%xmm6 515 pxor %xmm9,%xmm5 516 pxor %xmm8,%xmm4 517 movdqa %xmm8,80(%rbp) 518 movdqa %xmm7,%xmm8 519 psrld $20,%xmm8 520 pslld $32-20,%xmm7 521 pxor %xmm8,%xmm7 522 movdqa %xmm6,%xmm8 523 psrld $20,%xmm8 524 pslld $32-20,%xmm6 525 pxor %xmm8,%xmm6 526 movdqa %xmm5,%xmm8 527 psrld $20,%xmm8 528 pslld $32-20,%xmm5 529 pxor %xmm8,%xmm5 530 movdqa %xmm4,%xmm8 531 psrld $20,%xmm8 532 pslld $32-20,%xmm4 533 pxor %xmm8,%xmm4 534 movdqa .rol8(%rip),%xmm8 535 paddd %xmm7,%xmm3 536 paddd %xmm6,%xmm2 537 paddd %xmm5,%xmm1 538 paddd %xmm4,%xmm0 539 pxor %xmm3,%xmm15 540 pxor %xmm2,%xmm14 541 pxor %xmm1,%xmm13 542 pxor %xmm0,%xmm12 543 .byte 102,69,15,56,0,248 544 .byte 102,69,15,56,0,240 545 .byte 102,69,15,56,0,232 546 .byte 102,69,15,56,0,224 547 movdqa 80(%rbp),%xmm8 548 paddd %xmm15,%xmm11 549 paddd %xmm14,%xmm10 550 paddd %xmm13,%xmm9 551 paddd %xmm12,%xmm8 552 pxor %xmm11,%xmm7 553 pxor %xmm10,%xmm6 554 pxor %xmm9,%xmm5 555 pxor %xmm8,%xmm4 556 movdqa %xmm8,80(%rbp) 557 movdqa %xmm7,%xmm8 558 psrld $25,%xmm8 559 pslld $32-25,%xmm7 560 pxor %xmm8,%xmm7 561 movdqa %xmm6,%xmm8 562 psrld $25,%xmm8 563 pslld $32-25,%xmm6 564 pxor %xmm8,%xmm6 565 movdqa %xmm5,%xmm8 566 psrld $25,%xmm8 567 pslld $32-25,%xmm5 568 pxor %xmm8,%xmm5 569 movdqa %xmm4,%xmm8 570 psrld $25,%xmm8 571 pslld $32-25,%xmm4 572 pxor %xmm8,%xmm4 573 movdqa 80(%rbp),%xmm8 574 .byte 102,15,58,15,255,12 575 .byte 102,69,15,58,15,219,8 576 .byte 102,69,15,58,15,255,4 577 .byte 102,15,58,15,246,12 578 .byte 102,69,15,58,15,210,8 579 .byte 102,69,15,58,15,246,4 580 .byte 102,15,58,15,237,12 581 .byte 102,69,15,58,15,201,8 582 .byte 102,69,15,58,15,237,4 583 .byte 102,15,58,15,228,12 584 .byte 102,69,15,58,15,192,8 585 .byte 102,69,15,58,15,228,4 586 587 decq %rcx 588 jge 1b 589 addq 0(%r8),%r10 590 adcq 8+0(%r8),%r11 591 adcq $1,%r12 592 movq 0+0(%rbp),%rax 593 movq %rax,%r15 594 mulq %r10 595 movq %rax,%r13 596 movq %rdx,%r14 597 movq 0+0(%rbp),%rax 598 mulq %r11 599 imulq %r12,%r15 600 addq %rax,%r14 601 adcq %rdx,%r15 602 movq 8+0(%rbp),%rax 603 movq %rax,%r9 604 mulq %r10 605 addq %rax,%r14 606 adcq $0,%rdx 607 movq %rdx,%r10 608 movq 8+0(%rbp),%rax 609 mulq %r11 610 addq %rax,%r15 611 adcq $0,%rdx 612 imulq %r12,%r9 613 addq %r10,%r15 614 adcq %rdx,%r9 615 movq %r13,%r10 616 movq %r14,%r11 617 movq %r15,%r12 618 andq $3,%r12 619 movq %r15,%r13 620 andq $-4,%r13 621 movq %r9,%r14 622 shrdq $2,%r9,%r15 623 shrq $2,%r9 624 addq %r13,%r10 625 adcq %r14,%r11 626 adcq $0,%r12 627 addq %r15,%r10 628 adcq %r9,%r11 629 adcq $0,%r12 630 631 leaq 16(%r8),%r8 632 cmpq $-6,%rcx 633 jg 1b 634 paddd .chacha20_consts(%rip),%xmm3 635 paddd 48(%rbp),%xmm7 636 paddd 64(%rbp),%xmm11 637 paddd 144(%rbp),%xmm15 638 paddd .chacha20_consts(%rip),%xmm2 639 paddd 48(%rbp),%xmm6 640 paddd 64(%rbp),%xmm10 641 paddd 128(%rbp),%xmm14 642 paddd .chacha20_consts(%rip),%xmm1 643 paddd 48(%rbp),%xmm5 644 paddd 64(%rbp),%xmm9 645 paddd 112(%rbp),%xmm13 646 paddd .chacha20_consts(%rip),%xmm0 647 paddd 48(%rbp),%xmm4 648 paddd 64(%rbp),%xmm8 649 paddd 96(%rbp),%xmm12 650 movdqa %xmm12,80(%rbp) 651 movdqu 0 + 0(%rsi),%xmm12 652 pxor %xmm3,%xmm12 653 movdqu %xmm12,0 + 0(%rdi) 654 movdqu 16 + 0(%rsi),%xmm12 655 pxor %xmm7,%xmm12 656 movdqu %xmm12,16 + 0(%rdi) 657 movdqu 32 + 0(%rsi),%xmm12 658 pxor %xmm11,%xmm12 659 movdqu %xmm12,32 + 0(%rdi) 660 movdqu 48 + 0(%rsi),%xmm12 661 pxor %xmm15,%xmm12 662 movdqu %xmm12,48 + 0(%rdi) 663 movdqu 0 + 64(%rsi),%xmm3 664 movdqu 16 + 64(%rsi),%xmm7 665 movdqu 32 + 64(%rsi),%xmm11 666 movdqu 48 + 64(%rsi),%xmm15 667 pxor %xmm3,%xmm2 668 pxor %xmm7,%xmm6 669 pxor %xmm11,%xmm10 670 pxor %xmm14,%xmm15 671 movdqu %xmm2,0 + 64(%rdi) 672 movdqu %xmm6,16 + 64(%rdi) 673 movdqu %xmm10,32 + 64(%rdi) 674 movdqu %xmm15,48 + 64(%rdi) 675 movdqu 0 + 128(%rsi),%xmm3 676 movdqu 16 + 128(%rsi),%xmm7 677 movdqu 32 + 128(%rsi),%xmm11 678 movdqu 48 + 128(%rsi),%xmm15 679 pxor %xmm3,%xmm1 680 pxor %xmm7,%xmm5 681 pxor %xmm11,%xmm9 682 pxor %xmm13,%xmm15 683 movdqu %xmm1,0 + 128(%rdi) 684 movdqu %xmm5,16 + 128(%rdi) 685 movdqu %xmm9,32 + 128(%rdi) 686 movdqu %xmm15,48 + 128(%rdi) 687 movdqu 0 + 192(%rsi),%xmm3 688 movdqu 16 + 192(%rsi),%xmm7 689 movdqu 32 + 192(%rsi),%xmm11 690 movdqu 48 + 192(%rsi),%xmm15 691 pxor %xmm3,%xmm0 692 pxor %xmm7,%xmm4 693 pxor %xmm11,%xmm8 694 pxor 80(%rbp),%xmm15 695 movdqu %xmm0,0 + 192(%rdi) 696 movdqu %xmm4,16 + 192(%rdi) 697 movdqu %xmm8,32 + 192(%rdi) 698 movdqu %xmm15,48 + 192(%rdi) 699 700 leaq 256(%rsi),%rsi 701 leaq 256(%rdi),%rdi 702 subq $256,%rbx 703 jmp open_sse_main_loop 704 2: 705 706 testq %rbx,%rbx 707 jz open_sse_finalize 708 cmpq $64,%rbx 709 ja 3f 710 movdqa .chacha20_consts(%rip),%xmm0 711 movdqa 48(%rbp),%xmm4 712 movdqa 64(%rbp),%xmm8 713 movdqa 96(%rbp),%xmm12 714 paddd .sse_inc(%rip),%xmm12 715 movdqa %xmm12,96(%rbp) 716 717 xorq %r8,%r8 718 movq %rbx,%rcx 719 cmpq $16,%rcx 720 jb 2f 721 1: 722 addq 0(%rsi,%r8), %r10 723 adcq 8+0(%rsi,%r8), %r11 724 adcq $1,%r12 725 movq 0+0(%rbp),%rax 726 movq %rax,%r15 727 mulq %r10 728 movq %rax,%r13 729 movq %rdx,%r14 730 movq 0+0(%rbp),%rax 731 mulq %r11 732 imulq %r12,%r15 733 addq %rax,%r14 734 adcq %rdx,%r15 735 movq 8+0(%rbp),%rax 736 movq %rax,%r9 737 mulq %r10 738 addq %rax,%r14 739 adcq $0,%rdx 740 movq %rdx,%r10 741 movq 8+0(%rbp),%rax 742 mulq %r11 743 addq %rax,%r15 744 adcq $0,%rdx 745 imulq %r12,%r9 746 addq %r10,%r15 747 adcq %rdx,%r9 748 movq %r13,%r10 749 movq %r14,%r11 750 movq %r15,%r12 751 andq $3,%r12 752 movq %r15,%r13 753 andq $-4,%r13 754 movq %r9,%r14 755 shrdq $2,%r9,%r15 756 shrq $2,%r9 757 addq %r13,%r10 758 adcq %r14,%r11 759 adcq $0,%r12 760 addq %r15,%r10 761 adcq %r9,%r11 762 adcq $0,%r12 763 764 subq $16,%rcx 765 2: 766 addq $16,%r8 767 paddd %xmm4,%xmm0 768 pxor %xmm0,%xmm12 769 pshufb .rol16(%rip),%xmm12 770 paddd %xmm12,%xmm8 771 pxor %xmm8,%xmm4 772 movdqa %xmm4,%xmm3 773 pslld $12,%xmm3 774 psrld $20,%xmm4 775 pxor %xmm3,%xmm4 776 paddd %xmm4,%xmm0 777 pxor %xmm0,%xmm12 778 pshufb .rol8(%rip),%xmm12 779 paddd %xmm12,%xmm8 780 pxor %xmm8,%xmm4 781 movdqa %xmm4,%xmm3 782 pslld $7,%xmm3 783 psrld $25,%xmm4 784 pxor %xmm3,%xmm4 785 .byte 102,15,58,15,228,4 786 .byte 102,69,15,58,15,192,8 787 .byte 102,69,15,58,15,228,12 788 paddd %xmm4,%xmm0 789 pxor %xmm0,%xmm12 790 pshufb .rol16(%rip),%xmm12 791 paddd %xmm12,%xmm8 792 pxor %xmm8,%xmm4 793 movdqa %xmm4,%xmm3 794 pslld $12,%xmm3 795 psrld $20,%xmm4 796 pxor %xmm3,%xmm4 797 paddd %xmm4,%xmm0 798 pxor %xmm0,%xmm12 799 pshufb .rol8(%rip),%xmm12 800 paddd %xmm12,%xmm8 801 pxor %xmm8,%xmm4 802 movdqa %xmm4,%xmm3 803 pslld $7,%xmm3 804 psrld $25,%xmm4 805 pxor %xmm3,%xmm4 806 .byte 102,15,58,15,228,12 807 .byte 102,69,15,58,15,192,8 808 .byte 102,69,15,58,15,228,4 809 810 cmpq $16,%rcx 811 jae 1b 812 cmpq $160,%r8 813 jne 2b 814 paddd .chacha20_consts(%rip),%xmm0 815 paddd 48(%rbp),%xmm4 816 paddd 64(%rbp),%xmm8 817 paddd 96(%rbp),%xmm12 818 819 jmp open_sse_tail_64_dec_loop 820 3: 821 cmpq $128,%rbx 822 ja 3f 823 movdqa .chacha20_consts(%rip),%xmm0 824 movdqa 48(%rbp),%xmm4 825 movdqa 64(%rbp),%xmm8 826 movdqa %xmm0,%xmm1 827 movdqa %xmm4,%xmm5 828 movdqa %xmm8,%xmm9 829 movdqa 96(%rbp),%xmm13 830 paddd .sse_inc(%rip),%xmm13 831 movdqa %xmm13,%xmm12 832 paddd .sse_inc(%rip),%xmm12 833 movdqa %xmm12,96(%rbp) 834 movdqa %xmm13,112(%rbp) 835 836 movq %rbx,%rcx 837 andq $-16,%rcx 838 xorq %r8,%r8 839 1: 840 addq 0(%rsi,%r8), %r10 841 adcq 8+0(%rsi,%r8), %r11 842 adcq $1,%r12 843 movq 0+0(%rbp),%rax 844 movq %rax,%r15 845 mulq %r10 846 movq %rax,%r13 847 movq %rdx,%r14 848 movq 0+0(%rbp),%rax 849 mulq %r11 850 imulq %r12,%r15 851 addq %rax,%r14 852 adcq %rdx,%r15 853 movq 8+0(%rbp),%rax 854 movq %rax,%r9 855 mulq %r10 856 addq %rax,%r14 857 adcq $0,%rdx 858 movq %rdx,%r10 859 movq 8+0(%rbp),%rax 860 mulq %r11 861 addq %rax,%r15 862 adcq $0,%rdx 863 imulq %r12,%r9 864 addq %r10,%r15 865 adcq %rdx,%r9 866 movq %r13,%r10 867 movq %r14,%r11 868 movq %r15,%r12 869 andq $3,%r12 870 movq %r15,%r13 871 andq $-4,%r13 872 movq %r9,%r14 873 shrdq $2,%r9,%r15 874 shrq $2,%r9 875 addq %r13,%r10 876 adcq %r14,%r11 877 adcq $0,%r12 878 addq %r15,%r10 879 adcq %r9,%r11 880 adcq $0,%r12 881 882 2: 883 addq $16,%r8 884 paddd %xmm4,%xmm0 885 pxor %xmm0,%xmm12 886 pshufb .rol16(%rip),%xmm12 887 paddd %xmm12,%xmm8 888 pxor %xmm8,%xmm4 889 movdqa %xmm4,%xmm3 890 pslld $12,%xmm3 891 psrld $20,%xmm4 892 pxor %xmm3,%xmm4 893 paddd %xmm4,%xmm0 894 pxor %xmm0,%xmm12 895 pshufb .rol8(%rip),%xmm12 896 paddd %xmm12,%xmm8 897 pxor %xmm8,%xmm4 898 movdqa %xmm4,%xmm3 899 pslld $7,%xmm3 900 psrld $25,%xmm4 901 pxor %xmm3,%xmm4 902 .byte 102,15,58,15,228,4 903 .byte 102,69,15,58,15,192,8 904 .byte 102,69,15,58,15,228,12 905 paddd %xmm5,%xmm1 906 pxor %xmm1,%xmm13 907 pshufb .rol16(%rip),%xmm13 908 paddd %xmm13,%xmm9 909 pxor %xmm9,%xmm5 910 movdqa %xmm5,%xmm3 911 pslld $12,%xmm3 912 psrld $20,%xmm5 913 pxor %xmm3,%xmm5 914 paddd %xmm5,%xmm1 915 pxor %xmm1,%xmm13 916 pshufb .rol8(%rip),%xmm13 917 paddd %xmm13,%xmm9 918 pxor %xmm9,%xmm5 919 movdqa %xmm5,%xmm3 920 pslld $7,%xmm3 921 psrld $25,%xmm5 922 pxor %xmm3,%xmm5 923 .byte 102,15,58,15,237,4 924 .byte 102,69,15,58,15,201,8 925 .byte 102,69,15,58,15,237,12 926 paddd %xmm4,%xmm0 927 pxor %xmm0,%xmm12 928 pshufb .rol16(%rip),%xmm12 929 paddd %xmm12,%xmm8 930 pxor %xmm8,%xmm4 931 movdqa %xmm4,%xmm3 932 pslld $12,%xmm3 933 psrld $20,%xmm4 934 pxor %xmm3,%xmm4 935 paddd %xmm4,%xmm0 936 pxor %xmm0,%xmm12 937 pshufb .rol8(%rip),%xmm12 938 paddd %xmm12,%xmm8 939 pxor %xmm8,%xmm4 940 movdqa %xmm4,%xmm3 941 pslld $7,%xmm3 942 psrld $25,%xmm4 943 pxor %xmm3,%xmm4 944 .byte 102,15,58,15,228,12 945 .byte 102,69,15,58,15,192,8 946 .byte 102,69,15,58,15,228,4 947 paddd %xmm5,%xmm1 948 pxor %xmm1,%xmm13 949 pshufb .rol16(%rip),%xmm13 950 paddd %xmm13,%xmm9 951 pxor %xmm9,%xmm5 952 movdqa %xmm5,%xmm3 953 pslld $12,%xmm3 954 psrld $20,%xmm5 955 pxor %xmm3,%xmm5 956 paddd %xmm5,%xmm1 957 pxor %xmm1,%xmm13 958 pshufb .rol8(%rip),%xmm13 959 paddd %xmm13,%xmm9 960 pxor %xmm9,%xmm5 961 movdqa %xmm5,%xmm3 962 pslld $7,%xmm3 963 psrld $25,%xmm5 964 pxor %xmm3,%xmm5 965 .byte 102,15,58,15,237,12 966 .byte 102,69,15,58,15,201,8 967 .byte 102,69,15,58,15,237,4 968 969 cmpq %rcx,%r8 970 jb 1b 971 cmpq $160,%r8 972 jne 2b 973 paddd .chacha20_consts(%rip),%xmm1 974 paddd 48(%rbp),%xmm5 975 paddd 64(%rbp),%xmm9 976 paddd 112(%rbp),%xmm13 977 paddd .chacha20_consts(%rip),%xmm0 978 paddd 48(%rbp),%xmm4 979 paddd 64(%rbp),%xmm8 980 paddd 96(%rbp),%xmm12 981 movdqu 0 + 0(%rsi),%xmm3 982 movdqu 16 + 0(%rsi),%xmm7 983 movdqu 32 + 0(%rsi),%xmm11 984 movdqu 48 + 0(%rsi),%xmm15 985 pxor %xmm3,%xmm1 986 pxor %xmm7,%xmm5 987 pxor %xmm11,%xmm9 988 pxor %xmm13,%xmm15 989 movdqu %xmm1,0 + 0(%rdi) 990 movdqu %xmm5,16 + 0(%rdi) 991 movdqu %xmm9,32 + 0(%rdi) 992 movdqu %xmm15,48 + 0(%rdi) 993 994 subq $64,%rbx 995 leaq 64(%rsi),%rsi 996 leaq 64(%rdi),%rdi 997 jmp open_sse_tail_64_dec_loop 998 3: 999 cmpq $192,%rbx 1000 ja 3f 1001 movdqa .chacha20_consts(%rip),%xmm0 1002 movdqa 48(%rbp),%xmm4 1003 movdqa 64(%rbp),%xmm8 1004 movdqa %xmm0,%xmm1 1005 movdqa %xmm4,%xmm5 1006 movdqa %xmm8,%xmm9 1007 movdqa %xmm0,%xmm2 1008 movdqa %xmm4,%xmm6 1009 movdqa %xmm8,%xmm10 1010 movdqa 96(%rbp),%xmm14 1011 paddd .sse_inc(%rip),%xmm14 1012 movdqa %xmm14,%xmm13 1013 paddd .sse_inc(%rip),%xmm13 1014 movdqa %xmm13,%xmm12 1015 paddd .sse_inc(%rip),%xmm12 1016 movdqa %xmm12,96(%rbp) 1017 movdqa %xmm13,112(%rbp) 1018 movdqa %xmm14,128(%rbp) 1019 1020 movq %rbx,%rcx 1021 movq $160,%r8 1022 cmpq $160,%rcx 1023 cmovgq %r8,%rcx 1024 andq $-16,%rcx 1025 xorq %r8,%r8 1026 1: 1027 addq 0(%rsi,%r8), %r10 1028 adcq 8+0(%rsi,%r8), %r11 1029 adcq $1,%r12 1030 movq 0+0(%rbp),%rax 1031 movq %rax,%r15 1032 mulq %r10 1033 movq %rax,%r13 1034 movq %rdx,%r14 1035 movq 0+0(%rbp),%rax 1036 mulq %r11 1037 imulq %r12,%r15 1038 addq %rax,%r14 1039 adcq %rdx,%r15 1040 movq 8+0(%rbp),%rax 1041 movq %rax,%r9 1042 mulq %r10 1043 addq %rax,%r14 1044 adcq $0,%rdx 1045 movq %rdx,%r10 1046 movq 8+0(%rbp),%rax 1047 mulq %r11 1048 addq %rax,%r15 1049 adcq $0,%rdx 1050 imulq %r12,%r9 1051 addq %r10,%r15 1052 adcq %rdx,%r9 1053 movq %r13,%r10 1054 movq %r14,%r11 1055 movq %r15,%r12 1056 andq $3,%r12 1057 movq %r15,%r13 1058 andq $-4,%r13 1059 movq %r9,%r14 1060 shrdq $2,%r9,%r15 1061 shrq $2,%r9 1062 addq %r13,%r10 1063 adcq %r14,%r11 1064 adcq $0,%r12 1065 addq %r15,%r10 1066 adcq %r9,%r11 1067 adcq $0,%r12 1068 1069 2: 1070 addq $16,%r8 1071 paddd %xmm4,%xmm0 1072 pxor %xmm0,%xmm12 1073 pshufb .rol16(%rip),%xmm12 1074 paddd %xmm12,%xmm8 1075 pxor %xmm8,%xmm4 1076 movdqa %xmm4,%xmm3 1077 pslld $12,%xmm3 1078 psrld $20,%xmm4 1079 pxor %xmm3,%xmm4 1080 paddd %xmm4,%xmm0 1081 pxor %xmm0,%xmm12 1082 pshufb .rol8(%rip),%xmm12 1083 paddd %xmm12,%xmm8 1084 pxor %xmm8,%xmm4 1085 movdqa %xmm4,%xmm3 1086 pslld $7,%xmm3 1087 psrld $25,%xmm4 1088 pxor %xmm3,%xmm4 1089 .byte 102,15,58,15,228,4 1090 .byte 102,69,15,58,15,192,8 1091 .byte 102,69,15,58,15,228,12 1092 paddd %xmm5,%xmm1 1093 pxor %xmm1,%xmm13 1094 pshufb .rol16(%rip),%xmm13 1095 paddd %xmm13,%xmm9 1096 pxor %xmm9,%xmm5 1097 movdqa %xmm5,%xmm3 1098 pslld $12,%xmm3 1099 psrld $20,%xmm5 1100 pxor %xmm3,%xmm5 1101 paddd %xmm5,%xmm1 1102 pxor %xmm1,%xmm13 1103 pshufb .rol8(%rip),%xmm13 1104 paddd %xmm13,%xmm9 1105 pxor %xmm9,%xmm5 1106 movdqa %xmm5,%xmm3 1107 pslld $7,%xmm3 1108 psrld $25,%xmm5 1109 pxor %xmm3,%xmm5 1110 .byte 102,15,58,15,237,4 1111 .byte 102,69,15,58,15,201,8 1112 .byte 102,69,15,58,15,237,12 1113 paddd %xmm6,%xmm2 1114 pxor %xmm2,%xmm14 1115 pshufb .rol16(%rip),%xmm14 1116 paddd %xmm14,%xmm10 1117 pxor %xmm10,%xmm6 1118 movdqa %xmm6,%xmm3 1119 pslld $12,%xmm3 1120 psrld $20,%xmm6 1121 pxor %xmm3,%xmm6 1122 paddd %xmm6,%xmm2 1123 pxor %xmm2,%xmm14 1124 pshufb .rol8(%rip),%xmm14 1125 paddd %xmm14,%xmm10 1126 pxor %xmm10,%xmm6 1127 movdqa %xmm6,%xmm3 1128 pslld $7,%xmm3 1129 psrld $25,%xmm6 1130 pxor %xmm3,%xmm6 1131 .byte 102,15,58,15,246,4 1132 .byte 102,69,15,58,15,210,8 1133 .byte 102,69,15,58,15,246,12 1134 paddd %xmm4,%xmm0 1135 pxor %xmm0,%xmm12 1136 pshufb .rol16(%rip),%xmm12 1137 paddd %xmm12,%xmm8 1138 pxor %xmm8,%xmm4 1139 movdqa %xmm4,%xmm3 1140 pslld $12,%xmm3 1141 psrld $20,%xmm4 1142 pxor %xmm3,%xmm4 1143 paddd %xmm4,%xmm0 1144 pxor %xmm0,%xmm12 1145 pshufb .rol8(%rip),%xmm12 1146 paddd %xmm12,%xmm8 1147 pxor %xmm8,%xmm4 1148 movdqa %xmm4,%xmm3 1149 pslld $7,%xmm3 1150 psrld $25,%xmm4 1151 pxor %xmm3,%xmm4 1152 .byte 102,15,58,15,228,12 1153 .byte 102,69,15,58,15,192,8 1154 .byte 102,69,15,58,15,228,4 1155 paddd %xmm5,%xmm1 1156 pxor %xmm1,%xmm13 1157 pshufb .rol16(%rip),%xmm13 1158 paddd %xmm13,%xmm9 1159 pxor %xmm9,%xmm5 1160 movdqa %xmm5,%xmm3 1161 pslld $12,%xmm3 1162 psrld $20,%xmm5 1163 pxor %xmm3,%xmm5 1164 paddd %xmm5,%xmm1 1165 pxor %xmm1,%xmm13 1166 pshufb .rol8(%rip),%xmm13 1167 paddd %xmm13,%xmm9 1168 pxor %xmm9,%xmm5 1169 movdqa %xmm5,%xmm3 1170 pslld $7,%xmm3 1171 psrld $25,%xmm5 1172 pxor %xmm3,%xmm5 1173 .byte 102,15,58,15,237,12 1174 .byte 102,69,15,58,15,201,8 1175 .byte 102,69,15,58,15,237,4 1176 paddd %xmm6,%xmm2 1177 pxor %xmm2,%xmm14 1178 pshufb .rol16(%rip),%xmm14 1179 paddd %xmm14,%xmm10 1180 pxor %xmm10,%xmm6 1181 movdqa %xmm6,%xmm3 1182 pslld $12,%xmm3 1183 psrld $20,%xmm6 1184 pxor %xmm3,%xmm6 1185 paddd %xmm6,%xmm2 1186 pxor %xmm2,%xmm14 1187 pshufb .rol8(%rip),%xmm14 1188 paddd %xmm14,%xmm10 1189 pxor %xmm10,%xmm6 1190 movdqa %xmm6,%xmm3 1191 pslld $7,%xmm3 1192 psrld $25,%xmm6 1193 pxor %xmm3,%xmm6 1194 .byte 102,15,58,15,246,12 1195 .byte 102,69,15,58,15,210,8 1196 .byte 102,69,15,58,15,246,4 1197 1198 cmpq %rcx,%r8 1199 jb 1b 1200 cmpq $160,%r8 1201 jne 2b 1202 cmpq $176,%rbx 1203 jb 1f 1204 addq 160(%rsi),%r10 1205 adcq 8+160(%rsi),%r11 1206 adcq $1,%r12 1207 movq 0+0(%rbp),%rax 1208 movq %rax,%r15 1209 mulq %r10 1210 movq %rax,%r13 1211 movq %rdx,%r14 1212 movq 0+0(%rbp),%rax 1213 mulq %r11 1214 imulq %r12,%r15 1215 addq %rax,%r14 1216 adcq %rdx,%r15 1217 movq 8+0(%rbp),%rax 1218 movq %rax,%r9 1219 mulq %r10 1220 addq %rax,%r14 1221 adcq $0,%rdx 1222 movq %rdx,%r10 1223 movq 8+0(%rbp),%rax 1224 mulq %r11 1225 addq %rax,%r15 1226 adcq $0,%rdx 1227 imulq %r12,%r9 1228 addq %r10,%r15 1229 adcq %rdx,%r9 1230 movq %r13,%r10 1231 movq %r14,%r11 1232 movq %r15,%r12 1233 andq $3,%r12 1234 movq %r15,%r13 1235 andq $-4,%r13 1236 movq %r9,%r14 1237 shrdq $2,%r9,%r15 1238 shrq $2,%r9 1239 addq %r13,%r10 1240 adcq %r14,%r11 1241 adcq $0,%r12 1242 addq %r15,%r10 1243 adcq %r9,%r11 1244 adcq $0,%r12 1245 1246 cmpq $192,%rbx 1247 jb 1f 1248 addq 176(%rsi),%r10 1249 adcq 8+176(%rsi),%r11 1250 adcq $1,%r12 1251 movq 0+0(%rbp),%rax 1252 movq %rax,%r15 1253 mulq %r10 1254 movq %rax,%r13 1255 movq %rdx,%r14 1256 movq 0+0(%rbp),%rax 1257 mulq %r11 1258 imulq %r12,%r15 1259 addq %rax,%r14 1260 adcq %rdx,%r15 1261 movq 8+0(%rbp),%rax 1262 movq %rax,%r9 1263 mulq %r10 1264 addq %rax,%r14 1265 adcq $0,%rdx 1266 movq %rdx,%r10 1267 movq 8+0(%rbp),%rax 1268 mulq %r11 1269 addq %rax,%r15 1270 adcq $0,%rdx 1271 imulq %r12,%r9 1272 addq %r10,%r15 1273 adcq %rdx,%r9 1274 movq %r13,%r10 1275 movq %r14,%r11 1276 movq %r15,%r12 1277 andq $3,%r12 1278 movq %r15,%r13 1279 andq $-4,%r13 1280 movq %r9,%r14 1281 shrdq $2,%r9,%r15 1282 shrq $2,%r9 1283 addq %r13,%r10 1284 adcq %r14,%r11 1285 adcq $0,%r12 1286 addq %r15,%r10 1287 adcq %r9,%r11 1288 adcq $0,%r12 1289 1290 1: 1291 paddd .chacha20_consts(%rip),%xmm2 1292 paddd 48(%rbp),%xmm6 1293 paddd 64(%rbp),%xmm10 1294 paddd 128(%rbp),%xmm14 1295 paddd .chacha20_consts(%rip),%xmm1 1296 paddd 48(%rbp),%xmm5 1297 paddd 64(%rbp),%xmm9 1298 paddd 112(%rbp),%xmm13 1299 paddd .chacha20_consts(%rip),%xmm0 1300 paddd 48(%rbp),%xmm4 1301 paddd 64(%rbp),%xmm8 1302 paddd 96(%rbp),%xmm12 1303 movdqu 0 + 0(%rsi),%xmm3 1304 movdqu 16 + 0(%rsi),%xmm7 1305 movdqu 32 + 0(%rsi),%xmm11 1306 movdqu 48 + 0(%rsi),%xmm15 1307 pxor %xmm3,%xmm2 1308 pxor %xmm7,%xmm6 1309 pxor %xmm11,%xmm10 1310 pxor %xmm14,%xmm15 1311 movdqu %xmm2,0 + 0(%rdi) 1312 movdqu %xmm6,16 + 0(%rdi) 1313 movdqu %xmm10,32 + 0(%rdi) 1314 movdqu %xmm15,48 + 0(%rdi) 1315 movdqu 0 + 64(%rsi),%xmm3 1316 movdqu 16 + 64(%rsi),%xmm7 1317 movdqu 32 + 64(%rsi),%xmm11 1318 movdqu 48 + 64(%rsi),%xmm15 1319 pxor %xmm3,%xmm1 1320 pxor %xmm7,%xmm5 1321 pxor %xmm11,%xmm9 1322 pxor %xmm13,%xmm15 1323 movdqu %xmm1,0 + 64(%rdi) 1324 movdqu %xmm5,16 + 64(%rdi) 1325 movdqu %xmm9,32 + 64(%rdi) 1326 movdqu %xmm15,48 + 64(%rdi) 1327 1328 subq $128,%rbx 1329 leaq 128(%rsi),%rsi 1330 leaq 128(%rdi),%rdi 1331 jmp open_sse_tail_64_dec_loop 1332 3: 1333 1334 movdqa .chacha20_consts(%rip),%xmm0 1335 movdqa 48(%rbp),%xmm4 1336 movdqa 64(%rbp),%xmm8 1337 movdqa %xmm0,%xmm1 1338 movdqa %xmm4,%xmm5 1339 movdqa %xmm8,%xmm9 1340 movdqa %xmm0,%xmm2 1341 movdqa %xmm4,%xmm6 1342 movdqa %xmm8,%xmm10 1343 movdqa %xmm0,%xmm3 1344 movdqa %xmm4,%xmm7 1345 movdqa %xmm8,%xmm11 1346 movdqa 96(%rbp),%xmm15 1347 paddd .sse_inc(%rip),%xmm15 1348 movdqa %xmm15,%xmm14 1349 paddd .sse_inc(%rip),%xmm14 1350 movdqa %xmm14,%xmm13 1351 paddd .sse_inc(%rip),%xmm13 1352 movdqa %xmm13,%xmm12 1353 paddd .sse_inc(%rip),%xmm12 1354 movdqa %xmm12,96(%rbp) 1355 movdqa %xmm13,112(%rbp) 1356 movdqa %xmm14,128(%rbp) 1357 movdqa %xmm15,144(%rbp) 1358 1359 xorq %r8,%r8 1360 1: 1361 addq 0(%rsi,%r8), %r10 1362 adcq 8+0(%rsi,%r8), %r11 1363 adcq $1,%r12 1364 movdqa %xmm11,80(%rbp) 1365 paddd %xmm4,%xmm0 1366 pxor %xmm0,%xmm12 1367 pshufb .rol16(%rip),%xmm12 1368 paddd %xmm12,%xmm8 1369 pxor %xmm8,%xmm4 1370 movdqa %xmm4,%xmm11 1371 pslld $12,%xmm11 1372 psrld $20,%xmm4 1373 pxor %xmm11,%xmm4 1374 paddd %xmm4,%xmm0 1375 pxor %xmm0,%xmm12 1376 pshufb .rol8(%rip),%xmm12 1377 paddd %xmm12,%xmm8 1378 pxor %xmm8,%xmm4 1379 movdqa %xmm4,%xmm11 1380 pslld $7,%xmm11 1381 psrld $25,%xmm4 1382 pxor %xmm11,%xmm4 1383 .byte 102,15,58,15,228,4 1384 .byte 102,69,15,58,15,192,8 1385 .byte 102,69,15,58,15,228,12 1386 paddd %xmm5,%xmm1 1387 pxor %xmm1,%xmm13 1388 pshufb .rol16(%rip),%xmm13 1389 paddd %xmm13,%xmm9 1390 pxor %xmm9,%xmm5 1391 movdqa %xmm5,%xmm11 1392 pslld $12,%xmm11 1393 psrld $20,%xmm5 1394 pxor %xmm11,%xmm5 1395 paddd %xmm5,%xmm1 1396 pxor %xmm1,%xmm13 1397 pshufb .rol8(%rip),%xmm13 1398 paddd %xmm13,%xmm9 1399 pxor %xmm9,%xmm5 1400 movdqa %xmm5,%xmm11 1401 pslld $7,%xmm11 1402 psrld $25,%xmm5 1403 pxor %xmm11,%xmm5 1404 .byte 102,15,58,15,237,4 1405 .byte 102,69,15,58,15,201,8 1406 .byte 102,69,15,58,15,237,12 1407 paddd %xmm6,%xmm2 1408 pxor %xmm2,%xmm14 1409 pshufb .rol16(%rip),%xmm14 1410 paddd %xmm14,%xmm10 1411 pxor %xmm10,%xmm6 1412 movdqa %xmm6,%xmm11 1413 pslld $12,%xmm11 1414 psrld $20,%xmm6 1415 pxor %xmm11,%xmm6 1416 paddd %xmm6,%xmm2 1417 pxor %xmm2,%xmm14 1418 pshufb .rol8(%rip),%xmm14 1419 paddd %xmm14,%xmm10 1420 pxor %xmm10,%xmm6 1421 movdqa %xmm6,%xmm11 1422 pslld $7,%xmm11 1423 psrld $25,%xmm6 1424 pxor %xmm11,%xmm6 1425 .byte 102,15,58,15,246,4 1426 .byte 102,69,15,58,15,210,8 1427 .byte 102,69,15,58,15,246,12 1428 movdqa 80(%rbp),%xmm11 1429 movq 0+0(%rbp),%rax 1430 movq %rax,%r15 1431 mulq %r10 1432 movq %rax,%r13 1433 movq %rdx,%r14 1434 movq 0+0(%rbp),%rax 1435 mulq %r11 1436 imulq %r12,%r15 1437 addq %rax,%r14 1438 adcq %rdx,%r15 1439 movdqa %xmm9,80(%rbp) 1440 paddd %xmm7,%xmm3 1441 pxor %xmm3,%xmm15 1442 pshufb .rol16(%rip),%xmm15 1443 paddd %xmm15,%xmm11 1444 pxor %xmm11,%xmm7 1445 movdqa %xmm7,%xmm9 1446 pslld $12,%xmm9 1447 psrld $20,%xmm7 1448 pxor %xmm9,%xmm7 1449 paddd %xmm7,%xmm3 1450 pxor %xmm3,%xmm15 1451 pshufb .rol8(%rip),%xmm15 1452 paddd %xmm15,%xmm11 1453 pxor %xmm11,%xmm7 1454 movdqa %xmm7,%xmm9 1455 pslld $7,%xmm9 1456 psrld $25,%xmm7 1457 pxor %xmm9,%xmm7 1458 .byte 102,15,58,15,255,4 1459 .byte 102,69,15,58,15,219,8 1460 .byte 102,69,15,58,15,255,12 1461 movdqa 80(%rbp),%xmm9 1462 movq 8+0(%rbp),%rax 1463 movq %rax,%r9 1464 mulq %r10 1465 addq %rax,%r14 1466 adcq $0,%rdx 1467 movq %rdx,%r10 1468 movq 8+0(%rbp),%rax 1469 mulq %r11 1470 addq %rax,%r15 1471 adcq $0,%rdx 1472 movdqa %xmm11,80(%rbp) 1473 paddd %xmm4,%xmm0 1474 pxor %xmm0,%xmm12 1475 pshufb .rol16(%rip),%xmm12 1476 paddd %xmm12,%xmm8 1477 pxor %xmm8,%xmm4 1478 movdqa %xmm4,%xmm11 1479 pslld $12,%xmm11 1480 psrld $20,%xmm4 1481 pxor %xmm11,%xmm4 1482 paddd %xmm4,%xmm0 1483 pxor %xmm0,%xmm12 1484 pshufb .rol8(%rip),%xmm12 1485 paddd %xmm12,%xmm8 1486 pxor %xmm8,%xmm4 1487 movdqa %xmm4,%xmm11 1488 pslld $7,%xmm11 1489 psrld $25,%xmm4 1490 pxor %xmm11,%xmm4 1491 .byte 102,15,58,15,228,12 1492 .byte 102,69,15,58,15,192,8 1493 .byte 102,69,15,58,15,228,4 1494 paddd %xmm5,%xmm1 1495 pxor %xmm1,%xmm13 1496 pshufb .rol16(%rip),%xmm13 1497 paddd %xmm13,%xmm9 1498 pxor %xmm9,%xmm5 1499 movdqa %xmm5,%xmm11 1500 pslld $12,%xmm11 1501 psrld $20,%xmm5 1502 pxor %xmm11,%xmm5 1503 paddd %xmm5,%xmm1 1504 pxor %xmm1,%xmm13 1505 pshufb .rol8(%rip),%xmm13 1506 paddd %xmm13,%xmm9 1507 pxor %xmm9,%xmm5 1508 movdqa %xmm5,%xmm11 1509 pslld $7,%xmm11 1510 psrld $25,%xmm5 1511 pxor %xmm11,%xmm5 1512 .byte 102,15,58,15,237,12 1513 .byte 102,69,15,58,15,201,8 1514 .byte 102,69,15,58,15,237,4 1515 imulq %r12,%r9 1516 addq %r10,%r15 1517 adcq %rdx,%r9 1518 paddd %xmm6,%xmm2 1519 pxor %xmm2,%xmm14 1520 pshufb .rol16(%rip),%xmm14 1521 paddd %xmm14,%xmm10 1522 pxor %xmm10,%xmm6 1523 movdqa %xmm6,%xmm11 1524 pslld $12,%xmm11 1525 psrld $20,%xmm6 1526 pxor %xmm11,%xmm6 1527 paddd %xmm6,%xmm2 1528 pxor %xmm2,%xmm14 1529 pshufb .rol8(%rip),%xmm14 1530 paddd %xmm14,%xmm10 1531 pxor %xmm10,%xmm6 1532 movdqa %xmm6,%xmm11 1533 pslld $7,%xmm11 1534 psrld $25,%xmm6 1535 pxor %xmm11,%xmm6 1536 .byte 102,15,58,15,246,12 1537 .byte 102,69,15,58,15,210,8 1538 .byte 102,69,15,58,15,246,4 1539 movdqa 80(%rbp),%xmm11 1540 movq %r13,%r10 1541 movq %r14,%r11 1542 movq %r15,%r12 1543 andq $3,%r12 1544 movq %r15,%r13 1545 andq $-4,%r13 1546 movq %r9,%r14 1547 shrdq $2,%r9,%r15 1548 shrq $2,%r9 1549 addq %r13,%r10 1550 adcq %r14,%r11 1551 adcq $0,%r12 1552 addq %r15,%r10 1553 adcq %r9,%r11 1554 adcq $0,%r12 1555 movdqa %xmm9,80(%rbp) 1556 paddd %xmm7,%xmm3 1557 pxor %xmm3,%xmm15 1558 pshufb .rol16(%rip),%xmm15 1559 paddd %xmm15,%xmm11 1560 pxor %xmm11,%xmm7 1561 movdqa %xmm7,%xmm9 1562 pslld $12,%xmm9 1563 psrld $20,%xmm7 1564 pxor %xmm9,%xmm7 1565 paddd %xmm7,%xmm3 1566 pxor %xmm3,%xmm15 1567 pshufb .rol8(%rip),%xmm15 1568 paddd %xmm15,%xmm11 1569 pxor %xmm11,%xmm7 1570 movdqa %xmm7,%xmm9 1571 pslld $7,%xmm9 1572 psrld $25,%xmm7 1573 pxor %xmm9,%xmm7 1574 .byte 102,15,58,15,255,12 1575 .byte 102,69,15,58,15,219,8 1576 .byte 102,69,15,58,15,255,4 1577 movdqa 80(%rbp),%xmm9 1578 1579 addq $16,%r8 1580 cmpq $160,%r8 1581 jb 1b 1582 movq %rbx,%rcx 1583 andq $-16,%rcx 1584 1: 1585 addq 0(%rsi,%r8), %r10 1586 adcq 8+0(%rsi,%r8), %r11 1587 adcq $1,%r12 1588 movq 0+0(%rbp),%rax 1589 movq %rax,%r15 1590 mulq %r10 1591 movq %rax,%r13 1592 movq %rdx,%r14 1593 movq 0+0(%rbp),%rax 1594 mulq %r11 1595 imulq %r12,%r15 1596 addq %rax,%r14 1597 adcq %rdx,%r15 1598 movq 8+0(%rbp),%rax 1599 movq %rax,%r9 1600 mulq %r10 1601 addq %rax,%r14 1602 adcq $0,%rdx 1603 movq %rdx,%r10 1604 movq 8+0(%rbp),%rax 1605 mulq %r11 1606 addq %rax,%r15 1607 adcq $0,%rdx 1608 imulq %r12,%r9 1609 addq %r10,%r15 1610 adcq %rdx,%r9 1611 movq %r13,%r10 1612 movq %r14,%r11 1613 movq %r15,%r12 1614 andq $3,%r12 1615 movq %r15,%r13 1616 andq $-4,%r13 1617 movq %r9,%r14 1618 shrdq $2,%r9,%r15 1619 shrq $2,%r9 1620 addq %r13,%r10 1621 adcq %r14,%r11 1622 adcq $0,%r12 1623 addq %r15,%r10 1624 adcq %r9,%r11 1625 adcq $0,%r12 1626 1627 addq $16,%r8 1628 cmpq %rcx,%r8 1629 jb 1b 1630 paddd .chacha20_consts(%rip),%xmm3 1631 paddd 48(%rbp),%xmm7 1632 paddd 64(%rbp),%xmm11 1633 paddd 144(%rbp),%xmm15 1634 paddd .chacha20_consts(%rip),%xmm2 1635 paddd 48(%rbp),%xmm6 1636 paddd 64(%rbp),%xmm10 1637 paddd 128(%rbp),%xmm14 1638 paddd .chacha20_consts(%rip),%xmm1 1639 paddd 48(%rbp),%xmm5 1640 paddd 64(%rbp),%xmm9 1641 paddd 112(%rbp),%xmm13 1642 paddd .chacha20_consts(%rip),%xmm0 1643 paddd 48(%rbp),%xmm4 1644 paddd 64(%rbp),%xmm8 1645 paddd 96(%rbp),%xmm12 1646 movdqa %xmm12,80(%rbp) 1647 movdqu 0 + 0(%rsi),%xmm12 1648 pxor %xmm3,%xmm12 1649 movdqu %xmm12,0 + 0(%rdi) 1650 movdqu 16 + 0(%rsi),%xmm12 1651 pxor %xmm7,%xmm12 1652 movdqu %xmm12,16 + 0(%rdi) 1653 movdqu 32 + 0(%rsi),%xmm12 1654 pxor %xmm11,%xmm12 1655 movdqu %xmm12,32 + 0(%rdi) 1656 movdqu 48 + 0(%rsi),%xmm12 1657 pxor %xmm15,%xmm12 1658 movdqu %xmm12,48 + 0(%rdi) 1659 movdqu 0 + 64(%rsi),%xmm3 1660 movdqu 16 + 64(%rsi),%xmm7 1661 movdqu 32 + 64(%rsi),%xmm11 1662 movdqu 48 + 64(%rsi),%xmm15 1663 pxor %xmm3,%xmm2 1664 pxor %xmm7,%xmm6 1665 pxor %xmm11,%xmm10 1666 pxor %xmm14,%xmm15 1667 movdqu %xmm2,0 + 64(%rdi) 1668 movdqu %xmm6,16 + 64(%rdi) 1669 movdqu %xmm10,32 + 64(%rdi) 1670 movdqu %xmm15,48 + 64(%rdi) 1671 movdqu 0 + 128(%rsi),%xmm3 1672 movdqu 16 + 128(%rsi),%xmm7 1673 movdqu 32 + 128(%rsi),%xmm11 1674 movdqu 48 + 128(%rsi),%xmm15 1675 pxor %xmm3,%xmm1 1676 pxor %xmm7,%xmm5 1677 pxor %xmm11,%xmm9 1678 pxor %xmm13,%xmm15 1679 movdqu %xmm1,0 + 128(%rdi) 1680 movdqu %xmm5,16 + 128(%rdi) 1681 movdqu %xmm9,32 + 128(%rdi) 1682 movdqu %xmm15,48 + 128(%rdi) 1683 1684 movdqa 80(%rbp),%xmm12 1685 subq $192,%rbx 1686 leaq 192(%rsi),%rsi 1687 leaq 192(%rdi),%rdi 1688 1689 1690 open_sse_tail_64_dec_loop: 1691 cmpq $16,%rbx 1692 jb 1f 1693 subq $16,%rbx 1694 movdqu (%rsi),%xmm3 1695 pxor %xmm3,%xmm0 1696 movdqu %xmm0,(%rdi) 1697 leaq 16(%rsi),%rsi 1698 leaq 16(%rdi),%rdi 1699 movdqa %xmm4,%xmm0 1700 movdqa %xmm8,%xmm4 1701 movdqa %xmm12,%xmm8 1702 jmp open_sse_tail_64_dec_loop 1703 1: 1704 movdqa %xmm0,%xmm1 1705 1706 1707 open_sse_tail_16: 1708 testq %rbx,%rbx 1709 jz open_sse_finalize 1710 1711 1712 1713 pxor %xmm3,%xmm3 1714 leaq -1(%rsi,%rbx), %rsi 1715 movq %rbx,%r8 1716 2: 1717 pslldq $1,%xmm3 1718 pinsrb $0,(%rsi),%xmm3 1719 subq $1,%rsi 1720 subq $1,%r8 1721 jnz 2b 1722 1723 3: 1724 .byte 102,73,15,126,221 1725 pextrq $1,%xmm3,%r14 1726 1727 pxor %xmm1,%xmm3 1728 1729 1730 2: 1731 pextrb $0,%xmm3,(%rdi) 1732 psrldq $1,%xmm3 1733 addq $1,%rdi 1734 subq $1,%rbx 1735 jne 2b 1736 1737 addq %r13,%r10 1738 adcq %r14,%r11 1739 adcq $1,%r12 1740 movq 0+0(%rbp),%rax 1741 movq %rax,%r15 1742 mulq %r10 1743 movq %rax,%r13 1744 movq %rdx,%r14 1745 movq 0+0(%rbp),%rax 1746 mulq %r11 1747 imulq %r12,%r15 1748 addq %rax,%r14 1749 adcq %rdx,%r15 1750 movq 8+0(%rbp),%rax 1751 movq %rax,%r9 1752 mulq %r10 1753 addq %rax,%r14 1754 adcq $0,%rdx 1755 movq %rdx,%r10 1756 movq 8+0(%rbp),%rax 1757 mulq %r11 1758 addq %rax,%r15 1759 adcq $0,%rdx 1760 imulq %r12,%r9 1761 addq %r10,%r15 1762 adcq %rdx,%r9 1763 movq %r13,%r10 1764 movq %r14,%r11 1765 movq %r15,%r12 1766 andq $3,%r12 1767 movq %r15,%r13 1768 andq $-4,%r13 1769 movq %r9,%r14 1770 shrdq $2,%r9,%r15 1771 shrq $2,%r9 1772 addq %r13,%r10 1773 adcq %r14,%r11 1774 adcq $0,%r12 1775 addq %r15,%r10 1776 adcq %r9,%r11 1777 adcq $0,%r12 1778 1779 1780 open_sse_finalize: 1781 addq 32(%rbp),%r10 1782 adcq 8+32(%rbp),%r11 1783 adcq $1,%r12 1784 movq 0+0(%rbp),%rax 1785 movq %rax,%r15 1786 mulq %r10 1787 movq %rax,%r13 1788 movq %rdx,%r14 1789 movq 0+0(%rbp),%rax 1790 mulq %r11 1791 imulq %r12,%r15 1792 addq %rax,%r14 1793 adcq %rdx,%r15 1794 movq 8+0(%rbp),%rax 1795 movq %rax,%r9 1796 mulq %r10 1797 addq %rax,%r14 1798 adcq $0,%rdx 1799 movq %rdx,%r10 1800 movq 8+0(%rbp),%rax 1801 mulq %r11 1802 addq %rax,%r15 1803 adcq $0,%rdx 1804 imulq %r12,%r9 1805 addq %r10,%r15 1806 adcq %rdx,%r9 1807 movq %r13,%r10 1808 movq %r14,%r11 1809 movq %r15,%r12 1810 andq $3,%r12 1811 movq %r15,%r13 1812 andq $-4,%r13 1813 movq %r9,%r14 1814 shrdq $2,%r9,%r15 1815 shrq $2,%r9 1816 addq %r13,%r10 1817 adcq %r14,%r11 1818 adcq $0,%r12 1819 addq %r15,%r10 1820 adcq %r9,%r11 1821 adcq $0,%r12 1822 1823 1824 movq %r10,%r13 1825 movq %r11,%r14 1826 movq %r12,%r15 1827 subq $-5,%r10 1828 sbbq $-1,%r11 1829 sbbq $3,%r12 1830 cmovcq %r13,%r10 1831 cmovcq %r14,%r11 1832 cmovcq %r15,%r12 1833 1834 addq 0+16(%rbp),%r10 1835 adcq 8+16(%rbp),%r11 1836 1837 addq $288 + 32,%rsp 1838 .cfi_adjust_cfa_offset -(288 + 32) 1839 popq %r9 1840 .cfi_adjust_cfa_offset -8 1841 movq %r10,(%r9) 1842 movq %r11,8(%r9) 1843 1844 popq %r15 1845 .cfi_adjust_cfa_offset -8 1846 popq %r14 1847 .cfi_adjust_cfa_offset -8 1848 popq %r13 1849 .cfi_adjust_cfa_offset -8 1850 popq %r12 1851 .cfi_adjust_cfa_offset -8 1852 popq %rbx 1853 .cfi_adjust_cfa_offset -8 1854 popq %rbp 1855 .cfi_adjust_cfa_offset -8 1856 .byte 0xf3,0xc3 1857 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32 1858 1859 open_sse_128: 1860 movdqu .chacha20_consts(%rip),%xmm0 1861 movdqa %xmm0,%xmm1 1862 movdqa %xmm0,%xmm2 1863 movdqu 0(%r9),%xmm4 1864 movdqa %xmm4,%xmm5 1865 movdqa %xmm4,%xmm6 1866 movdqu 16(%r9),%xmm8 1867 movdqa %xmm8,%xmm9 1868 movdqa %xmm8,%xmm10 1869 movdqu 32(%r9),%xmm12 1870 movdqa %xmm12,%xmm13 1871 paddd .sse_inc(%rip),%xmm13 1872 movdqa %xmm13,%xmm14 1873 paddd .sse_inc(%rip),%xmm14 1874 movdqa %xmm4,%xmm7 1875 movdqa %xmm8,%xmm11 1876 movdqa %xmm13,%xmm15 1877 movq $10,%r10 1878 1: 1879 paddd %xmm4,%xmm0 1880 pxor %xmm0,%xmm12 1881 pshufb .rol16(%rip),%xmm12 1882 paddd %xmm12,%xmm8 1883 pxor %xmm8,%xmm4 1884 movdqa %xmm4,%xmm3 1885 pslld $12,%xmm3 1886 psrld $20,%xmm4 1887 pxor %xmm3,%xmm4 1888 paddd %xmm4,%xmm0 1889 pxor %xmm0,%xmm12 1890 pshufb .rol8(%rip),%xmm12 1891 paddd %xmm12,%xmm8 1892 pxor %xmm8,%xmm4 1893 movdqa %xmm4,%xmm3 1894 pslld $7,%xmm3 1895 psrld $25,%xmm4 1896 pxor %xmm3,%xmm4 1897 .byte 102,15,58,15,228,4 1898 .byte 102,69,15,58,15,192,8 1899 .byte 102,69,15,58,15,228,12 1900 paddd %xmm5,%xmm1 1901 pxor %xmm1,%xmm13 1902 pshufb .rol16(%rip),%xmm13 1903 paddd %xmm13,%xmm9 1904 pxor %xmm9,%xmm5 1905 movdqa %xmm5,%xmm3 1906 pslld $12,%xmm3 1907 psrld $20,%xmm5 1908 pxor %xmm3,%xmm5 1909 paddd %xmm5,%xmm1 1910 pxor %xmm1,%xmm13 1911 pshufb .rol8(%rip),%xmm13 1912 paddd %xmm13,%xmm9 1913 pxor %xmm9,%xmm5 1914 movdqa %xmm5,%xmm3 1915 pslld $7,%xmm3 1916 psrld $25,%xmm5 1917 pxor %xmm3,%xmm5 1918 .byte 102,15,58,15,237,4 1919 .byte 102,69,15,58,15,201,8 1920 .byte 102,69,15,58,15,237,12 1921 paddd %xmm6,%xmm2 1922 pxor %xmm2,%xmm14 1923 pshufb .rol16(%rip),%xmm14 1924 paddd %xmm14,%xmm10 1925 pxor %xmm10,%xmm6 1926 movdqa %xmm6,%xmm3 1927 pslld $12,%xmm3 1928 psrld $20,%xmm6 1929 pxor %xmm3,%xmm6 1930 paddd %xmm6,%xmm2 1931 pxor %xmm2,%xmm14 1932 pshufb .rol8(%rip),%xmm14 1933 paddd %xmm14,%xmm10 1934 pxor %xmm10,%xmm6 1935 movdqa %xmm6,%xmm3 1936 pslld $7,%xmm3 1937 psrld $25,%xmm6 1938 pxor %xmm3,%xmm6 1939 .byte 102,15,58,15,246,4 1940 .byte 102,69,15,58,15,210,8 1941 .byte 102,69,15,58,15,246,12 1942 paddd %xmm4,%xmm0 1943 pxor %xmm0,%xmm12 1944 pshufb .rol16(%rip),%xmm12 1945 paddd %xmm12,%xmm8 1946 pxor %xmm8,%xmm4 1947 movdqa %xmm4,%xmm3 1948 pslld $12,%xmm3 1949 psrld $20,%xmm4 1950 pxor %xmm3,%xmm4 1951 paddd %xmm4,%xmm0 1952 pxor %xmm0,%xmm12 1953 pshufb .rol8(%rip),%xmm12 1954 paddd %xmm12,%xmm8 1955 pxor %xmm8,%xmm4 1956 movdqa %xmm4,%xmm3 1957 pslld $7,%xmm3 1958 psrld $25,%xmm4 1959 pxor %xmm3,%xmm4 1960 .byte 102,15,58,15,228,12 1961 .byte 102,69,15,58,15,192,8 1962 .byte 102,69,15,58,15,228,4 1963 paddd %xmm5,%xmm1 1964 pxor %xmm1,%xmm13 1965 pshufb .rol16(%rip),%xmm13 1966 paddd %xmm13,%xmm9 1967 pxor %xmm9,%xmm5 1968 movdqa %xmm5,%xmm3 1969 pslld $12,%xmm3 1970 psrld $20,%xmm5 1971 pxor %xmm3,%xmm5 1972 paddd %xmm5,%xmm1 1973 pxor %xmm1,%xmm13 1974 pshufb .rol8(%rip),%xmm13 1975 paddd %xmm13,%xmm9 1976 pxor %xmm9,%xmm5 1977 movdqa %xmm5,%xmm3 1978 pslld $7,%xmm3 1979 psrld $25,%xmm5 1980 pxor %xmm3,%xmm5 1981 .byte 102,15,58,15,237,12 1982 .byte 102,69,15,58,15,201,8 1983 .byte 102,69,15,58,15,237,4 1984 paddd %xmm6,%xmm2 1985 pxor %xmm2,%xmm14 1986 pshufb .rol16(%rip),%xmm14 1987 paddd %xmm14,%xmm10 1988 pxor %xmm10,%xmm6 1989 movdqa %xmm6,%xmm3 1990 pslld $12,%xmm3 1991 psrld $20,%xmm6 1992 pxor %xmm3,%xmm6 1993 paddd %xmm6,%xmm2 1994 pxor %xmm2,%xmm14 1995 pshufb .rol8(%rip),%xmm14 1996 paddd %xmm14,%xmm10 1997 pxor %xmm10,%xmm6 1998 movdqa %xmm6,%xmm3 1999 pslld $7,%xmm3 2000 psrld $25,%xmm6 2001 pxor %xmm3,%xmm6 2002 .byte 102,15,58,15,246,12 2003 .byte 102,69,15,58,15,210,8 2004 .byte 102,69,15,58,15,246,4 2005 2006 decq %r10 2007 jnz 1b 2008 paddd .chacha20_consts(%rip),%xmm0 2009 paddd .chacha20_consts(%rip),%xmm1 2010 paddd .chacha20_consts(%rip),%xmm2 2011 paddd %xmm7,%xmm4 2012 paddd %xmm7,%xmm5 2013 paddd %xmm7,%xmm6 2014 paddd %xmm11,%xmm9 2015 paddd %xmm11,%xmm10 2016 paddd %xmm15,%xmm13 2017 paddd .sse_inc(%rip),%xmm15 2018 paddd %xmm15,%xmm14 2019 2020 pand .clamp(%rip),%xmm0 2021 movdqa %xmm0,0(%rbp) 2022 movdqa %xmm4,16(%rbp) 2023 2024 movq %r8,%r8 2025 call poly_hash_ad_internal 2026 1: 2027 cmpq $16,%rbx 2028 jb open_sse_tail_16 2029 subq $16,%rbx 2030 addq 0(%rsi),%r10 2031 adcq 8+0(%rsi),%r11 2032 adcq $1,%r12 2033 2034 2035 movdqu 0(%rsi),%xmm3 2036 pxor %xmm3,%xmm1 2037 movdqu %xmm1,0(%rdi) 2038 leaq 16(%rsi),%rsi 2039 leaq 16(%rdi),%rdi 2040 movq 0+0(%rbp),%rax 2041 movq %rax,%r15 2042 mulq %r10 2043 movq %rax,%r13 2044 movq %rdx,%r14 2045 movq 0+0(%rbp),%rax 2046 mulq %r11 2047 imulq %r12,%r15 2048 addq %rax,%r14 2049 adcq %rdx,%r15 2050 movq 8+0(%rbp),%rax 2051 movq %rax,%r9 2052 mulq %r10 2053 addq %rax,%r14 2054 adcq $0,%rdx 2055 movq %rdx,%r10 2056 movq 8+0(%rbp),%rax 2057 mulq %r11 2058 addq %rax,%r15 2059 adcq $0,%rdx 2060 imulq %r12,%r9 2061 addq %r10,%r15 2062 adcq %rdx,%r9 2063 movq %r13,%r10 2064 movq %r14,%r11 2065 movq %r15,%r12 2066 andq $3,%r12 2067 movq %r15,%r13 2068 andq $-4,%r13 2069 movq %r9,%r14 2070 shrdq $2,%r9,%r15 2071 shrq $2,%r9 2072 addq %r13,%r10 2073 adcq %r14,%r11 2074 adcq $0,%r12 2075 addq %r15,%r10 2076 adcq %r9,%r11 2077 adcq $0,%r12 2078 2079 2080 movdqa %xmm5,%xmm1 2081 movdqa %xmm9,%xmm5 2082 movdqa %xmm13,%xmm9 2083 movdqa %xmm2,%xmm13 2084 movdqa %xmm6,%xmm2 2085 movdqa %xmm10,%xmm6 2086 movdqa %xmm14,%xmm10 2087 jmp 1b 2088 jmp open_sse_tail_16 2089 .size chacha20_poly1305_open, .-chacha20_poly1305_open 2090 .cfi_endproc 2091 2092 2093 2094 2095 .globl chacha20_poly1305_seal 2096 .hidden chacha20_poly1305_seal 2097 .type chacha20_poly1305_seal,@function 2098 .align 64 2099 chacha20_poly1305_seal: 2100 .cfi_startproc 2101 pushq %rbp 2102 .cfi_adjust_cfa_offset 8 2103 pushq %rbx 2104 .cfi_adjust_cfa_offset 8 2105 pushq %r12 2106 .cfi_adjust_cfa_offset 8 2107 pushq %r13 2108 .cfi_adjust_cfa_offset 8 2109 pushq %r14 2110 .cfi_adjust_cfa_offset 8 2111 pushq %r15 2112 .cfi_adjust_cfa_offset 8 2113 2114 2115 pushq %r9 2116 .cfi_adjust_cfa_offset 8 2117 subq $288 + 32,%rsp 2118 .cfi_adjust_cfa_offset 288 + 32 2119 .cfi_offset rbp, -16 2120 .cfi_offset rbx, -24 2121 .cfi_offset r12, -32 2122 .cfi_offset r13, -40 2123 .cfi_offset r14, -48 2124 .cfi_offset r15, -56 2125 leaq 32(%rsp),%rbp 2126 andq $-32,%rbp 2127 movq %rdx,8+32(%rbp) 2128 movq %r8,0+32(%rbp) 2129 movq %rdx,%rbx 2130 2131 movl OPENSSL_ia32cap_P+8(%rip),%eax 2132 andl $288,%eax 2133 xorl $288,%eax 2134 jz chacha20_poly1305_seal_avx2 2135 2136 cmpq $128,%rbx 2137 jbe seal_sse_128 2138 2139 movdqa .chacha20_consts(%rip),%xmm0 2140 movdqu 0(%r9),%xmm4 2141 movdqu 16(%r9),%xmm8 2142 movdqu 32(%r9),%xmm12 2143 movdqa %xmm0,%xmm1 2144 movdqa %xmm0,%xmm2 2145 movdqa %xmm0,%xmm3 2146 movdqa %xmm4,%xmm5 2147 movdqa %xmm4,%xmm6 2148 movdqa %xmm4,%xmm7 2149 movdqa %xmm8,%xmm9 2150 movdqa %xmm8,%xmm10 2151 movdqa %xmm8,%xmm11 2152 movdqa %xmm12,%xmm15 2153 paddd .sse_inc(%rip),%xmm12 2154 movdqa %xmm12,%xmm14 2155 paddd .sse_inc(%rip),%xmm12 2156 movdqa %xmm12,%xmm13 2157 paddd .sse_inc(%rip),%xmm12 2158 2159 movdqa %xmm4,48(%rbp) 2160 movdqa %xmm8,64(%rbp) 2161 movdqa %xmm12,96(%rbp) 2162 movdqa %xmm13,112(%rbp) 2163 movdqa %xmm14,128(%rbp) 2164 movdqa %xmm15,144(%rbp) 2165 movq $10,%r10 2166 1: 2167 movdqa %xmm8,80(%rbp) 2168 movdqa .rol16(%rip),%xmm8 2169 paddd %xmm7,%xmm3 2170 paddd %xmm6,%xmm2 2171 paddd %xmm5,%xmm1 2172 paddd %xmm4,%xmm0 2173 pxor %xmm3,%xmm15 2174 pxor %xmm2,%xmm14 2175 pxor %xmm1,%xmm13 2176 pxor %xmm0,%xmm12 2177 .byte 102,69,15,56,0,248 2178 .byte 102,69,15,56,0,240 2179 .byte 102,69,15,56,0,232 2180 .byte 102,69,15,56,0,224 2181 movdqa 80(%rbp),%xmm8 2182 paddd %xmm15,%xmm11 2183 paddd %xmm14,%xmm10 2184 paddd %xmm13,%xmm9 2185 paddd %xmm12,%xmm8 2186 pxor %xmm11,%xmm7 2187 pxor %xmm10,%xmm6 2188 pxor %xmm9,%xmm5 2189 pxor %xmm8,%xmm4 2190 movdqa %xmm8,80(%rbp) 2191 movdqa %xmm7,%xmm8 2192 psrld $20,%xmm8 2193 pslld $32-20,%xmm7 2194 pxor %xmm8,%xmm7 2195 movdqa %xmm6,%xmm8 2196 psrld $20,%xmm8 2197 pslld $32-20,%xmm6 2198 pxor %xmm8,%xmm6 2199 movdqa %xmm5,%xmm8 2200 psrld $20,%xmm8 2201 pslld $32-20,%xmm5 2202 pxor %xmm8,%xmm5 2203 movdqa %xmm4,%xmm8 2204 psrld $20,%xmm8 2205 pslld $32-20,%xmm4 2206 pxor %xmm8,%xmm4 2207 movdqa .rol8(%rip),%xmm8 2208 paddd %xmm7,%xmm3 2209 paddd %xmm6,%xmm2 2210 paddd %xmm5,%xmm1 2211 paddd %xmm4,%xmm0 2212 pxor %xmm3,%xmm15 2213 pxor %xmm2,%xmm14 2214 pxor %xmm1,%xmm13 2215 pxor %xmm0,%xmm12 2216 .byte 102,69,15,56,0,248 2217 .byte 102,69,15,56,0,240 2218 .byte 102,69,15,56,0,232 2219 .byte 102,69,15,56,0,224 2220 movdqa 80(%rbp),%xmm8 2221 paddd %xmm15,%xmm11 2222 paddd %xmm14,%xmm10 2223 paddd %xmm13,%xmm9 2224 paddd %xmm12,%xmm8 2225 pxor %xmm11,%xmm7 2226 pxor %xmm10,%xmm6 2227 pxor %xmm9,%xmm5 2228 pxor %xmm8,%xmm4 2229 movdqa %xmm8,80(%rbp) 2230 movdqa %xmm7,%xmm8 2231 psrld $25,%xmm8 2232 pslld $32-25,%xmm7 2233 pxor %xmm8,%xmm7 2234 movdqa %xmm6,%xmm8 2235 psrld $25,%xmm8 2236 pslld $32-25,%xmm6 2237 pxor %xmm8,%xmm6 2238 movdqa %xmm5,%xmm8 2239 psrld $25,%xmm8 2240 pslld $32-25,%xmm5 2241 pxor %xmm8,%xmm5 2242 movdqa %xmm4,%xmm8 2243 psrld $25,%xmm8 2244 pslld $32-25,%xmm4 2245 pxor %xmm8,%xmm4 2246 movdqa 80(%rbp),%xmm8 2247 .byte 102,15,58,15,255,4 2248 .byte 102,69,15,58,15,219,8 2249 .byte 102,69,15,58,15,255,12 2250 .byte 102,15,58,15,246,4 2251 .byte 102,69,15,58,15,210,8 2252 .byte 102,69,15,58,15,246,12 2253 .byte 102,15,58,15,237,4 2254 .byte 102,69,15,58,15,201,8 2255 .byte 102,69,15,58,15,237,12 2256 .byte 102,15,58,15,228,4 2257 .byte 102,69,15,58,15,192,8 2258 .byte 102,69,15,58,15,228,12 2259 movdqa %xmm8,80(%rbp) 2260 movdqa .rol16(%rip),%xmm8 2261 paddd %xmm7,%xmm3 2262 paddd %xmm6,%xmm2 2263 paddd %xmm5,%xmm1 2264 paddd %xmm4,%xmm0 2265 pxor %xmm3,%xmm15 2266 pxor %xmm2,%xmm14 2267 pxor %xmm1,%xmm13 2268 pxor %xmm0,%xmm12 2269 .byte 102,69,15,56,0,248 2270 .byte 102,69,15,56,0,240 2271 .byte 102,69,15,56,0,232 2272 .byte 102,69,15,56,0,224 2273 movdqa 80(%rbp),%xmm8 2274 paddd %xmm15,%xmm11 2275 paddd %xmm14,%xmm10 2276 paddd %xmm13,%xmm9 2277 paddd %xmm12,%xmm8 2278 pxor %xmm11,%xmm7 2279 pxor %xmm10,%xmm6 2280 pxor %xmm9,%xmm5 2281 pxor %xmm8,%xmm4 2282 movdqa %xmm8,80(%rbp) 2283 movdqa %xmm7,%xmm8 2284 psrld $20,%xmm8 2285 pslld $32-20,%xmm7 2286 pxor %xmm8,%xmm7 2287 movdqa %xmm6,%xmm8 2288 psrld $20,%xmm8 2289 pslld $32-20,%xmm6 2290 pxor %xmm8,%xmm6 2291 movdqa %xmm5,%xmm8 2292 psrld $20,%xmm8 2293 pslld $32-20,%xmm5 2294 pxor %xmm8,%xmm5 2295 movdqa %xmm4,%xmm8 2296 psrld $20,%xmm8 2297 pslld $32-20,%xmm4 2298 pxor %xmm8,%xmm4 2299 movdqa .rol8(%rip),%xmm8 2300 paddd %xmm7,%xmm3 2301 paddd %xmm6,%xmm2 2302 paddd %xmm5,%xmm1 2303 paddd %xmm4,%xmm0 2304 pxor %xmm3,%xmm15 2305 pxor %xmm2,%xmm14 2306 pxor %xmm1,%xmm13 2307 pxor %xmm0,%xmm12 2308 .byte 102,69,15,56,0,248 2309 .byte 102,69,15,56,0,240 2310 .byte 102,69,15,56,0,232 2311 .byte 102,69,15,56,0,224 2312 movdqa 80(%rbp),%xmm8 2313 paddd %xmm15,%xmm11 2314 paddd %xmm14,%xmm10 2315 paddd %xmm13,%xmm9 2316 paddd %xmm12,%xmm8 2317 pxor %xmm11,%xmm7 2318 pxor %xmm10,%xmm6 2319 pxor %xmm9,%xmm5 2320 pxor %xmm8,%xmm4 2321 movdqa %xmm8,80(%rbp) 2322 movdqa %xmm7,%xmm8 2323 psrld $25,%xmm8 2324 pslld $32-25,%xmm7 2325 pxor %xmm8,%xmm7 2326 movdqa %xmm6,%xmm8 2327 psrld $25,%xmm8 2328 pslld $32-25,%xmm6 2329 pxor %xmm8,%xmm6 2330 movdqa %xmm5,%xmm8 2331 psrld $25,%xmm8 2332 pslld $32-25,%xmm5 2333 pxor %xmm8,%xmm5 2334 movdqa %xmm4,%xmm8 2335 psrld $25,%xmm8 2336 pslld $32-25,%xmm4 2337 pxor %xmm8,%xmm4 2338 movdqa 80(%rbp),%xmm8 2339 .byte 102,15,58,15,255,12 2340 .byte 102,69,15,58,15,219,8 2341 .byte 102,69,15,58,15,255,4 2342 .byte 102,15,58,15,246,12 2343 .byte 102,69,15,58,15,210,8 2344 .byte 102,69,15,58,15,246,4 2345 .byte 102,15,58,15,237,12 2346 .byte 102,69,15,58,15,201,8 2347 .byte 102,69,15,58,15,237,4 2348 .byte 102,15,58,15,228,12 2349 .byte 102,69,15,58,15,192,8 2350 .byte 102,69,15,58,15,228,4 2351 2352 decq %r10 2353 jnz 1b 2354 paddd .chacha20_consts(%rip),%xmm3 2355 paddd 48(%rbp),%xmm7 2356 paddd 64(%rbp),%xmm11 2357 paddd 144(%rbp),%xmm15 2358 paddd .chacha20_consts(%rip),%xmm2 2359 paddd 48(%rbp),%xmm6 2360 paddd 64(%rbp),%xmm10 2361 paddd 128(%rbp),%xmm14 2362 paddd .chacha20_consts(%rip),%xmm1 2363 paddd 48(%rbp),%xmm5 2364 paddd 64(%rbp),%xmm9 2365 paddd 112(%rbp),%xmm13 2366 paddd .chacha20_consts(%rip),%xmm0 2367 paddd 48(%rbp),%xmm4 2368 paddd 64(%rbp),%xmm8 2369 paddd 96(%rbp),%xmm12 2370 2371 2372 pand .clamp(%rip),%xmm3 2373 movdqa %xmm3,0(%rbp) 2374 movdqa %xmm7,16(%rbp) 2375 2376 movq %r8,%r8 2377 call poly_hash_ad_internal 2378 movdqu 0 + 0(%rsi),%xmm3 2379 movdqu 16 + 0(%rsi),%xmm7 2380 movdqu 32 + 0(%rsi),%xmm11 2381 movdqu 48 + 0(%rsi),%xmm15 2382 pxor %xmm3,%xmm2 2383 pxor %xmm7,%xmm6 2384 pxor %xmm11,%xmm10 2385 pxor %xmm14,%xmm15 2386 movdqu %xmm2,0 + 0(%rdi) 2387 movdqu %xmm6,16 + 0(%rdi) 2388 movdqu %xmm10,32 + 0(%rdi) 2389 movdqu %xmm15,48 + 0(%rdi) 2390 movdqu 0 + 64(%rsi),%xmm3 2391 movdqu 16 + 64(%rsi),%xmm7 2392 movdqu 32 + 64(%rsi),%xmm11 2393 movdqu 48 + 64(%rsi),%xmm15 2394 pxor %xmm3,%xmm1 2395 pxor %xmm7,%xmm5 2396 pxor %xmm11,%xmm9 2397 pxor %xmm13,%xmm15 2398 movdqu %xmm1,0 + 64(%rdi) 2399 movdqu %xmm5,16 + 64(%rdi) 2400 movdqu %xmm9,32 + 64(%rdi) 2401 movdqu %xmm15,48 + 64(%rdi) 2402 2403 cmpq $192,%rbx 2404 ja 1f 2405 movq $128,%rcx 2406 subq $128,%rbx 2407 leaq 128(%rsi),%rsi 2408 jmp seal_sse_128_seal_hash 2409 1: 2410 movdqu 0 + 128(%rsi),%xmm3 2411 movdqu 16 + 128(%rsi),%xmm7 2412 movdqu 32 + 128(%rsi),%xmm11 2413 movdqu 48 + 128(%rsi),%xmm15 2414 pxor %xmm3,%xmm0 2415 pxor %xmm7,%xmm4 2416 pxor %xmm11,%xmm8 2417 pxor %xmm12,%xmm15 2418 movdqu %xmm0,0 + 128(%rdi) 2419 movdqu %xmm4,16 + 128(%rdi) 2420 movdqu %xmm8,32 + 128(%rdi) 2421 movdqu %xmm15,48 + 128(%rdi) 2422 2423 movq $192,%rcx 2424 subq $192,%rbx 2425 leaq 192(%rsi),%rsi 2426 movq $2,%rcx 2427 movq $8,%r8 2428 cmpq $64,%rbx 2429 jbe seal_sse_tail_64 2430 cmpq $128,%rbx 2431 jbe seal_sse_tail_128 2432 cmpq $192,%rbx 2433 jbe seal_sse_tail_192 2434 2435 1: 2436 movdqa .chacha20_consts(%rip),%xmm0 2437 movdqa 48(%rbp),%xmm4 2438 movdqa 64(%rbp),%xmm8 2439 movdqa %xmm0,%xmm1 2440 movdqa %xmm4,%xmm5 2441 movdqa %xmm8,%xmm9 2442 movdqa %xmm0,%xmm2 2443 movdqa %xmm4,%xmm6 2444 movdqa %xmm8,%xmm10 2445 movdqa %xmm0,%xmm3 2446 movdqa %xmm4,%xmm7 2447 movdqa %xmm8,%xmm11 2448 movdqa 96(%rbp),%xmm15 2449 paddd .sse_inc(%rip),%xmm15 2450 movdqa %xmm15,%xmm14 2451 paddd .sse_inc(%rip),%xmm14 2452 movdqa %xmm14,%xmm13 2453 paddd .sse_inc(%rip),%xmm13 2454 movdqa %xmm13,%xmm12 2455 paddd .sse_inc(%rip),%xmm12 2456 movdqa %xmm12,96(%rbp) 2457 movdqa %xmm13,112(%rbp) 2458 movdqa %xmm14,128(%rbp) 2459 movdqa %xmm15,144(%rbp) 2460 2461 2: 2462 movdqa %xmm8,80(%rbp) 2463 movdqa .rol16(%rip),%xmm8 2464 paddd %xmm7,%xmm3 2465 paddd %xmm6,%xmm2 2466 paddd %xmm5,%xmm1 2467 paddd %xmm4,%xmm0 2468 pxor %xmm3,%xmm15 2469 pxor %xmm2,%xmm14 2470 pxor %xmm1,%xmm13 2471 pxor %xmm0,%xmm12 2472 .byte 102,69,15,56,0,248 2473 .byte 102,69,15,56,0,240 2474 .byte 102,69,15,56,0,232 2475 .byte 102,69,15,56,0,224 2476 movdqa 80(%rbp),%xmm8 2477 paddd %xmm15,%xmm11 2478 paddd %xmm14,%xmm10 2479 paddd %xmm13,%xmm9 2480 paddd %xmm12,%xmm8 2481 pxor %xmm11,%xmm7 2482 addq 0(%rdi),%r10 2483 adcq 8+0(%rdi),%r11 2484 adcq $1,%r12 2485 pxor %xmm10,%xmm6 2486 pxor %xmm9,%xmm5 2487 pxor %xmm8,%xmm4 2488 movdqa %xmm8,80(%rbp) 2489 movdqa %xmm7,%xmm8 2490 psrld $20,%xmm8 2491 pslld $32-20,%xmm7 2492 pxor %xmm8,%xmm7 2493 movdqa %xmm6,%xmm8 2494 psrld $20,%xmm8 2495 pslld $32-20,%xmm6 2496 pxor %xmm8,%xmm6 2497 movdqa %xmm5,%xmm8 2498 psrld $20,%xmm8 2499 pslld $32-20,%xmm5 2500 pxor %xmm8,%xmm5 2501 movdqa %xmm4,%xmm8 2502 psrld $20,%xmm8 2503 pslld $32-20,%xmm4 2504 pxor %xmm8,%xmm4 2505 movq 0+0(%rbp),%rax 2506 movq %rax,%r15 2507 mulq %r10 2508 movq %rax,%r13 2509 movq %rdx,%r14 2510 movq 0+0(%rbp),%rax 2511 mulq %r11 2512 imulq %r12,%r15 2513 addq %rax,%r14 2514 adcq %rdx,%r15 2515 movdqa .rol8(%rip),%xmm8 2516 paddd %xmm7,%xmm3 2517 paddd %xmm6,%xmm2 2518 paddd %xmm5,%xmm1 2519 paddd %xmm4,%xmm0 2520 pxor %xmm3,%xmm15 2521 pxor %xmm2,%xmm14 2522 pxor %xmm1,%xmm13 2523 pxor %xmm0,%xmm12 2524 .byte 102,69,15,56,0,248 2525 .byte 102,69,15,56,0,240 2526 .byte 102,69,15,56,0,232 2527 .byte 102,69,15,56,0,224 2528 movdqa 80(%rbp),%xmm8 2529 paddd %xmm15,%xmm11 2530 paddd %xmm14,%xmm10 2531 paddd %xmm13,%xmm9 2532 paddd %xmm12,%xmm8 2533 pxor %xmm11,%xmm7 2534 pxor %xmm10,%xmm6 2535 movq 8+0(%rbp),%rax 2536 movq %rax,%r9 2537 mulq %r10 2538 addq %rax,%r14 2539 adcq $0,%rdx 2540 movq %rdx,%r10 2541 movq 8+0(%rbp),%rax 2542 mulq %r11 2543 addq %rax,%r15 2544 adcq $0,%rdx 2545 pxor %xmm9,%xmm5 2546 pxor %xmm8,%xmm4 2547 movdqa %xmm8,80(%rbp) 2548 movdqa %xmm7,%xmm8 2549 psrld $25,%xmm8 2550 pslld $32-25,%xmm7 2551 pxor %xmm8,%xmm7 2552 movdqa %xmm6,%xmm8 2553 psrld $25,%xmm8 2554 pslld $32-25,%xmm6 2555 pxor %xmm8,%xmm6 2556 movdqa %xmm5,%xmm8 2557 psrld $25,%xmm8 2558 pslld $32-25,%xmm5 2559 pxor %xmm8,%xmm5 2560 movdqa %xmm4,%xmm8 2561 psrld $25,%xmm8 2562 pslld $32-25,%xmm4 2563 pxor %xmm8,%xmm4 2564 movdqa 80(%rbp),%xmm8 2565 imulq %r12,%r9 2566 addq %r10,%r15 2567 adcq %rdx,%r9 2568 .byte 102,15,58,15,255,4 2569 .byte 102,69,15,58,15,219,8 2570 .byte 102,69,15,58,15,255,12 2571 .byte 102,15,58,15,246,4 2572 .byte 102,69,15,58,15,210,8 2573 .byte 102,69,15,58,15,246,12 2574 .byte 102,15,58,15,237,4 2575 .byte 102,69,15,58,15,201,8 2576 .byte 102,69,15,58,15,237,12 2577 .byte 102,15,58,15,228,4 2578 .byte 102,69,15,58,15,192,8 2579 .byte 102,69,15,58,15,228,12 2580 movdqa %xmm8,80(%rbp) 2581 movdqa .rol16(%rip),%xmm8 2582 paddd %xmm7,%xmm3 2583 paddd %xmm6,%xmm2 2584 paddd %xmm5,%xmm1 2585 paddd %xmm4,%xmm0 2586 pxor %xmm3,%xmm15 2587 pxor %xmm2,%xmm14 2588 movq %r13,%r10 2589 movq %r14,%r11 2590 movq %r15,%r12 2591 andq $3,%r12 2592 movq %r15,%r13 2593 andq $-4,%r13 2594 movq %r9,%r14 2595 shrdq $2,%r9,%r15 2596 shrq $2,%r9 2597 addq %r13,%r10 2598 adcq %r14,%r11 2599 adcq $0,%r12 2600 addq %r15,%r10 2601 adcq %r9,%r11 2602 adcq $0,%r12 2603 pxor %xmm1,%xmm13 2604 pxor %xmm0,%xmm12 2605 .byte 102,69,15,56,0,248 2606 .byte 102,69,15,56,0,240 2607 .byte 102,69,15,56,0,232 2608 .byte 102,69,15,56,0,224 2609 movdqa 80(%rbp),%xmm8 2610 paddd %xmm15,%xmm11 2611 paddd %xmm14,%xmm10 2612 paddd %xmm13,%xmm9 2613 paddd %xmm12,%xmm8 2614 pxor %xmm11,%xmm7 2615 pxor %xmm10,%xmm6 2616 pxor %xmm9,%xmm5 2617 pxor %xmm8,%xmm4 2618 movdqa %xmm8,80(%rbp) 2619 movdqa %xmm7,%xmm8 2620 psrld $20,%xmm8 2621 pslld $32-20,%xmm7 2622 pxor %xmm8,%xmm7 2623 movdqa %xmm6,%xmm8 2624 psrld $20,%xmm8 2625 pslld $32-20,%xmm6 2626 pxor %xmm8,%xmm6 2627 movdqa %xmm5,%xmm8 2628 psrld $20,%xmm8 2629 pslld $32-20,%xmm5 2630 pxor %xmm8,%xmm5 2631 movdqa %xmm4,%xmm8 2632 psrld $20,%xmm8 2633 pslld $32-20,%xmm4 2634 pxor %xmm8,%xmm4 2635 movdqa .rol8(%rip),%xmm8 2636 paddd %xmm7,%xmm3 2637 paddd %xmm6,%xmm2 2638 paddd %xmm5,%xmm1 2639 paddd %xmm4,%xmm0 2640 pxor %xmm3,%xmm15 2641 pxor %xmm2,%xmm14 2642 pxor %xmm1,%xmm13 2643 pxor %xmm0,%xmm12 2644 .byte 102,69,15,56,0,248 2645 .byte 102,69,15,56,0,240 2646 .byte 102,69,15,56,0,232 2647 .byte 102,69,15,56,0,224 2648 movdqa 80(%rbp),%xmm8 2649 paddd %xmm15,%xmm11 2650 paddd %xmm14,%xmm10 2651 paddd %xmm13,%xmm9 2652 paddd %xmm12,%xmm8 2653 pxor %xmm11,%xmm7 2654 pxor %xmm10,%xmm6 2655 pxor %xmm9,%xmm5 2656 pxor %xmm8,%xmm4 2657 movdqa %xmm8,80(%rbp) 2658 movdqa %xmm7,%xmm8 2659 psrld $25,%xmm8 2660 pslld $32-25,%xmm7 2661 pxor %xmm8,%xmm7 2662 movdqa %xmm6,%xmm8 2663 psrld $25,%xmm8 2664 pslld $32-25,%xmm6 2665 pxor %xmm8,%xmm6 2666 movdqa %xmm5,%xmm8 2667 psrld $25,%xmm8 2668 pslld $32-25,%xmm5 2669 pxor %xmm8,%xmm5 2670 movdqa %xmm4,%xmm8 2671 psrld $25,%xmm8 2672 pslld $32-25,%xmm4 2673 pxor %xmm8,%xmm4 2674 movdqa 80(%rbp),%xmm8 2675 .byte 102,15,58,15,255,12 2676 .byte 102,69,15,58,15,219,8 2677 .byte 102,69,15,58,15,255,4 2678 .byte 102,15,58,15,246,12 2679 .byte 102,69,15,58,15,210,8 2680 .byte 102,69,15,58,15,246,4 2681 .byte 102,15,58,15,237,12 2682 .byte 102,69,15,58,15,201,8 2683 .byte 102,69,15,58,15,237,4 2684 .byte 102,15,58,15,228,12 2685 .byte 102,69,15,58,15,192,8 2686 .byte 102,69,15,58,15,228,4 2687 2688 leaq 16(%rdi),%rdi 2689 decq %r8 2690 jge 2b 2691 addq 0(%rdi),%r10 2692 adcq 8+0(%rdi),%r11 2693 adcq $1,%r12 2694 movq 0+0(%rbp),%rax 2695 movq %rax,%r15 2696 mulq %r10 2697 movq %rax,%r13 2698 movq %rdx,%r14 2699 movq 0+0(%rbp),%rax 2700 mulq %r11 2701 imulq %r12,%r15 2702 addq %rax,%r14 2703 adcq %rdx,%r15 2704 movq 8+0(%rbp),%rax 2705 movq %rax,%r9 2706 mulq %r10 2707 addq %rax,%r14 2708 adcq $0,%rdx 2709 movq %rdx,%r10 2710 movq 8+0(%rbp),%rax 2711 mulq %r11 2712 addq %rax,%r15 2713 adcq $0,%rdx 2714 imulq %r12,%r9 2715 addq %r10,%r15 2716 adcq %rdx,%r9 2717 movq %r13,%r10 2718 movq %r14,%r11 2719 movq %r15,%r12 2720 andq $3,%r12 2721 movq %r15,%r13 2722 andq $-4,%r13 2723 movq %r9,%r14 2724 shrdq $2,%r9,%r15 2725 shrq $2,%r9 2726 addq %r13,%r10 2727 adcq %r14,%r11 2728 adcq $0,%r12 2729 addq %r15,%r10 2730 adcq %r9,%r11 2731 adcq $0,%r12 2732 2733 leaq 16(%rdi),%rdi 2734 decq %rcx 2735 jg 2b 2736 paddd .chacha20_consts(%rip),%xmm3 2737 paddd 48(%rbp),%xmm7 2738 paddd 64(%rbp),%xmm11 2739 paddd 144(%rbp),%xmm15 2740 paddd .chacha20_consts(%rip),%xmm2 2741 paddd 48(%rbp),%xmm6 2742 paddd 64(%rbp),%xmm10 2743 paddd 128(%rbp),%xmm14 2744 paddd .chacha20_consts(%rip),%xmm1 2745 paddd 48(%rbp),%xmm5 2746 paddd 64(%rbp),%xmm9 2747 paddd 112(%rbp),%xmm13 2748 paddd .chacha20_consts(%rip),%xmm0 2749 paddd 48(%rbp),%xmm4 2750 paddd 64(%rbp),%xmm8 2751 paddd 96(%rbp),%xmm12 2752 2753 movdqa %xmm14,80(%rbp) 2754 movdqa %xmm14,80(%rbp) 2755 movdqu 0 + 0(%rsi),%xmm14 2756 pxor %xmm3,%xmm14 2757 movdqu %xmm14,0 + 0(%rdi) 2758 movdqu 16 + 0(%rsi),%xmm14 2759 pxor %xmm7,%xmm14 2760 movdqu %xmm14,16 + 0(%rdi) 2761 movdqu 32 + 0(%rsi),%xmm14 2762 pxor %xmm11,%xmm14 2763 movdqu %xmm14,32 + 0(%rdi) 2764 movdqu 48 + 0(%rsi),%xmm14 2765 pxor %xmm15,%xmm14 2766 movdqu %xmm14,48 + 0(%rdi) 2767 2768 movdqa 80(%rbp),%xmm14 2769 movdqu 0 + 64(%rsi),%xmm3 2770 movdqu 16 + 64(%rsi),%xmm7 2771 movdqu 32 + 64(%rsi),%xmm11 2772 movdqu 48 + 64(%rsi),%xmm15 2773 pxor %xmm3,%xmm2 2774 pxor %xmm7,%xmm6 2775 pxor %xmm11,%xmm10 2776 pxor %xmm14,%xmm15 2777 movdqu %xmm2,0 + 64(%rdi) 2778 movdqu %xmm6,16 + 64(%rdi) 2779 movdqu %xmm10,32 + 64(%rdi) 2780 movdqu %xmm15,48 + 64(%rdi) 2781 movdqu 0 + 128(%rsi),%xmm3 2782 movdqu 16 + 128(%rsi),%xmm7 2783 movdqu 32 + 128(%rsi),%xmm11 2784 movdqu 48 + 128(%rsi),%xmm15 2785 pxor %xmm3,%xmm1 2786 pxor %xmm7,%xmm5 2787 pxor %xmm11,%xmm9 2788 pxor %xmm13,%xmm15 2789 movdqu %xmm1,0 + 128(%rdi) 2790 movdqu %xmm5,16 + 128(%rdi) 2791 movdqu %xmm9,32 + 128(%rdi) 2792 movdqu %xmm15,48 + 128(%rdi) 2793 2794 cmpq $256,%rbx 2795 ja 3f 2796 2797 movq $192,%rcx 2798 subq $192,%rbx 2799 leaq 192(%rsi),%rsi 2800 jmp seal_sse_128_seal_hash 2801 3: 2802 movdqu 0 + 192(%rsi),%xmm3 2803 movdqu 16 + 192(%rsi),%xmm7 2804 movdqu 32 + 192(%rsi),%xmm11 2805 movdqu 48 + 192(%rsi),%xmm15 2806 pxor %xmm3,%xmm0 2807 pxor %xmm7,%xmm4 2808 pxor %xmm11,%xmm8 2809 pxor %xmm12,%xmm15 2810 movdqu %xmm0,0 + 192(%rdi) 2811 movdqu %xmm4,16 + 192(%rdi) 2812 movdqu %xmm8,32 + 192(%rdi) 2813 movdqu %xmm15,48 + 192(%rdi) 2814 2815 leaq 256(%rsi),%rsi 2816 subq $256,%rbx 2817 movq $6,%rcx 2818 movq $4,%r8 2819 cmpq $192,%rbx 2820 jg 1b 2821 movq %rbx,%rcx 2822 testq %rbx,%rbx 2823 je seal_sse_128_seal_hash 2824 movq $6,%rcx 2825 cmpq $64,%rbx 2826 jg 3f 2827 2828 seal_sse_tail_64: 2829 movdqa .chacha20_consts(%rip),%xmm0 2830 movdqa 48(%rbp),%xmm4 2831 movdqa 64(%rbp),%xmm8 2832 movdqa 96(%rbp),%xmm12 2833 paddd .sse_inc(%rip),%xmm12 2834 movdqa %xmm12,96(%rbp) 2835 2836 1: 2837 addq 0(%rdi),%r10 2838 adcq 8+0(%rdi),%r11 2839 adcq $1,%r12 2840 movq 0+0(%rbp),%rax 2841 movq %rax,%r15 2842 mulq %r10 2843 movq %rax,%r13 2844 movq %rdx,%r14 2845 movq 0+0(%rbp),%rax 2846 mulq %r11 2847 imulq %r12,%r15 2848 addq %rax,%r14 2849 adcq %rdx,%r15 2850 movq 8+0(%rbp),%rax 2851 movq %rax,%r9 2852 mulq %r10 2853 addq %rax,%r14 2854 adcq $0,%rdx 2855 movq %rdx,%r10 2856 movq 8+0(%rbp),%rax 2857 mulq %r11 2858 addq %rax,%r15 2859 adcq $0,%rdx 2860 imulq %r12,%r9 2861 addq %r10,%r15 2862 adcq %rdx,%r9 2863 movq %r13,%r10 2864 movq %r14,%r11 2865 movq %r15,%r12 2866 andq $3,%r12 2867 movq %r15,%r13 2868 andq $-4,%r13 2869 movq %r9,%r14 2870 shrdq $2,%r9,%r15 2871 shrq $2,%r9 2872 addq %r13,%r10 2873 adcq %r14,%r11 2874 adcq $0,%r12 2875 addq %r15,%r10 2876 adcq %r9,%r11 2877 adcq $0,%r12 2878 2879 leaq 16(%rdi),%rdi 2880 2: 2881 paddd %xmm4,%xmm0 2882 pxor %xmm0,%xmm12 2883 pshufb .rol16(%rip),%xmm12 2884 paddd %xmm12,%xmm8 2885 pxor %xmm8,%xmm4 2886 movdqa %xmm4,%xmm3 2887 pslld $12,%xmm3 2888 psrld $20,%xmm4 2889 pxor %xmm3,%xmm4 2890 paddd %xmm4,%xmm0 2891 pxor %xmm0,%xmm12 2892 pshufb .rol8(%rip),%xmm12 2893 paddd %xmm12,%xmm8 2894 pxor %xmm8,%xmm4 2895 movdqa %xmm4,%xmm3 2896 pslld $7,%xmm3 2897 psrld $25,%xmm4 2898 pxor %xmm3,%xmm4 2899 .byte 102,15,58,15,228,4 2900 .byte 102,69,15,58,15,192,8 2901 .byte 102,69,15,58,15,228,12 2902 paddd %xmm4,%xmm0 2903 pxor %xmm0,%xmm12 2904 pshufb .rol16(%rip),%xmm12 2905 paddd %xmm12,%xmm8 2906 pxor %xmm8,%xmm4 2907 movdqa %xmm4,%xmm3 2908 pslld $12,%xmm3 2909 psrld $20,%xmm4 2910 pxor %xmm3,%xmm4 2911 paddd %xmm4,%xmm0 2912 pxor %xmm0,%xmm12 2913 pshufb .rol8(%rip),%xmm12 2914 paddd %xmm12,%xmm8 2915 pxor %xmm8,%xmm4 2916 movdqa %xmm4,%xmm3 2917 pslld $7,%xmm3 2918 psrld $25,%xmm4 2919 pxor %xmm3,%xmm4 2920 .byte 102,15,58,15,228,12 2921 .byte 102,69,15,58,15,192,8 2922 .byte 102,69,15,58,15,228,4 2923 addq 0(%rdi),%r10 2924 adcq 8+0(%rdi),%r11 2925 adcq $1,%r12 2926 movq 0+0(%rbp),%rax 2927 movq %rax,%r15 2928 mulq %r10 2929 movq %rax,%r13 2930 movq %rdx,%r14 2931 movq 0+0(%rbp),%rax 2932 mulq %r11 2933 imulq %r12,%r15 2934 addq %rax,%r14 2935 adcq %rdx,%r15 2936 movq 8+0(%rbp),%rax 2937 movq %rax,%r9 2938 mulq %r10 2939 addq %rax,%r14 2940 adcq $0,%rdx 2941 movq %rdx,%r10 2942 movq 8+0(%rbp),%rax 2943 mulq %r11 2944 addq %rax,%r15 2945 adcq $0,%rdx 2946 imulq %r12,%r9 2947 addq %r10,%r15 2948 adcq %rdx,%r9 2949 movq %r13,%r10 2950 movq %r14,%r11 2951 movq %r15,%r12 2952 andq $3,%r12 2953 movq %r15,%r13 2954 andq $-4,%r13 2955 movq %r9,%r14 2956 shrdq $2,%r9,%r15 2957 shrq $2,%r9 2958 addq %r13,%r10 2959 adcq %r14,%r11 2960 adcq $0,%r12 2961 addq %r15,%r10 2962 adcq %r9,%r11 2963 adcq $0,%r12 2964 2965 leaq 16(%rdi),%rdi 2966 decq %rcx 2967 jg 1b 2968 decq %r8 2969 jge 2b 2970 paddd .chacha20_consts(%rip),%xmm0 2971 paddd 48(%rbp),%xmm4 2972 paddd 64(%rbp),%xmm8 2973 paddd 96(%rbp),%xmm12 2974 2975 jmp seal_sse_128_seal 2976 3: 2977 cmpq $128,%rbx 2978 jg 3f 2979 2980 seal_sse_tail_128: 2981 movdqa .chacha20_consts(%rip),%xmm0 2982 movdqa 48(%rbp),%xmm4 2983 movdqa 64(%rbp),%xmm8 2984 movdqa %xmm0,%xmm1 2985 movdqa %xmm4,%xmm5 2986 movdqa %xmm8,%xmm9 2987 movdqa 96(%rbp),%xmm13 2988 paddd .sse_inc(%rip),%xmm13 2989 movdqa %xmm13,%xmm12 2990 paddd .sse_inc(%rip),%xmm12 2991 movdqa %xmm12,96(%rbp) 2992 movdqa %xmm13,112(%rbp) 2993 2994 1: 2995 addq 0(%rdi),%r10 2996 adcq 8+0(%rdi),%r11 2997 adcq $1,%r12 2998 movq 0+0(%rbp),%rax 2999 movq %rax,%r15 3000 mulq %r10 3001 movq %rax,%r13 3002 movq %rdx,%r14 3003 movq 0+0(%rbp),%rax 3004 mulq %r11 3005 imulq %r12,%r15 3006 addq %rax,%r14 3007 adcq %rdx,%r15 3008 movq 8+0(%rbp),%rax 3009 movq %rax,%r9 3010 mulq %r10 3011 addq %rax,%r14 3012 adcq $0,%rdx 3013 movq %rdx,%r10 3014 movq 8+0(%rbp),%rax 3015 mulq %r11 3016 addq %rax,%r15 3017 adcq $0,%rdx 3018 imulq %r12,%r9 3019 addq %r10,%r15 3020 adcq %rdx,%r9 3021 movq %r13,%r10 3022 movq %r14,%r11 3023 movq %r15,%r12 3024 andq $3,%r12 3025 movq %r15,%r13 3026 andq $-4,%r13 3027 movq %r9,%r14 3028 shrdq $2,%r9,%r15 3029 shrq $2,%r9 3030 addq %r13,%r10 3031 adcq %r14,%r11 3032 adcq $0,%r12 3033 addq %r15,%r10 3034 adcq %r9,%r11 3035 adcq $0,%r12 3036 3037 leaq 16(%rdi),%rdi 3038 2: 3039 paddd %xmm4,%xmm0 3040 pxor %xmm0,%xmm12 3041 pshufb .rol16(%rip),%xmm12 3042 paddd %xmm12,%xmm8 3043 pxor %xmm8,%xmm4 3044 movdqa %xmm4,%xmm3 3045 pslld $12,%xmm3 3046 psrld $20,%xmm4 3047 pxor %xmm3,%xmm4 3048 paddd %xmm4,%xmm0 3049 pxor %xmm0,%xmm12 3050 pshufb .rol8(%rip),%xmm12 3051 paddd %xmm12,%xmm8 3052 pxor %xmm8,%xmm4 3053 movdqa %xmm4,%xmm3 3054 pslld $7,%xmm3 3055 psrld $25,%xmm4 3056 pxor %xmm3,%xmm4 3057 .byte 102,15,58,15,228,4 3058 .byte 102,69,15,58,15,192,8 3059 .byte 102,69,15,58,15,228,12 3060 paddd %xmm5,%xmm1 3061 pxor %xmm1,%xmm13 3062 pshufb .rol16(%rip),%xmm13 3063 paddd %xmm13,%xmm9 3064 pxor %xmm9,%xmm5 3065 movdqa %xmm5,%xmm3 3066 pslld $12,%xmm3 3067 psrld $20,%xmm5 3068 pxor %xmm3,%xmm5 3069 paddd %xmm5,%xmm1 3070 pxor %xmm1,%xmm13 3071 pshufb .rol8(%rip),%xmm13 3072 paddd %xmm13,%xmm9 3073 pxor %xmm9,%xmm5 3074 movdqa %xmm5,%xmm3 3075 pslld $7,%xmm3 3076 psrld $25,%xmm5 3077 pxor %xmm3,%xmm5 3078 .byte 102,15,58,15,237,4 3079 .byte 102,69,15,58,15,201,8 3080 .byte 102,69,15,58,15,237,12 3081 addq 0(%rdi),%r10 3082 adcq 8+0(%rdi),%r11 3083 adcq $1,%r12 3084 movq 0+0(%rbp),%rax 3085 movq %rax,%r15 3086 mulq %r10 3087 movq %rax,%r13 3088 movq %rdx,%r14 3089 movq 0+0(%rbp),%rax 3090 mulq %r11 3091 imulq %r12,%r15 3092 addq %rax,%r14 3093 adcq %rdx,%r15 3094 movq 8+0(%rbp),%rax 3095 movq %rax,%r9 3096 mulq %r10 3097 addq %rax,%r14 3098 adcq $0,%rdx 3099 movq %rdx,%r10 3100 movq 8+0(%rbp),%rax 3101 mulq %r11 3102 addq %rax,%r15 3103 adcq $0,%rdx 3104 imulq %r12,%r9 3105 addq %r10,%r15 3106 adcq %rdx,%r9 3107 movq %r13,%r10 3108 movq %r14,%r11 3109 movq %r15,%r12 3110 andq $3,%r12 3111 movq %r15,%r13 3112 andq $-4,%r13 3113 movq %r9,%r14 3114 shrdq $2,%r9,%r15 3115 shrq $2,%r9 3116 addq %r13,%r10 3117 adcq %r14,%r11 3118 adcq $0,%r12 3119 addq %r15,%r10 3120 adcq %r9,%r11 3121 adcq $0,%r12 3122 paddd %xmm4,%xmm0 3123 pxor %xmm0,%xmm12 3124 pshufb .rol16(%rip),%xmm12 3125 paddd %xmm12,%xmm8 3126 pxor %xmm8,%xmm4 3127 movdqa %xmm4,%xmm3 3128 pslld $12,%xmm3 3129 psrld $20,%xmm4 3130 pxor %xmm3,%xmm4 3131 paddd %xmm4,%xmm0 3132 pxor %xmm0,%xmm12 3133 pshufb .rol8(%rip),%xmm12 3134 paddd %xmm12,%xmm8 3135 pxor %xmm8,%xmm4 3136 movdqa %xmm4,%xmm3 3137 pslld $7,%xmm3 3138 psrld $25,%xmm4 3139 pxor %xmm3,%xmm4 3140 .byte 102,15,58,15,228,12 3141 .byte 102,69,15,58,15,192,8 3142 .byte 102,69,15,58,15,228,4 3143 paddd %xmm5,%xmm1 3144 pxor %xmm1,%xmm13 3145 pshufb .rol16(%rip),%xmm13 3146 paddd %xmm13,%xmm9 3147 pxor %xmm9,%xmm5 3148 movdqa %xmm5,%xmm3 3149 pslld $12,%xmm3 3150 psrld $20,%xmm5 3151 pxor %xmm3,%xmm5 3152 paddd %xmm5,%xmm1 3153 pxor %xmm1,%xmm13 3154 pshufb .rol8(%rip),%xmm13 3155 paddd %xmm13,%xmm9 3156 pxor %xmm9,%xmm5 3157 movdqa %xmm5,%xmm3 3158 pslld $7,%xmm3 3159 psrld $25,%xmm5 3160 pxor %xmm3,%xmm5 3161 .byte 102,15,58,15,237,12 3162 .byte 102,69,15,58,15,201,8 3163 .byte 102,69,15,58,15,237,4 3164 3165 leaq 16(%rdi),%rdi 3166 decq %rcx 3167 jg 1b 3168 decq %r8 3169 jge 2b 3170 paddd .chacha20_consts(%rip),%xmm1 3171 paddd 48(%rbp),%xmm5 3172 paddd 64(%rbp),%xmm9 3173 paddd 112(%rbp),%xmm13 3174 paddd .chacha20_consts(%rip),%xmm0 3175 paddd 48(%rbp),%xmm4 3176 paddd 64(%rbp),%xmm8 3177 paddd 96(%rbp),%xmm12 3178 movdqu 0 + 0(%rsi),%xmm3 3179 movdqu 16 + 0(%rsi),%xmm7 3180 movdqu 32 + 0(%rsi),%xmm11 3181 movdqu 48 + 0(%rsi),%xmm15 3182 pxor %xmm3,%xmm1 3183 pxor %xmm7,%xmm5 3184 pxor %xmm11,%xmm9 3185 pxor %xmm13,%xmm15 3186 movdqu %xmm1,0 + 0(%rdi) 3187 movdqu %xmm5,16 + 0(%rdi) 3188 movdqu %xmm9,32 + 0(%rdi) 3189 movdqu %xmm15,48 + 0(%rdi) 3190 3191 movq $64,%rcx 3192 subq $64,%rbx 3193 leaq 64(%rsi),%rsi 3194 jmp seal_sse_128_seal_hash 3195 3: 3196 3197 seal_sse_tail_192: 3198 movdqa .chacha20_consts(%rip),%xmm0 3199 movdqa 48(%rbp),%xmm4 3200 movdqa 64(%rbp),%xmm8 3201 movdqa %xmm0,%xmm1 3202 movdqa %xmm4,%xmm5 3203 movdqa %xmm8,%xmm9 3204 movdqa %xmm0,%xmm2 3205 movdqa %xmm4,%xmm6 3206 movdqa %xmm8,%xmm10 3207 movdqa 96(%rbp),%xmm14 3208 paddd .sse_inc(%rip),%xmm14 3209 movdqa %xmm14,%xmm13 3210 paddd .sse_inc(%rip),%xmm13 3211 movdqa %xmm13,%xmm12 3212 paddd .sse_inc(%rip),%xmm12 3213 movdqa %xmm12,96(%rbp) 3214 movdqa %xmm13,112(%rbp) 3215 movdqa %xmm14,128(%rbp) 3216 3217 1: 3218 addq 0(%rdi),%r10 3219 adcq 8+0(%rdi),%r11 3220 adcq $1,%r12 3221 movq 0+0(%rbp),%rax 3222 movq %rax,%r15 3223 mulq %r10 3224 movq %rax,%r13 3225 movq %rdx,%r14 3226 movq 0+0(%rbp),%rax 3227 mulq %r11 3228 imulq %r12,%r15 3229 addq %rax,%r14 3230 adcq %rdx,%r15 3231 movq 8+0(%rbp),%rax 3232 movq %rax,%r9 3233 mulq %r10 3234 addq %rax,%r14 3235 adcq $0,%rdx 3236 movq %rdx,%r10 3237 movq 8+0(%rbp),%rax 3238 mulq %r11 3239 addq %rax,%r15 3240 adcq $0,%rdx 3241 imulq %r12,%r9 3242 addq %r10,%r15 3243 adcq %rdx,%r9 3244 movq %r13,%r10 3245 movq %r14,%r11 3246 movq %r15,%r12 3247 andq $3,%r12 3248 movq %r15,%r13 3249 andq $-4,%r13 3250 movq %r9,%r14 3251 shrdq $2,%r9,%r15 3252 shrq $2,%r9 3253 addq %r13,%r10 3254 adcq %r14,%r11 3255 adcq $0,%r12 3256 addq %r15,%r10 3257 adcq %r9,%r11 3258 adcq $0,%r12 3259 3260 leaq 16(%rdi),%rdi 3261 2: 3262 paddd %xmm4,%xmm0 3263 pxor %xmm0,%xmm12 3264 pshufb .rol16(%rip),%xmm12 3265 paddd %xmm12,%xmm8 3266 pxor %xmm8,%xmm4 3267 movdqa %xmm4,%xmm3 3268 pslld $12,%xmm3 3269 psrld $20,%xmm4 3270 pxor %xmm3,%xmm4 3271 paddd %xmm4,%xmm0 3272 pxor %xmm0,%xmm12 3273 pshufb .rol8(%rip),%xmm12 3274 paddd %xmm12,%xmm8 3275 pxor %xmm8,%xmm4 3276 movdqa %xmm4,%xmm3 3277 pslld $7,%xmm3 3278 psrld $25,%xmm4 3279 pxor %xmm3,%xmm4 3280 .byte 102,15,58,15,228,4 3281 .byte 102,69,15,58,15,192,8 3282 .byte 102,69,15,58,15,228,12 3283 paddd %xmm5,%xmm1 3284 pxor %xmm1,%xmm13 3285 pshufb .rol16(%rip),%xmm13 3286 paddd %xmm13,%xmm9 3287 pxor %xmm9,%xmm5 3288 movdqa %xmm5,%xmm3 3289 pslld $12,%xmm3 3290 psrld $20,%xmm5 3291 pxor %xmm3,%xmm5 3292 paddd %xmm5,%xmm1 3293 pxor %xmm1,%xmm13 3294 pshufb .rol8(%rip),%xmm13 3295 paddd %xmm13,%xmm9 3296 pxor %xmm9,%xmm5 3297 movdqa %xmm5,%xmm3 3298 pslld $7,%xmm3 3299 psrld $25,%xmm5 3300 pxor %xmm3,%xmm5 3301 .byte 102,15,58,15,237,4 3302 .byte 102,69,15,58,15,201,8 3303 .byte 102,69,15,58,15,237,12 3304 paddd %xmm6,%xmm2 3305 pxor %xmm2,%xmm14 3306 pshufb .rol16(%rip),%xmm14 3307 paddd %xmm14,%xmm10 3308 pxor %xmm10,%xmm6 3309 movdqa %xmm6,%xmm3 3310 pslld $12,%xmm3 3311 psrld $20,%xmm6 3312 pxor %xmm3,%xmm6 3313 paddd %xmm6,%xmm2 3314 pxor %xmm2,%xmm14 3315 pshufb .rol8(%rip),%xmm14 3316 paddd %xmm14,%xmm10 3317 pxor %xmm10,%xmm6 3318 movdqa %xmm6,%xmm3 3319 pslld $7,%xmm3 3320 psrld $25,%xmm6 3321 pxor %xmm3,%xmm6 3322 .byte 102,15,58,15,246,4 3323 .byte 102,69,15,58,15,210,8 3324 .byte 102,69,15,58,15,246,12 3325 addq 0(%rdi),%r10 3326 adcq 8+0(%rdi),%r11 3327 adcq $1,%r12 3328 movq 0+0(%rbp),%rax 3329 movq %rax,%r15 3330 mulq %r10 3331 movq %rax,%r13 3332 movq %rdx,%r14 3333 movq 0+0(%rbp),%rax 3334 mulq %r11 3335 imulq %r12,%r15 3336 addq %rax,%r14 3337 adcq %rdx,%r15 3338 movq 8+0(%rbp),%rax 3339 movq %rax,%r9 3340 mulq %r10 3341 addq %rax,%r14 3342 adcq $0,%rdx 3343 movq %rdx,%r10 3344 movq 8+0(%rbp),%rax 3345 mulq %r11 3346 addq %rax,%r15 3347 adcq $0,%rdx 3348 imulq %r12,%r9 3349 addq %r10,%r15 3350 adcq %rdx,%r9 3351 movq %r13,%r10 3352 movq %r14,%r11 3353 movq %r15,%r12 3354 andq $3,%r12 3355 movq %r15,%r13 3356 andq $-4,%r13 3357 movq %r9,%r14 3358 shrdq $2,%r9,%r15 3359 shrq $2,%r9 3360 addq %r13,%r10 3361 adcq %r14,%r11 3362 adcq $0,%r12 3363 addq %r15,%r10 3364 adcq %r9,%r11 3365 adcq $0,%r12 3366 paddd %xmm4,%xmm0 3367 pxor %xmm0,%xmm12 3368 pshufb .rol16(%rip),%xmm12 3369 paddd %xmm12,%xmm8 3370 pxor %xmm8,%xmm4 3371 movdqa %xmm4,%xmm3 3372 pslld $12,%xmm3 3373 psrld $20,%xmm4 3374 pxor %xmm3,%xmm4 3375 paddd %xmm4,%xmm0 3376 pxor %xmm0,%xmm12 3377 pshufb .rol8(%rip),%xmm12 3378 paddd %xmm12,%xmm8 3379 pxor %xmm8,%xmm4 3380 movdqa %xmm4,%xmm3 3381 pslld $7,%xmm3 3382 psrld $25,%xmm4 3383 pxor %xmm3,%xmm4 3384 .byte 102,15,58,15,228,12 3385 .byte 102,69,15,58,15,192,8 3386 .byte 102,69,15,58,15,228,4 3387 paddd %xmm5,%xmm1 3388 pxor %xmm1,%xmm13 3389 pshufb .rol16(%rip),%xmm13 3390 paddd %xmm13,%xmm9 3391 pxor %xmm9,%xmm5 3392 movdqa %xmm5,%xmm3 3393 pslld $12,%xmm3 3394 psrld $20,%xmm5 3395 pxor %xmm3,%xmm5 3396 paddd %xmm5,%xmm1 3397 pxor %xmm1,%xmm13 3398 pshufb .rol8(%rip),%xmm13 3399 paddd %xmm13,%xmm9 3400 pxor %xmm9,%xmm5 3401 movdqa %xmm5,%xmm3 3402 pslld $7,%xmm3 3403 psrld $25,%xmm5 3404 pxor %xmm3,%xmm5 3405 .byte 102,15,58,15,237,12 3406 .byte 102,69,15,58,15,201,8 3407 .byte 102,69,15,58,15,237,4 3408 paddd %xmm6,%xmm2 3409 pxor %xmm2,%xmm14 3410 pshufb .rol16(%rip),%xmm14 3411 paddd %xmm14,%xmm10 3412 pxor %xmm10,%xmm6 3413 movdqa %xmm6,%xmm3 3414 pslld $12,%xmm3 3415 psrld $20,%xmm6 3416 pxor %xmm3,%xmm6 3417 paddd %xmm6,%xmm2 3418 pxor %xmm2,%xmm14 3419 pshufb .rol8(%rip),%xmm14 3420 paddd %xmm14,%xmm10 3421 pxor %xmm10,%xmm6 3422 movdqa %xmm6,%xmm3 3423 pslld $7,%xmm3 3424 psrld $25,%xmm6 3425 pxor %xmm3,%xmm6 3426 .byte 102,15,58,15,246,12 3427 .byte 102,69,15,58,15,210,8 3428 .byte 102,69,15,58,15,246,4 3429 3430 leaq 16(%rdi),%rdi 3431 decq %rcx 3432 jg 1b 3433 decq %r8 3434 jge 2b 3435 paddd .chacha20_consts(%rip),%xmm2 3436 paddd 48(%rbp),%xmm6 3437 paddd 64(%rbp),%xmm10 3438 paddd 128(%rbp),%xmm14 3439 paddd .chacha20_consts(%rip),%xmm1 3440 paddd 48(%rbp),%xmm5 3441 paddd 64(%rbp),%xmm9 3442 paddd 112(%rbp),%xmm13 3443 paddd .chacha20_consts(%rip),%xmm0 3444 paddd 48(%rbp),%xmm4 3445 paddd 64(%rbp),%xmm8 3446 paddd 96(%rbp),%xmm12 3447 movdqu 0 + 0(%rsi),%xmm3 3448 movdqu 16 + 0(%rsi),%xmm7 3449 movdqu 32 + 0(%rsi),%xmm11 3450 movdqu 48 + 0(%rsi),%xmm15 3451 pxor %xmm3,%xmm2 3452 pxor %xmm7,%xmm6 3453 pxor %xmm11,%xmm10 3454 pxor %xmm14,%xmm15 3455 movdqu %xmm2,0 + 0(%rdi) 3456 movdqu %xmm6,16 + 0(%rdi) 3457 movdqu %xmm10,32 + 0(%rdi) 3458 movdqu %xmm15,48 + 0(%rdi) 3459 movdqu 0 + 64(%rsi),%xmm3 3460 movdqu 16 + 64(%rsi),%xmm7 3461 movdqu 32 + 64(%rsi),%xmm11 3462 movdqu 48 + 64(%rsi),%xmm15 3463 pxor %xmm3,%xmm1 3464 pxor %xmm7,%xmm5 3465 pxor %xmm11,%xmm9 3466 pxor %xmm13,%xmm15 3467 movdqu %xmm1,0 + 64(%rdi) 3468 movdqu %xmm5,16 + 64(%rdi) 3469 movdqu %xmm9,32 + 64(%rdi) 3470 movdqu %xmm15,48 + 64(%rdi) 3471 3472 movq $128,%rcx 3473 subq $128,%rbx 3474 leaq 128(%rsi),%rsi 3475 3476 seal_sse_128_seal_hash: 3477 cmpq $16,%rcx 3478 jb seal_sse_128_seal 3479 addq 0(%rdi),%r10 3480 adcq 8+0(%rdi),%r11 3481 adcq $1,%r12 3482 movq 0+0(%rbp),%rax 3483 movq %rax,%r15 3484 mulq %r10 3485 movq %rax,%r13 3486 movq %rdx,%r14 3487 movq 0+0(%rbp),%rax 3488 mulq %r11 3489 imulq %r12,%r15 3490 addq %rax,%r14 3491 adcq %rdx,%r15 3492 movq 8+0(%rbp),%rax 3493 movq %rax,%r9 3494 mulq %r10 3495 addq %rax,%r14 3496 adcq $0,%rdx 3497 movq %rdx,%r10 3498 movq 8+0(%rbp),%rax 3499 mulq %r11 3500 addq %rax,%r15 3501 adcq $0,%rdx 3502 imulq %r12,%r9 3503 addq %r10,%r15 3504 adcq %rdx,%r9 3505 movq %r13,%r10 3506 movq %r14,%r11 3507 movq %r15,%r12 3508 andq $3,%r12 3509 movq %r15,%r13 3510 andq $-4,%r13 3511 movq %r9,%r14 3512 shrdq $2,%r9,%r15 3513 shrq $2,%r9 3514 addq %r13,%r10 3515 adcq %r14,%r11 3516 adcq $0,%r12 3517 addq %r15,%r10 3518 adcq %r9,%r11 3519 adcq $0,%r12 3520 3521 subq $16,%rcx 3522 leaq 16(%rdi),%rdi 3523 jmp seal_sse_128_seal_hash 3524 3525 seal_sse_128_seal: 3526 cmpq $16,%rbx 3527 jb seal_sse_tail_16 3528 subq $16,%rbx 3529 3530 movdqu 0(%rsi),%xmm3 3531 pxor %xmm3,%xmm0 3532 movdqu %xmm0,0(%rdi) 3533 3534 addq 0(%rdi),%r10 3535 adcq 8(%rdi),%r11 3536 adcq $1,%r12 3537 leaq 16(%rsi),%rsi 3538 leaq 16(%rdi),%rdi 3539 movq 0+0(%rbp),%rax 3540 movq %rax,%r15 3541 mulq %r10 3542 movq %rax,%r13 3543 movq %rdx,%r14 3544 movq 0+0(%rbp),%rax 3545 mulq %r11 3546 imulq %r12,%r15 3547 addq %rax,%r14 3548 adcq %rdx,%r15 3549 movq 8+0(%rbp),%rax 3550 movq %rax,%r9 3551 mulq %r10 3552 addq %rax,%r14 3553 adcq $0,%rdx 3554 movq %rdx,%r10 3555 movq 8+0(%rbp),%rax 3556 mulq %r11 3557 addq %rax,%r15 3558 adcq $0,%rdx 3559 imulq %r12,%r9 3560 addq %r10,%r15 3561 adcq %rdx,%r9 3562 movq %r13,%r10 3563 movq %r14,%r11 3564 movq %r15,%r12 3565 andq $3,%r12 3566 movq %r15,%r13 3567 andq $-4,%r13 3568 movq %r9,%r14 3569 shrdq $2,%r9,%r15 3570 shrq $2,%r9 3571 addq %r13,%r10 3572 adcq %r14,%r11 3573 adcq $0,%r12 3574 addq %r15,%r10 3575 adcq %r9,%r11 3576 adcq $0,%r12 3577 3578 3579 movdqa %xmm4,%xmm0 3580 movdqa %xmm8,%xmm4 3581 movdqa %xmm12,%xmm8 3582 movdqa %xmm1,%xmm12 3583 movdqa %xmm5,%xmm1 3584 movdqa %xmm9,%xmm5 3585 movdqa %xmm13,%xmm9 3586 jmp seal_sse_128_seal 3587 3588 seal_sse_tail_16: 3589 testq %rbx,%rbx 3590 jz seal_sse_finalize 3591 3592 movq %rbx,%r8 3593 shlq $4,%r8 3594 leaq .and_masks(%rip),%r13 3595 movq %rbx,%rcx 3596 leaq -1(%rsi,%rbx), %rsi 3597 pxor %xmm15,%xmm15 3598 1: 3599 pslldq $1,%xmm15 3600 pinsrb $0,(%rsi),%xmm15 3601 leaq -1(%rsi),%rsi 3602 decq %rcx 3603 jne 1b 3604 3605 3606 pxor %xmm0,%xmm15 3607 3608 3609 movq %rbx,%rcx 3610 movdqu %xmm15,%xmm0 3611 2: 3612 pextrb $0,%xmm0,(%rdi) 3613 psrldq $1,%xmm0 3614 addq $1,%rdi 3615 subq $1,%rcx 3616 jnz 2b 3617 3618 pand -16(%r13,%r8), %xmm15 3619 .byte 102,77,15,126,253 3620 pextrq $1,%xmm15,%r14 3621 addq %r13,%r10 3622 adcq %r14,%r11 3623 adcq $1,%r12 3624 movq 0+0(%rbp),%rax 3625 movq %rax,%r15 3626 mulq %r10 3627 movq %rax,%r13 3628 movq %rdx,%r14 3629 movq 0+0(%rbp),%rax 3630 mulq %r11 3631 imulq %r12,%r15 3632 addq %rax,%r14 3633 adcq %rdx,%r15 3634 movq 8+0(%rbp),%rax 3635 movq %rax,%r9 3636 mulq %r10 3637 addq %rax,%r14 3638 adcq $0,%rdx 3639 movq %rdx,%r10 3640 movq 8+0(%rbp),%rax 3641 mulq %r11 3642 addq %rax,%r15 3643 adcq $0,%rdx 3644 imulq %r12,%r9 3645 addq %r10,%r15 3646 adcq %rdx,%r9 3647 movq %r13,%r10 3648 movq %r14,%r11 3649 movq %r15,%r12 3650 andq $3,%r12 3651 movq %r15,%r13 3652 andq $-4,%r13 3653 movq %r9,%r14 3654 shrdq $2,%r9,%r15 3655 shrq $2,%r9 3656 addq %r13,%r10 3657 adcq %r14,%r11 3658 adcq $0,%r12 3659 addq %r15,%r10 3660 adcq %r9,%r11 3661 adcq $0,%r12 3662 3663 seal_sse_finalize: 3664 addq 32(%rbp),%r10 3665 adcq 8+32(%rbp),%r11 3666 adcq $1,%r12 3667 movq 0+0(%rbp),%rax 3668 movq %rax,%r15 3669 mulq %r10 3670 movq %rax,%r13 3671 movq %rdx,%r14 3672 movq 0+0(%rbp),%rax 3673 mulq %r11 3674 imulq %r12,%r15 3675 addq %rax,%r14 3676 adcq %rdx,%r15 3677 movq 8+0(%rbp),%rax 3678 movq %rax,%r9 3679 mulq %r10 3680 addq %rax,%r14 3681 adcq $0,%rdx 3682 movq %rdx,%r10 3683 movq 8+0(%rbp),%rax 3684 mulq %r11 3685 addq %rax,%r15 3686 adcq $0,%rdx 3687 imulq %r12,%r9 3688 addq %r10,%r15 3689 adcq %rdx,%r9 3690 movq %r13,%r10 3691 movq %r14,%r11 3692 movq %r15,%r12 3693 andq $3,%r12 3694 movq %r15,%r13 3695 andq $-4,%r13 3696 movq %r9,%r14 3697 shrdq $2,%r9,%r15 3698 shrq $2,%r9 3699 addq %r13,%r10 3700 adcq %r14,%r11 3701 adcq $0,%r12 3702 addq %r15,%r10 3703 adcq %r9,%r11 3704 adcq $0,%r12 3705 3706 3707 movq %r10,%r13 3708 movq %r11,%r14 3709 movq %r12,%r15 3710 subq $-5,%r10 3711 sbbq $-1,%r11 3712 sbbq $3,%r12 3713 cmovcq %r13,%r10 3714 cmovcq %r14,%r11 3715 cmovcq %r15,%r12 3716 3717 addq 0+16(%rbp),%r10 3718 adcq 8+16(%rbp),%r11 3719 3720 addq $288 + 32,%rsp 3721 .cfi_adjust_cfa_offset -(288 + 32) 3722 popq %r9 3723 .cfi_adjust_cfa_offset -8 3724 movq %r10,0(%r9) 3725 movq %r11,8(%r9) 3726 3727 popq %r15 3728 .cfi_adjust_cfa_offset -8 3729 popq %r14 3730 .cfi_adjust_cfa_offset -8 3731 popq %r13 3732 .cfi_adjust_cfa_offset -8 3733 popq %r12 3734 .cfi_adjust_cfa_offset -8 3735 popq %rbx 3736 .cfi_adjust_cfa_offset -8 3737 popq %rbp 3738 .cfi_adjust_cfa_offset -8 3739 .byte 0xf3,0xc3 3740 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32 3741 3742 seal_sse_128: 3743 movdqu .chacha20_consts(%rip),%xmm0 3744 movdqa %xmm0,%xmm1 3745 movdqa %xmm0,%xmm2 3746 movdqu 0(%r9),%xmm4 3747 movdqa %xmm4,%xmm5 3748 movdqa %xmm4,%xmm6 3749 movdqu 16(%r9),%xmm8 3750 movdqa %xmm8,%xmm9 3751 movdqa %xmm8,%xmm10 3752 movdqu 32(%r9),%xmm14 3753 movdqa %xmm14,%xmm12 3754 paddd .sse_inc(%rip),%xmm12 3755 movdqa %xmm12,%xmm13 3756 paddd .sse_inc(%rip),%xmm13 3757 movdqa %xmm4,%xmm7 3758 movdqa %xmm8,%xmm11 3759 movdqa %xmm12,%xmm15 3760 movq $10,%r10 3761 1: 3762 paddd %xmm4,%xmm0 3763 pxor %xmm0,%xmm12 3764 pshufb .rol16(%rip),%xmm12 3765 paddd %xmm12,%xmm8 3766 pxor %xmm8,%xmm4 3767 movdqa %xmm4,%xmm3 3768 pslld $12,%xmm3 3769 psrld $20,%xmm4 3770 pxor %xmm3,%xmm4 3771 paddd %xmm4,%xmm0 3772 pxor %xmm0,%xmm12 3773 pshufb .rol8(%rip),%xmm12 3774 paddd %xmm12,%xmm8 3775 pxor %xmm8,%xmm4 3776 movdqa %xmm4,%xmm3 3777 pslld $7,%xmm3 3778 psrld $25,%xmm4 3779 pxor %xmm3,%xmm4 3780 .byte 102,15,58,15,228,4 3781 .byte 102,69,15,58,15,192,8 3782 .byte 102,69,15,58,15,228,12 3783 paddd %xmm5,%xmm1 3784 pxor %xmm1,%xmm13 3785 pshufb .rol16(%rip),%xmm13 3786 paddd %xmm13,%xmm9 3787 pxor %xmm9,%xmm5 3788 movdqa %xmm5,%xmm3 3789 pslld $12,%xmm3 3790 psrld $20,%xmm5 3791 pxor %xmm3,%xmm5 3792 paddd %xmm5,%xmm1 3793 pxor %xmm1,%xmm13 3794 pshufb .rol8(%rip),%xmm13 3795 paddd %xmm13,%xmm9 3796 pxor %xmm9,%xmm5 3797 movdqa %xmm5,%xmm3 3798 pslld $7,%xmm3 3799 psrld $25,%xmm5 3800 pxor %xmm3,%xmm5 3801 .byte 102,15,58,15,237,4 3802 .byte 102,69,15,58,15,201,8 3803 .byte 102,69,15,58,15,237,12 3804 paddd %xmm6,%xmm2 3805 pxor %xmm2,%xmm14 3806 pshufb .rol16(%rip),%xmm14 3807 paddd %xmm14,%xmm10 3808 pxor %xmm10,%xmm6 3809 movdqa %xmm6,%xmm3 3810 pslld $12,%xmm3 3811 psrld $20,%xmm6 3812 pxor %xmm3,%xmm6 3813 paddd %xmm6,%xmm2 3814 pxor %xmm2,%xmm14 3815 pshufb .rol8(%rip),%xmm14 3816 paddd %xmm14,%xmm10 3817 pxor %xmm10,%xmm6 3818 movdqa %xmm6,%xmm3 3819 pslld $7,%xmm3 3820 psrld $25,%xmm6 3821 pxor %xmm3,%xmm6 3822 .byte 102,15,58,15,246,4 3823 .byte 102,69,15,58,15,210,8 3824 .byte 102,69,15,58,15,246,12 3825 paddd %xmm4,%xmm0 3826 pxor %xmm0,%xmm12 3827 pshufb .rol16(%rip),%xmm12 3828 paddd %xmm12,%xmm8 3829 pxor %xmm8,%xmm4 3830 movdqa %xmm4,%xmm3 3831 pslld $12,%xmm3 3832 psrld $20,%xmm4 3833 pxor %xmm3,%xmm4 3834 paddd %xmm4,%xmm0 3835 pxor %xmm0,%xmm12 3836 pshufb .rol8(%rip),%xmm12 3837 paddd %xmm12,%xmm8 3838 pxor %xmm8,%xmm4 3839 movdqa %xmm4,%xmm3 3840 pslld $7,%xmm3 3841 psrld $25,%xmm4 3842 pxor %xmm3,%xmm4 3843 .byte 102,15,58,15,228,12 3844 .byte 102,69,15,58,15,192,8 3845 .byte 102,69,15,58,15,228,4 3846 paddd %xmm5,%xmm1 3847 pxor %xmm1,%xmm13 3848 pshufb .rol16(%rip),%xmm13 3849 paddd %xmm13,%xmm9 3850 pxor %xmm9,%xmm5 3851 movdqa %xmm5,%xmm3 3852 pslld $12,%xmm3 3853 psrld $20,%xmm5 3854 pxor %xmm3,%xmm5 3855 paddd %xmm5,%xmm1 3856 pxor %xmm1,%xmm13 3857 pshufb .rol8(%rip),%xmm13 3858 paddd %xmm13,%xmm9 3859 pxor %xmm9,%xmm5 3860 movdqa %xmm5,%xmm3 3861 pslld $7,%xmm3 3862 psrld $25,%xmm5 3863 pxor %xmm3,%xmm5 3864 .byte 102,15,58,15,237,12 3865 .byte 102,69,15,58,15,201,8 3866 .byte 102,69,15,58,15,237,4 3867 paddd %xmm6,%xmm2 3868 pxor %xmm2,%xmm14 3869 pshufb .rol16(%rip),%xmm14 3870 paddd %xmm14,%xmm10 3871 pxor %xmm10,%xmm6 3872 movdqa %xmm6,%xmm3 3873 pslld $12,%xmm3 3874 psrld $20,%xmm6 3875 pxor %xmm3,%xmm6 3876 paddd %xmm6,%xmm2 3877 pxor %xmm2,%xmm14 3878 pshufb .rol8(%rip),%xmm14 3879 paddd %xmm14,%xmm10 3880 pxor %xmm10,%xmm6 3881 movdqa %xmm6,%xmm3 3882 pslld $7,%xmm3 3883 psrld $25,%xmm6 3884 pxor %xmm3,%xmm6 3885 .byte 102,15,58,15,246,12 3886 .byte 102,69,15,58,15,210,8 3887 .byte 102,69,15,58,15,246,4 3888 3889 decq %r10 3890 jnz 1b 3891 paddd .chacha20_consts(%rip),%xmm0 3892 paddd .chacha20_consts(%rip),%xmm1 3893 paddd .chacha20_consts(%rip),%xmm2 3894 paddd %xmm7,%xmm4 3895 paddd %xmm7,%xmm5 3896 paddd %xmm7,%xmm6 3897 paddd %xmm11,%xmm8 3898 paddd %xmm11,%xmm9 3899 paddd %xmm15,%xmm12 3900 paddd .sse_inc(%rip),%xmm15 3901 paddd %xmm15,%xmm13 3902 3903 pand .clamp(%rip),%xmm2 3904 movdqa %xmm2,0(%rbp) 3905 movdqa %xmm6,16(%rbp) 3906 3907 movq %r8,%r8 3908 call poly_hash_ad_internal 3909 jmp seal_sse_128_seal 3910 .size chacha20_poly1305_seal, .-chacha20_poly1305_seal 3911 3912 3913 .type chacha20_poly1305_open_avx2,@function 3914 .align 64 3915 chacha20_poly1305_open_avx2: 3916 vzeroupper 3917 vmovdqa .chacha20_consts(%rip),%ymm0 3918 vbroadcasti128 0(%r9),%ymm4 3919 vbroadcasti128 16(%r9),%ymm8 3920 vbroadcasti128 32(%r9),%ymm12 3921 vpaddd .avx2_init(%rip),%ymm12,%ymm12 3922 cmpq $192,%rbx 3923 jbe open_avx2_192 3924 cmpq $320,%rbx 3925 jbe open_avx2_320 3926 3927 vmovdqa %ymm4,64(%rbp) 3928 vmovdqa %ymm8,96(%rbp) 3929 vmovdqa %ymm12,160(%rbp) 3930 movq $10,%r10 3931 1: 3932 vpaddd %ymm4,%ymm0,%ymm0 3933 vpxor %ymm0,%ymm12,%ymm12 3934 vpshufb .rol16(%rip),%ymm12,%ymm12 3935 vpaddd %ymm12,%ymm8,%ymm8 3936 vpxor %ymm8,%ymm4,%ymm4 3937 vpsrld $20,%ymm4,%ymm3 3938 vpslld $12,%ymm4,%ymm4 3939 vpxor %ymm3,%ymm4,%ymm4 3940 vpaddd %ymm4,%ymm0,%ymm0 3941 vpxor %ymm0,%ymm12,%ymm12 3942 vpshufb .rol8(%rip),%ymm12,%ymm12 3943 vpaddd %ymm12,%ymm8,%ymm8 3944 vpxor %ymm8,%ymm4,%ymm4 3945 vpslld $7,%ymm4,%ymm3 3946 vpsrld $25,%ymm4,%ymm4 3947 vpxor %ymm3,%ymm4,%ymm4 3948 vpalignr $12,%ymm12,%ymm12,%ymm12 3949 vpalignr $8,%ymm8,%ymm8,%ymm8 3950 vpalignr $4,%ymm4,%ymm4,%ymm4 3951 vpaddd %ymm4,%ymm0,%ymm0 3952 vpxor %ymm0,%ymm12,%ymm12 3953 vpshufb .rol16(%rip),%ymm12,%ymm12 3954 vpaddd %ymm12,%ymm8,%ymm8 3955 vpxor %ymm8,%ymm4,%ymm4 3956 vpsrld $20,%ymm4,%ymm3 3957 vpslld $12,%ymm4,%ymm4 3958 vpxor %ymm3,%ymm4,%ymm4 3959 vpaddd %ymm4,%ymm0,%ymm0 3960 vpxor %ymm0,%ymm12,%ymm12 3961 vpshufb .rol8(%rip),%ymm12,%ymm12 3962 vpaddd %ymm12,%ymm8,%ymm8 3963 vpxor %ymm8,%ymm4,%ymm4 3964 vpslld $7,%ymm4,%ymm3 3965 vpsrld $25,%ymm4,%ymm4 3966 vpxor %ymm3,%ymm4,%ymm4 3967 vpalignr $4,%ymm12,%ymm12,%ymm12 3968 vpalignr $8,%ymm8,%ymm8,%ymm8 3969 vpalignr $12,%ymm4,%ymm4,%ymm4 3970 3971 decq %r10 3972 jne 1b 3973 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 3974 vpaddd 64(%rbp),%ymm4,%ymm4 3975 vpaddd 96(%rbp),%ymm8,%ymm8 3976 vpaddd 160(%rbp),%ymm12,%ymm12 3977 3978 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 3979 3980 vpand .clamp(%rip),%ymm3,%ymm3 3981 vmovdqa %ymm3,0(%rbp) 3982 3983 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 3984 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 3985 3986 movq %r8,%r8 3987 call poly_hash_ad_internal 3988 xorq %rcx,%rcx 3989 3990 1: 3991 addq 0(%rsi,%rcx), %r10 3992 adcq 8+0(%rsi,%rcx), %r11 3993 adcq $1,%r12 3994 movq 0+0(%rbp),%rax 3995 movq %rax,%r15 3996 mulq %r10 3997 movq %rax,%r13 3998 movq %rdx,%r14 3999 movq 0+0(%rbp),%rax 4000 mulq %r11 4001 imulq %r12,%r15 4002 addq %rax,%r14 4003 adcq %rdx,%r15 4004 movq 8+0(%rbp),%rax 4005 movq %rax,%r9 4006 mulq %r10 4007 addq %rax,%r14 4008 adcq $0,%rdx 4009 movq %rdx,%r10 4010 movq 8+0(%rbp),%rax 4011 mulq %r11 4012 addq %rax,%r15 4013 adcq $0,%rdx 4014 imulq %r12,%r9 4015 addq %r10,%r15 4016 adcq %rdx,%r9 4017 movq %r13,%r10 4018 movq %r14,%r11 4019 movq %r15,%r12 4020 andq $3,%r12 4021 movq %r15,%r13 4022 andq $-4,%r13 4023 movq %r9,%r14 4024 shrdq $2,%r9,%r15 4025 shrq $2,%r9 4026 addq %r13,%r10 4027 adcq %r14,%r11 4028 adcq $0,%r12 4029 addq %r15,%r10 4030 adcq %r9,%r11 4031 adcq $0,%r12 4032 4033 addq $16,%rcx 4034 cmpq $64,%rcx 4035 jne 1b 4036 4037 vpxor 0(%rsi),%ymm0,%ymm0 4038 vpxor 32(%rsi),%ymm4,%ymm4 4039 vmovdqu %ymm0,0(%rdi) 4040 vmovdqu %ymm4,32(%rdi) 4041 leaq 64(%rsi),%rsi 4042 leaq 64(%rdi),%rdi 4043 subq $64,%rbx 4044 1: 4045 4046 cmpq $512,%rbx 4047 jb 3f 4048 vmovdqa .chacha20_consts(%rip),%ymm0 4049 vmovdqa 64(%rbp),%ymm4 4050 vmovdqa 96(%rbp),%ymm8 4051 vmovdqa %ymm0,%ymm1 4052 vmovdqa %ymm4,%ymm5 4053 vmovdqa %ymm8,%ymm9 4054 vmovdqa %ymm0,%ymm2 4055 vmovdqa %ymm4,%ymm6 4056 vmovdqa %ymm8,%ymm10 4057 vmovdqa %ymm0,%ymm3 4058 vmovdqa %ymm4,%ymm7 4059 vmovdqa %ymm8,%ymm11 4060 vmovdqa .avx2_inc(%rip),%ymm12 4061 vpaddd 160(%rbp),%ymm12,%ymm15 4062 vpaddd %ymm15,%ymm12,%ymm14 4063 vpaddd %ymm14,%ymm12,%ymm13 4064 vpaddd %ymm13,%ymm12,%ymm12 4065 vmovdqa %ymm15,256(%rbp) 4066 vmovdqa %ymm14,224(%rbp) 4067 vmovdqa %ymm13,192(%rbp) 4068 vmovdqa %ymm12,160(%rbp) 4069 4070 xorq %rcx,%rcx 4071 2: 4072 addq 0*8(%rsi,%rcx), %r10 4073 adcq 8+0*8(%rsi,%rcx), %r11 4074 adcq $1,%r12 4075 vmovdqa %ymm8,128(%rbp) 4076 vmovdqa .rol16(%rip),%ymm8 4077 vpaddd %ymm7,%ymm3,%ymm3 4078 vpaddd %ymm6,%ymm2,%ymm2 4079 vpaddd %ymm5,%ymm1,%ymm1 4080 vpaddd %ymm4,%ymm0,%ymm0 4081 vpxor %ymm3,%ymm15,%ymm15 4082 vpxor %ymm2,%ymm14,%ymm14 4083 vpxor %ymm1,%ymm13,%ymm13 4084 vpxor %ymm0,%ymm12,%ymm12 4085 movq 0+0(%rbp),%rdx 4086 movq %rdx,%r15 4087 mulxq %r10,%r13,%r14 4088 mulxq %r11,%rax,%rdx 4089 imulq %r12,%r15 4090 addq %rax,%r14 4091 adcq %rdx,%r15 4092 vpshufb %ymm8,%ymm15,%ymm15 4093 vpshufb %ymm8,%ymm14,%ymm14 4094 vpshufb %ymm8,%ymm13,%ymm13 4095 vpshufb %ymm8,%ymm12,%ymm12 4096 vmovdqa 128(%rbp),%ymm8 4097 vpaddd %ymm15,%ymm11,%ymm11 4098 vpaddd %ymm14,%ymm10,%ymm10 4099 vpaddd %ymm13,%ymm9,%ymm9 4100 vpaddd %ymm12,%ymm8,%ymm8 4101 movq 8+0(%rbp),%rdx 4102 mulxq %r10,%r10,%rax 4103 addq %r10,%r14 4104 mulxq %r11,%r11,%r9 4105 adcq %r11,%r15 4106 adcq $0,%r9 4107 imulq %r12,%rdx 4108 vpxor %ymm11,%ymm7,%ymm7 4109 vpxor %ymm10,%ymm6,%ymm6 4110 vpxor %ymm9,%ymm5,%ymm5 4111 vpxor %ymm8,%ymm4,%ymm4 4112 vmovdqa %ymm8,128(%rbp) 4113 vpsrld $20,%ymm7,%ymm8 4114 vpslld $32-20,%ymm7,%ymm7 4115 vpxor %ymm8,%ymm7,%ymm7 4116 vpsrld $20,%ymm6,%ymm8 4117 vpslld $32-20,%ymm6,%ymm6 4118 vpxor %ymm8,%ymm6,%ymm6 4119 vpsrld $20,%ymm5,%ymm8 4120 addq %rax,%r15 4121 adcq %rdx,%r9 4122 vpslld $32-20,%ymm5,%ymm5 4123 vpxor %ymm8,%ymm5,%ymm5 4124 vpsrld $20,%ymm4,%ymm8 4125 vpslld $32-20,%ymm4,%ymm4 4126 vpxor %ymm8,%ymm4,%ymm4 4127 vmovdqa .rol8(%rip),%ymm8 4128 vpaddd %ymm7,%ymm3,%ymm3 4129 vpaddd %ymm6,%ymm2,%ymm2 4130 vpaddd %ymm5,%ymm1,%ymm1 4131 vpaddd %ymm4,%ymm0,%ymm0 4132 movq %r13,%r10 4133 movq %r14,%r11 4134 movq %r15,%r12 4135 andq $3,%r12 4136 movq %r15,%r13 4137 andq $-4,%r13 4138 movq %r9,%r14 4139 shrdq $2,%r9,%r15 4140 shrq $2,%r9 4141 addq %r13,%r10 4142 adcq %r14,%r11 4143 adcq $0,%r12 4144 addq %r15,%r10 4145 adcq %r9,%r11 4146 adcq $0,%r12 4147 vpxor %ymm3,%ymm15,%ymm15 4148 vpxor %ymm2,%ymm14,%ymm14 4149 vpxor %ymm1,%ymm13,%ymm13 4150 vpxor %ymm0,%ymm12,%ymm12 4151 vpshufb %ymm8,%ymm15,%ymm15 4152 vpshufb %ymm8,%ymm14,%ymm14 4153 vpshufb %ymm8,%ymm13,%ymm13 4154 vpshufb %ymm8,%ymm12,%ymm12 4155 vmovdqa 128(%rbp),%ymm8 4156 addq 2*8(%rsi,%rcx), %r10 4157 adcq 8+2*8(%rsi,%rcx), %r11 4158 adcq $1,%r12 4159 vpaddd %ymm15,%ymm11,%ymm11 4160 vpaddd %ymm14,%ymm10,%ymm10 4161 vpaddd %ymm13,%ymm9,%ymm9 4162 vpaddd %ymm12,%ymm8,%ymm8 4163 vpxor %ymm11,%ymm7,%ymm7 4164 vpxor %ymm10,%ymm6,%ymm6 4165 vpxor %ymm9,%ymm5,%ymm5 4166 vpxor %ymm8,%ymm4,%ymm4 4167 movq 0+0(%rbp),%rdx 4168 movq %rdx,%r15 4169 mulxq %r10,%r13,%r14 4170 mulxq %r11,%rax,%rdx 4171 imulq %r12,%r15 4172 addq %rax,%r14 4173 adcq %rdx,%r15 4174 vmovdqa %ymm8,128(%rbp) 4175 vpsrld $25,%ymm7,%ymm8 4176 vpslld $32-25,%ymm7,%ymm7 4177 vpxor %ymm8,%ymm7,%ymm7 4178 vpsrld $25,%ymm6,%ymm8 4179 vpslld $32-25,%ymm6,%ymm6 4180 vpxor %ymm8,%ymm6,%ymm6 4181 vpsrld $25,%ymm5,%ymm8 4182 vpslld $32-25,%ymm5,%ymm5 4183 vpxor %ymm8,%ymm5,%ymm5 4184 vpsrld $25,%ymm4,%ymm8 4185 vpslld $32-25,%ymm4,%ymm4 4186 vpxor %ymm8,%ymm4,%ymm4 4187 vmovdqa 128(%rbp),%ymm8 4188 vpalignr $4,%ymm7,%ymm7,%ymm7 4189 vpalignr $8,%ymm11,%ymm11,%ymm11 4190 vpalignr $12,%ymm15,%ymm15,%ymm15 4191 vpalignr $4,%ymm6,%ymm6,%ymm6 4192 movq 8+0(%rbp),%rdx 4193 mulxq %r10,%r10,%rax 4194 addq %r10,%r14 4195 mulxq %r11,%r11,%r9 4196 adcq %r11,%r15 4197 adcq $0,%r9 4198 imulq %r12,%rdx 4199 vpalignr $8,%ymm10,%ymm10,%ymm10 4200 vpalignr $12,%ymm14,%ymm14,%ymm14 4201 vpalignr $4,%ymm5,%ymm5,%ymm5 4202 vpalignr $8,%ymm9,%ymm9,%ymm9 4203 vpalignr $12,%ymm13,%ymm13,%ymm13 4204 vpalignr $4,%ymm4,%ymm4,%ymm4 4205 vpalignr $8,%ymm8,%ymm8,%ymm8 4206 vpalignr $12,%ymm12,%ymm12,%ymm12 4207 vmovdqa %ymm8,128(%rbp) 4208 vmovdqa .rol16(%rip),%ymm8 4209 vpaddd %ymm7,%ymm3,%ymm3 4210 vpaddd %ymm6,%ymm2,%ymm2 4211 vpaddd %ymm5,%ymm1,%ymm1 4212 vpaddd %ymm4,%ymm0,%ymm0 4213 vpxor %ymm3,%ymm15,%ymm15 4214 vpxor %ymm2,%ymm14,%ymm14 4215 vpxor %ymm1,%ymm13,%ymm13 4216 vpxor %ymm0,%ymm12,%ymm12 4217 addq %rax,%r15 4218 adcq %rdx,%r9 4219 vpshufb %ymm8,%ymm15,%ymm15 4220 vpshufb %ymm8,%ymm14,%ymm14 4221 vpshufb %ymm8,%ymm13,%ymm13 4222 vpshufb %ymm8,%ymm12,%ymm12 4223 vmovdqa 128(%rbp),%ymm8 4224 vpaddd %ymm15,%ymm11,%ymm11 4225 vpaddd %ymm14,%ymm10,%ymm10 4226 vpaddd %ymm13,%ymm9,%ymm9 4227 vpaddd %ymm12,%ymm8,%ymm8 4228 movq %r13,%r10 4229 movq %r14,%r11 4230 movq %r15,%r12 4231 andq $3,%r12 4232 movq %r15,%r13 4233 andq $-4,%r13 4234 movq %r9,%r14 4235 shrdq $2,%r9,%r15 4236 shrq $2,%r9 4237 addq %r13,%r10 4238 adcq %r14,%r11 4239 adcq $0,%r12 4240 addq %r15,%r10 4241 adcq %r9,%r11 4242 adcq $0,%r12 4243 vpxor %ymm11,%ymm7,%ymm7 4244 vpxor %ymm10,%ymm6,%ymm6 4245 vpxor %ymm9,%ymm5,%ymm5 4246 vpxor %ymm8,%ymm4,%ymm4 4247 vmovdqa %ymm8,128(%rbp) 4248 vpsrld $20,%ymm7,%ymm8 4249 vpslld $32-20,%ymm7,%ymm7 4250 vpxor %ymm8,%ymm7,%ymm7 4251 addq 4*8(%rsi,%rcx), %r10 4252 adcq 8+4*8(%rsi,%rcx), %r11 4253 adcq $1,%r12 4254 4255 leaq 48(%rcx),%rcx 4256 vpsrld $20,%ymm6,%ymm8 4257 vpslld $32-20,%ymm6,%ymm6 4258 vpxor %ymm8,%ymm6,%ymm6 4259 vpsrld $20,%ymm5,%ymm8 4260 vpslld $32-20,%ymm5,%ymm5 4261 vpxor %ymm8,%ymm5,%ymm5 4262 vpsrld $20,%ymm4,%ymm8 4263 vpslld $32-20,%ymm4,%ymm4 4264 vpxor %ymm8,%ymm4,%ymm4 4265 vmovdqa .rol8(%rip),%ymm8 4266 vpaddd %ymm7,%ymm3,%ymm3 4267 vpaddd %ymm6,%ymm2,%ymm2 4268 vpaddd %ymm5,%ymm1,%ymm1 4269 vpaddd %ymm4,%ymm0,%ymm0 4270 vpxor %ymm3,%ymm15,%ymm15 4271 vpxor %ymm2,%ymm14,%ymm14 4272 vpxor %ymm1,%ymm13,%ymm13 4273 vpxor %ymm0,%ymm12,%ymm12 4274 movq 0+0(%rbp),%rdx 4275 movq %rdx,%r15 4276 mulxq %r10,%r13,%r14 4277 mulxq %r11,%rax,%rdx 4278 imulq %r12,%r15 4279 addq %rax,%r14 4280 adcq %rdx,%r15 4281 vpshufb %ymm8,%ymm15,%ymm15 4282 vpshufb %ymm8,%ymm14,%ymm14 4283 vpshufb %ymm8,%ymm13,%ymm13 4284 vpshufb %ymm8,%ymm12,%ymm12 4285 vmovdqa 128(%rbp),%ymm8 4286 vpaddd %ymm15,%ymm11,%ymm11 4287 vpaddd %ymm14,%ymm10,%ymm10 4288 vpaddd %ymm13,%ymm9,%ymm9 4289 movq 8+0(%rbp),%rdx 4290 mulxq %r10,%r10,%rax 4291 addq %r10,%r14 4292 mulxq %r11,%r11,%r9 4293 adcq %r11,%r15 4294 adcq $0,%r9 4295 imulq %r12,%rdx 4296 vpaddd %ymm12,%ymm8,%ymm8 4297 vpxor %ymm11,%ymm7,%ymm7 4298 vpxor %ymm10,%ymm6,%ymm6 4299 vpxor %ymm9,%ymm5,%ymm5 4300 vpxor %ymm8,%ymm4,%ymm4 4301 vmovdqa %ymm8,128(%rbp) 4302 vpsrld $25,%ymm7,%ymm8 4303 vpslld $32-25,%ymm7,%ymm7 4304 addq %rax,%r15 4305 adcq %rdx,%r9 4306 vpxor %ymm8,%ymm7,%ymm7 4307 vpsrld $25,%ymm6,%ymm8 4308 vpslld $32-25,%ymm6,%ymm6 4309 vpxor %ymm8,%ymm6,%ymm6 4310 vpsrld $25,%ymm5,%ymm8 4311 vpslld $32-25,%ymm5,%ymm5 4312 vpxor %ymm8,%ymm5,%ymm5 4313 vpsrld $25,%ymm4,%ymm8 4314 vpslld $32-25,%ymm4,%ymm4 4315 vpxor %ymm8,%ymm4,%ymm4 4316 vmovdqa 128(%rbp),%ymm8 4317 vpalignr $12,%ymm7,%ymm7,%ymm7 4318 vpalignr $8,%ymm11,%ymm11,%ymm11 4319 vpalignr $4,%ymm15,%ymm15,%ymm15 4320 vpalignr $12,%ymm6,%ymm6,%ymm6 4321 vpalignr $8,%ymm10,%ymm10,%ymm10 4322 vpalignr $4,%ymm14,%ymm14,%ymm14 4323 vpalignr $12,%ymm5,%ymm5,%ymm5 4324 movq %r13,%r10 4325 movq %r14,%r11 4326 movq %r15,%r12 4327 andq $3,%r12 4328 movq %r15,%r13 4329 andq $-4,%r13 4330 movq %r9,%r14 4331 shrdq $2,%r9,%r15 4332 shrq $2,%r9 4333 addq %r13,%r10 4334 adcq %r14,%r11 4335 adcq $0,%r12 4336 addq %r15,%r10 4337 adcq %r9,%r11 4338 adcq $0,%r12 4339 vpalignr $8,%ymm9,%ymm9,%ymm9 4340 vpalignr $4,%ymm13,%ymm13,%ymm13 4341 vpalignr $12,%ymm4,%ymm4,%ymm4 4342 vpalignr $8,%ymm8,%ymm8,%ymm8 4343 vpalignr $4,%ymm12,%ymm12,%ymm12 4344 4345 cmpq $60*8,%rcx 4346 jne 2b 4347 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 4348 vpaddd 64(%rbp),%ymm7,%ymm7 4349 vpaddd 96(%rbp),%ymm11,%ymm11 4350 vpaddd 256(%rbp),%ymm15,%ymm15 4351 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 4352 vpaddd 64(%rbp),%ymm6,%ymm6 4353 vpaddd 96(%rbp),%ymm10,%ymm10 4354 vpaddd 224(%rbp),%ymm14,%ymm14 4355 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 4356 vpaddd 64(%rbp),%ymm5,%ymm5 4357 vpaddd 96(%rbp),%ymm9,%ymm9 4358 vpaddd 192(%rbp),%ymm13,%ymm13 4359 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4360 vpaddd 64(%rbp),%ymm4,%ymm4 4361 vpaddd 96(%rbp),%ymm8,%ymm8 4362 vpaddd 160(%rbp),%ymm12,%ymm12 4363 4364 vmovdqa %ymm0,128(%rbp) 4365 addq 60*8(%rsi),%r10 4366 adcq 8+60*8(%rsi),%r11 4367 adcq $1,%r12 4368 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4369 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4370 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4371 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4372 vpxor 0+0(%rsi),%ymm0,%ymm0 4373 vpxor 32+0(%rsi),%ymm3,%ymm3 4374 vpxor 64+0(%rsi),%ymm7,%ymm7 4375 vpxor 96+0(%rsi),%ymm11,%ymm11 4376 vmovdqu %ymm0,0+0(%rdi) 4377 vmovdqu %ymm3,32+0(%rdi) 4378 vmovdqu %ymm7,64+0(%rdi) 4379 vmovdqu %ymm11,96+0(%rdi) 4380 4381 vmovdqa 128(%rbp),%ymm0 4382 movq 0+0(%rbp),%rax 4383 movq %rax,%r15 4384 mulq %r10 4385 movq %rax,%r13 4386 movq %rdx,%r14 4387 movq 0+0(%rbp),%rax 4388 mulq %r11 4389 imulq %r12,%r15 4390 addq %rax,%r14 4391 adcq %rdx,%r15 4392 movq 8+0(%rbp),%rax 4393 movq %rax,%r9 4394 mulq %r10 4395 addq %rax,%r14 4396 adcq $0,%rdx 4397 movq %rdx,%r10 4398 movq 8+0(%rbp),%rax 4399 mulq %r11 4400 addq %rax,%r15 4401 adcq $0,%rdx 4402 imulq %r12,%r9 4403 addq %r10,%r15 4404 adcq %rdx,%r9 4405 movq %r13,%r10 4406 movq %r14,%r11 4407 movq %r15,%r12 4408 andq $3,%r12 4409 movq %r15,%r13 4410 andq $-4,%r13 4411 movq %r9,%r14 4412 shrdq $2,%r9,%r15 4413 shrq $2,%r9 4414 addq %r13,%r10 4415 adcq %r14,%r11 4416 adcq $0,%r12 4417 addq %r15,%r10 4418 adcq %r9,%r11 4419 adcq $0,%r12 4420 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4421 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4422 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4423 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4424 vpxor 0+128(%rsi),%ymm3,%ymm3 4425 vpxor 32+128(%rsi),%ymm2,%ymm2 4426 vpxor 64+128(%rsi),%ymm6,%ymm6 4427 vpxor 96+128(%rsi),%ymm10,%ymm10 4428 vmovdqu %ymm3,0+128(%rdi) 4429 vmovdqu %ymm2,32+128(%rdi) 4430 vmovdqu %ymm6,64+128(%rdi) 4431 vmovdqu %ymm10,96+128(%rdi) 4432 addq 60*8+16(%rsi),%r10 4433 adcq 8+60*8+16(%rsi),%r11 4434 adcq $1,%r12 4435 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4436 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4437 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4438 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4439 vpxor 0+256(%rsi),%ymm3,%ymm3 4440 vpxor 32+256(%rsi),%ymm1,%ymm1 4441 vpxor 64+256(%rsi),%ymm5,%ymm5 4442 vpxor 96+256(%rsi),%ymm9,%ymm9 4443 vmovdqu %ymm3,0+256(%rdi) 4444 vmovdqu %ymm1,32+256(%rdi) 4445 vmovdqu %ymm5,64+256(%rdi) 4446 vmovdqu %ymm9,96+256(%rdi) 4447 movq 0+0(%rbp),%rax 4448 movq %rax,%r15 4449 mulq %r10 4450 movq %rax,%r13 4451 movq %rdx,%r14 4452 movq 0+0(%rbp),%rax 4453 mulq %r11 4454 imulq %r12,%r15 4455 addq %rax,%r14 4456 adcq %rdx,%r15 4457 movq 8+0(%rbp),%rax 4458 movq %rax,%r9 4459 mulq %r10 4460 addq %rax,%r14 4461 adcq $0,%rdx 4462 movq %rdx,%r10 4463 movq 8+0(%rbp),%rax 4464 mulq %r11 4465 addq %rax,%r15 4466 adcq $0,%rdx 4467 imulq %r12,%r9 4468 addq %r10,%r15 4469 adcq %rdx,%r9 4470 movq %r13,%r10 4471 movq %r14,%r11 4472 movq %r15,%r12 4473 andq $3,%r12 4474 movq %r15,%r13 4475 andq $-4,%r13 4476 movq %r9,%r14 4477 shrdq $2,%r9,%r15 4478 shrq $2,%r9 4479 addq %r13,%r10 4480 adcq %r14,%r11 4481 adcq $0,%r12 4482 addq %r15,%r10 4483 adcq %r9,%r11 4484 adcq $0,%r12 4485 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4486 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4487 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4488 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4489 vpxor 0+384(%rsi),%ymm3,%ymm3 4490 vpxor 32+384(%rsi),%ymm0,%ymm0 4491 vpxor 64+384(%rsi),%ymm4,%ymm4 4492 vpxor 96+384(%rsi),%ymm8,%ymm8 4493 vmovdqu %ymm3,0+384(%rdi) 4494 vmovdqu %ymm0,32+384(%rdi) 4495 vmovdqu %ymm4,64+384(%rdi) 4496 vmovdqu %ymm8,96+384(%rdi) 4497 4498 leaq 512(%rsi),%rsi 4499 leaq 512(%rdi),%rdi 4500 subq $512,%rbx 4501 jmp 1b 4502 3: 4503 testq %rbx,%rbx 4504 vzeroupper 4505 je open_sse_finalize 4506 3: 4507 cmpq $128,%rbx 4508 ja 3f 4509 vmovdqa .chacha20_consts(%rip),%ymm0 4510 vmovdqa 64(%rbp),%ymm4 4511 vmovdqa 96(%rbp),%ymm8 4512 vmovdqa .avx2_inc(%rip),%ymm12 4513 vpaddd 160(%rbp),%ymm12,%ymm12 4514 vmovdqa %ymm12,160(%rbp) 4515 4516 xorq %r8,%r8 4517 movq %rbx,%rcx 4518 andq $-16,%rcx 4519 testq %rcx,%rcx 4520 je 2f 4521 1: 4522 addq 0*8(%rsi,%r8), %r10 4523 adcq 8+0*8(%rsi,%r8), %r11 4524 adcq $1,%r12 4525 movq 0+0(%rbp),%rax 4526 movq %rax,%r15 4527 mulq %r10 4528 movq %rax,%r13 4529 movq %rdx,%r14 4530 movq 0+0(%rbp),%rax 4531 mulq %r11 4532 imulq %r12,%r15 4533 addq %rax,%r14 4534 adcq %rdx,%r15 4535 movq 8+0(%rbp),%rax 4536 movq %rax,%r9 4537 mulq %r10 4538 addq %rax,%r14 4539 adcq $0,%rdx 4540 movq %rdx,%r10 4541 movq 8+0(%rbp),%rax 4542 mulq %r11 4543 addq %rax,%r15 4544 adcq $0,%rdx 4545 imulq %r12,%r9 4546 addq %r10,%r15 4547 adcq %rdx,%r9 4548 movq %r13,%r10 4549 movq %r14,%r11 4550 movq %r15,%r12 4551 andq $3,%r12 4552 movq %r15,%r13 4553 andq $-4,%r13 4554 movq %r9,%r14 4555 shrdq $2,%r9,%r15 4556 shrq $2,%r9 4557 addq %r13,%r10 4558 adcq %r14,%r11 4559 adcq $0,%r12 4560 addq %r15,%r10 4561 adcq %r9,%r11 4562 adcq $0,%r12 4563 4564 2: 4565 addq $16,%r8 4566 vpaddd %ymm4,%ymm0,%ymm0 4567 vpxor %ymm0,%ymm12,%ymm12 4568 vpshufb .rol16(%rip),%ymm12,%ymm12 4569 vpaddd %ymm12,%ymm8,%ymm8 4570 vpxor %ymm8,%ymm4,%ymm4 4571 vpsrld $20,%ymm4,%ymm3 4572 vpslld $12,%ymm4,%ymm4 4573 vpxor %ymm3,%ymm4,%ymm4 4574 vpaddd %ymm4,%ymm0,%ymm0 4575 vpxor %ymm0,%ymm12,%ymm12 4576 vpshufb .rol8(%rip),%ymm12,%ymm12 4577 vpaddd %ymm12,%ymm8,%ymm8 4578 vpxor %ymm8,%ymm4,%ymm4 4579 vpslld $7,%ymm4,%ymm3 4580 vpsrld $25,%ymm4,%ymm4 4581 vpxor %ymm3,%ymm4,%ymm4 4582 vpalignr $12,%ymm12,%ymm12,%ymm12 4583 vpalignr $8,%ymm8,%ymm8,%ymm8 4584 vpalignr $4,%ymm4,%ymm4,%ymm4 4585 vpaddd %ymm4,%ymm0,%ymm0 4586 vpxor %ymm0,%ymm12,%ymm12 4587 vpshufb .rol16(%rip),%ymm12,%ymm12 4588 vpaddd %ymm12,%ymm8,%ymm8 4589 vpxor %ymm8,%ymm4,%ymm4 4590 vpsrld $20,%ymm4,%ymm3 4591 vpslld $12,%ymm4,%ymm4 4592 vpxor %ymm3,%ymm4,%ymm4 4593 vpaddd %ymm4,%ymm0,%ymm0 4594 vpxor %ymm0,%ymm12,%ymm12 4595 vpshufb .rol8(%rip),%ymm12,%ymm12 4596 vpaddd %ymm12,%ymm8,%ymm8 4597 vpxor %ymm8,%ymm4,%ymm4 4598 vpslld $7,%ymm4,%ymm3 4599 vpsrld $25,%ymm4,%ymm4 4600 vpxor %ymm3,%ymm4,%ymm4 4601 vpalignr $4,%ymm12,%ymm12,%ymm12 4602 vpalignr $8,%ymm8,%ymm8,%ymm8 4603 vpalignr $12,%ymm4,%ymm4,%ymm4 4604 4605 cmpq %rcx,%r8 4606 jb 1b 4607 cmpq $160,%r8 4608 jne 2b 4609 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4610 vpaddd 64(%rbp),%ymm4,%ymm4 4611 vpaddd 96(%rbp),%ymm8,%ymm8 4612 vpaddd 160(%rbp),%ymm12,%ymm12 4613 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4614 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4615 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4616 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4617 vmovdqa %ymm3,%ymm8 4618 4619 jmp open_avx2_tail_loop 4620 3: 4621 cmpq $256,%rbx 4622 ja 3f 4623 vmovdqa .chacha20_consts(%rip),%ymm0 4624 vmovdqa 64(%rbp),%ymm4 4625 vmovdqa 96(%rbp),%ymm8 4626 vmovdqa %ymm0,%ymm1 4627 vmovdqa %ymm4,%ymm5 4628 vmovdqa %ymm8,%ymm9 4629 vmovdqa .avx2_inc(%rip),%ymm12 4630 vpaddd 160(%rbp),%ymm12,%ymm13 4631 vpaddd %ymm13,%ymm12,%ymm12 4632 vmovdqa %ymm12,160(%rbp) 4633 vmovdqa %ymm13,192(%rbp) 4634 4635 movq %rbx,128(%rbp) 4636 movq %rbx,%rcx 4637 subq $128,%rcx 4638 shrq $4,%rcx 4639 movq $10,%r8 4640 cmpq $10,%rcx 4641 cmovgq %r8,%rcx 4642 movq %rsi,%rbx 4643 xorq %r8,%r8 4644 1: 4645 addq 0(%rbx),%r10 4646 adcq 8+0(%rbx),%r11 4647 adcq $1,%r12 4648 movq 0+0(%rbp),%rdx 4649 movq %rdx,%r15 4650 mulxq %r10,%r13,%r14 4651 mulxq %r11,%rax,%rdx 4652 imulq %r12,%r15 4653 addq %rax,%r14 4654 adcq %rdx,%r15 4655 movq 8+0(%rbp),%rdx 4656 mulxq %r10,%r10,%rax 4657 addq %r10,%r14 4658 mulxq %r11,%r11,%r9 4659 adcq %r11,%r15 4660 adcq $0,%r9 4661 imulq %r12,%rdx 4662 addq %rax,%r15 4663 adcq %rdx,%r9 4664 movq %r13,%r10 4665 movq %r14,%r11 4666 movq %r15,%r12 4667 andq $3,%r12 4668 movq %r15,%r13 4669 andq $-4,%r13 4670 movq %r9,%r14 4671 shrdq $2,%r9,%r15 4672 shrq $2,%r9 4673 addq %r13,%r10 4674 adcq %r14,%r11 4675 adcq $0,%r12 4676 addq %r15,%r10 4677 adcq %r9,%r11 4678 adcq $0,%r12 4679 4680 leaq 16(%rbx),%rbx 4681 2: 4682 vpaddd %ymm4,%ymm0,%ymm0 4683 vpxor %ymm0,%ymm12,%ymm12 4684 vpshufb .rol16(%rip),%ymm12,%ymm12 4685 vpaddd %ymm12,%ymm8,%ymm8 4686 vpxor %ymm8,%ymm4,%ymm4 4687 vpsrld $20,%ymm4,%ymm3 4688 vpslld $12,%ymm4,%ymm4 4689 vpxor %ymm3,%ymm4,%ymm4 4690 vpaddd %ymm4,%ymm0,%ymm0 4691 vpxor %ymm0,%ymm12,%ymm12 4692 vpshufb .rol8(%rip),%ymm12,%ymm12 4693 vpaddd %ymm12,%ymm8,%ymm8 4694 vpxor %ymm8,%ymm4,%ymm4 4695 vpslld $7,%ymm4,%ymm3 4696 vpsrld $25,%ymm4,%ymm4 4697 vpxor %ymm3,%ymm4,%ymm4 4698 vpalignr $12,%ymm12,%ymm12,%ymm12 4699 vpalignr $8,%ymm8,%ymm8,%ymm8 4700 vpalignr $4,%ymm4,%ymm4,%ymm4 4701 vpaddd %ymm5,%ymm1,%ymm1 4702 vpxor %ymm1,%ymm13,%ymm13 4703 vpshufb .rol16(%rip),%ymm13,%ymm13 4704 vpaddd %ymm13,%ymm9,%ymm9 4705 vpxor %ymm9,%ymm5,%ymm5 4706 vpsrld $20,%ymm5,%ymm3 4707 vpslld $12,%ymm5,%ymm5 4708 vpxor %ymm3,%ymm5,%ymm5 4709 vpaddd %ymm5,%ymm1,%ymm1 4710 vpxor %ymm1,%ymm13,%ymm13 4711 vpshufb .rol8(%rip),%ymm13,%ymm13 4712 vpaddd %ymm13,%ymm9,%ymm9 4713 vpxor %ymm9,%ymm5,%ymm5 4714 vpslld $7,%ymm5,%ymm3 4715 vpsrld $25,%ymm5,%ymm5 4716 vpxor %ymm3,%ymm5,%ymm5 4717 vpalignr $12,%ymm13,%ymm13,%ymm13 4718 vpalignr $8,%ymm9,%ymm9,%ymm9 4719 vpalignr $4,%ymm5,%ymm5,%ymm5 4720 4721 incq %r8 4722 vpaddd %ymm4,%ymm0,%ymm0 4723 vpxor %ymm0,%ymm12,%ymm12 4724 vpshufb .rol16(%rip),%ymm12,%ymm12 4725 vpaddd %ymm12,%ymm8,%ymm8 4726 vpxor %ymm8,%ymm4,%ymm4 4727 vpsrld $20,%ymm4,%ymm3 4728 vpslld $12,%ymm4,%ymm4 4729 vpxor %ymm3,%ymm4,%ymm4 4730 vpaddd %ymm4,%ymm0,%ymm0 4731 vpxor %ymm0,%ymm12,%ymm12 4732 vpshufb .rol8(%rip),%ymm12,%ymm12 4733 vpaddd %ymm12,%ymm8,%ymm8 4734 vpxor %ymm8,%ymm4,%ymm4 4735 vpslld $7,%ymm4,%ymm3 4736 vpsrld $25,%ymm4,%ymm4 4737 vpxor %ymm3,%ymm4,%ymm4 4738 vpalignr $4,%ymm12,%ymm12,%ymm12 4739 vpalignr $8,%ymm8,%ymm8,%ymm8 4740 vpalignr $12,%ymm4,%ymm4,%ymm4 4741 vpaddd %ymm5,%ymm1,%ymm1 4742 vpxor %ymm1,%ymm13,%ymm13 4743 vpshufb .rol16(%rip),%ymm13,%ymm13 4744 vpaddd %ymm13,%ymm9,%ymm9 4745 vpxor %ymm9,%ymm5,%ymm5 4746 vpsrld $20,%ymm5,%ymm3 4747 vpslld $12,%ymm5,%ymm5 4748 vpxor %ymm3,%ymm5,%ymm5 4749 vpaddd %ymm5,%ymm1,%ymm1 4750 vpxor %ymm1,%ymm13,%ymm13 4751 vpshufb .rol8(%rip),%ymm13,%ymm13 4752 vpaddd %ymm13,%ymm9,%ymm9 4753 vpxor %ymm9,%ymm5,%ymm5 4754 vpslld $7,%ymm5,%ymm3 4755 vpsrld $25,%ymm5,%ymm5 4756 vpxor %ymm3,%ymm5,%ymm5 4757 vpalignr $4,%ymm13,%ymm13,%ymm13 4758 vpalignr $8,%ymm9,%ymm9,%ymm9 4759 vpalignr $12,%ymm5,%ymm5,%ymm5 4760 vpaddd %ymm6,%ymm2,%ymm2 4761 vpxor %ymm2,%ymm14,%ymm14 4762 vpshufb .rol16(%rip),%ymm14,%ymm14 4763 vpaddd %ymm14,%ymm10,%ymm10 4764 vpxor %ymm10,%ymm6,%ymm6 4765 vpsrld $20,%ymm6,%ymm3 4766 vpslld $12,%ymm6,%ymm6 4767 vpxor %ymm3,%ymm6,%ymm6 4768 vpaddd %ymm6,%ymm2,%ymm2 4769 vpxor %ymm2,%ymm14,%ymm14 4770 vpshufb .rol8(%rip),%ymm14,%ymm14 4771 vpaddd %ymm14,%ymm10,%ymm10 4772 vpxor %ymm10,%ymm6,%ymm6 4773 vpslld $7,%ymm6,%ymm3 4774 vpsrld $25,%ymm6,%ymm6 4775 vpxor %ymm3,%ymm6,%ymm6 4776 vpalignr $4,%ymm14,%ymm14,%ymm14 4777 vpalignr $8,%ymm10,%ymm10,%ymm10 4778 vpalignr $12,%ymm6,%ymm6,%ymm6 4779 4780 cmpq %rcx,%r8 4781 jb 1b 4782 cmpq $10,%r8 4783 jne 2b 4784 movq %rbx,%r8 4785 subq %rsi,%rbx 4786 movq %rbx,%rcx 4787 movq 128(%rbp),%rbx 4788 1: 4789 addq $16,%rcx 4790 cmpq %rbx,%rcx 4791 jg 1f 4792 addq 0(%r8),%r10 4793 adcq 8+0(%r8),%r11 4794 adcq $1,%r12 4795 movq 0+0(%rbp),%rdx 4796 movq %rdx,%r15 4797 mulxq %r10,%r13,%r14 4798 mulxq %r11,%rax,%rdx 4799 imulq %r12,%r15 4800 addq %rax,%r14 4801 adcq %rdx,%r15 4802 movq 8+0(%rbp),%rdx 4803 mulxq %r10,%r10,%rax 4804 addq %r10,%r14 4805 mulxq %r11,%r11,%r9 4806 adcq %r11,%r15 4807 adcq $0,%r9 4808 imulq %r12,%rdx 4809 addq %rax,%r15 4810 adcq %rdx,%r9 4811 movq %r13,%r10 4812 movq %r14,%r11 4813 movq %r15,%r12 4814 andq $3,%r12 4815 movq %r15,%r13 4816 andq $-4,%r13 4817 movq %r9,%r14 4818 shrdq $2,%r9,%r15 4819 shrq $2,%r9 4820 addq %r13,%r10 4821 adcq %r14,%r11 4822 adcq $0,%r12 4823 addq %r15,%r10 4824 adcq %r9,%r11 4825 adcq $0,%r12 4826 4827 leaq 16(%r8),%r8 4828 jmp 1b 4829 1: 4830 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 4831 vpaddd 64(%rbp),%ymm5,%ymm5 4832 vpaddd 96(%rbp),%ymm9,%ymm9 4833 vpaddd 192(%rbp),%ymm13,%ymm13 4834 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4835 vpaddd 64(%rbp),%ymm4,%ymm4 4836 vpaddd 96(%rbp),%ymm8,%ymm8 4837 vpaddd 160(%rbp),%ymm12,%ymm12 4838 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4839 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4840 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4841 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4842 vpxor 0+0(%rsi),%ymm3,%ymm3 4843 vpxor 32+0(%rsi),%ymm1,%ymm1 4844 vpxor 64+0(%rsi),%ymm5,%ymm5 4845 vpxor 96+0(%rsi),%ymm9,%ymm9 4846 vmovdqu %ymm3,0+0(%rdi) 4847 vmovdqu %ymm1,32+0(%rdi) 4848 vmovdqu %ymm5,64+0(%rdi) 4849 vmovdqu %ymm9,96+0(%rdi) 4850 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4851 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4852 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4853 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4854 vmovdqa %ymm3,%ymm8 4855 4856 leaq 128(%rsi),%rsi 4857 leaq 128(%rdi),%rdi 4858 subq $128,%rbx 4859 jmp open_avx2_tail_loop 4860 3: 4861 cmpq $384,%rbx 4862 ja 3f 4863 vmovdqa .chacha20_consts(%rip),%ymm0 4864 vmovdqa 64(%rbp),%ymm4 4865 vmovdqa 96(%rbp),%ymm8 4866 vmovdqa %ymm0,%ymm1 4867 vmovdqa %ymm4,%ymm5 4868 vmovdqa %ymm8,%ymm9 4869 vmovdqa %ymm0,%ymm2 4870 vmovdqa %ymm4,%ymm6 4871 vmovdqa %ymm8,%ymm10 4872 vmovdqa .avx2_inc(%rip),%ymm12 4873 vpaddd 160(%rbp),%ymm12,%ymm14 4874 vpaddd %ymm14,%ymm12,%ymm13 4875 vpaddd %ymm13,%ymm12,%ymm12 4876 vmovdqa %ymm12,160(%rbp) 4877 vmovdqa %ymm13,192(%rbp) 4878 vmovdqa %ymm14,224(%rbp) 4879 4880 movq %rbx,128(%rbp) 4881 movq %rbx,%rcx 4882 subq $256,%rcx 4883 shrq $4,%rcx 4884 addq $6,%rcx 4885 movq $10,%r8 4886 cmpq $10,%rcx 4887 cmovgq %r8,%rcx 4888 movq %rsi,%rbx 4889 xorq %r8,%r8 4890 1: 4891 addq 0(%rbx),%r10 4892 adcq 8+0(%rbx),%r11 4893 adcq $1,%r12 4894 movq 0+0(%rbp),%rdx 4895 movq %rdx,%r15 4896 mulxq %r10,%r13,%r14 4897 mulxq %r11,%rax,%rdx 4898 imulq %r12,%r15 4899 addq %rax,%r14 4900 adcq %rdx,%r15 4901 movq 8+0(%rbp),%rdx 4902 mulxq %r10,%r10,%rax 4903 addq %r10,%r14 4904 mulxq %r11,%r11,%r9 4905 adcq %r11,%r15 4906 adcq $0,%r9 4907 imulq %r12,%rdx 4908 addq %rax,%r15 4909 adcq %rdx,%r9 4910 movq %r13,%r10 4911 movq %r14,%r11 4912 movq %r15,%r12 4913 andq $3,%r12 4914 movq %r15,%r13 4915 andq $-4,%r13 4916 movq %r9,%r14 4917 shrdq $2,%r9,%r15 4918 shrq $2,%r9 4919 addq %r13,%r10 4920 adcq %r14,%r11 4921 adcq $0,%r12 4922 addq %r15,%r10 4923 adcq %r9,%r11 4924 adcq $0,%r12 4925 4926 leaq 16(%rbx),%rbx 4927 2: 4928 vpaddd %ymm6,%ymm2,%ymm2 4929 vpxor %ymm2,%ymm14,%ymm14 4930 vpshufb .rol16(%rip),%ymm14,%ymm14 4931 vpaddd %ymm14,%ymm10,%ymm10 4932 vpxor %ymm10,%ymm6,%ymm6 4933 vpsrld $20,%ymm6,%ymm3 4934 vpslld $12,%ymm6,%ymm6 4935 vpxor %ymm3,%ymm6,%ymm6 4936 vpaddd %ymm6,%ymm2,%ymm2 4937 vpxor %ymm2,%ymm14,%ymm14 4938 vpshufb .rol8(%rip),%ymm14,%ymm14 4939 vpaddd %ymm14,%ymm10,%ymm10 4940 vpxor %ymm10,%ymm6,%ymm6 4941 vpslld $7,%ymm6,%ymm3 4942 vpsrld $25,%ymm6,%ymm6 4943 vpxor %ymm3,%ymm6,%ymm6 4944 vpalignr $12,%ymm14,%ymm14,%ymm14 4945 vpalignr $8,%ymm10,%ymm10,%ymm10 4946 vpalignr $4,%ymm6,%ymm6,%ymm6 4947 vpaddd %ymm5,%ymm1,%ymm1 4948 vpxor %ymm1,%ymm13,%ymm13 4949 vpshufb .rol16(%rip),%ymm13,%ymm13 4950 vpaddd %ymm13,%ymm9,%ymm9 4951 vpxor %ymm9,%ymm5,%ymm5 4952 vpsrld $20,%ymm5,%ymm3 4953 vpslld $12,%ymm5,%ymm5 4954 vpxor %ymm3,%ymm5,%ymm5 4955 vpaddd %ymm5,%ymm1,%ymm1 4956 vpxor %ymm1,%ymm13,%ymm13 4957 vpshufb .rol8(%rip),%ymm13,%ymm13 4958 vpaddd %ymm13,%ymm9,%ymm9 4959 vpxor %ymm9,%ymm5,%ymm5 4960 vpslld $7,%ymm5,%ymm3 4961 vpsrld $25,%ymm5,%ymm5 4962 vpxor %ymm3,%ymm5,%ymm5 4963 vpalignr $12,%ymm13,%ymm13,%ymm13 4964 vpalignr $8,%ymm9,%ymm9,%ymm9 4965 vpalignr $4,%ymm5,%ymm5,%ymm5 4966 vpaddd %ymm4,%ymm0,%ymm0 4967 vpxor %ymm0,%ymm12,%ymm12 4968 vpshufb .rol16(%rip),%ymm12,%ymm12 4969 vpaddd %ymm12,%ymm8,%ymm8 4970 vpxor %ymm8,%ymm4,%ymm4 4971 vpsrld $20,%ymm4,%ymm3 4972 vpslld $12,%ymm4,%ymm4 4973 vpxor %ymm3,%ymm4,%ymm4 4974 vpaddd %ymm4,%ymm0,%ymm0 4975 vpxor %ymm0,%ymm12,%ymm12 4976 vpshufb .rol8(%rip),%ymm12,%ymm12 4977 vpaddd %ymm12,%ymm8,%ymm8 4978 vpxor %ymm8,%ymm4,%ymm4 4979 vpslld $7,%ymm4,%ymm3 4980 vpsrld $25,%ymm4,%ymm4 4981 vpxor %ymm3,%ymm4,%ymm4 4982 vpalignr $12,%ymm12,%ymm12,%ymm12 4983 vpalignr $8,%ymm8,%ymm8,%ymm8 4984 vpalignr $4,%ymm4,%ymm4,%ymm4 4985 addq 0(%rbx),%r10 4986 adcq 8+0(%rbx),%r11 4987 adcq $1,%r12 4988 movq 0+0(%rbp),%rax 4989 movq %rax,%r15 4990 mulq %r10 4991 movq %rax,%r13 4992 movq %rdx,%r14 4993 movq 0+0(%rbp),%rax 4994 mulq %r11 4995 imulq %r12,%r15 4996 addq %rax,%r14 4997 adcq %rdx,%r15 4998 movq 8+0(%rbp),%rax 4999 movq %rax,%r9 5000 mulq %r10 5001 addq %rax,%r14 5002 adcq $0,%rdx 5003 movq %rdx,%r10 5004 movq 8+0(%rbp),%rax 5005 mulq %r11 5006 addq %rax,%r15 5007 adcq $0,%rdx 5008 imulq %r12,%r9 5009 addq %r10,%r15 5010 adcq %rdx,%r9 5011 movq %r13,%r10 5012 movq %r14,%r11 5013 movq %r15,%r12 5014 andq $3,%r12 5015 movq %r15,%r13 5016 andq $-4,%r13 5017 movq %r9,%r14 5018 shrdq $2,%r9,%r15 5019 shrq $2,%r9 5020 addq %r13,%r10 5021 adcq %r14,%r11 5022 adcq $0,%r12 5023 addq %r15,%r10 5024 adcq %r9,%r11 5025 adcq $0,%r12 5026 5027 leaq 16(%rbx),%rbx 5028 incq %r8 5029 vpaddd %ymm6,%ymm2,%ymm2 5030 vpxor %ymm2,%ymm14,%ymm14 5031 vpshufb .rol16(%rip),%ymm14,%ymm14 5032 vpaddd %ymm14,%ymm10,%ymm10 5033 vpxor %ymm10,%ymm6,%ymm6 5034 vpsrld $20,%ymm6,%ymm3 5035 vpslld $12,%ymm6,%ymm6 5036 vpxor %ymm3,%ymm6,%ymm6 5037 vpaddd %ymm6,%ymm2,%ymm2 5038 vpxor %ymm2,%ymm14,%ymm14 5039 vpshufb .rol8(%rip),%ymm14,%ymm14 5040 vpaddd %ymm14,%ymm10,%ymm10 5041 vpxor %ymm10,%ymm6,%ymm6 5042 vpslld $7,%ymm6,%ymm3 5043 vpsrld $25,%ymm6,%ymm6 5044 vpxor %ymm3,%ymm6,%ymm6 5045 vpalignr $4,%ymm14,%ymm14,%ymm14 5046 vpalignr $8,%ymm10,%ymm10,%ymm10 5047 vpalignr $12,%ymm6,%ymm6,%ymm6 5048 vpaddd %ymm5,%ymm1,%ymm1 5049 vpxor %ymm1,%ymm13,%ymm13 5050 vpshufb .rol16(%rip),%ymm13,%ymm13 5051 vpaddd %ymm13,%ymm9,%ymm9 5052 vpxor %ymm9,%ymm5,%ymm5 5053 vpsrld $20,%ymm5,%ymm3 5054 vpslld $12,%ymm5,%ymm5 5055 vpxor %ymm3,%ymm5,%ymm5 5056 vpaddd %ymm5,%ymm1,%ymm1 5057 vpxor %ymm1,%ymm13,%ymm13 5058 vpshufb .rol8(%rip),%ymm13,%ymm13 5059 vpaddd %ymm13,%ymm9,%ymm9 5060 vpxor %ymm9,%ymm5,%ymm5 5061 vpslld $7,%ymm5,%ymm3 5062 vpsrld $25,%ymm5,%ymm5 5063 vpxor %ymm3,%ymm5,%ymm5 5064 vpalignr $4,%ymm13,%ymm13,%ymm13 5065 vpalignr $8,%ymm9,%ymm9,%ymm9 5066 vpalignr $12,%ymm5,%ymm5,%ymm5 5067 vpaddd %ymm4,%ymm0,%ymm0 5068 vpxor %ymm0,%ymm12,%ymm12 5069 vpshufb .rol16(%rip),%ymm12,%ymm12 5070 vpaddd %ymm12,%ymm8,%ymm8 5071 vpxor %ymm8,%ymm4,%ymm4 5072 vpsrld $20,%ymm4,%ymm3 5073 vpslld $12,%ymm4,%ymm4 5074 vpxor %ymm3,%ymm4,%ymm4 5075 vpaddd %ymm4,%ymm0,%ymm0 5076 vpxor %ymm0,%ymm12,%ymm12 5077 vpshufb .rol8(%rip),%ymm12,%ymm12 5078 vpaddd %ymm12,%ymm8,%ymm8 5079 vpxor %ymm8,%ymm4,%ymm4 5080 vpslld $7,%ymm4,%ymm3 5081 vpsrld $25,%ymm4,%ymm4 5082 vpxor %ymm3,%ymm4,%ymm4 5083 vpalignr $4,%ymm12,%ymm12,%ymm12 5084 vpalignr $8,%ymm8,%ymm8,%ymm8 5085 vpalignr $12,%ymm4,%ymm4,%ymm4 5086 5087 cmpq %rcx,%r8 5088 jb 1b 5089 cmpq $10,%r8 5090 jne 2b 5091 movq %rbx,%r8 5092 subq %rsi,%rbx 5093 movq %rbx,%rcx 5094 movq 128(%rbp),%rbx 5095 1: 5096 addq $16,%rcx 5097 cmpq %rbx,%rcx 5098 jg 1f 5099 addq 0(%r8),%r10 5100 adcq 8+0(%r8),%r11 5101 adcq $1,%r12 5102 movq 0+0(%rbp),%rdx 5103 movq %rdx,%r15 5104 mulxq %r10,%r13,%r14 5105 mulxq %r11,%rax,%rdx 5106 imulq %r12,%r15 5107 addq %rax,%r14 5108 adcq %rdx,%r15 5109 movq 8+0(%rbp),%rdx 5110 mulxq %r10,%r10,%rax 5111 addq %r10,%r14 5112 mulxq %r11,%r11,%r9 5113 adcq %r11,%r15 5114 adcq $0,%r9 5115 imulq %r12,%rdx 5116 addq %rax,%r15 5117 adcq %rdx,%r9 5118 movq %r13,%r10 5119 movq %r14,%r11 5120 movq %r15,%r12 5121 andq $3,%r12 5122 movq %r15,%r13 5123 andq $-4,%r13 5124 movq %r9,%r14 5125 shrdq $2,%r9,%r15 5126 shrq $2,%r9 5127 addq %r13,%r10 5128 adcq %r14,%r11 5129 adcq $0,%r12 5130 addq %r15,%r10 5131 adcq %r9,%r11 5132 adcq $0,%r12 5133 5134 leaq 16(%r8),%r8 5135 jmp 1b 5136 1: 5137 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 5138 vpaddd 64(%rbp),%ymm6,%ymm6 5139 vpaddd 96(%rbp),%ymm10,%ymm10 5140 vpaddd 224(%rbp),%ymm14,%ymm14 5141 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 5142 vpaddd 64(%rbp),%ymm5,%ymm5 5143 vpaddd 96(%rbp),%ymm9,%ymm9 5144 vpaddd 192(%rbp),%ymm13,%ymm13 5145 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5146 vpaddd 64(%rbp),%ymm4,%ymm4 5147 vpaddd 96(%rbp),%ymm8,%ymm8 5148 vpaddd 160(%rbp),%ymm12,%ymm12 5149 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5150 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5151 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5152 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5153 vpxor 0+0(%rsi),%ymm3,%ymm3 5154 vpxor 32+0(%rsi),%ymm2,%ymm2 5155 vpxor 64+0(%rsi),%ymm6,%ymm6 5156 vpxor 96+0(%rsi),%ymm10,%ymm10 5157 vmovdqu %ymm3,0+0(%rdi) 5158 vmovdqu %ymm2,32+0(%rdi) 5159 vmovdqu %ymm6,64+0(%rdi) 5160 vmovdqu %ymm10,96+0(%rdi) 5161 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5162 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5163 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5164 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5165 vpxor 0+128(%rsi),%ymm3,%ymm3 5166 vpxor 32+128(%rsi),%ymm1,%ymm1 5167 vpxor 64+128(%rsi),%ymm5,%ymm5 5168 vpxor 96+128(%rsi),%ymm9,%ymm9 5169 vmovdqu %ymm3,0+128(%rdi) 5170 vmovdqu %ymm1,32+128(%rdi) 5171 vmovdqu %ymm5,64+128(%rdi) 5172 vmovdqu %ymm9,96+128(%rdi) 5173 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5174 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5175 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5176 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5177 vmovdqa %ymm3,%ymm8 5178 5179 leaq 256(%rsi),%rsi 5180 leaq 256(%rdi),%rdi 5181 subq $256,%rbx 5182 jmp open_avx2_tail_loop 5183 3: 5184 vmovdqa .chacha20_consts(%rip),%ymm0 5185 vmovdqa 64(%rbp),%ymm4 5186 vmovdqa 96(%rbp),%ymm8 5187 vmovdqa %ymm0,%ymm1 5188 vmovdqa %ymm4,%ymm5 5189 vmovdqa %ymm8,%ymm9 5190 vmovdqa %ymm0,%ymm2 5191 vmovdqa %ymm4,%ymm6 5192 vmovdqa %ymm8,%ymm10 5193 vmovdqa %ymm0,%ymm3 5194 vmovdqa %ymm4,%ymm7 5195 vmovdqa %ymm8,%ymm11 5196 vmovdqa .avx2_inc(%rip),%ymm12 5197 vpaddd 160(%rbp),%ymm12,%ymm15 5198 vpaddd %ymm15,%ymm12,%ymm14 5199 vpaddd %ymm14,%ymm12,%ymm13 5200 vpaddd %ymm13,%ymm12,%ymm12 5201 vmovdqa %ymm15,256(%rbp) 5202 vmovdqa %ymm14,224(%rbp) 5203 vmovdqa %ymm13,192(%rbp) 5204 vmovdqa %ymm12,160(%rbp) 5205 5206 xorq %rcx,%rcx 5207 movq %rsi,%r8 5208 1: 5209 addq 0(%r8),%r10 5210 adcq 8+0(%r8),%r11 5211 adcq $1,%r12 5212 movq 0+0(%rbp),%rax 5213 movq %rax,%r15 5214 mulq %r10 5215 movq %rax,%r13 5216 movq %rdx,%r14 5217 movq 0+0(%rbp),%rax 5218 mulq %r11 5219 imulq %r12,%r15 5220 addq %rax,%r14 5221 adcq %rdx,%r15 5222 movq 8+0(%rbp),%rax 5223 movq %rax,%r9 5224 mulq %r10 5225 addq %rax,%r14 5226 adcq $0,%rdx 5227 movq %rdx,%r10 5228 movq 8+0(%rbp),%rax 5229 mulq %r11 5230 addq %rax,%r15 5231 adcq $0,%rdx 5232 imulq %r12,%r9 5233 addq %r10,%r15 5234 adcq %rdx,%r9 5235 movq %r13,%r10 5236 movq %r14,%r11 5237 movq %r15,%r12 5238 andq $3,%r12 5239 movq %r15,%r13 5240 andq $-4,%r13 5241 movq %r9,%r14 5242 shrdq $2,%r9,%r15 5243 shrq $2,%r9 5244 addq %r13,%r10 5245 adcq %r14,%r11 5246 adcq $0,%r12 5247 addq %r15,%r10 5248 adcq %r9,%r11 5249 adcq $0,%r12 5250 5251 leaq 16(%r8),%r8 5252 2: 5253 vmovdqa %ymm8,128(%rbp) 5254 vmovdqa .rol16(%rip),%ymm8 5255 vpaddd %ymm7,%ymm3,%ymm3 5256 vpaddd %ymm6,%ymm2,%ymm2 5257 vpaddd %ymm5,%ymm1,%ymm1 5258 vpaddd %ymm4,%ymm0,%ymm0 5259 vpxor %ymm3,%ymm15,%ymm15 5260 vpxor %ymm2,%ymm14,%ymm14 5261 vpxor %ymm1,%ymm13,%ymm13 5262 vpxor %ymm0,%ymm12,%ymm12 5263 vpshufb %ymm8,%ymm15,%ymm15 5264 vpshufb %ymm8,%ymm14,%ymm14 5265 vpshufb %ymm8,%ymm13,%ymm13 5266 vpshufb %ymm8,%ymm12,%ymm12 5267 vmovdqa 128(%rbp),%ymm8 5268 vpaddd %ymm15,%ymm11,%ymm11 5269 vpaddd %ymm14,%ymm10,%ymm10 5270 vpaddd %ymm13,%ymm9,%ymm9 5271 vpaddd %ymm12,%ymm8,%ymm8 5272 vpxor %ymm11,%ymm7,%ymm7 5273 vpxor %ymm10,%ymm6,%ymm6 5274 vpxor %ymm9,%ymm5,%ymm5 5275 vpxor %ymm8,%ymm4,%ymm4 5276 vmovdqa %ymm8,128(%rbp) 5277 vpsrld $20,%ymm7,%ymm8 5278 vpslld $32-20,%ymm7,%ymm7 5279 vpxor %ymm8,%ymm7,%ymm7 5280 vpsrld $20,%ymm6,%ymm8 5281 vpslld $32-20,%ymm6,%ymm6 5282 vpxor %ymm8,%ymm6,%ymm6 5283 vpsrld $20,%ymm5,%ymm8 5284 vpslld $32-20,%ymm5,%ymm5 5285 vpxor %ymm8,%ymm5,%ymm5 5286 vpsrld $20,%ymm4,%ymm8 5287 vpslld $32-20,%ymm4,%ymm4 5288 vpxor %ymm8,%ymm4,%ymm4 5289 vmovdqa .rol8(%rip),%ymm8 5290 addq 0(%r8),%r10 5291 adcq 8+0(%r8),%r11 5292 adcq $1,%r12 5293 movq 0+0(%rbp),%rdx 5294 movq %rdx,%r15 5295 mulxq %r10,%r13,%r14 5296 mulxq %r11,%rax,%rdx 5297 imulq %r12,%r15 5298 addq %rax,%r14 5299 adcq %rdx,%r15 5300 movq 8+0(%rbp),%rdx 5301 mulxq %r10,%r10,%rax 5302 addq %r10,%r14 5303 mulxq %r11,%r11,%r9 5304 adcq %r11,%r15 5305 adcq $0,%r9 5306 imulq %r12,%rdx 5307 addq %rax,%r15 5308 adcq %rdx,%r9 5309 movq %r13,%r10 5310 movq %r14,%r11 5311 movq %r15,%r12 5312 andq $3,%r12 5313 movq %r15,%r13 5314 andq $-4,%r13 5315 movq %r9,%r14 5316 shrdq $2,%r9,%r15 5317 shrq $2,%r9 5318 addq %r13,%r10 5319 adcq %r14,%r11 5320 adcq $0,%r12 5321 addq %r15,%r10 5322 adcq %r9,%r11 5323 adcq $0,%r12 5324 vpaddd %ymm7,%ymm3,%ymm3 5325 vpaddd %ymm6,%ymm2,%ymm2 5326 vpaddd %ymm5,%ymm1,%ymm1 5327 vpaddd %ymm4,%ymm0,%ymm0 5328 vpxor %ymm3,%ymm15,%ymm15 5329 vpxor %ymm2,%ymm14,%ymm14 5330 vpxor %ymm1,%ymm13,%ymm13 5331 vpxor %ymm0,%ymm12,%ymm12 5332 vpshufb %ymm8,%ymm15,%ymm15 5333 vpshufb %ymm8,%ymm14,%ymm14 5334 vpshufb %ymm8,%ymm13,%ymm13 5335 vpshufb %ymm8,%ymm12,%ymm12 5336 vmovdqa 128(%rbp),%ymm8 5337 vpaddd %ymm15,%ymm11,%ymm11 5338 vpaddd %ymm14,%ymm10,%ymm10 5339 vpaddd %ymm13,%ymm9,%ymm9 5340 vpaddd %ymm12,%ymm8,%ymm8 5341 vpxor %ymm11,%ymm7,%ymm7 5342 vpxor %ymm10,%ymm6,%ymm6 5343 vpxor %ymm9,%ymm5,%ymm5 5344 vpxor %ymm8,%ymm4,%ymm4 5345 vmovdqa %ymm8,128(%rbp) 5346 vpsrld $25,%ymm7,%ymm8 5347 vpslld $32-25,%ymm7,%ymm7 5348 vpxor %ymm8,%ymm7,%ymm7 5349 vpsrld $25,%ymm6,%ymm8 5350 vpslld $32-25,%ymm6,%ymm6 5351 vpxor %ymm8,%ymm6,%ymm6 5352 vpsrld $25,%ymm5,%ymm8 5353 vpslld $32-25,%ymm5,%ymm5 5354 vpxor %ymm8,%ymm5,%ymm5 5355 vpsrld $25,%ymm4,%ymm8 5356 vpslld $32-25,%ymm4,%ymm4 5357 vpxor %ymm8,%ymm4,%ymm4 5358 vmovdqa 128(%rbp),%ymm8 5359 vpalignr $4,%ymm7,%ymm7,%ymm7 5360 vpalignr $8,%ymm11,%ymm11,%ymm11 5361 vpalignr $12,%ymm15,%ymm15,%ymm15 5362 vpalignr $4,%ymm6,%ymm6,%ymm6 5363 vpalignr $8,%ymm10,%ymm10,%ymm10 5364 vpalignr $12,%ymm14,%ymm14,%ymm14 5365 vpalignr $4,%ymm5,%ymm5,%ymm5 5366 vpalignr $8,%ymm9,%ymm9,%ymm9 5367 vpalignr $12,%ymm13,%ymm13,%ymm13 5368 vpalignr $4,%ymm4,%ymm4,%ymm4 5369 vpalignr $8,%ymm8,%ymm8,%ymm8 5370 vpalignr $12,%ymm12,%ymm12,%ymm12 5371 vmovdqa %ymm8,128(%rbp) 5372 addq 16(%r8),%r10 5373 adcq 8+16(%r8),%r11 5374 adcq $1,%r12 5375 movq 0+0(%rbp),%rdx 5376 movq %rdx,%r15 5377 mulxq %r10,%r13,%r14 5378 mulxq %r11,%rax,%rdx 5379 imulq %r12,%r15 5380 addq %rax,%r14 5381 adcq %rdx,%r15 5382 movq 8+0(%rbp),%rdx 5383 mulxq %r10,%r10,%rax 5384 addq %r10,%r14 5385 mulxq %r11,%r11,%r9 5386 adcq %r11,%r15 5387 adcq $0,%r9 5388 imulq %r12,%rdx 5389 addq %rax,%r15 5390 adcq %rdx,%r9 5391 movq %r13,%r10 5392 movq %r14,%r11 5393 movq %r15,%r12 5394 andq $3,%r12 5395 movq %r15,%r13 5396 andq $-4,%r13 5397 movq %r9,%r14 5398 shrdq $2,%r9,%r15 5399 shrq $2,%r9 5400 addq %r13,%r10 5401 adcq %r14,%r11 5402 adcq $0,%r12 5403 addq %r15,%r10 5404 adcq %r9,%r11 5405 adcq $0,%r12 5406 5407 leaq 32(%r8),%r8 5408 vmovdqa .rol16(%rip),%ymm8 5409 vpaddd %ymm7,%ymm3,%ymm3 5410 vpaddd %ymm6,%ymm2,%ymm2 5411 vpaddd %ymm5,%ymm1,%ymm1 5412 vpaddd %ymm4,%ymm0,%ymm0 5413 vpxor %ymm3,%ymm15,%ymm15 5414 vpxor %ymm2,%ymm14,%ymm14 5415 vpxor %ymm1,%ymm13,%ymm13 5416 vpxor %ymm0,%ymm12,%ymm12 5417 vpshufb %ymm8,%ymm15,%ymm15 5418 vpshufb %ymm8,%ymm14,%ymm14 5419 vpshufb %ymm8,%ymm13,%ymm13 5420 vpshufb %ymm8,%ymm12,%ymm12 5421 vmovdqa 128(%rbp),%ymm8 5422 vpaddd %ymm15,%ymm11,%ymm11 5423 vpaddd %ymm14,%ymm10,%ymm10 5424 vpaddd %ymm13,%ymm9,%ymm9 5425 vpaddd %ymm12,%ymm8,%ymm8 5426 vpxor %ymm11,%ymm7,%ymm7 5427 vpxor %ymm10,%ymm6,%ymm6 5428 vpxor %ymm9,%ymm5,%ymm5 5429 vpxor %ymm8,%ymm4,%ymm4 5430 vmovdqa %ymm8,128(%rbp) 5431 vpsrld $20,%ymm7,%ymm8 5432 vpslld $32-20,%ymm7,%ymm7 5433 vpxor %ymm8,%ymm7,%ymm7 5434 vpsrld $20,%ymm6,%ymm8 5435 vpslld $32-20,%ymm6,%ymm6 5436 vpxor %ymm8,%ymm6,%ymm6 5437 vpsrld $20,%ymm5,%ymm8 5438 vpslld $32-20,%ymm5,%ymm5 5439 vpxor %ymm8,%ymm5,%ymm5 5440 vpsrld $20,%ymm4,%ymm8 5441 vpslld $32-20,%ymm4,%ymm4 5442 vpxor %ymm8,%ymm4,%ymm4 5443 vmovdqa .rol8(%rip),%ymm8 5444 vpaddd %ymm7,%ymm3,%ymm3 5445 vpaddd %ymm6,%ymm2,%ymm2 5446 vpaddd %ymm5,%ymm1,%ymm1 5447 vpaddd %ymm4,%ymm0,%ymm0 5448 vpxor %ymm3,%ymm15,%ymm15 5449 vpxor %ymm2,%ymm14,%ymm14 5450 vpxor %ymm1,%ymm13,%ymm13 5451 vpxor %ymm0,%ymm12,%ymm12 5452 vpshufb %ymm8,%ymm15,%ymm15 5453 vpshufb %ymm8,%ymm14,%ymm14 5454 vpshufb %ymm8,%ymm13,%ymm13 5455 vpshufb %ymm8,%ymm12,%ymm12 5456 vmovdqa 128(%rbp),%ymm8 5457 vpaddd %ymm15,%ymm11,%ymm11 5458 vpaddd %ymm14,%ymm10,%ymm10 5459 vpaddd %ymm13,%ymm9,%ymm9 5460 vpaddd %ymm12,%ymm8,%ymm8 5461 vpxor %ymm11,%ymm7,%ymm7 5462 vpxor %ymm10,%ymm6,%ymm6 5463 vpxor %ymm9,%ymm5,%ymm5 5464 vpxor %ymm8,%ymm4,%ymm4 5465 vmovdqa %ymm8,128(%rbp) 5466 vpsrld $25,%ymm7,%ymm8 5467 vpslld $32-25,%ymm7,%ymm7 5468 vpxor %ymm8,%ymm7,%ymm7 5469 vpsrld $25,%ymm6,%ymm8 5470 vpslld $32-25,%ymm6,%ymm6 5471 vpxor %ymm8,%ymm6,%ymm6 5472 vpsrld $25,%ymm5,%ymm8 5473 vpslld $32-25,%ymm5,%ymm5 5474 vpxor %ymm8,%ymm5,%ymm5 5475 vpsrld $25,%ymm4,%ymm8 5476 vpslld $32-25,%ymm4,%ymm4 5477 vpxor %ymm8,%ymm4,%ymm4 5478 vmovdqa 128(%rbp),%ymm8 5479 vpalignr $12,%ymm7,%ymm7,%ymm7 5480 vpalignr $8,%ymm11,%ymm11,%ymm11 5481 vpalignr $4,%ymm15,%ymm15,%ymm15 5482 vpalignr $12,%ymm6,%ymm6,%ymm6 5483 vpalignr $8,%ymm10,%ymm10,%ymm10 5484 vpalignr $4,%ymm14,%ymm14,%ymm14 5485 vpalignr $12,%ymm5,%ymm5,%ymm5 5486 vpalignr $8,%ymm9,%ymm9,%ymm9 5487 vpalignr $4,%ymm13,%ymm13,%ymm13 5488 vpalignr $12,%ymm4,%ymm4,%ymm4 5489 vpalignr $8,%ymm8,%ymm8,%ymm8 5490 vpalignr $4,%ymm12,%ymm12,%ymm12 5491 5492 incq %rcx 5493 cmpq $4,%rcx 5494 jl 1b 5495 cmpq $10,%rcx 5496 jne 2b 5497 movq %rbx,%rcx 5498 subq $384,%rcx 5499 andq $-16,%rcx 5500 1: 5501 testq %rcx,%rcx 5502 je 1f 5503 addq 0(%r8),%r10 5504 adcq 8+0(%r8),%r11 5505 adcq $1,%r12 5506 movq 0+0(%rbp),%rdx 5507 movq %rdx,%r15 5508 mulxq %r10,%r13,%r14 5509 mulxq %r11,%rax,%rdx 5510 imulq %r12,%r15 5511 addq %rax,%r14 5512 adcq %rdx,%r15 5513 movq 8+0(%rbp),%rdx 5514 mulxq %r10,%r10,%rax 5515 addq %r10,%r14 5516 mulxq %r11,%r11,%r9 5517 adcq %r11,%r15 5518 adcq $0,%r9 5519 imulq %r12,%rdx 5520 addq %rax,%r15 5521 adcq %rdx,%r9 5522 movq %r13,%r10 5523 movq %r14,%r11 5524 movq %r15,%r12 5525 andq $3,%r12 5526 movq %r15,%r13 5527 andq $-4,%r13 5528 movq %r9,%r14 5529 shrdq $2,%r9,%r15 5530 shrq $2,%r9 5531 addq %r13,%r10 5532 adcq %r14,%r11 5533 adcq $0,%r12 5534 addq %r15,%r10 5535 adcq %r9,%r11 5536 adcq $0,%r12 5537 5538 leaq 16(%r8),%r8 5539 subq $16,%rcx 5540 jmp 1b 5541 1: 5542 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 5543 vpaddd 64(%rbp),%ymm7,%ymm7 5544 vpaddd 96(%rbp),%ymm11,%ymm11 5545 vpaddd 256(%rbp),%ymm15,%ymm15 5546 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 5547 vpaddd 64(%rbp),%ymm6,%ymm6 5548 vpaddd 96(%rbp),%ymm10,%ymm10 5549 vpaddd 224(%rbp),%ymm14,%ymm14 5550 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 5551 vpaddd 64(%rbp),%ymm5,%ymm5 5552 vpaddd 96(%rbp),%ymm9,%ymm9 5553 vpaddd 192(%rbp),%ymm13,%ymm13 5554 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5555 vpaddd 64(%rbp),%ymm4,%ymm4 5556 vpaddd 96(%rbp),%ymm8,%ymm8 5557 vpaddd 160(%rbp),%ymm12,%ymm12 5558 5559 vmovdqa %ymm0,128(%rbp) 5560 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5561 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5562 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5563 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5564 vpxor 0+0(%rsi),%ymm0,%ymm0 5565 vpxor 32+0(%rsi),%ymm3,%ymm3 5566 vpxor 64+0(%rsi),%ymm7,%ymm7 5567 vpxor 96+0(%rsi),%ymm11,%ymm11 5568 vmovdqu %ymm0,0+0(%rdi) 5569 vmovdqu %ymm3,32+0(%rdi) 5570 vmovdqu %ymm7,64+0(%rdi) 5571 vmovdqu %ymm11,96+0(%rdi) 5572 5573 vmovdqa 128(%rbp),%ymm0 5574 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5575 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5576 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5577 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5578 vpxor 0+128(%rsi),%ymm3,%ymm3 5579 vpxor 32+128(%rsi),%ymm2,%ymm2 5580 vpxor 64+128(%rsi),%ymm6,%ymm6 5581 vpxor 96+128(%rsi),%ymm10,%ymm10 5582 vmovdqu %ymm3,0+128(%rdi) 5583 vmovdqu %ymm2,32+128(%rdi) 5584 vmovdqu %ymm6,64+128(%rdi) 5585 vmovdqu %ymm10,96+128(%rdi) 5586 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5587 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5588 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5589 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5590 vpxor 0+256(%rsi),%ymm3,%ymm3 5591 vpxor 32+256(%rsi),%ymm1,%ymm1 5592 vpxor 64+256(%rsi),%ymm5,%ymm5 5593 vpxor 96+256(%rsi),%ymm9,%ymm9 5594 vmovdqu %ymm3,0+256(%rdi) 5595 vmovdqu %ymm1,32+256(%rdi) 5596 vmovdqu %ymm5,64+256(%rdi) 5597 vmovdqu %ymm9,96+256(%rdi) 5598 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5599 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5600 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5601 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5602 vmovdqa %ymm3,%ymm8 5603 5604 leaq 384(%rsi),%rsi 5605 leaq 384(%rdi),%rdi 5606 subq $384,%rbx 5607 open_avx2_tail_loop: 5608 cmpq $32,%rbx 5609 jb open_avx2_tail 5610 subq $32,%rbx 5611 vpxor (%rsi),%ymm0,%ymm0 5612 vmovdqu %ymm0,(%rdi) 5613 leaq 32(%rsi),%rsi 5614 leaq 32(%rdi),%rdi 5615 vmovdqa %ymm4,%ymm0 5616 vmovdqa %ymm8,%ymm4 5617 vmovdqa %ymm12,%ymm8 5618 jmp open_avx2_tail_loop 5619 open_avx2_tail: 5620 cmpq $16,%rbx 5621 vmovdqa %xmm0,%xmm1 5622 jb 1f 5623 subq $16,%rbx 5624 5625 vpxor (%rsi),%xmm0,%xmm1 5626 vmovdqu %xmm1,(%rdi) 5627 leaq 16(%rsi),%rsi 5628 leaq 16(%rdi),%rdi 5629 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5630 vmovdqa %xmm0,%xmm1 5631 1: 5632 vzeroupper 5633 jmp open_sse_tail_16 5634 5635 open_avx2_192: 5636 vmovdqa %ymm0,%ymm1 5637 vmovdqa %ymm0,%ymm2 5638 vmovdqa %ymm4,%ymm5 5639 vmovdqa %ymm4,%ymm6 5640 vmovdqa %ymm8,%ymm9 5641 vmovdqa %ymm8,%ymm10 5642 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 5643 vmovdqa %ymm12,%ymm11 5644 vmovdqa %ymm13,%ymm15 5645 movq $10,%r10 5646 1: 5647 vpaddd %ymm4,%ymm0,%ymm0 5648 vpxor %ymm0,%ymm12,%ymm12 5649 vpshufb .rol16(%rip),%ymm12,%ymm12 5650 vpaddd %ymm12,%ymm8,%ymm8 5651 vpxor %ymm8,%ymm4,%ymm4 5652 vpsrld $20,%ymm4,%ymm3 5653 vpslld $12,%ymm4,%ymm4 5654 vpxor %ymm3,%ymm4,%ymm4 5655 vpaddd %ymm4,%ymm0,%ymm0 5656 vpxor %ymm0,%ymm12,%ymm12 5657 vpshufb .rol8(%rip),%ymm12,%ymm12 5658 vpaddd %ymm12,%ymm8,%ymm8 5659 vpxor %ymm8,%ymm4,%ymm4 5660 vpslld $7,%ymm4,%ymm3 5661 vpsrld $25,%ymm4,%ymm4 5662 vpxor %ymm3,%ymm4,%ymm4 5663 vpalignr $12,%ymm12,%ymm12,%ymm12 5664 vpalignr $8,%ymm8,%ymm8,%ymm8 5665 vpalignr $4,%ymm4,%ymm4,%ymm4 5666 vpaddd %ymm5,%ymm1,%ymm1 5667 vpxor %ymm1,%ymm13,%ymm13 5668 vpshufb .rol16(%rip),%ymm13,%ymm13 5669 vpaddd %ymm13,%ymm9,%ymm9 5670 vpxor %ymm9,%ymm5,%ymm5 5671 vpsrld $20,%ymm5,%ymm3 5672 vpslld $12,%ymm5,%ymm5 5673 vpxor %ymm3,%ymm5,%ymm5 5674 vpaddd %ymm5,%ymm1,%ymm1 5675 vpxor %ymm1,%ymm13,%ymm13 5676 vpshufb .rol8(%rip),%ymm13,%ymm13 5677 vpaddd %ymm13,%ymm9,%ymm9 5678 vpxor %ymm9,%ymm5,%ymm5 5679 vpslld $7,%ymm5,%ymm3 5680 vpsrld $25,%ymm5,%ymm5 5681 vpxor %ymm3,%ymm5,%ymm5 5682 vpalignr $12,%ymm13,%ymm13,%ymm13 5683 vpalignr $8,%ymm9,%ymm9,%ymm9 5684 vpalignr $4,%ymm5,%ymm5,%ymm5 5685 vpaddd %ymm4,%ymm0,%ymm0 5686 vpxor %ymm0,%ymm12,%ymm12 5687 vpshufb .rol16(%rip),%ymm12,%ymm12 5688 vpaddd %ymm12,%ymm8,%ymm8 5689 vpxor %ymm8,%ymm4,%ymm4 5690 vpsrld $20,%ymm4,%ymm3 5691 vpslld $12,%ymm4,%ymm4 5692 vpxor %ymm3,%ymm4,%ymm4 5693 vpaddd %ymm4,%ymm0,%ymm0 5694 vpxor %ymm0,%ymm12,%ymm12 5695 vpshufb .rol8(%rip),%ymm12,%ymm12 5696 vpaddd %ymm12,%ymm8,%ymm8 5697 vpxor %ymm8,%ymm4,%ymm4 5698 vpslld $7,%ymm4,%ymm3 5699 vpsrld $25,%ymm4,%ymm4 5700 vpxor %ymm3,%ymm4,%ymm4 5701 vpalignr $4,%ymm12,%ymm12,%ymm12 5702 vpalignr $8,%ymm8,%ymm8,%ymm8 5703 vpalignr $12,%ymm4,%ymm4,%ymm4 5704 vpaddd %ymm5,%ymm1,%ymm1 5705 vpxor %ymm1,%ymm13,%ymm13 5706 vpshufb .rol16(%rip),%ymm13,%ymm13 5707 vpaddd %ymm13,%ymm9,%ymm9 5708 vpxor %ymm9,%ymm5,%ymm5 5709 vpsrld $20,%ymm5,%ymm3 5710 vpslld $12,%ymm5,%ymm5 5711 vpxor %ymm3,%ymm5,%ymm5 5712 vpaddd %ymm5,%ymm1,%ymm1 5713 vpxor %ymm1,%ymm13,%ymm13 5714 vpshufb .rol8(%rip),%ymm13,%ymm13 5715 vpaddd %ymm13,%ymm9,%ymm9 5716 vpxor %ymm9,%ymm5,%ymm5 5717 vpslld $7,%ymm5,%ymm3 5718 vpsrld $25,%ymm5,%ymm5 5719 vpxor %ymm3,%ymm5,%ymm5 5720 vpalignr $4,%ymm13,%ymm13,%ymm13 5721 vpalignr $8,%ymm9,%ymm9,%ymm9 5722 vpalignr $12,%ymm5,%ymm5,%ymm5 5723 5724 decq %r10 5725 jne 1b 5726 vpaddd %ymm2,%ymm0,%ymm0 5727 vpaddd %ymm2,%ymm1,%ymm1 5728 vpaddd %ymm6,%ymm4,%ymm4 5729 vpaddd %ymm6,%ymm5,%ymm5 5730 vpaddd %ymm10,%ymm8,%ymm8 5731 vpaddd %ymm10,%ymm9,%ymm9 5732 vpaddd %ymm11,%ymm12,%ymm12 5733 vpaddd %ymm15,%ymm13,%ymm13 5734 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5735 5736 vpand .clamp(%rip),%ymm3,%ymm3 5737 vmovdqa %ymm3,0(%rbp) 5738 5739 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5740 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5741 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5742 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5743 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5744 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5745 open_avx2_short: 5746 movq %r8,%r8 5747 call poly_hash_ad_internal 5748 open_avx2_hash_and_xor_loop: 5749 cmpq $32,%rbx 5750 jb open_avx2_short_tail_32 5751 subq $32,%rbx 5752 addq 0(%rsi),%r10 5753 adcq 8+0(%rsi),%r11 5754 adcq $1,%r12 5755 movq 0+0(%rbp),%rax 5756 movq %rax,%r15 5757 mulq %r10 5758 movq %rax,%r13 5759 movq %rdx,%r14 5760 movq 0+0(%rbp),%rax 5761 mulq %r11 5762 imulq %r12,%r15 5763 addq %rax,%r14 5764 adcq %rdx,%r15 5765 movq 8+0(%rbp),%rax 5766 movq %rax,%r9 5767 mulq %r10 5768 addq %rax,%r14 5769 adcq $0,%rdx 5770 movq %rdx,%r10 5771 movq 8+0(%rbp),%rax 5772 mulq %r11 5773 addq %rax,%r15 5774 adcq $0,%rdx 5775 imulq %r12,%r9 5776 addq %r10,%r15 5777 adcq %rdx,%r9 5778 movq %r13,%r10 5779 movq %r14,%r11 5780 movq %r15,%r12 5781 andq $3,%r12 5782 movq %r15,%r13 5783 andq $-4,%r13 5784 movq %r9,%r14 5785 shrdq $2,%r9,%r15 5786 shrq $2,%r9 5787 addq %r13,%r10 5788 adcq %r14,%r11 5789 adcq $0,%r12 5790 addq %r15,%r10 5791 adcq %r9,%r11 5792 adcq $0,%r12 5793 addq 16(%rsi),%r10 5794 adcq 8+16(%rsi),%r11 5795 adcq $1,%r12 5796 movq 0+0(%rbp),%rax 5797 movq %rax,%r15 5798 mulq %r10 5799 movq %rax,%r13 5800 movq %rdx,%r14 5801 movq 0+0(%rbp),%rax 5802 mulq %r11 5803 imulq %r12,%r15 5804 addq %rax,%r14 5805 adcq %rdx,%r15 5806 movq 8+0(%rbp),%rax 5807 movq %rax,%r9 5808 mulq %r10 5809 addq %rax,%r14 5810 adcq $0,%rdx 5811 movq %rdx,%r10 5812 movq 8+0(%rbp),%rax 5813 mulq %r11 5814 addq %rax,%r15 5815 adcq $0,%rdx 5816 imulq %r12,%r9 5817 addq %r10,%r15 5818 adcq %rdx,%r9 5819 movq %r13,%r10 5820 movq %r14,%r11 5821 movq %r15,%r12 5822 andq $3,%r12 5823 movq %r15,%r13 5824 andq $-4,%r13 5825 movq %r9,%r14 5826 shrdq $2,%r9,%r15 5827 shrq $2,%r9 5828 addq %r13,%r10 5829 adcq %r14,%r11 5830 adcq $0,%r12 5831 addq %r15,%r10 5832 adcq %r9,%r11 5833 adcq $0,%r12 5834 5835 5836 vpxor (%rsi),%ymm0,%ymm0 5837 vmovdqu %ymm0,(%rdi) 5838 leaq 32(%rsi),%rsi 5839 leaq 32(%rdi),%rdi 5840 5841 vmovdqa %ymm4,%ymm0 5842 vmovdqa %ymm8,%ymm4 5843 vmovdqa %ymm12,%ymm8 5844 vmovdqa %ymm1,%ymm12 5845 vmovdqa %ymm5,%ymm1 5846 vmovdqa %ymm9,%ymm5 5847 vmovdqa %ymm13,%ymm9 5848 vmovdqa %ymm2,%ymm13 5849 vmovdqa %ymm6,%ymm2 5850 jmp open_avx2_hash_and_xor_loop 5851 open_avx2_short_tail_32: 5852 cmpq $16,%rbx 5853 vmovdqa %xmm0,%xmm1 5854 jb 1f 5855 subq $16,%rbx 5856 addq 0(%rsi),%r10 5857 adcq 8+0(%rsi),%r11 5858 adcq $1,%r12 5859 movq 0+0(%rbp),%rax 5860 movq %rax,%r15 5861 mulq %r10 5862 movq %rax,%r13 5863 movq %rdx,%r14 5864 movq 0+0(%rbp),%rax 5865 mulq %r11 5866 imulq %r12,%r15 5867 addq %rax,%r14 5868 adcq %rdx,%r15 5869 movq 8+0(%rbp),%rax 5870 movq %rax,%r9 5871 mulq %r10 5872 addq %rax,%r14 5873 adcq $0,%rdx 5874 movq %rdx,%r10 5875 movq 8+0(%rbp),%rax 5876 mulq %r11 5877 addq %rax,%r15 5878 adcq $0,%rdx 5879 imulq %r12,%r9 5880 addq %r10,%r15 5881 adcq %rdx,%r9 5882 movq %r13,%r10 5883 movq %r14,%r11 5884 movq %r15,%r12 5885 andq $3,%r12 5886 movq %r15,%r13 5887 andq $-4,%r13 5888 movq %r9,%r14 5889 shrdq $2,%r9,%r15 5890 shrq $2,%r9 5891 addq %r13,%r10 5892 adcq %r14,%r11 5893 adcq $0,%r12 5894 addq %r15,%r10 5895 adcq %r9,%r11 5896 adcq $0,%r12 5897 5898 vpxor (%rsi),%xmm0,%xmm3 5899 vmovdqu %xmm3,(%rdi) 5900 leaq 16(%rsi),%rsi 5901 leaq 16(%rdi),%rdi 5902 vextracti128 $1,%ymm0,%xmm1 5903 1: 5904 vzeroupper 5905 jmp open_sse_tail_16 5906 5907 open_avx2_320: 5908 vmovdqa %ymm0,%ymm1 5909 vmovdqa %ymm0,%ymm2 5910 vmovdqa %ymm4,%ymm5 5911 vmovdqa %ymm4,%ymm6 5912 vmovdqa %ymm8,%ymm9 5913 vmovdqa %ymm8,%ymm10 5914 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 5915 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 5916 vmovdqa %ymm4,%ymm7 5917 vmovdqa %ymm8,%ymm11 5918 vmovdqa %ymm12,160(%rbp) 5919 vmovdqa %ymm13,192(%rbp) 5920 vmovdqa %ymm14,224(%rbp) 5921 movq $10,%r10 5922 1: 5923 vpaddd %ymm4,%ymm0,%ymm0 5924 vpxor %ymm0,%ymm12,%ymm12 5925 vpshufb .rol16(%rip),%ymm12,%ymm12 5926 vpaddd %ymm12,%ymm8,%ymm8 5927 vpxor %ymm8,%ymm4,%ymm4 5928 vpsrld $20,%ymm4,%ymm3 5929 vpslld $12,%ymm4,%ymm4 5930 vpxor %ymm3,%ymm4,%ymm4 5931 vpaddd %ymm4,%ymm0,%ymm0 5932 vpxor %ymm0,%ymm12,%ymm12 5933 vpshufb .rol8(%rip),%ymm12,%ymm12 5934 vpaddd %ymm12,%ymm8,%ymm8 5935 vpxor %ymm8,%ymm4,%ymm4 5936 vpslld $7,%ymm4,%ymm3 5937 vpsrld $25,%ymm4,%ymm4 5938 vpxor %ymm3,%ymm4,%ymm4 5939 vpalignr $12,%ymm12,%ymm12,%ymm12 5940 vpalignr $8,%ymm8,%ymm8,%ymm8 5941 vpalignr $4,%ymm4,%ymm4,%ymm4 5942 vpaddd %ymm5,%ymm1,%ymm1 5943 vpxor %ymm1,%ymm13,%ymm13 5944 vpshufb .rol16(%rip),%ymm13,%ymm13 5945 vpaddd %ymm13,%ymm9,%ymm9 5946 vpxor %ymm9,%ymm5,%ymm5 5947 vpsrld $20,%ymm5,%ymm3 5948 vpslld $12,%ymm5,%ymm5 5949 vpxor %ymm3,%ymm5,%ymm5 5950 vpaddd %ymm5,%ymm1,%ymm1 5951 vpxor %ymm1,%ymm13,%ymm13 5952 vpshufb .rol8(%rip),%ymm13,%ymm13 5953 vpaddd %ymm13,%ymm9,%ymm9 5954 vpxor %ymm9,%ymm5,%ymm5 5955 vpslld $7,%ymm5,%ymm3 5956 vpsrld $25,%ymm5,%ymm5 5957 vpxor %ymm3,%ymm5,%ymm5 5958 vpalignr $12,%ymm13,%ymm13,%ymm13 5959 vpalignr $8,%ymm9,%ymm9,%ymm9 5960 vpalignr $4,%ymm5,%ymm5,%ymm5 5961 vpaddd %ymm6,%ymm2,%ymm2 5962 vpxor %ymm2,%ymm14,%ymm14 5963 vpshufb .rol16(%rip),%ymm14,%ymm14 5964 vpaddd %ymm14,%ymm10,%ymm10 5965 vpxor %ymm10,%ymm6,%ymm6 5966 vpsrld $20,%ymm6,%ymm3 5967 vpslld $12,%ymm6,%ymm6 5968 vpxor %ymm3,%ymm6,%ymm6 5969 vpaddd %ymm6,%ymm2,%ymm2 5970 vpxor %ymm2,%ymm14,%ymm14 5971 vpshufb .rol8(%rip),%ymm14,%ymm14 5972 vpaddd %ymm14,%ymm10,%ymm10 5973 vpxor %ymm10,%ymm6,%ymm6 5974 vpslld $7,%ymm6,%ymm3 5975 vpsrld $25,%ymm6,%ymm6 5976 vpxor %ymm3,%ymm6,%ymm6 5977 vpalignr $12,%ymm14,%ymm14,%ymm14 5978 vpalignr $8,%ymm10,%ymm10,%ymm10 5979 vpalignr $4,%ymm6,%ymm6,%ymm6 5980 vpaddd %ymm4,%ymm0,%ymm0 5981 vpxor %ymm0,%ymm12,%ymm12 5982 vpshufb .rol16(%rip),%ymm12,%ymm12 5983 vpaddd %ymm12,%ymm8,%ymm8 5984 vpxor %ymm8,%ymm4,%ymm4 5985 vpsrld $20,%ymm4,%ymm3 5986 vpslld $12,%ymm4,%ymm4 5987 vpxor %ymm3,%ymm4,%ymm4 5988 vpaddd %ymm4,%ymm0,%ymm0 5989 vpxor %ymm0,%ymm12,%ymm12 5990 vpshufb .rol8(%rip),%ymm12,%ymm12 5991 vpaddd %ymm12,%ymm8,%ymm8 5992 vpxor %ymm8,%ymm4,%ymm4 5993 vpslld $7,%ymm4,%ymm3 5994 vpsrld $25,%ymm4,%ymm4 5995 vpxor %ymm3,%ymm4,%ymm4 5996 vpalignr $4,%ymm12,%ymm12,%ymm12 5997 vpalignr $8,%ymm8,%ymm8,%ymm8 5998 vpalignr $12,%ymm4,%ymm4,%ymm4 5999 vpaddd %ymm5,%ymm1,%ymm1 6000 vpxor %ymm1,%ymm13,%ymm13 6001 vpshufb .rol16(%rip),%ymm13,%ymm13 6002 vpaddd %ymm13,%ymm9,%ymm9 6003 vpxor %ymm9,%ymm5,%ymm5 6004 vpsrld $20,%ymm5,%ymm3 6005 vpslld $12,%ymm5,%ymm5 6006 vpxor %ymm3,%ymm5,%ymm5 6007 vpaddd %ymm5,%ymm1,%ymm1 6008 vpxor %ymm1,%ymm13,%ymm13 6009 vpshufb .rol8(%rip),%ymm13,%ymm13 6010 vpaddd %ymm13,%ymm9,%ymm9 6011 vpxor %ymm9,%ymm5,%ymm5 6012 vpslld $7,%ymm5,%ymm3 6013 vpsrld $25,%ymm5,%ymm5 6014 vpxor %ymm3,%ymm5,%ymm5 6015 vpalignr $4,%ymm13,%ymm13,%ymm13 6016 vpalignr $8,%ymm9,%ymm9,%ymm9 6017 vpalignr $12,%ymm5,%ymm5,%ymm5 6018 vpaddd %ymm6,%ymm2,%ymm2 6019 vpxor %ymm2,%ymm14,%ymm14 6020 vpshufb .rol16(%rip),%ymm14,%ymm14 6021 vpaddd %ymm14,%ymm10,%ymm10 6022 vpxor %ymm10,%ymm6,%ymm6 6023 vpsrld $20,%ymm6,%ymm3 6024 vpslld $12,%ymm6,%ymm6 6025 vpxor %ymm3,%ymm6,%ymm6 6026 vpaddd %ymm6,%ymm2,%ymm2 6027 vpxor %ymm2,%ymm14,%ymm14 6028 vpshufb .rol8(%rip),%ymm14,%ymm14 6029 vpaddd %ymm14,%ymm10,%ymm10 6030 vpxor %ymm10,%ymm6,%ymm6 6031 vpslld $7,%ymm6,%ymm3 6032 vpsrld $25,%ymm6,%ymm6 6033 vpxor %ymm3,%ymm6,%ymm6 6034 vpalignr $4,%ymm14,%ymm14,%ymm14 6035 vpalignr $8,%ymm10,%ymm10,%ymm10 6036 vpalignr $12,%ymm6,%ymm6,%ymm6 6037 6038 decq %r10 6039 jne 1b 6040 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 6041 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 6042 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 6043 vpaddd %ymm7,%ymm4,%ymm4 6044 vpaddd %ymm7,%ymm5,%ymm5 6045 vpaddd %ymm7,%ymm6,%ymm6 6046 vpaddd %ymm11,%ymm8,%ymm8 6047 vpaddd %ymm11,%ymm9,%ymm9 6048 vpaddd %ymm11,%ymm10,%ymm10 6049 vpaddd 160(%rbp),%ymm12,%ymm12 6050 vpaddd 192(%rbp),%ymm13,%ymm13 6051 vpaddd 224(%rbp),%ymm14,%ymm14 6052 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 6053 6054 vpand .clamp(%rip),%ymm3,%ymm3 6055 vmovdqa %ymm3,0(%rbp) 6056 6057 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 6058 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 6059 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 6060 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 6061 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 6062 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 6063 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 6064 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 6065 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 6066 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 6067 jmp open_avx2_short 6068 .size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 6069 6070 6071 .type chacha20_poly1305_seal_avx2,@function 6072 .align 64 6073 chacha20_poly1305_seal_avx2: 6074 vzeroupper 6075 vmovdqa .chacha20_consts(%rip),%ymm0 6076 vbroadcasti128 0(%r9),%ymm4 6077 vbroadcasti128 16(%r9),%ymm8 6078 vbroadcasti128 32(%r9),%ymm12 6079 vpaddd .avx2_init(%rip),%ymm12,%ymm12 6080 cmpq $192,%rbx 6081 jbe seal_avx2_192 6082 cmpq $320,%rbx 6083 jbe seal_avx2_320 6084 vmovdqa %ymm0,%ymm1 6085 vmovdqa %ymm0,%ymm2 6086 vmovdqa %ymm0,%ymm3 6087 vmovdqa %ymm4,%ymm5 6088 vmovdqa %ymm4,%ymm6 6089 vmovdqa %ymm4,%ymm7 6090 vmovdqa %ymm4,64(%rbp) 6091 vmovdqa %ymm8,%ymm9 6092 vmovdqa %ymm8,%ymm10 6093 vmovdqa %ymm8,%ymm11 6094 vmovdqa %ymm8,96(%rbp) 6095 vmovdqa %ymm12,%ymm15 6096 vpaddd .avx2_inc(%rip),%ymm15,%ymm14 6097 vpaddd .avx2_inc(%rip),%ymm14,%ymm13 6098 vpaddd .avx2_inc(%rip),%ymm13,%ymm12 6099 vmovdqa %ymm12,160(%rbp) 6100 vmovdqa %ymm13,192(%rbp) 6101 vmovdqa %ymm14,224(%rbp) 6102 vmovdqa %ymm15,256(%rbp) 6103 movq $10,%r10 6104 1: 6105 vmovdqa %ymm8,128(%rbp) 6106 vmovdqa .rol16(%rip),%ymm8 6107 vpaddd %ymm7,%ymm3,%ymm3 6108 vpaddd %ymm6,%ymm2,%ymm2 6109 vpaddd %ymm5,%ymm1,%ymm1 6110 vpaddd %ymm4,%ymm0,%ymm0 6111 vpxor %ymm3,%ymm15,%ymm15 6112 vpxor %ymm2,%ymm14,%ymm14 6113 vpxor %ymm1,%ymm13,%ymm13 6114 vpxor %ymm0,%ymm12,%ymm12 6115 vpshufb %ymm8,%ymm15,%ymm15 6116 vpshufb %ymm8,%ymm14,%ymm14 6117 vpshufb %ymm8,%ymm13,%ymm13 6118 vpshufb %ymm8,%ymm12,%ymm12 6119 vmovdqa 128(%rbp),%ymm8 6120 vpaddd %ymm15,%ymm11,%ymm11 6121 vpaddd %ymm14,%ymm10,%ymm10 6122 vpaddd %ymm13,%ymm9,%ymm9 6123 vpaddd %ymm12,%ymm8,%ymm8 6124 vpxor %ymm11,%ymm7,%ymm7 6125 vpxor %ymm10,%ymm6,%ymm6 6126 vpxor %ymm9,%ymm5,%ymm5 6127 vpxor %ymm8,%ymm4,%ymm4 6128 vmovdqa %ymm8,128(%rbp) 6129 vpsrld $20,%ymm7,%ymm8 6130 vpslld $32-20,%ymm7,%ymm7 6131 vpxor %ymm8,%ymm7,%ymm7 6132 vpsrld $20,%ymm6,%ymm8 6133 vpslld $32-20,%ymm6,%ymm6 6134 vpxor %ymm8,%ymm6,%ymm6 6135 vpsrld $20,%ymm5,%ymm8 6136 vpslld $32-20,%ymm5,%ymm5 6137 vpxor %ymm8,%ymm5,%ymm5 6138 vpsrld $20,%ymm4,%ymm8 6139 vpslld $32-20,%ymm4,%ymm4 6140 vpxor %ymm8,%ymm4,%ymm4 6141 vmovdqa .rol8(%rip),%ymm8 6142 vpaddd %ymm7,%ymm3,%ymm3 6143 vpaddd %ymm6,%ymm2,%ymm2 6144 vpaddd %ymm5,%ymm1,%ymm1 6145 vpaddd %ymm4,%ymm0,%ymm0 6146 vpxor %ymm3,%ymm15,%ymm15 6147 vpxor %ymm2,%ymm14,%ymm14 6148 vpxor %ymm1,%ymm13,%ymm13 6149 vpxor %ymm0,%ymm12,%ymm12 6150 vpshufb %ymm8,%ymm15,%ymm15 6151 vpshufb %ymm8,%ymm14,%ymm14 6152 vpshufb %ymm8,%ymm13,%ymm13 6153 vpshufb %ymm8,%ymm12,%ymm12 6154 vmovdqa 128(%rbp),%ymm8 6155 vpaddd %ymm15,%ymm11,%ymm11 6156 vpaddd %ymm14,%ymm10,%ymm10 6157 vpaddd %ymm13,%ymm9,%ymm9 6158 vpaddd %ymm12,%ymm8,%ymm8 6159 vpxor %ymm11,%ymm7,%ymm7 6160 vpxor %ymm10,%ymm6,%ymm6 6161 vpxor %ymm9,%ymm5,%ymm5 6162 vpxor %ymm8,%ymm4,%ymm4 6163 vmovdqa %ymm8,128(%rbp) 6164 vpsrld $25,%ymm7,%ymm8 6165 vpslld $32-25,%ymm7,%ymm7 6166 vpxor %ymm8,%ymm7,%ymm7 6167 vpsrld $25,%ymm6,%ymm8 6168 vpslld $32-25,%ymm6,%ymm6 6169 vpxor %ymm8,%ymm6,%ymm6 6170 vpsrld $25,%ymm5,%ymm8 6171 vpslld $32-25,%ymm5,%ymm5 6172 vpxor %ymm8,%ymm5,%ymm5 6173 vpsrld $25,%ymm4,%ymm8 6174 vpslld $32-25,%ymm4,%ymm4 6175 vpxor %ymm8,%ymm4,%ymm4 6176 vmovdqa 128(%rbp),%ymm8 6177 vpalignr $4,%ymm7,%ymm7,%ymm7 6178 vpalignr $8,%ymm11,%ymm11,%ymm11 6179 vpalignr $12,%ymm15,%ymm15,%ymm15 6180 vpalignr $4,%ymm6,%ymm6,%ymm6 6181 vpalignr $8,%ymm10,%ymm10,%ymm10 6182 vpalignr $12,%ymm14,%ymm14,%ymm14 6183 vpalignr $4,%ymm5,%ymm5,%ymm5 6184 vpalignr $8,%ymm9,%ymm9,%ymm9 6185 vpalignr $12,%ymm13,%ymm13,%ymm13 6186 vpalignr $4,%ymm4,%ymm4,%ymm4 6187 vpalignr $8,%ymm8,%ymm8,%ymm8 6188 vpalignr $12,%ymm12,%ymm12,%ymm12 6189 vmovdqa %ymm8,128(%rbp) 6190 vmovdqa .rol16(%rip),%ymm8 6191 vpaddd %ymm7,%ymm3,%ymm3 6192 vpaddd %ymm6,%ymm2,%ymm2 6193 vpaddd %ymm5,%ymm1,%ymm1 6194 vpaddd %ymm4,%ymm0,%ymm0 6195 vpxor %ymm3,%ymm15,%ymm15 6196 vpxor %ymm2,%ymm14,%ymm14 6197 vpxor %ymm1,%ymm13,%ymm13 6198 vpxor %ymm0,%ymm12,%ymm12 6199 vpshufb %ymm8,%ymm15,%ymm15 6200 vpshufb %ymm8,%ymm14,%ymm14 6201 vpshufb %ymm8,%ymm13,%ymm13 6202 vpshufb %ymm8,%ymm12,%ymm12 6203 vmovdqa 128(%rbp),%ymm8 6204 vpaddd %ymm15,%ymm11,%ymm11 6205 vpaddd %ymm14,%ymm10,%ymm10 6206 vpaddd %ymm13,%ymm9,%ymm9 6207 vpaddd %ymm12,%ymm8,%ymm8 6208 vpxor %ymm11,%ymm7,%ymm7 6209 vpxor %ymm10,%ymm6,%ymm6 6210 vpxor %ymm9,%ymm5,%ymm5 6211 vpxor %ymm8,%ymm4,%ymm4 6212 vmovdqa %ymm8,128(%rbp) 6213 vpsrld $20,%ymm7,%ymm8 6214 vpslld $32-20,%ymm7,%ymm7 6215 vpxor %ymm8,%ymm7,%ymm7 6216 vpsrld $20,%ymm6,%ymm8 6217 vpslld $32-20,%ymm6,%ymm6 6218 vpxor %ymm8,%ymm6,%ymm6 6219 vpsrld $20,%ymm5,%ymm8 6220 vpslld $32-20,%ymm5,%ymm5 6221 vpxor %ymm8,%ymm5,%ymm5 6222 vpsrld $20,%ymm4,%ymm8 6223 vpslld $32-20,%ymm4,%ymm4 6224 vpxor %ymm8,%ymm4,%ymm4 6225 vmovdqa .rol8(%rip),%ymm8 6226 vpaddd %ymm7,%ymm3,%ymm3 6227 vpaddd %ymm6,%ymm2,%ymm2 6228 vpaddd %ymm5,%ymm1,%ymm1 6229 vpaddd %ymm4,%ymm0,%ymm0 6230 vpxor %ymm3,%ymm15,%ymm15 6231 vpxor %ymm2,%ymm14,%ymm14 6232 vpxor %ymm1,%ymm13,%ymm13 6233 vpxor %ymm0,%ymm12,%ymm12 6234 vpshufb %ymm8,%ymm15,%ymm15 6235 vpshufb %ymm8,%ymm14,%ymm14 6236 vpshufb %ymm8,%ymm13,%ymm13 6237 vpshufb %ymm8,%ymm12,%ymm12 6238 vmovdqa 128(%rbp),%ymm8 6239 vpaddd %ymm15,%ymm11,%ymm11 6240 vpaddd %ymm14,%ymm10,%ymm10 6241 vpaddd %ymm13,%ymm9,%ymm9 6242 vpaddd %ymm12,%ymm8,%ymm8 6243 vpxor %ymm11,%ymm7,%ymm7 6244 vpxor %ymm10,%ymm6,%ymm6 6245 vpxor %ymm9,%ymm5,%ymm5 6246 vpxor %ymm8,%ymm4,%ymm4 6247 vmovdqa %ymm8,128(%rbp) 6248 vpsrld $25,%ymm7,%ymm8 6249 vpslld $32-25,%ymm7,%ymm7 6250 vpxor %ymm8,%ymm7,%ymm7 6251 vpsrld $25,%ymm6,%ymm8 6252 vpslld $32-25,%ymm6,%ymm6 6253 vpxor %ymm8,%ymm6,%ymm6 6254 vpsrld $25,%ymm5,%ymm8 6255 vpslld $32-25,%ymm5,%ymm5 6256 vpxor %ymm8,%ymm5,%ymm5 6257 vpsrld $25,%ymm4,%ymm8 6258 vpslld $32-25,%ymm4,%ymm4 6259 vpxor %ymm8,%ymm4,%ymm4 6260 vmovdqa 128(%rbp),%ymm8 6261 vpalignr $12,%ymm7,%ymm7,%ymm7 6262 vpalignr $8,%ymm11,%ymm11,%ymm11 6263 vpalignr $4,%ymm15,%ymm15,%ymm15 6264 vpalignr $12,%ymm6,%ymm6,%ymm6 6265 vpalignr $8,%ymm10,%ymm10,%ymm10 6266 vpalignr $4,%ymm14,%ymm14,%ymm14 6267 vpalignr $12,%ymm5,%ymm5,%ymm5 6268 vpalignr $8,%ymm9,%ymm9,%ymm9 6269 vpalignr $4,%ymm13,%ymm13,%ymm13 6270 vpalignr $12,%ymm4,%ymm4,%ymm4 6271 vpalignr $8,%ymm8,%ymm8,%ymm8 6272 vpalignr $4,%ymm12,%ymm12,%ymm12 6273 6274 decq %r10 6275 jnz 1b 6276 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 6277 vpaddd 64(%rbp),%ymm7,%ymm7 6278 vpaddd 96(%rbp),%ymm11,%ymm11 6279 vpaddd 256(%rbp),%ymm15,%ymm15 6280 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 6281 vpaddd 64(%rbp),%ymm6,%ymm6 6282 vpaddd 96(%rbp),%ymm10,%ymm10 6283 vpaddd 224(%rbp),%ymm14,%ymm14 6284 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 6285 vpaddd 64(%rbp),%ymm5,%ymm5 6286 vpaddd 96(%rbp),%ymm9,%ymm9 6287 vpaddd 192(%rbp),%ymm13,%ymm13 6288 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 6289 vpaddd 64(%rbp),%ymm4,%ymm4 6290 vpaddd 96(%rbp),%ymm8,%ymm8 6291 vpaddd 160(%rbp),%ymm12,%ymm12 6292 6293 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6294 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6295 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6296 vpand .clamp(%rip),%ymm15,%ymm15 6297 vmovdqa %ymm15,0(%rbp) 6298 movq %r8,%r8 6299 call poly_hash_ad_internal 6300 6301 vpxor 0(%rsi),%ymm3,%ymm3 6302 vpxor 32(%rsi),%ymm11,%ymm11 6303 vmovdqu %ymm3,0(%rdi) 6304 vmovdqu %ymm11,32(%rdi) 6305 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6306 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6307 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6308 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6309 vpxor 0+64(%rsi),%ymm15,%ymm15 6310 vpxor 32+64(%rsi),%ymm2,%ymm2 6311 vpxor 64+64(%rsi),%ymm6,%ymm6 6312 vpxor 96+64(%rsi),%ymm10,%ymm10 6313 vmovdqu %ymm15,0+64(%rdi) 6314 vmovdqu %ymm2,32+64(%rdi) 6315 vmovdqu %ymm6,64+64(%rdi) 6316 vmovdqu %ymm10,96+64(%rdi) 6317 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6318 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6319 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6320 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6321 vpxor 0+192(%rsi),%ymm15,%ymm15 6322 vpxor 32+192(%rsi),%ymm1,%ymm1 6323 vpxor 64+192(%rsi),%ymm5,%ymm5 6324 vpxor 96+192(%rsi),%ymm9,%ymm9 6325 vmovdqu %ymm15,0+192(%rdi) 6326 vmovdqu %ymm1,32+192(%rdi) 6327 vmovdqu %ymm5,64+192(%rdi) 6328 vmovdqu %ymm9,96+192(%rdi) 6329 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6330 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6331 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6332 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6333 vmovdqa %ymm15,%ymm8 6334 6335 leaq 320(%rsi),%rsi 6336 subq $320,%rbx 6337 movq $320,%rcx 6338 cmpq $128,%rbx 6339 jbe seal_avx2_hash 6340 vpxor 0(%rsi),%ymm0,%ymm0 6341 vpxor 32(%rsi),%ymm4,%ymm4 6342 vpxor 64(%rsi),%ymm8,%ymm8 6343 vpxor 96(%rsi),%ymm12,%ymm12 6344 vmovdqu %ymm0,320(%rdi) 6345 vmovdqu %ymm4,352(%rdi) 6346 vmovdqu %ymm8,384(%rdi) 6347 vmovdqu %ymm12,416(%rdi) 6348 leaq 128(%rsi),%rsi 6349 subq $128,%rbx 6350 movq $8,%rcx 6351 movq $2,%r8 6352 cmpq $128,%rbx 6353 jbe seal_avx2_tail_128 6354 cmpq $256,%rbx 6355 jbe seal_avx2_tail_256 6356 cmpq $384,%rbx 6357 jbe seal_avx2_tail_384 6358 cmpq $512,%rbx 6359 jbe seal_avx2_tail_512 6360 vmovdqa .chacha20_consts(%rip),%ymm0 6361 vmovdqa 64(%rbp),%ymm4 6362 vmovdqa 96(%rbp),%ymm8 6363 vmovdqa %ymm0,%ymm1 6364 vmovdqa %ymm4,%ymm5 6365 vmovdqa %ymm8,%ymm9 6366 vmovdqa %ymm0,%ymm2 6367 vmovdqa %ymm4,%ymm6 6368 vmovdqa %ymm8,%ymm10 6369 vmovdqa %ymm0,%ymm3 6370 vmovdqa %ymm4,%ymm7 6371 vmovdqa %ymm8,%ymm11 6372 vmovdqa .avx2_inc(%rip),%ymm12 6373 vpaddd 160(%rbp),%ymm12,%ymm15 6374 vpaddd %ymm15,%ymm12,%ymm14 6375 vpaddd %ymm14,%ymm12,%ymm13 6376 vpaddd %ymm13,%ymm12,%ymm12 6377 vmovdqa %ymm15,256(%rbp) 6378 vmovdqa %ymm14,224(%rbp) 6379 vmovdqa %ymm13,192(%rbp) 6380 vmovdqa %ymm12,160(%rbp) 6381 vmovdqa %ymm8,128(%rbp) 6382 vmovdqa .rol16(%rip),%ymm8 6383 vpaddd %ymm7,%ymm3,%ymm3 6384 vpaddd %ymm6,%ymm2,%ymm2 6385 vpaddd %ymm5,%ymm1,%ymm1 6386 vpaddd %ymm4,%ymm0,%ymm0 6387 vpxor %ymm3,%ymm15,%ymm15 6388 vpxor %ymm2,%ymm14,%ymm14 6389 vpxor %ymm1,%ymm13,%ymm13 6390 vpxor %ymm0,%ymm12,%ymm12 6391 vpshufb %ymm8,%ymm15,%ymm15 6392 vpshufb %ymm8,%ymm14,%ymm14 6393 vpshufb %ymm8,%ymm13,%ymm13 6394 vpshufb %ymm8,%ymm12,%ymm12 6395 vmovdqa 128(%rbp),%ymm8 6396 vpaddd %ymm15,%ymm11,%ymm11 6397 vpaddd %ymm14,%ymm10,%ymm10 6398 vpaddd %ymm13,%ymm9,%ymm9 6399 vpaddd %ymm12,%ymm8,%ymm8 6400 vpxor %ymm11,%ymm7,%ymm7 6401 vpxor %ymm10,%ymm6,%ymm6 6402 vpxor %ymm9,%ymm5,%ymm5 6403 vpxor %ymm8,%ymm4,%ymm4 6404 vmovdqa %ymm8,128(%rbp) 6405 vpsrld $20,%ymm7,%ymm8 6406 vpslld $32-20,%ymm7,%ymm7 6407 vpxor %ymm8,%ymm7,%ymm7 6408 vpsrld $20,%ymm6,%ymm8 6409 vpslld $32-20,%ymm6,%ymm6 6410 vpxor %ymm8,%ymm6,%ymm6 6411 vpsrld $20,%ymm5,%ymm8 6412 vpslld $32-20,%ymm5,%ymm5 6413 vpxor %ymm8,%ymm5,%ymm5 6414 vpsrld $20,%ymm4,%ymm8 6415 vpslld $32-20,%ymm4,%ymm4 6416 vpxor %ymm8,%ymm4,%ymm4 6417 vmovdqa .rol8(%rip),%ymm8 6418 vpaddd %ymm7,%ymm3,%ymm3 6419 vpaddd %ymm6,%ymm2,%ymm2 6420 vpaddd %ymm5,%ymm1,%ymm1 6421 vpaddd %ymm4,%ymm0,%ymm0 6422 vpxor %ymm3,%ymm15,%ymm15 6423 vpxor %ymm2,%ymm14,%ymm14 6424 vpxor %ymm1,%ymm13,%ymm13 6425 vpxor %ymm0,%ymm12,%ymm12 6426 vpshufb %ymm8,%ymm15,%ymm15 6427 vpshufb %ymm8,%ymm14,%ymm14 6428 vpshufb %ymm8,%ymm13,%ymm13 6429 vpshufb %ymm8,%ymm12,%ymm12 6430 vmovdqa 128(%rbp),%ymm8 6431 vpaddd %ymm15,%ymm11,%ymm11 6432 vpaddd %ymm14,%ymm10,%ymm10 6433 vpaddd %ymm13,%ymm9,%ymm9 6434 vpaddd %ymm12,%ymm8,%ymm8 6435 vpxor %ymm11,%ymm7,%ymm7 6436 vpxor %ymm10,%ymm6,%ymm6 6437 vpxor %ymm9,%ymm5,%ymm5 6438 vpxor %ymm8,%ymm4,%ymm4 6439 vmovdqa %ymm8,128(%rbp) 6440 vpsrld $25,%ymm7,%ymm8 6441 vpslld $32-25,%ymm7,%ymm7 6442 vpxor %ymm8,%ymm7,%ymm7 6443 vpsrld $25,%ymm6,%ymm8 6444 vpslld $32-25,%ymm6,%ymm6 6445 vpxor %ymm8,%ymm6,%ymm6 6446 vpsrld $25,%ymm5,%ymm8 6447 vpslld $32-25,%ymm5,%ymm5 6448 vpxor %ymm8,%ymm5,%ymm5 6449 vpsrld $25,%ymm4,%ymm8 6450 vpslld $32-25,%ymm4,%ymm4 6451 vpxor %ymm8,%ymm4,%ymm4 6452 vmovdqa 128(%rbp),%ymm8 6453 vpalignr $4,%ymm7,%ymm7,%ymm7 6454 vpalignr $8,%ymm11,%ymm11,%ymm11 6455 vpalignr $12,%ymm15,%ymm15,%ymm15 6456 vpalignr $4,%ymm6,%ymm6,%ymm6 6457 vpalignr $8,%ymm10,%ymm10,%ymm10 6458 vpalignr $12,%ymm14,%ymm14,%ymm14 6459 vpalignr $4,%ymm5,%ymm5,%ymm5 6460 vpalignr $8,%ymm9,%ymm9,%ymm9 6461 vpalignr $12,%ymm13,%ymm13,%ymm13 6462 vpalignr $4,%ymm4,%ymm4,%ymm4 6463 vpalignr $8,%ymm8,%ymm8,%ymm8 6464 vpalignr $12,%ymm12,%ymm12,%ymm12 6465 vmovdqa %ymm8,128(%rbp) 6466 vmovdqa .rol16(%rip),%ymm8 6467 vpaddd %ymm7,%ymm3,%ymm3 6468 vpaddd %ymm6,%ymm2,%ymm2 6469 vpaddd %ymm5,%ymm1,%ymm1 6470 vpaddd %ymm4,%ymm0,%ymm0 6471 vpxor %ymm3,%ymm15,%ymm15 6472 vpxor %ymm2,%ymm14,%ymm14 6473 vpxor %ymm1,%ymm13,%ymm13 6474 vpxor %ymm0,%ymm12,%ymm12 6475 vpshufb %ymm8,%ymm15,%ymm15 6476 vpshufb %ymm8,%ymm14,%ymm14 6477 vpshufb %ymm8,%ymm13,%ymm13 6478 vpshufb %ymm8,%ymm12,%ymm12 6479 vmovdqa 128(%rbp),%ymm8 6480 vpaddd %ymm15,%ymm11,%ymm11 6481 vpaddd %ymm14,%ymm10,%ymm10 6482 vpaddd %ymm13,%ymm9,%ymm9 6483 vpaddd %ymm12,%ymm8,%ymm8 6484 vpxor %ymm11,%ymm7,%ymm7 6485 vpxor %ymm10,%ymm6,%ymm6 6486 vpxor %ymm9,%ymm5,%ymm5 6487 vpxor %ymm8,%ymm4,%ymm4 6488 vmovdqa %ymm8,128(%rbp) 6489 vpsrld $20,%ymm7,%ymm8 6490 vpslld $32-20,%ymm7,%ymm7 6491 vpxor %ymm8,%ymm7,%ymm7 6492 vpsrld $20,%ymm6,%ymm8 6493 vpslld $32-20,%ymm6,%ymm6 6494 vpxor %ymm8,%ymm6,%ymm6 6495 vpsrld $20,%ymm5,%ymm8 6496 vpslld $32-20,%ymm5,%ymm5 6497 vpxor %ymm8,%ymm5,%ymm5 6498 vpsrld $20,%ymm4,%ymm8 6499 vpslld $32-20,%ymm4,%ymm4 6500 vpxor %ymm8,%ymm4,%ymm4 6501 vmovdqa .rol8(%rip),%ymm8 6502 vpaddd %ymm7,%ymm3,%ymm3 6503 vpaddd %ymm6,%ymm2,%ymm2 6504 vpaddd %ymm5,%ymm1,%ymm1 6505 vpaddd %ymm4,%ymm0,%ymm0 6506 vpxor %ymm3,%ymm15,%ymm15 6507 vpxor %ymm2,%ymm14,%ymm14 6508 vpxor %ymm1,%ymm13,%ymm13 6509 vpxor %ymm0,%ymm12,%ymm12 6510 vpshufb %ymm8,%ymm15,%ymm15 6511 vpshufb %ymm8,%ymm14,%ymm14 6512 vpshufb %ymm8,%ymm13,%ymm13 6513 vpshufb %ymm8,%ymm12,%ymm12 6514 vmovdqa 128(%rbp),%ymm8 6515 vpaddd %ymm15,%ymm11,%ymm11 6516 vpaddd %ymm14,%ymm10,%ymm10 6517 vpaddd %ymm13,%ymm9,%ymm9 6518 vpaddd %ymm12,%ymm8,%ymm8 6519 vpxor %ymm11,%ymm7,%ymm7 6520 vpxor %ymm10,%ymm6,%ymm6 6521 vpxor %ymm9,%ymm5,%ymm5 6522 vpxor %ymm8,%ymm4,%ymm4 6523 vmovdqa %ymm8,128(%rbp) 6524 vpsrld $25,%ymm7,%ymm8 6525 vpslld $32-25,%ymm7,%ymm7 6526 vpxor %ymm8,%ymm7,%ymm7 6527 vpsrld $25,%ymm6,%ymm8 6528 vpslld $32-25,%ymm6,%ymm6 6529 vpxor %ymm8,%ymm6,%ymm6 6530 vpsrld $25,%ymm5,%ymm8 6531 vpslld $32-25,%ymm5,%ymm5 6532 vpxor %ymm8,%ymm5,%ymm5 6533 vpsrld $25,%ymm4,%ymm8 6534 vpslld $32-25,%ymm4,%ymm4 6535 vpxor %ymm8,%ymm4,%ymm4 6536 vmovdqa 128(%rbp),%ymm8 6537 vpalignr $12,%ymm7,%ymm7,%ymm7 6538 vpalignr $8,%ymm11,%ymm11,%ymm11 6539 vpalignr $4,%ymm15,%ymm15,%ymm15 6540 vpalignr $12,%ymm6,%ymm6,%ymm6 6541 vpalignr $8,%ymm10,%ymm10,%ymm10 6542 vpalignr $4,%ymm14,%ymm14,%ymm14 6543 vpalignr $12,%ymm5,%ymm5,%ymm5 6544 vpalignr $8,%ymm9,%ymm9,%ymm9 6545 vpalignr $4,%ymm13,%ymm13,%ymm13 6546 vpalignr $12,%ymm4,%ymm4,%ymm4 6547 vpalignr $8,%ymm8,%ymm8,%ymm8 6548 vpalignr $4,%ymm12,%ymm12,%ymm12 6549 vmovdqa %ymm8,128(%rbp) 6550 vmovdqa .rol16(%rip),%ymm8 6551 vpaddd %ymm7,%ymm3,%ymm3 6552 vpaddd %ymm6,%ymm2,%ymm2 6553 vpaddd %ymm5,%ymm1,%ymm1 6554 vpaddd %ymm4,%ymm0,%ymm0 6555 vpxor %ymm3,%ymm15,%ymm15 6556 vpxor %ymm2,%ymm14,%ymm14 6557 vpxor %ymm1,%ymm13,%ymm13 6558 vpxor %ymm0,%ymm12,%ymm12 6559 vpshufb %ymm8,%ymm15,%ymm15 6560 vpshufb %ymm8,%ymm14,%ymm14 6561 vpshufb %ymm8,%ymm13,%ymm13 6562 vpshufb %ymm8,%ymm12,%ymm12 6563 vmovdqa 128(%rbp),%ymm8 6564 vpaddd %ymm15,%ymm11,%ymm11 6565 vpaddd %ymm14,%ymm10,%ymm10 6566 vpaddd %ymm13,%ymm9,%ymm9 6567 vpaddd %ymm12,%ymm8,%ymm8 6568 vpxor %ymm11,%ymm7,%ymm7 6569 vpxor %ymm10,%ymm6,%ymm6 6570 vpxor %ymm9,%ymm5,%ymm5 6571 vpxor %ymm8,%ymm4,%ymm4 6572 vmovdqa %ymm8,128(%rbp) 6573 vpsrld $20,%ymm7,%ymm8 6574 vpslld $32-20,%ymm7,%ymm7 6575 vpxor %ymm8,%ymm7,%ymm7 6576 vpsrld $20,%ymm6,%ymm8 6577 vpslld $32-20,%ymm6,%ymm6 6578 vpxor %ymm8,%ymm6,%ymm6 6579 vpsrld $20,%ymm5,%ymm8 6580 vpslld $32-20,%ymm5,%ymm5 6581 vpxor %ymm8,%ymm5,%ymm5 6582 vpsrld $20,%ymm4,%ymm8 6583 vpslld $32-20,%ymm4,%ymm4 6584 vpxor %ymm8,%ymm4,%ymm4 6585 vmovdqa .rol8(%rip),%ymm8 6586 vpaddd %ymm7,%ymm3,%ymm3 6587 vpaddd %ymm6,%ymm2,%ymm2 6588 vpaddd %ymm5,%ymm1,%ymm1 6589 vpaddd %ymm4,%ymm0,%ymm0 6590 6591 subq $16,%rdi 6592 movq $9,%rcx 6593 jmp 4f 6594 1: 6595 vmovdqa .chacha20_consts(%rip),%ymm0 6596 vmovdqa 64(%rbp),%ymm4 6597 vmovdqa 96(%rbp),%ymm8 6598 vmovdqa %ymm0,%ymm1 6599 vmovdqa %ymm4,%ymm5 6600 vmovdqa %ymm8,%ymm9 6601 vmovdqa %ymm0,%ymm2 6602 vmovdqa %ymm4,%ymm6 6603 vmovdqa %ymm8,%ymm10 6604 vmovdqa %ymm0,%ymm3 6605 vmovdqa %ymm4,%ymm7 6606 vmovdqa %ymm8,%ymm11 6607 vmovdqa .avx2_inc(%rip),%ymm12 6608 vpaddd 160(%rbp),%ymm12,%ymm15 6609 vpaddd %ymm15,%ymm12,%ymm14 6610 vpaddd %ymm14,%ymm12,%ymm13 6611 vpaddd %ymm13,%ymm12,%ymm12 6612 vmovdqa %ymm15,256(%rbp) 6613 vmovdqa %ymm14,224(%rbp) 6614 vmovdqa %ymm13,192(%rbp) 6615 vmovdqa %ymm12,160(%rbp) 6616 6617 movq $10,%rcx 6618 2: 6619 addq 0(%rdi),%r10 6620 adcq 8+0(%rdi),%r11 6621 adcq $1,%r12 6622 vmovdqa %ymm8,128(%rbp) 6623 vmovdqa .rol16(%rip),%ymm8 6624 vpaddd %ymm7,%ymm3,%ymm3 6625 vpaddd %ymm6,%ymm2,%ymm2 6626 vpaddd %ymm5,%ymm1,%ymm1 6627 vpaddd %ymm4,%ymm0,%ymm0 6628 vpxor %ymm3,%ymm15,%ymm15 6629 vpxor %ymm2,%ymm14,%ymm14 6630 vpxor %ymm1,%ymm13,%ymm13 6631 vpxor %ymm0,%ymm12,%ymm12 6632 movq 0+0(%rbp),%rdx 6633 movq %rdx,%r15 6634 mulxq %r10,%r13,%r14 6635 mulxq %r11,%rax,%rdx 6636 imulq %r12,%r15 6637 addq %rax,%r14 6638 adcq %rdx,%r15 6639 vpshufb %ymm8,%ymm15,%ymm15 6640 vpshufb %ymm8,%ymm14,%ymm14 6641 vpshufb %ymm8,%ymm13,%ymm13 6642 vpshufb %ymm8,%ymm12,%ymm12 6643 vmovdqa 128(%rbp),%ymm8 6644 vpaddd %ymm15,%ymm11,%ymm11 6645 vpaddd %ymm14,%ymm10,%ymm10 6646 vpaddd %ymm13,%ymm9,%ymm9 6647 vpaddd %ymm12,%ymm8,%ymm8 6648 movq 8+0(%rbp),%rdx 6649 mulxq %r10,%r10,%rax 6650 addq %r10,%r14 6651 mulxq %r11,%r11,%r9 6652 adcq %r11,%r15 6653 adcq $0,%r9 6654 imulq %r12,%rdx 6655 vpxor %ymm11,%ymm7,%ymm7 6656 vpxor %ymm10,%ymm6,%ymm6 6657 vpxor %ymm9,%ymm5,%ymm5 6658 vpxor %ymm8,%ymm4,%ymm4 6659 vmovdqa %ymm8,128(%rbp) 6660 vpsrld $20,%ymm7,%ymm8 6661 vpslld $32-20,%ymm7,%ymm7 6662 vpxor %ymm8,%ymm7,%ymm7 6663 vpsrld $20,%ymm6,%ymm8 6664 vpslld $32-20,%ymm6,%ymm6 6665 vpxor %ymm8,%ymm6,%ymm6 6666 vpsrld $20,%ymm5,%ymm8 6667 addq %rax,%r15 6668 adcq %rdx,%r9 6669 vpslld $32-20,%ymm5,%ymm5 6670 vpxor %ymm8,%ymm5,%ymm5 6671 vpsrld $20,%ymm4,%ymm8 6672 vpslld $32-20,%ymm4,%ymm4 6673 vpxor %ymm8,%ymm4,%ymm4 6674 vmovdqa .rol8(%rip),%ymm8 6675 vpaddd %ymm7,%ymm3,%ymm3 6676 vpaddd %ymm6,%ymm2,%ymm2 6677 vpaddd %ymm5,%ymm1,%ymm1 6678 vpaddd %ymm4,%ymm0,%ymm0 6679 movq %r13,%r10 6680 movq %r14,%r11 6681 movq %r15,%r12 6682 andq $3,%r12 6683 movq %r15,%r13 6684 andq $-4,%r13 6685 movq %r9,%r14 6686 shrdq $2,%r9,%r15 6687 shrq $2,%r9 6688 addq %r13,%r10 6689 adcq %r14,%r11 6690 adcq $0,%r12 6691 addq %r15,%r10 6692 adcq %r9,%r11 6693 adcq $0,%r12 6694 6695 4: 6696 vpxor %ymm3,%ymm15,%ymm15 6697 vpxor %ymm2,%ymm14,%ymm14 6698 vpxor %ymm1,%ymm13,%ymm13 6699 vpxor %ymm0,%ymm12,%ymm12 6700 vpshufb %ymm8,%ymm15,%ymm15 6701 vpshufb %ymm8,%ymm14,%ymm14 6702 vpshufb %ymm8,%ymm13,%ymm13 6703 vpshufb %ymm8,%ymm12,%ymm12 6704 vmovdqa 128(%rbp),%ymm8 6705 addq 16(%rdi),%r10 6706 adcq 8+16(%rdi),%r11 6707 adcq $1,%r12 6708 vpaddd %ymm15,%ymm11,%ymm11 6709 vpaddd %ymm14,%ymm10,%ymm10 6710 vpaddd %ymm13,%ymm9,%ymm9 6711 vpaddd %ymm12,%ymm8,%ymm8 6712 vpxor %ymm11,%ymm7,%ymm7 6713 vpxor %ymm10,%ymm6,%ymm6 6714 vpxor %ymm9,%ymm5,%ymm5 6715 vpxor %ymm8,%ymm4,%ymm4 6716 movq 0+0(%rbp),%rdx 6717 movq %rdx,%r15 6718 mulxq %r10,%r13,%r14 6719 mulxq %r11,%rax,%rdx 6720 imulq %r12,%r15 6721 addq %rax,%r14 6722 adcq %rdx,%r15 6723 vmovdqa %ymm8,128(%rbp) 6724 vpsrld $25,%ymm7,%ymm8 6725 vpslld $32-25,%ymm7,%ymm7 6726 vpxor %ymm8,%ymm7,%ymm7 6727 vpsrld $25,%ymm6,%ymm8 6728 vpslld $32-25,%ymm6,%ymm6 6729 vpxor %ymm8,%ymm6,%ymm6 6730 vpsrld $25,%ymm5,%ymm8 6731 vpslld $32-25,%ymm5,%ymm5 6732 vpxor %ymm8,%ymm5,%ymm5 6733 vpsrld $25,%ymm4,%ymm8 6734 vpslld $32-25,%ymm4,%ymm4 6735 vpxor %ymm8,%ymm4,%ymm4 6736 vmovdqa 128(%rbp),%ymm8 6737 vpalignr $4,%ymm7,%ymm7,%ymm7 6738 vpalignr $8,%ymm11,%ymm11,%ymm11 6739 vpalignr $12,%ymm15,%ymm15,%ymm15 6740 vpalignr $4,%ymm6,%ymm6,%ymm6 6741 movq 8+0(%rbp),%rdx 6742 mulxq %r10,%r10,%rax 6743 addq %r10,%r14 6744 mulxq %r11,%r11,%r9 6745 adcq %r11,%r15 6746 adcq $0,%r9 6747 imulq %r12,%rdx 6748 vpalignr $8,%ymm10,%ymm10,%ymm10 6749 vpalignr $12,%ymm14,%ymm14,%ymm14 6750 vpalignr $4,%ymm5,%ymm5,%ymm5 6751 vpalignr $8,%ymm9,%ymm9,%ymm9 6752 vpalignr $12,%ymm13,%ymm13,%ymm13 6753 vpalignr $4,%ymm4,%ymm4,%ymm4 6754 vpalignr $8,%ymm8,%ymm8,%ymm8 6755 vpalignr $12,%ymm12,%ymm12,%ymm12 6756 vmovdqa %ymm8,128(%rbp) 6757 vmovdqa .rol16(%rip),%ymm8 6758 vpaddd %ymm7,%ymm3,%ymm3 6759 vpaddd %ymm6,%ymm2,%ymm2 6760 vpaddd %ymm5,%ymm1,%ymm1 6761 vpaddd %ymm4,%ymm0,%ymm0 6762 vpxor %ymm3,%ymm15,%ymm15 6763 vpxor %ymm2,%ymm14,%ymm14 6764 vpxor %ymm1,%ymm13,%ymm13 6765 vpxor %ymm0,%ymm12,%ymm12 6766 addq %rax,%r15 6767 adcq %rdx,%r9 6768 vpshufb %ymm8,%ymm15,%ymm15 6769 vpshufb %ymm8,%ymm14,%ymm14 6770 vpshufb %ymm8,%ymm13,%ymm13 6771 vpshufb %ymm8,%ymm12,%ymm12 6772 vmovdqa 128(%rbp),%ymm8 6773 vpaddd %ymm15,%ymm11,%ymm11 6774 vpaddd %ymm14,%ymm10,%ymm10 6775 vpaddd %ymm13,%ymm9,%ymm9 6776 vpaddd %ymm12,%ymm8,%ymm8 6777 movq %r13,%r10 6778 movq %r14,%r11 6779 movq %r15,%r12 6780 andq $3,%r12 6781 movq %r15,%r13 6782 andq $-4,%r13 6783 movq %r9,%r14 6784 shrdq $2,%r9,%r15 6785 shrq $2,%r9 6786 addq %r13,%r10 6787 adcq %r14,%r11 6788 adcq $0,%r12 6789 addq %r15,%r10 6790 adcq %r9,%r11 6791 adcq $0,%r12 6792 vpxor %ymm11,%ymm7,%ymm7 6793 vpxor %ymm10,%ymm6,%ymm6 6794 vpxor %ymm9,%ymm5,%ymm5 6795 vpxor %ymm8,%ymm4,%ymm4 6796 vmovdqa %ymm8,128(%rbp) 6797 vpsrld $20,%ymm7,%ymm8 6798 vpslld $32-20,%ymm7,%ymm7 6799 vpxor %ymm8,%ymm7,%ymm7 6800 addq 32(%rdi),%r10 6801 adcq 8+32(%rdi),%r11 6802 adcq $1,%r12 6803 6804 leaq 48(%rdi),%rdi 6805 vpsrld $20,%ymm6,%ymm8 6806 vpslld $32-20,%ymm6,%ymm6 6807 vpxor %ymm8,%ymm6,%ymm6 6808 vpsrld $20,%ymm5,%ymm8 6809 vpslld $32-20,%ymm5,%ymm5 6810 vpxor %ymm8,%ymm5,%ymm5 6811 vpsrld $20,%ymm4,%ymm8 6812 vpslld $32-20,%ymm4,%ymm4 6813 vpxor %ymm8,%ymm4,%ymm4 6814 vmovdqa .rol8(%rip),%ymm8 6815 vpaddd %ymm7,%ymm3,%ymm3 6816 vpaddd %ymm6,%ymm2,%ymm2 6817 vpaddd %ymm5,%ymm1,%ymm1 6818 vpaddd %ymm4,%ymm0,%ymm0 6819 vpxor %ymm3,%ymm15,%ymm15 6820 vpxor %ymm2,%ymm14,%ymm14 6821 vpxor %ymm1,%ymm13,%ymm13 6822 vpxor %ymm0,%ymm12,%ymm12 6823 movq 0+0(%rbp),%rdx 6824 movq %rdx,%r15 6825 mulxq %r10,%r13,%r14 6826 mulxq %r11,%rax,%rdx 6827 imulq %r12,%r15 6828 addq %rax,%r14 6829 adcq %rdx,%r15 6830 vpshufb %ymm8,%ymm15,%ymm15 6831 vpshufb %ymm8,%ymm14,%ymm14 6832 vpshufb %ymm8,%ymm13,%ymm13 6833 vpshufb %ymm8,%ymm12,%ymm12 6834 vmovdqa 128(%rbp),%ymm8 6835 vpaddd %ymm15,%ymm11,%ymm11 6836 vpaddd %ymm14,%ymm10,%ymm10 6837 vpaddd %ymm13,%ymm9,%ymm9 6838 movq 8+0(%rbp),%rdx 6839 mulxq %r10,%r10,%rax 6840 addq %r10,%r14 6841 mulxq %r11,%r11,%r9 6842 adcq %r11,%r15 6843 adcq $0,%r9 6844 imulq %r12,%rdx 6845 vpaddd %ymm12,%ymm8,%ymm8 6846 vpxor %ymm11,%ymm7,%ymm7 6847 vpxor %ymm10,%ymm6,%ymm6 6848 vpxor %ymm9,%ymm5,%ymm5 6849 vpxor %ymm8,%ymm4,%ymm4 6850 vmovdqa %ymm8,128(%rbp) 6851 vpsrld $25,%ymm7,%ymm8 6852 vpslld $32-25,%ymm7,%ymm7 6853 addq %rax,%r15 6854 adcq %rdx,%r9 6855 vpxor %ymm8,%ymm7,%ymm7 6856 vpsrld $25,%ymm6,%ymm8 6857 vpslld $32-25,%ymm6,%ymm6 6858 vpxor %ymm8,%ymm6,%ymm6 6859 vpsrld $25,%ymm5,%ymm8 6860 vpslld $32-25,%ymm5,%ymm5 6861 vpxor %ymm8,%ymm5,%ymm5 6862 vpsrld $25,%ymm4,%ymm8 6863 vpslld $32-25,%ymm4,%ymm4 6864 vpxor %ymm8,%ymm4,%ymm4 6865 vmovdqa 128(%rbp),%ymm8 6866 vpalignr $12,%ymm7,%ymm7,%ymm7 6867 vpalignr $8,%ymm11,%ymm11,%ymm11 6868 vpalignr $4,%ymm15,%ymm15,%ymm15 6869 vpalignr $12,%ymm6,%ymm6,%ymm6 6870 vpalignr $8,%ymm10,%ymm10,%ymm10 6871 vpalignr $4,%ymm14,%ymm14,%ymm14 6872 vpalignr $12,%ymm5,%ymm5,%ymm5 6873 movq %r13,%r10 6874 movq %r14,%r11 6875 movq %r15,%r12 6876 andq $3,%r12 6877 movq %r15,%r13 6878 andq $-4,%r13 6879 movq %r9,%r14 6880 shrdq $2,%r9,%r15 6881 shrq $2,%r9 6882 addq %r13,%r10 6883 adcq %r14,%r11 6884 adcq $0,%r12 6885 addq %r15,%r10 6886 adcq %r9,%r11 6887 adcq $0,%r12 6888 vpalignr $8,%ymm9,%ymm9,%ymm9 6889 vpalignr $4,%ymm13,%ymm13,%ymm13 6890 vpalignr $12,%ymm4,%ymm4,%ymm4 6891 vpalignr $8,%ymm8,%ymm8,%ymm8 6892 vpalignr $4,%ymm12,%ymm12,%ymm12 6893 6894 decq %rcx 6895 jne 2b 6896 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 6897 vpaddd 64(%rbp),%ymm7,%ymm7 6898 vpaddd 96(%rbp),%ymm11,%ymm11 6899 vpaddd 256(%rbp),%ymm15,%ymm15 6900 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 6901 vpaddd 64(%rbp),%ymm6,%ymm6 6902 vpaddd 96(%rbp),%ymm10,%ymm10 6903 vpaddd 224(%rbp),%ymm14,%ymm14 6904 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 6905 vpaddd 64(%rbp),%ymm5,%ymm5 6906 vpaddd 96(%rbp),%ymm9,%ymm9 6907 vpaddd 192(%rbp),%ymm13,%ymm13 6908 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 6909 vpaddd 64(%rbp),%ymm4,%ymm4 6910 vpaddd 96(%rbp),%ymm8,%ymm8 6911 vpaddd 160(%rbp),%ymm12,%ymm12 6912 6913 leaq 32(%rdi),%rdi 6914 vmovdqa %ymm0,128(%rbp) 6915 addq -32(%rdi),%r10 6916 adcq 8+-32(%rdi),%r11 6917 adcq $1,%r12 6918 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 6919 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 6920 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 6921 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6922 vpxor 0+0(%rsi),%ymm0,%ymm0 6923 vpxor 32+0(%rsi),%ymm3,%ymm3 6924 vpxor 64+0(%rsi),%ymm7,%ymm7 6925 vpxor 96+0(%rsi),%ymm11,%ymm11 6926 vmovdqu %ymm0,0+0(%rdi) 6927 vmovdqu %ymm3,32+0(%rdi) 6928 vmovdqu %ymm7,64+0(%rdi) 6929 vmovdqu %ymm11,96+0(%rdi) 6930 6931 vmovdqa 128(%rbp),%ymm0 6932 movq 0+0(%rbp),%rax 6933 movq %rax,%r15 6934 mulq %r10 6935 movq %rax,%r13 6936 movq %rdx,%r14 6937 movq 0+0(%rbp),%rax 6938 mulq %r11 6939 imulq %r12,%r15 6940 addq %rax,%r14 6941 adcq %rdx,%r15 6942 movq 8+0(%rbp),%rax 6943 movq %rax,%r9 6944 mulq %r10 6945 addq %rax,%r14 6946 adcq $0,%rdx 6947 movq %rdx,%r10 6948 movq 8+0(%rbp),%rax 6949 mulq %r11 6950 addq %rax,%r15 6951 adcq $0,%rdx 6952 imulq %r12,%r9 6953 addq %r10,%r15 6954 adcq %rdx,%r9 6955 movq %r13,%r10 6956 movq %r14,%r11 6957 movq %r15,%r12 6958 andq $3,%r12 6959 movq %r15,%r13 6960 andq $-4,%r13 6961 movq %r9,%r14 6962 shrdq $2,%r9,%r15 6963 shrq $2,%r9 6964 addq %r13,%r10 6965 adcq %r14,%r11 6966 adcq $0,%r12 6967 addq %r15,%r10 6968 adcq %r9,%r11 6969 adcq $0,%r12 6970 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 6971 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6972 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6973 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6974 vpxor 0+128(%rsi),%ymm3,%ymm3 6975 vpxor 32+128(%rsi),%ymm2,%ymm2 6976 vpxor 64+128(%rsi),%ymm6,%ymm6 6977 vpxor 96+128(%rsi),%ymm10,%ymm10 6978 vmovdqu %ymm3,0+128(%rdi) 6979 vmovdqu %ymm2,32+128(%rdi) 6980 vmovdqu %ymm6,64+128(%rdi) 6981 vmovdqu %ymm10,96+128(%rdi) 6982 addq -16(%rdi),%r10 6983 adcq 8+-16(%rdi),%r11 6984 adcq $1,%r12 6985 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 6986 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6987 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6988 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6989 vpxor 0+256(%rsi),%ymm3,%ymm3 6990 vpxor 32+256(%rsi),%ymm1,%ymm1 6991 vpxor 64+256(%rsi),%ymm5,%ymm5 6992 vpxor 96+256(%rsi),%ymm9,%ymm9 6993 vmovdqu %ymm3,0+256(%rdi) 6994 vmovdqu %ymm1,32+256(%rdi) 6995 vmovdqu %ymm5,64+256(%rdi) 6996 vmovdqu %ymm9,96+256(%rdi) 6997 movq 0+0(%rbp),%rax 6998 movq %rax,%r15 6999 mulq %r10 7000 movq %rax,%r13 7001 movq %rdx,%r14 7002 movq 0+0(%rbp),%rax 7003 mulq %r11 7004 imulq %r12,%r15 7005 addq %rax,%r14 7006 adcq %rdx,%r15 7007 movq 8+0(%rbp),%rax 7008 movq %rax,%r9 7009 mulq %r10 7010 addq %rax,%r14 7011 adcq $0,%rdx 7012 movq %rdx,%r10 7013 movq 8+0(%rbp),%rax 7014 mulq %r11 7015 addq %rax,%r15 7016 adcq $0,%rdx 7017 imulq %r12,%r9 7018 addq %r10,%r15 7019 adcq %rdx,%r9 7020 movq %r13,%r10 7021 movq %r14,%r11 7022 movq %r15,%r12 7023 andq $3,%r12 7024 movq %r15,%r13 7025 andq $-4,%r13 7026 movq %r9,%r14 7027 shrdq $2,%r9,%r15 7028 shrq $2,%r9 7029 addq %r13,%r10 7030 adcq %r14,%r11 7031 adcq $0,%r12 7032 addq %r15,%r10 7033 adcq %r9,%r11 7034 adcq $0,%r12 7035 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7036 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7037 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7038 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7039 vpxor 0+384(%rsi),%ymm3,%ymm3 7040 vpxor 32+384(%rsi),%ymm0,%ymm0 7041 vpxor 64+384(%rsi),%ymm4,%ymm4 7042 vpxor 96+384(%rsi),%ymm8,%ymm8 7043 vmovdqu %ymm3,0+384(%rdi) 7044 vmovdqu %ymm0,32+384(%rdi) 7045 vmovdqu %ymm4,64+384(%rdi) 7046 vmovdqu %ymm8,96+384(%rdi) 7047 7048 leaq 512(%rsi),%rsi 7049 subq $512,%rbx 7050 cmpq $512,%rbx 7051 jg 1b 7052 addq 0(%rdi),%r10 7053 adcq 8+0(%rdi),%r11 7054 adcq $1,%r12 7055 movq 0+0(%rbp),%rax 7056 movq %rax,%r15 7057 mulq %r10 7058 movq %rax,%r13 7059 movq %rdx,%r14 7060 movq 0+0(%rbp),%rax 7061 mulq %r11 7062 imulq %r12,%r15 7063 addq %rax,%r14 7064 adcq %rdx,%r15 7065 movq 8+0(%rbp),%rax 7066 movq %rax,%r9 7067 mulq %r10 7068 addq %rax,%r14 7069 adcq $0,%rdx 7070 movq %rdx,%r10 7071 movq 8+0(%rbp),%rax 7072 mulq %r11 7073 addq %rax,%r15 7074 adcq $0,%rdx 7075 imulq %r12,%r9 7076 addq %r10,%r15 7077 adcq %rdx,%r9 7078 movq %r13,%r10 7079 movq %r14,%r11 7080 movq %r15,%r12 7081 andq $3,%r12 7082 movq %r15,%r13 7083 andq $-4,%r13 7084 movq %r9,%r14 7085 shrdq $2,%r9,%r15 7086 shrq $2,%r9 7087 addq %r13,%r10 7088 adcq %r14,%r11 7089 adcq $0,%r12 7090 addq %r15,%r10 7091 adcq %r9,%r11 7092 adcq $0,%r12 7093 addq 16(%rdi),%r10 7094 adcq 8+16(%rdi),%r11 7095 adcq $1,%r12 7096 movq 0+0(%rbp),%rax 7097 movq %rax,%r15 7098 mulq %r10 7099 movq %rax,%r13 7100 movq %rdx,%r14 7101 movq 0+0(%rbp),%rax 7102 mulq %r11 7103 imulq %r12,%r15 7104 addq %rax,%r14 7105 adcq %rdx,%r15 7106 movq 8+0(%rbp),%rax 7107 movq %rax,%r9 7108 mulq %r10 7109 addq %rax,%r14 7110 adcq $0,%rdx 7111 movq %rdx,%r10 7112 movq 8+0(%rbp),%rax 7113 mulq %r11 7114 addq %rax,%r15 7115 adcq $0,%rdx 7116 imulq %r12,%r9 7117 addq %r10,%r15 7118 adcq %rdx,%r9 7119 movq %r13,%r10 7120 movq %r14,%r11 7121 movq %r15,%r12 7122 andq $3,%r12 7123 movq %r15,%r13 7124 andq $-4,%r13 7125 movq %r9,%r14 7126 shrdq $2,%r9,%r15 7127 shrq $2,%r9 7128 addq %r13,%r10 7129 adcq %r14,%r11 7130 adcq $0,%r12 7131 addq %r15,%r10 7132 adcq %r9,%r11 7133 adcq $0,%r12 7134 7135 leaq 32(%rdi),%rdi 7136 movq $10,%rcx 7137 xorq %r8,%r8 7138 cmpq $128,%rbx 7139 ja 3f 7140 7141 seal_avx2_tail_128: 7142 vmovdqa .chacha20_consts(%rip),%ymm0 7143 vmovdqa 64(%rbp),%ymm4 7144 vmovdqa 96(%rbp),%ymm8 7145 vmovdqa .avx2_inc(%rip),%ymm12 7146 vpaddd 160(%rbp),%ymm12,%ymm12 7147 vmovdqa %ymm12,160(%rbp) 7148 7149 1: 7150 addq 0(%rdi),%r10 7151 adcq 8+0(%rdi),%r11 7152 adcq $1,%r12 7153 movq 0+0(%rbp),%rax 7154 movq %rax,%r15 7155 mulq %r10 7156 movq %rax,%r13 7157 movq %rdx,%r14 7158 movq 0+0(%rbp),%rax 7159 mulq %r11 7160 imulq %r12,%r15 7161 addq %rax,%r14 7162 adcq %rdx,%r15 7163 movq 8+0(%rbp),%rax 7164 movq %rax,%r9 7165 mulq %r10 7166 addq %rax,%r14 7167 adcq $0,%rdx 7168 movq %rdx,%r10 7169 movq 8+0(%rbp),%rax 7170 mulq %r11 7171 addq %rax,%r15 7172 adcq $0,%rdx 7173 imulq %r12,%r9 7174 addq %r10,%r15 7175 adcq %rdx,%r9 7176 movq %r13,%r10 7177 movq %r14,%r11 7178 movq %r15,%r12 7179 andq $3,%r12 7180 movq %r15,%r13 7181 andq $-4,%r13 7182 movq %r9,%r14 7183 shrdq $2,%r9,%r15 7184 shrq $2,%r9 7185 addq %r13,%r10 7186 adcq %r14,%r11 7187 adcq $0,%r12 7188 addq %r15,%r10 7189 adcq %r9,%r11 7190 adcq $0,%r12 7191 7192 leaq 16(%rdi),%rdi 7193 2: 7194 vpaddd %ymm4,%ymm0,%ymm0 7195 vpxor %ymm0,%ymm12,%ymm12 7196 vpshufb .rol16(%rip),%ymm12,%ymm12 7197 vpaddd %ymm12,%ymm8,%ymm8 7198 vpxor %ymm8,%ymm4,%ymm4 7199 vpsrld $20,%ymm4,%ymm3 7200 vpslld $12,%ymm4,%ymm4 7201 vpxor %ymm3,%ymm4,%ymm4 7202 vpaddd %ymm4,%ymm0,%ymm0 7203 vpxor %ymm0,%ymm12,%ymm12 7204 vpshufb .rol8(%rip),%ymm12,%ymm12 7205 vpaddd %ymm12,%ymm8,%ymm8 7206 vpxor %ymm8,%ymm4,%ymm4 7207 vpslld $7,%ymm4,%ymm3 7208 vpsrld $25,%ymm4,%ymm4 7209 vpxor %ymm3,%ymm4,%ymm4 7210 vpalignr $12,%ymm12,%ymm12,%ymm12 7211 vpalignr $8,%ymm8,%ymm8,%ymm8 7212 vpalignr $4,%ymm4,%ymm4,%ymm4 7213 addq 0(%rdi),%r10 7214 adcq 8+0(%rdi),%r11 7215 adcq $1,%r12 7216 movq 0+0(%rbp),%rax 7217 movq %rax,%r15 7218 mulq %r10 7219 movq %rax,%r13 7220 movq %rdx,%r14 7221 movq 0+0(%rbp),%rax 7222 mulq %r11 7223 imulq %r12,%r15 7224 addq %rax,%r14 7225 adcq %rdx,%r15 7226 movq 8+0(%rbp),%rax 7227 movq %rax,%r9 7228 mulq %r10 7229 addq %rax,%r14 7230 adcq $0,%rdx 7231 movq %rdx,%r10 7232 movq 8+0(%rbp),%rax 7233 mulq %r11 7234 addq %rax,%r15 7235 adcq $0,%rdx 7236 imulq %r12,%r9 7237 addq %r10,%r15 7238 adcq %rdx,%r9 7239 movq %r13,%r10 7240 movq %r14,%r11 7241 movq %r15,%r12 7242 andq $3,%r12 7243 movq %r15,%r13 7244 andq $-4,%r13 7245 movq %r9,%r14 7246 shrdq $2,%r9,%r15 7247 shrq $2,%r9 7248 addq %r13,%r10 7249 adcq %r14,%r11 7250 adcq $0,%r12 7251 addq %r15,%r10 7252 adcq %r9,%r11 7253 adcq $0,%r12 7254 vpaddd %ymm4,%ymm0,%ymm0 7255 vpxor %ymm0,%ymm12,%ymm12 7256 vpshufb .rol16(%rip),%ymm12,%ymm12 7257 vpaddd %ymm12,%ymm8,%ymm8 7258 vpxor %ymm8,%ymm4,%ymm4 7259 vpsrld $20,%ymm4,%ymm3 7260 vpslld $12,%ymm4,%ymm4 7261 vpxor %ymm3,%ymm4,%ymm4 7262 vpaddd %ymm4,%ymm0,%ymm0 7263 vpxor %ymm0,%ymm12,%ymm12 7264 vpshufb .rol8(%rip),%ymm12,%ymm12 7265 vpaddd %ymm12,%ymm8,%ymm8 7266 vpxor %ymm8,%ymm4,%ymm4 7267 vpslld $7,%ymm4,%ymm3 7268 vpsrld $25,%ymm4,%ymm4 7269 vpxor %ymm3,%ymm4,%ymm4 7270 vpalignr $4,%ymm12,%ymm12,%ymm12 7271 vpalignr $8,%ymm8,%ymm8,%ymm8 7272 vpalignr $12,%ymm4,%ymm4,%ymm4 7273 addq 16(%rdi),%r10 7274 adcq 8+16(%rdi),%r11 7275 adcq $1,%r12 7276 movq 0+0(%rbp),%rax 7277 movq %rax,%r15 7278 mulq %r10 7279 movq %rax,%r13 7280 movq %rdx,%r14 7281 movq 0+0(%rbp),%rax 7282 mulq %r11 7283 imulq %r12,%r15 7284 addq %rax,%r14 7285 adcq %rdx,%r15 7286 movq 8+0(%rbp),%rax 7287 movq %rax,%r9 7288 mulq %r10 7289 addq %rax,%r14 7290 adcq $0,%rdx 7291 movq %rdx,%r10 7292 movq 8+0(%rbp),%rax 7293 mulq %r11 7294 addq %rax,%r15 7295 adcq $0,%rdx 7296 imulq %r12,%r9 7297 addq %r10,%r15 7298 adcq %rdx,%r9 7299 movq %r13,%r10 7300 movq %r14,%r11 7301 movq %r15,%r12 7302 andq $3,%r12 7303 movq %r15,%r13 7304 andq $-4,%r13 7305 movq %r9,%r14 7306 shrdq $2,%r9,%r15 7307 shrq $2,%r9 7308 addq %r13,%r10 7309 adcq %r14,%r11 7310 adcq $0,%r12 7311 addq %r15,%r10 7312 adcq %r9,%r11 7313 adcq $0,%r12 7314 7315 leaq 32(%rdi),%rdi 7316 decq %rcx 7317 jg 1b 7318 decq %r8 7319 jge 2b 7320 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7321 vpaddd 64(%rbp),%ymm4,%ymm4 7322 vpaddd 96(%rbp),%ymm8,%ymm8 7323 vpaddd 160(%rbp),%ymm12,%ymm12 7324 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7325 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7326 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7327 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7328 vmovdqa %ymm3,%ymm8 7329 7330 jmp seal_avx2_short_loop 7331 3: 7332 cmpq $256,%rbx 7333 ja 3f 7334 7335 seal_avx2_tail_256: 7336 vmovdqa .chacha20_consts(%rip),%ymm0 7337 vmovdqa 64(%rbp),%ymm4 7338 vmovdqa 96(%rbp),%ymm8 7339 vmovdqa %ymm0,%ymm1 7340 vmovdqa %ymm4,%ymm5 7341 vmovdqa %ymm8,%ymm9 7342 vmovdqa .avx2_inc(%rip),%ymm12 7343 vpaddd 160(%rbp),%ymm12,%ymm13 7344 vpaddd %ymm13,%ymm12,%ymm12 7345 vmovdqa %ymm12,160(%rbp) 7346 vmovdqa %ymm13,192(%rbp) 7347 7348 1: 7349 addq 0(%rdi),%r10 7350 adcq 8+0(%rdi),%r11 7351 adcq $1,%r12 7352 movq 0+0(%rbp),%rax 7353 movq %rax,%r15 7354 mulq %r10 7355 movq %rax,%r13 7356 movq %rdx,%r14 7357 movq 0+0(%rbp),%rax 7358 mulq %r11 7359 imulq %r12,%r15 7360 addq %rax,%r14 7361 adcq %rdx,%r15 7362 movq 8+0(%rbp),%rax 7363 movq %rax,%r9 7364 mulq %r10 7365 addq %rax,%r14 7366 adcq $0,%rdx 7367 movq %rdx,%r10 7368 movq 8+0(%rbp),%rax 7369 mulq %r11 7370 addq %rax,%r15 7371 adcq $0,%rdx 7372 imulq %r12,%r9 7373 addq %r10,%r15 7374 adcq %rdx,%r9 7375 movq %r13,%r10 7376 movq %r14,%r11 7377 movq %r15,%r12 7378 andq $3,%r12 7379 movq %r15,%r13 7380 andq $-4,%r13 7381 movq %r9,%r14 7382 shrdq $2,%r9,%r15 7383 shrq $2,%r9 7384 addq %r13,%r10 7385 adcq %r14,%r11 7386 adcq $0,%r12 7387 addq %r15,%r10 7388 adcq %r9,%r11 7389 adcq $0,%r12 7390 7391 leaq 16(%rdi),%rdi 7392 2: 7393 vpaddd %ymm4,%ymm0,%ymm0 7394 vpxor %ymm0,%ymm12,%ymm12 7395 vpshufb .rol16(%rip),%ymm12,%ymm12 7396 vpaddd %ymm12,%ymm8,%ymm8 7397 vpxor %ymm8,%ymm4,%ymm4 7398 vpsrld $20,%ymm4,%ymm3 7399 vpslld $12,%ymm4,%ymm4 7400 vpxor %ymm3,%ymm4,%ymm4 7401 vpaddd %ymm4,%ymm0,%ymm0 7402 vpxor %ymm0,%ymm12,%ymm12 7403 vpshufb .rol8(%rip),%ymm12,%ymm12 7404 vpaddd %ymm12,%ymm8,%ymm8 7405 vpxor %ymm8,%ymm4,%ymm4 7406 vpslld $7,%ymm4,%ymm3 7407 vpsrld $25,%ymm4,%ymm4 7408 vpxor %ymm3,%ymm4,%ymm4 7409 vpalignr $12,%ymm12,%ymm12,%ymm12 7410 vpalignr $8,%ymm8,%ymm8,%ymm8 7411 vpalignr $4,%ymm4,%ymm4,%ymm4 7412 vpaddd %ymm5,%ymm1,%ymm1 7413 vpxor %ymm1,%ymm13,%ymm13 7414 vpshufb .rol16(%rip),%ymm13,%ymm13 7415 vpaddd %ymm13,%ymm9,%ymm9 7416 vpxor %ymm9,%ymm5,%ymm5 7417 vpsrld $20,%ymm5,%ymm3 7418 vpslld $12,%ymm5,%ymm5 7419 vpxor %ymm3,%ymm5,%ymm5 7420 vpaddd %ymm5,%ymm1,%ymm1 7421 vpxor %ymm1,%ymm13,%ymm13 7422 vpshufb .rol8(%rip),%ymm13,%ymm13 7423 vpaddd %ymm13,%ymm9,%ymm9 7424 vpxor %ymm9,%ymm5,%ymm5 7425 vpslld $7,%ymm5,%ymm3 7426 vpsrld $25,%ymm5,%ymm5 7427 vpxor %ymm3,%ymm5,%ymm5 7428 vpalignr $12,%ymm13,%ymm13,%ymm13 7429 vpalignr $8,%ymm9,%ymm9,%ymm9 7430 vpalignr $4,%ymm5,%ymm5,%ymm5 7431 addq 0(%rdi),%r10 7432 adcq 8+0(%rdi),%r11 7433 adcq $1,%r12 7434 movq 0+0(%rbp),%rax 7435 movq %rax,%r15 7436 mulq %r10 7437 movq %rax,%r13 7438 movq %rdx,%r14 7439 movq 0+0(%rbp),%rax 7440 mulq %r11 7441 imulq %r12,%r15 7442 addq %rax,%r14 7443 adcq %rdx,%r15 7444 movq 8+0(%rbp),%rax 7445 movq %rax,%r9 7446 mulq %r10 7447 addq %rax,%r14 7448 adcq $0,%rdx 7449 movq %rdx,%r10 7450 movq 8+0(%rbp),%rax 7451 mulq %r11 7452 addq %rax,%r15 7453 adcq $0,%rdx 7454 imulq %r12,%r9 7455 addq %r10,%r15 7456 adcq %rdx,%r9 7457 movq %r13,%r10 7458 movq %r14,%r11 7459 movq %r15,%r12 7460 andq $3,%r12 7461 movq %r15,%r13 7462 andq $-4,%r13 7463 movq %r9,%r14 7464 shrdq $2,%r9,%r15 7465 shrq $2,%r9 7466 addq %r13,%r10 7467 adcq %r14,%r11 7468 adcq $0,%r12 7469 addq %r15,%r10 7470 adcq %r9,%r11 7471 adcq $0,%r12 7472 vpaddd %ymm4,%ymm0,%ymm0 7473 vpxor %ymm0,%ymm12,%ymm12 7474 vpshufb .rol16(%rip),%ymm12,%ymm12 7475 vpaddd %ymm12,%ymm8,%ymm8 7476 vpxor %ymm8,%ymm4,%ymm4 7477 vpsrld $20,%ymm4,%ymm3 7478 vpslld $12,%ymm4,%ymm4 7479 vpxor %ymm3,%ymm4,%ymm4 7480 vpaddd %ymm4,%ymm0,%ymm0 7481 vpxor %ymm0,%ymm12,%ymm12 7482 vpshufb .rol8(%rip),%ymm12,%ymm12 7483 vpaddd %ymm12,%ymm8,%ymm8 7484 vpxor %ymm8,%ymm4,%ymm4 7485 vpslld $7,%ymm4,%ymm3 7486 vpsrld $25,%ymm4,%ymm4 7487 vpxor %ymm3,%ymm4,%ymm4 7488 vpalignr $4,%ymm12,%ymm12,%ymm12 7489 vpalignr $8,%ymm8,%ymm8,%ymm8 7490 vpalignr $12,%ymm4,%ymm4,%ymm4 7491 vpaddd %ymm5,%ymm1,%ymm1 7492 vpxor %ymm1,%ymm13,%ymm13 7493 vpshufb .rol16(%rip),%ymm13,%ymm13 7494 vpaddd %ymm13,%ymm9,%ymm9 7495 vpxor %ymm9,%ymm5,%ymm5 7496 vpsrld $20,%ymm5,%ymm3 7497 vpslld $12,%ymm5,%ymm5 7498 vpxor %ymm3,%ymm5,%ymm5 7499 vpaddd %ymm5,%ymm1,%ymm1 7500 vpxor %ymm1,%ymm13,%ymm13 7501 vpshufb .rol8(%rip),%ymm13,%ymm13 7502 vpaddd %ymm13,%ymm9,%ymm9 7503 vpxor %ymm9,%ymm5,%ymm5 7504 vpslld $7,%ymm5,%ymm3 7505 vpsrld $25,%ymm5,%ymm5 7506 vpxor %ymm3,%ymm5,%ymm5 7507 vpalignr $4,%ymm13,%ymm13,%ymm13 7508 vpalignr $8,%ymm9,%ymm9,%ymm9 7509 vpalignr $12,%ymm5,%ymm5,%ymm5 7510 addq 16(%rdi),%r10 7511 adcq 8+16(%rdi),%r11 7512 adcq $1,%r12 7513 movq 0+0(%rbp),%rax 7514 movq %rax,%r15 7515 mulq %r10 7516 movq %rax,%r13 7517 movq %rdx,%r14 7518 movq 0+0(%rbp),%rax 7519 mulq %r11 7520 imulq %r12,%r15 7521 addq %rax,%r14 7522 adcq %rdx,%r15 7523 movq 8+0(%rbp),%rax 7524 movq %rax,%r9 7525 mulq %r10 7526 addq %rax,%r14 7527 adcq $0,%rdx 7528 movq %rdx,%r10 7529 movq 8+0(%rbp),%rax 7530 mulq %r11 7531 addq %rax,%r15 7532 adcq $0,%rdx 7533 imulq %r12,%r9 7534 addq %r10,%r15 7535 adcq %rdx,%r9 7536 movq %r13,%r10 7537 movq %r14,%r11 7538 movq %r15,%r12 7539 andq $3,%r12 7540 movq %r15,%r13 7541 andq $-4,%r13 7542 movq %r9,%r14 7543 shrdq $2,%r9,%r15 7544 shrq $2,%r9 7545 addq %r13,%r10 7546 adcq %r14,%r11 7547 adcq $0,%r12 7548 addq %r15,%r10 7549 adcq %r9,%r11 7550 adcq $0,%r12 7551 7552 leaq 32(%rdi),%rdi 7553 decq %rcx 7554 jg 1b 7555 decq %r8 7556 jge 2b 7557 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 7558 vpaddd 64(%rbp),%ymm5,%ymm5 7559 vpaddd 96(%rbp),%ymm9,%ymm9 7560 vpaddd 192(%rbp),%ymm13,%ymm13 7561 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7562 vpaddd 64(%rbp),%ymm4,%ymm4 7563 vpaddd 96(%rbp),%ymm8,%ymm8 7564 vpaddd 160(%rbp),%ymm12,%ymm12 7565 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7566 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7567 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7568 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7569 vpxor 0+0(%rsi),%ymm3,%ymm3 7570 vpxor 32+0(%rsi),%ymm1,%ymm1 7571 vpxor 64+0(%rsi),%ymm5,%ymm5 7572 vpxor 96+0(%rsi),%ymm9,%ymm9 7573 vmovdqu %ymm3,0+0(%rdi) 7574 vmovdqu %ymm1,32+0(%rdi) 7575 vmovdqu %ymm5,64+0(%rdi) 7576 vmovdqu %ymm9,96+0(%rdi) 7577 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7578 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7579 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7580 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7581 vmovdqa %ymm3,%ymm8 7582 7583 movq $128,%rcx 7584 leaq 128(%rsi),%rsi 7585 subq $128,%rbx 7586 jmp seal_avx2_hash 7587 3: 7588 cmpq $384,%rbx 7589 ja seal_avx2_tail_512 7590 7591 seal_avx2_tail_384: 7592 vmovdqa .chacha20_consts(%rip),%ymm0 7593 vmovdqa 64(%rbp),%ymm4 7594 vmovdqa 96(%rbp),%ymm8 7595 vmovdqa %ymm0,%ymm1 7596 vmovdqa %ymm4,%ymm5 7597 vmovdqa %ymm8,%ymm9 7598 vmovdqa %ymm0,%ymm2 7599 vmovdqa %ymm4,%ymm6 7600 vmovdqa %ymm8,%ymm10 7601 vmovdqa .avx2_inc(%rip),%ymm12 7602 vpaddd 160(%rbp),%ymm12,%ymm14 7603 vpaddd %ymm14,%ymm12,%ymm13 7604 vpaddd %ymm13,%ymm12,%ymm12 7605 vmovdqa %ymm12,160(%rbp) 7606 vmovdqa %ymm13,192(%rbp) 7607 vmovdqa %ymm14,224(%rbp) 7608 7609 1: 7610 addq 0(%rdi),%r10 7611 adcq 8+0(%rdi),%r11 7612 adcq $1,%r12 7613 movq 0+0(%rbp),%rax 7614 movq %rax,%r15 7615 mulq %r10 7616 movq %rax,%r13 7617 movq %rdx,%r14 7618 movq 0+0(%rbp),%rax 7619 mulq %r11 7620 imulq %r12,%r15 7621 addq %rax,%r14 7622 adcq %rdx,%r15 7623 movq 8+0(%rbp),%rax 7624 movq %rax,%r9 7625 mulq %r10 7626 addq %rax,%r14 7627 adcq $0,%rdx 7628 movq %rdx,%r10 7629 movq 8+0(%rbp),%rax 7630 mulq %r11 7631 addq %rax,%r15 7632 adcq $0,%rdx 7633 imulq %r12,%r9 7634 addq %r10,%r15 7635 adcq %rdx,%r9 7636 movq %r13,%r10 7637 movq %r14,%r11 7638 movq %r15,%r12 7639 andq $3,%r12 7640 movq %r15,%r13 7641 andq $-4,%r13 7642 movq %r9,%r14 7643 shrdq $2,%r9,%r15 7644 shrq $2,%r9 7645 addq %r13,%r10 7646 adcq %r14,%r11 7647 adcq $0,%r12 7648 addq %r15,%r10 7649 adcq %r9,%r11 7650 adcq $0,%r12 7651 7652 leaq 16(%rdi),%rdi 7653 2: 7654 vpaddd %ymm4,%ymm0,%ymm0 7655 vpxor %ymm0,%ymm12,%ymm12 7656 vpshufb .rol16(%rip),%ymm12,%ymm12 7657 vpaddd %ymm12,%ymm8,%ymm8 7658 vpxor %ymm8,%ymm4,%ymm4 7659 vpsrld $20,%ymm4,%ymm3 7660 vpslld $12,%ymm4,%ymm4 7661 vpxor %ymm3,%ymm4,%ymm4 7662 vpaddd %ymm4,%ymm0,%ymm0 7663 vpxor %ymm0,%ymm12,%ymm12 7664 vpshufb .rol8(%rip),%ymm12,%ymm12 7665 vpaddd %ymm12,%ymm8,%ymm8 7666 vpxor %ymm8,%ymm4,%ymm4 7667 vpslld $7,%ymm4,%ymm3 7668 vpsrld $25,%ymm4,%ymm4 7669 vpxor %ymm3,%ymm4,%ymm4 7670 vpalignr $12,%ymm12,%ymm12,%ymm12 7671 vpalignr $8,%ymm8,%ymm8,%ymm8 7672 vpalignr $4,%ymm4,%ymm4,%ymm4 7673 vpaddd %ymm5,%ymm1,%ymm1 7674 vpxor %ymm1,%ymm13,%ymm13 7675 vpshufb .rol16(%rip),%ymm13,%ymm13 7676 vpaddd %ymm13,%ymm9,%ymm9 7677 vpxor %ymm9,%ymm5,%ymm5 7678 vpsrld $20,%ymm5,%ymm3 7679 vpslld $12,%ymm5,%ymm5 7680 vpxor %ymm3,%ymm5,%ymm5 7681 vpaddd %ymm5,%ymm1,%ymm1 7682 vpxor %ymm1,%ymm13,%ymm13 7683 vpshufb .rol8(%rip),%ymm13,%ymm13 7684 vpaddd %ymm13,%ymm9,%ymm9 7685 vpxor %ymm9,%ymm5,%ymm5 7686 vpslld $7,%ymm5,%ymm3 7687 vpsrld $25,%ymm5,%ymm5 7688 vpxor %ymm3,%ymm5,%ymm5 7689 vpalignr $12,%ymm13,%ymm13,%ymm13 7690 vpalignr $8,%ymm9,%ymm9,%ymm9 7691 vpalignr $4,%ymm5,%ymm5,%ymm5 7692 addq 0(%rdi),%r10 7693 adcq 8+0(%rdi),%r11 7694 adcq $1,%r12 7695 movq 0+0(%rbp),%rax 7696 movq %rax,%r15 7697 mulq %r10 7698 movq %rax,%r13 7699 movq %rdx,%r14 7700 movq 0+0(%rbp),%rax 7701 mulq %r11 7702 imulq %r12,%r15 7703 addq %rax,%r14 7704 adcq %rdx,%r15 7705 movq 8+0(%rbp),%rax 7706 movq %rax,%r9 7707 mulq %r10 7708 addq %rax,%r14 7709 adcq $0,%rdx 7710 movq %rdx,%r10 7711 movq 8+0(%rbp),%rax 7712 mulq %r11 7713 addq %rax,%r15 7714 adcq $0,%rdx 7715 imulq %r12,%r9 7716 addq %r10,%r15 7717 adcq %rdx,%r9 7718 movq %r13,%r10 7719 movq %r14,%r11 7720 movq %r15,%r12 7721 andq $3,%r12 7722 movq %r15,%r13 7723 andq $-4,%r13 7724 movq %r9,%r14 7725 shrdq $2,%r9,%r15 7726 shrq $2,%r9 7727 addq %r13,%r10 7728 adcq %r14,%r11 7729 adcq $0,%r12 7730 addq %r15,%r10 7731 adcq %r9,%r11 7732 adcq $0,%r12 7733 vpaddd %ymm6,%ymm2,%ymm2 7734 vpxor %ymm2,%ymm14,%ymm14 7735 vpshufb .rol16(%rip),%ymm14,%ymm14 7736 vpaddd %ymm14,%ymm10,%ymm10 7737 vpxor %ymm10,%ymm6,%ymm6 7738 vpsrld $20,%ymm6,%ymm3 7739 vpslld $12,%ymm6,%ymm6 7740 vpxor %ymm3,%ymm6,%ymm6 7741 vpaddd %ymm6,%ymm2,%ymm2 7742 vpxor %ymm2,%ymm14,%ymm14 7743 vpshufb .rol8(%rip),%ymm14,%ymm14 7744 vpaddd %ymm14,%ymm10,%ymm10 7745 vpxor %ymm10,%ymm6,%ymm6 7746 vpslld $7,%ymm6,%ymm3 7747 vpsrld $25,%ymm6,%ymm6 7748 vpxor %ymm3,%ymm6,%ymm6 7749 vpalignr $12,%ymm14,%ymm14,%ymm14 7750 vpalignr $8,%ymm10,%ymm10,%ymm10 7751 vpalignr $4,%ymm6,%ymm6,%ymm6 7752 vpaddd %ymm4,%ymm0,%ymm0 7753 vpxor %ymm0,%ymm12,%ymm12 7754 vpshufb .rol16(%rip),%ymm12,%ymm12 7755 vpaddd %ymm12,%ymm8,%ymm8 7756 vpxor %ymm8,%ymm4,%ymm4 7757 vpsrld $20,%ymm4,%ymm3 7758 vpslld $12,%ymm4,%ymm4 7759 vpxor %ymm3,%ymm4,%ymm4 7760 vpaddd %ymm4,%ymm0,%ymm0 7761 vpxor %ymm0,%ymm12,%ymm12 7762 vpshufb .rol8(%rip),%ymm12,%ymm12 7763 vpaddd %ymm12,%ymm8,%ymm8 7764 vpxor %ymm8,%ymm4,%ymm4 7765 vpslld $7,%ymm4,%ymm3 7766 vpsrld $25,%ymm4,%ymm4 7767 vpxor %ymm3,%ymm4,%ymm4 7768 vpalignr $4,%ymm12,%ymm12,%ymm12 7769 vpalignr $8,%ymm8,%ymm8,%ymm8 7770 vpalignr $12,%ymm4,%ymm4,%ymm4 7771 addq 16(%rdi),%r10 7772 adcq 8+16(%rdi),%r11 7773 adcq $1,%r12 7774 movq 0+0(%rbp),%rax 7775 movq %rax,%r15 7776 mulq %r10 7777 movq %rax,%r13 7778 movq %rdx,%r14 7779 movq 0+0(%rbp),%rax 7780 mulq %r11 7781 imulq %r12,%r15 7782 addq %rax,%r14 7783 adcq %rdx,%r15 7784 movq 8+0(%rbp),%rax 7785 movq %rax,%r9 7786 mulq %r10 7787 addq %rax,%r14 7788 adcq $0,%rdx 7789 movq %rdx,%r10 7790 movq 8+0(%rbp),%rax 7791 mulq %r11 7792 addq %rax,%r15 7793 adcq $0,%rdx 7794 imulq %r12,%r9 7795 addq %r10,%r15 7796 adcq %rdx,%r9 7797 movq %r13,%r10 7798 movq %r14,%r11 7799 movq %r15,%r12 7800 andq $3,%r12 7801 movq %r15,%r13 7802 andq $-4,%r13 7803 movq %r9,%r14 7804 shrdq $2,%r9,%r15 7805 shrq $2,%r9 7806 addq %r13,%r10 7807 adcq %r14,%r11 7808 adcq $0,%r12 7809 addq %r15,%r10 7810 adcq %r9,%r11 7811 adcq $0,%r12 7812 vpaddd %ymm5,%ymm1,%ymm1 7813 vpxor %ymm1,%ymm13,%ymm13 7814 vpshufb .rol16(%rip),%ymm13,%ymm13 7815 vpaddd %ymm13,%ymm9,%ymm9 7816 vpxor %ymm9,%ymm5,%ymm5 7817 vpsrld $20,%ymm5,%ymm3 7818 vpslld $12,%ymm5,%ymm5 7819 vpxor %ymm3,%ymm5,%ymm5 7820 vpaddd %ymm5,%ymm1,%ymm1 7821 vpxor %ymm1,%ymm13,%ymm13 7822 vpshufb .rol8(%rip),%ymm13,%ymm13 7823 vpaddd %ymm13,%ymm9,%ymm9 7824 vpxor %ymm9,%ymm5,%ymm5 7825 vpslld $7,%ymm5,%ymm3 7826 vpsrld $25,%ymm5,%ymm5 7827 vpxor %ymm3,%ymm5,%ymm5 7828 vpalignr $4,%ymm13,%ymm13,%ymm13 7829 vpalignr $8,%ymm9,%ymm9,%ymm9 7830 vpalignr $12,%ymm5,%ymm5,%ymm5 7831 vpaddd %ymm6,%ymm2,%ymm2 7832 vpxor %ymm2,%ymm14,%ymm14 7833 vpshufb .rol16(%rip),%ymm14,%ymm14 7834 vpaddd %ymm14,%ymm10,%ymm10 7835 vpxor %ymm10,%ymm6,%ymm6 7836 vpsrld $20,%ymm6,%ymm3 7837 vpslld $12,%ymm6,%ymm6 7838 vpxor %ymm3,%ymm6,%ymm6 7839 vpaddd %ymm6,%ymm2,%ymm2 7840 vpxor %ymm2,%ymm14,%ymm14 7841 vpshufb .rol8(%rip),%ymm14,%ymm14 7842 vpaddd %ymm14,%ymm10,%ymm10 7843 vpxor %ymm10,%ymm6,%ymm6 7844 vpslld $7,%ymm6,%ymm3 7845 vpsrld $25,%ymm6,%ymm6 7846 vpxor %ymm3,%ymm6,%ymm6 7847 vpalignr $4,%ymm14,%ymm14,%ymm14 7848 vpalignr $8,%ymm10,%ymm10,%ymm10 7849 vpalignr $12,%ymm6,%ymm6,%ymm6 7850 7851 leaq 32(%rdi),%rdi 7852 decq %rcx 7853 jg 1b 7854 decq %r8 7855 jge 2b 7856 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 7857 vpaddd 64(%rbp),%ymm6,%ymm6 7858 vpaddd 96(%rbp),%ymm10,%ymm10 7859 vpaddd 224(%rbp),%ymm14,%ymm14 7860 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 7861 vpaddd 64(%rbp),%ymm5,%ymm5 7862 vpaddd 96(%rbp),%ymm9,%ymm9 7863 vpaddd 192(%rbp),%ymm13,%ymm13 7864 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7865 vpaddd 64(%rbp),%ymm4,%ymm4 7866 vpaddd 96(%rbp),%ymm8,%ymm8 7867 vpaddd 160(%rbp),%ymm12,%ymm12 7868 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7869 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7870 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7871 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7872 vpxor 0+0(%rsi),%ymm3,%ymm3 7873 vpxor 32+0(%rsi),%ymm2,%ymm2 7874 vpxor 64+0(%rsi),%ymm6,%ymm6 7875 vpxor 96+0(%rsi),%ymm10,%ymm10 7876 vmovdqu %ymm3,0+0(%rdi) 7877 vmovdqu %ymm2,32+0(%rdi) 7878 vmovdqu %ymm6,64+0(%rdi) 7879 vmovdqu %ymm10,96+0(%rdi) 7880 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7881 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7882 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7883 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7884 vpxor 0+128(%rsi),%ymm3,%ymm3 7885 vpxor 32+128(%rsi),%ymm1,%ymm1 7886 vpxor 64+128(%rsi),%ymm5,%ymm5 7887 vpxor 96+128(%rsi),%ymm9,%ymm9 7888 vmovdqu %ymm3,0+128(%rdi) 7889 vmovdqu %ymm1,32+128(%rdi) 7890 vmovdqu %ymm5,64+128(%rdi) 7891 vmovdqu %ymm9,96+128(%rdi) 7892 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7893 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7894 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7895 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7896 vmovdqa %ymm3,%ymm8 7897 7898 movq $256,%rcx 7899 leaq 256(%rsi),%rsi 7900 subq $256,%rbx 7901 jmp seal_avx2_hash 7902 7903 seal_avx2_tail_512: 7904 vmovdqa .chacha20_consts(%rip),%ymm0 7905 vmovdqa 64(%rbp),%ymm4 7906 vmovdqa 96(%rbp),%ymm8 7907 vmovdqa %ymm0,%ymm1 7908 vmovdqa %ymm4,%ymm5 7909 vmovdqa %ymm8,%ymm9 7910 vmovdqa %ymm0,%ymm2 7911 vmovdqa %ymm4,%ymm6 7912 vmovdqa %ymm8,%ymm10 7913 vmovdqa %ymm0,%ymm3 7914 vmovdqa %ymm4,%ymm7 7915 vmovdqa %ymm8,%ymm11 7916 vmovdqa .avx2_inc(%rip),%ymm12 7917 vpaddd 160(%rbp),%ymm12,%ymm15 7918 vpaddd %ymm15,%ymm12,%ymm14 7919 vpaddd %ymm14,%ymm12,%ymm13 7920 vpaddd %ymm13,%ymm12,%ymm12 7921 vmovdqa %ymm15,256(%rbp) 7922 vmovdqa %ymm14,224(%rbp) 7923 vmovdqa %ymm13,192(%rbp) 7924 vmovdqa %ymm12,160(%rbp) 7925 7926 1: 7927 addq 0(%rdi),%r10 7928 adcq 8+0(%rdi),%r11 7929 adcq $1,%r12 7930 movq 0+0(%rbp),%rdx 7931 movq %rdx,%r15 7932 mulxq %r10,%r13,%r14 7933 mulxq %r11,%rax,%rdx 7934 imulq %r12,%r15 7935 addq %rax,%r14 7936 adcq %rdx,%r15 7937 movq 8+0(%rbp),%rdx 7938 mulxq %r10,%r10,%rax 7939 addq %r10,%r14 7940 mulxq %r11,%r11,%r9 7941 adcq %r11,%r15 7942 adcq $0,%r9 7943 imulq %r12,%rdx 7944 addq %rax,%r15 7945 adcq %rdx,%r9 7946 movq %r13,%r10 7947 movq %r14,%r11 7948 movq %r15,%r12 7949 andq $3,%r12 7950 movq %r15,%r13 7951 andq $-4,%r13 7952 movq %r9,%r14 7953 shrdq $2,%r9,%r15 7954 shrq $2,%r9 7955 addq %r13,%r10 7956 adcq %r14,%r11 7957 adcq $0,%r12 7958 addq %r15,%r10 7959 adcq %r9,%r11 7960 adcq $0,%r12 7961 7962 leaq 16(%rdi),%rdi 7963 2: 7964 vmovdqa %ymm8,128(%rbp) 7965 vmovdqa .rol16(%rip),%ymm8 7966 vpaddd %ymm7,%ymm3,%ymm3 7967 vpaddd %ymm6,%ymm2,%ymm2 7968 vpaddd %ymm5,%ymm1,%ymm1 7969 vpaddd %ymm4,%ymm0,%ymm0 7970 vpxor %ymm3,%ymm15,%ymm15 7971 vpxor %ymm2,%ymm14,%ymm14 7972 vpxor %ymm1,%ymm13,%ymm13 7973 vpxor %ymm0,%ymm12,%ymm12 7974 vpshufb %ymm8,%ymm15,%ymm15 7975 vpshufb %ymm8,%ymm14,%ymm14 7976 vpshufb %ymm8,%ymm13,%ymm13 7977 vpshufb %ymm8,%ymm12,%ymm12 7978 vmovdqa 128(%rbp),%ymm8 7979 vpaddd %ymm15,%ymm11,%ymm11 7980 vpaddd %ymm14,%ymm10,%ymm10 7981 vpaddd %ymm13,%ymm9,%ymm9 7982 vpaddd %ymm12,%ymm8,%ymm8 7983 vpxor %ymm11,%ymm7,%ymm7 7984 addq 0(%rdi),%r10 7985 adcq 8+0(%rdi),%r11 7986 adcq $1,%r12 7987 vpxor %ymm10,%ymm6,%ymm6 7988 vpxor %ymm9,%ymm5,%ymm5 7989 vpxor %ymm8,%ymm4,%ymm4 7990 vmovdqa %ymm8,128(%rbp) 7991 vpsrld $20,%ymm7,%ymm8 7992 vpslld $32-20,%ymm7,%ymm7 7993 vpxor %ymm8,%ymm7,%ymm7 7994 vpsrld $20,%ymm6,%ymm8 7995 vpslld $32-20,%ymm6,%ymm6 7996 vpxor %ymm8,%ymm6,%ymm6 7997 vpsrld $20,%ymm5,%ymm8 7998 vpslld $32-20,%ymm5,%ymm5 7999 vpxor %ymm8,%ymm5,%ymm5 8000 vpsrld $20,%ymm4,%ymm8 8001 vpslld $32-20,%ymm4,%ymm4 8002 vpxor %ymm8,%ymm4,%ymm4 8003 vmovdqa .rol8(%rip),%ymm8 8004 vpaddd %ymm7,%ymm3,%ymm3 8005 vpaddd %ymm6,%ymm2,%ymm2 8006 vpaddd %ymm5,%ymm1,%ymm1 8007 movq 0+0(%rbp),%rdx 8008 movq %rdx,%r15 8009 mulxq %r10,%r13,%r14 8010 mulxq %r11,%rax,%rdx 8011 imulq %r12,%r15 8012 addq %rax,%r14 8013 adcq %rdx,%r15 8014 vpaddd %ymm4,%ymm0,%ymm0 8015 vpxor %ymm3,%ymm15,%ymm15 8016 vpxor %ymm2,%ymm14,%ymm14 8017 vpxor %ymm1,%ymm13,%ymm13 8018 vpxor %ymm0,%ymm12,%ymm12 8019 vpshufb %ymm8,%ymm15,%ymm15 8020 vpshufb %ymm8,%ymm14,%ymm14 8021 vpshufb %ymm8,%ymm13,%ymm13 8022 vpshufb %ymm8,%ymm12,%ymm12 8023 vmovdqa 128(%rbp),%ymm8 8024 vpaddd %ymm15,%ymm11,%ymm11 8025 vpaddd %ymm14,%ymm10,%ymm10 8026 vpaddd %ymm13,%ymm9,%ymm9 8027 vpaddd %ymm12,%ymm8,%ymm8 8028 vpxor %ymm11,%ymm7,%ymm7 8029 vpxor %ymm10,%ymm6,%ymm6 8030 vpxor %ymm9,%ymm5,%ymm5 8031 vpxor %ymm8,%ymm4,%ymm4 8032 vmovdqa %ymm8,128(%rbp) 8033 vpsrld $25,%ymm7,%ymm8 8034 movq 8+0(%rbp),%rdx 8035 mulxq %r10,%r10,%rax 8036 addq %r10,%r14 8037 mulxq %r11,%r11,%r9 8038 adcq %r11,%r15 8039 adcq $0,%r9 8040 imulq %r12,%rdx 8041 vpslld $32-25,%ymm7,%ymm7 8042 vpxor %ymm8,%ymm7,%ymm7 8043 vpsrld $25,%ymm6,%ymm8 8044 vpslld $32-25,%ymm6,%ymm6 8045 vpxor %ymm8,%ymm6,%ymm6 8046 vpsrld $25,%ymm5,%ymm8 8047 vpslld $32-25,%ymm5,%ymm5 8048 vpxor %ymm8,%ymm5,%ymm5 8049 vpsrld $25,%ymm4,%ymm8 8050 vpslld $32-25,%ymm4,%ymm4 8051 vpxor %ymm8,%ymm4,%ymm4 8052 vmovdqa 128(%rbp),%ymm8 8053 vpalignr $4,%ymm7,%ymm7,%ymm7 8054 vpalignr $8,%ymm11,%ymm11,%ymm11 8055 vpalignr $12,%ymm15,%ymm15,%ymm15 8056 vpalignr $4,%ymm6,%ymm6,%ymm6 8057 vpalignr $8,%ymm10,%ymm10,%ymm10 8058 vpalignr $12,%ymm14,%ymm14,%ymm14 8059 vpalignr $4,%ymm5,%ymm5,%ymm5 8060 vpalignr $8,%ymm9,%ymm9,%ymm9 8061 addq %rax,%r15 8062 adcq %rdx,%r9 8063 vpalignr $12,%ymm13,%ymm13,%ymm13 8064 vpalignr $4,%ymm4,%ymm4,%ymm4 8065 vpalignr $8,%ymm8,%ymm8,%ymm8 8066 vpalignr $12,%ymm12,%ymm12,%ymm12 8067 vmovdqa %ymm8,128(%rbp) 8068 vmovdqa .rol16(%rip),%ymm8 8069 vpaddd %ymm7,%ymm3,%ymm3 8070 vpaddd %ymm6,%ymm2,%ymm2 8071 vpaddd %ymm5,%ymm1,%ymm1 8072 vpaddd %ymm4,%ymm0,%ymm0 8073 vpxor %ymm3,%ymm15,%ymm15 8074 vpxor %ymm2,%ymm14,%ymm14 8075 vpxor %ymm1,%ymm13,%ymm13 8076 vpxor %ymm0,%ymm12,%ymm12 8077 vpshufb %ymm8,%ymm15,%ymm15 8078 vpshufb %ymm8,%ymm14,%ymm14 8079 vpshufb %ymm8,%ymm13,%ymm13 8080 vpshufb %ymm8,%ymm12,%ymm12 8081 vmovdqa 128(%rbp),%ymm8 8082 vpaddd %ymm15,%ymm11,%ymm11 8083 movq %r13,%r10 8084 movq %r14,%r11 8085 movq %r15,%r12 8086 andq $3,%r12 8087 movq %r15,%r13 8088 andq $-4,%r13 8089 movq %r9,%r14 8090 shrdq $2,%r9,%r15 8091 shrq $2,%r9 8092 addq %r13,%r10 8093 adcq %r14,%r11 8094 adcq $0,%r12 8095 addq %r15,%r10 8096 adcq %r9,%r11 8097 adcq $0,%r12 8098 vpaddd %ymm14,%ymm10,%ymm10 8099 vpaddd %ymm13,%ymm9,%ymm9 8100 vpaddd %ymm12,%ymm8,%ymm8 8101 vpxor %ymm11,%ymm7,%ymm7 8102 vpxor %ymm10,%ymm6,%ymm6 8103 vpxor %ymm9,%ymm5,%ymm5 8104 vpxor %ymm8,%ymm4,%ymm4 8105 vmovdqa %ymm8,128(%rbp) 8106 vpsrld $20,%ymm7,%ymm8 8107 vpslld $32-20,%ymm7,%ymm7 8108 vpxor %ymm8,%ymm7,%ymm7 8109 vpsrld $20,%ymm6,%ymm8 8110 vpslld $32-20,%ymm6,%ymm6 8111 vpxor %ymm8,%ymm6,%ymm6 8112 vpsrld $20,%ymm5,%ymm8 8113 vpslld $32-20,%ymm5,%ymm5 8114 vpxor %ymm8,%ymm5,%ymm5 8115 vpsrld $20,%ymm4,%ymm8 8116 vpslld $32-20,%ymm4,%ymm4 8117 vpxor %ymm8,%ymm4,%ymm4 8118 addq 16(%rdi),%r10 8119 adcq 8+16(%rdi),%r11 8120 adcq $1,%r12 8121 vmovdqa .rol8(%rip),%ymm8 8122 vpaddd %ymm7,%ymm3,%ymm3 8123 vpaddd %ymm6,%ymm2,%ymm2 8124 vpaddd %ymm5,%ymm1,%ymm1 8125 vpaddd %ymm4,%ymm0,%ymm0 8126 vpxor %ymm3,%ymm15,%ymm15 8127 vpxor %ymm2,%ymm14,%ymm14 8128 vpxor %ymm1,%ymm13,%ymm13 8129 vpxor %ymm0,%ymm12,%ymm12 8130 vpshufb %ymm8,%ymm15,%ymm15 8131 vpshufb %ymm8,%ymm14,%ymm14 8132 vpshufb %ymm8,%ymm13,%ymm13 8133 vpshufb %ymm8,%ymm12,%ymm12 8134 vmovdqa 128(%rbp),%ymm8 8135 vpaddd %ymm15,%ymm11,%ymm11 8136 vpaddd %ymm14,%ymm10,%ymm10 8137 vpaddd %ymm13,%ymm9,%ymm9 8138 vpaddd %ymm12,%ymm8,%ymm8 8139 vpxor %ymm11,%ymm7,%ymm7 8140 vpxor %ymm10,%ymm6,%ymm6 8141 movq 0+0(%rbp),%rdx 8142 movq %rdx,%r15 8143 mulxq %r10,%r13,%r14 8144 mulxq %r11,%rax,%rdx 8145 imulq %r12,%r15 8146 addq %rax,%r14 8147 adcq %rdx,%r15 8148 vpxor %ymm9,%ymm5,%ymm5 8149 vpxor %ymm8,%ymm4,%ymm4 8150 vmovdqa %ymm8,128(%rbp) 8151 vpsrld $25,%ymm7,%ymm8 8152 vpslld $32-25,%ymm7,%ymm7 8153 vpxor %ymm8,%ymm7,%ymm7 8154 vpsrld $25,%ymm6,%ymm8 8155 vpslld $32-25,%ymm6,%ymm6 8156 vpxor %ymm8,%ymm6,%ymm6 8157 vpsrld $25,%ymm5,%ymm8 8158 vpslld $32-25,%ymm5,%ymm5 8159 vpxor %ymm8,%ymm5,%ymm5 8160 vpsrld $25,%ymm4,%ymm8 8161 vpslld $32-25,%ymm4,%ymm4 8162 vpxor %ymm8,%ymm4,%ymm4 8163 vmovdqa 128(%rbp),%ymm8 8164 vpalignr $12,%ymm7,%ymm7,%ymm7 8165 vpalignr $8,%ymm11,%ymm11,%ymm11 8166 vpalignr $4,%ymm15,%ymm15,%ymm15 8167 vpalignr $12,%ymm6,%ymm6,%ymm6 8168 movq 8+0(%rbp),%rdx 8169 mulxq %r10,%r10,%rax 8170 addq %r10,%r14 8171 mulxq %r11,%r11,%r9 8172 adcq %r11,%r15 8173 adcq $0,%r9 8174 imulq %r12,%rdx 8175 vpalignr $8,%ymm10,%ymm10,%ymm10 8176 vpalignr $4,%ymm14,%ymm14,%ymm14 8177 vpalignr $12,%ymm5,%ymm5,%ymm5 8178 vpalignr $8,%ymm9,%ymm9,%ymm9 8179 vpalignr $4,%ymm13,%ymm13,%ymm13 8180 vpalignr $12,%ymm4,%ymm4,%ymm4 8181 vpalignr $8,%ymm8,%ymm8,%ymm8 8182 vpalignr $4,%ymm12,%ymm12,%ymm12 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 addq %rax,%r15 8196 adcq %rdx,%r9 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 movq %r13,%r10 8218 movq %r14,%r11 8219 movq %r15,%r12 8220 andq $3,%r12 8221 movq %r15,%r13 8222 andq $-4,%r13 8223 movq %r9,%r14 8224 shrdq $2,%r9,%r15 8225 shrq $2,%r9 8226 addq %r13,%r10 8227 adcq %r14,%r11 8228 adcq $0,%r12 8229 addq %r15,%r10 8230 adcq %r9,%r11 8231 adcq $0,%r12 8232 8233 leaq 32(%rdi),%rdi 8234 decq %rcx 8235 jg 1b 8236 decq %r8 8237 jge 2b 8238 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 8239 vpaddd 64(%rbp),%ymm7,%ymm7 8240 vpaddd 96(%rbp),%ymm11,%ymm11 8241 vpaddd 256(%rbp),%ymm15,%ymm15 8242 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 8243 vpaddd 64(%rbp),%ymm6,%ymm6 8244 vpaddd 96(%rbp),%ymm10,%ymm10 8245 vpaddd 224(%rbp),%ymm14,%ymm14 8246 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8247 vpaddd 64(%rbp),%ymm5,%ymm5 8248 vpaddd 96(%rbp),%ymm9,%ymm9 8249 vpaddd 192(%rbp),%ymm13,%ymm13 8250 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8251 vpaddd 64(%rbp),%ymm4,%ymm4 8252 vpaddd 96(%rbp),%ymm8,%ymm8 8253 vpaddd 160(%rbp),%ymm12,%ymm12 8254 8255 vmovdqa %ymm0,128(%rbp) 8256 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8257 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8258 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8259 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8260 vpxor 0+0(%rsi),%ymm0,%ymm0 8261 vpxor 32+0(%rsi),%ymm3,%ymm3 8262 vpxor 64+0(%rsi),%ymm7,%ymm7 8263 vpxor 96+0(%rsi),%ymm11,%ymm11 8264 vmovdqu %ymm0,0+0(%rdi) 8265 vmovdqu %ymm3,32+0(%rdi) 8266 vmovdqu %ymm7,64+0(%rdi) 8267 vmovdqu %ymm11,96+0(%rdi) 8268 8269 vmovdqa 128(%rbp),%ymm0 8270 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8271 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8272 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8273 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8274 vpxor 0+128(%rsi),%ymm3,%ymm3 8275 vpxor 32+128(%rsi),%ymm2,%ymm2 8276 vpxor 64+128(%rsi),%ymm6,%ymm6 8277 vpxor 96+128(%rsi),%ymm10,%ymm10 8278 vmovdqu %ymm3,0+128(%rdi) 8279 vmovdqu %ymm2,32+128(%rdi) 8280 vmovdqu %ymm6,64+128(%rdi) 8281 vmovdqu %ymm10,96+128(%rdi) 8282 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8283 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8284 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8285 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8286 vpxor 0+256(%rsi),%ymm3,%ymm3 8287 vpxor 32+256(%rsi),%ymm1,%ymm1 8288 vpxor 64+256(%rsi),%ymm5,%ymm5 8289 vpxor 96+256(%rsi),%ymm9,%ymm9 8290 vmovdqu %ymm3,0+256(%rdi) 8291 vmovdqu %ymm1,32+256(%rdi) 8292 vmovdqu %ymm5,64+256(%rdi) 8293 vmovdqu %ymm9,96+256(%rdi) 8294 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8295 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8296 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8297 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8298 vmovdqa %ymm3,%ymm8 8299 8300 movq $384,%rcx 8301 leaq 384(%rsi),%rsi 8302 subq $384,%rbx 8303 jmp seal_avx2_hash 8304 8305 seal_avx2_320: 8306 vmovdqa %ymm0,%ymm1 8307 vmovdqa %ymm0,%ymm2 8308 vmovdqa %ymm4,%ymm5 8309 vmovdqa %ymm4,%ymm6 8310 vmovdqa %ymm8,%ymm9 8311 vmovdqa %ymm8,%ymm10 8312 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 8313 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 8314 vmovdqa %ymm4,%ymm7 8315 vmovdqa %ymm8,%ymm11 8316 vmovdqa %ymm12,160(%rbp) 8317 vmovdqa %ymm13,192(%rbp) 8318 vmovdqa %ymm14,224(%rbp) 8319 movq $10,%r10 8320 1: 8321 vpaddd %ymm4,%ymm0,%ymm0 8322 vpxor %ymm0,%ymm12,%ymm12 8323 vpshufb .rol16(%rip),%ymm12,%ymm12 8324 vpaddd %ymm12,%ymm8,%ymm8 8325 vpxor %ymm8,%ymm4,%ymm4 8326 vpsrld $20,%ymm4,%ymm3 8327 vpslld $12,%ymm4,%ymm4 8328 vpxor %ymm3,%ymm4,%ymm4 8329 vpaddd %ymm4,%ymm0,%ymm0 8330 vpxor %ymm0,%ymm12,%ymm12 8331 vpshufb .rol8(%rip),%ymm12,%ymm12 8332 vpaddd %ymm12,%ymm8,%ymm8 8333 vpxor %ymm8,%ymm4,%ymm4 8334 vpslld $7,%ymm4,%ymm3 8335 vpsrld $25,%ymm4,%ymm4 8336 vpxor %ymm3,%ymm4,%ymm4 8337 vpalignr $12,%ymm12,%ymm12,%ymm12 8338 vpalignr $8,%ymm8,%ymm8,%ymm8 8339 vpalignr $4,%ymm4,%ymm4,%ymm4 8340 vpaddd %ymm5,%ymm1,%ymm1 8341 vpxor %ymm1,%ymm13,%ymm13 8342 vpshufb .rol16(%rip),%ymm13,%ymm13 8343 vpaddd %ymm13,%ymm9,%ymm9 8344 vpxor %ymm9,%ymm5,%ymm5 8345 vpsrld $20,%ymm5,%ymm3 8346 vpslld $12,%ymm5,%ymm5 8347 vpxor %ymm3,%ymm5,%ymm5 8348 vpaddd %ymm5,%ymm1,%ymm1 8349 vpxor %ymm1,%ymm13,%ymm13 8350 vpshufb .rol8(%rip),%ymm13,%ymm13 8351 vpaddd %ymm13,%ymm9,%ymm9 8352 vpxor %ymm9,%ymm5,%ymm5 8353 vpslld $7,%ymm5,%ymm3 8354 vpsrld $25,%ymm5,%ymm5 8355 vpxor %ymm3,%ymm5,%ymm5 8356 vpalignr $12,%ymm13,%ymm13,%ymm13 8357 vpalignr $8,%ymm9,%ymm9,%ymm9 8358 vpalignr $4,%ymm5,%ymm5,%ymm5 8359 vpaddd %ymm6,%ymm2,%ymm2 8360 vpxor %ymm2,%ymm14,%ymm14 8361 vpshufb .rol16(%rip),%ymm14,%ymm14 8362 vpaddd %ymm14,%ymm10,%ymm10 8363 vpxor %ymm10,%ymm6,%ymm6 8364 vpsrld $20,%ymm6,%ymm3 8365 vpslld $12,%ymm6,%ymm6 8366 vpxor %ymm3,%ymm6,%ymm6 8367 vpaddd %ymm6,%ymm2,%ymm2 8368 vpxor %ymm2,%ymm14,%ymm14 8369 vpshufb .rol8(%rip),%ymm14,%ymm14 8370 vpaddd %ymm14,%ymm10,%ymm10 8371 vpxor %ymm10,%ymm6,%ymm6 8372 vpslld $7,%ymm6,%ymm3 8373 vpsrld $25,%ymm6,%ymm6 8374 vpxor %ymm3,%ymm6,%ymm6 8375 vpalignr $12,%ymm14,%ymm14,%ymm14 8376 vpalignr $8,%ymm10,%ymm10,%ymm10 8377 vpalignr $4,%ymm6,%ymm6,%ymm6 8378 vpaddd %ymm4,%ymm0,%ymm0 8379 vpxor %ymm0,%ymm12,%ymm12 8380 vpshufb .rol16(%rip),%ymm12,%ymm12 8381 vpaddd %ymm12,%ymm8,%ymm8 8382 vpxor %ymm8,%ymm4,%ymm4 8383 vpsrld $20,%ymm4,%ymm3 8384 vpslld $12,%ymm4,%ymm4 8385 vpxor %ymm3,%ymm4,%ymm4 8386 vpaddd %ymm4,%ymm0,%ymm0 8387 vpxor %ymm0,%ymm12,%ymm12 8388 vpshufb .rol8(%rip),%ymm12,%ymm12 8389 vpaddd %ymm12,%ymm8,%ymm8 8390 vpxor %ymm8,%ymm4,%ymm4 8391 vpslld $7,%ymm4,%ymm3 8392 vpsrld $25,%ymm4,%ymm4 8393 vpxor %ymm3,%ymm4,%ymm4 8394 vpalignr $4,%ymm12,%ymm12,%ymm12 8395 vpalignr $8,%ymm8,%ymm8,%ymm8 8396 vpalignr $12,%ymm4,%ymm4,%ymm4 8397 vpaddd %ymm5,%ymm1,%ymm1 8398 vpxor %ymm1,%ymm13,%ymm13 8399 vpshufb .rol16(%rip),%ymm13,%ymm13 8400 vpaddd %ymm13,%ymm9,%ymm9 8401 vpxor %ymm9,%ymm5,%ymm5 8402 vpsrld $20,%ymm5,%ymm3 8403 vpslld $12,%ymm5,%ymm5 8404 vpxor %ymm3,%ymm5,%ymm5 8405 vpaddd %ymm5,%ymm1,%ymm1 8406 vpxor %ymm1,%ymm13,%ymm13 8407 vpshufb .rol8(%rip),%ymm13,%ymm13 8408 vpaddd %ymm13,%ymm9,%ymm9 8409 vpxor %ymm9,%ymm5,%ymm5 8410 vpslld $7,%ymm5,%ymm3 8411 vpsrld $25,%ymm5,%ymm5 8412 vpxor %ymm3,%ymm5,%ymm5 8413 vpalignr $4,%ymm13,%ymm13,%ymm13 8414 vpalignr $8,%ymm9,%ymm9,%ymm9 8415 vpalignr $12,%ymm5,%ymm5,%ymm5 8416 vpaddd %ymm6,%ymm2,%ymm2 8417 vpxor %ymm2,%ymm14,%ymm14 8418 vpshufb .rol16(%rip),%ymm14,%ymm14 8419 vpaddd %ymm14,%ymm10,%ymm10 8420 vpxor %ymm10,%ymm6,%ymm6 8421 vpsrld $20,%ymm6,%ymm3 8422 vpslld $12,%ymm6,%ymm6 8423 vpxor %ymm3,%ymm6,%ymm6 8424 vpaddd %ymm6,%ymm2,%ymm2 8425 vpxor %ymm2,%ymm14,%ymm14 8426 vpshufb .rol8(%rip),%ymm14,%ymm14 8427 vpaddd %ymm14,%ymm10,%ymm10 8428 vpxor %ymm10,%ymm6,%ymm6 8429 vpslld $7,%ymm6,%ymm3 8430 vpsrld $25,%ymm6,%ymm6 8431 vpxor %ymm3,%ymm6,%ymm6 8432 vpalignr $4,%ymm14,%ymm14,%ymm14 8433 vpalignr $8,%ymm10,%ymm10,%ymm10 8434 vpalignr $12,%ymm6,%ymm6,%ymm6 8435 8436 decq %r10 8437 jne 1b 8438 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8439 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8440 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 8441 vpaddd %ymm7,%ymm4,%ymm4 8442 vpaddd %ymm7,%ymm5,%ymm5 8443 vpaddd %ymm7,%ymm6,%ymm6 8444 vpaddd %ymm11,%ymm8,%ymm8 8445 vpaddd %ymm11,%ymm9,%ymm9 8446 vpaddd %ymm11,%ymm10,%ymm10 8447 vpaddd 160(%rbp),%ymm12,%ymm12 8448 vpaddd 192(%rbp),%ymm13,%ymm13 8449 vpaddd 224(%rbp),%ymm14,%ymm14 8450 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8451 8452 vpand .clamp(%rip),%ymm3,%ymm3 8453 vmovdqa %ymm3,0(%rbp) 8454 8455 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8456 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8457 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8458 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8459 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8460 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8461 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8462 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8463 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8464 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8465 jmp seal_avx2_short 8466 8467 seal_avx2_192: 8468 vmovdqa %ymm0,%ymm1 8469 vmovdqa %ymm0,%ymm2 8470 vmovdqa %ymm4,%ymm5 8471 vmovdqa %ymm4,%ymm6 8472 vmovdqa %ymm8,%ymm9 8473 vmovdqa %ymm8,%ymm10 8474 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 8475 vmovdqa %ymm12,%ymm11 8476 vmovdqa %ymm13,%ymm15 8477 movq $10,%r10 8478 1: 8479 vpaddd %ymm4,%ymm0,%ymm0 8480 vpxor %ymm0,%ymm12,%ymm12 8481 vpshufb .rol16(%rip),%ymm12,%ymm12 8482 vpaddd %ymm12,%ymm8,%ymm8 8483 vpxor %ymm8,%ymm4,%ymm4 8484 vpsrld $20,%ymm4,%ymm3 8485 vpslld $12,%ymm4,%ymm4 8486 vpxor %ymm3,%ymm4,%ymm4 8487 vpaddd %ymm4,%ymm0,%ymm0 8488 vpxor %ymm0,%ymm12,%ymm12 8489 vpshufb .rol8(%rip),%ymm12,%ymm12 8490 vpaddd %ymm12,%ymm8,%ymm8 8491 vpxor %ymm8,%ymm4,%ymm4 8492 vpslld $7,%ymm4,%ymm3 8493 vpsrld $25,%ymm4,%ymm4 8494 vpxor %ymm3,%ymm4,%ymm4 8495 vpalignr $12,%ymm12,%ymm12,%ymm12 8496 vpalignr $8,%ymm8,%ymm8,%ymm8 8497 vpalignr $4,%ymm4,%ymm4,%ymm4 8498 vpaddd %ymm5,%ymm1,%ymm1 8499 vpxor %ymm1,%ymm13,%ymm13 8500 vpshufb .rol16(%rip),%ymm13,%ymm13 8501 vpaddd %ymm13,%ymm9,%ymm9 8502 vpxor %ymm9,%ymm5,%ymm5 8503 vpsrld $20,%ymm5,%ymm3 8504 vpslld $12,%ymm5,%ymm5 8505 vpxor %ymm3,%ymm5,%ymm5 8506 vpaddd %ymm5,%ymm1,%ymm1 8507 vpxor %ymm1,%ymm13,%ymm13 8508 vpshufb .rol8(%rip),%ymm13,%ymm13 8509 vpaddd %ymm13,%ymm9,%ymm9 8510 vpxor %ymm9,%ymm5,%ymm5 8511 vpslld $7,%ymm5,%ymm3 8512 vpsrld $25,%ymm5,%ymm5 8513 vpxor %ymm3,%ymm5,%ymm5 8514 vpalignr $12,%ymm13,%ymm13,%ymm13 8515 vpalignr $8,%ymm9,%ymm9,%ymm9 8516 vpalignr $4,%ymm5,%ymm5,%ymm5 8517 vpaddd %ymm4,%ymm0,%ymm0 8518 vpxor %ymm0,%ymm12,%ymm12 8519 vpshufb .rol16(%rip),%ymm12,%ymm12 8520 vpaddd %ymm12,%ymm8,%ymm8 8521 vpxor %ymm8,%ymm4,%ymm4 8522 vpsrld $20,%ymm4,%ymm3 8523 vpslld $12,%ymm4,%ymm4 8524 vpxor %ymm3,%ymm4,%ymm4 8525 vpaddd %ymm4,%ymm0,%ymm0 8526 vpxor %ymm0,%ymm12,%ymm12 8527 vpshufb .rol8(%rip),%ymm12,%ymm12 8528 vpaddd %ymm12,%ymm8,%ymm8 8529 vpxor %ymm8,%ymm4,%ymm4 8530 vpslld $7,%ymm4,%ymm3 8531 vpsrld $25,%ymm4,%ymm4 8532 vpxor %ymm3,%ymm4,%ymm4 8533 vpalignr $4,%ymm12,%ymm12,%ymm12 8534 vpalignr $8,%ymm8,%ymm8,%ymm8 8535 vpalignr $12,%ymm4,%ymm4,%ymm4 8536 vpaddd %ymm5,%ymm1,%ymm1 8537 vpxor %ymm1,%ymm13,%ymm13 8538 vpshufb .rol16(%rip),%ymm13,%ymm13 8539 vpaddd %ymm13,%ymm9,%ymm9 8540 vpxor %ymm9,%ymm5,%ymm5 8541 vpsrld $20,%ymm5,%ymm3 8542 vpslld $12,%ymm5,%ymm5 8543 vpxor %ymm3,%ymm5,%ymm5 8544 vpaddd %ymm5,%ymm1,%ymm1 8545 vpxor %ymm1,%ymm13,%ymm13 8546 vpshufb .rol8(%rip),%ymm13,%ymm13 8547 vpaddd %ymm13,%ymm9,%ymm9 8548 vpxor %ymm9,%ymm5,%ymm5 8549 vpslld $7,%ymm5,%ymm3 8550 vpsrld $25,%ymm5,%ymm5 8551 vpxor %ymm3,%ymm5,%ymm5 8552 vpalignr $4,%ymm13,%ymm13,%ymm13 8553 vpalignr $8,%ymm9,%ymm9,%ymm9 8554 vpalignr $12,%ymm5,%ymm5,%ymm5 8555 8556 decq %r10 8557 jne 1b 8558 vpaddd %ymm2,%ymm0,%ymm0 8559 vpaddd %ymm2,%ymm1,%ymm1 8560 vpaddd %ymm6,%ymm4,%ymm4 8561 vpaddd %ymm6,%ymm5,%ymm5 8562 vpaddd %ymm10,%ymm8,%ymm8 8563 vpaddd %ymm10,%ymm9,%ymm9 8564 vpaddd %ymm11,%ymm12,%ymm12 8565 vpaddd %ymm15,%ymm13,%ymm13 8566 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8567 8568 vpand .clamp(%rip),%ymm3,%ymm3 8569 vmovdqa %ymm3,0(%rbp) 8570 8571 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8572 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8573 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8574 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8575 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8576 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8577 seal_avx2_short: 8578 movq %r8,%r8 8579 call poly_hash_ad_internal 8580 xorq %rcx,%rcx 8581 seal_avx2_hash: 8582 cmpq $16,%rcx 8583 jb seal_avx2_short_loop 8584 addq 0(%rdi),%r10 8585 adcq 8+0(%rdi),%r11 8586 adcq $1,%r12 8587 movq 0+0(%rbp),%rax 8588 movq %rax,%r15 8589 mulq %r10 8590 movq %rax,%r13 8591 movq %rdx,%r14 8592 movq 0+0(%rbp),%rax 8593 mulq %r11 8594 imulq %r12,%r15 8595 addq %rax,%r14 8596 adcq %rdx,%r15 8597 movq 8+0(%rbp),%rax 8598 movq %rax,%r9 8599 mulq %r10 8600 addq %rax,%r14 8601 adcq $0,%rdx 8602 movq %rdx,%r10 8603 movq 8+0(%rbp),%rax 8604 mulq %r11 8605 addq %rax,%r15 8606 adcq $0,%rdx 8607 imulq %r12,%r9 8608 addq %r10,%r15 8609 adcq %rdx,%r9 8610 movq %r13,%r10 8611 movq %r14,%r11 8612 movq %r15,%r12 8613 andq $3,%r12 8614 movq %r15,%r13 8615 andq $-4,%r13 8616 movq %r9,%r14 8617 shrdq $2,%r9,%r15 8618 shrq $2,%r9 8619 addq %r13,%r10 8620 adcq %r14,%r11 8621 adcq $0,%r12 8622 addq %r15,%r10 8623 adcq %r9,%r11 8624 adcq $0,%r12 8625 8626 subq $16,%rcx 8627 addq $16,%rdi 8628 jmp seal_avx2_hash 8629 seal_avx2_short_loop: 8630 cmpq $32,%rbx 8631 jb seal_avx2_short_tail 8632 subq $32,%rbx 8633 8634 vpxor (%rsi),%ymm0,%ymm0 8635 vmovdqu %ymm0,(%rdi) 8636 leaq 32(%rsi),%rsi 8637 8638 addq 0(%rdi),%r10 8639 adcq 8+0(%rdi),%r11 8640 adcq $1,%r12 8641 movq 0+0(%rbp),%rax 8642 movq %rax,%r15 8643 mulq %r10 8644 movq %rax,%r13 8645 movq %rdx,%r14 8646 movq 0+0(%rbp),%rax 8647 mulq %r11 8648 imulq %r12,%r15 8649 addq %rax,%r14 8650 adcq %rdx,%r15 8651 movq 8+0(%rbp),%rax 8652 movq %rax,%r9 8653 mulq %r10 8654 addq %rax,%r14 8655 adcq $0,%rdx 8656 movq %rdx,%r10 8657 movq 8+0(%rbp),%rax 8658 mulq %r11 8659 addq %rax,%r15 8660 adcq $0,%rdx 8661 imulq %r12,%r9 8662 addq %r10,%r15 8663 adcq %rdx,%r9 8664 movq %r13,%r10 8665 movq %r14,%r11 8666 movq %r15,%r12 8667 andq $3,%r12 8668 movq %r15,%r13 8669 andq $-4,%r13 8670 movq %r9,%r14 8671 shrdq $2,%r9,%r15 8672 shrq $2,%r9 8673 addq %r13,%r10 8674 adcq %r14,%r11 8675 adcq $0,%r12 8676 addq %r15,%r10 8677 adcq %r9,%r11 8678 adcq $0,%r12 8679 addq 16(%rdi),%r10 8680 adcq 8+16(%rdi),%r11 8681 adcq $1,%r12 8682 movq 0+0(%rbp),%rax 8683 movq %rax,%r15 8684 mulq %r10 8685 movq %rax,%r13 8686 movq %rdx,%r14 8687 movq 0+0(%rbp),%rax 8688 mulq %r11 8689 imulq %r12,%r15 8690 addq %rax,%r14 8691 adcq %rdx,%r15 8692 movq 8+0(%rbp),%rax 8693 movq %rax,%r9 8694 mulq %r10 8695 addq %rax,%r14 8696 adcq $0,%rdx 8697 movq %rdx,%r10 8698 movq 8+0(%rbp),%rax 8699 mulq %r11 8700 addq %rax,%r15 8701 adcq $0,%rdx 8702 imulq %r12,%r9 8703 addq %r10,%r15 8704 adcq %rdx,%r9 8705 movq %r13,%r10 8706 movq %r14,%r11 8707 movq %r15,%r12 8708 andq $3,%r12 8709 movq %r15,%r13 8710 andq $-4,%r13 8711 movq %r9,%r14 8712 shrdq $2,%r9,%r15 8713 shrq $2,%r9 8714 addq %r13,%r10 8715 adcq %r14,%r11 8716 adcq $0,%r12 8717 addq %r15,%r10 8718 adcq %r9,%r11 8719 adcq $0,%r12 8720 8721 leaq 32(%rdi),%rdi 8722 8723 vmovdqa %ymm4,%ymm0 8724 vmovdqa %ymm8,%ymm4 8725 vmovdqa %ymm12,%ymm8 8726 vmovdqa %ymm1,%ymm12 8727 vmovdqa %ymm5,%ymm1 8728 vmovdqa %ymm9,%ymm5 8729 vmovdqa %ymm13,%ymm9 8730 vmovdqa %ymm2,%ymm13 8731 vmovdqa %ymm6,%ymm2 8732 jmp seal_avx2_short_loop 8733 seal_avx2_short_tail: 8734 cmpq $16,%rbx 8735 jb 1f 8736 subq $16,%rbx 8737 vpxor (%rsi),%xmm0,%xmm3 8738 vmovdqu %xmm3,(%rdi) 8739 leaq 16(%rsi),%rsi 8740 addq 0(%rdi),%r10 8741 adcq 8+0(%rdi),%r11 8742 adcq $1,%r12 8743 movq 0+0(%rbp),%rax 8744 movq %rax,%r15 8745 mulq %r10 8746 movq %rax,%r13 8747 movq %rdx,%r14 8748 movq 0+0(%rbp),%rax 8749 mulq %r11 8750 imulq %r12,%r15 8751 addq %rax,%r14 8752 adcq %rdx,%r15 8753 movq 8+0(%rbp),%rax 8754 movq %rax,%r9 8755 mulq %r10 8756 addq %rax,%r14 8757 adcq $0,%rdx 8758 movq %rdx,%r10 8759 movq 8+0(%rbp),%rax 8760 mulq %r11 8761 addq %rax,%r15 8762 adcq $0,%rdx 8763 imulq %r12,%r9 8764 addq %r10,%r15 8765 adcq %rdx,%r9 8766 movq %r13,%r10 8767 movq %r14,%r11 8768 movq %r15,%r12 8769 andq $3,%r12 8770 movq %r15,%r13 8771 andq $-4,%r13 8772 movq %r9,%r14 8773 shrdq $2,%r9,%r15 8774 shrq $2,%r9 8775 addq %r13,%r10 8776 adcq %r14,%r11 8777 adcq $0,%r12 8778 addq %r15,%r10 8779 adcq %r9,%r11 8780 adcq $0,%r12 8781 8782 leaq 16(%rdi),%rdi 8783 vextracti128 $1,%ymm0,%xmm0 8784 1: 8785 vzeroupper 8786 jmp seal_sse_tail_16 8787 .cfi_endproc 8788 #endif 8789