1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 2 .text 3 4 5 chacha20_poly1305_constants: 6 7 .p2align 6 8 .chacha20_consts: 9 .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 10 .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 11 .rol8: 12 .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 13 .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 14 .rol16: 15 .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 16 .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 17 .avx2_init: 18 .long 0,0,0,0 19 .sse_inc: 20 .long 1,0,0,0 21 .avx2_inc: 22 .long 2,0,0,0,2,0,0,0 23 .clamp: 24 .quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC 25 .quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF 26 .p2align 4 27 .and_masks: 28 .byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 29 .byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 30 .byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 31 .byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 32 .byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 33 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 34 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 35 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 36 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 37 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 38 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 39 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 40 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 41 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 42 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 43 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff 44 45 46 .p2align 6 47 poly_hash_ad_internal: 48 49 xorq %r10,%r10 50 xorq %r11,%r11 51 xorq %r12,%r12 52 cmpq $13,%r8 53 jne hash_ad_loop 54 poly_fast_tls_ad: 55 56 movq (%rcx),%r10 57 movq 5(%rcx),%r11 58 shrq $24,%r11 59 movq $1,%r12 60 movq 0+0(%rbp),%rax 61 movq %rax,%r15 62 mulq %r10 63 movq %rax,%r13 64 movq %rdx,%r14 65 movq 0+0(%rbp),%rax 66 mulq %r11 67 imulq %r12,%r15 68 addq %rax,%r14 69 adcq %rdx,%r15 70 movq 8+0(%rbp),%rax 71 movq %rax,%r9 72 mulq %r10 73 addq %rax,%r14 74 adcq $0,%rdx 75 movq %rdx,%r10 76 movq 8+0(%rbp),%rax 77 mulq %r11 78 addq %rax,%r15 79 adcq $0,%rdx 80 imulq %r12,%r9 81 addq %r10,%r15 82 adcq %rdx,%r9 83 movq %r13,%r10 84 movq %r14,%r11 85 movq %r15,%r12 86 andq $3,%r12 87 movq %r15,%r13 88 andq $-4,%r13 89 movq %r9,%r14 90 shrdq $2,%r9,%r15 91 shrq $2,%r9 92 addq %r13,%r10 93 adcq %r14,%r11 94 adcq $0,%r12 95 addq %r15,%r10 96 adcq %r9,%r11 97 adcq $0,%r12 98 99 .byte 0xf3,0xc3 100 hash_ad_loop: 101 102 cmpq $16,%r8 103 jb hash_ad_tail 104 addq 0(%rcx),%r10 105 adcq 8+0(%rcx),%r11 106 adcq $1,%r12 107 movq 0+0(%rbp),%rax 108 movq %rax,%r15 109 mulq %r10 110 movq %rax,%r13 111 movq %rdx,%r14 112 movq 0+0(%rbp),%rax 113 mulq %r11 114 imulq %r12,%r15 115 addq %rax,%r14 116 adcq %rdx,%r15 117 movq 8+0(%rbp),%rax 118 movq %rax,%r9 119 mulq %r10 120 addq %rax,%r14 121 adcq $0,%rdx 122 movq %rdx,%r10 123 movq 8+0(%rbp),%rax 124 mulq %r11 125 
addq %rax,%r15 126 adcq $0,%rdx 127 imulq %r12,%r9 128 addq %r10,%r15 129 adcq %rdx,%r9 130 movq %r13,%r10 131 movq %r14,%r11 132 movq %r15,%r12 133 andq $3,%r12 134 movq %r15,%r13 135 andq $-4,%r13 136 movq %r9,%r14 137 shrdq $2,%r9,%r15 138 shrq $2,%r9 139 addq %r13,%r10 140 adcq %r14,%r11 141 adcq $0,%r12 142 addq %r15,%r10 143 adcq %r9,%r11 144 adcq $0,%r12 145 146 leaq 16(%rcx),%rcx 147 subq $16,%r8 148 jmp hash_ad_loop 149 hash_ad_tail: 150 cmpq $0,%r8 151 je 1f 152 153 xorq %r13,%r13 154 xorq %r14,%r14 155 xorq %r15,%r15 156 addq %r8,%rcx 157 hash_ad_tail_loop: 158 shldq $8,%r13,%r14 159 shlq $8,%r13 160 movzbq -1(%rcx),%r15 161 xorq %r15,%r13 162 decq %rcx 163 decq %r8 164 jne hash_ad_tail_loop 165 166 addq %r13,%r10 167 adcq %r14,%r11 168 adcq $1,%r12 169 movq 0+0(%rbp),%rax 170 movq %rax,%r15 171 mulq %r10 172 movq %rax,%r13 173 movq %rdx,%r14 174 movq 0+0(%rbp),%rax 175 mulq %r11 176 imulq %r12,%r15 177 addq %rax,%r14 178 adcq %rdx,%r15 179 movq 8+0(%rbp),%rax 180 movq %rax,%r9 181 mulq %r10 182 addq %rax,%r14 183 adcq $0,%rdx 184 movq %rdx,%r10 185 movq 8+0(%rbp),%rax 186 mulq %r11 187 addq %rax,%r15 188 adcq $0,%rdx 189 imulq %r12,%r9 190 addq %r10,%r15 191 adcq %rdx,%r9 192 movq %r13,%r10 193 movq %r14,%r11 194 movq %r15,%r12 195 andq $3,%r12 196 movq %r15,%r13 197 andq $-4,%r13 198 movq %r9,%r14 199 shrdq $2,%r9,%r15 200 shrq $2,%r9 201 addq %r13,%r10 202 adcq %r14,%r11 203 adcq $0,%r12 204 addq %r15,%r10 205 adcq %r9,%r11 206 adcq $0,%r12 207 208 209 1: 210 .byte 0xf3,0xc3 211 212 213 214 .globl _chacha20_poly1305_open 215 .private_extern _chacha20_poly1305_open 216 217 .p2align 6 218 _chacha20_poly1305_open: 219 220 pushq %rbp 221 222 pushq %rbx 223 224 pushq %r12 225 226 pushq %r13 227 228 pushq %r14 229 230 pushq %r15 231 232 233 234 pushq %r9 235 236 subq $288 + 32,%rsp 237 238 239 240 241 242 243 244 leaq 32(%rsp),%rbp 245 andq $-32,%rbp 246 movq %rdx,8+32(%rbp) 247 movq %r8,0+32(%rbp) 248 movq %rdx,%rbx 249 250 movl _OPENSSL_ia32cap_P+8(%rip),%eax 251 andl $288,%eax 252 xorl $288,%eax 253 jz chacha20_poly1305_open_avx2 254 255 1: 256 cmpq $128,%rbx 257 jbe open_sse_128 258 259 movdqa .chacha20_consts(%rip),%xmm0 260 movdqu 0(%r9),%xmm4 261 movdqu 16(%r9),%xmm8 262 movdqu 32(%r9),%xmm12 263 movdqa %xmm12,%xmm7 264 265 movdqa %xmm4,48(%rbp) 266 movdqa %xmm8,64(%rbp) 267 movdqa %xmm12,96(%rbp) 268 movq $10,%r10 269 1: 270 paddd %xmm4,%xmm0 271 pxor %xmm0,%xmm12 272 pshufb .rol16(%rip),%xmm12 273 paddd %xmm12,%xmm8 274 pxor %xmm8,%xmm4 275 movdqa %xmm4,%xmm3 276 pslld $12,%xmm3 277 psrld $20,%xmm4 278 pxor %xmm3,%xmm4 279 paddd %xmm4,%xmm0 280 pxor %xmm0,%xmm12 281 pshufb .rol8(%rip),%xmm12 282 paddd %xmm12,%xmm8 283 pxor %xmm8,%xmm4 284 movdqa %xmm4,%xmm3 285 pslld $7,%xmm3 286 psrld $25,%xmm4 287 pxor %xmm3,%xmm4 288 .byte 102,15,58,15,228,4 289 .byte 102,69,15,58,15,192,8 290 .byte 102,69,15,58,15,228,12 291 paddd %xmm4,%xmm0 292 pxor %xmm0,%xmm12 293 pshufb .rol16(%rip),%xmm12 294 paddd %xmm12,%xmm8 295 pxor %xmm8,%xmm4 296 movdqa %xmm4,%xmm3 297 pslld $12,%xmm3 298 psrld $20,%xmm4 299 pxor %xmm3,%xmm4 300 paddd %xmm4,%xmm0 301 pxor %xmm0,%xmm12 302 pshufb .rol8(%rip),%xmm12 303 paddd %xmm12,%xmm8 304 pxor %xmm8,%xmm4 305 movdqa %xmm4,%xmm3 306 pslld $7,%xmm3 307 psrld $25,%xmm4 308 pxor %xmm3,%xmm4 309 .byte 102,15,58,15,228,12 310 .byte 102,69,15,58,15,192,8 311 .byte 102,69,15,58,15,228,4 312 313 decq %r10 314 jne 1b 315 316 paddd .chacha20_consts(%rip),%xmm0 317 paddd 48(%rbp),%xmm4 318 319 pand .clamp(%rip),%xmm0 320 movdqa %xmm0,0(%rbp) 321 movdqa %xmm4,16(%rbp) 
322 323 movq %r8,%r8 324 call poly_hash_ad_internal 325 open_sse_main_loop: 326 cmpq $256,%rbx 327 jb 2f 328 329 movdqa .chacha20_consts(%rip),%xmm0 330 movdqa 48(%rbp),%xmm4 331 movdqa 64(%rbp),%xmm8 332 movdqa %xmm0,%xmm1 333 movdqa %xmm4,%xmm5 334 movdqa %xmm8,%xmm9 335 movdqa %xmm0,%xmm2 336 movdqa %xmm4,%xmm6 337 movdqa %xmm8,%xmm10 338 movdqa %xmm0,%xmm3 339 movdqa %xmm4,%xmm7 340 movdqa %xmm8,%xmm11 341 movdqa 96(%rbp),%xmm15 342 paddd .sse_inc(%rip),%xmm15 343 movdqa %xmm15,%xmm14 344 paddd .sse_inc(%rip),%xmm14 345 movdqa %xmm14,%xmm13 346 paddd .sse_inc(%rip),%xmm13 347 movdqa %xmm13,%xmm12 348 paddd .sse_inc(%rip),%xmm12 349 movdqa %xmm12,96(%rbp) 350 movdqa %xmm13,112(%rbp) 351 movdqa %xmm14,128(%rbp) 352 movdqa %xmm15,144(%rbp) 353 354 355 356 movq $4,%rcx 357 movq %rsi,%r8 358 1: 359 movdqa %xmm8,80(%rbp) 360 movdqa .rol16(%rip),%xmm8 361 paddd %xmm7,%xmm3 362 paddd %xmm6,%xmm2 363 paddd %xmm5,%xmm1 364 paddd %xmm4,%xmm0 365 pxor %xmm3,%xmm15 366 pxor %xmm2,%xmm14 367 pxor %xmm1,%xmm13 368 pxor %xmm0,%xmm12 369 .byte 102,69,15,56,0,248 370 .byte 102,69,15,56,0,240 371 .byte 102,69,15,56,0,232 372 .byte 102,69,15,56,0,224 373 movdqa 80(%rbp),%xmm8 374 paddd %xmm15,%xmm11 375 paddd %xmm14,%xmm10 376 paddd %xmm13,%xmm9 377 paddd %xmm12,%xmm8 378 pxor %xmm11,%xmm7 379 addq 0(%r8),%r10 380 adcq 8+0(%r8),%r11 381 adcq $1,%r12 382 383 leaq 16(%r8),%r8 384 pxor %xmm10,%xmm6 385 pxor %xmm9,%xmm5 386 pxor %xmm8,%xmm4 387 movdqa %xmm8,80(%rbp) 388 movdqa %xmm7,%xmm8 389 psrld $20,%xmm8 390 pslld $32-20,%xmm7 391 pxor %xmm8,%xmm7 392 movdqa %xmm6,%xmm8 393 psrld $20,%xmm8 394 pslld $32-20,%xmm6 395 pxor %xmm8,%xmm6 396 movdqa %xmm5,%xmm8 397 psrld $20,%xmm8 398 pslld $32-20,%xmm5 399 pxor %xmm8,%xmm5 400 movdqa %xmm4,%xmm8 401 psrld $20,%xmm8 402 pslld $32-20,%xmm4 403 pxor %xmm8,%xmm4 404 movq 0+0(%rbp),%rax 405 movq %rax,%r15 406 mulq %r10 407 movq %rax,%r13 408 movq %rdx,%r14 409 movq 0+0(%rbp),%rax 410 mulq %r11 411 imulq %r12,%r15 412 addq %rax,%r14 413 adcq %rdx,%r15 414 movdqa .rol8(%rip),%xmm8 415 paddd %xmm7,%xmm3 416 paddd %xmm6,%xmm2 417 paddd %xmm5,%xmm1 418 paddd %xmm4,%xmm0 419 pxor %xmm3,%xmm15 420 pxor %xmm2,%xmm14 421 pxor %xmm1,%xmm13 422 pxor %xmm0,%xmm12 423 .byte 102,69,15,56,0,248 424 .byte 102,69,15,56,0,240 425 .byte 102,69,15,56,0,232 426 .byte 102,69,15,56,0,224 427 movdqa 80(%rbp),%xmm8 428 paddd %xmm15,%xmm11 429 paddd %xmm14,%xmm10 430 paddd %xmm13,%xmm9 431 paddd %xmm12,%xmm8 432 pxor %xmm11,%xmm7 433 pxor %xmm10,%xmm6 434 movq 8+0(%rbp),%rax 435 movq %rax,%r9 436 mulq %r10 437 addq %rax,%r14 438 adcq $0,%rdx 439 movq %rdx,%r10 440 movq 8+0(%rbp),%rax 441 mulq %r11 442 addq %rax,%r15 443 adcq $0,%rdx 444 pxor %xmm9,%xmm5 445 pxor %xmm8,%xmm4 446 movdqa %xmm8,80(%rbp) 447 movdqa %xmm7,%xmm8 448 psrld $25,%xmm8 449 pslld $32-25,%xmm7 450 pxor %xmm8,%xmm7 451 movdqa %xmm6,%xmm8 452 psrld $25,%xmm8 453 pslld $32-25,%xmm6 454 pxor %xmm8,%xmm6 455 movdqa %xmm5,%xmm8 456 psrld $25,%xmm8 457 pslld $32-25,%xmm5 458 pxor %xmm8,%xmm5 459 movdqa %xmm4,%xmm8 460 psrld $25,%xmm8 461 pslld $32-25,%xmm4 462 pxor %xmm8,%xmm4 463 movdqa 80(%rbp),%xmm8 464 imulq %r12,%r9 465 addq %r10,%r15 466 adcq %rdx,%r9 467 .byte 102,15,58,15,255,4 468 .byte 102,69,15,58,15,219,8 469 .byte 102,69,15,58,15,255,12 470 .byte 102,15,58,15,246,4 471 .byte 102,69,15,58,15,210,8 472 .byte 102,69,15,58,15,246,12 473 .byte 102,15,58,15,237,4 474 .byte 102,69,15,58,15,201,8 475 .byte 102,69,15,58,15,237,12 476 .byte 102,15,58,15,228,4 477 .byte 102,69,15,58,15,192,8 478 .byte 102,69,15,58,15,228,12 
479 movdqa %xmm8,80(%rbp) 480 movdqa .rol16(%rip),%xmm8 481 paddd %xmm7,%xmm3 482 paddd %xmm6,%xmm2 483 paddd %xmm5,%xmm1 484 paddd %xmm4,%xmm0 485 pxor %xmm3,%xmm15 486 pxor %xmm2,%xmm14 487 movq %r13,%r10 488 movq %r14,%r11 489 movq %r15,%r12 490 andq $3,%r12 491 movq %r15,%r13 492 andq $-4,%r13 493 movq %r9,%r14 494 shrdq $2,%r9,%r15 495 shrq $2,%r9 496 addq %r13,%r10 497 adcq %r14,%r11 498 adcq $0,%r12 499 addq %r15,%r10 500 adcq %r9,%r11 501 adcq $0,%r12 502 pxor %xmm1,%xmm13 503 pxor %xmm0,%xmm12 504 .byte 102,69,15,56,0,248 505 .byte 102,69,15,56,0,240 506 .byte 102,69,15,56,0,232 507 .byte 102,69,15,56,0,224 508 movdqa 80(%rbp),%xmm8 509 paddd %xmm15,%xmm11 510 paddd %xmm14,%xmm10 511 paddd %xmm13,%xmm9 512 paddd %xmm12,%xmm8 513 pxor %xmm11,%xmm7 514 pxor %xmm10,%xmm6 515 pxor %xmm9,%xmm5 516 pxor %xmm8,%xmm4 517 movdqa %xmm8,80(%rbp) 518 movdqa %xmm7,%xmm8 519 psrld $20,%xmm8 520 pslld $32-20,%xmm7 521 pxor %xmm8,%xmm7 522 movdqa %xmm6,%xmm8 523 psrld $20,%xmm8 524 pslld $32-20,%xmm6 525 pxor %xmm8,%xmm6 526 movdqa %xmm5,%xmm8 527 psrld $20,%xmm8 528 pslld $32-20,%xmm5 529 pxor %xmm8,%xmm5 530 movdqa %xmm4,%xmm8 531 psrld $20,%xmm8 532 pslld $32-20,%xmm4 533 pxor %xmm8,%xmm4 534 movdqa .rol8(%rip),%xmm8 535 paddd %xmm7,%xmm3 536 paddd %xmm6,%xmm2 537 paddd %xmm5,%xmm1 538 paddd %xmm4,%xmm0 539 pxor %xmm3,%xmm15 540 pxor %xmm2,%xmm14 541 pxor %xmm1,%xmm13 542 pxor %xmm0,%xmm12 543 .byte 102,69,15,56,0,248 544 .byte 102,69,15,56,0,240 545 .byte 102,69,15,56,0,232 546 .byte 102,69,15,56,0,224 547 movdqa 80(%rbp),%xmm8 548 paddd %xmm15,%xmm11 549 paddd %xmm14,%xmm10 550 paddd %xmm13,%xmm9 551 paddd %xmm12,%xmm8 552 pxor %xmm11,%xmm7 553 pxor %xmm10,%xmm6 554 pxor %xmm9,%xmm5 555 pxor %xmm8,%xmm4 556 movdqa %xmm8,80(%rbp) 557 movdqa %xmm7,%xmm8 558 psrld $25,%xmm8 559 pslld $32-25,%xmm7 560 pxor %xmm8,%xmm7 561 movdqa %xmm6,%xmm8 562 psrld $25,%xmm8 563 pslld $32-25,%xmm6 564 pxor %xmm8,%xmm6 565 movdqa %xmm5,%xmm8 566 psrld $25,%xmm8 567 pslld $32-25,%xmm5 568 pxor %xmm8,%xmm5 569 movdqa %xmm4,%xmm8 570 psrld $25,%xmm8 571 pslld $32-25,%xmm4 572 pxor %xmm8,%xmm4 573 movdqa 80(%rbp),%xmm8 574 .byte 102,15,58,15,255,12 575 .byte 102,69,15,58,15,219,8 576 .byte 102,69,15,58,15,255,4 577 .byte 102,15,58,15,246,12 578 .byte 102,69,15,58,15,210,8 579 .byte 102,69,15,58,15,246,4 580 .byte 102,15,58,15,237,12 581 .byte 102,69,15,58,15,201,8 582 .byte 102,69,15,58,15,237,4 583 .byte 102,15,58,15,228,12 584 .byte 102,69,15,58,15,192,8 585 .byte 102,69,15,58,15,228,4 586 587 decq %rcx 588 jge 1b 589 addq 0(%r8),%r10 590 adcq 8+0(%r8),%r11 591 adcq $1,%r12 592 movq 0+0(%rbp),%rax 593 movq %rax,%r15 594 mulq %r10 595 movq %rax,%r13 596 movq %rdx,%r14 597 movq 0+0(%rbp),%rax 598 mulq %r11 599 imulq %r12,%r15 600 addq %rax,%r14 601 adcq %rdx,%r15 602 movq 8+0(%rbp),%rax 603 movq %rax,%r9 604 mulq %r10 605 addq %rax,%r14 606 adcq $0,%rdx 607 movq %rdx,%r10 608 movq 8+0(%rbp),%rax 609 mulq %r11 610 addq %rax,%r15 611 adcq $0,%rdx 612 imulq %r12,%r9 613 addq %r10,%r15 614 adcq %rdx,%r9 615 movq %r13,%r10 616 movq %r14,%r11 617 movq %r15,%r12 618 andq $3,%r12 619 movq %r15,%r13 620 andq $-4,%r13 621 movq %r9,%r14 622 shrdq $2,%r9,%r15 623 shrq $2,%r9 624 addq %r13,%r10 625 adcq %r14,%r11 626 adcq $0,%r12 627 addq %r15,%r10 628 adcq %r9,%r11 629 adcq $0,%r12 630 631 leaq 16(%r8),%r8 632 cmpq $-6,%rcx 633 jg 1b 634 paddd .chacha20_consts(%rip),%xmm3 635 paddd 48(%rbp),%xmm7 636 paddd 64(%rbp),%xmm11 637 paddd 144(%rbp),%xmm15 638 paddd .chacha20_consts(%rip),%xmm2 639 paddd 48(%rbp),%xmm6 640 paddd 
64(%rbp),%xmm10 641 paddd 128(%rbp),%xmm14 642 paddd .chacha20_consts(%rip),%xmm1 643 paddd 48(%rbp),%xmm5 644 paddd 64(%rbp),%xmm9 645 paddd 112(%rbp),%xmm13 646 paddd .chacha20_consts(%rip),%xmm0 647 paddd 48(%rbp),%xmm4 648 paddd 64(%rbp),%xmm8 649 paddd 96(%rbp),%xmm12 650 movdqa %xmm12,80(%rbp) 651 movdqu 0 + 0(%rsi),%xmm12 652 pxor %xmm3,%xmm12 653 movdqu %xmm12,0 + 0(%rdi) 654 movdqu 16 + 0(%rsi),%xmm12 655 pxor %xmm7,%xmm12 656 movdqu %xmm12,16 + 0(%rdi) 657 movdqu 32 + 0(%rsi),%xmm12 658 pxor %xmm11,%xmm12 659 movdqu %xmm12,32 + 0(%rdi) 660 movdqu 48 + 0(%rsi),%xmm12 661 pxor %xmm15,%xmm12 662 movdqu %xmm12,48 + 0(%rdi) 663 movdqu 0 + 64(%rsi),%xmm3 664 movdqu 16 + 64(%rsi),%xmm7 665 movdqu 32 + 64(%rsi),%xmm11 666 movdqu 48 + 64(%rsi),%xmm15 667 pxor %xmm3,%xmm2 668 pxor %xmm7,%xmm6 669 pxor %xmm11,%xmm10 670 pxor %xmm14,%xmm15 671 movdqu %xmm2,0 + 64(%rdi) 672 movdqu %xmm6,16 + 64(%rdi) 673 movdqu %xmm10,32 + 64(%rdi) 674 movdqu %xmm15,48 + 64(%rdi) 675 movdqu 0 + 128(%rsi),%xmm3 676 movdqu 16 + 128(%rsi),%xmm7 677 movdqu 32 + 128(%rsi),%xmm11 678 movdqu 48 + 128(%rsi),%xmm15 679 pxor %xmm3,%xmm1 680 pxor %xmm7,%xmm5 681 pxor %xmm11,%xmm9 682 pxor %xmm13,%xmm15 683 movdqu %xmm1,0 + 128(%rdi) 684 movdqu %xmm5,16 + 128(%rdi) 685 movdqu %xmm9,32 + 128(%rdi) 686 movdqu %xmm15,48 + 128(%rdi) 687 movdqu 0 + 192(%rsi),%xmm3 688 movdqu 16 + 192(%rsi),%xmm7 689 movdqu 32 + 192(%rsi),%xmm11 690 movdqu 48 + 192(%rsi),%xmm15 691 pxor %xmm3,%xmm0 692 pxor %xmm7,%xmm4 693 pxor %xmm11,%xmm8 694 pxor 80(%rbp),%xmm15 695 movdqu %xmm0,0 + 192(%rdi) 696 movdqu %xmm4,16 + 192(%rdi) 697 movdqu %xmm8,32 + 192(%rdi) 698 movdqu %xmm15,48 + 192(%rdi) 699 700 leaq 256(%rsi),%rsi 701 leaq 256(%rdi),%rdi 702 subq $256,%rbx 703 jmp open_sse_main_loop 704 2: 705 706 testq %rbx,%rbx 707 jz open_sse_finalize 708 cmpq $64,%rbx 709 ja 3f 710 movdqa .chacha20_consts(%rip),%xmm0 711 movdqa 48(%rbp),%xmm4 712 movdqa 64(%rbp),%xmm8 713 movdqa 96(%rbp),%xmm12 714 paddd .sse_inc(%rip),%xmm12 715 movdqa %xmm12,96(%rbp) 716 717 xorq %r8,%r8 718 movq %rbx,%rcx 719 cmpq $16,%rcx 720 jb 2f 721 1: 722 addq 0(%rsi,%r8), %r10 723 adcq 8+0(%rsi,%r8), %r11 724 adcq $1,%r12 725 movq 0+0(%rbp),%rax 726 movq %rax,%r15 727 mulq %r10 728 movq %rax,%r13 729 movq %rdx,%r14 730 movq 0+0(%rbp),%rax 731 mulq %r11 732 imulq %r12,%r15 733 addq %rax,%r14 734 adcq %rdx,%r15 735 movq 8+0(%rbp),%rax 736 movq %rax,%r9 737 mulq %r10 738 addq %rax,%r14 739 adcq $0,%rdx 740 movq %rdx,%r10 741 movq 8+0(%rbp),%rax 742 mulq %r11 743 addq %rax,%r15 744 adcq $0,%rdx 745 imulq %r12,%r9 746 addq %r10,%r15 747 adcq %rdx,%r9 748 movq %r13,%r10 749 movq %r14,%r11 750 movq %r15,%r12 751 andq $3,%r12 752 movq %r15,%r13 753 andq $-4,%r13 754 movq %r9,%r14 755 shrdq $2,%r9,%r15 756 shrq $2,%r9 757 addq %r13,%r10 758 adcq %r14,%r11 759 adcq $0,%r12 760 addq %r15,%r10 761 adcq %r9,%r11 762 adcq $0,%r12 763 764 subq $16,%rcx 765 2: 766 addq $16,%r8 767 paddd %xmm4,%xmm0 768 pxor %xmm0,%xmm12 769 pshufb .rol16(%rip),%xmm12 770 paddd %xmm12,%xmm8 771 pxor %xmm8,%xmm4 772 movdqa %xmm4,%xmm3 773 pslld $12,%xmm3 774 psrld $20,%xmm4 775 pxor %xmm3,%xmm4 776 paddd %xmm4,%xmm0 777 pxor %xmm0,%xmm12 778 pshufb .rol8(%rip),%xmm12 779 paddd %xmm12,%xmm8 780 pxor %xmm8,%xmm4 781 movdqa %xmm4,%xmm3 782 pslld $7,%xmm3 783 psrld $25,%xmm4 784 pxor %xmm3,%xmm4 785 .byte 102,15,58,15,228,4 786 .byte 102,69,15,58,15,192,8 787 .byte 102,69,15,58,15,228,12 788 paddd %xmm4,%xmm0 789 pxor %xmm0,%xmm12 790 pshufb .rol16(%rip),%xmm12 791 paddd %xmm12,%xmm8 792 pxor %xmm8,%xmm4 793 movdqa 
%xmm4,%xmm3 794 pslld $12,%xmm3 795 psrld $20,%xmm4 796 pxor %xmm3,%xmm4 797 paddd %xmm4,%xmm0 798 pxor %xmm0,%xmm12 799 pshufb .rol8(%rip),%xmm12 800 paddd %xmm12,%xmm8 801 pxor %xmm8,%xmm4 802 movdqa %xmm4,%xmm3 803 pslld $7,%xmm3 804 psrld $25,%xmm4 805 pxor %xmm3,%xmm4 806 .byte 102,15,58,15,228,12 807 .byte 102,69,15,58,15,192,8 808 .byte 102,69,15,58,15,228,4 809 810 cmpq $16,%rcx 811 jae 1b 812 cmpq $160,%r8 813 jne 2b 814 paddd .chacha20_consts(%rip),%xmm0 815 paddd 48(%rbp),%xmm4 816 paddd 64(%rbp),%xmm8 817 paddd 96(%rbp),%xmm12 818 819 jmp open_sse_tail_64_dec_loop 820 3: 821 cmpq $128,%rbx 822 ja 3f 823 movdqa .chacha20_consts(%rip),%xmm0 824 movdqa 48(%rbp),%xmm4 825 movdqa 64(%rbp),%xmm8 826 movdqa %xmm0,%xmm1 827 movdqa %xmm4,%xmm5 828 movdqa %xmm8,%xmm9 829 movdqa 96(%rbp),%xmm13 830 paddd .sse_inc(%rip),%xmm13 831 movdqa %xmm13,%xmm12 832 paddd .sse_inc(%rip),%xmm12 833 movdqa %xmm12,96(%rbp) 834 movdqa %xmm13,112(%rbp) 835 836 movq %rbx,%rcx 837 andq $-16,%rcx 838 xorq %r8,%r8 839 1: 840 addq 0(%rsi,%r8), %r10 841 adcq 8+0(%rsi,%r8), %r11 842 adcq $1,%r12 843 movq 0+0(%rbp),%rax 844 movq %rax,%r15 845 mulq %r10 846 movq %rax,%r13 847 movq %rdx,%r14 848 movq 0+0(%rbp),%rax 849 mulq %r11 850 imulq %r12,%r15 851 addq %rax,%r14 852 adcq %rdx,%r15 853 movq 8+0(%rbp),%rax 854 movq %rax,%r9 855 mulq %r10 856 addq %rax,%r14 857 adcq $0,%rdx 858 movq %rdx,%r10 859 movq 8+0(%rbp),%rax 860 mulq %r11 861 addq %rax,%r15 862 adcq $0,%rdx 863 imulq %r12,%r9 864 addq %r10,%r15 865 adcq %rdx,%r9 866 movq %r13,%r10 867 movq %r14,%r11 868 movq %r15,%r12 869 andq $3,%r12 870 movq %r15,%r13 871 andq $-4,%r13 872 movq %r9,%r14 873 shrdq $2,%r9,%r15 874 shrq $2,%r9 875 addq %r13,%r10 876 adcq %r14,%r11 877 adcq $0,%r12 878 addq %r15,%r10 879 adcq %r9,%r11 880 adcq $0,%r12 881 882 2: 883 addq $16,%r8 884 paddd %xmm4,%xmm0 885 pxor %xmm0,%xmm12 886 pshufb .rol16(%rip),%xmm12 887 paddd %xmm12,%xmm8 888 pxor %xmm8,%xmm4 889 movdqa %xmm4,%xmm3 890 pslld $12,%xmm3 891 psrld $20,%xmm4 892 pxor %xmm3,%xmm4 893 paddd %xmm4,%xmm0 894 pxor %xmm0,%xmm12 895 pshufb .rol8(%rip),%xmm12 896 paddd %xmm12,%xmm8 897 pxor %xmm8,%xmm4 898 movdqa %xmm4,%xmm3 899 pslld $7,%xmm3 900 psrld $25,%xmm4 901 pxor %xmm3,%xmm4 902 .byte 102,15,58,15,228,4 903 .byte 102,69,15,58,15,192,8 904 .byte 102,69,15,58,15,228,12 905 paddd %xmm5,%xmm1 906 pxor %xmm1,%xmm13 907 pshufb .rol16(%rip),%xmm13 908 paddd %xmm13,%xmm9 909 pxor %xmm9,%xmm5 910 movdqa %xmm5,%xmm3 911 pslld $12,%xmm3 912 psrld $20,%xmm5 913 pxor %xmm3,%xmm5 914 paddd %xmm5,%xmm1 915 pxor %xmm1,%xmm13 916 pshufb .rol8(%rip),%xmm13 917 paddd %xmm13,%xmm9 918 pxor %xmm9,%xmm5 919 movdqa %xmm5,%xmm3 920 pslld $7,%xmm3 921 psrld $25,%xmm5 922 pxor %xmm3,%xmm5 923 .byte 102,15,58,15,237,4 924 .byte 102,69,15,58,15,201,8 925 .byte 102,69,15,58,15,237,12 926 paddd %xmm4,%xmm0 927 pxor %xmm0,%xmm12 928 pshufb .rol16(%rip),%xmm12 929 paddd %xmm12,%xmm8 930 pxor %xmm8,%xmm4 931 movdqa %xmm4,%xmm3 932 pslld $12,%xmm3 933 psrld $20,%xmm4 934 pxor %xmm3,%xmm4 935 paddd %xmm4,%xmm0 936 pxor %xmm0,%xmm12 937 pshufb .rol8(%rip),%xmm12 938 paddd %xmm12,%xmm8 939 pxor %xmm8,%xmm4 940 movdqa %xmm4,%xmm3 941 pslld $7,%xmm3 942 psrld $25,%xmm4 943 pxor %xmm3,%xmm4 944 .byte 102,15,58,15,228,12 945 .byte 102,69,15,58,15,192,8 946 .byte 102,69,15,58,15,228,4 947 paddd %xmm5,%xmm1 948 pxor %xmm1,%xmm13 949 pshufb .rol16(%rip),%xmm13 950 paddd %xmm13,%xmm9 951 pxor %xmm9,%xmm5 952 movdqa %xmm5,%xmm3 953 pslld $12,%xmm3 954 psrld $20,%xmm5 955 pxor %xmm3,%xmm5 956 paddd %xmm5,%xmm1 957 pxor 
%xmm1,%xmm13 958 pshufb .rol8(%rip),%xmm13 959 paddd %xmm13,%xmm9 960 pxor %xmm9,%xmm5 961 movdqa %xmm5,%xmm3 962 pslld $7,%xmm3 963 psrld $25,%xmm5 964 pxor %xmm3,%xmm5 965 .byte 102,15,58,15,237,12 966 .byte 102,69,15,58,15,201,8 967 .byte 102,69,15,58,15,237,4 968 969 cmpq %rcx,%r8 970 jb 1b 971 cmpq $160,%r8 972 jne 2b 973 paddd .chacha20_consts(%rip),%xmm1 974 paddd 48(%rbp),%xmm5 975 paddd 64(%rbp),%xmm9 976 paddd 112(%rbp),%xmm13 977 paddd .chacha20_consts(%rip),%xmm0 978 paddd 48(%rbp),%xmm4 979 paddd 64(%rbp),%xmm8 980 paddd 96(%rbp),%xmm12 981 movdqu 0 + 0(%rsi),%xmm3 982 movdqu 16 + 0(%rsi),%xmm7 983 movdqu 32 + 0(%rsi),%xmm11 984 movdqu 48 + 0(%rsi),%xmm15 985 pxor %xmm3,%xmm1 986 pxor %xmm7,%xmm5 987 pxor %xmm11,%xmm9 988 pxor %xmm13,%xmm15 989 movdqu %xmm1,0 + 0(%rdi) 990 movdqu %xmm5,16 + 0(%rdi) 991 movdqu %xmm9,32 + 0(%rdi) 992 movdqu %xmm15,48 + 0(%rdi) 993 994 subq $64,%rbx 995 leaq 64(%rsi),%rsi 996 leaq 64(%rdi),%rdi 997 jmp open_sse_tail_64_dec_loop 998 3: 999 cmpq $192,%rbx 1000 ja 3f 1001 movdqa .chacha20_consts(%rip),%xmm0 1002 movdqa 48(%rbp),%xmm4 1003 movdqa 64(%rbp),%xmm8 1004 movdqa %xmm0,%xmm1 1005 movdqa %xmm4,%xmm5 1006 movdqa %xmm8,%xmm9 1007 movdqa %xmm0,%xmm2 1008 movdqa %xmm4,%xmm6 1009 movdqa %xmm8,%xmm10 1010 movdqa 96(%rbp),%xmm14 1011 paddd .sse_inc(%rip),%xmm14 1012 movdqa %xmm14,%xmm13 1013 paddd .sse_inc(%rip),%xmm13 1014 movdqa %xmm13,%xmm12 1015 paddd .sse_inc(%rip),%xmm12 1016 movdqa %xmm12,96(%rbp) 1017 movdqa %xmm13,112(%rbp) 1018 movdqa %xmm14,128(%rbp) 1019 1020 movq %rbx,%rcx 1021 movq $160,%r8 1022 cmpq $160,%rcx 1023 cmovgq %r8,%rcx 1024 andq $-16,%rcx 1025 xorq %r8,%r8 1026 1: 1027 addq 0(%rsi,%r8), %r10 1028 adcq 8+0(%rsi,%r8), %r11 1029 adcq $1,%r12 1030 movq 0+0(%rbp),%rax 1031 movq %rax,%r15 1032 mulq %r10 1033 movq %rax,%r13 1034 movq %rdx,%r14 1035 movq 0+0(%rbp),%rax 1036 mulq %r11 1037 imulq %r12,%r15 1038 addq %rax,%r14 1039 adcq %rdx,%r15 1040 movq 8+0(%rbp),%rax 1041 movq %rax,%r9 1042 mulq %r10 1043 addq %rax,%r14 1044 adcq $0,%rdx 1045 movq %rdx,%r10 1046 movq 8+0(%rbp),%rax 1047 mulq %r11 1048 addq %rax,%r15 1049 adcq $0,%rdx 1050 imulq %r12,%r9 1051 addq %r10,%r15 1052 adcq %rdx,%r9 1053 movq %r13,%r10 1054 movq %r14,%r11 1055 movq %r15,%r12 1056 andq $3,%r12 1057 movq %r15,%r13 1058 andq $-4,%r13 1059 movq %r9,%r14 1060 shrdq $2,%r9,%r15 1061 shrq $2,%r9 1062 addq %r13,%r10 1063 adcq %r14,%r11 1064 adcq $0,%r12 1065 addq %r15,%r10 1066 adcq %r9,%r11 1067 adcq $0,%r12 1068 1069 2: 1070 addq $16,%r8 1071 paddd %xmm4,%xmm0 1072 pxor %xmm0,%xmm12 1073 pshufb .rol16(%rip),%xmm12 1074 paddd %xmm12,%xmm8 1075 pxor %xmm8,%xmm4 1076 movdqa %xmm4,%xmm3 1077 pslld $12,%xmm3 1078 psrld $20,%xmm4 1079 pxor %xmm3,%xmm4 1080 paddd %xmm4,%xmm0 1081 pxor %xmm0,%xmm12 1082 pshufb .rol8(%rip),%xmm12 1083 paddd %xmm12,%xmm8 1084 pxor %xmm8,%xmm4 1085 movdqa %xmm4,%xmm3 1086 pslld $7,%xmm3 1087 psrld $25,%xmm4 1088 pxor %xmm3,%xmm4 1089 .byte 102,15,58,15,228,4 1090 .byte 102,69,15,58,15,192,8 1091 .byte 102,69,15,58,15,228,12 1092 paddd %xmm5,%xmm1 1093 pxor %xmm1,%xmm13 1094 pshufb .rol16(%rip),%xmm13 1095 paddd %xmm13,%xmm9 1096 pxor %xmm9,%xmm5 1097 movdqa %xmm5,%xmm3 1098 pslld $12,%xmm3 1099 psrld $20,%xmm5 1100 pxor %xmm3,%xmm5 1101 paddd %xmm5,%xmm1 1102 pxor %xmm1,%xmm13 1103 pshufb .rol8(%rip),%xmm13 1104 paddd %xmm13,%xmm9 1105 pxor %xmm9,%xmm5 1106 movdqa %xmm5,%xmm3 1107 pslld $7,%xmm3 1108 psrld $25,%xmm5 1109 pxor %xmm3,%xmm5 1110 .byte 102,15,58,15,237,4 1111 .byte 102,69,15,58,15,201,8 1112 .byte 102,69,15,58,15,237,12 1113 
paddd %xmm6,%xmm2 1114 pxor %xmm2,%xmm14 1115 pshufb .rol16(%rip),%xmm14 1116 paddd %xmm14,%xmm10 1117 pxor %xmm10,%xmm6 1118 movdqa %xmm6,%xmm3 1119 pslld $12,%xmm3 1120 psrld $20,%xmm6 1121 pxor %xmm3,%xmm6 1122 paddd %xmm6,%xmm2 1123 pxor %xmm2,%xmm14 1124 pshufb .rol8(%rip),%xmm14 1125 paddd %xmm14,%xmm10 1126 pxor %xmm10,%xmm6 1127 movdqa %xmm6,%xmm3 1128 pslld $7,%xmm3 1129 psrld $25,%xmm6 1130 pxor %xmm3,%xmm6 1131 .byte 102,15,58,15,246,4 1132 .byte 102,69,15,58,15,210,8 1133 .byte 102,69,15,58,15,246,12 1134 paddd %xmm4,%xmm0 1135 pxor %xmm0,%xmm12 1136 pshufb .rol16(%rip),%xmm12 1137 paddd %xmm12,%xmm8 1138 pxor %xmm8,%xmm4 1139 movdqa %xmm4,%xmm3 1140 pslld $12,%xmm3 1141 psrld $20,%xmm4 1142 pxor %xmm3,%xmm4 1143 paddd %xmm4,%xmm0 1144 pxor %xmm0,%xmm12 1145 pshufb .rol8(%rip),%xmm12 1146 paddd %xmm12,%xmm8 1147 pxor %xmm8,%xmm4 1148 movdqa %xmm4,%xmm3 1149 pslld $7,%xmm3 1150 psrld $25,%xmm4 1151 pxor %xmm3,%xmm4 1152 .byte 102,15,58,15,228,12 1153 .byte 102,69,15,58,15,192,8 1154 .byte 102,69,15,58,15,228,4 1155 paddd %xmm5,%xmm1 1156 pxor %xmm1,%xmm13 1157 pshufb .rol16(%rip),%xmm13 1158 paddd %xmm13,%xmm9 1159 pxor %xmm9,%xmm5 1160 movdqa %xmm5,%xmm3 1161 pslld $12,%xmm3 1162 psrld $20,%xmm5 1163 pxor %xmm3,%xmm5 1164 paddd %xmm5,%xmm1 1165 pxor %xmm1,%xmm13 1166 pshufb .rol8(%rip),%xmm13 1167 paddd %xmm13,%xmm9 1168 pxor %xmm9,%xmm5 1169 movdqa %xmm5,%xmm3 1170 pslld $7,%xmm3 1171 psrld $25,%xmm5 1172 pxor %xmm3,%xmm5 1173 .byte 102,15,58,15,237,12 1174 .byte 102,69,15,58,15,201,8 1175 .byte 102,69,15,58,15,237,4 1176 paddd %xmm6,%xmm2 1177 pxor %xmm2,%xmm14 1178 pshufb .rol16(%rip),%xmm14 1179 paddd %xmm14,%xmm10 1180 pxor %xmm10,%xmm6 1181 movdqa %xmm6,%xmm3 1182 pslld $12,%xmm3 1183 psrld $20,%xmm6 1184 pxor %xmm3,%xmm6 1185 paddd %xmm6,%xmm2 1186 pxor %xmm2,%xmm14 1187 pshufb .rol8(%rip),%xmm14 1188 paddd %xmm14,%xmm10 1189 pxor %xmm10,%xmm6 1190 movdqa %xmm6,%xmm3 1191 pslld $7,%xmm3 1192 psrld $25,%xmm6 1193 pxor %xmm3,%xmm6 1194 .byte 102,15,58,15,246,12 1195 .byte 102,69,15,58,15,210,8 1196 .byte 102,69,15,58,15,246,4 1197 1198 cmpq %rcx,%r8 1199 jb 1b 1200 cmpq $160,%r8 1201 jne 2b 1202 cmpq $176,%rbx 1203 jb 1f 1204 addq 160(%rsi),%r10 1205 adcq 8+160(%rsi),%r11 1206 adcq $1,%r12 1207 movq 0+0(%rbp),%rax 1208 movq %rax,%r15 1209 mulq %r10 1210 movq %rax,%r13 1211 movq %rdx,%r14 1212 movq 0+0(%rbp),%rax 1213 mulq %r11 1214 imulq %r12,%r15 1215 addq %rax,%r14 1216 adcq %rdx,%r15 1217 movq 8+0(%rbp),%rax 1218 movq %rax,%r9 1219 mulq %r10 1220 addq %rax,%r14 1221 adcq $0,%rdx 1222 movq %rdx,%r10 1223 movq 8+0(%rbp),%rax 1224 mulq %r11 1225 addq %rax,%r15 1226 adcq $0,%rdx 1227 imulq %r12,%r9 1228 addq %r10,%r15 1229 adcq %rdx,%r9 1230 movq %r13,%r10 1231 movq %r14,%r11 1232 movq %r15,%r12 1233 andq $3,%r12 1234 movq %r15,%r13 1235 andq $-4,%r13 1236 movq %r9,%r14 1237 shrdq $2,%r9,%r15 1238 shrq $2,%r9 1239 addq %r13,%r10 1240 adcq %r14,%r11 1241 adcq $0,%r12 1242 addq %r15,%r10 1243 adcq %r9,%r11 1244 adcq $0,%r12 1245 1246 cmpq $192,%rbx 1247 jb 1f 1248 addq 176(%rsi),%r10 1249 adcq 8+176(%rsi),%r11 1250 adcq $1,%r12 1251 movq 0+0(%rbp),%rax 1252 movq %rax,%r15 1253 mulq %r10 1254 movq %rax,%r13 1255 movq %rdx,%r14 1256 movq 0+0(%rbp),%rax 1257 mulq %r11 1258 imulq %r12,%r15 1259 addq %rax,%r14 1260 adcq %rdx,%r15 1261 movq 8+0(%rbp),%rax 1262 movq %rax,%r9 1263 mulq %r10 1264 addq %rax,%r14 1265 adcq $0,%rdx 1266 movq %rdx,%r10 1267 movq 8+0(%rbp),%rax 1268 mulq %r11 1269 addq %rax,%r15 1270 adcq $0,%rdx 1271 imulq %r12,%r9 1272 addq %r10,%r15 1273 adcq %rdx,%r9 
1274 movq %r13,%r10 1275 movq %r14,%r11 1276 movq %r15,%r12 1277 andq $3,%r12 1278 movq %r15,%r13 1279 andq $-4,%r13 1280 movq %r9,%r14 1281 shrdq $2,%r9,%r15 1282 shrq $2,%r9 1283 addq %r13,%r10 1284 adcq %r14,%r11 1285 adcq $0,%r12 1286 addq %r15,%r10 1287 adcq %r9,%r11 1288 adcq $0,%r12 1289 1290 1: 1291 paddd .chacha20_consts(%rip),%xmm2 1292 paddd 48(%rbp),%xmm6 1293 paddd 64(%rbp),%xmm10 1294 paddd 128(%rbp),%xmm14 1295 paddd .chacha20_consts(%rip),%xmm1 1296 paddd 48(%rbp),%xmm5 1297 paddd 64(%rbp),%xmm9 1298 paddd 112(%rbp),%xmm13 1299 paddd .chacha20_consts(%rip),%xmm0 1300 paddd 48(%rbp),%xmm4 1301 paddd 64(%rbp),%xmm8 1302 paddd 96(%rbp),%xmm12 1303 movdqu 0 + 0(%rsi),%xmm3 1304 movdqu 16 + 0(%rsi),%xmm7 1305 movdqu 32 + 0(%rsi),%xmm11 1306 movdqu 48 + 0(%rsi),%xmm15 1307 pxor %xmm3,%xmm2 1308 pxor %xmm7,%xmm6 1309 pxor %xmm11,%xmm10 1310 pxor %xmm14,%xmm15 1311 movdqu %xmm2,0 + 0(%rdi) 1312 movdqu %xmm6,16 + 0(%rdi) 1313 movdqu %xmm10,32 + 0(%rdi) 1314 movdqu %xmm15,48 + 0(%rdi) 1315 movdqu 0 + 64(%rsi),%xmm3 1316 movdqu 16 + 64(%rsi),%xmm7 1317 movdqu 32 + 64(%rsi),%xmm11 1318 movdqu 48 + 64(%rsi),%xmm15 1319 pxor %xmm3,%xmm1 1320 pxor %xmm7,%xmm5 1321 pxor %xmm11,%xmm9 1322 pxor %xmm13,%xmm15 1323 movdqu %xmm1,0 + 64(%rdi) 1324 movdqu %xmm5,16 + 64(%rdi) 1325 movdqu %xmm9,32 + 64(%rdi) 1326 movdqu %xmm15,48 + 64(%rdi) 1327 1328 subq $128,%rbx 1329 leaq 128(%rsi),%rsi 1330 leaq 128(%rdi),%rdi 1331 jmp open_sse_tail_64_dec_loop 1332 3: 1333 1334 movdqa .chacha20_consts(%rip),%xmm0 1335 movdqa 48(%rbp),%xmm4 1336 movdqa 64(%rbp),%xmm8 1337 movdqa %xmm0,%xmm1 1338 movdqa %xmm4,%xmm5 1339 movdqa %xmm8,%xmm9 1340 movdqa %xmm0,%xmm2 1341 movdqa %xmm4,%xmm6 1342 movdqa %xmm8,%xmm10 1343 movdqa %xmm0,%xmm3 1344 movdqa %xmm4,%xmm7 1345 movdqa %xmm8,%xmm11 1346 movdqa 96(%rbp),%xmm15 1347 paddd .sse_inc(%rip),%xmm15 1348 movdqa %xmm15,%xmm14 1349 paddd .sse_inc(%rip),%xmm14 1350 movdqa %xmm14,%xmm13 1351 paddd .sse_inc(%rip),%xmm13 1352 movdqa %xmm13,%xmm12 1353 paddd .sse_inc(%rip),%xmm12 1354 movdqa %xmm12,96(%rbp) 1355 movdqa %xmm13,112(%rbp) 1356 movdqa %xmm14,128(%rbp) 1357 movdqa %xmm15,144(%rbp) 1358 1359 xorq %r8,%r8 1360 1: 1361 addq 0(%rsi,%r8), %r10 1362 adcq 8+0(%rsi,%r8), %r11 1363 adcq $1,%r12 1364 movdqa %xmm11,80(%rbp) 1365 paddd %xmm4,%xmm0 1366 pxor %xmm0,%xmm12 1367 pshufb .rol16(%rip),%xmm12 1368 paddd %xmm12,%xmm8 1369 pxor %xmm8,%xmm4 1370 movdqa %xmm4,%xmm11 1371 pslld $12,%xmm11 1372 psrld $20,%xmm4 1373 pxor %xmm11,%xmm4 1374 paddd %xmm4,%xmm0 1375 pxor %xmm0,%xmm12 1376 pshufb .rol8(%rip),%xmm12 1377 paddd %xmm12,%xmm8 1378 pxor %xmm8,%xmm4 1379 movdqa %xmm4,%xmm11 1380 pslld $7,%xmm11 1381 psrld $25,%xmm4 1382 pxor %xmm11,%xmm4 1383 .byte 102,15,58,15,228,4 1384 .byte 102,69,15,58,15,192,8 1385 .byte 102,69,15,58,15,228,12 1386 paddd %xmm5,%xmm1 1387 pxor %xmm1,%xmm13 1388 pshufb .rol16(%rip),%xmm13 1389 paddd %xmm13,%xmm9 1390 pxor %xmm9,%xmm5 1391 movdqa %xmm5,%xmm11 1392 pslld $12,%xmm11 1393 psrld $20,%xmm5 1394 pxor %xmm11,%xmm5 1395 paddd %xmm5,%xmm1 1396 pxor %xmm1,%xmm13 1397 pshufb .rol8(%rip),%xmm13 1398 paddd %xmm13,%xmm9 1399 pxor %xmm9,%xmm5 1400 movdqa %xmm5,%xmm11 1401 pslld $7,%xmm11 1402 psrld $25,%xmm5 1403 pxor %xmm11,%xmm5 1404 .byte 102,15,58,15,237,4 1405 .byte 102,69,15,58,15,201,8 1406 .byte 102,69,15,58,15,237,12 1407 paddd %xmm6,%xmm2 1408 pxor %xmm2,%xmm14 1409 pshufb .rol16(%rip),%xmm14 1410 paddd %xmm14,%xmm10 1411 pxor %xmm10,%xmm6 1412 movdqa %xmm6,%xmm11 1413 pslld $12,%xmm11 1414 psrld $20,%xmm6 1415 pxor %xmm11,%xmm6 1416 
paddd %xmm6,%xmm2 1417 pxor %xmm2,%xmm14 1418 pshufb .rol8(%rip),%xmm14 1419 paddd %xmm14,%xmm10 1420 pxor %xmm10,%xmm6 1421 movdqa %xmm6,%xmm11 1422 pslld $7,%xmm11 1423 psrld $25,%xmm6 1424 pxor %xmm11,%xmm6 1425 .byte 102,15,58,15,246,4 1426 .byte 102,69,15,58,15,210,8 1427 .byte 102,69,15,58,15,246,12 1428 movdqa 80(%rbp),%xmm11 1429 movq 0+0(%rbp),%rax 1430 movq %rax,%r15 1431 mulq %r10 1432 movq %rax,%r13 1433 movq %rdx,%r14 1434 movq 0+0(%rbp),%rax 1435 mulq %r11 1436 imulq %r12,%r15 1437 addq %rax,%r14 1438 adcq %rdx,%r15 1439 movdqa %xmm9,80(%rbp) 1440 paddd %xmm7,%xmm3 1441 pxor %xmm3,%xmm15 1442 pshufb .rol16(%rip),%xmm15 1443 paddd %xmm15,%xmm11 1444 pxor %xmm11,%xmm7 1445 movdqa %xmm7,%xmm9 1446 pslld $12,%xmm9 1447 psrld $20,%xmm7 1448 pxor %xmm9,%xmm7 1449 paddd %xmm7,%xmm3 1450 pxor %xmm3,%xmm15 1451 pshufb .rol8(%rip),%xmm15 1452 paddd %xmm15,%xmm11 1453 pxor %xmm11,%xmm7 1454 movdqa %xmm7,%xmm9 1455 pslld $7,%xmm9 1456 psrld $25,%xmm7 1457 pxor %xmm9,%xmm7 1458 .byte 102,15,58,15,255,4 1459 .byte 102,69,15,58,15,219,8 1460 .byte 102,69,15,58,15,255,12 1461 movdqa 80(%rbp),%xmm9 1462 movq 8+0(%rbp),%rax 1463 movq %rax,%r9 1464 mulq %r10 1465 addq %rax,%r14 1466 adcq $0,%rdx 1467 movq %rdx,%r10 1468 movq 8+0(%rbp),%rax 1469 mulq %r11 1470 addq %rax,%r15 1471 adcq $0,%rdx 1472 movdqa %xmm11,80(%rbp) 1473 paddd %xmm4,%xmm0 1474 pxor %xmm0,%xmm12 1475 pshufb .rol16(%rip),%xmm12 1476 paddd %xmm12,%xmm8 1477 pxor %xmm8,%xmm4 1478 movdqa %xmm4,%xmm11 1479 pslld $12,%xmm11 1480 psrld $20,%xmm4 1481 pxor %xmm11,%xmm4 1482 paddd %xmm4,%xmm0 1483 pxor %xmm0,%xmm12 1484 pshufb .rol8(%rip),%xmm12 1485 paddd %xmm12,%xmm8 1486 pxor %xmm8,%xmm4 1487 movdqa %xmm4,%xmm11 1488 pslld $7,%xmm11 1489 psrld $25,%xmm4 1490 pxor %xmm11,%xmm4 1491 .byte 102,15,58,15,228,12 1492 .byte 102,69,15,58,15,192,8 1493 .byte 102,69,15,58,15,228,4 1494 paddd %xmm5,%xmm1 1495 pxor %xmm1,%xmm13 1496 pshufb .rol16(%rip),%xmm13 1497 paddd %xmm13,%xmm9 1498 pxor %xmm9,%xmm5 1499 movdqa %xmm5,%xmm11 1500 pslld $12,%xmm11 1501 psrld $20,%xmm5 1502 pxor %xmm11,%xmm5 1503 paddd %xmm5,%xmm1 1504 pxor %xmm1,%xmm13 1505 pshufb .rol8(%rip),%xmm13 1506 paddd %xmm13,%xmm9 1507 pxor %xmm9,%xmm5 1508 movdqa %xmm5,%xmm11 1509 pslld $7,%xmm11 1510 psrld $25,%xmm5 1511 pxor %xmm11,%xmm5 1512 .byte 102,15,58,15,237,12 1513 .byte 102,69,15,58,15,201,8 1514 .byte 102,69,15,58,15,237,4 1515 imulq %r12,%r9 1516 addq %r10,%r15 1517 adcq %rdx,%r9 1518 paddd %xmm6,%xmm2 1519 pxor %xmm2,%xmm14 1520 pshufb .rol16(%rip),%xmm14 1521 paddd %xmm14,%xmm10 1522 pxor %xmm10,%xmm6 1523 movdqa %xmm6,%xmm11 1524 pslld $12,%xmm11 1525 psrld $20,%xmm6 1526 pxor %xmm11,%xmm6 1527 paddd %xmm6,%xmm2 1528 pxor %xmm2,%xmm14 1529 pshufb .rol8(%rip),%xmm14 1530 paddd %xmm14,%xmm10 1531 pxor %xmm10,%xmm6 1532 movdqa %xmm6,%xmm11 1533 pslld $7,%xmm11 1534 psrld $25,%xmm6 1535 pxor %xmm11,%xmm6 1536 .byte 102,15,58,15,246,12 1537 .byte 102,69,15,58,15,210,8 1538 .byte 102,69,15,58,15,246,4 1539 movdqa 80(%rbp),%xmm11 1540 movq %r13,%r10 1541 movq %r14,%r11 1542 movq %r15,%r12 1543 andq $3,%r12 1544 movq %r15,%r13 1545 andq $-4,%r13 1546 movq %r9,%r14 1547 shrdq $2,%r9,%r15 1548 shrq $2,%r9 1549 addq %r13,%r10 1550 adcq %r14,%r11 1551 adcq $0,%r12 1552 addq %r15,%r10 1553 adcq %r9,%r11 1554 adcq $0,%r12 1555 movdqa %xmm9,80(%rbp) 1556 paddd %xmm7,%xmm3 1557 pxor %xmm3,%xmm15 1558 pshufb .rol16(%rip),%xmm15 1559 paddd %xmm15,%xmm11 1560 pxor %xmm11,%xmm7 1561 movdqa %xmm7,%xmm9 1562 pslld $12,%xmm9 1563 psrld $20,%xmm7 1564 pxor %xmm9,%xmm7 1565 paddd 
%xmm7,%xmm3 1566 pxor %xmm3,%xmm15 1567 pshufb .rol8(%rip),%xmm15 1568 paddd %xmm15,%xmm11 1569 pxor %xmm11,%xmm7 1570 movdqa %xmm7,%xmm9 1571 pslld $7,%xmm9 1572 psrld $25,%xmm7 1573 pxor %xmm9,%xmm7 1574 .byte 102,15,58,15,255,12 1575 .byte 102,69,15,58,15,219,8 1576 .byte 102,69,15,58,15,255,4 1577 movdqa 80(%rbp),%xmm9 1578 1579 addq $16,%r8 1580 cmpq $160,%r8 1581 jb 1b 1582 movq %rbx,%rcx 1583 andq $-16,%rcx 1584 1: 1585 addq 0(%rsi,%r8), %r10 1586 adcq 8+0(%rsi,%r8), %r11 1587 adcq $1,%r12 1588 movq 0+0(%rbp),%rax 1589 movq %rax,%r15 1590 mulq %r10 1591 movq %rax,%r13 1592 movq %rdx,%r14 1593 movq 0+0(%rbp),%rax 1594 mulq %r11 1595 imulq %r12,%r15 1596 addq %rax,%r14 1597 adcq %rdx,%r15 1598 movq 8+0(%rbp),%rax 1599 movq %rax,%r9 1600 mulq %r10 1601 addq %rax,%r14 1602 adcq $0,%rdx 1603 movq %rdx,%r10 1604 movq 8+0(%rbp),%rax 1605 mulq %r11 1606 addq %rax,%r15 1607 adcq $0,%rdx 1608 imulq %r12,%r9 1609 addq %r10,%r15 1610 adcq %rdx,%r9 1611 movq %r13,%r10 1612 movq %r14,%r11 1613 movq %r15,%r12 1614 andq $3,%r12 1615 movq %r15,%r13 1616 andq $-4,%r13 1617 movq %r9,%r14 1618 shrdq $2,%r9,%r15 1619 shrq $2,%r9 1620 addq %r13,%r10 1621 adcq %r14,%r11 1622 adcq $0,%r12 1623 addq %r15,%r10 1624 adcq %r9,%r11 1625 adcq $0,%r12 1626 1627 addq $16,%r8 1628 cmpq %rcx,%r8 1629 jb 1b 1630 paddd .chacha20_consts(%rip),%xmm3 1631 paddd 48(%rbp),%xmm7 1632 paddd 64(%rbp),%xmm11 1633 paddd 144(%rbp),%xmm15 1634 paddd .chacha20_consts(%rip),%xmm2 1635 paddd 48(%rbp),%xmm6 1636 paddd 64(%rbp),%xmm10 1637 paddd 128(%rbp),%xmm14 1638 paddd .chacha20_consts(%rip),%xmm1 1639 paddd 48(%rbp),%xmm5 1640 paddd 64(%rbp),%xmm9 1641 paddd 112(%rbp),%xmm13 1642 paddd .chacha20_consts(%rip),%xmm0 1643 paddd 48(%rbp),%xmm4 1644 paddd 64(%rbp),%xmm8 1645 paddd 96(%rbp),%xmm12 1646 movdqa %xmm12,80(%rbp) 1647 movdqu 0 + 0(%rsi),%xmm12 1648 pxor %xmm3,%xmm12 1649 movdqu %xmm12,0 + 0(%rdi) 1650 movdqu 16 + 0(%rsi),%xmm12 1651 pxor %xmm7,%xmm12 1652 movdqu %xmm12,16 + 0(%rdi) 1653 movdqu 32 + 0(%rsi),%xmm12 1654 pxor %xmm11,%xmm12 1655 movdqu %xmm12,32 + 0(%rdi) 1656 movdqu 48 + 0(%rsi),%xmm12 1657 pxor %xmm15,%xmm12 1658 movdqu %xmm12,48 + 0(%rdi) 1659 movdqu 0 + 64(%rsi),%xmm3 1660 movdqu 16 + 64(%rsi),%xmm7 1661 movdqu 32 + 64(%rsi),%xmm11 1662 movdqu 48 + 64(%rsi),%xmm15 1663 pxor %xmm3,%xmm2 1664 pxor %xmm7,%xmm6 1665 pxor %xmm11,%xmm10 1666 pxor %xmm14,%xmm15 1667 movdqu %xmm2,0 + 64(%rdi) 1668 movdqu %xmm6,16 + 64(%rdi) 1669 movdqu %xmm10,32 + 64(%rdi) 1670 movdqu %xmm15,48 + 64(%rdi) 1671 movdqu 0 + 128(%rsi),%xmm3 1672 movdqu 16 + 128(%rsi),%xmm7 1673 movdqu 32 + 128(%rsi),%xmm11 1674 movdqu 48 + 128(%rsi),%xmm15 1675 pxor %xmm3,%xmm1 1676 pxor %xmm7,%xmm5 1677 pxor %xmm11,%xmm9 1678 pxor %xmm13,%xmm15 1679 movdqu %xmm1,0 + 128(%rdi) 1680 movdqu %xmm5,16 + 128(%rdi) 1681 movdqu %xmm9,32 + 128(%rdi) 1682 movdqu %xmm15,48 + 128(%rdi) 1683 1684 movdqa 80(%rbp),%xmm12 1685 subq $192,%rbx 1686 leaq 192(%rsi),%rsi 1687 leaq 192(%rdi),%rdi 1688 1689 1690 open_sse_tail_64_dec_loop: 1691 cmpq $16,%rbx 1692 jb 1f 1693 subq $16,%rbx 1694 movdqu (%rsi),%xmm3 1695 pxor %xmm3,%xmm0 1696 movdqu %xmm0,(%rdi) 1697 leaq 16(%rsi),%rsi 1698 leaq 16(%rdi),%rdi 1699 movdqa %xmm4,%xmm0 1700 movdqa %xmm8,%xmm4 1701 movdqa %xmm12,%xmm8 1702 jmp open_sse_tail_64_dec_loop 1703 1: 1704 movdqa %xmm0,%xmm1 1705 1706 1707 open_sse_tail_16: 1708 testq %rbx,%rbx 1709 jz open_sse_finalize 1710 1711 1712 1713 pxor %xmm3,%xmm3 1714 leaq -1(%rsi,%rbx), %rsi 1715 movq %rbx,%r8 1716 2: 1717 pslldq $1,%xmm3 1718 pinsrb $0,(%rsi),%xmm3 1719 subq 
$1,%rsi 1720 subq $1,%r8 1721 jnz 2b 1722 1723 3: 1724 .byte 102,73,15,126,221 1725 pextrq $1,%xmm3,%r14 1726 1727 pxor %xmm1,%xmm3 1728 1729 1730 2: 1731 pextrb $0,%xmm3,(%rdi) 1732 psrldq $1,%xmm3 1733 addq $1,%rdi 1734 subq $1,%rbx 1735 jne 2b 1736 1737 addq %r13,%r10 1738 adcq %r14,%r11 1739 adcq $1,%r12 1740 movq 0+0(%rbp),%rax 1741 movq %rax,%r15 1742 mulq %r10 1743 movq %rax,%r13 1744 movq %rdx,%r14 1745 movq 0+0(%rbp),%rax 1746 mulq %r11 1747 imulq %r12,%r15 1748 addq %rax,%r14 1749 adcq %rdx,%r15 1750 movq 8+0(%rbp),%rax 1751 movq %rax,%r9 1752 mulq %r10 1753 addq %rax,%r14 1754 adcq $0,%rdx 1755 movq %rdx,%r10 1756 movq 8+0(%rbp),%rax 1757 mulq %r11 1758 addq %rax,%r15 1759 adcq $0,%rdx 1760 imulq %r12,%r9 1761 addq %r10,%r15 1762 adcq %rdx,%r9 1763 movq %r13,%r10 1764 movq %r14,%r11 1765 movq %r15,%r12 1766 andq $3,%r12 1767 movq %r15,%r13 1768 andq $-4,%r13 1769 movq %r9,%r14 1770 shrdq $2,%r9,%r15 1771 shrq $2,%r9 1772 addq %r13,%r10 1773 adcq %r14,%r11 1774 adcq $0,%r12 1775 addq %r15,%r10 1776 adcq %r9,%r11 1777 adcq $0,%r12 1778 1779 1780 open_sse_finalize: 1781 addq 32(%rbp),%r10 1782 adcq 8+32(%rbp),%r11 1783 adcq $1,%r12 1784 movq 0+0(%rbp),%rax 1785 movq %rax,%r15 1786 mulq %r10 1787 movq %rax,%r13 1788 movq %rdx,%r14 1789 movq 0+0(%rbp),%rax 1790 mulq %r11 1791 imulq %r12,%r15 1792 addq %rax,%r14 1793 adcq %rdx,%r15 1794 movq 8+0(%rbp),%rax 1795 movq %rax,%r9 1796 mulq %r10 1797 addq %rax,%r14 1798 adcq $0,%rdx 1799 movq %rdx,%r10 1800 movq 8+0(%rbp),%rax 1801 mulq %r11 1802 addq %rax,%r15 1803 adcq $0,%rdx 1804 imulq %r12,%r9 1805 addq %r10,%r15 1806 adcq %rdx,%r9 1807 movq %r13,%r10 1808 movq %r14,%r11 1809 movq %r15,%r12 1810 andq $3,%r12 1811 movq %r15,%r13 1812 andq $-4,%r13 1813 movq %r9,%r14 1814 shrdq $2,%r9,%r15 1815 shrq $2,%r9 1816 addq %r13,%r10 1817 adcq %r14,%r11 1818 adcq $0,%r12 1819 addq %r15,%r10 1820 adcq %r9,%r11 1821 adcq $0,%r12 1822 1823 1824 movq %r10,%r13 1825 movq %r11,%r14 1826 movq %r12,%r15 1827 subq $-5,%r10 1828 sbbq $-1,%r11 1829 sbbq $3,%r12 1830 cmovcq %r13,%r10 1831 cmovcq %r14,%r11 1832 cmovcq %r15,%r12 1833 1834 addq 0+16(%rbp),%r10 1835 adcq 8+16(%rbp),%r11 1836 1837 addq $288 + 32,%rsp 1838 1839 popq %r9 1840 1841 movq %r10,(%r9) 1842 movq %r11,8(%r9) 1843 1844 popq %r15 1845 1846 popq %r14 1847 1848 popq %r13 1849 1850 popq %r12 1851 1852 popq %rbx 1853 1854 popq %rbp 1855 1856 .byte 0xf3,0xc3 1857 1858 1859 open_sse_128: 1860 movdqu .chacha20_consts(%rip),%xmm0 1861 movdqa %xmm0,%xmm1 1862 movdqa %xmm0,%xmm2 1863 movdqu 0(%r9),%xmm4 1864 movdqa %xmm4,%xmm5 1865 movdqa %xmm4,%xmm6 1866 movdqu 16(%r9),%xmm8 1867 movdqa %xmm8,%xmm9 1868 movdqa %xmm8,%xmm10 1869 movdqu 32(%r9),%xmm12 1870 movdqa %xmm12,%xmm13 1871 paddd .sse_inc(%rip),%xmm13 1872 movdqa %xmm13,%xmm14 1873 paddd .sse_inc(%rip),%xmm14 1874 movdqa %xmm4,%xmm7 1875 movdqa %xmm8,%xmm11 1876 movdqa %xmm13,%xmm15 1877 movq $10,%r10 1878 1: 1879 paddd %xmm4,%xmm0 1880 pxor %xmm0,%xmm12 1881 pshufb .rol16(%rip),%xmm12 1882 paddd %xmm12,%xmm8 1883 pxor %xmm8,%xmm4 1884 movdqa %xmm4,%xmm3 1885 pslld $12,%xmm3 1886 psrld $20,%xmm4 1887 pxor %xmm3,%xmm4 1888 paddd %xmm4,%xmm0 1889 pxor %xmm0,%xmm12 1890 pshufb .rol8(%rip),%xmm12 1891 paddd %xmm12,%xmm8 1892 pxor %xmm8,%xmm4 1893 movdqa %xmm4,%xmm3 1894 pslld $7,%xmm3 1895 psrld $25,%xmm4 1896 pxor %xmm3,%xmm4 1897 .byte 102,15,58,15,228,4 1898 .byte 102,69,15,58,15,192,8 1899 .byte 102,69,15,58,15,228,12 1900 paddd %xmm5,%xmm1 1901 pxor %xmm1,%xmm13 1902 pshufb .rol16(%rip),%xmm13 1903 paddd %xmm13,%xmm9 1904 pxor %xmm9,%xmm5 
1905 movdqa %xmm5,%xmm3 1906 pslld $12,%xmm3 1907 psrld $20,%xmm5 1908 pxor %xmm3,%xmm5 1909 paddd %xmm5,%xmm1 1910 pxor %xmm1,%xmm13 1911 pshufb .rol8(%rip),%xmm13 1912 paddd %xmm13,%xmm9 1913 pxor %xmm9,%xmm5 1914 movdqa %xmm5,%xmm3 1915 pslld $7,%xmm3 1916 psrld $25,%xmm5 1917 pxor %xmm3,%xmm5 1918 .byte 102,15,58,15,237,4 1919 .byte 102,69,15,58,15,201,8 1920 .byte 102,69,15,58,15,237,12 1921 paddd %xmm6,%xmm2 1922 pxor %xmm2,%xmm14 1923 pshufb .rol16(%rip),%xmm14 1924 paddd %xmm14,%xmm10 1925 pxor %xmm10,%xmm6 1926 movdqa %xmm6,%xmm3 1927 pslld $12,%xmm3 1928 psrld $20,%xmm6 1929 pxor %xmm3,%xmm6 1930 paddd %xmm6,%xmm2 1931 pxor %xmm2,%xmm14 1932 pshufb .rol8(%rip),%xmm14 1933 paddd %xmm14,%xmm10 1934 pxor %xmm10,%xmm6 1935 movdqa %xmm6,%xmm3 1936 pslld $7,%xmm3 1937 psrld $25,%xmm6 1938 pxor %xmm3,%xmm6 1939 .byte 102,15,58,15,246,4 1940 .byte 102,69,15,58,15,210,8 1941 .byte 102,69,15,58,15,246,12 1942 paddd %xmm4,%xmm0 1943 pxor %xmm0,%xmm12 1944 pshufb .rol16(%rip),%xmm12 1945 paddd %xmm12,%xmm8 1946 pxor %xmm8,%xmm4 1947 movdqa %xmm4,%xmm3 1948 pslld $12,%xmm3 1949 psrld $20,%xmm4 1950 pxor %xmm3,%xmm4 1951 paddd %xmm4,%xmm0 1952 pxor %xmm0,%xmm12 1953 pshufb .rol8(%rip),%xmm12 1954 paddd %xmm12,%xmm8 1955 pxor %xmm8,%xmm4 1956 movdqa %xmm4,%xmm3 1957 pslld $7,%xmm3 1958 psrld $25,%xmm4 1959 pxor %xmm3,%xmm4 1960 .byte 102,15,58,15,228,12 1961 .byte 102,69,15,58,15,192,8 1962 .byte 102,69,15,58,15,228,4 1963 paddd %xmm5,%xmm1 1964 pxor %xmm1,%xmm13 1965 pshufb .rol16(%rip),%xmm13 1966 paddd %xmm13,%xmm9 1967 pxor %xmm9,%xmm5 1968 movdqa %xmm5,%xmm3 1969 pslld $12,%xmm3 1970 psrld $20,%xmm5 1971 pxor %xmm3,%xmm5 1972 paddd %xmm5,%xmm1 1973 pxor %xmm1,%xmm13 1974 pshufb .rol8(%rip),%xmm13 1975 paddd %xmm13,%xmm9 1976 pxor %xmm9,%xmm5 1977 movdqa %xmm5,%xmm3 1978 pslld $7,%xmm3 1979 psrld $25,%xmm5 1980 pxor %xmm3,%xmm5 1981 .byte 102,15,58,15,237,12 1982 .byte 102,69,15,58,15,201,8 1983 .byte 102,69,15,58,15,237,4 1984 paddd %xmm6,%xmm2 1985 pxor %xmm2,%xmm14 1986 pshufb .rol16(%rip),%xmm14 1987 paddd %xmm14,%xmm10 1988 pxor %xmm10,%xmm6 1989 movdqa %xmm6,%xmm3 1990 pslld $12,%xmm3 1991 psrld $20,%xmm6 1992 pxor %xmm3,%xmm6 1993 paddd %xmm6,%xmm2 1994 pxor %xmm2,%xmm14 1995 pshufb .rol8(%rip),%xmm14 1996 paddd %xmm14,%xmm10 1997 pxor %xmm10,%xmm6 1998 movdqa %xmm6,%xmm3 1999 pslld $7,%xmm3 2000 psrld $25,%xmm6 2001 pxor %xmm3,%xmm6 2002 .byte 102,15,58,15,246,12 2003 .byte 102,69,15,58,15,210,8 2004 .byte 102,69,15,58,15,246,4 2005 2006 decq %r10 2007 jnz 1b 2008 paddd .chacha20_consts(%rip),%xmm0 2009 paddd .chacha20_consts(%rip),%xmm1 2010 paddd .chacha20_consts(%rip),%xmm2 2011 paddd %xmm7,%xmm4 2012 paddd %xmm7,%xmm5 2013 paddd %xmm7,%xmm6 2014 paddd %xmm11,%xmm9 2015 paddd %xmm11,%xmm10 2016 paddd %xmm15,%xmm13 2017 paddd .sse_inc(%rip),%xmm15 2018 paddd %xmm15,%xmm14 2019 2020 pand .clamp(%rip),%xmm0 2021 movdqa %xmm0,0(%rbp) 2022 movdqa %xmm4,16(%rbp) 2023 2024 movq %r8,%r8 2025 call poly_hash_ad_internal 2026 1: 2027 cmpq $16,%rbx 2028 jb open_sse_tail_16 2029 subq $16,%rbx 2030 addq 0(%rsi),%r10 2031 adcq 8+0(%rsi),%r11 2032 adcq $1,%r12 2033 2034 2035 movdqu 0(%rsi),%xmm3 2036 pxor %xmm3,%xmm1 2037 movdqu %xmm1,0(%rdi) 2038 leaq 16(%rsi),%rsi 2039 leaq 16(%rdi),%rdi 2040 movq 0+0(%rbp),%rax 2041 movq %rax,%r15 2042 mulq %r10 2043 movq %rax,%r13 2044 movq %rdx,%r14 2045 movq 0+0(%rbp),%rax 2046 mulq %r11 2047 imulq %r12,%r15 2048 addq %rax,%r14 2049 adcq %rdx,%r15 2050 movq 8+0(%rbp),%rax 2051 movq %rax,%r9 2052 mulq %r10 2053 addq %rax,%r14 2054 adcq $0,%rdx 2055 movq 
%rdx,%r10 2056 movq 8+0(%rbp),%rax 2057 mulq %r11 2058 addq %rax,%r15 2059 adcq $0,%rdx 2060 imulq %r12,%r9 2061 addq %r10,%r15 2062 adcq %rdx,%r9 2063 movq %r13,%r10 2064 movq %r14,%r11 2065 movq %r15,%r12 2066 andq $3,%r12 2067 movq %r15,%r13 2068 andq $-4,%r13 2069 movq %r9,%r14 2070 shrdq $2,%r9,%r15 2071 shrq $2,%r9 2072 addq %r13,%r10 2073 adcq %r14,%r11 2074 adcq $0,%r12 2075 addq %r15,%r10 2076 adcq %r9,%r11 2077 adcq $0,%r12 2078 2079 2080 movdqa %xmm5,%xmm1 2081 movdqa %xmm9,%xmm5 2082 movdqa %xmm13,%xmm9 2083 movdqa %xmm2,%xmm13 2084 movdqa %xmm6,%xmm2 2085 movdqa %xmm10,%xmm6 2086 movdqa %xmm14,%xmm10 2087 jmp 1b 2088 jmp open_sse_tail_16 2089 2090 2091 2092 2093 2094 2095 .globl _chacha20_poly1305_seal 2096 .private_extern _chacha20_poly1305_seal 2097 2098 .p2align 6 2099 _chacha20_poly1305_seal: 2100 2101 pushq %rbp 2102 2103 pushq %rbx 2104 2105 pushq %r12 2106 2107 pushq %r13 2108 2109 pushq %r14 2110 2111 pushq %r15 2112 2113 2114 2115 pushq %r9 2116 2117 subq $288 + 32,%rsp 2118 2119 2120 2121 2122 2123 2124 2125 leaq 32(%rsp),%rbp 2126 andq $-32,%rbp 2127 movq 56(%r9),%rbx 2128 addq %rdx,%rbx 2129 movq %rbx,8+32(%rbp) 2130 movq %r8,0+32(%rbp) 2131 movq %rdx,%rbx 2132 2133 movl _OPENSSL_ia32cap_P+8(%rip),%eax 2134 andl $288,%eax 2135 xorl $288,%eax 2136 jz chacha20_poly1305_seal_avx2 2137 2138 cmpq $128,%rbx 2139 jbe seal_sse_128 2140 2141 movdqa .chacha20_consts(%rip),%xmm0 2142 movdqu 0(%r9),%xmm4 2143 movdqu 16(%r9),%xmm8 2144 movdqu 32(%r9),%xmm12 2145 movdqa %xmm0,%xmm1 2146 movdqa %xmm0,%xmm2 2147 movdqa %xmm0,%xmm3 2148 movdqa %xmm4,%xmm5 2149 movdqa %xmm4,%xmm6 2150 movdqa %xmm4,%xmm7 2151 movdqa %xmm8,%xmm9 2152 movdqa %xmm8,%xmm10 2153 movdqa %xmm8,%xmm11 2154 movdqa %xmm12,%xmm15 2155 paddd .sse_inc(%rip),%xmm12 2156 movdqa %xmm12,%xmm14 2157 paddd .sse_inc(%rip),%xmm12 2158 movdqa %xmm12,%xmm13 2159 paddd .sse_inc(%rip),%xmm12 2160 2161 movdqa %xmm4,48(%rbp) 2162 movdqa %xmm8,64(%rbp) 2163 movdqa %xmm12,96(%rbp) 2164 movdqa %xmm13,112(%rbp) 2165 movdqa %xmm14,128(%rbp) 2166 movdqa %xmm15,144(%rbp) 2167 movq $10,%r10 2168 1: 2169 movdqa %xmm8,80(%rbp) 2170 movdqa .rol16(%rip),%xmm8 2171 paddd %xmm7,%xmm3 2172 paddd %xmm6,%xmm2 2173 paddd %xmm5,%xmm1 2174 paddd %xmm4,%xmm0 2175 pxor %xmm3,%xmm15 2176 pxor %xmm2,%xmm14 2177 pxor %xmm1,%xmm13 2178 pxor %xmm0,%xmm12 2179 .byte 102,69,15,56,0,248 2180 .byte 102,69,15,56,0,240 2181 .byte 102,69,15,56,0,232 2182 .byte 102,69,15,56,0,224 2183 movdqa 80(%rbp),%xmm8 2184 paddd %xmm15,%xmm11 2185 paddd %xmm14,%xmm10 2186 paddd %xmm13,%xmm9 2187 paddd %xmm12,%xmm8 2188 pxor %xmm11,%xmm7 2189 pxor %xmm10,%xmm6 2190 pxor %xmm9,%xmm5 2191 pxor %xmm8,%xmm4 2192 movdqa %xmm8,80(%rbp) 2193 movdqa %xmm7,%xmm8 2194 psrld $20,%xmm8 2195 pslld $32-20,%xmm7 2196 pxor %xmm8,%xmm7 2197 movdqa %xmm6,%xmm8 2198 psrld $20,%xmm8 2199 pslld $32-20,%xmm6 2200 pxor %xmm8,%xmm6 2201 movdqa %xmm5,%xmm8 2202 psrld $20,%xmm8 2203 pslld $32-20,%xmm5 2204 pxor %xmm8,%xmm5 2205 movdqa %xmm4,%xmm8 2206 psrld $20,%xmm8 2207 pslld $32-20,%xmm4 2208 pxor %xmm8,%xmm4 2209 movdqa .rol8(%rip),%xmm8 2210 paddd %xmm7,%xmm3 2211 paddd %xmm6,%xmm2 2212 paddd %xmm5,%xmm1 2213 paddd %xmm4,%xmm0 2214 pxor %xmm3,%xmm15 2215 pxor %xmm2,%xmm14 2216 pxor %xmm1,%xmm13 2217 pxor %xmm0,%xmm12 2218 .byte 102,69,15,56,0,248 2219 .byte 102,69,15,56,0,240 2220 .byte 102,69,15,56,0,232 2221 .byte 102,69,15,56,0,224 2222 movdqa 80(%rbp),%xmm8 2223 paddd %xmm15,%xmm11 2224 paddd %xmm14,%xmm10 2225 paddd %xmm13,%xmm9 2226 paddd %xmm12,%xmm8 2227 pxor %xmm11,%xmm7 2228 pxor 
%xmm10,%xmm6 2229 pxor %xmm9,%xmm5 2230 pxor %xmm8,%xmm4 2231 movdqa %xmm8,80(%rbp) 2232 movdqa %xmm7,%xmm8 2233 psrld $25,%xmm8 2234 pslld $32-25,%xmm7 2235 pxor %xmm8,%xmm7 2236 movdqa %xmm6,%xmm8 2237 psrld $25,%xmm8 2238 pslld $32-25,%xmm6 2239 pxor %xmm8,%xmm6 2240 movdqa %xmm5,%xmm8 2241 psrld $25,%xmm8 2242 pslld $32-25,%xmm5 2243 pxor %xmm8,%xmm5 2244 movdqa %xmm4,%xmm8 2245 psrld $25,%xmm8 2246 pslld $32-25,%xmm4 2247 pxor %xmm8,%xmm4 2248 movdqa 80(%rbp),%xmm8 2249 .byte 102,15,58,15,255,4 2250 .byte 102,69,15,58,15,219,8 2251 .byte 102,69,15,58,15,255,12 2252 .byte 102,15,58,15,246,4 2253 .byte 102,69,15,58,15,210,8 2254 .byte 102,69,15,58,15,246,12 2255 .byte 102,15,58,15,237,4 2256 .byte 102,69,15,58,15,201,8 2257 .byte 102,69,15,58,15,237,12 2258 .byte 102,15,58,15,228,4 2259 .byte 102,69,15,58,15,192,8 2260 .byte 102,69,15,58,15,228,12 2261 movdqa %xmm8,80(%rbp) 2262 movdqa .rol16(%rip),%xmm8 2263 paddd %xmm7,%xmm3 2264 paddd %xmm6,%xmm2 2265 paddd %xmm5,%xmm1 2266 paddd %xmm4,%xmm0 2267 pxor %xmm3,%xmm15 2268 pxor %xmm2,%xmm14 2269 pxor %xmm1,%xmm13 2270 pxor %xmm0,%xmm12 2271 .byte 102,69,15,56,0,248 2272 .byte 102,69,15,56,0,240 2273 .byte 102,69,15,56,0,232 2274 .byte 102,69,15,56,0,224 2275 movdqa 80(%rbp),%xmm8 2276 paddd %xmm15,%xmm11 2277 paddd %xmm14,%xmm10 2278 paddd %xmm13,%xmm9 2279 paddd %xmm12,%xmm8 2280 pxor %xmm11,%xmm7 2281 pxor %xmm10,%xmm6 2282 pxor %xmm9,%xmm5 2283 pxor %xmm8,%xmm4 2284 movdqa %xmm8,80(%rbp) 2285 movdqa %xmm7,%xmm8 2286 psrld $20,%xmm8 2287 pslld $32-20,%xmm7 2288 pxor %xmm8,%xmm7 2289 movdqa %xmm6,%xmm8 2290 psrld $20,%xmm8 2291 pslld $32-20,%xmm6 2292 pxor %xmm8,%xmm6 2293 movdqa %xmm5,%xmm8 2294 psrld $20,%xmm8 2295 pslld $32-20,%xmm5 2296 pxor %xmm8,%xmm5 2297 movdqa %xmm4,%xmm8 2298 psrld $20,%xmm8 2299 pslld $32-20,%xmm4 2300 pxor %xmm8,%xmm4 2301 movdqa .rol8(%rip),%xmm8 2302 paddd %xmm7,%xmm3 2303 paddd %xmm6,%xmm2 2304 paddd %xmm5,%xmm1 2305 paddd %xmm4,%xmm0 2306 pxor %xmm3,%xmm15 2307 pxor %xmm2,%xmm14 2308 pxor %xmm1,%xmm13 2309 pxor %xmm0,%xmm12 2310 .byte 102,69,15,56,0,248 2311 .byte 102,69,15,56,0,240 2312 .byte 102,69,15,56,0,232 2313 .byte 102,69,15,56,0,224 2314 movdqa 80(%rbp),%xmm8 2315 paddd %xmm15,%xmm11 2316 paddd %xmm14,%xmm10 2317 paddd %xmm13,%xmm9 2318 paddd %xmm12,%xmm8 2319 pxor %xmm11,%xmm7 2320 pxor %xmm10,%xmm6 2321 pxor %xmm9,%xmm5 2322 pxor %xmm8,%xmm4 2323 movdqa %xmm8,80(%rbp) 2324 movdqa %xmm7,%xmm8 2325 psrld $25,%xmm8 2326 pslld $32-25,%xmm7 2327 pxor %xmm8,%xmm7 2328 movdqa %xmm6,%xmm8 2329 psrld $25,%xmm8 2330 pslld $32-25,%xmm6 2331 pxor %xmm8,%xmm6 2332 movdqa %xmm5,%xmm8 2333 psrld $25,%xmm8 2334 pslld $32-25,%xmm5 2335 pxor %xmm8,%xmm5 2336 movdqa %xmm4,%xmm8 2337 psrld $25,%xmm8 2338 pslld $32-25,%xmm4 2339 pxor %xmm8,%xmm4 2340 movdqa 80(%rbp),%xmm8 2341 .byte 102,15,58,15,255,12 2342 .byte 102,69,15,58,15,219,8 2343 .byte 102,69,15,58,15,255,4 2344 .byte 102,15,58,15,246,12 2345 .byte 102,69,15,58,15,210,8 2346 .byte 102,69,15,58,15,246,4 2347 .byte 102,15,58,15,237,12 2348 .byte 102,69,15,58,15,201,8 2349 .byte 102,69,15,58,15,237,4 2350 .byte 102,15,58,15,228,12 2351 .byte 102,69,15,58,15,192,8 2352 .byte 102,69,15,58,15,228,4 2353 2354 decq %r10 2355 jnz 1b 2356 paddd .chacha20_consts(%rip),%xmm3 2357 paddd 48(%rbp),%xmm7 2358 paddd 64(%rbp),%xmm11 2359 paddd 144(%rbp),%xmm15 2360 paddd .chacha20_consts(%rip),%xmm2 2361 paddd 48(%rbp),%xmm6 2362 paddd 64(%rbp),%xmm10 2363 paddd 128(%rbp),%xmm14 2364 paddd .chacha20_consts(%rip),%xmm1 2365 paddd 48(%rbp),%xmm5 2366 paddd 64(%rbp),%xmm9 
2367 paddd 112(%rbp),%xmm13 2368 paddd .chacha20_consts(%rip),%xmm0 2369 paddd 48(%rbp),%xmm4 2370 paddd 64(%rbp),%xmm8 2371 paddd 96(%rbp),%xmm12 2372 2373 2374 pand .clamp(%rip),%xmm3 2375 movdqa %xmm3,0(%rbp) 2376 movdqa %xmm7,16(%rbp) 2377 2378 movq %r8,%r8 2379 call poly_hash_ad_internal 2380 movdqu 0 + 0(%rsi),%xmm3 2381 movdqu 16 + 0(%rsi),%xmm7 2382 movdqu 32 + 0(%rsi),%xmm11 2383 movdqu 48 + 0(%rsi),%xmm15 2384 pxor %xmm3,%xmm2 2385 pxor %xmm7,%xmm6 2386 pxor %xmm11,%xmm10 2387 pxor %xmm14,%xmm15 2388 movdqu %xmm2,0 + 0(%rdi) 2389 movdqu %xmm6,16 + 0(%rdi) 2390 movdqu %xmm10,32 + 0(%rdi) 2391 movdqu %xmm15,48 + 0(%rdi) 2392 movdqu 0 + 64(%rsi),%xmm3 2393 movdqu 16 + 64(%rsi),%xmm7 2394 movdqu 32 + 64(%rsi),%xmm11 2395 movdqu 48 + 64(%rsi),%xmm15 2396 pxor %xmm3,%xmm1 2397 pxor %xmm7,%xmm5 2398 pxor %xmm11,%xmm9 2399 pxor %xmm13,%xmm15 2400 movdqu %xmm1,0 + 64(%rdi) 2401 movdqu %xmm5,16 + 64(%rdi) 2402 movdqu %xmm9,32 + 64(%rdi) 2403 movdqu %xmm15,48 + 64(%rdi) 2404 2405 cmpq $192,%rbx 2406 ja 1f 2407 movq $128,%rcx 2408 subq $128,%rbx 2409 leaq 128(%rsi),%rsi 2410 jmp seal_sse_128_seal_hash 2411 1: 2412 movdqu 0 + 128(%rsi),%xmm3 2413 movdqu 16 + 128(%rsi),%xmm7 2414 movdqu 32 + 128(%rsi),%xmm11 2415 movdqu 48 + 128(%rsi),%xmm15 2416 pxor %xmm3,%xmm0 2417 pxor %xmm7,%xmm4 2418 pxor %xmm11,%xmm8 2419 pxor %xmm12,%xmm15 2420 movdqu %xmm0,0 + 128(%rdi) 2421 movdqu %xmm4,16 + 128(%rdi) 2422 movdqu %xmm8,32 + 128(%rdi) 2423 movdqu %xmm15,48 + 128(%rdi) 2424 2425 movq $192,%rcx 2426 subq $192,%rbx 2427 leaq 192(%rsi),%rsi 2428 movq $2,%rcx 2429 movq $8,%r8 2430 cmpq $64,%rbx 2431 jbe seal_sse_tail_64 2432 cmpq $128,%rbx 2433 jbe seal_sse_tail_128 2434 cmpq $192,%rbx 2435 jbe seal_sse_tail_192 2436 2437 1: 2438 movdqa .chacha20_consts(%rip),%xmm0 2439 movdqa 48(%rbp),%xmm4 2440 movdqa 64(%rbp),%xmm8 2441 movdqa %xmm0,%xmm1 2442 movdqa %xmm4,%xmm5 2443 movdqa %xmm8,%xmm9 2444 movdqa %xmm0,%xmm2 2445 movdqa %xmm4,%xmm6 2446 movdqa %xmm8,%xmm10 2447 movdqa %xmm0,%xmm3 2448 movdqa %xmm4,%xmm7 2449 movdqa %xmm8,%xmm11 2450 movdqa 96(%rbp),%xmm15 2451 paddd .sse_inc(%rip),%xmm15 2452 movdqa %xmm15,%xmm14 2453 paddd .sse_inc(%rip),%xmm14 2454 movdqa %xmm14,%xmm13 2455 paddd .sse_inc(%rip),%xmm13 2456 movdqa %xmm13,%xmm12 2457 paddd .sse_inc(%rip),%xmm12 2458 movdqa %xmm12,96(%rbp) 2459 movdqa %xmm13,112(%rbp) 2460 movdqa %xmm14,128(%rbp) 2461 movdqa %xmm15,144(%rbp) 2462 2463 2: 2464 movdqa %xmm8,80(%rbp) 2465 movdqa .rol16(%rip),%xmm8 2466 paddd %xmm7,%xmm3 2467 paddd %xmm6,%xmm2 2468 paddd %xmm5,%xmm1 2469 paddd %xmm4,%xmm0 2470 pxor %xmm3,%xmm15 2471 pxor %xmm2,%xmm14 2472 pxor %xmm1,%xmm13 2473 pxor %xmm0,%xmm12 2474 .byte 102,69,15,56,0,248 2475 .byte 102,69,15,56,0,240 2476 .byte 102,69,15,56,0,232 2477 .byte 102,69,15,56,0,224 2478 movdqa 80(%rbp),%xmm8 2479 paddd %xmm15,%xmm11 2480 paddd %xmm14,%xmm10 2481 paddd %xmm13,%xmm9 2482 paddd %xmm12,%xmm8 2483 pxor %xmm11,%xmm7 2484 addq 0(%rdi),%r10 2485 adcq 8+0(%rdi),%r11 2486 adcq $1,%r12 2487 pxor %xmm10,%xmm6 2488 pxor %xmm9,%xmm5 2489 pxor %xmm8,%xmm4 2490 movdqa %xmm8,80(%rbp) 2491 movdqa %xmm7,%xmm8 2492 psrld $20,%xmm8 2493 pslld $32-20,%xmm7 2494 pxor %xmm8,%xmm7 2495 movdqa %xmm6,%xmm8 2496 psrld $20,%xmm8 2497 pslld $32-20,%xmm6 2498 pxor %xmm8,%xmm6 2499 movdqa %xmm5,%xmm8 2500 psrld $20,%xmm8 2501 pslld $32-20,%xmm5 2502 pxor %xmm8,%xmm5 2503 movdqa %xmm4,%xmm8 2504 psrld $20,%xmm8 2505 pslld $32-20,%xmm4 2506 pxor %xmm8,%xmm4 2507 movq 0+0(%rbp),%rax 2508 movq %rax,%r15 2509 mulq %r10 2510 movq %rax,%r13 2511 movq %rdx,%r14 
2512 movq 0+0(%rbp),%rax 2513 mulq %r11 2514 imulq %r12,%r15 2515 addq %rax,%r14 2516 adcq %rdx,%r15 2517 movdqa .rol8(%rip),%xmm8 2518 paddd %xmm7,%xmm3 2519 paddd %xmm6,%xmm2 2520 paddd %xmm5,%xmm1 2521 paddd %xmm4,%xmm0 2522 pxor %xmm3,%xmm15 2523 pxor %xmm2,%xmm14 2524 pxor %xmm1,%xmm13 2525 pxor %xmm0,%xmm12 2526 .byte 102,69,15,56,0,248 2527 .byte 102,69,15,56,0,240 2528 .byte 102,69,15,56,0,232 2529 .byte 102,69,15,56,0,224 2530 movdqa 80(%rbp),%xmm8 2531 paddd %xmm15,%xmm11 2532 paddd %xmm14,%xmm10 2533 paddd %xmm13,%xmm9 2534 paddd %xmm12,%xmm8 2535 pxor %xmm11,%xmm7 2536 pxor %xmm10,%xmm6 2537 movq 8+0(%rbp),%rax 2538 movq %rax,%r9 2539 mulq %r10 2540 addq %rax,%r14 2541 adcq $0,%rdx 2542 movq %rdx,%r10 2543 movq 8+0(%rbp),%rax 2544 mulq %r11 2545 addq %rax,%r15 2546 adcq $0,%rdx 2547 pxor %xmm9,%xmm5 2548 pxor %xmm8,%xmm4 2549 movdqa %xmm8,80(%rbp) 2550 movdqa %xmm7,%xmm8 2551 psrld $25,%xmm8 2552 pslld $32-25,%xmm7 2553 pxor %xmm8,%xmm7 2554 movdqa %xmm6,%xmm8 2555 psrld $25,%xmm8 2556 pslld $32-25,%xmm6 2557 pxor %xmm8,%xmm6 2558 movdqa %xmm5,%xmm8 2559 psrld $25,%xmm8 2560 pslld $32-25,%xmm5 2561 pxor %xmm8,%xmm5 2562 movdqa %xmm4,%xmm8 2563 psrld $25,%xmm8 2564 pslld $32-25,%xmm4 2565 pxor %xmm8,%xmm4 2566 movdqa 80(%rbp),%xmm8 2567 imulq %r12,%r9 2568 addq %r10,%r15 2569 adcq %rdx,%r9 2570 .byte 102,15,58,15,255,4 2571 .byte 102,69,15,58,15,219,8 2572 .byte 102,69,15,58,15,255,12 2573 .byte 102,15,58,15,246,4 2574 .byte 102,69,15,58,15,210,8 2575 .byte 102,69,15,58,15,246,12 2576 .byte 102,15,58,15,237,4 2577 .byte 102,69,15,58,15,201,8 2578 .byte 102,69,15,58,15,237,12 2579 .byte 102,15,58,15,228,4 2580 .byte 102,69,15,58,15,192,8 2581 .byte 102,69,15,58,15,228,12 2582 movdqa %xmm8,80(%rbp) 2583 movdqa .rol16(%rip),%xmm8 2584 paddd %xmm7,%xmm3 2585 paddd %xmm6,%xmm2 2586 paddd %xmm5,%xmm1 2587 paddd %xmm4,%xmm0 2588 pxor %xmm3,%xmm15 2589 pxor %xmm2,%xmm14 2590 movq %r13,%r10 2591 movq %r14,%r11 2592 movq %r15,%r12 2593 andq $3,%r12 2594 movq %r15,%r13 2595 andq $-4,%r13 2596 movq %r9,%r14 2597 shrdq $2,%r9,%r15 2598 shrq $2,%r9 2599 addq %r13,%r10 2600 adcq %r14,%r11 2601 adcq $0,%r12 2602 addq %r15,%r10 2603 adcq %r9,%r11 2604 adcq $0,%r12 2605 pxor %xmm1,%xmm13 2606 pxor %xmm0,%xmm12 2607 .byte 102,69,15,56,0,248 2608 .byte 102,69,15,56,0,240 2609 .byte 102,69,15,56,0,232 2610 .byte 102,69,15,56,0,224 2611 movdqa 80(%rbp),%xmm8 2612 paddd %xmm15,%xmm11 2613 paddd %xmm14,%xmm10 2614 paddd %xmm13,%xmm9 2615 paddd %xmm12,%xmm8 2616 pxor %xmm11,%xmm7 2617 pxor %xmm10,%xmm6 2618 pxor %xmm9,%xmm5 2619 pxor %xmm8,%xmm4 2620 movdqa %xmm8,80(%rbp) 2621 movdqa %xmm7,%xmm8 2622 psrld $20,%xmm8 2623 pslld $32-20,%xmm7 2624 pxor %xmm8,%xmm7 2625 movdqa %xmm6,%xmm8 2626 psrld $20,%xmm8 2627 pslld $32-20,%xmm6 2628 pxor %xmm8,%xmm6 2629 movdqa %xmm5,%xmm8 2630 psrld $20,%xmm8 2631 pslld $32-20,%xmm5 2632 pxor %xmm8,%xmm5 2633 movdqa %xmm4,%xmm8 2634 psrld $20,%xmm8 2635 pslld $32-20,%xmm4 2636 pxor %xmm8,%xmm4 2637 movdqa .rol8(%rip),%xmm8 2638 paddd %xmm7,%xmm3 2639 paddd %xmm6,%xmm2 2640 paddd %xmm5,%xmm1 2641 paddd %xmm4,%xmm0 2642 pxor %xmm3,%xmm15 2643 pxor %xmm2,%xmm14 2644 pxor %xmm1,%xmm13 2645 pxor %xmm0,%xmm12 2646 .byte 102,69,15,56,0,248 2647 .byte 102,69,15,56,0,240 2648 .byte 102,69,15,56,0,232 2649 .byte 102,69,15,56,0,224 2650 movdqa 80(%rbp),%xmm8 2651 paddd %xmm15,%xmm11 2652 paddd %xmm14,%xmm10 2653 paddd %xmm13,%xmm9 2654 paddd %xmm12,%xmm8 2655 pxor %xmm11,%xmm7 2656 pxor %xmm10,%xmm6 2657 pxor %xmm9,%xmm5 2658 pxor %xmm8,%xmm4 2659 movdqa %xmm8,80(%rbp) 2660 
movdqa %xmm7,%xmm8 2661 psrld $25,%xmm8 2662 pslld $32-25,%xmm7 2663 pxor %xmm8,%xmm7 2664 movdqa %xmm6,%xmm8 2665 psrld $25,%xmm8 2666 pslld $32-25,%xmm6 2667 pxor %xmm8,%xmm6 2668 movdqa %xmm5,%xmm8 2669 psrld $25,%xmm8 2670 pslld $32-25,%xmm5 2671 pxor %xmm8,%xmm5 2672 movdqa %xmm4,%xmm8 2673 psrld $25,%xmm8 2674 pslld $32-25,%xmm4 2675 pxor %xmm8,%xmm4 2676 movdqa 80(%rbp),%xmm8 2677 .byte 102,15,58,15,255,12 2678 .byte 102,69,15,58,15,219,8 2679 .byte 102,69,15,58,15,255,4 2680 .byte 102,15,58,15,246,12 2681 .byte 102,69,15,58,15,210,8 2682 .byte 102,69,15,58,15,246,4 2683 .byte 102,15,58,15,237,12 2684 .byte 102,69,15,58,15,201,8 2685 .byte 102,69,15,58,15,237,4 2686 .byte 102,15,58,15,228,12 2687 .byte 102,69,15,58,15,192,8 2688 .byte 102,69,15,58,15,228,4 2689 2690 leaq 16(%rdi),%rdi 2691 decq %r8 2692 jge 2b 2693 addq 0(%rdi),%r10 2694 adcq 8+0(%rdi),%r11 2695 adcq $1,%r12 2696 movq 0+0(%rbp),%rax 2697 movq %rax,%r15 2698 mulq %r10 2699 movq %rax,%r13 2700 movq %rdx,%r14 2701 movq 0+0(%rbp),%rax 2702 mulq %r11 2703 imulq %r12,%r15 2704 addq %rax,%r14 2705 adcq %rdx,%r15 2706 movq 8+0(%rbp),%rax 2707 movq %rax,%r9 2708 mulq %r10 2709 addq %rax,%r14 2710 adcq $0,%rdx 2711 movq %rdx,%r10 2712 movq 8+0(%rbp),%rax 2713 mulq %r11 2714 addq %rax,%r15 2715 adcq $0,%rdx 2716 imulq %r12,%r9 2717 addq %r10,%r15 2718 adcq %rdx,%r9 2719 movq %r13,%r10 2720 movq %r14,%r11 2721 movq %r15,%r12 2722 andq $3,%r12 2723 movq %r15,%r13 2724 andq $-4,%r13 2725 movq %r9,%r14 2726 shrdq $2,%r9,%r15 2727 shrq $2,%r9 2728 addq %r13,%r10 2729 adcq %r14,%r11 2730 adcq $0,%r12 2731 addq %r15,%r10 2732 adcq %r9,%r11 2733 adcq $0,%r12 2734 2735 leaq 16(%rdi),%rdi 2736 decq %rcx 2737 jg 2b 2738 paddd .chacha20_consts(%rip),%xmm3 2739 paddd 48(%rbp),%xmm7 2740 paddd 64(%rbp),%xmm11 2741 paddd 144(%rbp),%xmm15 2742 paddd .chacha20_consts(%rip),%xmm2 2743 paddd 48(%rbp),%xmm6 2744 paddd 64(%rbp),%xmm10 2745 paddd 128(%rbp),%xmm14 2746 paddd .chacha20_consts(%rip),%xmm1 2747 paddd 48(%rbp),%xmm5 2748 paddd 64(%rbp),%xmm9 2749 paddd 112(%rbp),%xmm13 2750 paddd .chacha20_consts(%rip),%xmm0 2751 paddd 48(%rbp),%xmm4 2752 paddd 64(%rbp),%xmm8 2753 paddd 96(%rbp),%xmm12 2754 2755 movdqa %xmm14,80(%rbp) 2756 movdqa %xmm14,80(%rbp) 2757 movdqu 0 + 0(%rsi),%xmm14 2758 pxor %xmm3,%xmm14 2759 movdqu %xmm14,0 + 0(%rdi) 2760 movdqu 16 + 0(%rsi),%xmm14 2761 pxor %xmm7,%xmm14 2762 movdqu %xmm14,16 + 0(%rdi) 2763 movdqu 32 + 0(%rsi),%xmm14 2764 pxor %xmm11,%xmm14 2765 movdqu %xmm14,32 + 0(%rdi) 2766 movdqu 48 + 0(%rsi),%xmm14 2767 pxor %xmm15,%xmm14 2768 movdqu %xmm14,48 + 0(%rdi) 2769 2770 movdqa 80(%rbp),%xmm14 2771 movdqu 0 + 64(%rsi),%xmm3 2772 movdqu 16 + 64(%rsi),%xmm7 2773 movdqu 32 + 64(%rsi),%xmm11 2774 movdqu 48 + 64(%rsi),%xmm15 2775 pxor %xmm3,%xmm2 2776 pxor %xmm7,%xmm6 2777 pxor %xmm11,%xmm10 2778 pxor %xmm14,%xmm15 2779 movdqu %xmm2,0 + 64(%rdi) 2780 movdqu %xmm6,16 + 64(%rdi) 2781 movdqu %xmm10,32 + 64(%rdi) 2782 movdqu %xmm15,48 + 64(%rdi) 2783 movdqu 0 + 128(%rsi),%xmm3 2784 movdqu 16 + 128(%rsi),%xmm7 2785 movdqu 32 + 128(%rsi),%xmm11 2786 movdqu 48 + 128(%rsi),%xmm15 2787 pxor %xmm3,%xmm1 2788 pxor %xmm7,%xmm5 2789 pxor %xmm11,%xmm9 2790 pxor %xmm13,%xmm15 2791 movdqu %xmm1,0 + 128(%rdi) 2792 movdqu %xmm5,16 + 128(%rdi) 2793 movdqu %xmm9,32 + 128(%rdi) 2794 movdqu %xmm15,48 + 128(%rdi) 2795 2796 cmpq $256,%rbx 2797 ja 3f 2798 2799 movq $192,%rcx 2800 subq $192,%rbx 2801 leaq 192(%rsi),%rsi 2802 jmp seal_sse_128_seal_hash 2803 3: 2804 movdqu 0 + 192(%rsi),%xmm3 2805 movdqu 16 + 192(%rsi),%xmm7 2806 movdqu 32 + 
192(%rsi),%xmm11 2807 movdqu 48 + 192(%rsi),%xmm15 2808 pxor %xmm3,%xmm0 2809 pxor %xmm7,%xmm4 2810 pxor %xmm11,%xmm8 2811 pxor %xmm12,%xmm15 2812 movdqu %xmm0,0 + 192(%rdi) 2813 movdqu %xmm4,16 + 192(%rdi) 2814 movdqu %xmm8,32 + 192(%rdi) 2815 movdqu %xmm15,48 + 192(%rdi) 2816 2817 leaq 256(%rsi),%rsi 2818 subq $256,%rbx 2819 movq $6,%rcx 2820 movq $4,%r8 2821 cmpq $192,%rbx 2822 jg 1b 2823 movq %rbx,%rcx 2824 testq %rbx,%rbx 2825 je seal_sse_128_seal_hash 2826 movq $6,%rcx 2827 cmpq $64,%rbx 2828 jg 3f 2829 2830 seal_sse_tail_64: 2831 movdqa .chacha20_consts(%rip),%xmm0 2832 movdqa 48(%rbp),%xmm4 2833 movdqa 64(%rbp),%xmm8 2834 movdqa 96(%rbp),%xmm12 2835 paddd .sse_inc(%rip),%xmm12 2836 movdqa %xmm12,96(%rbp) 2837 2838 1: 2839 addq 0(%rdi),%r10 2840 adcq 8+0(%rdi),%r11 2841 adcq $1,%r12 2842 movq 0+0(%rbp),%rax 2843 movq %rax,%r15 2844 mulq %r10 2845 movq %rax,%r13 2846 movq %rdx,%r14 2847 movq 0+0(%rbp),%rax 2848 mulq %r11 2849 imulq %r12,%r15 2850 addq %rax,%r14 2851 adcq %rdx,%r15 2852 movq 8+0(%rbp),%rax 2853 movq %rax,%r9 2854 mulq %r10 2855 addq %rax,%r14 2856 adcq $0,%rdx 2857 movq %rdx,%r10 2858 movq 8+0(%rbp),%rax 2859 mulq %r11 2860 addq %rax,%r15 2861 adcq $0,%rdx 2862 imulq %r12,%r9 2863 addq %r10,%r15 2864 adcq %rdx,%r9 2865 movq %r13,%r10 2866 movq %r14,%r11 2867 movq %r15,%r12 2868 andq $3,%r12 2869 movq %r15,%r13 2870 andq $-4,%r13 2871 movq %r9,%r14 2872 shrdq $2,%r9,%r15 2873 shrq $2,%r9 2874 addq %r13,%r10 2875 adcq %r14,%r11 2876 adcq $0,%r12 2877 addq %r15,%r10 2878 adcq %r9,%r11 2879 adcq $0,%r12 2880 2881 leaq 16(%rdi),%rdi 2882 2: 2883 paddd %xmm4,%xmm0 2884 pxor %xmm0,%xmm12 2885 pshufb .rol16(%rip),%xmm12 2886 paddd %xmm12,%xmm8 2887 pxor %xmm8,%xmm4 2888 movdqa %xmm4,%xmm3 2889 pslld $12,%xmm3 2890 psrld $20,%xmm4 2891 pxor %xmm3,%xmm4 2892 paddd %xmm4,%xmm0 2893 pxor %xmm0,%xmm12 2894 pshufb .rol8(%rip),%xmm12 2895 paddd %xmm12,%xmm8 2896 pxor %xmm8,%xmm4 2897 movdqa %xmm4,%xmm3 2898 pslld $7,%xmm3 2899 psrld $25,%xmm4 2900 pxor %xmm3,%xmm4 2901 .byte 102,15,58,15,228,4 2902 .byte 102,69,15,58,15,192,8 2903 .byte 102,69,15,58,15,228,12 2904 paddd %xmm4,%xmm0 2905 pxor %xmm0,%xmm12 2906 pshufb .rol16(%rip),%xmm12 2907 paddd %xmm12,%xmm8 2908 pxor %xmm8,%xmm4 2909 movdqa %xmm4,%xmm3 2910 pslld $12,%xmm3 2911 psrld $20,%xmm4 2912 pxor %xmm3,%xmm4 2913 paddd %xmm4,%xmm0 2914 pxor %xmm0,%xmm12 2915 pshufb .rol8(%rip),%xmm12 2916 paddd %xmm12,%xmm8 2917 pxor %xmm8,%xmm4 2918 movdqa %xmm4,%xmm3 2919 pslld $7,%xmm3 2920 psrld $25,%xmm4 2921 pxor %xmm3,%xmm4 2922 .byte 102,15,58,15,228,12 2923 .byte 102,69,15,58,15,192,8 2924 .byte 102,69,15,58,15,228,4 2925 addq 0(%rdi),%r10 2926 adcq 8+0(%rdi),%r11 2927 adcq $1,%r12 2928 movq 0+0(%rbp),%rax 2929 movq %rax,%r15 2930 mulq %r10 2931 movq %rax,%r13 2932 movq %rdx,%r14 2933 movq 0+0(%rbp),%rax 2934 mulq %r11 2935 imulq %r12,%r15 2936 addq %rax,%r14 2937 adcq %rdx,%r15 2938 movq 8+0(%rbp),%rax 2939 movq %rax,%r9 2940 mulq %r10 2941 addq %rax,%r14 2942 adcq $0,%rdx 2943 movq %rdx,%r10 2944 movq 8+0(%rbp),%rax 2945 mulq %r11 2946 addq %rax,%r15 2947 adcq $0,%rdx 2948 imulq %r12,%r9 2949 addq %r10,%r15 2950 adcq %rdx,%r9 2951 movq %r13,%r10 2952 movq %r14,%r11 2953 movq %r15,%r12 2954 andq $3,%r12 2955 movq %r15,%r13 2956 andq $-4,%r13 2957 movq %r9,%r14 2958 shrdq $2,%r9,%r15 2959 shrq $2,%r9 2960 addq %r13,%r10 2961 adcq %r14,%r11 2962 adcq $0,%r12 2963 addq %r15,%r10 2964 adcq %r9,%r11 2965 adcq $0,%r12 2966 2967 leaq 16(%rdi),%rdi 2968 decq %rcx 2969 jg 1b 2970 decq %r8 2971 jge 2b 2972 paddd .chacha20_consts(%rip),%xmm0 
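// Rounds for this 64-byte tail block are done: add the saved input state back in
// (constants, key words from 48(%rbp)/64(%rbp), block counter from 96(%rbp)) to
// turn the working registers into keystream, then jump to seal_sse_128_seal to
// encrypt and hash the remaining bytes 16 at a time.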
2973 paddd 48(%rbp),%xmm4 2974 paddd 64(%rbp),%xmm8 2975 paddd 96(%rbp),%xmm12 2976 2977 jmp seal_sse_128_seal 2978 3: 2979 cmpq $128,%rbx 2980 jg 3f 2981 2982 seal_sse_tail_128: 2983 movdqa .chacha20_consts(%rip),%xmm0 2984 movdqa 48(%rbp),%xmm4 2985 movdqa 64(%rbp),%xmm8 2986 movdqa %xmm0,%xmm1 2987 movdqa %xmm4,%xmm5 2988 movdqa %xmm8,%xmm9 2989 movdqa 96(%rbp),%xmm13 2990 paddd .sse_inc(%rip),%xmm13 2991 movdqa %xmm13,%xmm12 2992 paddd .sse_inc(%rip),%xmm12 2993 movdqa %xmm12,96(%rbp) 2994 movdqa %xmm13,112(%rbp) 2995 2996 1: 2997 addq 0(%rdi),%r10 2998 adcq 8+0(%rdi),%r11 2999 adcq $1,%r12 3000 movq 0+0(%rbp),%rax 3001 movq %rax,%r15 3002 mulq %r10 3003 movq %rax,%r13 3004 movq %rdx,%r14 3005 movq 0+0(%rbp),%rax 3006 mulq %r11 3007 imulq %r12,%r15 3008 addq %rax,%r14 3009 adcq %rdx,%r15 3010 movq 8+0(%rbp),%rax 3011 movq %rax,%r9 3012 mulq %r10 3013 addq %rax,%r14 3014 adcq $0,%rdx 3015 movq %rdx,%r10 3016 movq 8+0(%rbp),%rax 3017 mulq %r11 3018 addq %rax,%r15 3019 adcq $0,%rdx 3020 imulq %r12,%r9 3021 addq %r10,%r15 3022 adcq %rdx,%r9 3023 movq %r13,%r10 3024 movq %r14,%r11 3025 movq %r15,%r12 3026 andq $3,%r12 3027 movq %r15,%r13 3028 andq $-4,%r13 3029 movq %r9,%r14 3030 shrdq $2,%r9,%r15 3031 shrq $2,%r9 3032 addq %r13,%r10 3033 adcq %r14,%r11 3034 adcq $0,%r12 3035 addq %r15,%r10 3036 adcq %r9,%r11 3037 adcq $0,%r12 3038 3039 leaq 16(%rdi),%rdi 3040 2: 3041 paddd %xmm4,%xmm0 3042 pxor %xmm0,%xmm12 3043 pshufb .rol16(%rip),%xmm12 3044 paddd %xmm12,%xmm8 3045 pxor %xmm8,%xmm4 3046 movdqa %xmm4,%xmm3 3047 pslld $12,%xmm3 3048 psrld $20,%xmm4 3049 pxor %xmm3,%xmm4 3050 paddd %xmm4,%xmm0 3051 pxor %xmm0,%xmm12 3052 pshufb .rol8(%rip),%xmm12 3053 paddd %xmm12,%xmm8 3054 pxor %xmm8,%xmm4 3055 movdqa %xmm4,%xmm3 3056 pslld $7,%xmm3 3057 psrld $25,%xmm4 3058 pxor %xmm3,%xmm4 3059 .byte 102,15,58,15,228,4 3060 .byte 102,69,15,58,15,192,8 3061 .byte 102,69,15,58,15,228,12 3062 paddd %xmm5,%xmm1 3063 pxor %xmm1,%xmm13 3064 pshufb .rol16(%rip),%xmm13 3065 paddd %xmm13,%xmm9 3066 pxor %xmm9,%xmm5 3067 movdqa %xmm5,%xmm3 3068 pslld $12,%xmm3 3069 psrld $20,%xmm5 3070 pxor %xmm3,%xmm5 3071 paddd %xmm5,%xmm1 3072 pxor %xmm1,%xmm13 3073 pshufb .rol8(%rip),%xmm13 3074 paddd %xmm13,%xmm9 3075 pxor %xmm9,%xmm5 3076 movdqa %xmm5,%xmm3 3077 pslld $7,%xmm3 3078 psrld $25,%xmm5 3079 pxor %xmm3,%xmm5 3080 .byte 102,15,58,15,237,4 3081 .byte 102,69,15,58,15,201,8 3082 .byte 102,69,15,58,15,237,12 3083 addq 0(%rdi),%r10 3084 adcq 8+0(%rdi),%r11 3085 adcq $1,%r12 3086 movq 0+0(%rbp),%rax 3087 movq %rax,%r15 3088 mulq %r10 3089 movq %rax,%r13 3090 movq %rdx,%r14 3091 movq 0+0(%rbp),%rax 3092 mulq %r11 3093 imulq %r12,%r15 3094 addq %rax,%r14 3095 adcq %rdx,%r15 3096 movq 8+0(%rbp),%rax 3097 movq %rax,%r9 3098 mulq %r10 3099 addq %rax,%r14 3100 adcq $0,%rdx 3101 movq %rdx,%r10 3102 movq 8+0(%rbp),%rax 3103 mulq %r11 3104 addq %rax,%r15 3105 adcq $0,%rdx 3106 imulq %r12,%r9 3107 addq %r10,%r15 3108 adcq %rdx,%r9 3109 movq %r13,%r10 3110 movq %r14,%r11 3111 movq %r15,%r12 3112 andq $3,%r12 3113 movq %r15,%r13 3114 andq $-4,%r13 3115 movq %r9,%r14 3116 shrdq $2,%r9,%r15 3117 shrq $2,%r9 3118 addq %r13,%r10 3119 adcq %r14,%r11 3120 adcq $0,%r12 3121 addq %r15,%r10 3122 adcq %r9,%r11 3123 adcq $0,%r12 3124 paddd %xmm4,%xmm0 3125 pxor %xmm0,%xmm12 3126 pshufb .rol16(%rip),%xmm12 3127 paddd %xmm12,%xmm8 3128 pxor %xmm8,%xmm4 3129 movdqa %xmm4,%xmm3 3130 pslld $12,%xmm3 3131 psrld $20,%xmm4 3132 pxor %xmm3,%xmm4 3133 paddd %xmm4,%xmm0 3134 pxor %xmm0,%xmm12 3135 pshufb .rol8(%rip),%xmm12 3136 paddd %xmm12,%xmm8 3137 
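// In these narrower tail paths %xmm3 is free, so it serves as the scratch
// register for the shift-based rotates (movdqa to %xmm3, pslld, psrld, pxor);
// the four-block loop above had to spill %xmm8 to 80(%rbp) for the same purpose.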
pxor %xmm8,%xmm4 3138 movdqa %xmm4,%xmm3 3139 pslld $7,%xmm3 3140 psrld $25,%xmm4 3141 pxor %xmm3,%xmm4 3142 .byte 102,15,58,15,228,12 3143 .byte 102,69,15,58,15,192,8 3144 .byte 102,69,15,58,15,228,4 3145 paddd %xmm5,%xmm1 3146 pxor %xmm1,%xmm13 3147 pshufb .rol16(%rip),%xmm13 3148 paddd %xmm13,%xmm9 3149 pxor %xmm9,%xmm5 3150 movdqa %xmm5,%xmm3 3151 pslld $12,%xmm3 3152 psrld $20,%xmm5 3153 pxor %xmm3,%xmm5 3154 paddd %xmm5,%xmm1 3155 pxor %xmm1,%xmm13 3156 pshufb .rol8(%rip),%xmm13 3157 paddd %xmm13,%xmm9 3158 pxor %xmm9,%xmm5 3159 movdqa %xmm5,%xmm3 3160 pslld $7,%xmm3 3161 psrld $25,%xmm5 3162 pxor %xmm3,%xmm5 3163 .byte 102,15,58,15,237,12 3164 .byte 102,69,15,58,15,201,8 3165 .byte 102,69,15,58,15,237,4 3166 3167 leaq 16(%rdi),%rdi 3168 decq %rcx 3169 jg 1b 3170 decq %r8 3171 jge 2b 3172 paddd .chacha20_consts(%rip),%xmm1 3173 paddd 48(%rbp),%xmm5 3174 paddd 64(%rbp),%xmm9 3175 paddd 112(%rbp),%xmm13 3176 paddd .chacha20_consts(%rip),%xmm0 3177 paddd 48(%rbp),%xmm4 3178 paddd 64(%rbp),%xmm8 3179 paddd 96(%rbp),%xmm12 3180 movdqu 0 + 0(%rsi),%xmm3 3181 movdqu 16 + 0(%rsi),%xmm7 3182 movdqu 32 + 0(%rsi),%xmm11 3183 movdqu 48 + 0(%rsi),%xmm15 3184 pxor %xmm3,%xmm1 3185 pxor %xmm7,%xmm5 3186 pxor %xmm11,%xmm9 3187 pxor %xmm13,%xmm15 3188 movdqu %xmm1,0 + 0(%rdi) 3189 movdqu %xmm5,16 + 0(%rdi) 3190 movdqu %xmm9,32 + 0(%rdi) 3191 movdqu %xmm15,48 + 0(%rdi) 3192 3193 movq $64,%rcx 3194 subq $64,%rbx 3195 leaq 64(%rsi),%rsi 3196 jmp seal_sse_128_seal_hash 3197 3: 3198 3199 seal_sse_tail_192: 3200 movdqa .chacha20_consts(%rip),%xmm0 3201 movdqa 48(%rbp),%xmm4 3202 movdqa 64(%rbp),%xmm8 3203 movdqa %xmm0,%xmm1 3204 movdqa %xmm4,%xmm5 3205 movdqa %xmm8,%xmm9 3206 movdqa %xmm0,%xmm2 3207 movdqa %xmm4,%xmm6 3208 movdqa %xmm8,%xmm10 3209 movdqa 96(%rbp),%xmm14 3210 paddd .sse_inc(%rip),%xmm14 3211 movdqa %xmm14,%xmm13 3212 paddd .sse_inc(%rip),%xmm13 3213 movdqa %xmm13,%xmm12 3214 paddd .sse_inc(%rip),%xmm12 3215 movdqa %xmm12,96(%rbp) 3216 movdqa %xmm13,112(%rbp) 3217 movdqa %xmm14,128(%rbp) 3218 3219 1: 3220 addq 0(%rdi),%r10 3221 adcq 8+0(%rdi),%r11 3222 adcq $1,%r12 3223 movq 0+0(%rbp),%rax 3224 movq %rax,%r15 3225 mulq %r10 3226 movq %rax,%r13 3227 movq %rdx,%r14 3228 movq 0+0(%rbp),%rax 3229 mulq %r11 3230 imulq %r12,%r15 3231 addq %rax,%r14 3232 adcq %rdx,%r15 3233 movq 8+0(%rbp),%rax 3234 movq %rax,%r9 3235 mulq %r10 3236 addq %rax,%r14 3237 adcq $0,%rdx 3238 movq %rdx,%r10 3239 movq 8+0(%rbp),%rax 3240 mulq %r11 3241 addq %rax,%r15 3242 adcq $0,%rdx 3243 imulq %r12,%r9 3244 addq %r10,%r15 3245 adcq %rdx,%r9 3246 movq %r13,%r10 3247 movq %r14,%r11 3248 movq %r15,%r12 3249 andq $3,%r12 3250 movq %r15,%r13 3251 andq $-4,%r13 3252 movq %r9,%r14 3253 shrdq $2,%r9,%r15 3254 shrq $2,%r9 3255 addq %r13,%r10 3256 adcq %r14,%r11 3257 adcq $0,%r12 3258 addq %r15,%r10 3259 adcq %r9,%r11 3260 adcq $0,%r12 3261 3262 leaq 16(%rdi),%rdi 3263 2: 3264 paddd %xmm4,%xmm0 3265 pxor %xmm0,%xmm12 3266 pshufb .rol16(%rip),%xmm12 3267 paddd %xmm12,%xmm8 3268 pxor %xmm8,%xmm4 3269 movdqa %xmm4,%xmm3 3270 pslld $12,%xmm3 3271 psrld $20,%xmm4 3272 pxor %xmm3,%xmm4 3273 paddd %xmm4,%xmm0 3274 pxor %xmm0,%xmm12 3275 pshufb .rol8(%rip),%xmm12 3276 paddd %xmm12,%xmm8 3277 pxor %xmm8,%xmm4 3278 movdqa %xmm4,%xmm3 3279 pslld $7,%xmm3 3280 psrld $25,%xmm4 3281 pxor %xmm3,%xmm4 3282 .byte 102,15,58,15,228,4 3283 .byte 102,69,15,58,15,192,8 3284 .byte 102,69,15,58,15,228,12 3285 paddd %xmm5,%xmm1 3286 pxor %xmm1,%xmm13 3287 pshufb .rol16(%rip),%xmm13 3288 paddd %xmm13,%xmm9 3289 pxor %xmm9,%xmm5 3290 movdqa %xmm5,%xmm3 3291 
pslld $12,%xmm3 3292 psrld $20,%xmm5 3293 pxor %xmm3,%xmm5 3294 paddd %xmm5,%xmm1 3295 pxor %xmm1,%xmm13 3296 pshufb .rol8(%rip),%xmm13 3297 paddd %xmm13,%xmm9 3298 pxor %xmm9,%xmm5 3299 movdqa %xmm5,%xmm3 3300 pslld $7,%xmm3 3301 psrld $25,%xmm5 3302 pxor %xmm3,%xmm5 3303 .byte 102,15,58,15,237,4 3304 .byte 102,69,15,58,15,201,8 3305 .byte 102,69,15,58,15,237,12 3306 paddd %xmm6,%xmm2 3307 pxor %xmm2,%xmm14 3308 pshufb .rol16(%rip),%xmm14 3309 paddd %xmm14,%xmm10 3310 pxor %xmm10,%xmm6 3311 movdqa %xmm6,%xmm3 3312 pslld $12,%xmm3 3313 psrld $20,%xmm6 3314 pxor %xmm3,%xmm6 3315 paddd %xmm6,%xmm2 3316 pxor %xmm2,%xmm14 3317 pshufb .rol8(%rip),%xmm14 3318 paddd %xmm14,%xmm10 3319 pxor %xmm10,%xmm6 3320 movdqa %xmm6,%xmm3 3321 pslld $7,%xmm3 3322 psrld $25,%xmm6 3323 pxor %xmm3,%xmm6 3324 .byte 102,15,58,15,246,4 3325 .byte 102,69,15,58,15,210,8 3326 .byte 102,69,15,58,15,246,12 3327 addq 0(%rdi),%r10 3328 adcq 8+0(%rdi),%r11 3329 adcq $1,%r12 3330 movq 0+0(%rbp),%rax 3331 movq %rax,%r15 3332 mulq %r10 3333 movq %rax,%r13 3334 movq %rdx,%r14 3335 movq 0+0(%rbp),%rax 3336 mulq %r11 3337 imulq %r12,%r15 3338 addq %rax,%r14 3339 adcq %rdx,%r15 3340 movq 8+0(%rbp),%rax 3341 movq %rax,%r9 3342 mulq %r10 3343 addq %rax,%r14 3344 adcq $0,%rdx 3345 movq %rdx,%r10 3346 movq 8+0(%rbp),%rax 3347 mulq %r11 3348 addq %rax,%r15 3349 adcq $0,%rdx 3350 imulq %r12,%r9 3351 addq %r10,%r15 3352 adcq %rdx,%r9 3353 movq %r13,%r10 3354 movq %r14,%r11 3355 movq %r15,%r12 3356 andq $3,%r12 3357 movq %r15,%r13 3358 andq $-4,%r13 3359 movq %r9,%r14 3360 shrdq $2,%r9,%r15 3361 shrq $2,%r9 3362 addq %r13,%r10 3363 adcq %r14,%r11 3364 adcq $0,%r12 3365 addq %r15,%r10 3366 adcq %r9,%r11 3367 adcq $0,%r12 3368 paddd %xmm4,%xmm0 3369 pxor %xmm0,%xmm12 3370 pshufb .rol16(%rip),%xmm12 3371 paddd %xmm12,%xmm8 3372 pxor %xmm8,%xmm4 3373 movdqa %xmm4,%xmm3 3374 pslld $12,%xmm3 3375 psrld $20,%xmm4 3376 pxor %xmm3,%xmm4 3377 paddd %xmm4,%xmm0 3378 pxor %xmm0,%xmm12 3379 pshufb .rol8(%rip),%xmm12 3380 paddd %xmm12,%xmm8 3381 pxor %xmm8,%xmm4 3382 movdqa %xmm4,%xmm3 3383 pslld $7,%xmm3 3384 psrld $25,%xmm4 3385 pxor %xmm3,%xmm4 3386 .byte 102,15,58,15,228,12 3387 .byte 102,69,15,58,15,192,8 3388 .byte 102,69,15,58,15,228,4 3389 paddd %xmm5,%xmm1 3390 pxor %xmm1,%xmm13 3391 pshufb .rol16(%rip),%xmm13 3392 paddd %xmm13,%xmm9 3393 pxor %xmm9,%xmm5 3394 movdqa %xmm5,%xmm3 3395 pslld $12,%xmm3 3396 psrld $20,%xmm5 3397 pxor %xmm3,%xmm5 3398 paddd %xmm5,%xmm1 3399 pxor %xmm1,%xmm13 3400 pshufb .rol8(%rip),%xmm13 3401 paddd %xmm13,%xmm9 3402 pxor %xmm9,%xmm5 3403 movdqa %xmm5,%xmm3 3404 pslld $7,%xmm3 3405 psrld $25,%xmm5 3406 pxor %xmm3,%xmm5 3407 .byte 102,15,58,15,237,12 3408 .byte 102,69,15,58,15,201,8 3409 .byte 102,69,15,58,15,237,4 3410 paddd %xmm6,%xmm2 3411 pxor %xmm2,%xmm14 3412 pshufb .rol16(%rip),%xmm14 3413 paddd %xmm14,%xmm10 3414 pxor %xmm10,%xmm6 3415 movdqa %xmm6,%xmm3 3416 pslld $12,%xmm3 3417 psrld $20,%xmm6 3418 pxor %xmm3,%xmm6 3419 paddd %xmm6,%xmm2 3420 pxor %xmm2,%xmm14 3421 pshufb .rol8(%rip),%xmm14 3422 paddd %xmm14,%xmm10 3423 pxor %xmm10,%xmm6 3424 movdqa %xmm6,%xmm3 3425 pslld $7,%xmm3 3426 psrld $25,%xmm6 3427 pxor %xmm3,%xmm6 3428 .byte 102,15,58,15,246,12 3429 .byte 102,69,15,58,15,210,8 3430 .byte 102,69,15,58,15,246,4 3431 3432 leaq 16(%rdi),%rdi 3433 decq %rcx 3434 jg 1b 3435 decq %r8 3436 jge 2b 3437 paddd .chacha20_consts(%rip),%xmm2 3438 paddd 48(%rbp),%xmm6 3439 paddd 64(%rbp),%xmm10 3440 paddd 128(%rbp),%xmm14 3441 paddd .chacha20_consts(%rip),%xmm1 3442 paddd 48(%rbp),%xmm5 3443 paddd 
64(%rbp),%xmm9 3444 paddd 112(%rbp),%xmm13 3445 paddd .chacha20_consts(%rip),%xmm0 3446 paddd 48(%rbp),%xmm4 3447 paddd 64(%rbp),%xmm8 3448 paddd 96(%rbp),%xmm12 3449 movdqu 0 + 0(%rsi),%xmm3 3450 movdqu 16 + 0(%rsi),%xmm7 3451 movdqu 32 + 0(%rsi),%xmm11 3452 movdqu 48 + 0(%rsi),%xmm15 3453 pxor %xmm3,%xmm2 3454 pxor %xmm7,%xmm6 3455 pxor %xmm11,%xmm10 3456 pxor %xmm14,%xmm15 3457 movdqu %xmm2,0 + 0(%rdi) 3458 movdqu %xmm6,16 + 0(%rdi) 3459 movdqu %xmm10,32 + 0(%rdi) 3460 movdqu %xmm15,48 + 0(%rdi) 3461 movdqu 0 + 64(%rsi),%xmm3 3462 movdqu 16 + 64(%rsi),%xmm7 3463 movdqu 32 + 64(%rsi),%xmm11 3464 movdqu 48 + 64(%rsi),%xmm15 3465 pxor %xmm3,%xmm1 3466 pxor %xmm7,%xmm5 3467 pxor %xmm11,%xmm9 3468 pxor %xmm13,%xmm15 3469 movdqu %xmm1,0 + 64(%rdi) 3470 movdqu %xmm5,16 + 64(%rdi) 3471 movdqu %xmm9,32 + 64(%rdi) 3472 movdqu %xmm15,48 + 64(%rdi) 3473 3474 movq $128,%rcx 3475 subq $128,%rbx 3476 leaq 128(%rsi),%rsi 3477 3478 seal_sse_128_seal_hash: 3479 cmpq $16,%rcx 3480 jb seal_sse_128_seal 3481 addq 0(%rdi),%r10 3482 adcq 8+0(%rdi),%r11 3483 adcq $1,%r12 3484 movq 0+0(%rbp),%rax 3485 movq %rax,%r15 3486 mulq %r10 3487 movq %rax,%r13 3488 movq %rdx,%r14 3489 movq 0+0(%rbp),%rax 3490 mulq %r11 3491 imulq %r12,%r15 3492 addq %rax,%r14 3493 adcq %rdx,%r15 3494 movq 8+0(%rbp),%rax 3495 movq %rax,%r9 3496 mulq %r10 3497 addq %rax,%r14 3498 adcq $0,%rdx 3499 movq %rdx,%r10 3500 movq 8+0(%rbp),%rax 3501 mulq %r11 3502 addq %rax,%r15 3503 adcq $0,%rdx 3504 imulq %r12,%r9 3505 addq %r10,%r15 3506 adcq %rdx,%r9 3507 movq %r13,%r10 3508 movq %r14,%r11 3509 movq %r15,%r12 3510 andq $3,%r12 3511 movq %r15,%r13 3512 andq $-4,%r13 3513 movq %r9,%r14 3514 shrdq $2,%r9,%r15 3515 shrq $2,%r9 3516 addq %r13,%r10 3517 adcq %r14,%r11 3518 adcq $0,%r12 3519 addq %r15,%r10 3520 adcq %r9,%r11 3521 adcq $0,%r12 3522 3523 subq $16,%rcx 3524 leaq 16(%rdi),%rdi 3525 jmp seal_sse_128_seal_hash 3526 3527 seal_sse_128_seal: 3528 cmpq $16,%rbx 3529 jb seal_sse_tail_16 3530 subq $16,%rbx 3531 3532 movdqu 0(%rsi),%xmm3 3533 pxor %xmm3,%xmm0 3534 movdqu %xmm0,0(%rdi) 3535 3536 addq 0(%rdi),%r10 3537 adcq 8(%rdi),%r11 3538 adcq $1,%r12 3539 leaq 16(%rsi),%rsi 3540 leaq 16(%rdi),%rdi 3541 movq 0+0(%rbp),%rax 3542 movq %rax,%r15 3543 mulq %r10 3544 movq %rax,%r13 3545 movq %rdx,%r14 3546 movq 0+0(%rbp),%rax 3547 mulq %r11 3548 imulq %r12,%r15 3549 addq %rax,%r14 3550 adcq %rdx,%r15 3551 movq 8+0(%rbp),%rax 3552 movq %rax,%r9 3553 mulq %r10 3554 addq %rax,%r14 3555 adcq $0,%rdx 3556 movq %rdx,%r10 3557 movq 8+0(%rbp),%rax 3558 mulq %r11 3559 addq %rax,%r15 3560 adcq $0,%rdx 3561 imulq %r12,%r9 3562 addq %r10,%r15 3563 adcq %rdx,%r9 3564 movq %r13,%r10 3565 movq %r14,%r11 3566 movq %r15,%r12 3567 andq $3,%r12 3568 movq %r15,%r13 3569 andq $-4,%r13 3570 movq %r9,%r14 3571 shrdq $2,%r9,%r15 3572 shrq $2,%r9 3573 addq %r13,%r10 3574 adcq %r14,%r11 3575 adcq $0,%r12 3576 addq %r15,%r10 3577 adcq %r9,%r11 3578 adcq $0,%r12 3579 3580 3581 movdqa %xmm4,%xmm0 3582 movdqa %xmm8,%xmm4 3583 movdqa %xmm12,%xmm8 3584 movdqa %xmm1,%xmm12 3585 movdqa %xmm5,%xmm1 3586 movdqa %xmm9,%xmm5 3587 movdqa %xmm13,%xmm9 3588 jmp seal_sse_128_seal 3589 3590 seal_sse_tail_16: 3591 testq %rbx,%rbx 3592 jz process_blocks_of_extra_in 3593 3594 movq %rbx,%r8 3595 movq %rbx,%rcx 3596 leaq -1(%rsi,%rbx), %rsi 3597 pxor %xmm15,%xmm15 3598 1: 3599 pslldq $1,%xmm15 3600 pinsrb $0,(%rsi),%xmm15 3601 leaq -1(%rsi),%rsi 3602 decq %rcx 3603 jne 1b 3604 3605 3606 pxor %xmm0,%xmm15 3607 3608 3609 movq %rbx,%rcx 3610 movdqu %xmm15,%xmm0 3611 2: 3612 pextrb $0,%xmm0,(%rdi) 
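// Fewer than 16 bytes remain: the bytes were gathered back-to-front above with
// pslldq/pinsrb, XORed with the keystream in %xmm0, and are now written out one
// at a time, emitting the low byte and shifting the register down after each
// store.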
3613 psrldq $1,%xmm0 3614 addq $1,%rdi 3615 subq $1,%rcx 3616 jnz 2b 3617 3618 3619 3620 3621 3622 3623 3624 3625 movq 288+32(%rsp),%r9 3626 movq 56(%r9),%r14 3627 movq 48(%r9),%r13 3628 testq %r14,%r14 3629 jz process_partial_block 3630 3631 movq $16,%r15 3632 subq %rbx,%r15 3633 cmpq %r15,%r14 3634 3635 jge load_extra_in 3636 movq %r14,%r15 3637 3638 load_extra_in: 3639 3640 3641 leaq -1(%r13,%r15), %rsi 3642 3643 3644 addq %r15,%r13 3645 subq %r15,%r14 3646 movq %r13,48(%r9) 3647 movq %r14,56(%r9) 3648 3649 3650 3651 addq %r15,%r8 3652 3653 3654 pxor %xmm11,%xmm11 3655 3: 3656 pslldq $1,%xmm11 3657 pinsrb $0,(%rsi),%xmm11 3658 leaq -1(%rsi),%rsi 3659 subq $1,%r15 3660 jnz 3b 3661 3662 3663 3664 3665 movq %rbx,%r15 3666 3667 4: 3668 pslldq $1,%xmm11 3669 subq $1,%r15 3670 jnz 4b 3671 3672 3673 3674 3675 leaq .and_masks(%rip),%r15 3676 shlq $4,%rbx 3677 pand -16(%r15,%rbx), %xmm15 3678 3679 3680 por %xmm11,%xmm15 3681 3682 3683 3684 .byte 102,77,15,126,253 3685 pextrq $1,%xmm15,%r14 3686 addq %r13,%r10 3687 adcq %r14,%r11 3688 adcq $1,%r12 3689 movq 0+0(%rbp),%rax 3690 movq %rax,%r15 3691 mulq %r10 3692 movq %rax,%r13 3693 movq %rdx,%r14 3694 movq 0+0(%rbp),%rax 3695 mulq %r11 3696 imulq %r12,%r15 3697 addq %rax,%r14 3698 adcq %rdx,%r15 3699 movq 8+0(%rbp),%rax 3700 movq %rax,%r9 3701 mulq %r10 3702 addq %rax,%r14 3703 adcq $0,%rdx 3704 movq %rdx,%r10 3705 movq 8+0(%rbp),%rax 3706 mulq %r11 3707 addq %rax,%r15 3708 adcq $0,%rdx 3709 imulq %r12,%r9 3710 addq %r10,%r15 3711 adcq %rdx,%r9 3712 movq %r13,%r10 3713 movq %r14,%r11 3714 movq %r15,%r12 3715 andq $3,%r12 3716 movq %r15,%r13 3717 andq $-4,%r13 3718 movq %r9,%r14 3719 shrdq $2,%r9,%r15 3720 shrq $2,%r9 3721 addq %r13,%r10 3722 adcq %r14,%r11 3723 adcq $0,%r12 3724 addq %r15,%r10 3725 adcq %r9,%r11 3726 adcq $0,%r12 3727 3728 3729 process_blocks_of_extra_in: 3730 3731 movq 288+32(%rsp),%r9 3732 movq 48(%r9),%rsi 3733 movq 56(%r9),%r8 3734 movq %r8,%rcx 3735 shrq $4,%r8 3736 3737 5: 3738 jz process_extra_in_trailer 3739 addq 0(%rsi),%r10 3740 adcq 8+0(%rsi),%r11 3741 adcq $1,%r12 3742 movq 0+0(%rbp),%rax 3743 movq %rax,%r15 3744 mulq %r10 3745 movq %rax,%r13 3746 movq %rdx,%r14 3747 movq 0+0(%rbp),%rax 3748 mulq %r11 3749 imulq %r12,%r15 3750 addq %rax,%r14 3751 adcq %rdx,%r15 3752 movq 8+0(%rbp),%rax 3753 movq %rax,%r9 3754 mulq %r10 3755 addq %rax,%r14 3756 adcq $0,%rdx 3757 movq %rdx,%r10 3758 movq 8+0(%rbp),%rax 3759 mulq %r11 3760 addq %rax,%r15 3761 adcq $0,%rdx 3762 imulq %r12,%r9 3763 addq %r10,%r15 3764 adcq %rdx,%r9 3765 movq %r13,%r10 3766 movq %r14,%r11 3767 movq %r15,%r12 3768 andq $3,%r12 3769 movq %r15,%r13 3770 andq $-4,%r13 3771 movq %r9,%r14 3772 shrdq $2,%r9,%r15 3773 shrq $2,%r9 3774 addq %r13,%r10 3775 adcq %r14,%r11 3776 adcq $0,%r12 3777 addq %r15,%r10 3778 adcq %r9,%r11 3779 adcq $0,%r12 3780 3781 leaq 16(%rsi),%rsi 3782 subq $1,%r8 3783 jmp 5b 3784 3785 process_extra_in_trailer: 3786 andq $15,%rcx 3787 movq %rcx,%rbx 3788 jz do_length_block 3789 leaq -1(%rsi,%rcx), %rsi 3790 3791 6: 3792 pslldq $1,%xmm15 3793 pinsrb $0,(%rsi),%xmm15 3794 leaq -1(%rsi),%rsi 3795 subq $1,%rcx 3796 jnz 6b 3797 3798 process_partial_block: 3799 3800 leaq .and_masks(%rip),%r15 3801 shlq $4,%rbx 3802 pand -16(%r15,%rbx), %xmm15 3803 .byte 102,77,15,126,253 3804 pextrq $1,%xmm15,%r14 3805 addq %r13,%r10 3806 adcq %r14,%r11 3807 adcq $1,%r12 3808 movq 0+0(%rbp),%rax 3809 movq %rax,%r15 3810 mulq %r10 3811 movq %rax,%r13 3812 movq %rdx,%r14 3813 movq 0+0(%rbp),%rax 3814 mulq %r11 3815 imulq %r12,%r15 3816 addq %rax,%r14 3817 adcq 
%rdx,%r15 3818 movq 8+0(%rbp),%rax 3819 movq %rax,%r9 3820 mulq %r10 3821 addq %rax,%r14 3822 adcq $0,%rdx 3823 movq %rdx,%r10 3824 movq 8+0(%rbp),%rax 3825 mulq %r11 3826 addq %rax,%r15 3827 adcq $0,%rdx 3828 imulq %r12,%r9 3829 addq %r10,%r15 3830 adcq %rdx,%r9 3831 movq %r13,%r10 3832 movq %r14,%r11 3833 movq %r15,%r12 3834 andq $3,%r12 3835 movq %r15,%r13 3836 andq $-4,%r13 3837 movq %r9,%r14 3838 shrdq $2,%r9,%r15 3839 shrq $2,%r9 3840 addq %r13,%r10 3841 adcq %r14,%r11 3842 adcq $0,%r12 3843 addq %r15,%r10 3844 adcq %r9,%r11 3845 adcq $0,%r12 3846 3847 3848 do_length_block: 3849 addq 32(%rbp),%r10 3850 adcq 8+32(%rbp),%r11 3851 adcq $1,%r12 3852 movq 0+0(%rbp),%rax 3853 movq %rax,%r15 3854 mulq %r10 3855 movq %rax,%r13 3856 movq %rdx,%r14 3857 movq 0+0(%rbp),%rax 3858 mulq %r11 3859 imulq %r12,%r15 3860 addq %rax,%r14 3861 adcq %rdx,%r15 3862 movq 8+0(%rbp),%rax 3863 movq %rax,%r9 3864 mulq %r10 3865 addq %rax,%r14 3866 adcq $0,%rdx 3867 movq %rdx,%r10 3868 movq 8+0(%rbp),%rax 3869 mulq %r11 3870 addq %rax,%r15 3871 adcq $0,%rdx 3872 imulq %r12,%r9 3873 addq %r10,%r15 3874 adcq %rdx,%r9 3875 movq %r13,%r10 3876 movq %r14,%r11 3877 movq %r15,%r12 3878 andq $3,%r12 3879 movq %r15,%r13 3880 andq $-4,%r13 3881 movq %r9,%r14 3882 shrdq $2,%r9,%r15 3883 shrq $2,%r9 3884 addq %r13,%r10 3885 adcq %r14,%r11 3886 adcq $0,%r12 3887 addq %r15,%r10 3888 adcq %r9,%r11 3889 adcq $0,%r12 3890 3891 3892 movq %r10,%r13 3893 movq %r11,%r14 3894 movq %r12,%r15 3895 subq $-5,%r10 3896 sbbq $-1,%r11 3897 sbbq $3,%r12 3898 cmovcq %r13,%r10 3899 cmovcq %r14,%r11 3900 cmovcq %r15,%r12 3901 3902 addq 0+16(%rbp),%r10 3903 adcq 8+16(%rbp),%r11 3904 3905 addq $288 + 32,%rsp 3906 3907 popq %r9 3908 3909 movq %r10,0(%r9) 3910 movq %r11,8(%r9) 3911 3912 popq %r15 3913 3914 popq %r14 3915 3916 popq %r13 3917 3918 popq %r12 3919 3920 popq %rbx 3921 3922 popq %rbp 3923 3924 .byte 0xf3,0xc3 3925 3926 3927 seal_sse_128: 3928 movdqu .chacha20_consts(%rip),%xmm0 3929 movdqa %xmm0,%xmm1 3930 movdqa %xmm0,%xmm2 3931 movdqu 0(%r9),%xmm4 3932 movdqa %xmm4,%xmm5 3933 movdqa %xmm4,%xmm6 3934 movdqu 16(%r9),%xmm8 3935 movdqa %xmm8,%xmm9 3936 movdqa %xmm8,%xmm10 3937 movdqu 32(%r9),%xmm14 3938 movdqa %xmm14,%xmm12 3939 paddd .sse_inc(%rip),%xmm12 3940 movdqa %xmm12,%xmm13 3941 paddd .sse_inc(%rip),%xmm13 3942 movdqa %xmm4,%xmm7 3943 movdqa %xmm8,%xmm11 3944 movdqa %xmm12,%xmm15 3945 movq $10,%r10 3946 1: 3947 paddd %xmm4,%xmm0 3948 pxor %xmm0,%xmm12 3949 pshufb .rol16(%rip),%xmm12 3950 paddd %xmm12,%xmm8 3951 pxor %xmm8,%xmm4 3952 movdqa %xmm4,%xmm3 3953 pslld $12,%xmm3 3954 psrld $20,%xmm4 3955 pxor %xmm3,%xmm4 3956 paddd %xmm4,%xmm0 3957 pxor %xmm0,%xmm12 3958 pshufb .rol8(%rip),%xmm12 3959 paddd %xmm12,%xmm8 3960 pxor %xmm8,%xmm4 3961 movdqa %xmm4,%xmm3 3962 pslld $7,%xmm3 3963 psrld $25,%xmm4 3964 pxor %xmm3,%xmm4 3965 .byte 102,15,58,15,228,4 3966 .byte 102,69,15,58,15,192,8 3967 .byte 102,69,15,58,15,228,12 3968 paddd %xmm5,%xmm1 3969 pxor %xmm1,%xmm13 3970 pshufb .rol16(%rip),%xmm13 3971 paddd %xmm13,%xmm9 3972 pxor %xmm9,%xmm5 3973 movdqa %xmm5,%xmm3 3974 pslld $12,%xmm3 3975 psrld $20,%xmm5 3976 pxor %xmm3,%xmm5 3977 paddd %xmm5,%xmm1 3978 pxor %xmm1,%xmm13 3979 pshufb .rol8(%rip),%xmm13 3980 paddd %xmm13,%xmm9 3981 pxor %xmm9,%xmm5 3982 movdqa %xmm5,%xmm3 3983 pslld $7,%xmm3 3984 psrld $25,%xmm5 3985 pxor %xmm3,%xmm5 3986 .byte 102,15,58,15,237,4 3987 .byte 102,69,15,58,15,201,8 3988 .byte 102,69,15,58,15,237,12 3989 paddd %xmm6,%xmm2 3990 pxor %xmm2,%xmm14 3991 pshufb .rol16(%rip),%xmm14 3992 paddd %xmm14,%xmm10 3993 
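// The .byte 102,15,58,15,... / 102,69,15,58,15,... sequences in these round
// bodies are palignr instructions emitted as raw bytes (presumably for old
// assembler compatibility); the immediates 4, 8 and 12 rotate the b, c and d
// rows between the column and diagonal forms of the double round.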
pxor %xmm10,%xmm6 3994 movdqa %xmm6,%xmm3 3995 pslld $12,%xmm3 3996 psrld $20,%xmm6 3997 pxor %xmm3,%xmm6 3998 paddd %xmm6,%xmm2 3999 pxor %xmm2,%xmm14 4000 pshufb .rol8(%rip),%xmm14 4001 paddd %xmm14,%xmm10 4002 pxor %xmm10,%xmm6 4003 movdqa %xmm6,%xmm3 4004 pslld $7,%xmm3 4005 psrld $25,%xmm6 4006 pxor %xmm3,%xmm6 4007 .byte 102,15,58,15,246,4 4008 .byte 102,69,15,58,15,210,8 4009 .byte 102,69,15,58,15,246,12 4010 paddd %xmm4,%xmm0 4011 pxor %xmm0,%xmm12 4012 pshufb .rol16(%rip),%xmm12 4013 paddd %xmm12,%xmm8 4014 pxor %xmm8,%xmm4 4015 movdqa %xmm4,%xmm3 4016 pslld $12,%xmm3 4017 psrld $20,%xmm4 4018 pxor %xmm3,%xmm4 4019 paddd %xmm4,%xmm0 4020 pxor %xmm0,%xmm12 4021 pshufb .rol8(%rip),%xmm12 4022 paddd %xmm12,%xmm8 4023 pxor %xmm8,%xmm4 4024 movdqa %xmm4,%xmm3 4025 pslld $7,%xmm3 4026 psrld $25,%xmm4 4027 pxor %xmm3,%xmm4 4028 .byte 102,15,58,15,228,12 4029 .byte 102,69,15,58,15,192,8 4030 .byte 102,69,15,58,15,228,4 4031 paddd %xmm5,%xmm1 4032 pxor %xmm1,%xmm13 4033 pshufb .rol16(%rip),%xmm13 4034 paddd %xmm13,%xmm9 4035 pxor %xmm9,%xmm5 4036 movdqa %xmm5,%xmm3 4037 pslld $12,%xmm3 4038 psrld $20,%xmm5 4039 pxor %xmm3,%xmm5 4040 paddd %xmm5,%xmm1 4041 pxor %xmm1,%xmm13 4042 pshufb .rol8(%rip),%xmm13 4043 paddd %xmm13,%xmm9 4044 pxor %xmm9,%xmm5 4045 movdqa %xmm5,%xmm3 4046 pslld $7,%xmm3 4047 psrld $25,%xmm5 4048 pxor %xmm3,%xmm5 4049 .byte 102,15,58,15,237,12 4050 .byte 102,69,15,58,15,201,8 4051 .byte 102,69,15,58,15,237,4 4052 paddd %xmm6,%xmm2 4053 pxor %xmm2,%xmm14 4054 pshufb .rol16(%rip),%xmm14 4055 paddd %xmm14,%xmm10 4056 pxor %xmm10,%xmm6 4057 movdqa %xmm6,%xmm3 4058 pslld $12,%xmm3 4059 psrld $20,%xmm6 4060 pxor %xmm3,%xmm6 4061 paddd %xmm6,%xmm2 4062 pxor %xmm2,%xmm14 4063 pshufb .rol8(%rip),%xmm14 4064 paddd %xmm14,%xmm10 4065 pxor %xmm10,%xmm6 4066 movdqa %xmm6,%xmm3 4067 pslld $7,%xmm3 4068 psrld $25,%xmm6 4069 pxor %xmm3,%xmm6 4070 .byte 102,15,58,15,246,12 4071 .byte 102,69,15,58,15,210,8 4072 .byte 102,69,15,58,15,246,4 4073 4074 decq %r10 4075 jnz 1b 4076 paddd .chacha20_consts(%rip),%xmm0 4077 paddd .chacha20_consts(%rip),%xmm1 4078 paddd .chacha20_consts(%rip),%xmm2 4079 paddd %xmm7,%xmm4 4080 paddd %xmm7,%xmm5 4081 paddd %xmm7,%xmm6 4082 paddd %xmm11,%xmm8 4083 paddd %xmm11,%xmm9 4084 paddd %xmm15,%xmm12 4085 paddd .sse_inc(%rip),%xmm15 4086 paddd %xmm15,%xmm13 4087 4088 pand .clamp(%rip),%xmm2 4089 movdqa %xmm2,0(%rbp) 4090 movdqa %xmm6,16(%rbp) 4091 4092 movq %r8,%r8 4093 call poly_hash_ad_internal 4094 jmp seal_sse_128_seal 4095 4096 4097 4098 4099 .p2align 6 4100 chacha20_poly1305_open_avx2: 4101 vzeroupper 4102 vmovdqa .chacha20_consts(%rip),%ymm0 4103 vbroadcasti128 0(%r9),%ymm4 4104 vbroadcasti128 16(%r9),%ymm8 4105 vbroadcasti128 32(%r9),%ymm12 4106 vpaddd .avx2_init(%rip),%ymm12,%ymm12 4107 cmpq $192,%rbx 4108 jbe open_avx2_192 4109 cmpq $320,%rbx 4110 jbe open_avx2_320 4111 4112 vmovdqa %ymm4,64(%rbp) 4113 vmovdqa %ymm8,96(%rbp) 4114 vmovdqa %ymm12,160(%rbp) 4115 movq $10,%r10 4116 1: 4117 vpaddd %ymm4,%ymm0,%ymm0 4118 vpxor %ymm0,%ymm12,%ymm12 4119 vpshufb .rol16(%rip),%ymm12,%ymm12 4120 vpaddd %ymm12,%ymm8,%ymm8 4121 vpxor %ymm8,%ymm4,%ymm4 4122 vpsrld $20,%ymm4,%ymm3 4123 vpslld $12,%ymm4,%ymm4 4124 vpxor %ymm3,%ymm4,%ymm4 4125 vpaddd %ymm4,%ymm0,%ymm0 4126 vpxor %ymm0,%ymm12,%ymm12 4127 vpshufb .rol8(%rip),%ymm12,%ymm12 4128 vpaddd %ymm12,%ymm8,%ymm8 4129 vpxor %ymm8,%ymm4,%ymm4 4130 vpslld $7,%ymm4,%ymm3 4131 vpsrld $25,%ymm4,%ymm4 4132 vpxor %ymm3,%ymm4,%ymm4 4133 vpalignr $12,%ymm12,%ymm12,%ymm12 4134 vpalignr $8,%ymm8,%ymm8,%ymm8 4135 vpalignr 
$4,%ymm4,%ymm4,%ymm4 4136 vpaddd %ymm4,%ymm0,%ymm0 4137 vpxor %ymm0,%ymm12,%ymm12 4138 vpshufb .rol16(%rip),%ymm12,%ymm12 4139 vpaddd %ymm12,%ymm8,%ymm8 4140 vpxor %ymm8,%ymm4,%ymm4 4141 vpsrld $20,%ymm4,%ymm3 4142 vpslld $12,%ymm4,%ymm4 4143 vpxor %ymm3,%ymm4,%ymm4 4144 vpaddd %ymm4,%ymm0,%ymm0 4145 vpxor %ymm0,%ymm12,%ymm12 4146 vpshufb .rol8(%rip),%ymm12,%ymm12 4147 vpaddd %ymm12,%ymm8,%ymm8 4148 vpxor %ymm8,%ymm4,%ymm4 4149 vpslld $7,%ymm4,%ymm3 4150 vpsrld $25,%ymm4,%ymm4 4151 vpxor %ymm3,%ymm4,%ymm4 4152 vpalignr $4,%ymm12,%ymm12,%ymm12 4153 vpalignr $8,%ymm8,%ymm8,%ymm8 4154 vpalignr $12,%ymm4,%ymm4,%ymm4 4155 4156 decq %r10 4157 jne 1b 4158 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4159 vpaddd 64(%rbp),%ymm4,%ymm4 4160 vpaddd 96(%rbp),%ymm8,%ymm8 4161 vpaddd 160(%rbp),%ymm12,%ymm12 4162 4163 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4164 4165 vpand .clamp(%rip),%ymm3,%ymm3 4166 vmovdqa %ymm3,0(%rbp) 4167 4168 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4169 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 4170 4171 movq %r8,%r8 4172 call poly_hash_ad_internal 4173 xorq %rcx,%rcx 4174 4175 1: 4176 addq 0(%rsi,%rcx), %r10 4177 adcq 8+0(%rsi,%rcx), %r11 4178 adcq $1,%r12 4179 movq 0+0(%rbp),%rax 4180 movq %rax,%r15 4181 mulq %r10 4182 movq %rax,%r13 4183 movq %rdx,%r14 4184 movq 0+0(%rbp),%rax 4185 mulq %r11 4186 imulq %r12,%r15 4187 addq %rax,%r14 4188 adcq %rdx,%r15 4189 movq 8+0(%rbp),%rax 4190 movq %rax,%r9 4191 mulq %r10 4192 addq %rax,%r14 4193 adcq $0,%rdx 4194 movq %rdx,%r10 4195 movq 8+0(%rbp),%rax 4196 mulq %r11 4197 addq %rax,%r15 4198 adcq $0,%rdx 4199 imulq %r12,%r9 4200 addq %r10,%r15 4201 adcq %rdx,%r9 4202 movq %r13,%r10 4203 movq %r14,%r11 4204 movq %r15,%r12 4205 andq $3,%r12 4206 movq %r15,%r13 4207 andq $-4,%r13 4208 movq %r9,%r14 4209 shrdq $2,%r9,%r15 4210 shrq $2,%r9 4211 addq %r13,%r10 4212 adcq %r14,%r11 4213 adcq $0,%r12 4214 addq %r15,%r10 4215 adcq %r9,%r11 4216 adcq $0,%r12 4217 4218 addq $16,%rcx 4219 cmpq $64,%rcx 4220 jne 1b 4221 4222 vpxor 0(%rsi),%ymm0,%ymm0 4223 vpxor 32(%rsi),%ymm4,%ymm4 4224 vmovdqu %ymm0,0(%rdi) 4225 vmovdqu %ymm4,32(%rdi) 4226 leaq 64(%rsi),%rsi 4227 leaq 64(%rdi),%rdi 4228 subq $64,%rbx 4229 1: 4230 4231 cmpq $512,%rbx 4232 jb 3f 4233 vmovdqa .chacha20_consts(%rip),%ymm0 4234 vmovdqa 64(%rbp),%ymm4 4235 vmovdqa 96(%rbp),%ymm8 4236 vmovdqa %ymm0,%ymm1 4237 vmovdqa %ymm4,%ymm5 4238 vmovdqa %ymm8,%ymm9 4239 vmovdqa %ymm0,%ymm2 4240 vmovdqa %ymm4,%ymm6 4241 vmovdqa %ymm8,%ymm10 4242 vmovdqa %ymm0,%ymm3 4243 vmovdqa %ymm4,%ymm7 4244 vmovdqa %ymm8,%ymm11 4245 vmovdqa .avx2_inc(%rip),%ymm12 4246 vpaddd 160(%rbp),%ymm12,%ymm15 4247 vpaddd %ymm15,%ymm12,%ymm14 4248 vpaddd %ymm14,%ymm12,%ymm13 4249 vpaddd %ymm13,%ymm12,%ymm12 4250 vmovdqa %ymm15,256(%rbp) 4251 vmovdqa %ymm14,224(%rbp) 4252 vmovdqa %ymm13,192(%rbp) 4253 vmovdqa %ymm12,160(%rbp) 4254 4255 xorq %rcx,%rcx 4256 2: 4257 addq 0*8(%rsi,%rcx), %r10 4258 adcq 8+0*8(%rsi,%rcx), %r11 4259 adcq $1,%r12 4260 vmovdqa %ymm8,128(%rbp) 4261 vmovdqa .rol16(%rip),%ymm8 4262 vpaddd %ymm7,%ymm3,%ymm3 4263 vpaddd %ymm6,%ymm2,%ymm2 4264 vpaddd %ymm5,%ymm1,%ymm1 4265 vpaddd %ymm4,%ymm0,%ymm0 4266 vpxor %ymm3,%ymm15,%ymm15 4267 vpxor %ymm2,%ymm14,%ymm14 4268 vpxor %ymm1,%ymm13,%ymm13 4269 vpxor %ymm0,%ymm12,%ymm12 4270 movq 0+0(%rbp),%rdx 4271 movq %rdx,%r15 4272 mulxq %r10,%r13,%r14 4273 mulxq %r11,%rax,%rdx 4274 imulq %r12,%r15 4275 addq %rax,%r14 4276 adcq %rdx,%r15 4277 vpshufb %ymm8,%ymm15,%ymm15 4278 vpshufb %ymm8,%ymm14,%ymm14 4279 vpshufb %ymm8,%ymm13,%ymm13 4280 vpshufb %ymm8,%ymm12,%ymm12 4281 vmovdqa 
128(%rbp),%ymm8 4282 vpaddd %ymm15,%ymm11,%ymm11 4283 vpaddd %ymm14,%ymm10,%ymm10 4284 vpaddd %ymm13,%ymm9,%ymm9 4285 vpaddd %ymm12,%ymm8,%ymm8 4286 movq 8+0(%rbp),%rdx 4287 mulxq %r10,%r10,%rax 4288 addq %r10,%r14 4289 mulxq %r11,%r11,%r9 4290 adcq %r11,%r15 4291 adcq $0,%r9 4292 imulq %r12,%rdx 4293 vpxor %ymm11,%ymm7,%ymm7 4294 vpxor %ymm10,%ymm6,%ymm6 4295 vpxor %ymm9,%ymm5,%ymm5 4296 vpxor %ymm8,%ymm4,%ymm4 4297 vmovdqa %ymm8,128(%rbp) 4298 vpsrld $20,%ymm7,%ymm8 4299 vpslld $32-20,%ymm7,%ymm7 4300 vpxor %ymm8,%ymm7,%ymm7 4301 vpsrld $20,%ymm6,%ymm8 4302 vpslld $32-20,%ymm6,%ymm6 4303 vpxor %ymm8,%ymm6,%ymm6 4304 vpsrld $20,%ymm5,%ymm8 4305 addq %rax,%r15 4306 adcq %rdx,%r9 4307 vpslld $32-20,%ymm5,%ymm5 4308 vpxor %ymm8,%ymm5,%ymm5 4309 vpsrld $20,%ymm4,%ymm8 4310 vpslld $32-20,%ymm4,%ymm4 4311 vpxor %ymm8,%ymm4,%ymm4 4312 vmovdqa .rol8(%rip),%ymm8 4313 vpaddd %ymm7,%ymm3,%ymm3 4314 vpaddd %ymm6,%ymm2,%ymm2 4315 vpaddd %ymm5,%ymm1,%ymm1 4316 vpaddd %ymm4,%ymm0,%ymm0 4317 movq %r13,%r10 4318 movq %r14,%r11 4319 movq %r15,%r12 4320 andq $3,%r12 4321 movq %r15,%r13 4322 andq $-4,%r13 4323 movq %r9,%r14 4324 shrdq $2,%r9,%r15 4325 shrq $2,%r9 4326 addq %r13,%r10 4327 adcq %r14,%r11 4328 adcq $0,%r12 4329 addq %r15,%r10 4330 adcq %r9,%r11 4331 adcq $0,%r12 4332 vpxor %ymm3,%ymm15,%ymm15 4333 vpxor %ymm2,%ymm14,%ymm14 4334 vpxor %ymm1,%ymm13,%ymm13 4335 vpxor %ymm0,%ymm12,%ymm12 4336 vpshufb %ymm8,%ymm15,%ymm15 4337 vpshufb %ymm8,%ymm14,%ymm14 4338 vpshufb %ymm8,%ymm13,%ymm13 4339 vpshufb %ymm8,%ymm12,%ymm12 4340 vmovdqa 128(%rbp),%ymm8 4341 addq 2*8(%rsi,%rcx), %r10 4342 adcq 8+2*8(%rsi,%rcx), %r11 4343 adcq $1,%r12 4344 vpaddd %ymm15,%ymm11,%ymm11 4345 vpaddd %ymm14,%ymm10,%ymm10 4346 vpaddd %ymm13,%ymm9,%ymm9 4347 vpaddd %ymm12,%ymm8,%ymm8 4348 vpxor %ymm11,%ymm7,%ymm7 4349 vpxor %ymm10,%ymm6,%ymm6 4350 vpxor %ymm9,%ymm5,%ymm5 4351 vpxor %ymm8,%ymm4,%ymm4 4352 movq 0+0(%rbp),%rdx 4353 movq %rdx,%r15 4354 mulxq %r10,%r13,%r14 4355 mulxq %r11,%rax,%rdx 4356 imulq %r12,%r15 4357 addq %rax,%r14 4358 adcq %rdx,%r15 4359 vmovdqa %ymm8,128(%rbp) 4360 vpsrld $25,%ymm7,%ymm8 4361 vpslld $32-25,%ymm7,%ymm7 4362 vpxor %ymm8,%ymm7,%ymm7 4363 vpsrld $25,%ymm6,%ymm8 4364 vpslld $32-25,%ymm6,%ymm6 4365 vpxor %ymm8,%ymm6,%ymm6 4366 vpsrld $25,%ymm5,%ymm8 4367 vpslld $32-25,%ymm5,%ymm5 4368 vpxor %ymm8,%ymm5,%ymm5 4369 vpsrld $25,%ymm4,%ymm8 4370 vpslld $32-25,%ymm4,%ymm4 4371 vpxor %ymm8,%ymm4,%ymm4 4372 vmovdqa 128(%rbp),%ymm8 4373 vpalignr $4,%ymm7,%ymm7,%ymm7 4374 vpalignr $8,%ymm11,%ymm11,%ymm11 4375 vpalignr $12,%ymm15,%ymm15,%ymm15 4376 vpalignr $4,%ymm6,%ymm6,%ymm6 4377 movq 8+0(%rbp),%rdx 4378 mulxq %r10,%r10,%rax 4379 addq %r10,%r14 4380 mulxq %r11,%r11,%r9 4381 adcq %r11,%r15 4382 adcq $0,%r9 4383 imulq %r12,%rdx 4384 vpalignr $8,%ymm10,%ymm10,%ymm10 4385 vpalignr $12,%ymm14,%ymm14,%ymm14 4386 vpalignr $4,%ymm5,%ymm5,%ymm5 4387 vpalignr $8,%ymm9,%ymm9,%ymm9 4388 vpalignr $12,%ymm13,%ymm13,%ymm13 4389 vpalignr $4,%ymm4,%ymm4,%ymm4 4390 vpalignr $8,%ymm8,%ymm8,%ymm8 4391 vpalignr $12,%ymm12,%ymm12,%ymm12 4392 vmovdqa %ymm8,128(%rbp) 4393 vmovdqa .rol16(%rip),%ymm8 4394 vpaddd %ymm7,%ymm3,%ymm3 4395 vpaddd %ymm6,%ymm2,%ymm2 4396 vpaddd %ymm5,%ymm1,%ymm1 4397 vpaddd %ymm4,%ymm0,%ymm0 4398 vpxor %ymm3,%ymm15,%ymm15 4399 vpxor %ymm2,%ymm14,%ymm14 4400 vpxor %ymm1,%ymm13,%ymm13 4401 vpxor %ymm0,%ymm12,%ymm12 4402 addq %rax,%r15 4403 adcq %rdx,%r9 4404 vpshufb %ymm8,%ymm15,%ymm15 4405 vpshufb %ymm8,%ymm14,%ymm14 4406 vpshufb %ymm8,%ymm13,%ymm13 4407 vpshufb %ymm8,%ymm12,%ymm12 4408 vmovdqa 
128(%rbp),%ymm8 4409 vpaddd %ymm15,%ymm11,%ymm11 4410 vpaddd %ymm14,%ymm10,%ymm10 4411 vpaddd %ymm13,%ymm9,%ymm9 4412 vpaddd %ymm12,%ymm8,%ymm8 4413 movq %r13,%r10 4414 movq %r14,%r11 4415 movq %r15,%r12 4416 andq $3,%r12 4417 movq %r15,%r13 4418 andq $-4,%r13 4419 movq %r9,%r14 4420 shrdq $2,%r9,%r15 4421 shrq $2,%r9 4422 addq %r13,%r10 4423 adcq %r14,%r11 4424 adcq $0,%r12 4425 addq %r15,%r10 4426 adcq %r9,%r11 4427 adcq $0,%r12 4428 vpxor %ymm11,%ymm7,%ymm7 4429 vpxor %ymm10,%ymm6,%ymm6 4430 vpxor %ymm9,%ymm5,%ymm5 4431 vpxor %ymm8,%ymm4,%ymm4 4432 vmovdqa %ymm8,128(%rbp) 4433 vpsrld $20,%ymm7,%ymm8 4434 vpslld $32-20,%ymm7,%ymm7 4435 vpxor %ymm8,%ymm7,%ymm7 4436 addq 4*8(%rsi,%rcx), %r10 4437 adcq 8+4*8(%rsi,%rcx), %r11 4438 adcq $1,%r12 4439 4440 leaq 48(%rcx),%rcx 4441 vpsrld $20,%ymm6,%ymm8 4442 vpslld $32-20,%ymm6,%ymm6 4443 vpxor %ymm8,%ymm6,%ymm6 4444 vpsrld $20,%ymm5,%ymm8 4445 vpslld $32-20,%ymm5,%ymm5 4446 vpxor %ymm8,%ymm5,%ymm5 4447 vpsrld $20,%ymm4,%ymm8 4448 vpslld $32-20,%ymm4,%ymm4 4449 vpxor %ymm8,%ymm4,%ymm4 4450 vmovdqa .rol8(%rip),%ymm8 4451 vpaddd %ymm7,%ymm3,%ymm3 4452 vpaddd %ymm6,%ymm2,%ymm2 4453 vpaddd %ymm5,%ymm1,%ymm1 4454 vpaddd %ymm4,%ymm0,%ymm0 4455 vpxor %ymm3,%ymm15,%ymm15 4456 vpxor %ymm2,%ymm14,%ymm14 4457 vpxor %ymm1,%ymm13,%ymm13 4458 vpxor %ymm0,%ymm12,%ymm12 4459 movq 0+0(%rbp),%rdx 4460 movq %rdx,%r15 4461 mulxq %r10,%r13,%r14 4462 mulxq %r11,%rax,%rdx 4463 imulq %r12,%r15 4464 addq %rax,%r14 4465 adcq %rdx,%r15 4466 vpshufb %ymm8,%ymm15,%ymm15 4467 vpshufb %ymm8,%ymm14,%ymm14 4468 vpshufb %ymm8,%ymm13,%ymm13 4469 vpshufb %ymm8,%ymm12,%ymm12 4470 vmovdqa 128(%rbp),%ymm8 4471 vpaddd %ymm15,%ymm11,%ymm11 4472 vpaddd %ymm14,%ymm10,%ymm10 4473 vpaddd %ymm13,%ymm9,%ymm9 4474 movq 8+0(%rbp),%rdx 4475 mulxq %r10,%r10,%rax 4476 addq %r10,%r14 4477 mulxq %r11,%r11,%r9 4478 adcq %r11,%r15 4479 adcq $0,%r9 4480 imulq %r12,%rdx 4481 vpaddd %ymm12,%ymm8,%ymm8 4482 vpxor %ymm11,%ymm7,%ymm7 4483 vpxor %ymm10,%ymm6,%ymm6 4484 vpxor %ymm9,%ymm5,%ymm5 4485 vpxor %ymm8,%ymm4,%ymm4 4486 vmovdqa %ymm8,128(%rbp) 4487 vpsrld $25,%ymm7,%ymm8 4488 vpslld $32-25,%ymm7,%ymm7 4489 addq %rax,%r15 4490 adcq %rdx,%r9 4491 vpxor %ymm8,%ymm7,%ymm7 4492 vpsrld $25,%ymm6,%ymm8 4493 vpslld $32-25,%ymm6,%ymm6 4494 vpxor %ymm8,%ymm6,%ymm6 4495 vpsrld $25,%ymm5,%ymm8 4496 vpslld $32-25,%ymm5,%ymm5 4497 vpxor %ymm8,%ymm5,%ymm5 4498 vpsrld $25,%ymm4,%ymm8 4499 vpslld $32-25,%ymm4,%ymm4 4500 vpxor %ymm8,%ymm4,%ymm4 4501 vmovdqa 128(%rbp),%ymm8 4502 vpalignr $12,%ymm7,%ymm7,%ymm7 4503 vpalignr $8,%ymm11,%ymm11,%ymm11 4504 vpalignr $4,%ymm15,%ymm15,%ymm15 4505 vpalignr $12,%ymm6,%ymm6,%ymm6 4506 vpalignr $8,%ymm10,%ymm10,%ymm10 4507 vpalignr $4,%ymm14,%ymm14,%ymm14 4508 vpalignr $12,%ymm5,%ymm5,%ymm5 4509 movq %r13,%r10 4510 movq %r14,%r11 4511 movq %r15,%r12 4512 andq $3,%r12 4513 movq %r15,%r13 4514 andq $-4,%r13 4515 movq %r9,%r14 4516 shrdq $2,%r9,%r15 4517 shrq $2,%r9 4518 addq %r13,%r10 4519 adcq %r14,%r11 4520 adcq $0,%r12 4521 addq %r15,%r10 4522 adcq %r9,%r11 4523 adcq $0,%r12 4524 vpalignr $8,%ymm9,%ymm9,%ymm9 4525 vpalignr $4,%ymm13,%ymm13,%ymm13 4526 vpalignr $12,%ymm4,%ymm4,%ymm4 4527 vpalignr $8,%ymm8,%ymm8,%ymm8 4528 vpalignr $4,%ymm12,%ymm12,%ymm12 4529 4530 cmpq $60*8,%rcx 4531 jne 2b 4532 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 4533 vpaddd 64(%rbp),%ymm7,%ymm7 4534 vpaddd 96(%rbp),%ymm11,%ymm11 4535 vpaddd 256(%rbp),%ymm15,%ymm15 4536 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 4537 vpaddd 64(%rbp),%ymm6,%ymm6 4538 vpaddd 96(%rbp),%ymm10,%ymm10 4539 vpaddd 
224(%rbp),%ymm14,%ymm14 4540 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 4541 vpaddd 64(%rbp),%ymm5,%ymm5 4542 vpaddd 96(%rbp),%ymm9,%ymm9 4543 vpaddd 192(%rbp),%ymm13,%ymm13 4544 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4545 vpaddd 64(%rbp),%ymm4,%ymm4 4546 vpaddd 96(%rbp),%ymm8,%ymm8 4547 vpaddd 160(%rbp),%ymm12,%ymm12 4548 4549 vmovdqa %ymm0,128(%rbp) 4550 addq 60*8(%rsi),%r10 4551 adcq 8+60*8(%rsi),%r11 4552 adcq $1,%r12 4553 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4554 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4555 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4556 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4557 vpxor 0+0(%rsi),%ymm0,%ymm0 4558 vpxor 32+0(%rsi),%ymm3,%ymm3 4559 vpxor 64+0(%rsi),%ymm7,%ymm7 4560 vpxor 96+0(%rsi),%ymm11,%ymm11 4561 vmovdqu %ymm0,0+0(%rdi) 4562 vmovdqu %ymm3,32+0(%rdi) 4563 vmovdqu %ymm7,64+0(%rdi) 4564 vmovdqu %ymm11,96+0(%rdi) 4565 4566 vmovdqa 128(%rbp),%ymm0 4567 movq 0+0(%rbp),%rax 4568 movq %rax,%r15 4569 mulq %r10 4570 movq %rax,%r13 4571 movq %rdx,%r14 4572 movq 0+0(%rbp),%rax 4573 mulq %r11 4574 imulq %r12,%r15 4575 addq %rax,%r14 4576 adcq %rdx,%r15 4577 movq 8+0(%rbp),%rax 4578 movq %rax,%r9 4579 mulq %r10 4580 addq %rax,%r14 4581 adcq $0,%rdx 4582 movq %rdx,%r10 4583 movq 8+0(%rbp),%rax 4584 mulq %r11 4585 addq %rax,%r15 4586 adcq $0,%rdx 4587 imulq %r12,%r9 4588 addq %r10,%r15 4589 adcq %rdx,%r9 4590 movq %r13,%r10 4591 movq %r14,%r11 4592 movq %r15,%r12 4593 andq $3,%r12 4594 movq %r15,%r13 4595 andq $-4,%r13 4596 movq %r9,%r14 4597 shrdq $2,%r9,%r15 4598 shrq $2,%r9 4599 addq %r13,%r10 4600 adcq %r14,%r11 4601 adcq $0,%r12 4602 addq %r15,%r10 4603 adcq %r9,%r11 4604 adcq $0,%r12 4605 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4606 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4607 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4608 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4609 vpxor 0+128(%rsi),%ymm3,%ymm3 4610 vpxor 32+128(%rsi),%ymm2,%ymm2 4611 vpxor 64+128(%rsi),%ymm6,%ymm6 4612 vpxor 96+128(%rsi),%ymm10,%ymm10 4613 vmovdqu %ymm3,0+128(%rdi) 4614 vmovdqu %ymm2,32+128(%rdi) 4615 vmovdqu %ymm6,64+128(%rdi) 4616 vmovdqu %ymm10,96+128(%rdi) 4617 addq 60*8+16(%rsi),%r10 4618 adcq 8+60*8+16(%rsi),%r11 4619 adcq $1,%r12 4620 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4621 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4622 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4623 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4624 vpxor 0+256(%rsi),%ymm3,%ymm3 4625 vpxor 32+256(%rsi),%ymm1,%ymm1 4626 vpxor 64+256(%rsi),%ymm5,%ymm5 4627 vpxor 96+256(%rsi),%ymm9,%ymm9 4628 vmovdqu %ymm3,0+256(%rdi) 4629 vmovdqu %ymm1,32+256(%rdi) 4630 vmovdqu %ymm5,64+256(%rdi) 4631 vmovdqu %ymm9,96+256(%rdi) 4632 movq 0+0(%rbp),%rax 4633 movq %rax,%r15 4634 mulq %r10 4635 movq %rax,%r13 4636 movq %rdx,%r14 4637 movq 0+0(%rbp),%rax 4638 mulq %r11 4639 imulq %r12,%r15 4640 addq %rax,%r14 4641 adcq %rdx,%r15 4642 movq 8+0(%rbp),%rax 4643 movq %rax,%r9 4644 mulq %r10 4645 addq %rax,%r14 4646 adcq $0,%rdx 4647 movq %rdx,%r10 4648 movq 8+0(%rbp),%rax 4649 mulq %r11 4650 addq %rax,%r15 4651 adcq $0,%rdx 4652 imulq %r12,%r9 4653 addq %r10,%r15 4654 adcq %rdx,%r9 4655 movq %r13,%r10 4656 movq %r14,%r11 4657 movq %r15,%r12 4658 andq $3,%r12 4659 movq %r15,%r13 4660 andq $-4,%r13 4661 movq %r9,%r14 4662 shrdq $2,%r9,%r15 4663 shrq $2,%r9 4664 addq %r13,%r10 4665 adcq %r14,%r11 4666 adcq $0,%r12 4667 addq %r15,%r10 4668 adcq %r9,%r11 4669 adcq $0,%r12 4670 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4671 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4672 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4673 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4674 vpxor 0+384(%rsi),%ymm3,%ymm3 4675 vpxor 
32+384(%rsi),%ymm0,%ymm0 4676 vpxor 64+384(%rsi),%ymm4,%ymm4 4677 vpxor 96+384(%rsi),%ymm8,%ymm8 4678 vmovdqu %ymm3,0+384(%rdi) 4679 vmovdqu %ymm0,32+384(%rdi) 4680 vmovdqu %ymm4,64+384(%rdi) 4681 vmovdqu %ymm8,96+384(%rdi) 4682 4683 leaq 512(%rsi),%rsi 4684 leaq 512(%rdi),%rdi 4685 subq $512,%rbx 4686 jmp 1b 4687 3: 4688 testq %rbx,%rbx 4689 vzeroupper 4690 je open_sse_finalize 4691 3: 4692 cmpq $128,%rbx 4693 ja 3f 4694 vmovdqa .chacha20_consts(%rip),%ymm0 4695 vmovdqa 64(%rbp),%ymm4 4696 vmovdqa 96(%rbp),%ymm8 4697 vmovdqa .avx2_inc(%rip),%ymm12 4698 vpaddd 160(%rbp),%ymm12,%ymm12 4699 vmovdqa %ymm12,160(%rbp) 4700 4701 xorq %r8,%r8 4702 movq %rbx,%rcx 4703 andq $-16,%rcx 4704 testq %rcx,%rcx 4705 je 2f 4706 1: 4707 addq 0*8(%rsi,%r8), %r10 4708 adcq 8+0*8(%rsi,%r8), %r11 4709 adcq $1,%r12 4710 movq 0+0(%rbp),%rax 4711 movq %rax,%r15 4712 mulq %r10 4713 movq %rax,%r13 4714 movq %rdx,%r14 4715 movq 0+0(%rbp),%rax 4716 mulq %r11 4717 imulq %r12,%r15 4718 addq %rax,%r14 4719 adcq %rdx,%r15 4720 movq 8+0(%rbp),%rax 4721 movq %rax,%r9 4722 mulq %r10 4723 addq %rax,%r14 4724 adcq $0,%rdx 4725 movq %rdx,%r10 4726 movq 8+0(%rbp),%rax 4727 mulq %r11 4728 addq %rax,%r15 4729 adcq $0,%rdx 4730 imulq %r12,%r9 4731 addq %r10,%r15 4732 adcq %rdx,%r9 4733 movq %r13,%r10 4734 movq %r14,%r11 4735 movq %r15,%r12 4736 andq $3,%r12 4737 movq %r15,%r13 4738 andq $-4,%r13 4739 movq %r9,%r14 4740 shrdq $2,%r9,%r15 4741 shrq $2,%r9 4742 addq %r13,%r10 4743 adcq %r14,%r11 4744 adcq $0,%r12 4745 addq %r15,%r10 4746 adcq %r9,%r11 4747 adcq $0,%r12 4748 4749 2: 4750 addq $16,%r8 4751 vpaddd %ymm4,%ymm0,%ymm0 4752 vpxor %ymm0,%ymm12,%ymm12 4753 vpshufb .rol16(%rip),%ymm12,%ymm12 4754 vpaddd %ymm12,%ymm8,%ymm8 4755 vpxor %ymm8,%ymm4,%ymm4 4756 vpsrld $20,%ymm4,%ymm3 4757 vpslld $12,%ymm4,%ymm4 4758 vpxor %ymm3,%ymm4,%ymm4 4759 vpaddd %ymm4,%ymm0,%ymm0 4760 vpxor %ymm0,%ymm12,%ymm12 4761 vpshufb .rol8(%rip),%ymm12,%ymm12 4762 vpaddd %ymm12,%ymm8,%ymm8 4763 vpxor %ymm8,%ymm4,%ymm4 4764 vpslld $7,%ymm4,%ymm3 4765 vpsrld $25,%ymm4,%ymm4 4766 vpxor %ymm3,%ymm4,%ymm4 4767 vpalignr $12,%ymm12,%ymm12,%ymm12 4768 vpalignr $8,%ymm8,%ymm8,%ymm8 4769 vpalignr $4,%ymm4,%ymm4,%ymm4 4770 vpaddd %ymm4,%ymm0,%ymm0 4771 vpxor %ymm0,%ymm12,%ymm12 4772 vpshufb .rol16(%rip),%ymm12,%ymm12 4773 vpaddd %ymm12,%ymm8,%ymm8 4774 vpxor %ymm8,%ymm4,%ymm4 4775 vpsrld $20,%ymm4,%ymm3 4776 vpslld $12,%ymm4,%ymm4 4777 vpxor %ymm3,%ymm4,%ymm4 4778 vpaddd %ymm4,%ymm0,%ymm0 4779 vpxor %ymm0,%ymm12,%ymm12 4780 vpshufb .rol8(%rip),%ymm12,%ymm12 4781 vpaddd %ymm12,%ymm8,%ymm8 4782 vpxor %ymm8,%ymm4,%ymm4 4783 vpslld $7,%ymm4,%ymm3 4784 vpsrld $25,%ymm4,%ymm4 4785 vpxor %ymm3,%ymm4,%ymm4 4786 vpalignr $4,%ymm12,%ymm12,%ymm12 4787 vpalignr $8,%ymm8,%ymm8,%ymm8 4788 vpalignr $12,%ymm4,%ymm4,%ymm4 4789 4790 cmpq %rcx,%r8 4791 jb 1b 4792 cmpq $160,%r8 4793 jne 2b 4794 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4795 vpaddd 64(%rbp),%ymm4,%ymm4 4796 vpaddd 96(%rbp),%ymm8,%ymm8 4797 vpaddd 160(%rbp),%ymm12,%ymm12 4798 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4799 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4800 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4801 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4802 vmovdqa %ymm3,%ymm8 4803 4804 jmp open_avx2_tail_loop 4805 3: 4806 cmpq $256,%rbx 4807 ja 3f 4808 vmovdqa .chacha20_consts(%rip),%ymm0 4809 vmovdqa 64(%rbp),%ymm4 4810 vmovdqa 96(%rbp),%ymm8 4811 vmovdqa %ymm0,%ymm1 4812 vmovdqa %ymm4,%ymm5 4813 vmovdqa %ymm8,%ymm9 4814 vmovdqa .avx2_inc(%rip),%ymm12 4815 vpaddd 160(%rbp),%ymm12,%ymm13 4816 vpaddd %ymm13,%ymm12,%ymm12 4817 vmovdqa 
%ymm12,160(%rbp) 4818 vmovdqa %ymm13,192(%rbp) 4819 4820 movq %rbx,128(%rbp) 4821 movq %rbx,%rcx 4822 subq $128,%rcx 4823 shrq $4,%rcx 4824 movq $10,%r8 4825 cmpq $10,%rcx 4826 cmovgq %r8,%rcx 4827 movq %rsi,%rbx 4828 xorq %r8,%r8 4829 1: 4830 addq 0(%rbx),%r10 4831 adcq 8+0(%rbx),%r11 4832 adcq $1,%r12 4833 movq 0+0(%rbp),%rdx 4834 movq %rdx,%r15 4835 mulxq %r10,%r13,%r14 4836 mulxq %r11,%rax,%rdx 4837 imulq %r12,%r15 4838 addq %rax,%r14 4839 adcq %rdx,%r15 4840 movq 8+0(%rbp),%rdx 4841 mulxq %r10,%r10,%rax 4842 addq %r10,%r14 4843 mulxq %r11,%r11,%r9 4844 adcq %r11,%r15 4845 adcq $0,%r9 4846 imulq %r12,%rdx 4847 addq %rax,%r15 4848 adcq %rdx,%r9 4849 movq %r13,%r10 4850 movq %r14,%r11 4851 movq %r15,%r12 4852 andq $3,%r12 4853 movq %r15,%r13 4854 andq $-4,%r13 4855 movq %r9,%r14 4856 shrdq $2,%r9,%r15 4857 shrq $2,%r9 4858 addq %r13,%r10 4859 adcq %r14,%r11 4860 adcq $0,%r12 4861 addq %r15,%r10 4862 adcq %r9,%r11 4863 adcq $0,%r12 4864 4865 leaq 16(%rbx),%rbx 4866 2: 4867 vpaddd %ymm4,%ymm0,%ymm0 4868 vpxor %ymm0,%ymm12,%ymm12 4869 vpshufb .rol16(%rip),%ymm12,%ymm12 4870 vpaddd %ymm12,%ymm8,%ymm8 4871 vpxor %ymm8,%ymm4,%ymm4 4872 vpsrld $20,%ymm4,%ymm3 4873 vpslld $12,%ymm4,%ymm4 4874 vpxor %ymm3,%ymm4,%ymm4 4875 vpaddd %ymm4,%ymm0,%ymm0 4876 vpxor %ymm0,%ymm12,%ymm12 4877 vpshufb .rol8(%rip),%ymm12,%ymm12 4878 vpaddd %ymm12,%ymm8,%ymm8 4879 vpxor %ymm8,%ymm4,%ymm4 4880 vpslld $7,%ymm4,%ymm3 4881 vpsrld $25,%ymm4,%ymm4 4882 vpxor %ymm3,%ymm4,%ymm4 4883 vpalignr $12,%ymm12,%ymm12,%ymm12 4884 vpalignr $8,%ymm8,%ymm8,%ymm8 4885 vpalignr $4,%ymm4,%ymm4,%ymm4 4886 vpaddd %ymm5,%ymm1,%ymm1 4887 vpxor %ymm1,%ymm13,%ymm13 4888 vpshufb .rol16(%rip),%ymm13,%ymm13 4889 vpaddd %ymm13,%ymm9,%ymm9 4890 vpxor %ymm9,%ymm5,%ymm5 4891 vpsrld $20,%ymm5,%ymm3 4892 vpslld $12,%ymm5,%ymm5 4893 vpxor %ymm3,%ymm5,%ymm5 4894 vpaddd %ymm5,%ymm1,%ymm1 4895 vpxor %ymm1,%ymm13,%ymm13 4896 vpshufb .rol8(%rip),%ymm13,%ymm13 4897 vpaddd %ymm13,%ymm9,%ymm9 4898 vpxor %ymm9,%ymm5,%ymm5 4899 vpslld $7,%ymm5,%ymm3 4900 vpsrld $25,%ymm5,%ymm5 4901 vpxor %ymm3,%ymm5,%ymm5 4902 vpalignr $12,%ymm13,%ymm13,%ymm13 4903 vpalignr $8,%ymm9,%ymm9,%ymm9 4904 vpalignr $4,%ymm5,%ymm5,%ymm5 4905 4906 incq %r8 4907 vpaddd %ymm4,%ymm0,%ymm0 4908 vpxor %ymm0,%ymm12,%ymm12 4909 vpshufb .rol16(%rip),%ymm12,%ymm12 4910 vpaddd %ymm12,%ymm8,%ymm8 4911 vpxor %ymm8,%ymm4,%ymm4 4912 vpsrld $20,%ymm4,%ymm3 4913 vpslld $12,%ymm4,%ymm4 4914 vpxor %ymm3,%ymm4,%ymm4 4915 vpaddd %ymm4,%ymm0,%ymm0 4916 vpxor %ymm0,%ymm12,%ymm12 4917 vpshufb .rol8(%rip),%ymm12,%ymm12 4918 vpaddd %ymm12,%ymm8,%ymm8 4919 vpxor %ymm8,%ymm4,%ymm4 4920 vpslld $7,%ymm4,%ymm3 4921 vpsrld $25,%ymm4,%ymm4 4922 vpxor %ymm3,%ymm4,%ymm4 4923 vpalignr $4,%ymm12,%ymm12,%ymm12 4924 vpalignr $8,%ymm8,%ymm8,%ymm8 4925 vpalignr $12,%ymm4,%ymm4,%ymm4 4926 vpaddd %ymm5,%ymm1,%ymm1 4927 vpxor %ymm1,%ymm13,%ymm13 4928 vpshufb .rol16(%rip),%ymm13,%ymm13 4929 vpaddd %ymm13,%ymm9,%ymm9 4930 vpxor %ymm9,%ymm5,%ymm5 4931 vpsrld $20,%ymm5,%ymm3 4932 vpslld $12,%ymm5,%ymm5 4933 vpxor %ymm3,%ymm5,%ymm5 4934 vpaddd %ymm5,%ymm1,%ymm1 4935 vpxor %ymm1,%ymm13,%ymm13 4936 vpshufb .rol8(%rip),%ymm13,%ymm13 4937 vpaddd %ymm13,%ymm9,%ymm9 4938 vpxor %ymm9,%ymm5,%ymm5 4939 vpslld $7,%ymm5,%ymm3 4940 vpsrld $25,%ymm5,%ymm5 4941 vpxor %ymm3,%ymm5,%ymm5 4942 vpalignr $4,%ymm13,%ymm13,%ymm13 4943 vpalignr $8,%ymm9,%ymm9,%ymm9 4944 vpalignr $12,%ymm5,%ymm5,%ymm5 4945 vpaddd %ymm6,%ymm2,%ymm2 4946 vpxor %ymm2,%ymm14,%ymm14 4947 vpshufb .rol16(%rip),%ymm14,%ymm14 4948 vpaddd %ymm14,%ymm10,%ymm10 4949 vpxor 
%ymm10,%ymm6,%ymm6 4950 vpsrld $20,%ymm6,%ymm3 4951 vpslld $12,%ymm6,%ymm6 4952 vpxor %ymm3,%ymm6,%ymm6 4953 vpaddd %ymm6,%ymm2,%ymm2 4954 vpxor %ymm2,%ymm14,%ymm14 4955 vpshufb .rol8(%rip),%ymm14,%ymm14 4956 vpaddd %ymm14,%ymm10,%ymm10 4957 vpxor %ymm10,%ymm6,%ymm6 4958 vpslld $7,%ymm6,%ymm3 4959 vpsrld $25,%ymm6,%ymm6 4960 vpxor %ymm3,%ymm6,%ymm6 4961 vpalignr $4,%ymm14,%ymm14,%ymm14 4962 vpalignr $8,%ymm10,%ymm10,%ymm10 4963 vpalignr $12,%ymm6,%ymm6,%ymm6 4964 4965 cmpq %rcx,%r8 4966 jb 1b 4967 cmpq $10,%r8 4968 jne 2b 4969 movq %rbx,%r8 4970 subq %rsi,%rbx 4971 movq %rbx,%rcx 4972 movq 128(%rbp),%rbx 4973 1: 4974 addq $16,%rcx 4975 cmpq %rbx,%rcx 4976 jg 1f 4977 addq 0(%r8),%r10 4978 adcq 8+0(%r8),%r11 4979 adcq $1,%r12 4980 movq 0+0(%rbp),%rdx 4981 movq %rdx,%r15 4982 mulxq %r10,%r13,%r14 4983 mulxq %r11,%rax,%rdx 4984 imulq %r12,%r15 4985 addq %rax,%r14 4986 adcq %rdx,%r15 4987 movq 8+0(%rbp),%rdx 4988 mulxq %r10,%r10,%rax 4989 addq %r10,%r14 4990 mulxq %r11,%r11,%r9 4991 adcq %r11,%r15 4992 adcq $0,%r9 4993 imulq %r12,%rdx 4994 addq %rax,%r15 4995 adcq %rdx,%r9 4996 movq %r13,%r10 4997 movq %r14,%r11 4998 movq %r15,%r12 4999 andq $3,%r12 5000 movq %r15,%r13 5001 andq $-4,%r13 5002 movq %r9,%r14 5003 shrdq $2,%r9,%r15 5004 shrq $2,%r9 5005 addq %r13,%r10 5006 adcq %r14,%r11 5007 adcq $0,%r12 5008 addq %r15,%r10 5009 adcq %r9,%r11 5010 adcq $0,%r12 5011 5012 leaq 16(%r8),%r8 5013 jmp 1b 5014 1: 5015 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 5016 vpaddd 64(%rbp),%ymm5,%ymm5 5017 vpaddd 96(%rbp),%ymm9,%ymm9 5018 vpaddd 192(%rbp),%ymm13,%ymm13 5019 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5020 vpaddd 64(%rbp),%ymm4,%ymm4 5021 vpaddd 96(%rbp),%ymm8,%ymm8 5022 vpaddd 160(%rbp),%ymm12,%ymm12 5023 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5024 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5025 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5026 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5027 vpxor 0+0(%rsi),%ymm3,%ymm3 5028 vpxor 32+0(%rsi),%ymm1,%ymm1 5029 vpxor 64+0(%rsi),%ymm5,%ymm5 5030 vpxor 96+0(%rsi),%ymm9,%ymm9 5031 vmovdqu %ymm3,0+0(%rdi) 5032 vmovdqu %ymm1,32+0(%rdi) 5033 vmovdqu %ymm5,64+0(%rdi) 5034 vmovdqu %ymm9,96+0(%rdi) 5035 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5036 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5037 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5038 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5039 vmovdqa %ymm3,%ymm8 5040 5041 leaq 128(%rsi),%rsi 5042 leaq 128(%rdi),%rdi 5043 subq $128,%rbx 5044 jmp open_avx2_tail_loop 5045 3: 5046 cmpq $384,%rbx 5047 ja 3f 5048 vmovdqa .chacha20_consts(%rip),%ymm0 5049 vmovdqa 64(%rbp),%ymm4 5050 vmovdqa 96(%rbp),%ymm8 5051 vmovdqa %ymm0,%ymm1 5052 vmovdqa %ymm4,%ymm5 5053 vmovdqa %ymm8,%ymm9 5054 vmovdqa %ymm0,%ymm2 5055 vmovdqa %ymm4,%ymm6 5056 vmovdqa %ymm8,%ymm10 5057 vmovdqa .avx2_inc(%rip),%ymm12 5058 vpaddd 160(%rbp),%ymm12,%ymm14 5059 vpaddd %ymm14,%ymm12,%ymm13 5060 vpaddd %ymm13,%ymm12,%ymm12 5061 vmovdqa %ymm12,160(%rbp) 5062 vmovdqa %ymm13,192(%rbp) 5063 vmovdqa %ymm14,224(%rbp) 5064 5065 movq %rbx,128(%rbp) 5066 movq %rbx,%rcx 5067 subq $256,%rcx 5068 shrq $4,%rcx 5069 addq $6,%rcx 5070 movq $10,%r8 5071 cmpq $10,%rcx 5072 cmovgq %r8,%rcx 5073 movq %rsi,%rbx 5074 xorq %r8,%r8 5075 1: 5076 addq 0(%rbx),%r10 5077 adcq 8+0(%rbx),%r11 5078 adcq $1,%r12 5079 movq 0+0(%rbp),%rdx 5080 movq %rdx,%r15 5081 mulxq %r10,%r13,%r14 5082 mulxq %r11,%rax,%rdx 5083 imulq %r12,%r15 5084 addq %rax,%r14 5085 adcq %rdx,%r15 5086 movq 8+0(%rbp),%rdx 5087 mulxq %r10,%r10,%rax 5088 addq %r10,%r14 5089 mulxq %r11,%r11,%r9 5090 adcq %r11,%r15 5091 adcq $0,%r9 5092 imulq %r12,%rdx 5093 addq %rax,%r15 
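// This hash block uses mulx (BMI2); mulx does not modify the flags, so the
// add/adc carry chains can be interleaved freely with the surrounding AVX2
// round instructions.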
5094 adcq %rdx,%r9 5095 movq %r13,%r10 5096 movq %r14,%r11 5097 movq %r15,%r12 5098 andq $3,%r12 5099 movq %r15,%r13 5100 andq $-4,%r13 5101 movq %r9,%r14 5102 shrdq $2,%r9,%r15 5103 shrq $2,%r9 5104 addq %r13,%r10 5105 adcq %r14,%r11 5106 adcq $0,%r12 5107 addq %r15,%r10 5108 adcq %r9,%r11 5109 adcq $0,%r12 5110 5111 leaq 16(%rbx),%rbx 5112 2: 5113 vpaddd %ymm6,%ymm2,%ymm2 5114 vpxor %ymm2,%ymm14,%ymm14 5115 vpshufb .rol16(%rip),%ymm14,%ymm14 5116 vpaddd %ymm14,%ymm10,%ymm10 5117 vpxor %ymm10,%ymm6,%ymm6 5118 vpsrld $20,%ymm6,%ymm3 5119 vpslld $12,%ymm6,%ymm6 5120 vpxor %ymm3,%ymm6,%ymm6 5121 vpaddd %ymm6,%ymm2,%ymm2 5122 vpxor %ymm2,%ymm14,%ymm14 5123 vpshufb .rol8(%rip),%ymm14,%ymm14 5124 vpaddd %ymm14,%ymm10,%ymm10 5125 vpxor %ymm10,%ymm6,%ymm6 5126 vpslld $7,%ymm6,%ymm3 5127 vpsrld $25,%ymm6,%ymm6 5128 vpxor %ymm3,%ymm6,%ymm6 5129 vpalignr $12,%ymm14,%ymm14,%ymm14 5130 vpalignr $8,%ymm10,%ymm10,%ymm10 5131 vpalignr $4,%ymm6,%ymm6,%ymm6 5132 vpaddd %ymm5,%ymm1,%ymm1 5133 vpxor %ymm1,%ymm13,%ymm13 5134 vpshufb .rol16(%rip),%ymm13,%ymm13 5135 vpaddd %ymm13,%ymm9,%ymm9 5136 vpxor %ymm9,%ymm5,%ymm5 5137 vpsrld $20,%ymm5,%ymm3 5138 vpslld $12,%ymm5,%ymm5 5139 vpxor %ymm3,%ymm5,%ymm5 5140 vpaddd %ymm5,%ymm1,%ymm1 5141 vpxor %ymm1,%ymm13,%ymm13 5142 vpshufb .rol8(%rip),%ymm13,%ymm13 5143 vpaddd %ymm13,%ymm9,%ymm9 5144 vpxor %ymm9,%ymm5,%ymm5 5145 vpslld $7,%ymm5,%ymm3 5146 vpsrld $25,%ymm5,%ymm5 5147 vpxor %ymm3,%ymm5,%ymm5 5148 vpalignr $12,%ymm13,%ymm13,%ymm13 5149 vpalignr $8,%ymm9,%ymm9,%ymm9 5150 vpalignr $4,%ymm5,%ymm5,%ymm5 5151 vpaddd %ymm4,%ymm0,%ymm0 5152 vpxor %ymm0,%ymm12,%ymm12 5153 vpshufb .rol16(%rip),%ymm12,%ymm12 5154 vpaddd %ymm12,%ymm8,%ymm8 5155 vpxor %ymm8,%ymm4,%ymm4 5156 vpsrld $20,%ymm4,%ymm3 5157 vpslld $12,%ymm4,%ymm4 5158 vpxor %ymm3,%ymm4,%ymm4 5159 vpaddd %ymm4,%ymm0,%ymm0 5160 vpxor %ymm0,%ymm12,%ymm12 5161 vpshufb .rol8(%rip),%ymm12,%ymm12 5162 vpaddd %ymm12,%ymm8,%ymm8 5163 vpxor %ymm8,%ymm4,%ymm4 5164 vpslld $7,%ymm4,%ymm3 5165 vpsrld $25,%ymm4,%ymm4 5166 vpxor %ymm3,%ymm4,%ymm4 5167 vpalignr $12,%ymm12,%ymm12,%ymm12 5168 vpalignr $8,%ymm8,%ymm8,%ymm8 5169 vpalignr $4,%ymm4,%ymm4,%ymm4 5170 addq 0(%rbx),%r10 5171 adcq 8+0(%rbx),%r11 5172 adcq $1,%r12 5173 movq 0+0(%rbp),%rax 5174 movq %rax,%r15 5175 mulq %r10 5176 movq %rax,%r13 5177 movq %rdx,%r14 5178 movq 0+0(%rbp),%rax 5179 mulq %r11 5180 imulq %r12,%r15 5181 addq %rax,%r14 5182 adcq %rdx,%r15 5183 movq 8+0(%rbp),%rax 5184 movq %rax,%r9 5185 mulq %r10 5186 addq %rax,%r14 5187 adcq $0,%rdx 5188 movq %rdx,%r10 5189 movq 8+0(%rbp),%rax 5190 mulq %r11 5191 addq %rax,%r15 5192 adcq $0,%rdx 5193 imulq %r12,%r9 5194 addq %r10,%r15 5195 adcq %rdx,%r9 5196 movq %r13,%r10 5197 movq %r14,%r11 5198 movq %r15,%r12 5199 andq $3,%r12 5200 movq %r15,%r13 5201 andq $-4,%r13 5202 movq %r9,%r14 5203 shrdq $2,%r9,%r15 5204 shrq $2,%r9 5205 addq %r13,%r10 5206 adcq %r14,%r11 5207 adcq $0,%r12 5208 addq %r15,%r10 5209 adcq %r9,%r11 5210 adcq $0,%r12 5211 5212 leaq 16(%rbx),%rbx 5213 incq %r8 5214 vpaddd %ymm6,%ymm2,%ymm2 5215 vpxor %ymm2,%ymm14,%ymm14 5216 vpshufb .rol16(%rip),%ymm14,%ymm14 5217 vpaddd %ymm14,%ymm10,%ymm10 5218 vpxor %ymm10,%ymm6,%ymm6 5219 vpsrld $20,%ymm6,%ymm3 5220 vpslld $12,%ymm6,%ymm6 5221 vpxor %ymm3,%ymm6,%ymm6 5222 vpaddd %ymm6,%ymm2,%ymm2 5223 vpxor %ymm2,%ymm14,%ymm14 5224 vpshufb .rol8(%rip),%ymm14,%ymm14 5225 vpaddd %ymm14,%ymm10,%ymm10 5226 vpxor %ymm10,%ymm6,%ymm6 5227 vpslld $7,%ymm6,%ymm3 5228 vpsrld $25,%ymm6,%ymm6 5229 vpxor %ymm3,%ymm6,%ymm6 5230 vpalignr $4,%ymm14,%ymm14,%ymm14 5231 
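// Second half of the double round: these vpalignr $4/$8/$12 shuffles move the
// d, c and b rows of each lane back into column order before the next
// iteration.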
vpalignr $8,%ymm10,%ymm10,%ymm10 5232 vpalignr $12,%ymm6,%ymm6,%ymm6 5233 vpaddd %ymm5,%ymm1,%ymm1 5234 vpxor %ymm1,%ymm13,%ymm13 5235 vpshufb .rol16(%rip),%ymm13,%ymm13 5236 vpaddd %ymm13,%ymm9,%ymm9 5237 vpxor %ymm9,%ymm5,%ymm5 5238 vpsrld $20,%ymm5,%ymm3 5239 vpslld $12,%ymm5,%ymm5 5240 vpxor %ymm3,%ymm5,%ymm5 5241 vpaddd %ymm5,%ymm1,%ymm1 5242 vpxor %ymm1,%ymm13,%ymm13 5243 vpshufb .rol8(%rip),%ymm13,%ymm13 5244 vpaddd %ymm13,%ymm9,%ymm9 5245 vpxor %ymm9,%ymm5,%ymm5 5246 vpslld $7,%ymm5,%ymm3 5247 vpsrld $25,%ymm5,%ymm5 5248 vpxor %ymm3,%ymm5,%ymm5 5249 vpalignr $4,%ymm13,%ymm13,%ymm13 5250 vpalignr $8,%ymm9,%ymm9,%ymm9 5251 vpalignr $12,%ymm5,%ymm5,%ymm5 5252 vpaddd %ymm4,%ymm0,%ymm0 5253 vpxor %ymm0,%ymm12,%ymm12 5254 vpshufb .rol16(%rip),%ymm12,%ymm12 5255 vpaddd %ymm12,%ymm8,%ymm8 5256 vpxor %ymm8,%ymm4,%ymm4 5257 vpsrld $20,%ymm4,%ymm3 5258 vpslld $12,%ymm4,%ymm4 5259 vpxor %ymm3,%ymm4,%ymm4 5260 vpaddd %ymm4,%ymm0,%ymm0 5261 vpxor %ymm0,%ymm12,%ymm12 5262 vpshufb .rol8(%rip),%ymm12,%ymm12 5263 vpaddd %ymm12,%ymm8,%ymm8 5264 vpxor %ymm8,%ymm4,%ymm4 5265 vpslld $7,%ymm4,%ymm3 5266 vpsrld $25,%ymm4,%ymm4 5267 vpxor %ymm3,%ymm4,%ymm4 5268 vpalignr $4,%ymm12,%ymm12,%ymm12 5269 vpalignr $8,%ymm8,%ymm8,%ymm8 5270 vpalignr $12,%ymm4,%ymm4,%ymm4 5271 5272 cmpq %rcx,%r8 5273 jb 1b 5274 cmpq $10,%r8 5275 jne 2b 5276 movq %rbx,%r8 5277 subq %rsi,%rbx 5278 movq %rbx,%rcx 5279 movq 128(%rbp),%rbx 5280 1: 5281 addq $16,%rcx 5282 cmpq %rbx,%rcx 5283 jg 1f 5284 addq 0(%r8),%r10 5285 adcq 8+0(%r8),%r11 5286 adcq $1,%r12 5287 movq 0+0(%rbp),%rdx 5288 movq %rdx,%r15 5289 mulxq %r10,%r13,%r14 5290 mulxq %r11,%rax,%rdx 5291 imulq %r12,%r15 5292 addq %rax,%r14 5293 adcq %rdx,%r15 5294 movq 8+0(%rbp),%rdx 5295 mulxq %r10,%r10,%rax 5296 addq %r10,%r14 5297 mulxq %r11,%r11,%r9 5298 adcq %r11,%r15 5299 adcq $0,%r9 5300 imulq %r12,%rdx 5301 addq %rax,%r15 5302 adcq %rdx,%r9 5303 movq %r13,%r10 5304 movq %r14,%r11 5305 movq %r15,%r12 5306 andq $3,%r12 5307 movq %r15,%r13 5308 andq $-4,%r13 5309 movq %r9,%r14 5310 shrdq $2,%r9,%r15 5311 shrq $2,%r9 5312 addq %r13,%r10 5313 adcq %r14,%r11 5314 adcq $0,%r12 5315 addq %r15,%r10 5316 adcq %r9,%r11 5317 adcq $0,%r12 5318 5319 leaq 16(%r8),%r8 5320 jmp 1b 5321 1: 5322 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 5323 vpaddd 64(%rbp),%ymm6,%ymm6 5324 vpaddd 96(%rbp),%ymm10,%ymm10 5325 vpaddd 224(%rbp),%ymm14,%ymm14 5326 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 5327 vpaddd 64(%rbp),%ymm5,%ymm5 5328 vpaddd 96(%rbp),%ymm9,%ymm9 5329 vpaddd 192(%rbp),%ymm13,%ymm13 5330 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5331 vpaddd 64(%rbp),%ymm4,%ymm4 5332 vpaddd 96(%rbp),%ymm8,%ymm8 5333 vpaddd 160(%rbp),%ymm12,%ymm12 5334 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5335 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5336 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5337 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5338 vpxor 0+0(%rsi),%ymm3,%ymm3 5339 vpxor 32+0(%rsi),%ymm2,%ymm2 5340 vpxor 64+0(%rsi),%ymm6,%ymm6 5341 vpxor 96+0(%rsi),%ymm10,%ymm10 5342 vmovdqu %ymm3,0+0(%rdi) 5343 vmovdqu %ymm2,32+0(%rdi) 5344 vmovdqu %ymm6,64+0(%rdi) 5345 vmovdqu %ymm10,96+0(%rdi) 5346 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5347 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5348 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5349 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5350 vpxor 0+128(%rsi),%ymm3,%ymm3 5351 vpxor 32+128(%rsi),%ymm1,%ymm1 5352 vpxor 64+128(%rsi),%ymm5,%ymm5 5353 vpxor 96+128(%rsi),%ymm9,%ymm9 5354 vmovdqu %ymm3,0+128(%rdi) 5355 vmovdqu %ymm1,32+128(%rdi) 5356 vmovdqu %ymm5,64+128(%rdi) 5357 vmovdqu %ymm9,96+128(%rdi) 5358 vperm2i128 
$0x13,%ymm0,%ymm4,%ymm3 5359 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5360 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5361 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5362 vmovdqa %ymm3,%ymm8 5363 5364 leaq 256(%rsi),%rsi 5365 leaq 256(%rdi),%rdi 5366 subq $256,%rbx 5367 jmp open_avx2_tail_loop 5368 3: 5369 vmovdqa .chacha20_consts(%rip),%ymm0 5370 vmovdqa 64(%rbp),%ymm4 5371 vmovdqa 96(%rbp),%ymm8 5372 vmovdqa %ymm0,%ymm1 5373 vmovdqa %ymm4,%ymm5 5374 vmovdqa %ymm8,%ymm9 5375 vmovdqa %ymm0,%ymm2 5376 vmovdqa %ymm4,%ymm6 5377 vmovdqa %ymm8,%ymm10 5378 vmovdqa %ymm0,%ymm3 5379 vmovdqa %ymm4,%ymm7 5380 vmovdqa %ymm8,%ymm11 5381 vmovdqa .avx2_inc(%rip),%ymm12 5382 vpaddd 160(%rbp),%ymm12,%ymm15 5383 vpaddd %ymm15,%ymm12,%ymm14 5384 vpaddd %ymm14,%ymm12,%ymm13 5385 vpaddd %ymm13,%ymm12,%ymm12 5386 vmovdqa %ymm15,256(%rbp) 5387 vmovdqa %ymm14,224(%rbp) 5388 vmovdqa %ymm13,192(%rbp) 5389 vmovdqa %ymm12,160(%rbp) 5390 5391 xorq %rcx,%rcx 5392 movq %rsi,%r8 5393 1: 5394 addq 0(%r8),%r10 5395 adcq 8+0(%r8),%r11 5396 adcq $1,%r12 5397 movq 0+0(%rbp),%rax 5398 movq %rax,%r15 5399 mulq %r10 5400 movq %rax,%r13 5401 movq %rdx,%r14 5402 movq 0+0(%rbp),%rax 5403 mulq %r11 5404 imulq %r12,%r15 5405 addq %rax,%r14 5406 adcq %rdx,%r15 5407 movq 8+0(%rbp),%rax 5408 movq %rax,%r9 5409 mulq %r10 5410 addq %rax,%r14 5411 adcq $0,%rdx 5412 movq %rdx,%r10 5413 movq 8+0(%rbp),%rax 5414 mulq %r11 5415 addq %rax,%r15 5416 adcq $0,%rdx 5417 imulq %r12,%r9 5418 addq %r10,%r15 5419 adcq %rdx,%r9 5420 movq %r13,%r10 5421 movq %r14,%r11 5422 movq %r15,%r12 5423 andq $3,%r12 5424 movq %r15,%r13 5425 andq $-4,%r13 5426 movq %r9,%r14 5427 shrdq $2,%r9,%r15 5428 shrq $2,%r9 5429 addq %r13,%r10 5430 adcq %r14,%r11 5431 adcq $0,%r12 5432 addq %r15,%r10 5433 adcq %r9,%r11 5434 adcq $0,%r12 5435 5436 leaq 16(%r8),%r8 5437 2: 5438 vmovdqa %ymm8,128(%rbp) 5439 vmovdqa .rol16(%rip),%ymm8 5440 vpaddd %ymm7,%ymm3,%ymm3 5441 vpaddd %ymm6,%ymm2,%ymm2 5442 vpaddd %ymm5,%ymm1,%ymm1 5443 vpaddd %ymm4,%ymm0,%ymm0 5444 vpxor %ymm3,%ymm15,%ymm15 5445 vpxor %ymm2,%ymm14,%ymm14 5446 vpxor %ymm1,%ymm13,%ymm13 5447 vpxor %ymm0,%ymm12,%ymm12 5448 vpshufb %ymm8,%ymm15,%ymm15 5449 vpshufb %ymm8,%ymm14,%ymm14 5450 vpshufb %ymm8,%ymm13,%ymm13 5451 vpshufb %ymm8,%ymm12,%ymm12 5452 vmovdqa 128(%rbp),%ymm8 5453 vpaddd %ymm15,%ymm11,%ymm11 5454 vpaddd %ymm14,%ymm10,%ymm10 5455 vpaddd %ymm13,%ymm9,%ymm9 5456 vpaddd %ymm12,%ymm8,%ymm8 5457 vpxor %ymm11,%ymm7,%ymm7 5458 vpxor %ymm10,%ymm6,%ymm6 5459 vpxor %ymm9,%ymm5,%ymm5 5460 vpxor %ymm8,%ymm4,%ymm4 5461 vmovdqa %ymm8,128(%rbp) 5462 vpsrld $20,%ymm7,%ymm8 5463 vpslld $32-20,%ymm7,%ymm7 5464 vpxor %ymm8,%ymm7,%ymm7 5465 vpsrld $20,%ymm6,%ymm8 5466 vpslld $32-20,%ymm6,%ymm6 5467 vpxor %ymm8,%ymm6,%ymm6 5468 vpsrld $20,%ymm5,%ymm8 5469 vpslld $32-20,%ymm5,%ymm5 5470 vpxor %ymm8,%ymm5,%ymm5 5471 vpsrld $20,%ymm4,%ymm8 5472 vpslld $32-20,%ymm4,%ymm4 5473 vpxor %ymm8,%ymm4,%ymm4 5474 vmovdqa .rol8(%rip),%ymm8 5475 addq 0(%r8),%r10 5476 adcq 8+0(%r8),%r11 5477 adcq $1,%r12 5478 movq 0+0(%rbp),%rdx 5479 movq %rdx,%r15 5480 mulxq %r10,%r13,%r14 5481 mulxq %r11,%rax,%rdx 5482 imulq %r12,%r15 5483 addq %rax,%r14 5484 adcq %rdx,%r15 5485 movq 8+0(%rbp),%rdx 5486 mulxq %r10,%r10,%rax 5487 addq %r10,%r14 5488 mulxq %r11,%r11,%r9 5489 adcq %r11,%r15 5490 adcq $0,%r9 5491 imulq %r12,%rdx 5492 addq %rax,%r15 5493 adcq %rdx,%r9 5494 movq %r13,%r10 5495 movq %r14,%r11 5496 movq %r15,%r12 5497 andq $3,%r12 5498 movq %r15,%r13 5499 andq $-4,%r13 5500 movq %r9,%r14 5501 shrdq $2,%r9,%r15 5502 shrq $2,%r9 5503 addq %r13,%r10 5504 adcq %r14,%r11 
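/* Open tail for more than 384 (at most 512) remaining bytes: four ChaCha20
   blocks are set up with fresh counters derived from 160(%rbp) while the 1:/2:
   loop above folds Poly1305 over the remaining ciphertext at (%r8). */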
5505 adcq $0,%r12 5506 addq %r15,%r10 5507 adcq %r9,%r11 5508 adcq $0,%r12 5509 vpaddd %ymm7,%ymm3,%ymm3 5510 vpaddd %ymm6,%ymm2,%ymm2 5511 vpaddd %ymm5,%ymm1,%ymm1 5512 vpaddd %ymm4,%ymm0,%ymm0 5513 vpxor %ymm3,%ymm15,%ymm15 5514 vpxor %ymm2,%ymm14,%ymm14 5515 vpxor %ymm1,%ymm13,%ymm13 5516 vpxor %ymm0,%ymm12,%ymm12 5517 vpshufb %ymm8,%ymm15,%ymm15 5518 vpshufb %ymm8,%ymm14,%ymm14 5519 vpshufb %ymm8,%ymm13,%ymm13 5520 vpshufb %ymm8,%ymm12,%ymm12 5521 vmovdqa 128(%rbp),%ymm8 5522 vpaddd %ymm15,%ymm11,%ymm11 5523 vpaddd %ymm14,%ymm10,%ymm10 5524 vpaddd %ymm13,%ymm9,%ymm9 5525 vpaddd %ymm12,%ymm8,%ymm8 5526 vpxor %ymm11,%ymm7,%ymm7 5527 vpxor %ymm10,%ymm6,%ymm6 5528 vpxor %ymm9,%ymm5,%ymm5 5529 vpxor %ymm8,%ymm4,%ymm4 5530 vmovdqa %ymm8,128(%rbp) 5531 vpsrld $25,%ymm7,%ymm8 5532 vpslld $32-25,%ymm7,%ymm7 5533 vpxor %ymm8,%ymm7,%ymm7 5534 vpsrld $25,%ymm6,%ymm8 5535 vpslld $32-25,%ymm6,%ymm6 5536 vpxor %ymm8,%ymm6,%ymm6 5537 vpsrld $25,%ymm5,%ymm8 5538 vpslld $32-25,%ymm5,%ymm5 5539 vpxor %ymm8,%ymm5,%ymm5 5540 vpsrld $25,%ymm4,%ymm8 5541 vpslld $32-25,%ymm4,%ymm4 5542 vpxor %ymm8,%ymm4,%ymm4 5543 vmovdqa 128(%rbp),%ymm8 5544 vpalignr $4,%ymm7,%ymm7,%ymm7 5545 vpalignr $8,%ymm11,%ymm11,%ymm11 5546 vpalignr $12,%ymm15,%ymm15,%ymm15 5547 vpalignr $4,%ymm6,%ymm6,%ymm6 5548 vpalignr $8,%ymm10,%ymm10,%ymm10 5549 vpalignr $12,%ymm14,%ymm14,%ymm14 5550 vpalignr $4,%ymm5,%ymm5,%ymm5 5551 vpalignr $8,%ymm9,%ymm9,%ymm9 5552 vpalignr $12,%ymm13,%ymm13,%ymm13 5553 vpalignr $4,%ymm4,%ymm4,%ymm4 5554 vpalignr $8,%ymm8,%ymm8,%ymm8 5555 vpalignr $12,%ymm12,%ymm12,%ymm12 5556 vmovdqa %ymm8,128(%rbp) 5557 addq 16(%r8),%r10 5558 adcq 8+16(%r8),%r11 5559 adcq $1,%r12 5560 movq 0+0(%rbp),%rdx 5561 movq %rdx,%r15 5562 mulxq %r10,%r13,%r14 5563 mulxq %r11,%rax,%rdx 5564 imulq %r12,%r15 5565 addq %rax,%r14 5566 adcq %rdx,%r15 5567 movq 8+0(%rbp),%rdx 5568 mulxq %r10,%r10,%rax 5569 addq %r10,%r14 5570 mulxq %r11,%r11,%r9 5571 adcq %r11,%r15 5572 adcq $0,%r9 5573 imulq %r12,%rdx 5574 addq %rax,%r15 5575 adcq %rdx,%r9 5576 movq %r13,%r10 5577 movq %r14,%r11 5578 movq %r15,%r12 5579 andq $3,%r12 5580 movq %r15,%r13 5581 andq $-4,%r13 5582 movq %r9,%r14 5583 shrdq $2,%r9,%r15 5584 shrq $2,%r9 5585 addq %r13,%r10 5586 adcq %r14,%r11 5587 adcq $0,%r12 5588 addq %r15,%r10 5589 adcq %r9,%r11 5590 adcq $0,%r12 5591 5592 leaq 32(%r8),%r8 5593 vmovdqa .rol16(%rip),%ymm8 5594 vpaddd %ymm7,%ymm3,%ymm3 5595 vpaddd %ymm6,%ymm2,%ymm2 5596 vpaddd %ymm5,%ymm1,%ymm1 5597 vpaddd %ymm4,%ymm0,%ymm0 5598 vpxor %ymm3,%ymm15,%ymm15 5599 vpxor %ymm2,%ymm14,%ymm14 5600 vpxor %ymm1,%ymm13,%ymm13 5601 vpxor %ymm0,%ymm12,%ymm12 5602 vpshufb %ymm8,%ymm15,%ymm15 5603 vpshufb %ymm8,%ymm14,%ymm14 5604 vpshufb %ymm8,%ymm13,%ymm13 5605 vpshufb %ymm8,%ymm12,%ymm12 5606 vmovdqa 128(%rbp),%ymm8 5607 vpaddd %ymm15,%ymm11,%ymm11 5608 vpaddd %ymm14,%ymm10,%ymm10 5609 vpaddd %ymm13,%ymm9,%ymm9 5610 vpaddd %ymm12,%ymm8,%ymm8 5611 vpxor %ymm11,%ymm7,%ymm7 5612 vpxor %ymm10,%ymm6,%ymm6 5613 vpxor %ymm9,%ymm5,%ymm5 5614 vpxor %ymm8,%ymm4,%ymm4 5615 vmovdqa %ymm8,128(%rbp) 5616 vpsrld $20,%ymm7,%ymm8 5617 vpslld $32-20,%ymm7,%ymm7 5618 vpxor %ymm8,%ymm7,%ymm7 5619 vpsrld $20,%ymm6,%ymm8 5620 vpslld $32-20,%ymm6,%ymm6 5621 vpxor %ymm8,%ymm6,%ymm6 5622 vpsrld $20,%ymm5,%ymm8 5623 vpslld $32-20,%ymm5,%ymm5 5624 vpxor %ymm8,%ymm5,%ymm5 5625 vpsrld $20,%ymm4,%ymm8 5626 vpslld $32-20,%ymm4,%ymm4 5627 vpxor %ymm8,%ymm4,%ymm4 5628 vmovdqa .rol8(%rip),%ymm8 5629 vpaddd %ymm7,%ymm3,%ymm3 5630 vpaddd %ymm6,%ymm2,%ymm2 5631 vpaddd %ymm5,%ymm1,%ymm1 5632 vpaddd 
%ymm4,%ymm0,%ymm0 5633 vpxor %ymm3,%ymm15,%ymm15 5634 vpxor %ymm2,%ymm14,%ymm14 5635 vpxor %ymm1,%ymm13,%ymm13 5636 vpxor %ymm0,%ymm12,%ymm12 5637 vpshufb %ymm8,%ymm15,%ymm15 5638 vpshufb %ymm8,%ymm14,%ymm14 5639 vpshufb %ymm8,%ymm13,%ymm13 5640 vpshufb %ymm8,%ymm12,%ymm12 5641 vmovdqa 128(%rbp),%ymm8 5642 vpaddd %ymm15,%ymm11,%ymm11 5643 vpaddd %ymm14,%ymm10,%ymm10 5644 vpaddd %ymm13,%ymm9,%ymm9 5645 vpaddd %ymm12,%ymm8,%ymm8 5646 vpxor %ymm11,%ymm7,%ymm7 5647 vpxor %ymm10,%ymm6,%ymm6 5648 vpxor %ymm9,%ymm5,%ymm5 5649 vpxor %ymm8,%ymm4,%ymm4 5650 vmovdqa %ymm8,128(%rbp) 5651 vpsrld $25,%ymm7,%ymm8 5652 vpslld $32-25,%ymm7,%ymm7 5653 vpxor %ymm8,%ymm7,%ymm7 5654 vpsrld $25,%ymm6,%ymm8 5655 vpslld $32-25,%ymm6,%ymm6 5656 vpxor %ymm8,%ymm6,%ymm6 5657 vpsrld $25,%ymm5,%ymm8 5658 vpslld $32-25,%ymm5,%ymm5 5659 vpxor %ymm8,%ymm5,%ymm5 5660 vpsrld $25,%ymm4,%ymm8 5661 vpslld $32-25,%ymm4,%ymm4 5662 vpxor %ymm8,%ymm4,%ymm4 5663 vmovdqa 128(%rbp),%ymm8 5664 vpalignr $12,%ymm7,%ymm7,%ymm7 5665 vpalignr $8,%ymm11,%ymm11,%ymm11 5666 vpalignr $4,%ymm15,%ymm15,%ymm15 5667 vpalignr $12,%ymm6,%ymm6,%ymm6 5668 vpalignr $8,%ymm10,%ymm10,%ymm10 5669 vpalignr $4,%ymm14,%ymm14,%ymm14 5670 vpalignr $12,%ymm5,%ymm5,%ymm5 5671 vpalignr $8,%ymm9,%ymm9,%ymm9 5672 vpalignr $4,%ymm13,%ymm13,%ymm13 5673 vpalignr $12,%ymm4,%ymm4,%ymm4 5674 vpalignr $8,%ymm8,%ymm8,%ymm8 5675 vpalignr $4,%ymm12,%ymm12,%ymm12 5676 5677 incq %rcx 5678 cmpq $4,%rcx 5679 jl 1b 5680 cmpq $10,%rcx 5681 jne 2b 5682 movq %rbx,%rcx 5683 subq $384,%rcx 5684 andq $-16,%rcx 5685 1: 5686 testq %rcx,%rcx 5687 je 1f 5688 addq 0(%r8),%r10 5689 adcq 8+0(%r8),%r11 5690 adcq $1,%r12 5691 movq 0+0(%rbp),%rdx 5692 movq %rdx,%r15 5693 mulxq %r10,%r13,%r14 5694 mulxq %r11,%rax,%rdx 5695 imulq %r12,%r15 5696 addq %rax,%r14 5697 adcq %rdx,%r15 5698 movq 8+0(%rbp),%rdx 5699 mulxq %r10,%r10,%rax 5700 addq %r10,%r14 5701 mulxq %r11,%r11,%r9 5702 adcq %r11,%r15 5703 adcq $0,%r9 5704 imulq %r12,%rdx 5705 addq %rax,%r15 5706 adcq %rdx,%r9 5707 movq %r13,%r10 5708 movq %r14,%r11 5709 movq %r15,%r12 5710 andq $3,%r12 5711 movq %r15,%r13 5712 andq $-4,%r13 5713 movq %r9,%r14 5714 shrdq $2,%r9,%r15 5715 shrq $2,%r9 5716 addq %r13,%r10 5717 adcq %r14,%r11 5718 adcq $0,%r12 5719 addq %r15,%r10 5720 adcq %r9,%r11 5721 adcq $0,%r12 5722 5723 leaq 16(%r8),%r8 5724 subq $16,%rcx 5725 jmp 1b 5726 1: 5727 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 5728 vpaddd 64(%rbp),%ymm7,%ymm7 5729 vpaddd 96(%rbp),%ymm11,%ymm11 5730 vpaddd 256(%rbp),%ymm15,%ymm15 5731 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 5732 vpaddd 64(%rbp),%ymm6,%ymm6 5733 vpaddd 96(%rbp),%ymm10,%ymm10 5734 vpaddd 224(%rbp),%ymm14,%ymm14 5735 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 5736 vpaddd 64(%rbp),%ymm5,%ymm5 5737 vpaddd 96(%rbp),%ymm9,%ymm9 5738 vpaddd 192(%rbp),%ymm13,%ymm13 5739 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5740 vpaddd 64(%rbp),%ymm4,%ymm4 5741 vpaddd 96(%rbp),%ymm8,%ymm8 5742 vpaddd 160(%rbp),%ymm12,%ymm12 5743 5744 vmovdqa %ymm0,128(%rbp) 5745 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5746 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5747 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5748 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5749 vpxor 0+0(%rsi),%ymm0,%ymm0 5750 vpxor 32+0(%rsi),%ymm3,%ymm3 5751 vpxor 64+0(%rsi),%ymm7,%ymm7 5752 vpxor 96+0(%rsi),%ymm11,%ymm11 5753 vmovdqu %ymm0,0+0(%rdi) 5754 vmovdqu %ymm3,32+0(%rdi) 5755 vmovdqu %ymm7,64+0(%rdi) 5756 vmovdqu %ymm11,96+0(%rdi) 5757 5758 vmovdqa 128(%rbp),%ymm0 5759 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5760 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5761 vperm2i128 
$0x02,%ymm10,%ymm14,%ymm2 5762 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5763 vpxor 0+128(%rsi),%ymm3,%ymm3 5764 vpxor 32+128(%rsi),%ymm2,%ymm2 5765 vpxor 64+128(%rsi),%ymm6,%ymm6 5766 vpxor 96+128(%rsi),%ymm10,%ymm10 5767 vmovdqu %ymm3,0+128(%rdi) 5768 vmovdqu %ymm2,32+128(%rdi) 5769 vmovdqu %ymm6,64+128(%rdi) 5770 vmovdqu %ymm10,96+128(%rdi) 5771 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5772 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5773 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5774 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5775 vpxor 0+256(%rsi),%ymm3,%ymm3 5776 vpxor 32+256(%rsi),%ymm1,%ymm1 5777 vpxor 64+256(%rsi),%ymm5,%ymm5 5778 vpxor 96+256(%rsi),%ymm9,%ymm9 5779 vmovdqu %ymm3,0+256(%rdi) 5780 vmovdqu %ymm1,32+256(%rdi) 5781 vmovdqu %ymm5,64+256(%rdi) 5782 vmovdqu %ymm9,96+256(%rdi) 5783 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5784 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5785 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5786 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5787 vmovdqa %ymm3,%ymm8 5788 5789 leaq 384(%rsi),%rsi 5790 leaq 384(%rdi),%rdi 5791 subq $384,%rbx 5792 open_avx2_tail_loop: 5793 cmpq $32,%rbx 5794 jb open_avx2_tail 5795 subq $32,%rbx 5796 vpxor (%rsi),%ymm0,%ymm0 5797 vmovdqu %ymm0,(%rdi) 5798 leaq 32(%rsi),%rsi 5799 leaq 32(%rdi),%rdi 5800 vmovdqa %ymm4,%ymm0 5801 vmovdqa %ymm8,%ymm4 5802 vmovdqa %ymm12,%ymm8 5803 jmp open_avx2_tail_loop 5804 open_avx2_tail: 5805 cmpq $16,%rbx 5806 vmovdqa %xmm0,%xmm1 5807 jb 1f 5808 subq $16,%rbx 5809 5810 vpxor (%rsi),%xmm0,%xmm1 5811 vmovdqu %xmm1,(%rdi) 5812 leaq 16(%rsi),%rsi 5813 leaq 16(%rdi),%rdi 5814 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5815 vmovdqa %xmm0,%xmm1 5816 1: 5817 vzeroupper 5818 jmp open_sse_tail_16 5819 5820 open_avx2_192: 5821 vmovdqa %ymm0,%ymm1 5822 vmovdqa %ymm0,%ymm2 5823 vmovdqa %ymm4,%ymm5 5824 vmovdqa %ymm4,%ymm6 5825 vmovdqa %ymm8,%ymm9 5826 vmovdqa %ymm8,%ymm10 5827 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 5828 vmovdqa %ymm12,%ymm11 5829 vmovdqa %ymm13,%ymm15 5830 movq $10,%r10 5831 1: 5832 vpaddd %ymm4,%ymm0,%ymm0 5833 vpxor %ymm0,%ymm12,%ymm12 5834 vpshufb .rol16(%rip),%ymm12,%ymm12 5835 vpaddd %ymm12,%ymm8,%ymm8 5836 vpxor %ymm8,%ymm4,%ymm4 5837 vpsrld $20,%ymm4,%ymm3 5838 vpslld $12,%ymm4,%ymm4 5839 vpxor %ymm3,%ymm4,%ymm4 5840 vpaddd %ymm4,%ymm0,%ymm0 5841 vpxor %ymm0,%ymm12,%ymm12 5842 vpshufb .rol8(%rip),%ymm12,%ymm12 5843 vpaddd %ymm12,%ymm8,%ymm8 5844 vpxor %ymm8,%ymm4,%ymm4 5845 vpslld $7,%ymm4,%ymm3 5846 vpsrld $25,%ymm4,%ymm4 5847 vpxor %ymm3,%ymm4,%ymm4 5848 vpalignr $12,%ymm12,%ymm12,%ymm12 5849 vpalignr $8,%ymm8,%ymm8,%ymm8 5850 vpalignr $4,%ymm4,%ymm4,%ymm4 5851 vpaddd %ymm5,%ymm1,%ymm1 5852 vpxor %ymm1,%ymm13,%ymm13 5853 vpshufb .rol16(%rip),%ymm13,%ymm13 5854 vpaddd %ymm13,%ymm9,%ymm9 5855 vpxor %ymm9,%ymm5,%ymm5 5856 vpsrld $20,%ymm5,%ymm3 5857 vpslld $12,%ymm5,%ymm5 5858 vpxor %ymm3,%ymm5,%ymm5 5859 vpaddd %ymm5,%ymm1,%ymm1 5860 vpxor %ymm1,%ymm13,%ymm13 5861 vpshufb .rol8(%rip),%ymm13,%ymm13 5862 vpaddd %ymm13,%ymm9,%ymm9 5863 vpxor %ymm9,%ymm5,%ymm5 5864 vpslld $7,%ymm5,%ymm3 5865 vpsrld $25,%ymm5,%ymm5 5866 vpxor %ymm3,%ymm5,%ymm5 5867 vpalignr $12,%ymm13,%ymm13,%ymm13 5868 vpalignr $8,%ymm9,%ymm9,%ymm9 5869 vpalignr $4,%ymm5,%ymm5,%ymm5 5870 vpaddd %ymm4,%ymm0,%ymm0 5871 vpxor %ymm0,%ymm12,%ymm12 5872 vpshufb .rol16(%rip),%ymm12,%ymm12 5873 vpaddd %ymm12,%ymm8,%ymm8 5874 vpxor %ymm8,%ymm4,%ymm4 5875 vpsrld $20,%ymm4,%ymm3 5876 vpslld $12,%ymm4,%ymm4 5877 vpxor %ymm3,%ymm4,%ymm4 5878 vpaddd %ymm4,%ymm0,%ymm0 5879 vpxor %ymm0,%ymm12,%ymm12 5880 vpshufb .rol8(%rip),%ymm12,%ymm12 5881 vpaddd %ymm12,%ymm8,%ymm8 5882 vpxor %ymm8,%ymm4,%ymm4 
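/* open_avx2_192: short open path for inputs of at most 192 bytes. Two ChaCha20
   blocks are generated; the first 32 bytes of keystream are clamped into the
   Poly1305 key at 0(%rbp) and the rest is consumed by open_avx2_short. */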
5883 vpslld $7,%ymm4,%ymm3 5884 vpsrld $25,%ymm4,%ymm4 5885 vpxor %ymm3,%ymm4,%ymm4 5886 vpalignr $4,%ymm12,%ymm12,%ymm12 5887 vpalignr $8,%ymm8,%ymm8,%ymm8 5888 vpalignr $12,%ymm4,%ymm4,%ymm4 5889 vpaddd %ymm5,%ymm1,%ymm1 5890 vpxor %ymm1,%ymm13,%ymm13 5891 vpshufb .rol16(%rip),%ymm13,%ymm13 5892 vpaddd %ymm13,%ymm9,%ymm9 5893 vpxor %ymm9,%ymm5,%ymm5 5894 vpsrld $20,%ymm5,%ymm3 5895 vpslld $12,%ymm5,%ymm5 5896 vpxor %ymm3,%ymm5,%ymm5 5897 vpaddd %ymm5,%ymm1,%ymm1 5898 vpxor %ymm1,%ymm13,%ymm13 5899 vpshufb .rol8(%rip),%ymm13,%ymm13 5900 vpaddd %ymm13,%ymm9,%ymm9 5901 vpxor %ymm9,%ymm5,%ymm5 5902 vpslld $7,%ymm5,%ymm3 5903 vpsrld $25,%ymm5,%ymm5 5904 vpxor %ymm3,%ymm5,%ymm5 5905 vpalignr $4,%ymm13,%ymm13,%ymm13 5906 vpalignr $8,%ymm9,%ymm9,%ymm9 5907 vpalignr $12,%ymm5,%ymm5,%ymm5 5908 5909 decq %r10 5910 jne 1b 5911 vpaddd %ymm2,%ymm0,%ymm0 5912 vpaddd %ymm2,%ymm1,%ymm1 5913 vpaddd %ymm6,%ymm4,%ymm4 5914 vpaddd %ymm6,%ymm5,%ymm5 5915 vpaddd %ymm10,%ymm8,%ymm8 5916 vpaddd %ymm10,%ymm9,%ymm9 5917 vpaddd %ymm11,%ymm12,%ymm12 5918 vpaddd %ymm15,%ymm13,%ymm13 5919 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5920 5921 vpand .clamp(%rip),%ymm3,%ymm3 5922 vmovdqa %ymm3,0(%rbp) 5923 5924 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5925 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5926 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5927 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5928 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5929 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5930 open_avx2_short: 5931 movq %r8,%r8 5932 call poly_hash_ad_internal 5933 open_avx2_hash_and_xor_loop: 5934 cmpq $32,%rbx 5935 jb open_avx2_short_tail_32 5936 subq $32,%rbx 5937 addq 0(%rsi),%r10 5938 adcq 8+0(%rsi),%r11 5939 adcq $1,%r12 5940 movq 0+0(%rbp),%rax 5941 movq %rax,%r15 5942 mulq %r10 5943 movq %rax,%r13 5944 movq %rdx,%r14 5945 movq 0+0(%rbp),%rax 5946 mulq %r11 5947 imulq %r12,%r15 5948 addq %rax,%r14 5949 adcq %rdx,%r15 5950 movq 8+0(%rbp),%rax 5951 movq %rax,%r9 5952 mulq %r10 5953 addq %rax,%r14 5954 adcq $0,%rdx 5955 movq %rdx,%r10 5956 movq 8+0(%rbp),%rax 5957 mulq %r11 5958 addq %rax,%r15 5959 adcq $0,%rdx 5960 imulq %r12,%r9 5961 addq %r10,%r15 5962 adcq %rdx,%r9 5963 movq %r13,%r10 5964 movq %r14,%r11 5965 movq %r15,%r12 5966 andq $3,%r12 5967 movq %r15,%r13 5968 andq $-4,%r13 5969 movq %r9,%r14 5970 shrdq $2,%r9,%r15 5971 shrq $2,%r9 5972 addq %r13,%r10 5973 adcq %r14,%r11 5974 adcq $0,%r12 5975 addq %r15,%r10 5976 adcq %r9,%r11 5977 adcq $0,%r12 5978 addq 16(%rsi),%r10 5979 adcq 8+16(%rsi),%r11 5980 adcq $1,%r12 5981 movq 0+0(%rbp),%rax 5982 movq %rax,%r15 5983 mulq %r10 5984 movq %rax,%r13 5985 movq %rdx,%r14 5986 movq 0+0(%rbp),%rax 5987 mulq %r11 5988 imulq %r12,%r15 5989 addq %rax,%r14 5990 adcq %rdx,%r15 5991 movq 8+0(%rbp),%rax 5992 movq %rax,%r9 5993 mulq %r10 5994 addq %rax,%r14 5995 adcq $0,%rdx 5996 movq %rdx,%r10 5997 movq 8+0(%rbp),%rax 5998 mulq %r11 5999 addq %rax,%r15 6000 adcq $0,%rdx 6001 imulq %r12,%r9 6002 addq %r10,%r15 6003 adcq %rdx,%r9 6004 movq %r13,%r10 6005 movq %r14,%r11 6006 movq %r15,%r12 6007 andq $3,%r12 6008 movq %r15,%r13 6009 andq $-4,%r13 6010 movq %r9,%r14 6011 shrdq $2,%r9,%r15 6012 shrq $2,%r9 6013 addq %r13,%r10 6014 adcq %r14,%r11 6015 adcq $0,%r12 6016 addq %r15,%r10 6017 adcq %r9,%r11 6018 adcq $0,%r12 6019 6020 6021 vpxor (%rsi),%ymm0,%ymm0 6022 vmovdqu %ymm0,(%rdi) 6023 leaq 32(%rsi),%rsi 6024 leaq 32(%rdi),%rdi 6025 6026 vmovdqa %ymm4,%ymm0 6027 vmovdqa %ymm8,%ymm4 6028 vmovdqa %ymm12,%ymm8 6029 vmovdqa %ymm1,%ymm12 6030 vmovdqa %ymm5,%ymm1 6031 vmovdqa %ymm9,%ymm5 6032 vmovdqa %ymm13,%ymm9 6033 vmovdqa %ymm2,%ymm13 
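/* open_avx2_hash_and_xor_loop: absorb two 16-byte Poly1305 blocks of ciphertext,
   then decrypt 32 bytes by XORing the next keystream register; the vmovdqa chain
   shifts the remaining keystream registers down toward %ymm0. */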
6034 vmovdqa %ymm6,%ymm2 6035 jmp open_avx2_hash_and_xor_loop 6036 open_avx2_short_tail_32: 6037 cmpq $16,%rbx 6038 vmovdqa %xmm0,%xmm1 6039 jb 1f 6040 subq $16,%rbx 6041 addq 0(%rsi),%r10 6042 adcq 8+0(%rsi),%r11 6043 adcq $1,%r12 6044 movq 0+0(%rbp),%rax 6045 movq %rax,%r15 6046 mulq %r10 6047 movq %rax,%r13 6048 movq %rdx,%r14 6049 movq 0+0(%rbp),%rax 6050 mulq %r11 6051 imulq %r12,%r15 6052 addq %rax,%r14 6053 adcq %rdx,%r15 6054 movq 8+0(%rbp),%rax 6055 movq %rax,%r9 6056 mulq %r10 6057 addq %rax,%r14 6058 adcq $0,%rdx 6059 movq %rdx,%r10 6060 movq 8+0(%rbp),%rax 6061 mulq %r11 6062 addq %rax,%r15 6063 adcq $0,%rdx 6064 imulq %r12,%r9 6065 addq %r10,%r15 6066 adcq %rdx,%r9 6067 movq %r13,%r10 6068 movq %r14,%r11 6069 movq %r15,%r12 6070 andq $3,%r12 6071 movq %r15,%r13 6072 andq $-4,%r13 6073 movq %r9,%r14 6074 shrdq $2,%r9,%r15 6075 shrq $2,%r9 6076 addq %r13,%r10 6077 adcq %r14,%r11 6078 adcq $0,%r12 6079 addq %r15,%r10 6080 adcq %r9,%r11 6081 adcq $0,%r12 6082 6083 vpxor (%rsi),%xmm0,%xmm3 6084 vmovdqu %xmm3,(%rdi) 6085 leaq 16(%rsi),%rsi 6086 leaq 16(%rdi),%rdi 6087 vextracti128 $1,%ymm0,%xmm1 6088 1: 6089 vzeroupper 6090 jmp open_sse_tail_16 6091 6092 open_avx2_320: 6093 vmovdqa %ymm0,%ymm1 6094 vmovdqa %ymm0,%ymm2 6095 vmovdqa %ymm4,%ymm5 6096 vmovdqa %ymm4,%ymm6 6097 vmovdqa %ymm8,%ymm9 6098 vmovdqa %ymm8,%ymm10 6099 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 6100 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 6101 vmovdqa %ymm4,%ymm7 6102 vmovdqa %ymm8,%ymm11 6103 vmovdqa %ymm12,160(%rbp) 6104 vmovdqa %ymm13,192(%rbp) 6105 vmovdqa %ymm14,224(%rbp) 6106 movq $10,%r10 6107 1: 6108 vpaddd %ymm4,%ymm0,%ymm0 6109 vpxor %ymm0,%ymm12,%ymm12 6110 vpshufb .rol16(%rip),%ymm12,%ymm12 6111 vpaddd %ymm12,%ymm8,%ymm8 6112 vpxor %ymm8,%ymm4,%ymm4 6113 vpsrld $20,%ymm4,%ymm3 6114 vpslld $12,%ymm4,%ymm4 6115 vpxor %ymm3,%ymm4,%ymm4 6116 vpaddd %ymm4,%ymm0,%ymm0 6117 vpxor %ymm0,%ymm12,%ymm12 6118 vpshufb .rol8(%rip),%ymm12,%ymm12 6119 vpaddd %ymm12,%ymm8,%ymm8 6120 vpxor %ymm8,%ymm4,%ymm4 6121 vpslld $7,%ymm4,%ymm3 6122 vpsrld $25,%ymm4,%ymm4 6123 vpxor %ymm3,%ymm4,%ymm4 6124 vpalignr $12,%ymm12,%ymm12,%ymm12 6125 vpalignr $8,%ymm8,%ymm8,%ymm8 6126 vpalignr $4,%ymm4,%ymm4,%ymm4 6127 vpaddd %ymm5,%ymm1,%ymm1 6128 vpxor %ymm1,%ymm13,%ymm13 6129 vpshufb .rol16(%rip),%ymm13,%ymm13 6130 vpaddd %ymm13,%ymm9,%ymm9 6131 vpxor %ymm9,%ymm5,%ymm5 6132 vpsrld $20,%ymm5,%ymm3 6133 vpslld $12,%ymm5,%ymm5 6134 vpxor %ymm3,%ymm5,%ymm5 6135 vpaddd %ymm5,%ymm1,%ymm1 6136 vpxor %ymm1,%ymm13,%ymm13 6137 vpshufb .rol8(%rip),%ymm13,%ymm13 6138 vpaddd %ymm13,%ymm9,%ymm9 6139 vpxor %ymm9,%ymm5,%ymm5 6140 vpslld $7,%ymm5,%ymm3 6141 vpsrld $25,%ymm5,%ymm5 6142 vpxor %ymm3,%ymm5,%ymm5 6143 vpalignr $12,%ymm13,%ymm13,%ymm13 6144 vpalignr $8,%ymm9,%ymm9,%ymm9 6145 vpalignr $4,%ymm5,%ymm5,%ymm5 6146 vpaddd %ymm6,%ymm2,%ymm2 6147 vpxor %ymm2,%ymm14,%ymm14 6148 vpshufb .rol16(%rip),%ymm14,%ymm14 6149 vpaddd %ymm14,%ymm10,%ymm10 6150 vpxor %ymm10,%ymm6,%ymm6 6151 vpsrld $20,%ymm6,%ymm3 6152 vpslld $12,%ymm6,%ymm6 6153 vpxor %ymm3,%ymm6,%ymm6 6154 vpaddd %ymm6,%ymm2,%ymm2 6155 vpxor %ymm2,%ymm14,%ymm14 6156 vpshufb .rol8(%rip),%ymm14,%ymm14 6157 vpaddd %ymm14,%ymm10,%ymm10 6158 vpxor %ymm10,%ymm6,%ymm6 6159 vpslld $7,%ymm6,%ymm3 6160 vpsrld $25,%ymm6,%ymm6 6161 vpxor %ymm3,%ymm6,%ymm6 6162 vpalignr $12,%ymm14,%ymm14,%ymm14 6163 vpalignr $8,%ymm10,%ymm10,%ymm10 6164 vpalignr $4,%ymm6,%ymm6,%ymm6 6165 vpaddd %ymm4,%ymm0,%ymm0 6166 vpxor %ymm0,%ymm12,%ymm12 6167 vpshufb .rol16(%rip),%ymm12,%ymm12 6168 vpaddd %ymm12,%ymm8,%ymm8 6169 vpxor 
%ymm8,%ymm4,%ymm4
vpsrld $20,%ymm4,%ymm3
vpslld $12,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm0,%ymm12,%ymm12
vpshufb .rol8(%rip),%ymm12,%ymm12
vpaddd %ymm12,%ymm8,%ymm8
vpxor %ymm8,%ymm4,%ymm4
vpslld $7,%ymm4,%ymm3
vpsrld $25,%ymm4,%ymm4
vpxor %ymm3,%ymm4,%ymm4
vpalignr $4,%ymm12,%ymm12,%ymm12
vpalignr $8,%ymm8,%ymm8,%ymm8
vpalignr $12,%ymm4,%ymm4,%ymm4
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol16(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpsrld $20,%ymm5,%ymm3
vpslld $12,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpaddd %ymm5,%ymm1,%ymm1
vpxor %ymm1,%ymm13,%ymm13
vpshufb .rol8(%rip),%ymm13,%ymm13
vpaddd %ymm13,%ymm9,%ymm9
vpxor %ymm9,%ymm5,%ymm5
vpslld $7,%ymm5,%ymm3
vpsrld $25,%ymm5,%ymm5
vpxor %ymm3,%ymm5,%ymm5
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $12,%ymm5,%ymm5,%ymm5
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol16(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpsrld $20,%ymm6,%ymm3
vpslld $12,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpaddd %ymm6,%ymm2,%ymm2
vpxor %ymm2,%ymm14,%ymm14
vpshufb .rol8(%rip),%ymm14,%ymm14
vpaddd %ymm14,%ymm10,%ymm10
vpxor %ymm10,%ymm6,%ymm6
vpslld $7,%ymm6,%ymm3
vpsrld $25,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $12,%ymm6,%ymm6,%ymm6

decq %r10
jne 1b
/* Add the input state back in; the first 32 bytes of keystream are clamped
   below into the Poly1305 key and stored at 0(%rbp). */
vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
vpaddd %ymm7,%ymm4,%ymm4
vpaddd %ymm7,%ymm5,%ymm5
vpaddd %ymm7,%ymm6,%ymm6
vpaddd %ymm11,%ymm8,%ymm8
vpaddd %ymm11,%ymm9,%ymm9
vpaddd %ymm11,%ymm10,%ymm10
vpaddd 160(%rbp),%ymm12,%ymm12
vpaddd 192(%rbp),%ymm13,%ymm13
vpaddd 224(%rbp),%ymm14,%ymm14
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

vpand .clamp(%rip),%ymm3,%ymm3
vmovdqa %ymm3,0(%rbp)

vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
jmp open_avx2_short


/* chacha20_poly1305_seal_avx2: AVX2 seal path. The key/nonce block at (%r9) is
   broadcast into the ChaCha20 state; the input length in %rbx selects the
   192-byte, 320-byte or general bulk path. */
.p2align 6
chacha20_poly1305_seal_avx2:
vzeroupper
vmovdqa .chacha20_consts(%rip),%ymm0
vbroadcasti128 0(%r9),%ymm4
vbroadcasti128 16(%r9),%ymm8
vbroadcasti128 32(%r9),%ymm12
vpaddd .avx2_init(%rip),%ymm12,%ymm12
cmpq $192,%rbx
jbe seal_avx2_192
cmpq $320,%rbx
jbe seal_avx2_320
vmovdqa %ymm0,%ymm1
vmovdqa %ymm0,%ymm2
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm5
vmovdqa %ymm4,%ymm6
vmovdqa %ymm4,%ymm7
vmovdqa %ymm4,64(%rbp)
vmovdqa %ymm8,%ymm9
vmovdqa %ymm8,%ymm10
vmovdqa %ymm8,%ymm11
vmovdqa %ymm8,96(%rbp)
vmovdqa %ymm12,%ymm15
vpaddd .avx2_inc(%rip),%ymm15,%ymm14
vpaddd .avx2_inc(%rip),%ymm14,%ymm13
vpaddd .avx2_inc(%rip),%ymm13,%ymm12
vmovdqa %ymm12,160(%rbp)
vmovdqa %ymm13,192(%rbp)
vmovdqa
%ymm14,224(%rbp) 6287 vmovdqa %ymm15,256(%rbp) 6288 movq $10,%r10 6289 1: 6290 vmovdqa %ymm8,128(%rbp) 6291 vmovdqa .rol16(%rip),%ymm8 6292 vpaddd %ymm7,%ymm3,%ymm3 6293 vpaddd %ymm6,%ymm2,%ymm2 6294 vpaddd %ymm5,%ymm1,%ymm1 6295 vpaddd %ymm4,%ymm0,%ymm0 6296 vpxor %ymm3,%ymm15,%ymm15 6297 vpxor %ymm2,%ymm14,%ymm14 6298 vpxor %ymm1,%ymm13,%ymm13 6299 vpxor %ymm0,%ymm12,%ymm12 6300 vpshufb %ymm8,%ymm15,%ymm15 6301 vpshufb %ymm8,%ymm14,%ymm14 6302 vpshufb %ymm8,%ymm13,%ymm13 6303 vpshufb %ymm8,%ymm12,%ymm12 6304 vmovdqa 128(%rbp),%ymm8 6305 vpaddd %ymm15,%ymm11,%ymm11 6306 vpaddd %ymm14,%ymm10,%ymm10 6307 vpaddd %ymm13,%ymm9,%ymm9 6308 vpaddd %ymm12,%ymm8,%ymm8 6309 vpxor %ymm11,%ymm7,%ymm7 6310 vpxor %ymm10,%ymm6,%ymm6 6311 vpxor %ymm9,%ymm5,%ymm5 6312 vpxor %ymm8,%ymm4,%ymm4 6313 vmovdqa %ymm8,128(%rbp) 6314 vpsrld $20,%ymm7,%ymm8 6315 vpslld $32-20,%ymm7,%ymm7 6316 vpxor %ymm8,%ymm7,%ymm7 6317 vpsrld $20,%ymm6,%ymm8 6318 vpslld $32-20,%ymm6,%ymm6 6319 vpxor %ymm8,%ymm6,%ymm6 6320 vpsrld $20,%ymm5,%ymm8 6321 vpslld $32-20,%ymm5,%ymm5 6322 vpxor %ymm8,%ymm5,%ymm5 6323 vpsrld $20,%ymm4,%ymm8 6324 vpslld $32-20,%ymm4,%ymm4 6325 vpxor %ymm8,%ymm4,%ymm4 6326 vmovdqa .rol8(%rip),%ymm8 6327 vpaddd %ymm7,%ymm3,%ymm3 6328 vpaddd %ymm6,%ymm2,%ymm2 6329 vpaddd %ymm5,%ymm1,%ymm1 6330 vpaddd %ymm4,%ymm0,%ymm0 6331 vpxor %ymm3,%ymm15,%ymm15 6332 vpxor %ymm2,%ymm14,%ymm14 6333 vpxor %ymm1,%ymm13,%ymm13 6334 vpxor %ymm0,%ymm12,%ymm12 6335 vpshufb %ymm8,%ymm15,%ymm15 6336 vpshufb %ymm8,%ymm14,%ymm14 6337 vpshufb %ymm8,%ymm13,%ymm13 6338 vpshufb %ymm8,%ymm12,%ymm12 6339 vmovdqa 128(%rbp),%ymm8 6340 vpaddd %ymm15,%ymm11,%ymm11 6341 vpaddd %ymm14,%ymm10,%ymm10 6342 vpaddd %ymm13,%ymm9,%ymm9 6343 vpaddd %ymm12,%ymm8,%ymm8 6344 vpxor %ymm11,%ymm7,%ymm7 6345 vpxor %ymm10,%ymm6,%ymm6 6346 vpxor %ymm9,%ymm5,%ymm5 6347 vpxor %ymm8,%ymm4,%ymm4 6348 vmovdqa %ymm8,128(%rbp) 6349 vpsrld $25,%ymm7,%ymm8 6350 vpslld $32-25,%ymm7,%ymm7 6351 vpxor %ymm8,%ymm7,%ymm7 6352 vpsrld $25,%ymm6,%ymm8 6353 vpslld $32-25,%ymm6,%ymm6 6354 vpxor %ymm8,%ymm6,%ymm6 6355 vpsrld $25,%ymm5,%ymm8 6356 vpslld $32-25,%ymm5,%ymm5 6357 vpxor %ymm8,%ymm5,%ymm5 6358 vpsrld $25,%ymm4,%ymm8 6359 vpslld $32-25,%ymm4,%ymm4 6360 vpxor %ymm8,%ymm4,%ymm4 6361 vmovdqa 128(%rbp),%ymm8 6362 vpalignr $4,%ymm7,%ymm7,%ymm7 6363 vpalignr $8,%ymm11,%ymm11,%ymm11 6364 vpalignr $12,%ymm15,%ymm15,%ymm15 6365 vpalignr $4,%ymm6,%ymm6,%ymm6 6366 vpalignr $8,%ymm10,%ymm10,%ymm10 6367 vpalignr $12,%ymm14,%ymm14,%ymm14 6368 vpalignr $4,%ymm5,%ymm5,%ymm5 6369 vpalignr $8,%ymm9,%ymm9,%ymm9 6370 vpalignr $12,%ymm13,%ymm13,%ymm13 6371 vpalignr $4,%ymm4,%ymm4,%ymm4 6372 vpalignr $8,%ymm8,%ymm8,%ymm8 6373 vpalignr $12,%ymm12,%ymm12,%ymm12 6374 vmovdqa %ymm8,128(%rbp) 6375 vmovdqa .rol16(%rip),%ymm8 6376 vpaddd %ymm7,%ymm3,%ymm3 6377 vpaddd %ymm6,%ymm2,%ymm2 6378 vpaddd %ymm5,%ymm1,%ymm1 6379 vpaddd %ymm4,%ymm0,%ymm0 6380 vpxor %ymm3,%ymm15,%ymm15 6381 vpxor %ymm2,%ymm14,%ymm14 6382 vpxor %ymm1,%ymm13,%ymm13 6383 vpxor %ymm0,%ymm12,%ymm12 6384 vpshufb %ymm8,%ymm15,%ymm15 6385 vpshufb %ymm8,%ymm14,%ymm14 6386 vpshufb %ymm8,%ymm13,%ymm13 6387 vpshufb %ymm8,%ymm12,%ymm12 6388 vmovdqa 128(%rbp),%ymm8 6389 vpaddd %ymm15,%ymm11,%ymm11 6390 vpaddd %ymm14,%ymm10,%ymm10 6391 vpaddd %ymm13,%ymm9,%ymm9 6392 vpaddd %ymm12,%ymm8,%ymm8 6393 vpxor %ymm11,%ymm7,%ymm7 6394 vpxor %ymm10,%ymm6,%ymm6 6395 vpxor %ymm9,%ymm5,%ymm5 6396 vpxor %ymm8,%ymm4,%ymm4 6397 vmovdqa %ymm8,128(%rbp) 6398 vpsrld $20,%ymm7,%ymm8 6399 vpslld $32-20,%ymm7,%ymm7 6400 vpxor %ymm8,%ymm7,%ymm7 6401 vpsrld 
$20,%ymm6,%ymm8 6402 vpslld $32-20,%ymm6,%ymm6 6403 vpxor %ymm8,%ymm6,%ymm6 6404 vpsrld $20,%ymm5,%ymm8 6405 vpslld $32-20,%ymm5,%ymm5 6406 vpxor %ymm8,%ymm5,%ymm5 6407 vpsrld $20,%ymm4,%ymm8 6408 vpslld $32-20,%ymm4,%ymm4 6409 vpxor %ymm8,%ymm4,%ymm4 6410 vmovdqa .rol8(%rip),%ymm8 6411 vpaddd %ymm7,%ymm3,%ymm3 6412 vpaddd %ymm6,%ymm2,%ymm2 6413 vpaddd %ymm5,%ymm1,%ymm1 6414 vpaddd %ymm4,%ymm0,%ymm0 6415 vpxor %ymm3,%ymm15,%ymm15 6416 vpxor %ymm2,%ymm14,%ymm14 6417 vpxor %ymm1,%ymm13,%ymm13 6418 vpxor %ymm0,%ymm12,%ymm12 6419 vpshufb %ymm8,%ymm15,%ymm15 6420 vpshufb %ymm8,%ymm14,%ymm14 6421 vpshufb %ymm8,%ymm13,%ymm13 6422 vpshufb %ymm8,%ymm12,%ymm12 6423 vmovdqa 128(%rbp),%ymm8 6424 vpaddd %ymm15,%ymm11,%ymm11 6425 vpaddd %ymm14,%ymm10,%ymm10 6426 vpaddd %ymm13,%ymm9,%ymm9 6427 vpaddd %ymm12,%ymm8,%ymm8 6428 vpxor %ymm11,%ymm7,%ymm7 6429 vpxor %ymm10,%ymm6,%ymm6 6430 vpxor %ymm9,%ymm5,%ymm5 6431 vpxor %ymm8,%ymm4,%ymm4 6432 vmovdqa %ymm8,128(%rbp) 6433 vpsrld $25,%ymm7,%ymm8 6434 vpslld $32-25,%ymm7,%ymm7 6435 vpxor %ymm8,%ymm7,%ymm7 6436 vpsrld $25,%ymm6,%ymm8 6437 vpslld $32-25,%ymm6,%ymm6 6438 vpxor %ymm8,%ymm6,%ymm6 6439 vpsrld $25,%ymm5,%ymm8 6440 vpslld $32-25,%ymm5,%ymm5 6441 vpxor %ymm8,%ymm5,%ymm5 6442 vpsrld $25,%ymm4,%ymm8 6443 vpslld $32-25,%ymm4,%ymm4 6444 vpxor %ymm8,%ymm4,%ymm4 6445 vmovdqa 128(%rbp),%ymm8 6446 vpalignr $12,%ymm7,%ymm7,%ymm7 6447 vpalignr $8,%ymm11,%ymm11,%ymm11 6448 vpalignr $4,%ymm15,%ymm15,%ymm15 6449 vpalignr $12,%ymm6,%ymm6,%ymm6 6450 vpalignr $8,%ymm10,%ymm10,%ymm10 6451 vpalignr $4,%ymm14,%ymm14,%ymm14 6452 vpalignr $12,%ymm5,%ymm5,%ymm5 6453 vpalignr $8,%ymm9,%ymm9,%ymm9 6454 vpalignr $4,%ymm13,%ymm13,%ymm13 6455 vpalignr $12,%ymm4,%ymm4,%ymm4 6456 vpalignr $8,%ymm8,%ymm8,%ymm8 6457 vpalignr $4,%ymm12,%ymm12,%ymm12 6458 6459 decq %r10 6460 jnz 1b 6461 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 6462 vpaddd 64(%rbp),%ymm7,%ymm7 6463 vpaddd 96(%rbp),%ymm11,%ymm11 6464 vpaddd 256(%rbp),%ymm15,%ymm15 6465 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 6466 vpaddd 64(%rbp),%ymm6,%ymm6 6467 vpaddd 96(%rbp),%ymm10,%ymm10 6468 vpaddd 224(%rbp),%ymm14,%ymm14 6469 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 6470 vpaddd 64(%rbp),%ymm5,%ymm5 6471 vpaddd 96(%rbp),%ymm9,%ymm9 6472 vpaddd 192(%rbp),%ymm13,%ymm13 6473 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 6474 vpaddd 64(%rbp),%ymm4,%ymm4 6475 vpaddd 96(%rbp),%ymm8,%ymm8 6476 vpaddd 160(%rbp),%ymm12,%ymm12 6477 6478 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6479 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6480 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6481 vpand .clamp(%rip),%ymm15,%ymm15 6482 vmovdqa %ymm15,0(%rbp) 6483 movq %r8,%r8 6484 call poly_hash_ad_internal 6485 6486 vpxor 0(%rsi),%ymm3,%ymm3 6487 vpxor 32(%rsi),%ymm11,%ymm11 6488 vmovdqu %ymm3,0(%rdi) 6489 vmovdqu %ymm11,32(%rdi) 6490 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6491 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6492 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6493 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6494 vpxor 0+64(%rsi),%ymm15,%ymm15 6495 vpxor 32+64(%rsi),%ymm2,%ymm2 6496 vpxor 64+64(%rsi),%ymm6,%ymm6 6497 vpxor 96+64(%rsi),%ymm10,%ymm10 6498 vmovdqu %ymm15,0+64(%rdi) 6499 vmovdqu %ymm2,32+64(%rdi) 6500 vmovdqu %ymm6,64+64(%rdi) 6501 vmovdqu %ymm10,96+64(%rdi) 6502 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6503 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6504 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6505 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6506 vpxor 0+192(%rsi),%ymm15,%ymm15 6507 vpxor 32+192(%rsi),%ymm1,%ymm1 6508 vpxor 64+192(%rsi),%ymm5,%ymm5 6509 vpxor 96+192(%rsi),%ymm9,%ymm9 6510 vmovdqu 
%ymm15,0+192(%rdi) 6511 vmovdqu %ymm1,32+192(%rdi) 6512 vmovdqu %ymm5,64+192(%rdi) 6513 vmovdqu %ymm9,96+192(%rdi) 6514 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6515 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6516 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6517 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6518 vmovdqa %ymm15,%ymm8 6519 6520 leaq 320(%rsi),%rsi 6521 subq $320,%rbx 6522 movq $320,%rcx 6523 cmpq $128,%rbx 6524 jbe seal_avx2_hash 6525 vpxor 0(%rsi),%ymm0,%ymm0 6526 vpxor 32(%rsi),%ymm4,%ymm4 6527 vpxor 64(%rsi),%ymm8,%ymm8 6528 vpxor 96(%rsi),%ymm12,%ymm12 6529 vmovdqu %ymm0,320(%rdi) 6530 vmovdqu %ymm4,352(%rdi) 6531 vmovdqu %ymm8,384(%rdi) 6532 vmovdqu %ymm12,416(%rdi) 6533 leaq 128(%rsi),%rsi 6534 subq $128,%rbx 6535 movq $8,%rcx 6536 movq $2,%r8 6537 cmpq $128,%rbx 6538 jbe seal_avx2_tail_128 6539 cmpq $256,%rbx 6540 jbe seal_avx2_tail_256 6541 cmpq $384,%rbx 6542 jbe seal_avx2_tail_384 6543 cmpq $512,%rbx 6544 jbe seal_avx2_tail_512 6545 vmovdqa .chacha20_consts(%rip),%ymm0 6546 vmovdqa 64(%rbp),%ymm4 6547 vmovdqa 96(%rbp),%ymm8 6548 vmovdqa %ymm0,%ymm1 6549 vmovdqa %ymm4,%ymm5 6550 vmovdqa %ymm8,%ymm9 6551 vmovdqa %ymm0,%ymm2 6552 vmovdqa %ymm4,%ymm6 6553 vmovdqa %ymm8,%ymm10 6554 vmovdqa %ymm0,%ymm3 6555 vmovdqa %ymm4,%ymm7 6556 vmovdqa %ymm8,%ymm11 6557 vmovdqa .avx2_inc(%rip),%ymm12 6558 vpaddd 160(%rbp),%ymm12,%ymm15 6559 vpaddd %ymm15,%ymm12,%ymm14 6560 vpaddd %ymm14,%ymm12,%ymm13 6561 vpaddd %ymm13,%ymm12,%ymm12 6562 vmovdqa %ymm15,256(%rbp) 6563 vmovdqa %ymm14,224(%rbp) 6564 vmovdqa %ymm13,192(%rbp) 6565 vmovdqa %ymm12,160(%rbp) 6566 vmovdqa %ymm8,128(%rbp) 6567 vmovdqa .rol16(%rip),%ymm8 6568 vpaddd %ymm7,%ymm3,%ymm3 6569 vpaddd %ymm6,%ymm2,%ymm2 6570 vpaddd %ymm5,%ymm1,%ymm1 6571 vpaddd %ymm4,%ymm0,%ymm0 6572 vpxor %ymm3,%ymm15,%ymm15 6573 vpxor %ymm2,%ymm14,%ymm14 6574 vpxor %ymm1,%ymm13,%ymm13 6575 vpxor %ymm0,%ymm12,%ymm12 6576 vpshufb %ymm8,%ymm15,%ymm15 6577 vpshufb %ymm8,%ymm14,%ymm14 6578 vpshufb %ymm8,%ymm13,%ymm13 6579 vpshufb %ymm8,%ymm12,%ymm12 6580 vmovdqa 128(%rbp),%ymm8 6581 vpaddd %ymm15,%ymm11,%ymm11 6582 vpaddd %ymm14,%ymm10,%ymm10 6583 vpaddd %ymm13,%ymm9,%ymm9 6584 vpaddd %ymm12,%ymm8,%ymm8 6585 vpxor %ymm11,%ymm7,%ymm7 6586 vpxor %ymm10,%ymm6,%ymm6 6587 vpxor %ymm9,%ymm5,%ymm5 6588 vpxor %ymm8,%ymm4,%ymm4 6589 vmovdqa %ymm8,128(%rbp) 6590 vpsrld $20,%ymm7,%ymm8 6591 vpslld $32-20,%ymm7,%ymm7 6592 vpxor %ymm8,%ymm7,%ymm7 6593 vpsrld $20,%ymm6,%ymm8 6594 vpslld $32-20,%ymm6,%ymm6 6595 vpxor %ymm8,%ymm6,%ymm6 6596 vpsrld $20,%ymm5,%ymm8 6597 vpslld $32-20,%ymm5,%ymm5 6598 vpxor %ymm8,%ymm5,%ymm5 6599 vpsrld $20,%ymm4,%ymm8 6600 vpslld $32-20,%ymm4,%ymm4 6601 vpxor %ymm8,%ymm4,%ymm4 6602 vmovdqa .rol8(%rip),%ymm8 6603 vpaddd %ymm7,%ymm3,%ymm3 6604 vpaddd %ymm6,%ymm2,%ymm2 6605 vpaddd %ymm5,%ymm1,%ymm1 6606 vpaddd %ymm4,%ymm0,%ymm0 6607 vpxor %ymm3,%ymm15,%ymm15 6608 vpxor %ymm2,%ymm14,%ymm14 6609 vpxor %ymm1,%ymm13,%ymm13 6610 vpxor %ymm0,%ymm12,%ymm12 6611 vpshufb %ymm8,%ymm15,%ymm15 6612 vpshufb %ymm8,%ymm14,%ymm14 6613 vpshufb %ymm8,%ymm13,%ymm13 6614 vpshufb %ymm8,%ymm12,%ymm12 6615 vmovdqa 128(%rbp),%ymm8 6616 vpaddd %ymm15,%ymm11,%ymm11 6617 vpaddd %ymm14,%ymm10,%ymm10 6618 vpaddd %ymm13,%ymm9,%ymm9 6619 vpaddd %ymm12,%ymm8,%ymm8 6620 vpxor %ymm11,%ymm7,%ymm7 6621 vpxor %ymm10,%ymm6,%ymm6 6622 vpxor %ymm9,%ymm5,%ymm5 6623 vpxor %ymm8,%ymm4,%ymm4 6624 vmovdqa %ymm8,128(%rbp) 6625 vpsrld $25,%ymm7,%ymm8 6626 vpslld $32-25,%ymm7,%ymm7 6627 vpxor %ymm8,%ymm7,%ymm7 6628 vpsrld $25,%ymm6,%ymm8 6629 vpslld $32-25,%ymm6,%ymm6 6630 vpxor %ymm8,%ymm6,%ymm6 6631 vpsrld 
$25,%ymm5,%ymm8 6632 vpslld $32-25,%ymm5,%ymm5 6633 vpxor %ymm8,%ymm5,%ymm5 6634 vpsrld $25,%ymm4,%ymm8 6635 vpslld $32-25,%ymm4,%ymm4 6636 vpxor %ymm8,%ymm4,%ymm4 6637 vmovdqa 128(%rbp),%ymm8 6638 vpalignr $4,%ymm7,%ymm7,%ymm7 6639 vpalignr $8,%ymm11,%ymm11,%ymm11 6640 vpalignr $12,%ymm15,%ymm15,%ymm15 6641 vpalignr $4,%ymm6,%ymm6,%ymm6 6642 vpalignr $8,%ymm10,%ymm10,%ymm10 6643 vpalignr $12,%ymm14,%ymm14,%ymm14 6644 vpalignr $4,%ymm5,%ymm5,%ymm5 6645 vpalignr $8,%ymm9,%ymm9,%ymm9 6646 vpalignr $12,%ymm13,%ymm13,%ymm13 6647 vpalignr $4,%ymm4,%ymm4,%ymm4 6648 vpalignr $8,%ymm8,%ymm8,%ymm8 6649 vpalignr $12,%ymm12,%ymm12,%ymm12 6650 vmovdqa %ymm8,128(%rbp) 6651 vmovdqa .rol16(%rip),%ymm8 6652 vpaddd %ymm7,%ymm3,%ymm3 6653 vpaddd %ymm6,%ymm2,%ymm2 6654 vpaddd %ymm5,%ymm1,%ymm1 6655 vpaddd %ymm4,%ymm0,%ymm0 6656 vpxor %ymm3,%ymm15,%ymm15 6657 vpxor %ymm2,%ymm14,%ymm14 6658 vpxor %ymm1,%ymm13,%ymm13 6659 vpxor %ymm0,%ymm12,%ymm12 6660 vpshufb %ymm8,%ymm15,%ymm15 6661 vpshufb %ymm8,%ymm14,%ymm14 6662 vpshufb %ymm8,%ymm13,%ymm13 6663 vpshufb %ymm8,%ymm12,%ymm12 6664 vmovdqa 128(%rbp),%ymm8 6665 vpaddd %ymm15,%ymm11,%ymm11 6666 vpaddd %ymm14,%ymm10,%ymm10 6667 vpaddd %ymm13,%ymm9,%ymm9 6668 vpaddd %ymm12,%ymm8,%ymm8 6669 vpxor %ymm11,%ymm7,%ymm7 6670 vpxor %ymm10,%ymm6,%ymm6 6671 vpxor %ymm9,%ymm5,%ymm5 6672 vpxor %ymm8,%ymm4,%ymm4 6673 vmovdqa %ymm8,128(%rbp) 6674 vpsrld $20,%ymm7,%ymm8 6675 vpslld $32-20,%ymm7,%ymm7 6676 vpxor %ymm8,%ymm7,%ymm7 6677 vpsrld $20,%ymm6,%ymm8 6678 vpslld $32-20,%ymm6,%ymm6 6679 vpxor %ymm8,%ymm6,%ymm6 6680 vpsrld $20,%ymm5,%ymm8 6681 vpslld $32-20,%ymm5,%ymm5 6682 vpxor %ymm8,%ymm5,%ymm5 6683 vpsrld $20,%ymm4,%ymm8 6684 vpslld $32-20,%ymm4,%ymm4 6685 vpxor %ymm8,%ymm4,%ymm4 6686 vmovdqa .rol8(%rip),%ymm8 6687 vpaddd %ymm7,%ymm3,%ymm3 6688 vpaddd %ymm6,%ymm2,%ymm2 6689 vpaddd %ymm5,%ymm1,%ymm1 6690 vpaddd %ymm4,%ymm0,%ymm0 6691 vpxor %ymm3,%ymm15,%ymm15 6692 vpxor %ymm2,%ymm14,%ymm14 6693 vpxor %ymm1,%ymm13,%ymm13 6694 vpxor %ymm0,%ymm12,%ymm12 6695 vpshufb %ymm8,%ymm15,%ymm15 6696 vpshufb %ymm8,%ymm14,%ymm14 6697 vpshufb %ymm8,%ymm13,%ymm13 6698 vpshufb %ymm8,%ymm12,%ymm12 6699 vmovdqa 128(%rbp),%ymm8 6700 vpaddd %ymm15,%ymm11,%ymm11 6701 vpaddd %ymm14,%ymm10,%ymm10 6702 vpaddd %ymm13,%ymm9,%ymm9 6703 vpaddd %ymm12,%ymm8,%ymm8 6704 vpxor %ymm11,%ymm7,%ymm7 6705 vpxor %ymm10,%ymm6,%ymm6 6706 vpxor %ymm9,%ymm5,%ymm5 6707 vpxor %ymm8,%ymm4,%ymm4 6708 vmovdqa %ymm8,128(%rbp) 6709 vpsrld $25,%ymm7,%ymm8 6710 vpslld $32-25,%ymm7,%ymm7 6711 vpxor %ymm8,%ymm7,%ymm7 6712 vpsrld $25,%ymm6,%ymm8 6713 vpslld $32-25,%ymm6,%ymm6 6714 vpxor %ymm8,%ymm6,%ymm6 6715 vpsrld $25,%ymm5,%ymm8 6716 vpslld $32-25,%ymm5,%ymm5 6717 vpxor %ymm8,%ymm5,%ymm5 6718 vpsrld $25,%ymm4,%ymm8 6719 vpslld $32-25,%ymm4,%ymm4 6720 vpxor %ymm8,%ymm4,%ymm4 6721 vmovdqa 128(%rbp),%ymm8 6722 vpalignr $12,%ymm7,%ymm7,%ymm7 6723 vpalignr $8,%ymm11,%ymm11,%ymm11 6724 vpalignr $4,%ymm15,%ymm15,%ymm15 6725 vpalignr $12,%ymm6,%ymm6,%ymm6 6726 vpalignr $8,%ymm10,%ymm10,%ymm10 6727 vpalignr $4,%ymm14,%ymm14,%ymm14 6728 vpalignr $12,%ymm5,%ymm5,%ymm5 6729 vpalignr $8,%ymm9,%ymm9,%ymm9 6730 vpalignr $4,%ymm13,%ymm13,%ymm13 6731 vpalignr $12,%ymm4,%ymm4,%ymm4 6732 vpalignr $8,%ymm8,%ymm8,%ymm8 6733 vpalignr $4,%ymm12,%ymm12,%ymm12 6734 vmovdqa %ymm8,128(%rbp) 6735 vmovdqa .rol16(%rip),%ymm8 6736 vpaddd %ymm7,%ymm3,%ymm3 6737 vpaddd %ymm6,%ymm2,%ymm2 6738 vpaddd %ymm5,%ymm1,%ymm1 6739 vpaddd %ymm4,%ymm0,%ymm0 6740 vpxor %ymm3,%ymm15,%ymm15 6741 vpxor %ymm2,%ymm14,%ymm14 6742 vpxor %ymm1,%ymm13,%ymm13 6743 vpxor 
%ymm0,%ymm12,%ymm12 6744 vpshufb %ymm8,%ymm15,%ymm15 6745 vpshufb %ymm8,%ymm14,%ymm14 6746 vpshufb %ymm8,%ymm13,%ymm13 6747 vpshufb %ymm8,%ymm12,%ymm12 6748 vmovdqa 128(%rbp),%ymm8 6749 vpaddd %ymm15,%ymm11,%ymm11 6750 vpaddd %ymm14,%ymm10,%ymm10 6751 vpaddd %ymm13,%ymm9,%ymm9 6752 vpaddd %ymm12,%ymm8,%ymm8 6753 vpxor %ymm11,%ymm7,%ymm7 6754 vpxor %ymm10,%ymm6,%ymm6 6755 vpxor %ymm9,%ymm5,%ymm5 6756 vpxor %ymm8,%ymm4,%ymm4 6757 vmovdqa %ymm8,128(%rbp) 6758 vpsrld $20,%ymm7,%ymm8 6759 vpslld $32-20,%ymm7,%ymm7 6760 vpxor %ymm8,%ymm7,%ymm7 6761 vpsrld $20,%ymm6,%ymm8 6762 vpslld $32-20,%ymm6,%ymm6 6763 vpxor %ymm8,%ymm6,%ymm6 6764 vpsrld $20,%ymm5,%ymm8 6765 vpslld $32-20,%ymm5,%ymm5 6766 vpxor %ymm8,%ymm5,%ymm5 6767 vpsrld $20,%ymm4,%ymm8 6768 vpslld $32-20,%ymm4,%ymm4 6769 vpxor %ymm8,%ymm4,%ymm4 6770 vmovdqa .rol8(%rip),%ymm8 6771 vpaddd %ymm7,%ymm3,%ymm3 6772 vpaddd %ymm6,%ymm2,%ymm2 6773 vpaddd %ymm5,%ymm1,%ymm1 6774 vpaddd %ymm4,%ymm0,%ymm0 6775 6776 subq $16,%rdi 6777 movq $9,%rcx 6778 jmp 4f 6779 1: 6780 vmovdqa .chacha20_consts(%rip),%ymm0 6781 vmovdqa 64(%rbp),%ymm4 6782 vmovdqa 96(%rbp),%ymm8 6783 vmovdqa %ymm0,%ymm1 6784 vmovdqa %ymm4,%ymm5 6785 vmovdqa %ymm8,%ymm9 6786 vmovdqa %ymm0,%ymm2 6787 vmovdqa %ymm4,%ymm6 6788 vmovdqa %ymm8,%ymm10 6789 vmovdqa %ymm0,%ymm3 6790 vmovdqa %ymm4,%ymm7 6791 vmovdqa %ymm8,%ymm11 6792 vmovdqa .avx2_inc(%rip),%ymm12 6793 vpaddd 160(%rbp),%ymm12,%ymm15 6794 vpaddd %ymm15,%ymm12,%ymm14 6795 vpaddd %ymm14,%ymm12,%ymm13 6796 vpaddd %ymm13,%ymm12,%ymm12 6797 vmovdqa %ymm15,256(%rbp) 6798 vmovdqa %ymm14,224(%rbp) 6799 vmovdqa %ymm13,192(%rbp) 6800 vmovdqa %ymm12,160(%rbp) 6801 6802 movq $10,%rcx 6803 2: 6804 addq 0(%rdi),%r10 6805 adcq 8+0(%rdi),%r11 6806 adcq $1,%r12 6807 vmovdqa %ymm8,128(%rbp) 6808 vmovdqa .rol16(%rip),%ymm8 6809 vpaddd %ymm7,%ymm3,%ymm3 6810 vpaddd %ymm6,%ymm2,%ymm2 6811 vpaddd %ymm5,%ymm1,%ymm1 6812 vpaddd %ymm4,%ymm0,%ymm0 6813 vpxor %ymm3,%ymm15,%ymm15 6814 vpxor %ymm2,%ymm14,%ymm14 6815 vpxor %ymm1,%ymm13,%ymm13 6816 vpxor %ymm0,%ymm12,%ymm12 6817 movq 0+0(%rbp),%rdx 6818 movq %rdx,%r15 6819 mulxq %r10,%r13,%r14 6820 mulxq %r11,%rax,%rdx 6821 imulq %r12,%r15 6822 addq %rax,%r14 6823 adcq %rdx,%r15 6824 vpshufb %ymm8,%ymm15,%ymm15 6825 vpshufb %ymm8,%ymm14,%ymm14 6826 vpshufb %ymm8,%ymm13,%ymm13 6827 vpshufb %ymm8,%ymm12,%ymm12 6828 vmovdqa 128(%rbp),%ymm8 6829 vpaddd %ymm15,%ymm11,%ymm11 6830 vpaddd %ymm14,%ymm10,%ymm10 6831 vpaddd %ymm13,%ymm9,%ymm9 6832 vpaddd %ymm12,%ymm8,%ymm8 6833 movq 8+0(%rbp),%rdx 6834 mulxq %r10,%r10,%rax 6835 addq %r10,%r14 6836 mulxq %r11,%r11,%r9 6837 adcq %r11,%r15 6838 adcq $0,%r9 6839 imulq %r12,%rdx 6840 vpxor %ymm11,%ymm7,%ymm7 6841 vpxor %ymm10,%ymm6,%ymm6 6842 vpxor %ymm9,%ymm5,%ymm5 6843 vpxor %ymm8,%ymm4,%ymm4 6844 vmovdqa %ymm8,128(%rbp) 6845 vpsrld $20,%ymm7,%ymm8 6846 vpslld $32-20,%ymm7,%ymm7 6847 vpxor %ymm8,%ymm7,%ymm7 6848 vpsrld $20,%ymm6,%ymm8 6849 vpslld $32-20,%ymm6,%ymm6 6850 vpxor %ymm8,%ymm6,%ymm6 6851 vpsrld $20,%ymm5,%ymm8 6852 addq %rax,%r15 6853 adcq %rdx,%r9 6854 vpslld $32-20,%ymm5,%ymm5 6855 vpxor %ymm8,%ymm5,%ymm5 6856 vpsrld $20,%ymm4,%ymm8 6857 vpslld $32-20,%ymm4,%ymm4 6858 vpxor %ymm8,%ymm4,%ymm4 6859 vmovdqa .rol8(%rip),%ymm8 6860 vpaddd %ymm7,%ymm3,%ymm3 6861 vpaddd %ymm6,%ymm2,%ymm2 6862 vpaddd %ymm5,%ymm1,%ymm1 6863 vpaddd %ymm4,%ymm0,%ymm0 6864 movq %r13,%r10 6865 movq %r14,%r11 6866 movq %r15,%r12 6867 andq $3,%r12 6868 movq %r15,%r13 6869 andq $-4,%r13 6870 movq %r9,%r14 6871 shrdq $2,%r9,%r15 6872 shrq $2,%r9 6873 addq %r13,%r10 6874 adcq %r14,%r11 6875 
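/* Main seal loop (labels 1:/2:/4:): each pass interleaves one ChaCha20 double
   round over four blocks with Poly1305 absorption of ciphertext already written
   at (%rdi). */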
adcq $0,%r12 6876 addq %r15,%r10 6877 adcq %r9,%r11 6878 adcq $0,%r12 6879 6880 4: 6881 vpxor %ymm3,%ymm15,%ymm15 6882 vpxor %ymm2,%ymm14,%ymm14 6883 vpxor %ymm1,%ymm13,%ymm13 6884 vpxor %ymm0,%ymm12,%ymm12 6885 vpshufb %ymm8,%ymm15,%ymm15 6886 vpshufb %ymm8,%ymm14,%ymm14 6887 vpshufb %ymm8,%ymm13,%ymm13 6888 vpshufb %ymm8,%ymm12,%ymm12 6889 vmovdqa 128(%rbp),%ymm8 6890 addq 16(%rdi),%r10 6891 adcq 8+16(%rdi),%r11 6892 adcq $1,%r12 6893 vpaddd %ymm15,%ymm11,%ymm11 6894 vpaddd %ymm14,%ymm10,%ymm10 6895 vpaddd %ymm13,%ymm9,%ymm9 6896 vpaddd %ymm12,%ymm8,%ymm8 6897 vpxor %ymm11,%ymm7,%ymm7 6898 vpxor %ymm10,%ymm6,%ymm6 6899 vpxor %ymm9,%ymm5,%ymm5 6900 vpxor %ymm8,%ymm4,%ymm4 6901 movq 0+0(%rbp),%rdx 6902 movq %rdx,%r15 6903 mulxq %r10,%r13,%r14 6904 mulxq %r11,%rax,%rdx 6905 imulq %r12,%r15 6906 addq %rax,%r14 6907 adcq %rdx,%r15 6908 vmovdqa %ymm8,128(%rbp) 6909 vpsrld $25,%ymm7,%ymm8 6910 vpslld $32-25,%ymm7,%ymm7 6911 vpxor %ymm8,%ymm7,%ymm7 6912 vpsrld $25,%ymm6,%ymm8 6913 vpslld $32-25,%ymm6,%ymm6 6914 vpxor %ymm8,%ymm6,%ymm6 6915 vpsrld $25,%ymm5,%ymm8 6916 vpslld $32-25,%ymm5,%ymm5 6917 vpxor %ymm8,%ymm5,%ymm5 6918 vpsrld $25,%ymm4,%ymm8 6919 vpslld $32-25,%ymm4,%ymm4 6920 vpxor %ymm8,%ymm4,%ymm4 6921 vmovdqa 128(%rbp),%ymm8 6922 vpalignr $4,%ymm7,%ymm7,%ymm7 6923 vpalignr $8,%ymm11,%ymm11,%ymm11 6924 vpalignr $12,%ymm15,%ymm15,%ymm15 6925 vpalignr $4,%ymm6,%ymm6,%ymm6 6926 movq 8+0(%rbp),%rdx 6927 mulxq %r10,%r10,%rax 6928 addq %r10,%r14 6929 mulxq %r11,%r11,%r9 6930 adcq %r11,%r15 6931 adcq $0,%r9 6932 imulq %r12,%rdx 6933 vpalignr $8,%ymm10,%ymm10,%ymm10 6934 vpalignr $12,%ymm14,%ymm14,%ymm14 6935 vpalignr $4,%ymm5,%ymm5,%ymm5 6936 vpalignr $8,%ymm9,%ymm9,%ymm9 6937 vpalignr $12,%ymm13,%ymm13,%ymm13 6938 vpalignr $4,%ymm4,%ymm4,%ymm4 6939 vpalignr $8,%ymm8,%ymm8,%ymm8 6940 vpalignr $12,%ymm12,%ymm12,%ymm12 6941 vmovdqa %ymm8,128(%rbp) 6942 vmovdqa .rol16(%rip),%ymm8 6943 vpaddd %ymm7,%ymm3,%ymm3 6944 vpaddd %ymm6,%ymm2,%ymm2 6945 vpaddd %ymm5,%ymm1,%ymm1 6946 vpaddd %ymm4,%ymm0,%ymm0 6947 vpxor %ymm3,%ymm15,%ymm15 6948 vpxor %ymm2,%ymm14,%ymm14 6949 vpxor %ymm1,%ymm13,%ymm13 6950 vpxor %ymm0,%ymm12,%ymm12 6951 addq %rax,%r15 6952 adcq %rdx,%r9 6953 vpshufb %ymm8,%ymm15,%ymm15 6954 vpshufb %ymm8,%ymm14,%ymm14 6955 vpshufb %ymm8,%ymm13,%ymm13 6956 vpshufb %ymm8,%ymm12,%ymm12 6957 vmovdqa 128(%rbp),%ymm8 6958 vpaddd %ymm15,%ymm11,%ymm11 6959 vpaddd %ymm14,%ymm10,%ymm10 6960 vpaddd %ymm13,%ymm9,%ymm9 6961 vpaddd %ymm12,%ymm8,%ymm8 6962 movq %r13,%r10 6963 movq %r14,%r11 6964 movq %r15,%r12 6965 andq $3,%r12 6966 movq %r15,%r13 6967 andq $-4,%r13 6968 movq %r9,%r14 6969 shrdq $2,%r9,%r15 6970 shrq $2,%r9 6971 addq %r13,%r10 6972 adcq %r14,%r11 6973 adcq $0,%r12 6974 addq %r15,%r10 6975 adcq %r9,%r11 6976 adcq $0,%r12 6977 vpxor %ymm11,%ymm7,%ymm7 6978 vpxor %ymm10,%ymm6,%ymm6 6979 vpxor %ymm9,%ymm5,%ymm5 6980 vpxor %ymm8,%ymm4,%ymm4 6981 vmovdqa %ymm8,128(%rbp) 6982 vpsrld $20,%ymm7,%ymm8 6983 vpslld $32-20,%ymm7,%ymm7 6984 vpxor %ymm8,%ymm7,%ymm7 6985 addq 32(%rdi),%r10 6986 adcq 8+32(%rdi),%r11 6987 adcq $1,%r12 6988 6989 leaq 48(%rdi),%rdi 6990 vpsrld $20,%ymm6,%ymm8 6991 vpslld $32-20,%ymm6,%ymm6 6992 vpxor %ymm8,%ymm6,%ymm6 6993 vpsrld $20,%ymm5,%ymm8 6994 vpslld $32-20,%ymm5,%ymm5 6995 vpxor %ymm8,%ymm5,%ymm5 6996 vpsrld $20,%ymm4,%ymm8 6997 vpslld $32-20,%ymm4,%ymm4 6998 vpxor %ymm8,%ymm4,%ymm4 6999 vmovdqa .rol8(%rip),%ymm8 7000 vpaddd %ymm7,%ymm3,%ymm3 7001 vpaddd %ymm6,%ymm2,%ymm2 7002 vpaddd %ymm5,%ymm1,%ymm1 7003 vpaddd %ymm4,%ymm0,%ymm0 7004 vpxor %ymm3,%ymm15,%ymm15 7005 
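/* Register pressure note: with four blocks in flight %ymm8 is spilled to
   128(%rbp) so its slot can hold the .rol16/.rol8 masks and rotation
   temporaries; the Poly1305 limbs stay in general-purpose registers. */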
vpxor %ymm2,%ymm14,%ymm14 7006 vpxor %ymm1,%ymm13,%ymm13 7007 vpxor %ymm0,%ymm12,%ymm12 7008 movq 0+0(%rbp),%rdx 7009 movq %rdx,%r15 7010 mulxq %r10,%r13,%r14 7011 mulxq %r11,%rax,%rdx 7012 imulq %r12,%r15 7013 addq %rax,%r14 7014 adcq %rdx,%r15 7015 vpshufb %ymm8,%ymm15,%ymm15 7016 vpshufb %ymm8,%ymm14,%ymm14 7017 vpshufb %ymm8,%ymm13,%ymm13 7018 vpshufb %ymm8,%ymm12,%ymm12 7019 vmovdqa 128(%rbp),%ymm8 7020 vpaddd %ymm15,%ymm11,%ymm11 7021 vpaddd %ymm14,%ymm10,%ymm10 7022 vpaddd %ymm13,%ymm9,%ymm9 7023 movq 8+0(%rbp),%rdx 7024 mulxq %r10,%r10,%rax 7025 addq %r10,%r14 7026 mulxq %r11,%r11,%r9 7027 adcq %r11,%r15 7028 adcq $0,%r9 7029 imulq %r12,%rdx 7030 vpaddd %ymm12,%ymm8,%ymm8 7031 vpxor %ymm11,%ymm7,%ymm7 7032 vpxor %ymm10,%ymm6,%ymm6 7033 vpxor %ymm9,%ymm5,%ymm5 7034 vpxor %ymm8,%ymm4,%ymm4 7035 vmovdqa %ymm8,128(%rbp) 7036 vpsrld $25,%ymm7,%ymm8 7037 vpslld $32-25,%ymm7,%ymm7 7038 addq %rax,%r15 7039 adcq %rdx,%r9 7040 vpxor %ymm8,%ymm7,%ymm7 7041 vpsrld $25,%ymm6,%ymm8 7042 vpslld $32-25,%ymm6,%ymm6 7043 vpxor %ymm8,%ymm6,%ymm6 7044 vpsrld $25,%ymm5,%ymm8 7045 vpslld $32-25,%ymm5,%ymm5 7046 vpxor %ymm8,%ymm5,%ymm5 7047 vpsrld $25,%ymm4,%ymm8 7048 vpslld $32-25,%ymm4,%ymm4 7049 vpxor %ymm8,%ymm4,%ymm4 7050 vmovdqa 128(%rbp),%ymm8 7051 vpalignr $12,%ymm7,%ymm7,%ymm7 7052 vpalignr $8,%ymm11,%ymm11,%ymm11 7053 vpalignr $4,%ymm15,%ymm15,%ymm15 7054 vpalignr $12,%ymm6,%ymm6,%ymm6 7055 vpalignr $8,%ymm10,%ymm10,%ymm10 7056 vpalignr $4,%ymm14,%ymm14,%ymm14 7057 vpalignr $12,%ymm5,%ymm5,%ymm5 7058 movq %r13,%r10 7059 movq %r14,%r11 7060 movq %r15,%r12 7061 andq $3,%r12 7062 movq %r15,%r13 7063 andq $-4,%r13 7064 movq %r9,%r14 7065 shrdq $2,%r9,%r15 7066 shrq $2,%r9 7067 addq %r13,%r10 7068 adcq %r14,%r11 7069 adcq $0,%r12 7070 addq %r15,%r10 7071 adcq %r9,%r11 7072 adcq $0,%r12 7073 vpalignr $8,%ymm9,%ymm9,%ymm9 7074 vpalignr $4,%ymm13,%ymm13,%ymm13 7075 vpalignr $12,%ymm4,%ymm4,%ymm4 7076 vpalignr $8,%ymm8,%ymm8,%ymm8 7077 vpalignr $4,%ymm12,%ymm12,%ymm12 7078 7079 decq %rcx 7080 jne 2b 7081 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 7082 vpaddd 64(%rbp),%ymm7,%ymm7 7083 vpaddd 96(%rbp),%ymm11,%ymm11 7084 vpaddd 256(%rbp),%ymm15,%ymm15 7085 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 7086 vpaddd 64(%rbp),%ymm6,%ymm6 7087 vpaddd 96(%rbp),%ymm10,%ymm10 7088 vpaddd 224(%rbp),%ymm14,%ymm14 7089 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 7090 vpaddd 64(%rbp),%ymm5,%ymm5 7091 vpaddd 96(%rbp),%ymm9,%ymm9 7092 vpaddd 192(%rbp),%ymm13,%ymm13 7093 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7094 vpaddd 64(%rbp),%ymm4,%ymm4 7095 vpaddd 96(%rbp),%ymm8,%ymm8 7096 vpaddd 160(%rbp),%ymm12,%ymm12 7097 7098 leaq 32(%rdi),%rdi 7099 vmovdqa %ymm0,128(%rbp) 7100 addq -32(%rdi),%r10 7101 adcq 8+-32(%rdi),%r11 7102 adcq $1,%r12 7103 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 7104 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 7105 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 7106 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 7107 vpxor 0+0(%rsi),%ymm0,%ymm0 7108 vpxor 32+0(%rsi),%ymm3,%ymm3 7109 vpxor 64+0(%rsi),%ymm7,%ymm7 7110 vpxor 96+0(%rsi),%ymm11,%ymm11 7111 vmovdqu %ymm0,0+0(%rdi) 7112 vmovdqu %ymm3,32+0(%rdi) 7113 vmovdqu %ymm7,64+0(%rdi) 7114 vmovdqu %ymm11,96+0(%rdi) 7115 7116 vmovdqa 128(%rbp),%ymm0 7117 movq 0+0(%rbp),%rax 7118 movq %rax,%r15 7119 mulq %r10 7120 movq %rax,%r13 7121 movq %rdx,%r14 7122 movq 0+0(%rbp),%rax 7123 mulq %r11 7124 imulq %r12,%r15 7125 addq %rax,%r14 7126 adcq %rdx,%r15 7127 movq 8+0(%rbp),%rax 7128 movq %rax,%r9 7129 mulq %r10 7130 addq %rax,%r14 7131 adcq $0,%rdx 7132 movq %rdx,%r10 7133 movq 8+0(%rbp),%rax 7134 
mulq %r11 7135 addq %rax,%r15 7136 adcq $0,%rdx 7137 imulq %r12,%r9 7138 addq %r10,%r15 7139 adcq %rdx,%r9 7140 movq %r13,%r10 7141 movq %r14,%r11 7142 movq %r15,%r12 7143 andq $3,%r12 7144 movq %r15,%r13 7145 andq $-4,%r13 7146 movq %r9,%r14 7147 shrdq $2,%r9,%r15 7148 shrq $2,%r9 7149 addq %r13,%r10 7150 adcq %r14,%r11 7151 adcq $0,%r12 7152 addq %r15,%r10 7153 adcq %r9,%r11 7154 adcq $0,%r12 7155 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7156 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7157 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7158 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7159 vpxor 0+128(%rsi),%ymm3,%ymm3 7160 vpxor 32+128(%rsi),%ymm2,%ymm2 7161 vpxor 64+128(%rsi),%ymm6,%ymm6 7162 vpxor 96+128(%rsi),%ymm10,%ymm10 7163 vmovdqu %ymm3,0+128(%rdi) 7164 vmovdqu %ymm2,32+128(%rdi) 7165 vmovdqu %ymm6,64+128(%rdi) 7166 vmovdqu %ymm10,96+128(%rdi) 7167 addq -16(%rdi),%r10 7168 adcq 8+-16(%rdi),%r11 7169 adcq $1,%r12 7170 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7171 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7172 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7173 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7174 vpxor 0+256(%rsi),%ymm3,%ymm3 7175 vpxor 32+256(%rsi),%ymm1,%ymm1 7176 vpxor 64+256(%rsi),%ymm5,%ymm5 7177 vpxor 96+256(%rsi),%ymm9,%ymm9 7178 vmovdqu %ymm3,0+256(%rdi) 7179 vmovdqu %ymm1,32+256(%rdi) 7180 vmovdqu %ymm5,64+256(%rdi) 7181 vmovdqu %ymm9,96+256(%rdi) 7182 movq 0+0(%rbp),%rax 7183 movq %rax,%r15 7184 mulq %r10 7185 movq %rax,%r13 7186 movq %rdx,%r14 7187 movq 0+0(%rbp),%rax 7188 mulq %r11 7189 imulq %r12,%r15 7190 addq %rax,%r14 7191 adcq %rdx,%r15 7192 movq 8+0(%rbp),%rax 7193 movq %rax,%r9 7194 mulq %r10 7195 addq %rax,%r14 7196 adcq $0,%rdx 7197 movq %rdx,%r10 7198 movq 8+0(%rbp),%rax 7199 mulq %r11 7200 addq %rax,%r15 7201 adcq $0,%rdx 7202 imulq %r12,%r9 7203 addq %r10,%r15 7204 adcq %rdx,%r9 7205 movq %r13,%r10 7206 movq %r14,%r11 7207 movq %r15,%r12 7208 andq $3,%r12 7209 movq %r15,%r13 7210 andq $-4,%r13 7211 movq %r9,%r14 7212 shrdq $2,%r9,%r15 7213 shrq $2,%r9 7214 addq %r13,%r10 7215 adcq %r14,%r11 7216 adcq $0,%r12 7217 addq %r15,%r10 7218 adcq %r9,%r11 7219 adcq $0,%r12 7220 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7221 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7222 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7223 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7224 vpxor 0+384(%rsi),%ymm3,%ymm3 7225 vpxor 32+384(%rsi),%ymm0,%ymm0 7226 vpxor 64+384(%rsi),%ymm4,%ymm4 7227 vpxor 96+384(%rsi),%ymm8,%ymm8 7228 vmovdqu %ymm3,0+384(%rdi) 7229 vmovdqu %ymm0,32+384(%rdi) 7230 vmovdqu %ymm4,64+384(%rdi) 7231 vmovdqu %ymm8,96+384(%rdi) 7232 7233 leaq 512(%rsi),%rsi 7234 subq $512,%rbx 7235 cmpq $512,%rbx 7236 jg 1b 7237 addq 0(%rdi),%r10 7238 adcq 8+0(%rdi),%r11 7239 adcq $1,%r12 7240 movq 0+0(%rbp),%rax 7241 movq %rax,%r15 7242 mulq %r10 7243 movq %rax,%r13 7244 movq %rdx,%r14 7245 movq 0+0(%rbp),%rax 7246 mulq %r11 7247 imulq %r12,%r15 7248 addq %rax,%r14 7249 adcq %rdx,%r15 7250 movq 8+0(%rbp),%rax 7251 movq %rax,%r9 7252 mulq %r10 7253 addq %rax,%r14 7254 adcq $0,%rdx 7255 movq %rdx,%r10 7256 movq 8+0(%rbp),%rax 7257 mulq %r11 7258 addq %rax,%r15 7259 adcq $0,%rdx 7260 imulq %r12,%r9 7261 addq %r10,%r15 7262 adcq %rdx,%r9 7263 movq %r13,%r10 7264 movq %r14,%r11 7265 movq %r15,%r12 7266 andq $3,%r12 7267 movq %r15,%r13 7268 andq $-4,%r13 7269 movq %r9,%r14 7270 shrdq $2,%r9,%r15 7271 shrq $2,%r9 7272 addq %r13,%r10 7273 adcq %r14,%r11 7274 adcq $0,%r12 7275 addq %r15,%r10 7276 adcq %r9,%r11 7277 adcq $0,%r12 7278 addq 16(%rdi),%r10 7279 adcq 8+16(%rdi),%r11 7280 adcq $1,%r12 7281 movq 0+0(%rbp),%rax 7282 movq %rax,%r15 7283 mulq %r10 7284 movq 
%rax,%r13 7285 movq %rdx,%r14 7286 movq 0+0(%rbp),%rax 7287 mulq %r11 7288 imulq %r12,%r15 7289 addq %rax,%r14 7290 adcq %rdx,%r15 7291 movq 8+0(%rbp),%rax 7292 movq %rax,%r9 7293 mulq %r10 7294 addq %rax,%r14 7295 adcq $0,%rdx 7296 movq %rdx,%r10 7297 movq 8+0(%rbp),%rax 7298 mulq %r11 7299 addq %rax,%r15 7300 adcq $0,%rdx 7301 imulq %r12,%r9 7302 addq %r10,%r15 7303 adcq %rdx,%r9 7304 movq %r13,%r10 7305 movq %r14,%r11 7306 movq %r15,%r12 7307 andq $3,%r12 7308 movq %r15,%r13 7309 andq $-4,%r13 7310 movq %r9,%r14 7311 shrdq $2,%r9,%r15 7312 shrq $2,%r9 7313 addq %r13,%r10 7314 adcq %r14,%r11 7315 adcq $0,%r12 7316 addq %r15,%r10 7317 adcq %r9,%r11 7318 adcq $0,%r12 7319 7320 leaq 32(%rdi),%rdi 7321 movq $10,%rcx 7322 xorq %r8,%r8 7323 cmpq $128,%rbx 7324 ja 3f 7325 7326 seal_avx2_tail_128: 7327 vmovdqa .chacha20_consts(%rip),%ymm0 7328 vmovdqa 64(%rbp),%ymm4 7329 vmovdqa 96(%rbp),%ymm8 7330 vmovdqa .avx2_inc(%rip),%ymm12 7331 vpaddd 160(%rbp),%ymm12,%ymm12 7332 vmovdqa %ymm12,160(%rbp) 7333 7334 1: 7335 addq 0(%rdi),%r10 7336 adcq 8+0(%rdi),%r11 7337 adcq $1,%r12 7338 movq 0+0(%rbp),%rax 7339 movq %rax,%r15 7340 mulq %r10 7341 movq %rax,%r13 7342 movq %rdx,%r14 7343 movq 0+0(%rbp),%rax 7344 mulq %r11 7345 imulq %r12,%r15 7346 addq %rax,%r14 7347 adcq %rdx,%r15 7348 movq 8+0(%rbp),%rax 7349 movq %rax,%r9 7350 mulq %r10 7351 addq %rax,%r14 7352 adcq $0,%rdx 7353 movq %rdx,%r10 7354 movq 8+0(%rbp),%rax 7355 mulq %r11 7356 addq %rax,%r15 7357 adcq $0,%rdx 7358 imulq %r12,%r9 7359 addq %r10,%r15 7360 adcq %rdx,%r9 7361 movq %r13,%r10 7362 movq %r14,%r11 7363 movq %r15,%r12 7364 andq $3,%r12 7365 movq %r15,%r13 7366 andq $-4,%r13 7367 movq %r9,%r14 7368 shrdq $2,%r9,%r15 7369 shrq $2,%r9 7370 addq %r13,%r10 7371 adcq %r14,%r11 7372 adcq $0,%r12 7373 addq %r15,%r10 7374 adcq %r9,%r11 7375 adcq $0,%r12 7376 7377 leaq 16(%rdi),%rdi 7378 2: 7379 vpaddd %ymm4,%ymm0,%ymm0 7380 vpxor %ymm0,%ymm12,%ymm12 7381 vpshufb .rol16(%rip),%ymm12,%ymm12 7382 vpaddd %ymm12,%ymm8,%ymm8 7383 vpxor %ymm8,%ymm4,%ymm4 7384 vpsrld $20,%ymm4,%ymm3 7385 vpslld $12,%ymm4,%ymm4 7386 vpxor %ymm3,%ymm4,%ymm4 7387 vpaddd %ymm4,%ymm0,%ymm0 7388 vpxor %ymm0,%ymm12,%ymm12 7389 vpshufb .rol8(%rip),%ymm12,%ymm12 7390 vpaddd %ymm12,%ymm8,%ymm8 7391 vpxor %ymm8,%ymm4,%ymm4 7392 vpslld $7,%ymm4,%ymm3 7393 vpsrld $25,%ymm4,%ymm4 7394 vpxor %ymm3,%ymm4,%ymm4 7395 vpalignr $12,%ymm12,%ymm12,%ymm12 7396 vpalignr $8,%ymm8,%ymm8,%ymm8 7397 vpalignr $4,%ymm4,%ymm4,%ymm4 7398 addq 0(%rdi),%r10 7399 adcq 8+0(%rdi),%r11 7400 adcq $1,%r12 7401 movq 0+0(%rbp),%rax 7402 movq %rax,%r15 7403 mulq %r10 7404 movq %rax,%r13 7405 movq %rdx,%r14 7406 movq 0+0(%rbp),%rax 7407 mulq %r11 7408 imulq %r12,%r15 7409 addq %rax,%r14 7410 adcq %rdx,%r15 7411 movq 8+0(%rbp),%rax 7412 movq %rax,%r9 7413 mulq %r10 7414 addq %rax,%r14 7415 adcq $0,%rdx 7416 movq %rdx,%r10 7417 movq 8+0(%rbp),%rax 7418 mulq %r11 7419 addq %rax,%r15 7420 adcq $0,%rdx 7421 imulq %r12,%r9 7422 addq %r10,%r15 7423 adcq %rdx,%r9 7424 movq %r13,%r10 7425 movq %r14,%r11 7426 movq %r15,%r12 7427 andq $3,%r12 7428 movq %r15,%r13 7429 andq $-4,%r13 7430 movq %r9,%r14 7431 shrdq $2,%r9,%r15 7432 shrq $2,%r9 7433 addq %r13,%r10 7434 adcq %r14,%r11 7435 adcq $0,%r12 7436 addq %r15,%r10 7437 adcq %r9,%r11 7438 adcq $0,%r12 7439 vpaddd %ymm4,%ymm0,%ymm0 7440 vpxor %ymm0,%ymm12,%ymm12 7441 vpshufb .rol16(%rip),%ymm12,%ymm12 7442 vpaddd %ymm12,%ymm8,%ymm8 7443 vpxor %ymm8,%ymm4,%ymm4 7444 vpsrld $20,%ymm4,%ymm3 7445 vpslld $12,%ymm4,%ymm4 7446 vpxor %ymm3,%ymm4,%ymm4 7447 vpaddd %ymm4,%ymm0,%ymm0 
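/* seal_avx2_tail_128: last 1..128 bytes of the seal path; one more ChaCha20
   block is computed while Poly1305 catches up on the ciphertext at (%rdi),
   then control jumps to seal_avx2_short_loop. */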
7448 vpxor %ymm0,%ymm12,%ymm12 7449 vpshufb .rol8(%rip),%ymm12,%ymm12 7450 vpaddd %ymm12,%ymm8,%ymm8 7451 vpxor %ymm8,%ymm4,%ymm4 7452 vpslld $7,%ymm4,%ymm3 7453 vpsrld $25,%ymm4,%ymm4 7454 vpxor %ymm3,%ymm4,%ymm4 7455 vpalignr $4,%ymm12,%ymm12,%ymm12 7456 vpalignr $8,%ymm8,%ymm8,%ymm8 7457 vpalignr $12,%ymm4,%ymm4,%ymm4 7458 addq 16(%rdi),%r10 7459 adcq 8+16(%rdi),%r11 7460 adcq $1,%r12 7461 movq 0+0(%rbp),%rax 7462 movq %rax,%r15 7463 mulq %r10 7464 movq %rax,%r13 7465 movq %rdx,%r14 7466 movq 0+0(%rbp),%rax 7467 mulq %r11 7468 imulq %r12,%r15 7469 addq %rax,%r14 7470 adcq %rdx,%r15 7471 movq 8+0(%rbp),%rax 7472 movq %rax,%r9 7473 mulq %r10 7474 addq %rax,%r14 7475 adcq $0,%rdx 7476 movq %rdx,%r10 7477 movq 8+0(%rbp),%rax 7478 mulq %r11 7479 addq %rax,%r15 7480 adcq $0,%rdx 7481 imulq %r12,%r9 7482 addq %r10,%r15 7483 adcq %rdx,%r9 7484 movq %r13,%r10 7485 movq %r14,%r11 7486 movq %r15,%r12 7487 andq $3,%r12 7488 movq %r15,%r13 7489 andq $-4,%r13 7490 movq %r9,%r14 7491 shrdq $2,%r9,%r15 7492 shrq $2,%r9 7493 addq %r13,%r10 7494 adcq %r14,%r11 7495 adcq $0,%r12 7496 addq %r15,%r10 7497 adcq %r9,%r11 7498 adcq $0,%r12 7499 7500 leaq 32(%rdi),%rdi 7501 decq %rcx 7502 jg 1b 7503 decq %r8 7504 jge 2b 7505 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7506 vpaddd 64(%rbp),%ymm4,%ymm4 7507 vpaddd 96(%rbp),%ymm8,%ymm8 7508 vpaddd 160(%rbp),%ymm12,%ymm12 7509 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7510 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7511 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7512 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7513 vmovdqa %ymm3,%ymm8 7514 7515 jmp seal_avx2_short_loop 7516 3: 7517 cmpq $256,%rbx 7518 ja 3f 7519 7520 seal_avx2_tail_256: 7521 vmovdqa .chacha20_consts(%rip),%ymm0 7522 vmovdqa 64(%rbp),%ymm4 7523 vmovdqa 96(%rbp),%ymm8 7524 vmovdqa %ymm0,%ymm1 7525 vmovdqa %ymm4,%ymm5 7526 vmovdqa %ymm8,%ymm9 7527 vmovdqa .avx2_inc(%rip),%ymm12 7528 vpaddd 160(%rbp),%ymm12,%ymm13 7529 vpaddd %ymm13,%ymm12,%ymm12 7530 vmovdqa %ymm12,160(%rbp) 7531 vmovdqa %ymm13,192(%rbp) 7532 7533 1: 7534 addq 0(%rdi),%r10 7535 adcq 8+0(%rdi),%r11 7536 adcq $1,%r12 7537 movq 0+0(%rbp),%rax 7538 movq %rax,%r15 7539 mulq %r10 7540 movq %rax,%r13 7541 movq %rdx,%r14 7542 movq 0+0(%rbp),%rax 7543 mulq %r11 7544 imulq %r12,%r15 7545 addq %rax,%r14 7546 adcq %rdx,%r15 7547 movq 8+0(%rbp),%rax 7548 movq %rax,%r9 7549 mulq %r10 7550 addq %rax,%r14 7551 adcq $0,%rdx 7552 movq %rdx,%r10 7553 movq 8+0(%rbp),%rax 7554 mulq %r11 7555 addq %rax,%r15 7556 adcq $0,%rdx 7557 imulq %r12,%r9 7558 addq %r10,%r15 7559 adcq %rdx,%r9 7560 movq %r13,%r10 7561 movq %r14,%r11 7562 movq %r15,%r12 7563 andq $3,%r12 7564 movq %r15,%r13 7565 andq $-4,%r13 7566 movq %r9,%r14 7567 shrdq $2,%r9,%r15 7568 shrq $2,%r9 7569 addq %r13,%r10 7570 adcq %r14,%r11 7571 adcq $0,%r12 7572 addq %r15,%r10 7573 adcq %r9,%r11 7574 adcq $0,%r12 7575 7576 leaq 16(%rdi),%rdi 7577 2: 7578 vpaddd %ymm4,%ymm0,%ymm0 7579 vpxor %ymm0,%ymm12,%ymm12 7580 vpshufb .rol16(%rip),%ymm12,%ymm12 7581 vpaddd %ymm12,%ymm8,%ymm8 7582 vpxor %ymm8,%ymm4,%ymm4 7583 vpsrld $20,%ymm4,%ymm3 7584 vpslld $12,%ymm4,%ymm4 7585 vpxor %ymm3,%ymm4,%ymm4 7586 vpaddd %ymm4,%ymm0,%ymm0 7587 vpxor %ymm0,%ymm12,%ymm12 7588 vpshufb .rol8(%rip),%ymm12,%ymm12 7589 vpaddd %ymm12,%ymm8,%ymm8 7590 vpxor %ymm8,%ymm4,%ymm4 7591 vpslld $7,%ymm4,%ymm3 7592 vpsrld $25,%ymm4,%ymm4 7593 vpxor %ymm3,%ymm4,%ymm4 7594 vpalignr $12,%ymm12,%ymm12,%ymm12 7595 vpalignr $8,%ymm8,%ymm8,%ymm8 7596 vpalignr $4,%ymm4,%ymm4,%ymm4 7597 vpaddd %ymm5,%ymm1,%ymm1 7598 vpxor %ymm1,%ymm13,%ymm13 7599 vpshufb 
.rol16(%rip),%ymm13,%ymm13 7600 vpaddd %ymm13,%ymm9,%ymm9 7601 vpxor %ymm9,%ymm5,%ymm5 7602 vpsrld $20,%ymm5,%ymm3 7603 vpslld $12,%ymm5,%ymm5 7604 vpxor %ymm3,%ymm5,%ymm5 7605 vpaddd %ymm5,%ymm1,%ymm1 7606 vpxor %ymm1,%ymm13,%ymm13 7607 vpshufb .rol8(%rip),%ymm13,%ymm13 7608 vpaddd %ymm13,%ymm9,%ymm9 7609 vpxor %ymm9,%ymm5,%ymm5 7610 vpslld $7,%ymm5,%ymm3 7611 vpsrld $25,%ymm5,%ymm5 7612 vpxor %ymm3,%ymm5,%ymm5 7613 vpalignr $12,%ymm13,%ymm13,%ymm13 7614 vpalignr $8,%ymm9,%ymm9,%ymm9 7615 vpalignr $4,%ymm5,%ymm5,%ymm5 7616 addq 0(%rdi),%r10 7617 adcq 8+0(%rdi),%r11 7618 adcq $1,%r12 7619 movq 0+0(%rbp),%rax 7620 movq %rax,%r15 7621 mulq %r10 7622 movq %rax,%r13 7623 movq %rdx,%r14 7624 movq 0+0(%rbp),%rax 7625 mulq %r11 7626 imulq %r12,%r15 7627 addq %rax,%r14 7628 adcq %rdx,%r15 7629 movq 8+0(%rbp),%rax 7630 movq %rax,%r9 7631 mulq %r10 7632 addq %rax,%r14 7633 adcq $0,%rdx 7634 movq %rdx,%r10 7635 movq 8+0(%rbp),%rax 7636 mulq %r11 7637 addq %rax,%r15 7638 adcq $0,%rdx 7639 imulq %r12,%r9 7640 addq %r10,%r15 7641 adcq %rdx,%r9 7642 movq %r13,%r10 7643 movq %r14,%r11 7644 movq %r15,%r12 7645 andq $3,%r12 7646 movq %r15,%r13 7647 andq $-4,%r13 7648 movq %r9,%r14 7649 shrdq $2,%r9,%r15 7650 shrq $2,%r9 7651 addq %r13,%r10 7652 adcq %r14,%r11 7653 adcq $0,%r12 7654 addq %r15,%r10 7655 adcq %r9,%r11 7656 adcq $0,%r12 7657 vpaddd %ymm4,%ymm0,%ymm0 7658 vpxor %ymm0,%ymm12,%ymm12 7659 vpshufb .rol16(%rip),%ymm12,%ymm12 7660 vpaddd %ymm12,%ymm8,%ymm8 7661 vpxor %ymm8,%ymm4,%ymm4 7662 vpsrld $20,%ymm4,%ymm3 7663 vpslld $12,%ymm4,%ymm4 7664 vpxor %ymm3,%ymm4,%ymm4 7665 vpaddd %ymm4,%ymm0,%ymm0 7666 vpxor %ymm0,%ymm12,%ymm12 7667 vpshufb .rol8(%rip),%ymm12,%ymm12 7668 vpaddd %ymm12,%ymm8,%ymm8 7669 vpxor %ymm8,%ymm4,%ymm4 7670 vpslld $7,%ymm4,%ymm3 7671 vpsrld $25,%ymm4,%ymm4 7672 vpxor %ymm3,%ymm4,%ymm4 7673 vpalignr $4,%ymm12,%ymm12,%ymm12 7674 vpalignr $8,%ymm8,%ymm8,%ymm8 7675 vpalignr $12,%ymm4,%ymm4,%ymm4 7676 vpaddd %ymm5,%ymm1,%ymm1 7677 vpxor %ymm1,%ymm13,%ymm13 7678 vpshufb .rol16(%rip),%ymm13,%ymm13 7679 vpaddd %ymm13,%ymm9,%ymm9 7680 vpxor %ymm9,%ymm5,%ymm5 7681 vpsrld $20,%ymm5,%ymm3 7682 vpslld $12,%ymm5,%ymm5 7683 vpxor %ymm3,%ymm5,%ymm5 7684 vpaddd %ymm5,%ymm1,%ymm1 7685 vpxor %ymm1,%ymm13,%ymm13 7686 vpshufb .rol8(%rip),%ymm13,%ymm13 7687 vpaddd %ymm13,%ymm9,%ymm9 7688 vpxor %ymm9,%ymm5,%ymm5 7689 vpslld $7,%ymm5,%ymm3 7690 vpsrld $25,%ymm5,%ymm5 7691 vpxor %ymm3,%ymm5,%ymm5 7692 vpalignr $4,%ymm13,%ymm13,%ymm13 7693 vpalignr $8,%ymm9,%ymm9,%ymm9 7694 vpalignr $12,%ymm5,%ymm5,%ymm5 7695 addq 16(%rdi),%r10 7696 adcq 8+16(%rdi),%r11 7697 adcq $1,%r12 7698 movq 0+0(%rbp),%rax 7699 movq %rax,%r15 7700 mulq %r10 7701 movq %rax,%r13 7702 movq %rdx,%r14 7703 movq 0+0(%rbp),%rax 7704 mulq %r11 7705 imulq %r12,%r15 7706 addq %rax,%r14 7707 adcq %rdx,%r15 7708 movq 8+0(%rbp),%rax 7709 movq %rax,%r9 7710 mulq %r10 7711 addq %rax,%r14 7712 adcq $0,%rdx 7713 movq %rdx,%r10 7714 movq 8+0(%rbp),%rax 7715 mulq %r11 7716 addq %rax,%r15 7717 adcq $0,%rdx 7718 imulq %r12,%r9 7719 addq %r10,%r15 7720 adcq %rdx,%r9 7721 movq %r13,%r10 7722 movq %r14,%r11 7723 movq %r15,%r12 7724 andq $3,%r12 7725 movq %r15,%r13 7726 andq $-4,%r13 7727 movq %r9,%r14 7728 shrdq $2,%r9,%r15 7729 shrq $2,%r9 7730 addq %r13,%r10 7731 adcq %r14,%r11 7732 adcq $0,%r12 7733 addq %r15,%r10 7734 adcq %r9,%r11 7735 adcq $0,%r12 7736 7737 leaq 32(%rdi),%rdi 7738 decq %rcx 7739 jg 1b 7740 decq %r8 7741 jge 2b 7742 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 7743 vpaddd 64(%rbp),%ymm5,%ymm5 7744 vpaddd 96(%rbp),%ymm9,%ymm9 7745 vpaddd 
192(%rbp),%ymm13,%ymm13 7746 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7747 vpaddd 64(%rbp),%ymm4,%ymm4 7748 vpaddd 96(%rbp),%ymm8,%ymm8 7749 vpaddd 160(%rbp),%ymm12,%ymm12 7750 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7751 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7752 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7753 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7754 vpxor 0+0(%rsi),%ymm3,%ymm3 7755 vpxor 32+0(%rsi),%ymm1,%ymm1 7756 vpxor 64+0(%rsi),%ymm5,%ymm5 7757 vpxor 96+0(%rsi),%ymm9,%ymm9 7758 vmovdqu %ymm3,0+0(%rdi) 7759 vmovdqu %ymm1,32+0(%rdi) 7760 vmovdqu %ymm5,64+0(%rdi) 7761 vmovdqu %ymm9,96+0(%rdi) 7762 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7763 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7764 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7765 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7766 vmovdqa %ymm3,%ymm8 7767 7768 movq $128,%rcx 7769 leaq 128(%rsi),%rsi 7770 subq $128,%rbx 7771 jmp seal_avx2_hash 7772 3: 7773 cmpq $384,%rbx 7774 ja seal_avx2_tail_512 7775 7776 seal_avx2_tail_384: 7777 vmovdqa .chacha20_consts(%rip),%ymm0 7778 vmovdqa 64(%rbp),%ymm4 7779 vmovdqa 96(%rbp),%ymm8 7780 vmovdqa %ymm0,%ymm1 7781 vmovdqa %ymm4,%ymm5 7782 vmovdqa %ymm8,%ymm9 7783 vmovdqa %ymm0,%ymm2 7784 vmovdqa %ymm4,%ymm6 7785 vmovdqa %ymm8,%ymm10 7786 vmovdqa .avx2_inc(%rip),%ymm12 7787 vpaddd 160(%rbp),%ymm12,%ymm14 7788 vpaddd %ymm14,%ymm12,%ymm13 7789 vpaddd %ymm13,%ymm12,%ymm12 7790 vmovdqa %ymm12,160(%rbp) 7791 vmovdqa %ymm13,192(%rbp) 7792 vmovdqa %ymm14,224(%rbp) 7793 7794 1: 7795 addq 0(%rdi),%r10 7796 adcq 8+0(%rdi),%r11 7797 adcq $1,%r12 7798 movq 0+0(%rbp),%rax 7799 movq %rax,%r15 7800 mulq %r10 7801 movq %rax,%r13 7802 movq %rdx,%r14 7803 movq 0+0(%rbp),%rax 7804 mulq %r11 7805 imulq %r12,%r15 7806 addq %rax,%r14 7807 adcq %rdx,%r15 7808 movq 8+0(%rbp),%rax 7809 movq %rax,%r9 7810 mulq %r10 7811 addq %rax,%r14 7812 adcq $0,%rdx 7813 movq %rdx,%r10 7814 movq 8+0(%rbp),%rax 7815 mulq %r11 7816 addq %rax,%r15 7817 adcq $0,%rdx 7818 imulq %r12,%r9 7819 addq %r10,%r15 7820 adcq %rdx,%r9 7821 movq %r13,%r10 7822 movq %r14,%r11 7823 movq %r15,%r12 7824 andq $3,%r12 7825 movq %r15,%r13 7826 andq $-4,%r13 7827 movq %r9,%r14 7828 shrdq $2,%r9,%r15 7829 shrq $2,%r9 7830 addq %r13,%r10 7831 adcq %r14,%r11 7832 adcq $0,%r12 7833 addq %r15,%r10 7834 adcq %r9,%r11 7835 adcq $0,%r12 7836 7837 leaq 16(%rdi),%rdi 7838 2: 7839 vpaddd %ymm4,%ymm0,%ymm0 7840 vpxor %ymm0,%ymm12,%ymm12 7841 vpshufb .rol16(%rip),%ymm12,%ymm12 7842 vpaddd %ymm12,%ymm8,%ymm8 7843 vpxor %ymm8,%ymm4,%ymm4 7844 vpsrld $20,%ymm4,%ymm3 7845 vpslld $12,%ymm4,%ymm4 7846 vpxor %ymm3,%ymm4,%ymm4 7847 vpaddd %ymm4,%ymm0,%ymm0 7848 vpxor %ymm0,%ymm12,%ymm12 7849 vpshufb .rol8(%rip),%ymm12,%ymm12 7850 vpaddd %ymm12,%ymm8,%ymm8 7851 vpxor %ymm8,%ymm4,%ymm4 7852 vpslld $7,%ymm4,%ymm3 7853 vpsrld $25,%ymm4,%ymm4 7854 vpxor %ymm3,%ymm4,%ymm4 7855 vpalignr $12,%ymm12,%ymm12,%ymm12 7856 vpalignr $8,%ymm8,%ymm8,%ymm8 7857 vpalignr $4,%ymm4,%ymm4,%ymm4 7858 vpaddd %ymm5,%ymm1,%ymm1 7859 vpxor %ymm1,%ymm13,%ymm13 7860 vpshufb .rol16(%rip),%ymm13,%ymm13 7861 vpaddd %ymm13,%ymm9,%ymm9 7862 vpxor %ymm9,%ymm5,%ymm5 7863 vpsrld $20,%ymm5,%ymm3 7864 vpslld $12,%ymm5,%ymm5 7865 vpxor %ymm3,%ymm5,%ymm5 7866 vpaddd %ymm5,%ymm1,%ymm1 7867 vpxor %ymm1,%ymm13,%ymm13 7868 vpshufb .rol8(%rip),%ymm13,%ymm13 7869 vpaddd %ymm13,%ymm9,%ymm9 7870 vpxor %ymm9,%ymm5,%ymm5 7871 vpslld $7,%ymm5,%ymm3 7872 vpsrld $25,%ymm5,%ymm5 7873 vpxor %ymm3,%ymm5,%ymm5 7874 vpalignr $12,%ymm13,%ymm13,%ymm13 7875 vpalignr $8,%ymm9,%ymm9,%ymm9 7876 vpalignr $4,%ymm5,%ymm5,%ymm5 7877 addq 0(%rdi),%r10 7878 adcq 8+0(%rdi),%r11 7879 adcq 
$1,%r12 7880 movq 0+0(%rbp),%rax 7881 movq %rax,%r15 7882 mulq %r10 7883 movq %rax,%r13 7884 movq %rdx,%r14 7885 movq 0+0(%rbp),%rax 7886 mulq %r11 7887 imulq %r12,%r15 7888 addq %rax,%r14 7889 adcq %rdx,%r15 7890 movq 8+0(%rbp),%rax 7891 movq %rax,%r9 7892 mulq %r10 7893 addq %rax,%r14 7894 adcq $0,%rdx 7895 movq %rdx,%r10 7896 movq 8+0(%rbp),%rax 7897 mulq %r11 7898 addq %rax,%r15 7899 adcq $0,%rdx 7900 imulq %r12,%r9 7901 addq %r10,%r15 7902 adcq %rdx,%r9 7903 movq %r13,%r10 7904 movq %r14,%r11 7905 movq %r15,%r12 7906 andq $3,%r12 7907 movq %r15,%r13 7908 andq $-4,%r13 7909 movq %r9,%r14 7910 shrdq $2,%r9,%r15 7911 shrq $2,%r9 7912 addq %r13,%r10 7913 adcq %r14,%r11 7914 adcq $0,%r12 7915 addq %r15,%r10 7916 adcq %r9,%r11 7917 adcq $0,%r12 7918 vpaddd %ymm6,%ymm2,%ymm2 7919 vpxor %ymm2,%ymm14,%ymm14 7920 vpshufb .rol16(%rip),%ymm14,%ymm14 7921 vpaddd %ymm14,%ymm10,%ymm10 7922 vpxor %ymm10,%ymm6,%ymm6 7923 vpsrld $20,%ymm6,%ymm3 7924 vpslld $12,%ymm6,%ymm6 7925 vpxor %ymm3,%ymm6,%ymm6 7926 vpaddd %ymm6,%ymm2,%ymm2 7927 vpxor %ymm2,%ymm14,%ymm14 7928 vpshufb .rol8(%rip),%ymm14,%ymm14 7929 vpaddd %ymm14,%ymm10,%ymm10 7930 vpxor %ymm10,%ymm6,%ymm6 7931 vpslld $7,%ymm6,%ymm3 7932 vpsrld $25,%ymm6,%ymm6 7933 vpxor %ymm3,%ymm6,%ymm6 7934 vpalignr $12,%ymm14,%ymm14,%ymm14 7935 vpalignr $8,%ymm10,%ymm10,%ymm10 7936 vpalignr $4,%ymm6,%ymm6,%ymm6 7937 vpaddd %ymm4,%ymm0,%ymm0 7938 vpxor %ymm0,%ymm12,%ymm12 7939 vpshufb .rol16(%rip),%ymm12,%ymm12 7940 vpaddd %ymm12,%ymm8,%ymm8 7941 vpxor %ymm8,%ymm4,%ymm4 7942 vpsrld $20,%ymm4,%ymm3 7943 vpslld $12,%ymm4,%ymm4 7944 vpxor %ymm3,%ymm4,%ymm4 7945 vpaddd %ymm4,%ymm0,%ymm0 7946 vpxor %ymm0,%ymm12,%ymm12 7947 vpshufb .rol8(%rip),%ymm12,%ymm12 7948 vpaddd %ymm12,%ymm8,%ymm8 7949 vpxor %ymm8,%ymm4,%ymm4 7950 vpslld $7,%ymm4,%ymm3 7951 vpsrld $25,%ymm4,%ymm4 7952 vpxor %ymm3,%ymm4,%ymm4 7953 vpalignr $4,%ymm12,%ymm12,%ymm12 7954 vpalignr $8,%ymm8,%ymm8,%ymm8 7955 vpalignr $12,%ymm4,%ymm4,%ymm4 7956 addq 16(%rdi),%r10 7957 adcq 8+16(%rdi),%r11 7958 adcq $1,%r12 7959 movq 0+0(%rbp),%rax 7960 movq %rax,%r15 7961 mulq %r10 7962 movq %rax,%r13 7963 movq %rdx,%r14 7964 movq 0+0(%rbp),%rax 7965 mulq %r11 7966 imulq %r12,%r15 7967 addq %rax,%r14 7968 adcq %rdx,%r15 7969 movq 8+0(%rbp),%rax 7970 movq %rax,%r9 7971 mulq %r10 7972 addq %rax,%r14 7973 adcq $0,%rdx 7974 movq %rdx,%r10 7975 movq 8+0(%rbp),%rax 7976 mulq %r11 7977 addq %rax,%r15 7978 adcq $0,%rdx 7979 imulq %r12,%r9 7980 addq %r10,%r15 7981 adcq %rdx,%r9 7982 movq %r13,%r10 7983 movq %r14,%r11 7984 movq %r15,%r12 7985 andq $3,%r12 7986 movq %r15,%r13 7987 andq $-4,%r13 7988 movq %r9,%r14 7989 shrdq $2,%r9,%r15 7990 shrq $2,%r9 7991 addq %r13,%r10 7992 adcq %r14,%r11 7993 adcq $0,%r12 7994 addq %r15,%r10 7995 adcq %r9,%r11 7996 adcq $0,%r12 7997 vpaddd %ymm5,%ymm1,%ymm1 7998 vpxor %ymm1,%ymm13,%ymm13 7999 vpshufb .rol16(%rip),%ymm13,%ymm13 8000 vpaddd %ymm13,%ymm9,%ymm9 8001 vpxor %ymm9,%ymm5,%ymm5 8002 vpsrld $20,%ymm5,%ymm3 8003 vpslld $12,%ymm5,%ymm5 8004 vpxor %ymm3,%ymm5,%ymm5 8005 vpaddd %ymm5,%ymm1,%ymm1 8006 vpxor %ymm1,%ymm13,%ymm13 8007 vpshufb .rol8(%rip),%ymm13,%ymm13 8008 vpaddd %ymm13,%ymm9,%ymm9 8009 vpxor %ymm9,%ymm5,%ymm5 8010 vpslld $7,%ymm5,%ymm3 8011 vpsrld $25,%ymm5,%ymm5 8012 vpxor %ymm3,%ymm5,%ymm5 8013 vpalignr $4,%ymm13,%ymm13,%ymm13 8014 vpalignr $8,%ymm9,%ymm9,%ymm9 8015 vpalignr $12,%ymm5,%ymm5,%ymm5 8016 vpaddd %ymm6,%ymm2,%ymm2 8017 vpxor %ymm2,%ymm14,%ymm14 8018 vpshufb .rol16(%rip),%ymm14,%ymm14 8019 vpaddd %ymm14,%ymm10,%ymm10 8020 vpxor %ymm10,%ymm6,%ymm6 8021 vpsrld 
$20,%ymm6,%ymm3 8022 vpslld $12,%ymm6,%ymm6 8023 vpxor %ymm3,%ymm6,%ymm6 8024 vpaddd %ymm6,%ymm2,%ymm2 8025 vpxor %ymm2,%ymm14,%ymm14 8026 vpshufb .rol8(%rip),%ymm14,%ymm14 8027 vpaddd %ymm14,%ymm10,%ymm10 8028 vpxor %ymm10,%ymm6,%ymm6 8029 vpslld $7,%ymm6,%ymm3 8030 vpsrld $25,%ymm6,%ymm6 8031 vpxor %ymm3,%ymm6,%ymm6 8032 vpalignr $4,%ymm14,%ymm14,%ymm14 8033 vpalignr $8,%ymm10,%ymm10,%ymm10 8034 vpalignr $12,%ymm6,%ymm6,%ymm6 8035 8036 leaq 32(%rdi),%rdi 8037 decq %rcx 8038 jg 1b 8039 decq %r8 8040 jge 2b 8041 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 8042 vpaddd 64(%rbp),%ymm6,%ymm6 8043 vpaddd 96(%rbp),%ymm10,%ymm10 8044 vpaddd 224(%rbp),%ymm14,%ymm14 8045 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8046 vpaddd 64(%rbp),%ymm5,%ymm5 8047 vpaddd 96(%rbp),%ymm9,%ymm9 8048 vpaddd 192(%rbp),%ymm13,%ymm13 8049 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8050 vpaddd 64(%rbp),%ymm4,%ymm4 8051 vpaddd 96(%rbp),%ymm8,%ymm8 8052 vpaddd 160(%rbp),%ymm12,%ymm12 8053 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8054 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8055 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8056 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8057 vpxor 0+0(%rsi),%ymm3,%ymm3 8058 vpxor 32+0(%rsi),%ymm2,%ymm2 8059 vpxor 64+0(%rsi),%ymm6,%ymm6 8060 vpxor 96+0(%rsi),%ymm10,%ymm10 8061 vmovdqu %ymm3,0+0(%rdi) 8062 vmovdqu %ymm2,32+0(%rdi) 8063 vmovdqu %ymm6,64+0(%rdi) 8064 vmovdqu %ymm10,96+0(%rdi) 8065 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8066 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8067 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8068 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8069 vpxor 0+128(%rsi),%ymm3,%ymm3 8070 vpxor 32+128(%rsi),%ymm1,%ymm1 8071 vpxor 64+128(%rsi),%ymm5,%ymm5 8072 vpxor 96+128(%rsi),%ymm9,%ymm9 8073 vmovdqu %ymm3,0+128(%rdi) 8074 vmovdqu %ymm1,32+128(%rdi) 8075 vmovdqu %ymm5,64+128(%rdi) 8076 vmovdqu %ymm9,96+128(%rdi) 8077 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8078 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8079 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8080 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8081 vmovdqa %ymm3,%ymm8 8082 8083 movq $256,%rcx 8084 leaq 256(%rsi),%rsi 8085 subq $256,%rbx 8086 jmp seal_avx2_hash 8087 8088 seal_avx2_tail_512: 8089 vmovdqa .chacha20_consts(%rip),%ymm0 8090 vmovdqa 64(%rbp),%ymm4 8091 vmovdqa 96(%rbp),%ymm8 8092 vmovdqa %ymm0,%ymm1 8093 vmovdqa %ymm4,%ymm5 8094 vmovdqa %ymm8,%ymm9 8095 vmovdqa %ymm0,%ymm2 8096 vmovdqa %ymm4,%ymm6 8097 vmovdqa %ymm8,%ymm10 8098 vmovdqa %ymm0,%ymm3 8099 vmovdqa %ymm4,%ymm7 8100 vmovdqa %ymm8,%ymm11 8101 vmovdqa .avx2_inc(%rip),%ymm12 8102 vpaddd 160(%rbp),%ymm12,%ymm15 8103 vpaddd %ymm15,%ymm12,%ymm14 8104 vpaddd %ymm14,%ymm12,%ymm13 8105 vpaddd %ymm13,%ymm12,%ymm12 8106 vmovdqa %ymm15,256(%rbp) 8107 vmovdqa %ymm14,224(%rbp) 8108 vmovdqa %ymm13,192(%rbp) 8109 vmovdqa %ymm12,160(%rbp) 8110 8111 1: 8112 addq 0(%rdi),%r10 8113 adcq 8+0(%rdi),%r11 8114 adcq $1,%r12 8115 movq 0+0(%rbp),%rdx 8116 movq %rdx,%r15 8117 mulxq %r10,%r13,%r14 8118 mulxq %r11,%rax,%rdx 8119 imulq %r12,%r15 8120 addq %rax,%r14 8121 adcq %rdx,%r15 8122 movq 8+0(%rbp),%rdx 8123 mulxq %r10,%r10,%rax 8124 addq %r10,%r14 8125 mulxq %r11,%r11,%r9 8126 adcq %r11,%r15 8127 adcq $0,%r9 8128 imulq %r12,%rdx 8129 addq %rax,%r15 8130 adcq %rdx,%r9 8131 movq %r13,%r10 8132 movq %r14,%r11 8133 movq %r15,%r12 8134 andq $3,%r12 8135 movq %r15,%r13 8136 andq $-4,%r13 8137 movq %r9,%r14 8138 shrdq $2,%r9,%r15 8139 shrq $2,%r9 8140 addq %r13,%r10 8141 adcq %r14,%r11 8142 adcq $0,%r12 8143 addq %r15,%r10 8144 adcq %r9,%r11 8145 adcq $0,%r12 8146 8147 leaq 16(%rdi),%rdi 8148 2: 8149 vmovdqa %ymm8,128(%rbp) 8150 vmovdqa 
.rol16(%rip),%ymm8 8151 vpaddd %ymm7,%ymm3,%ymm3 8152 vpaddd %ymm6,%ymm2,%ymm2 8153 vpaddd %ymm5,%ymm1,%ymm1 8154 vpaddd %ymm4,%ymm0,%ymm0 8155 vpxor %ymm3,%ymm15,%ymm15 8156 vpxor %ymm2,%ymm14,%ymm14 8157 vpxor %ymm1,%ymm13,%ymm13 8158 vpxor %ymm0,%ymm12,%ymm12 8159 vpshufb %ymm8,%ymm15,%ymm15 8160 vpshufb %ymm8,%ymm14,%ymm14 8161 vpshufb %ymm8,%ymm13,%ymm13 8162 vpshufb %ymm8,%ymm12,%ymm12 8163 vmovdqa 128(%rbp),%ymm8 8164 vpaddd %ymm15,%ymm11,%ymm11 8165 vpaddd %ymm14,%ymm10,%ymm10 8166 vpaddd %ymm13,%ymm9,%ymm9 8167 vpaddd %ymm12,%ymm8,%ymm8 8168 vpxor %ymm11,%ymm7,%ymm7 8169 addq 0(%rdi),%r10 8170 adcq 8+0(%rdi),%r11 8171 adcq $1,%r12 8172 vpxor %ymm10,%ymm6,%ymm6 8173 vpxor %ymm9,%ymm5,%ymm5 8174 vpxor %ymm8,%ymm4,%ymm4 8175 vmovdqa %ymm8,128(%rbp) 8176 vpsrld $20,%ymm7,%ymm8 8177 vpslld $32-20,%ymm7,%ymm7 8178 vpxor %ymm8,%ymm7,%ymm7 8179 vpsrld $20,%ymm6,%ymm8 8180 vpslld $32-20,%ymm6,%ymm6 8181 vpxor %ymm8,%ymm6,%ymm6 8182 vpsrld $20,%ymm5,%ymm8 8183 vpslld $32-20,%ymm5,%ymm5 8184 vpxor %ymm8,%ymm5,%ymm5 8185 vpsrld $20,%ymm4,%ymm8 8186 vpslld $32-20,%ymm4,%ymm4 8187 vpxor %ymm8,%ymm4,%ymm4 8188 vmovdqa .rol8(%rip),%ymm8 8189 vpaddd %ymm7,%ymm3,%ymm3 8190 vpaddd %ymm6,%ymm2,%ymm2 8191 vpaddd %ymm5,%ymm1,%ymm1 8192 movq 0+0(%rbp),%rdx 8193 movq %rdx,%r15 8194 mulxq %r10,%r13,%r14 8195 mulxq %r11,%rax,%rdx 8196 imulq %r12,%r15 8197 addq %rax,%r14 8198 adcq %rdx,%r15 8199 vpaddd %ymm4,%ymm0,%ymm0 8200 vpxor %ymm3,%ymm15,%ymm15 8201 vpxor %ymm2,%ymm14,%ymm14 8202 vpxor %ymm1,%ymm13,%ymm13 8203 vpxor %ymm0,%ymm12,%ymm12 8204 vpshufb %ymm8,%ymm15,%ymm15 8205 vpshufb %ymm8,%ymm14,%ymm14 8206 vpshufb %ymm8,%ymm13,%ymm13 8207 vpshufb %ymm8,%ymm12,%ymm12 8208 vmovdqa 128(%rbp),%ymm8 8209 vpaddd %ymm15,%ymm11,%ymm11 8210 vpaddd %ymm14,%ymm10,%ymm10 8211 vpaddd %ymm13,%ymm9,%ymm9 8212 vpaddd %ymm12,%ymm8,%ymm8 8213 vpxor %ymm11,%ymm7,%ymm7 8214 vpxor %ymm10,%ymm6,%ymm6 8215 vpxor %ymm9,%ymm5,%ymm5 8216 vpxor %ymm8,%ymm4,%ymm4 8217 vmovdqa %ymm8,128(%rbp) 8218 vpsrld $25,%ymm7,%ymm8 8219 movq 8+0(%rbp),%rdx 8220 mulxq %r10,%r10,%rax 8221 addq %r10,%r14 8222 mulxq %r11,%r11,%r9 8223 adcq %r11,%r15 8224 adcq $0,%r9 8225 imulq %r12,%rdx 8226 vpslld $32-25,%ymm7,%ymm7 8227 vpxor %ymm8,%ymm7,%ymm7 8228 vpsrld $25,%ymm6,%ymm8 8229 vpslld $32-25,%ymm6,%ymm6 8230 vpxor %ymm8,%ymm6,%ymm6 8231 vpsrld $25,%ymm5,%ymm8 8232 vpslld $32-25,%ymm5,%ymm5 8233 vpxor %ymm8,%ymm5,%ymm5 8234 vpsrld $25,%ymm4,%ymm8 8235 vpslld $32-25,%ymm4,%ymm4 8236 vpxor %ymm8,%ymm4,%ymm4 8237 vmovdqa 128(%rbp),%ymm8 8238 vpalignr $4,%ymm7,%ymm7,%ymm7 8239 vpalignr $8,%ymm11,%ymm11,%ymm11 8240 vpalignr $12,%ymm15,%ymm15,%ymm15 8241 vpalignr $4,%ymm6,%ymm6,%ymm6 8242 vpalignr $8,%ymm10,%ymm10,%ymm10 8243 vpalignr $12,%ymm14,%ymm14,%ymm14 8244 vpalignr $4,%ymm5,%ymm5,%ymm5 8245 vpalignr $8,%ymm9,%ymm9,%ymm9 8246 addq %rax,%r15 8247 adcq %rdx,%r9 8248 vpalignr $12,%ymm13,%ymm13,%ymm13 8249 vpalignr $4,%ymm4,%ymm4,%ymm4 8250 vpalignr $8,%ymm8,%ymm8,%ymm8 8251 vpalignr $12,%ymm12,%ymm12,%ymm12 8252 vmovdqa %ymm8,128(%rbp) 8253 vmovdqa .rol16(%rip),%ymm8 8254 vpaddd %ymm7,%ymm3,%ymm3 8255 vpaddd %ymm6,%ymm2,%ymm2 8256 vpaddd %ymm5,%ymm1,%ymm1 8257 vpaddd %ymm4,%ymm0,%ymm0 8258 vpxor %ymm3,%ymm15,%ymm15 8259 vpxor %ymm2,%ymm14,%ymm14 8260 vpxor %ymm1,%ymm13,%ymm13 8261 vpxor %ymm0,%ymm12,%ymm12 8262 vpshufb %ymm8,%ymm15,%ymm15 8263 vpshufb %ymm8,%ymm14,%ymm14 8264 vpshufb %ymm8,%ymm13,%ymm13 8265 vpshufb %ymm8,%ymm12,%ymm12 8266 vmovdqa 128(%rbp),%ymm8 8267 vpaddd %ymm15,%ymm11,%ymm11 8268 movq %r13,%r10 8269 movq %r14,%r11 8270 movq 
%r15,%r12 8271 andq $3,%r12 8272 movq %r15,%r13 8273 andq $-4,%r13 8274 movq %r9,%r14 8275 shrdq $2,%r9,%r15 8276 shrq $2,%r9 8277 addq %r13,%r10 8278 adcq %r14,%r11 8279 adcq $0,%r12 8280 addq %r15,%r10 8281 adcq %r9,%r11 8282 adcq $0,%r12 8283 vpaddd %ymm14,%ymm10,%ymm10 8284 vpaddd %ymm13,%ymm9,%ymm9 8285 vpaddd %ymm12,%ymm8,%ymm8 8286 vpxor %ymm11,%ymm7,%ymm7 8287 vpxor %ymm10,%ymm6,%ymm6 8288 vpxor %ymm9,%ymm5,%ymm5 8289 vpxor %ymm8,%ymm4,%ymm4 8290 vmovdqa %ymm8,128(%rbp) 8291 vpsrld $20,%ymm7,%ymm8 8292 vpslld $32-20,%ymm7,%ymm7 8293 vpxor %ymm8,%ymm7,%ymm7 8294 vpsrld $20,%ymm6,%ymm8 8295 vpslld $32-20,%ymm6,%ymm6 8296 vpxor %ymm8,%ymm6,%ymm6 8297 vpsrld $20,%ymm5,%ymm8 8298 vpslld $32-20,%ymm5,%ymm5 8299 vpxor %ymm8,%ymm5,%ymm5 8300 vpsrld $20,%ymm4,%ymm8 8301 vpslld $32-20,%ymm4,%ymm4 8302 vpxor %ymm8,%ymm4,%ymm4 8303 addq 16(%rdi),%r10 8304 adcq 8+16(%rdi),%r11 8305 adcq $1,%r12 8306 vmovdqa .rol8(%rip),%ymm8 8307 vpaddd %ymm7,%ymm3,%ymm3 8308 vpaddd %ymm6,%ymm2,%ymm2 8309 vpaddd %ymm5,%ymm1,%ymm1 8310 vpaddd %ymm4,%ymm0,%ymm0 8311 vpxor %ymm3,%ymm15,%ymm15 8312 vpxor %ymm2,%ymm14,%ymm14 8313 vpxor %ymm1,%ymm13,%ymm13 8314 vpxor %ymm0,%ymm12,%ymm12 8315 vpshufb %ymm8,%ymm15,%ymm15 8316 vpshufb %ymm8,%ymm14,%ymm14 8317 vpshufb %ymm8,%ymm13,%ymm13 8318 vpshufb %ymm8,%ymm12,%ymm12 8319 vmovdqa 128(%rbp),%ymm8 8320 vpaddd %ymm15,%ymm11,%ymm11 8321 vpaddd %ymm14,%ymm10,%ymm10 8322 vpaddd %ymm13,%ymm9,%ymm9 8323 vpaddd %ymm12,%ymm8,%ymm8 8324 vpxor %ymm11,%ymm7,%ymm7 8325 vpxor %ymm10,%ymm6,%ymm6 8326 movq 0+0(%rbp),%rdx 8327 movq %rdx,%r15 8328 mulxq %r10,%r13,%r14 8329 mulxq %r11,%rax,%rdx 8330 imulq %r12,%r15 8331 addq %rax,%r14 8332 adcq %rdx,%r15 8333 vpxor %ymm9,%ymm5,%ymm5 8334 vpxor %ymm8,%ymm4,%ymm4 8335 vmovdqa %ymm8,128(%rbp) 8336 vpsrld $25,%ymm7,%ymm8 8337 vpslld $32-25,%ymm7,%ymm7 8338 vpxor %ymm8,%ymm7,%ymm7 8339 vpsrld $25,%ymm6,%ymm8 8340 vpslld $32-25,%ymm6,%ymm6 8341 vpxor %ymm8,%ymm6,%ymm6 8342 vpsrld $25,%ymm5,%ymm8 8343 vpslld $32-25,%ymm5,%ymm5 8344 vpxor %ymm8,%ymm5,%ymm5 8345 vpsrld $25,%ymm4,%ymm8 8346 vpslld $32-25,%ymm4,%ymm4 8347 vpxor %ymm8,%ymm4,%ymm4 8348 vmovdqa 128(%rbp),%ymm8 8349 vpalignr $12,%ymm7,%ymm7,%ymm7 8350 vpalignr $8,%ymm11,%ymm11,%ymm11 8351 vpalignr $4,%ymm15,%ymm15,%ymm15 8352 vpalignr $12,%ymm6,%ymm6,%ymm6 8353 movq 8+0(%rbp),%rdx 8354 mulxq %r10,%r10,%rax 8355 addq %r10,%r14 8356 mulxq %r11,%r11,%r9 8357 adcq %r11,%r15 8358 adcq $0,%r9 8359 imulq %r12,%rdx 8360 vpalignr $8,%ymm10,%ymm10,%ymm10 8361 vpalignr $4,%ymm14,%ymm14,%ymm14 8362 vpalignr $12,%ymm5,%ymm5,%ymm5 8363 vpalignr $8,%ymm9,%ymm9,%ymm9 8364 vpalignr $4,%ymm13,%ymm13,%ymm13 8365 vpalignr $12,%ymm4,%ymm4,%ymm4 8366 vpalignr $8,%ymm8,%ymm8,%ymm8 8367 vpalignr $4,%ymm12,%ymm12,%ymm12 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 addq %rax,%r15 8381 adcq %rdx,%r9 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 movq %r13,%r10 8403 movq %r14,%r11 8404 movq %r15,%r12 8405 andq $3,%r12 8406 movq %r15,%r13 8407 andq $-4,%r13 8408 movq %r9,%r14 8409 shrdq $2,%r9,%r15 8410 shrq $2,%r9 8411 addq %r13,%r10 8412 adcq %r14,%r11 8413 adcq $0,%r12 8414 addq %r15,%r10 8415 adcq %r9,%r11 8416 adcq $0,%r12 8417 8418 leaq 32(%rdi),%rdi 8419 decq %rcx 8420 jg 1b 8421 decq %r8 8422 jge 2b 8423 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 8424 vpaddd 64(%rbp),%ymm7,%ymm7 8425 vpaddd 96(%rbp),%ymm11,%ymm11 8426 vpaddd 256(%rbp),%ymm15,%ymm15 8427 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 8428 vpaddd 64(%rbp),%ymm6,%ymm6 
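/* Note (annotation, not part of the generated output): the vpaddd
   instructions around this point appear to finish the ChaCha20 block
   function for the 512-byte seal tail -- the saved input state (constants,
   key words at 64(%rbp)/96(%rbp), and the per-block counters kept at
   160..256(%rbp)) is added back into the working registers before the key
   stream is permuted and XORed with the plaintext below. */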
8429 vpaddd 96(%rbp),%ymm10,%ymm10 8430 vpaddd 224(%rbp),%ymm14,%ymm14 8431 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8432 vpaddd 64(%rbp),%ymm5,%ymm5 8433 vpaddd 96(%rbp),%ymm9,%ymm9 8434 vpaddd 192(%rbp),%ymm13,%ymm13 8435 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8436 vpaddd 64(%rbp),%ymm4,%ymm4 8437 vpaddd 96(%rbp),%ymm8,%ymm8 8438 vpaddd 160(%rbp),%ymm12,%ymm12 8439 8440 vmovdqa %ymm0,128(%rbp) 8441 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8442 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8443 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8444 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8445 vpxor 0+0(%rsi),%ymm0,%ymm0 8446 vpxor 32+0(%rsi),%ymm3,%ymm3 8447 vpxor 64+0(%rsi),%ymm7,%ymm7 8448 vpxor 96+0(%rsi),%ymm11,%ymm11 8449 vmovdqu %ymm0,0+0(%rdi) 8450 vmovdqu %ymm3,32+0(%rdi) 8451 vmovdqu %ymm7,64+0(%rdi) 8452 vmovdqu %ymm11,96+0(%rdi) 8453 8454 vmovdqa 128(%rbp),%ymm0 8455 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8456 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8457 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8458 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8459 vpxor 0+128(%rsi),%ymm3,%ymm3 8460 vpxor 32+128(%rsi),%ymm2,%ymm2 8461 vpxor 64+128(%rsi),%ymm6,%ymm6 8462 vpxor 96+128(%rsi),%ymm10,%ymm10 8463 vmovdqu %ymm3,0+128(%rdi) 8464 vmovdqu %ymm2,32+128(%rdi) 8465 vmovdqu %ymm6,64+128(%rdi) 8466 vmovdqu %ymm10,96+128(%rdi) 8467 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8468 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8469 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8470 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8471 vpxor 0+256(%rsi),%ymm3,%ymm3 8472 vpxor 32+256(%rsi),%ymm1,%ymm1 8473 vpxor 64+256(%rsi),%ymm5,%ymm5 8474 vpxor 96+256(%rsi),%ymm9,%ymm9 8475 vmovdqu %ymm3,0+256(%rdi) 8476 vmovdqu %ymm1,32+256(%rdi) 8477 vmovdqu %ymm5,64+256(%rdi) 8478 vmovdqu %ymm9,96+256(%rdi) 8479 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8480 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8481 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8482 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8483 vmovdqa %ymm3,%ymm8 8484 8485 movq $384,%rcx 8486 leaq 384(%rsi),%rsi 8487 subq $384,%rbx 8488 jmp seal_avx2_hash 8489 8490 seal_avx2_320: 8491 vmovdqa %ymm0,%ymm1 8492 vmovdqa %ymm0,%ymm2 8493 vmovdqa %ymm4,%ymm5 8494 vmovdqa %ymm4,%ymm6 8495 vmovdqa %ymm8,%ymm9 8496 vmovdqa %ymm8,%ymm10 8497 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 8498 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 8499 vmovdqa %ymm4,%ymm7 8500 vmovdqa %ymm8,%ymm11 8501 vmovdqa %ymm12,160(%rbp) 8502 vmovdqa %ymm13,192(%rbp) 8503 vmovdqa %ymm14,224(%rbp) 8504 movq $10,%r10 8505 1: 8506 vpaddd %ymm4,%ymm0,%ymm0 8507 vpxor %ymm0,%ymm12,%ymm12 8508 vpshufb .rol16(%rip),%ymm12,%ymm12 8509 vpaddd %ymm12,%ymm8,%ymm8 8510 vpxor %ymm8,%ymm4,%ymm4 8511 vpsrld $20,%ymm4,%ymm3 8512 vpslld $12,%ymm4,%ymm4 8513 vpxor %ymm3,%ymm4,%ymm4 8514 vpaddd %ymm4,%ymm0,%ymm0 8515 vpxor %ymm0,%ymm12,%ymm12 8516 vpshufb .rol8(%rip),%ymm12,%ymm12 8517 vpaddd %ymm12,%ymm8,%ymm8 8518 vpxor %ymm8,%ymm4,%ymm4 8519 vpslld $7,%ymm4,%ymm3 8520 vpsrld $25,%ymm4,%ymm4 8521 vpxor %ymm3,%ymm4,%ymm4 8522 vpalignr $12,%ymm12,%ymm12,%ymm12 8523 vpalignr $8,%ymm8,%ymm8,%ymm8 8524 vpalignr $4,%ymm4,%ymm4,%ymm4 8525 vpaddd %ymm5,%ymm1,%ymm1 8526 vpxor %ymm1,%ymm13,%ymm13 8527 vpshufb .rol16(%rip),%ymm13,%ymm13 8528 vpaddd %ymm13,%ymm9,%ymm9 8529 vpxor %ymm9,%ymm5,%ymm5 8530 vpsrld $20,%ymm5,%ymm3 8531 vpslld $12,%ymm5,%ymm5 8532 vpxor %ymm3,%ymm5,%ymm5 8533 vpaddd %ymm5,%ymm1,%ymm1 8534 vpxor %ymm1,%ymm13,%ymm13 8535 vpshufb .rol8(%rip),%ymm13,%ymm13 8536 vpaddd %ymm13,%ymm9,%ymm9 8537 vpxor %ymm9,%ymm5,%ymm5 8538 vpslld $7,%ymm5,%ymm3 8539 vpsrld $25,%ymm5,%ymm5 8540 vpxor %ymm3,%ymm5,%ymm5 8541 vpalignr 
$12,%ymm13,%ymm13,%ymm13 8542 vpalignr $8,%ymm9,%ymm9,%ymm9 8543 vpalignr $4,%ymm5,%ymm5,%ymm5 8544 vpaddd %ymm6,%ymm2,%ymm2 8545 vpxor %ymm2,%ymm14,%ymm14 8546 vpshufb .rol16(%rip),%ymm14,%ymm14 8547 vpaddd %ymm14,%ymm10,%ymm10 8548 vpxor %ymm10,%ymm6,%ymm6 8549 vpsrld $20,%ymm6,%ymm3 8550 vpslld $12,%ymm6,%ymm6 8551 vpxor %ymm3,%ymm6,%ymm6 8552 vpaddd %ymm6,%ymm2,%ymm2 8553 vpxor %ymm2,%ymm14,%ymm14 8554 vpshufb .rol8(%rip),%ymm14,%ymm14 8555 vpaddd %ymm14,%ymm10,%ymm10 8556 vpxor %ymm10,%ymm6,%ymm6 8557 vpslld $7,%ymm6,%ymm3 8558 vpsrld $25,%ymm6,%ymm6 8559 vpxor %ymm3,%ymm6,%ymm6 8560 vpalignr $12,%ymm14,%ymm14,%ymm14 8561 vpalignr $8,%ymm10,%ymm10,%ymm10 8562 vpalignr $4,%ymm6,%ymm6,%ymm6 8563 vpaddd %ymm4,%ymm0,%ymm0 8564 vpxor %ymm0,%ymm12,%ymm12 8565 vpshufb .rol16(%rip),%ymm12,%ymm12 8566 vpaddd %ymm12,%ymm8,%ymm8 8567 vpxor %ymm8,%ymm4,%ymm4 8568 vpsrld $20,%ymm4,%ymm3 8569 vpslld $12,%ymm4,%ymm4 8570 vpxor %ymm3,%ymm4,%ymm4 8571 vpaddd %ymm4,%ymm0,%ymm0 8572 vpxor %ymm0,%ymm12,%ymm12 8573 vpshufb .rol8(%rip),%ymm12,%ymm12 8574 vpaddd %ymm12,%ymm8,%ymm8 8575 vpxor %ymm8,%ymm4,%ymm4 8576 vpslld $7,%ymm4,%ymm3 8577 vpsrld $25,%ymm4,%ymm4 8578 vpxor %ymm3,%ymm4,%ymm4 8579 vpalignr $4,%ymm12,%ymm12,%ymm12 8580 vpalignr $8,%ymm8,%ymm8,%ymm8 8581 vpalignr $12,%ymm4,%ymm4,%ymm4 8582 vpaddd %ymm5,%ymm1,%ymm1 8583 vpxor %ymm1,%ymm13,%ymm13 8584 vpshufb .rol16(%rip),%ymm13,%ymm13 8585 vpaddd %ymm13,%ymm9,%ymm9 8586 vpxor %ymm9,%ymm5,%ymm5 8587 vpsrld $20,%ymm5,%ymm3 8588 vpslld $12,%ymm5,%ymm5 8589 vpxor %ymm3,%ymm5,%ymm5 8590 vpaddd %ymm5,%ymm1,%ymm1 8591 vpxor %ymm1,%ymm13,%ymm13 8592 vpshufb .rol8(%rip),%ymm13,%ymm13 8593 vpaddd %ymm13,%ymm9,%ymm9 8594 vpxor %ymm9,%ymm5,%ymm5 8595 vpslld $7,%ymm5,%ymm3 8596 vpsrld $25,%ymm5,%ymm5 8597 vpxor %ymm3,%ymm5,%ymm5 8598 vpalignr $4,%ymm13,%ymm13,%ymm13 8599 vpalignr $8,%ymm9,%ymm9,%ymm9 8600 vpalignr $12,%ymm5,%ymm5,%ymm5 8601 vpaddd %ymm6,%ymm2,%ymm2 8602 vpxor %ymm2,%ymm14,%ymm14 8603 vpshufb .rol16(%rip),%ymm14,%ymm14 8604 vpaddd %ymm14,%ymm10,%ymm10 8605 vpxor %ymm10,%ymm6,%ymm6 8606 vpsrld $20,%ymm6,%ymm3 8607 vpslld $12,%ymm6,%ymm6 8608 vpxor %ymm3,%ymm6,%ymm6 8609 vpaddd %ymm6,%ymm2,%ymm2 8610 vpxor %ymm2,%ymm14,%ymm14 8611 vpshufb .rol8(%rip),%ymm14,%ymm14 8612 vpaddd %ymm14,%ymm10,%ymm10 8613 vpxor %ymm10,%ymm6,%ymm6 8614 vpslld $7,%ymm6,%ymm3 8615 vpsrld $25,%ymm6,%ymm6 8616 vpxor %ymm3,%ymm6,%ymm6 8617 vpalignr $4,%ymm14,%ymm14,%ymm14 8618 vpalignr $8,%ymm10,%ymm10,%ymm10 8619 vpalignr $12,%ymm6,%ymm6,%ymm6 8620 8621 decq %r10 8622 jne 1b 8623 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8624 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8625 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 8626 vpaddd %ymm7,%ymm4,%ymm4 8627 vpaddd %ymm7,%ymm5,%ymm5 8628 vpaddd %ymm7,%ymm6,%ymm6 8629 vpaddd %ymm11,%ymm8,%ymm8 8630 vpaddd %ymm11,%ymm9,%ymm9 8631 vpaddd %ymm11,%ymm10,%ymm10 8632 vpaddd 160(%rbp),%ymm12,%ymm12 8633 vpaddd 192(%rbp),%ymm13,%ymm13 8634 vpaddd 224(%rbp),%ymm14,%ymm14 8635 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8636 8637 vpand .clamp(%rip),%ymm3,%ymm3 8638 vmovdqa %ymm3,0(%rbp) 8639 8640 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8641 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8642 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8643 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8644 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8645 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8646 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8647 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8648 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8649 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8650 jmp seal_avx2_short 8651 8652 seal_avx2_192: 8653 
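/* Note (annotation, not part of the generated output): seal_avx2_192
   appears to handle short inputs by running two interleaved ChaCha20 states
   (the %ymm0.. and %ymm1.. register sets); the first 32 bytes of key
   stream, masked with .clamp, are stored at 0(%rbp) as the Poly1305 key
   before falling through to seal_avx2_short. */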
vmovdqa %ymm0,%ymm1 8654 vmovdqa %ymm0,%ymm2 8655 vmovdqa %ymm4,%ymm5 8656 vmovdqa %ymm4,%ymm6 8657 vmovdqa %ymm8,%ymm9 8658 vmovdqa %ymm8,%ymm10 8659 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 8660 vmovdqa %ymm12,%ymm11 8661 vmovdqa %ymm13,%ymm15 8662 movq $10,%r10 8663 1: 8664 vpaddd %ymm4,%ymm0,%ymm0 8665 vpxor %ymm0,%ymm12,%ymm12 8666 vpshufb .rol16(%rip),%ymm12,%ymm12 8667 vpaddd %ymm12,%ymm8,%ymm8 8668 vpxor %ymm8,%ymm4,%ymm4 8669 vpsrld $20,%ymm4,%ymm3 8670 vpslld $12,%ymm4,%ymm4 8671 vpxor %ymm3,%ymm4,%ymm4 8672 vpaddd %ymm4,%ymm0,%ymm0 8673 vpxor %ymm0,%ymm12,%ymm12 8674 vpshufb .rol8(%rip),%ymm12,%ymm12 8675 vpaddd %ymm12,%ymm8,%ymm8 8676 vpxor %ymm8,%ymm4,%ymm4 8677 vpslld $7,%ymm4,%ymm3 8678 vpsrld $25,%ymm4,%ymm4 8679 vpxor %ymm3,%ymm4,%ymm4 8680 vpalignr $12,%ymm12,%ymm12,%ymm12 8681 vpalignr $8,%ymm8,%ymm8,%ymm8 8682 vpalignr $4,%ymm4,%ymm4,%ymm4 8683 vpaddd %ymm5,%ymm1,%ymm1 8684 vpxor %ymm1,%ymm13,%ymm13 8685 vpshufb .rol16(%rip),%ymm13,%ymm13 8686 vpaddd %ymm13,%ymm9,%ymm9 8687 vpxor %ymm9,%ymm5,%ymm5 8688 vpsrld $20,%ymm5,%ymm3 8689 vpslld $12,%ymm5,%ymm5 8690 vpxor %ymm3,%ymm5,%ymm5 8691 vpaddd %ymm5,%ymm1,%ymm1 8692 vpxor %ymm1,%ymm13,%ymm13 8693 vpshufb .rol8(%rip),%ymm13,%ymm13 8694 vpaddd %ymm13,%ymm9,%ymm9 8695 vpxor %ymm9,%ymm5,%ymm5 8696 vpslld $7,%ymm5,%ymm3 8697 vpsrld $25,%ymm5,%ymm5 8698 vpxor %ymm3,%ymm5,%ymm5 8699 vpalignr $12,%ymm13,%ymm13,%ymm13 8700 vpalignr $8,%ymm9,%ymm9,%ymm9 8701 vpalignr $4,%ymm5,%ymm5,%ymm5 8702 vpaddd %ymm4,%ymm0,%ymm0 8703 vpxor %ymm0,%ymm12,%ymm12 8704 vpshufb .rol16(%rip),%ymm12,%ymm12 8705 vpaddd %ymm12,%ymm8,%ymm8 8706 vpxor %ymm8,%ymm4,%ymm4 8707 vpsrld $20,%ymm4,%ymm3 8708 vpslld $12,%ymm4,%ymm4 8709 vpxor %ymm3,%ymm4,%ymm4 8710 vpaddd %ymm4,%ymm0,%ymm0 8711 vpxor %ymm0,%ymm12,%ymm12 8712 vpshufb .rol8(%rip),%ymm12,%ymm12 8713 vpaddd %ymm12,%ymm8,%ymm8 8714 vpxor %ymm8,%ymm4,%ymm4 8715 vpslld $7,%ymm4,%ymm3 8716 vpsrld $25,%ymm4,%ymm4 8717 vpxor %ymm3,%ymm4,%ymm4 8718 vpalignr $4,%ymm12,%ymm12,%ymm12 8719 vpalignr $8,%ymm8,%ymm8,%ymm8 8720 vpalignr $12,%ymm4,%ymm4,%ymm4 8721 vpaddd %ymm5,%ymm1,%ymm1 8722 vpxor %ymm1,%ymm13,%ymm13 8723 vpshufb .rol16(%rip),%ymm13,%ymm13 8724 vpaddd %ymm13,%ymm9,%ymm9 8725 vpxor %ymm9,%ymm5,%ymm5 8726 vpsrld $20,%ymm5,%ymm3 8727 vpslld $12,%ymm5,%ymm5 8728 vpxor %ymm3,%ymm5,%ymm5 8729 vpaddd %ymm5,%ymm1,%ymm1 8730 vpxor %ymm1,%ymm13,%ymm13 8731 vpshufb .rol8(%rip),%ymm13,%ymm13 8732 vpaddd %ymm13,%ymm9,%ymm9 8733 vpxor %ymm9,%ymm5,%ymm5 8734 vpslld $7,%ymm5,%ymm3 8735 vpsrld $25,%ymm5,%ymm5 8736 vpxor %ymm3,%ymm5,%ymm5 8737 vpalignr $4,%ymm13,%ymm13,%ymm13 8738 vpalignr $8,%ymm9,%ymm9,%ymm9 8739 vpalignr $12,%ymm5,%ymm5,%ymm5 8740 8741 decq %r10 8742 jne 1b 8743 vpaddd %ymm2,%ymm0,%ymm0 8744 vpaddd %ymm2,%ymm1,%ymm1 8745 vpaddd %ymm6,%ymm4,%ymm4 8746 vpaddd %ymm6,%ymm5,%ymm5 8747 vpaddd %ymm10,%ymm8,%ymm8 8748 vpaddd %ymm10,%ymm9,%ymm9 8749 vpaddd %ymm11,%ymm12,%ymm12 8750 vpaddd %ymm15,%ymm13,%ymm13 8751 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8752 8753 vpand .clamp(%rip),%ymm3,%ymm3 8754 vmovdqa %ymm3,0(%rbp) 8755 8756 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8757 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8758 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8759 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8760 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8761 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8762 seal_avx2_short: 8763 movq %r8,%r8 8764 call poly_hash_ad_internal 8765 xorq %rcx,%rcx 8766 seal_avx2_hash: 8767 cmpq $16,%rcx 8768 jb seal_avx2_short_loop 8769 addq 0(%rdi),%r10 8770 adcq 8+0(%rdi),%r11 8771 adcq $1,%r12 8772 movq 0+0(%rbp),%rax 
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	subq $16,%rcx
	addq $16,%rdi
	jmp seal_avx2_hash
seal_avx2_short_loop:
	cmpq $32,%rbx
	jb seal_avx2_short_tail
	subq $32,%rbx

	vpxor (%rsi),%ymm0,%ymm0
	vmovdqu %ymm0,(%rdi)
	leaq 32(%rsi),%rsi

	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi

	vmovdqa %ymm4,%ymm0
	vmovdqa %ymm8,%ymm4
	vmovdqa %ymm12,%ymm8
	vmovdqa %ymm1,%ymm12
	vmovdqa %ymm5,%ymm1
	vmovdqa %ymm9,%ymm5
	vmovdqa %ymm13,%ymm9
	vmovdqa %ymm2,%ymm13
	vmovdqa %ymm6,%ymm2
	jmp seal_avx2_short_loop
seal_avx2_short_tail:
	cmpq $16,%rbx
	jb 1f
	subq $16,%rbx
	vpxor (%rsi),%xmm0,%xmm3
	vmovdqu %xmm3,(%rdi)
	leaq 16(%rsi),%rsi
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
	vextracti128 $1,%ymm0,%xmm0
1:
	vzeroupper
	jmp seal_sse_tail_16

#endif