#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
/*
 * ChaCha20-Poly1305 AEAD primitives, x86-64 (AT&T/GAS syntax, SysV AMD64 ABI).
 * Machine-generated (perlasm-style) output: the instruction stream is kept
 * byte-for-byte; this review pass only reflows the text and adds comments.
 */
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

chacha20_poly1305_constants:

.align 64
/* ChaCha20 "sigma" constant: ASCII "expand 32-byte k", duplicated so the
 * same table serves both 16-byte (SSE) and 32-byte (AVX2) loads. */
.chacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
/* pshufb byte-permutation masks rotating each 32-bit lane left by 8 bits
 * (.rol8) and 16 bits (.rol16); duplicated for 32-byte AVX2 loads. */
.rol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.rol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
/* Block-counter initializer and per-iteration increment vectors
 * (the low dword is the ChaCha20 block counter). */
.avx2_init:
.long 0,0,0,0
.sse_inc:
.long 1,0,0,0
.avx2_inc:
.long 2,0,0,0,2,0,0,0
/* Poly1305 key clamp: the first two quads mask the "r" half of the key to
 * the required form; the second pair is all-ones (pass-through for the
 * upper 16 bytes). */
.clamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align 16
/* and_masks[k-1] keeps the low k bytes of a 16-byte block, k = 1..16;
 * used to zero the tail of a partial block before hashing. */
.and_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff

/*
 * poly_hash_ad_internal -- absorb the additional authenticated data (AAD)
 * into the Poly1305 accumulator.
 *
 * Internal (non C-callable) calling convention:
 *   %rcx             = pointer to AAD
 *   %r8              = AAD length in bytes
 *   0(%rbp), 8(%rbp) = Poly1305 key limbs r0, r1 (presumably the clamped
 *                      "r" half -- see .clamp above; confirm at call sites)
 * Out:
 *   %r10:%r11:%r12   = 130-bit Poly1305 accumulator (%r12 holds bits >= 128)
 * Clobbers: %rax, %rdx, %r9, %r13-%r15, flags.
 */
.type poly_hash_ad_internal,@function
.align 64
poly_hash_ad_internal:
.cfi_startproc
        xorq %r10,%r10                  /* accumulator := 0 */
        xorq %r11,%r11
        xorq %r12,%r12
        cmpq $13,%r8                    /* 13-byte AAD = TLS record header */
        jne hash_ad_loop
poly_fast_tls_ad:
        /* Fast path: hash exactly 13 bytes as a single padded block.
         * r10 = bytes 0-7; r11 = bytes 5-12 shifted down so only
         * bytes 8-12 remain; r12 = 1 is the 2^(8*13) pad bit. */
        movq (%rcx),%r10
        movq 5(%rcx),%r11
        shrq $24,%r11
        movq $1,%r12
        /* acc = acc * r mod 2^130-5: schoolbook 2x2-limb multiply with the
         * third accumulator limb folded in via imulq, then the usual
         * Poly1305 partial reduction (split at bit 130 with andq $-4 /
         * shrdq $2 and fold the high part back, since 2^130 = 5 mod p). */
        movq 0+0(%rbp),%rax
        movq %rax,%r15
        mulq %r10
        movq %rax,%r13
        movq %rdx,%r14
        movq 0+0(%rbp),%rax
        mulq %r11
        imulq %r12,%r15
        addq %rax,%r14
        adcq %rdx,%r15
        movq 8+0(%rbp),%rax
        movq %rax,%r9
        mulq %r10
        addq %rax,%r14
        adcq $0,%rdx
        movq %rdx,%r10
        movq 8+0(%rbp),%rax
        mulq %r11
        addq %rax,%r15
        adcq $0,%rdx
        imulq %r12,%r9
        addq %r10,%r15
        adcq %rdx,%r9
        movq %r13,%r10
        movq %r14,%r11
        movq %r15,%r12
        andq $3,%r12
        movq %r15,%r13
        andq $-4,%r13
        movq %r9,%r14
        shrdq $2,%r9,%r15
        shrq $2,%r9
        addq %r13,%r10
        adcq %r14,%r11
        adcq $0,%r12
        addq %r15,%r10
        adcq %r9,%r11
        adcq $0,%r12

.byte 0xf3,0xc3                         /* rep ret (two-byte return) */
hash_ad_loop:
        /* General path: absorb one full 16-byte block per iteration. */
        cmpq $16,%r8
        jb hash_ad_tail
        addq 0(%rcx),%r10               /* acc += block (128 bits) */
        adcq 8+0(%rcx),%r11
        adcq $1,%r12                    /* + 2^128 pad bit */
        /* acc = acc * r mod 2^130-5 (same multiply/reduce as above). */
        movq 0+0(%rbp),%rax
        movq %rax,%r15
        mulq %r10
        movq %rax,%r13
        movq %rdx,%r14
        movq 0+0(%rbp),%rax
        mulq %r11
        imulq %r12,%r15
        addq %rax,%r14
        adcq %rdx,%r15
        movq 8+0(%rbp),%rax
        movq %rax,%r9
        mulq %r10
        addq %rax,%r14
        adcq $0,%rdx
        movq %rdx,%r10
        movq 8+0(%rbp),%rax
        mulq %r11
        addq %rax,%r15
        adcq $0,%rdx
        imulq %r12,%r9
        addq %r10,%r15
        adcq %rdx,%r9
        movq %r13,%r10
        movq %r14,%r11
        movq %r15,%r12
        andq $3,%r12
        movq %r15,%r13
        andq $-4,%r13
        movq %r9,%r14
        shrdq $2,%r9,%r15
        shrq $2,%r9
        addq %r13,%r10
        adcq %r14,%r11
        adcq $0,%r12
        addq %r15,%r10
        adcq %r9,%r11
        adcq $0,%r12

        leaq 16(%rcx),%rcx
        subq $16,%r8
        jmp hash_ad_loop
hash_ad_tail:
        cmpq $0,%r8
        je 1f                           /* no tail: done */

        /* Assemble the 1..15 remaining bytes into r14:r13 little-endian by
         * walking backwards from the end of the AAD, shifting the pair left
         * 8 bits per byte consumed. */
        xorq %r13,%r13
        xorq %r14,%r14
        xorq %r15,%r15
        addq %r8,%rcx                   /* rcx = one past last tail byte */
hash_ad_tail_loop:
        shldq $8,%r13,%r14
        shlq $8,%r13
        movzbq -1(%rcx),%r15
        xorq %r15,%r13
        decq %rcx
        decq %r8
        jne hash_ad_tail_loop

        addq %r13,%r10                  /* acc += tail block */
        adcq %r14,%r11
        adcq $1,%r12                    /* + pad bit */
        /* acc = acc * r mod 2^130-5 (same multiply/reduce as above). */
        movq 0+0(%rbp),%rax
        movq %rax,%r15
        mulq %r10
        movq %rax,%r13
        movq %rdx,%r14
        movq 0+0(%rbp),%rax
        mulq %r11
        imulq %r12,%r15
        addq %rax,%r14
        adcq %rdx,%r15
        movq 8+0(%rbp),%rax
        movq %rax,%r9
        mulq %r10
        addq %rax,%r14
        adcq $0,%rdx
        movq %rdx,%r10
        movq 8+0(%rbp),%rax
        mulq %r11
        addq %rax,%r15
        adcq $0,%rdx
        imulq %r12,%r9
        addq %r10,%r15
        adcq %rdx,%r9
        movq %r13,%r10
        movq %r14,%r11
        movq %r15,%r12
        andq $3,%r12
        movq %r15,%r13
        andq $-4,%r13
        movq %r9,%r14
        shrdq $2,%r9,%r15
        shrq $2,%r9
        addq %r13,%r10
        adcq %r14,%r11
        adcq $0,%r12
        addq %r15,%r10
        adcq %r9,%r11
        adcq $0,%r12


1:
.byte 0xf3,0xc3                         /* rep ret */
.cfi_endproc
.size poly_hash_ad_internal, .-poly_hash_ad_internal

/*
 * chacha20_poly1305_open -- exported AEAD open (decrypt+authenticate) entry.
 * NOTE(review): the body of this function continues past the end of this
 * chunk; comments here cover only the visible prologue. Standard SysV
 * prologue: preserve all callee-saved GPRs, stash %r9 (used later as a
 * pointer to key/nonce material), and reserve a 288+32-byte scratch frame
 * (the extra 32 bytes allow the frame base to be 32-byte aligned by the
 * leaq/andq that follow).
 */
.globl chacha20_poly1305_open
.hidden chacha20_poly1305_open
.type chacha20_poly1305_open,@function
.align 64
chacha20_poly1305_open:
.cfi_startproc
        pushq %rbp
.cfi_adjust_cfa_offset 8
        pushq %rbx
.cfi_adjust_cfa_offset 8
        pushq %r12
.cfi_adjust_cfa_offset 8
        pushq %r13
.cfi_adjust_cfa_offset 8
        pushq %r14
.cfi_adjust_cfa_offset 8
        pushq %r15
.cfi_adjust_cfa_offset 8


        pushq %r9
.cfi_adjust_cfa_offset 8
        subq $288 + 32,%rsp
.cfi_adjust_cfa_offset 288 + 32
.cfi_offset rbp, -16
.cfi_offset rbx, -24
.cfi_offset r12, -32
.cfi_offset r13, -40
.cfi_offset r14, -48
.cfi_offset r15, -56
245 leaq 32(%rsp),%rbp 246 andq $-32,%rbp 247 movq %rdx,8+32(%rbp) 248 movq %r8,0+32(%rbp) 249 movq %rdx,%rbx 250 251 movl OPENSSL_ia32cap_P+8(%rip),%eax 252 andl $288,%eax 253 xorl $288,%eax 254 jz chacha20_poly1305_open_avx2 255 256 1: 257 cmpq $128,%rbx 258 jbe open_sse_128 259 260 movdqa .chacha20_consts(%rip),%xmm0 261 movdqu 0(%r9),%xmm4 262 movdqu 16(%r9),%xmm8 263 movdqu 32(%r9),%xmm12 264 movdqa %xmm12,%xmm7 265 266 movdqa %xmm4,48(%rbp) 267 movdqa %xmm8,64(%rbp) 268 movdqa %xmm12,96(%rbp) 269 movq $10,%r10 270 1: 271 paddd %xmm4,%xmm0 272 pxor %xmm0,%xmm12 273 pshufb .rol16(%rip),%xmm12 274 paddd %xmm12,%xmm8 275 pxor %xmm8,%xmm4 276 movdqa %xmm4,%xmm3 277 pslld $12,%xmm3 278 psrld $20,%xmm4 279 pxor %xmm3,%xmm4 280 paddd %xmm4,%xmm0 281 pxor %xmm0,%xmm12 282 pshufb .rol8(%rip),%xmm12 283 paddd %xmm12,%xmm8 284 pxor %xmm8,%xmm4 285 movdqa %xmm4,%xmm3 286 pslld $7,%xmm3 287 psrld $25,%xmm4 288 pxor %xmm3,%xmm4 289 .byte 102,15,58,15,228,4 290 .byte 102,69,15,58,15,192,8 291 .byte 102,69,15,58,15,228,12 292 paddd %xmm4,%xmm0 293 pxor %xmm0,%xmm12 294 pshufb .rol16(%rip),%xmm12 295 paddd %xmm12,%xmm8 296 pxor %xmm8,%xmm4 297 movdqa %xmm4,%xmm3 298 pslld $12,%xmm3 299 psrld $20,%xmm4 300 pxor %xmm3,%xmm4 301 paddd %xmm4,%xmm0 302 pxor %xmm0,%xmm12 303 pshufb .rol8(%rip),%xmm12 304 paddd %xmm12,%xmm8 305 pxor %xmm8,%xmm4 306 movdqa %xmm4,%xmm3 307 pslld $7,%xmm3 308 psrld $25,%xmm4 309 pxor %xmm3,%xmm4 310 .byte 102,15,58,15,228,12 311 .byte 102,69,15,58,15,192,8 312 .byte 102,69,15,58,15,228,4 313 314 decq %r10 315 jne 1b 316 317 paddd .chacha20_consts(%rip),%xmm0 318 paddd 48(%rbp),%xmm4 319 320 pand .clamp(%rip),%xmm0 321 movdqa %xmm0,0(%rbp) 322 movdqa %xmm4,16(%rbp) 323 324 movq %r8,%r8 325 call poly_hash_ad_internal 326 open_sse_main_loop: 327 cmpq $256,%rbx 328 jb 2f 329 330 movdqa .chacha20_consts(%rip),%xmm0 331 movdqa 48(%rbp),%xmm4 332 movdqa 64(%rbp),%xmm8 333 movdqa %xmm0,%xmm1 334 movdqa %xmm4,%xmm5 335 movdqa %xmm8,%xmm9 336 movdqa %xmm0,%xmm2 
337 movdqa %xmm4,%xmm6 338 movdqa %xmm8,%xmm10 339 movdqa %xmm0,%xmm3 340 movdqa %xmm4,%xmm7 341 movdqa %xmm8,%xmm11 342 movdqa 96(%rbp),%xmm15 343 paddd .sse_inc(%rip),%xmm15 344 movdqa %xmm15,%xmm14 345 paddd .sse_inc(%rip),%xmm14 346 movdqa %xmm14,%xmm13 347 paddd .sse_inc(%rip),%xmm13 348 movdqa %xmm13,%xmm12 349 paddd .sse_inc(%rip),%xmm12 350 movdqa %xmm12,96(%rbp) 351 movdqa %xmm13,112(%rbp) 352 movdqa %xmm14,128(%rbp) 353 movdqa %xmm15,144(%rbp) 354 355 356 357 movq $4,%rcx 358 movq %rsi,%r8 359 1: 360 movdqa %xmm8,80(%rbp) 361 movdqa .rol16(%rip),%xmm8 362 paddd %xmm7,%xmm3 363 paddd %xmm6,%xmm2 364 paddd %xmm5,%xmm1 365 paddd %xmm4,%xmm0 366 pxor %xmm3,%xmm15 367 pxor %xmm2,%xmm14 368 pxor %xmm1,%xmm13 369 pxor %xmm0,%xmm12 370 .byte 102,69,15,56,0,248 371 .byte 102,69,15,56,0,240 372 .byte 102,69,15,56,0,232 373 .byte 102,69,15,56,0,224 374 movdqa 80(%rbp),%xmm8 375 paddd %xmm15,%xmm11 376 paddd %xmm14,%xmm10 377 paddd %xmm13,%xmm9 378 paddd %xmm12,%xmm8 379 pxor %xmm11,%xmm7 380 addq 0(%r8),%r10 381 adcq 8+0(%r8),%r11 382 adcq $1,%r12 383 384 leaq 16(%r8),%r8 385 pxor %xmm10,%xmm6 386 pxor %xmm9,%xmm5 387 pxor %xmm8,%xmm4 388 movdqa %xmm8,80(%rbp) 389 movdqa %xmm7,%xmm8 390 psrld $20,%xmm8 391 pslld $32-20,%xmm7 392 pxor %xmm8,%xmm7 393 movdqa %xmm6,%xmm8 394 psrld $20,%xmm8 395 pslld $32-20,%xmm6 396 pxor %xmm8,%xmm6 397 movdqa %xmm5,%xmm8 398 psrld $20,%xmm8 399 pslld $32-20,%xmm5 400 pxor %xmm8,%xmm5 401 movdqa %xmm4,%xmm8 402 psrld $20,%xmm8 403 pslld $32-20,%xmm4 404 pxor %xmm8,%xmm4 405 movq 0+0(%rbp),%rax 406 movq %rax,%r15 407 mulq %r10 408 movq %rax,%r13 409 movq %rdx,%r14 410 movq 0+0(%rbp),%rax 411 mulq %r11 412 imulq %r12,%r15 413 addq %rax,%r14 414 adcq %rdx,%r15 415 movdqa .rol8(%rip),%xmm8 416 paddd %xmm7,%xmm3 417 paddd %xmm6,%xmm2 418 paddd %xmm5,%xmm1 419 paddd %xmm4,%xmm0 420 pxor %xmm3,%xmm15 421 pxor %xmm2,%xmm14 422 pxor %xmm1,%xmm13 423 pxor %xmm0,%xmm12 424 .byte 102,69,15,56,0,248 425 .byte 102,69,15,56,0,240 426 .byte 
102,69,15,56,0,232 427 .byte 102,69,15,56,0,224 428 movdqa 80(%rbp),%xmm8 429 paddd %xmm15,%xmm11 430 paddd %xmm14,%xmm10 431 paddd %xmm13,%xmm9 432 paddd %xmm12,%xmm8 433 pxor %xmm11,%xmm7 434 pxor %xmm10,%xmm6 435 movq 8+0(%rbp),%rax 436 movq %rax,%r9 437 mulq %r10 438 addq %rax,%r14 439 adcq $0,%rdx 440 movq %rdx,%r10 441 movq 8+0(%rbp),%rax 442 mulq %r11 443 addq %rax,%r15 444 adcq $0,%rdx 445 pxor %xmm9,%xmm5 446 pxor %xmm8,%xmm4 447 movdqa %xmm8,80(%rbp) 448 movdqa %xmm7,%xmm8 449 psrld $25,%xmm8 450 pslld $32-25,%xmm7 451 pxor %xmm8,%xmm7 452 movdqa %xmm6,%xmm8 453 psrld $25,%xmm8 454 pslld $32-25,%xmm6 455 pxor %xmm8,%xmm6 456 movdqa %xmm5,%xmm8 457 psrld $25,%xmm8 458 pslld $32-25,%xmm5 459 pxor %xmm8,%xmm5 460 movdqa %xmm4,%xmm8 461 psrld $25,%xmm8 462 pslld $32-25,%xmm4 463 pxor %xmm8,%xmm4 464 movdqa 80(%rbp),%xmm8 465 imulq %r12,%r9 466 addq %r10,%r15 467 adcq %rdx,%r9 468 .byte 102,15,58,15,255,4 469 .byte 102,69,15,58,15,219,8 470 .byte 102,69,15,58,15,255,12 471 .byte 102,15,58,15,246,4 472 .byte 102,69,15,58,15,210,8 473 .byte 102,69,15,58,15,246,12 474 .byte 102,15,58,15,237,4 475 .byte 102,69,15,58,15,201,8 476 .byte 102,69,15,58,15,237,12 477 .byte 102,15,58,15,228,4 478 .byte 102,69,15,58,15,192,8 479 .byte 102,69,15,58,15,228,12 480 movdqa %xmm8,80(%rbp) 481 movdqa .rol16(%rip),%xmm8 482 paddd %xmm7,%xmm3 483 paddd %xmm6,%xmm2 484 paddd %xmm5,%xmm1 485 paddd %xmm4,%xmm0 486 pxor %xmm3,%xmm15 487 pxor %xmm2,%xmm14 488 movq %r13,%r10 489 movq %r14,%r11 490 movq %r15,%r12 491 andq $3,%r12 492 movq %r15,%r13 493 andq $-4,%r13 494 movq %r9,%r14 495 shrdq $2,%r9,%r15 496 shrq $2,%r9 497 addq %r13,%r10 498 adcq %r14,%r11 499 adcq $0,%r12 500 addq %r15,%r10 501 adcq %r9,%r11 502 adcq $0,%r12 503 pxor %xmm1,%xmm13 504 pxor %xmm0,%xmm12 505 .byte 102,69,15,56,0,248 506 .byte 102,69,15,56,0,240 507 .byte 102,69,15,56,0,232 508 .byte 102,69,15,56,0,224 509 movdqa 80(%rbp),%xmm8 510 paddd %xmm15,%xmm11 511 paddd %xmm14,%xmm10 512 paddd %xmm13,%xmm9 513 
paddd %xmm12,%xmm8 514 pxor %xmm11,%xmm7 515 pxor %xmm10,%xmm6 516 pxor %xmm9,%xmm5 517 pxor %xmm8,%xmm4 518 movdqa %xmm8,80(%rbp) 519 movdqa %xmm7,%xmm8 520 psrld $20,%xmm8 521 pslld $32-20,%xmm7 522 pxor %xmm8,%xmm7 523 movdqa %xmm6,%xmm8 524 psrld $20,%xmm8 525 pslld $32-20,%xmm6 526 pxor %xmm8,%xmm6 527 movdqa %xmm5,%xmm8 528 psrld $20,%xmm8 529 pslld $32-20,%xmm5 530 pxor %xmm8,%xmm5 531 movdqa %xmm4,%xmm8 532 psrld $20,%xmm8 533 pslld $32-20,%xmm4 534 pxor %xmm8,%xmm4 535 movdqa .rol8(%rip),%xmm8 536 paddd %xmm7,%xmm3 537 paddd %xmm6,%xmm2 538 paddd %xmm5,%xmm1 539 paddd %xmm4,%xmm0 540 pxor %xmm3,%xmm15 541 pxor %xmm2,%xmm14 542 pxor %xmm1,%xmm13 543 pxor %xmm0,%xmm12 544 .byte 102,69,15,56,0,248 545 .byte 102,69,15,56,0,240 546 .byte 102,69,15,56,0,232 547 .byte 102,69,15,56,0,224 548 movdqa 80(%rbp),%xmm8 549 paddd %xmm15,%xmm11 550 paddd %xmm14,%xmm10 551 paddd %xmm13,%xmm9 552 paddd %xmm12,%xmm8 553 pxor %xmm11,%xmm7 554 pxor %xmm10,%xmm6 555 pxor %xmm9,%xmm5 556 pxor %xmm8,%xmm4 557 movdqa %xmm8,80(%rbp) 558 movdqa %xmm7,%xmm8 559 psrld $25,%xmm8 560 pslld $32-25,%xmm7 561 pxor %xmm8,%xmm7 562 movdqa %xmm6,%xmm8 563 psrld $25,%xmm8 564 pslld $32-25,%xmm6 565 pxor %xmm8,%xmm6 566 movdqa %xmm5,%xmm8 567 psrld $25,%xmm8 568 pslld $32-25,%xmm5 569 pxor %xmm8,%xmm5 570 movdqa %xmm4,%xmm8 571 psrld $25,%xmm8 572 pslld $32-25,%xmm4 573 pxor %xmm8,%xmm4 574 movdqa 80(%rbp),%xmm8 575 .byte 102,15,58,15,255,12 576 .byte 102,69,15,58,15,219,8 577 .byte 102,69,15,58,15,255,4 578 .byte 102,15,58,15,246,12 579 .byte 102,69,15,58,15,210,8 580 .byte 102,69,15,58,15,246,4 581 .byte 102,15,58,15,237,12 582 .byte 102,69,15,58,15,201,8 583 .byte 102,69,15,58,15,237,4 584 .byte 102,15,58,15,228,12 585 .byte 102,69,15,58,15,192,8 586 .byte 102,69,15,58,15,228,4 587 588 decq %rcx 589 jge 1b 590 addq 0(%r8),%r10 591 adcq 8+0(%r8),%r11 592 adcq $1,%r12 593 movq 0+0(%rbp),%rax 594 movq %rax,%r15 595 mulq %r10 596 movq %rax,%r13 597 movq %rdx,%r14 598 movq 0+0(%rbp),%rax 599 mulq 
%r11 600 imulq %r12,%r15 601 addq %rax,%r14 602 adcq %rdx,%r15 603 movq 8+0(%rbp),%rax 604 movq %rax,%r9 605 mulq %r10 606 addq %rax,%r14 607 adcq $0,%rdx 608 movq %rdx,%r10 609 movq 8+0(%rbp),%rax 610 mulq %r11 611 addq %rax,%r15 612 adcq $0,%rdx 613 imulq %r12,%r9 614 addq %r10,%r15 615 adcq %rdx,%r9 616 movq %r13,%r10 617 movq %r14,%r11 618 movq %r15,%r12 619 andq $3,%r12 620 movq %r15,%r13 621 andq $-4,%r13 622 movq %r9,%r14 623 shrdq $2,%r9,%r15 624 shrq $2,%r9 625 addq %r13,%r10 626 adcq %r14,%r11 627 adcq $0,%r12 628 addq %r15,%r10 629 adcq %r9,%r11 630 adcq $0,%r12 631 632 leaq 16(%r8),%r8 633 cmpq $-6,%rcx 634 jg 1b 635 paddd .chacha20_consts(%rip),%xmm3 636 paddd 48(%rbp),%xmm7 637 paddd 64(%rbp),%xmm11 638 paddd 144(%rbp),%xmm15 639 paddd .chacha20_consts(%rip),%xmm2 640 paddd 48(%rbp),%xmm6 641 paddd 64(%rbp),%xmm10 642 paddd 128(%rbp),%xmm14 643 paddd .chacha20_consts(%rip),%xmm1 644 paddd 48(%rbp),%xmm5 645 paddd 64(%rbp),%xmm9 646 paddd 112(%rbp),%xmm13 647 paddd .chacha20_consts(%rip),%xmm0 648 paddd 48(%rbp),%xmm4 649 paddd 64(%rbp),%xmm8 650 paddd 96(%rbp),%xmm12 651 movdqa %xmm12,80(%rbp) 652 movdqu 0 + 0(%rsi),%xmm12 653 pxor %xmm3,%xmm12 654 movdqu %xmm12,0 + 0(%rdi) 655 movdqu 16 + 0(%rsi),%xmm12 656 pxor %xmm7,%xmm12 657 movdqu %xmm12,16 + 0(%rdi) 658 movdqu 32 + 0(%rsi),%xmm12 659 pxor %xmm11,%xmm12 660 movdqu %xmm12,32 + 0(%rdi) 661 movdqu 48 + 0(%rsi),%xmm12 662 pxor %xmm15,%xmm12 663 movdqu %xmm12,48 + 0(%rdi) 664 movdqu 0 + 64(%rsi),%xmm3 665 movdqu 16 + 64(%rsi),%xmm7 666 movdqu 32 + 64(%rsi),%xmm11 667 movdqu 48 + 64(%rsi),%xmm15 668 pxor %xmm3,%xmm2 669 pxor %xmm7,%xmm6 670 pxor %xmm11,%xmm10 671 pxor %xmm14,%xmm15 672 movdqu %xmm2,0 + 64(%rdi) 673 movdqu %xmm6,16 + 64(%rdi) 674 movdqu %xmm10,32 + 64(%rdi) 675 movdqu %xmm15,48 + 64(%rdi) 676 movdqu 0 + 128(%rsi),%xmm3 677 movdqu 16 + 128(%rsi),%xmm7 678 movdqu 32 + 128(%rsi),%xmm11 679 movdqu 48 + 128(%rsi),%xmm15 680 pxor %xmm3,%xmm1 681 pxor %xmm7,%xmm5 682 pxor %xmm11,%xmm9 683 
pxor %xmm13,%xmm15 684 movdqu %xmm1,0 + 128(%rdi) 685 movdqu %xmm5,16 + 128(%rdi) 686 movdqu %xmm9,32 + 128(%rdi) 687 movdqu %xmm15,48 + 128(%rdi) 688 movdqu 0 + 192(%rsi),%xmm3 689 movdqu 16 + 192(%rsi),%xmm7 690 movdqu 32 + 192(%rsi),%xmm11 691 movdqu 48 + 192(%rsi),%xmm15 692 pxor %xmm3,%xmm0 693 pxor %xmm7,%xmm4 694 pxor %xmm11,%xmm8 695 pxor 80(%rbp),%xmm15 696 movdqu %xmm0,0 + 192(%rdi) 697 movdqu %xmm4,16 + 192(%rdi) 698 movdqu %xmm8,32 + 192(%rdi) 699 movdqu %xmm15,48 + 192(%rdi) 700 701 leaq 256(%rsi),%rsi 702 leaq 256(%rdi),%rdi 703 subq $256,%rbx 704 jmp open_sse_main_loop 705 2: 706 707 testq %rbx,%rbx 708 jz open_sse_finalize 709 cmpq $64,%rbx 710 ja 3f 711 movdqa .chacha20_consts(%rip),%xmm0 712 movdqa 48(%rbp),%xmm4 713 movdqa 64(%rbp),%xmm8 714 movdqa 96(%rbp),%xmm12 715 paddd .sse_inc(%rip),%xmm12 716 movdqa %xmm12,96(%rbp) 717 718 xorq %r8,%r8 719 movq %rbx,%rcx 720 cmpq $16,%rcx 721 jb 2f 722 1: 723 addq 0(%rsi,%r8), %r10 724 adcq 8+0(%rsi,%r8), %r11 725 adcq $1,%r12 726 movq 0+0(%rbp),%rax 727 movq %rax,%r15 728 mulq %r10 729 movq %rax,%r13 730 movq %rdx,%r14 731 movq 0+0(%rbp),%rax 732 mulq %r11 733 imulq %r12,%r15 734 addq %rax,%r14 735 adcq %rdx,%r15 736 movq 8+0(%rbp),%rax 737 movq %rax,%r9 738 mulq %r10 739 addq %rax,%r14 740 adcq $0,%rdx 741 movq %rdx,%r10 742 movq 8+0(%rbp),%rax 743 mulq %r11 744 addq %rax,%r15 745 adcq $0,%rdx 746 imulq %r12,%r9 747 addq %r10,%r15 748 adcq %rdx,%r9 749 movq %r13,%r10 750 movq %r14,%r11 751 movq %r15,%r12 752 andq $3,%r12 753 movq %r15,%r13 754 andq $-4,%r13 755 movq %r9,%r14 756 shrdq $2,%r9,%r15 757 shrq $2,%r9 758 addq %r13,%r10 759 adcq %r14,%r11 760 adcq $0,%r12 761 addq %r15,%r10 762 adcq %r9,%r11 763 adcq $0,%r12 764 765 subq $16,%rcx 766 2: 767 addq $16,%r8 768 paddd %xmm4,%xmm0 769 pxor %xmm0,%xmm12 770 pshufb .rol16(%rip),%xmm12 771 paddd %xmm12,%xmm8 772 pxor %xmm8,%xmm4 773 movdqa %xmm4,%xmm3 774 pslld $12,%xmm3 775 psrld $20,%xmm4 776 pxor %xmm3,%xmm4 777 paddd %xmm4,%xmm0 778 pxor 
%xmm0,%xmm12 779 pshufb .rol8(%rip),%xmm12 780 paddd %xmm12,%xmm8 781 pxor %xmm8,%xmm4 782 movdqa %xmm4,%xmm3 783 pslld $7,%xmm3 784 psrld $25,%xmm4 785 pxor %xmm3,%xmm4 786 .byte 102,15,58,15,228,4 787 .byte 102,69,15,58,15,192,8 788 .byte 102,69,15,58,15,228,12 789 paddd %xmm4,%xmm0 790 pxor %xmm0,%xmm12 791 pshufb .rol16(%rip),%xmm12 792 paddd %xmm12,%xmm8 793 pxor %xmm8,%xmm4 794 movdqa %xmm4,%xmm3 795 pslld $12,%xmm3 796 psrld $20,%xmm4 797 pxor %xmm3,%xmm4 798 paddd %xmm4,%xmm0 799 pxor %xmm0,%xmm12 800 pshufb .rol8(%rip),%xmm12 801 paddd %xmm12,%xmm8 802 pxor %xmm8,%xmm4 803 movdqa %xmm4,%xmm3 804 pslld $7,%xmm3 805 psrld $25,%xmm4 806 pxor %xmm3,%xmm4 807 .byte 102,15,58,15,228,12 808 .byte 102,69,15,58,15,192,8 809 .byte 102,69,15,58,15,228,4 810 811 cmpq $16,%rcx 812 jae 1b 813 cmpq $160,%r8 814 jne 2b 815 paddd .chacha20_consts(%rip),%xmm0 816 paddd 48(%rbp),%xmm4 817 paddd 64(%rbp),%xmm8 818 paddd 96(%rbp),%xmm12 819 820 jmp open_sse_tail_64_dec_loop 821 3: 822 cmpq $128,%rbx 823 ja 3f 824 movdqa .chacha20_consts(%rip),%xmm0 825 movdqa 48(%rbp),%xmm4 826 movdqa 64(%rbp),%xmm8 827 movdqa %xmm0,%xmm1 828 movdqa %xmm4,%xmm5 829 movdqa %xmm8,%xmm9 830 movdqa 96(%rbp),%xmm13 831 paddd .sse_inc(%rip),%xmm13 832 movdqa %xmm13,%xmm12 833 paddd .sse_inc(%rip),%xmm12 834 movdqa %xmm12,96(%rbp) 835 movdqa %xmm13,112(%rbp) 836 837 movq %rbx,%rcx 838 andq $-16,%rcx 839 xorq %r8,%r8 840 1: 841 addq 0(%rsi,%r8), %r10 842 adcq 8+0(%rsi,%r8), %r11 843 adcq $1,%r12 844 movq 0+0(%rbp),%rax 845 movq %rax,%r15 846 mulq %r10 847 movq %rax,%r13 848 movq %rdx,%r14 849 movq 0+0(%rbp),%rax 850 mulq %r11 851 imulq %r12,%r15 852 addq %rax,%r14 853 adcq %rdx,%r15 854 movq 8+0(%rbp),%rax 855 movq %rax,%r9 856 mulq %r10 857 addq %rax,%r14 858 adcq $0,%rdx 859 movq %rdx,%r10 860 movq 8+0(%rbp),%rax 861 mulq %r11 862 addq %rax,%r15 863 adcq $0,%rdx 864 imulq %r12,%r9 865 addq %r10,%r15 866 adcq %rdx,%r9 867 movq %r13,%r10 868 movq %r14,%r11 869 movq %r15,%r12 870 andq $3,%r12 871 movq 
%r15,%r13 872 andq $-4,%r13 873 movq %r9,%r14 874 shrdq $2,%r9,%r15 875 shrq $2,%r9 876 addq %r13,%r10 877 adcq %r14,%r11 878 adcq $0,%r12 879 addq %r15,%r10 880 adcq %r9,%r11 881 adcq $0,%r12 882 883 2: 884 addq $16,%r8 885 paddd %xmm4,%xmm0 886 pxor %xmm0,%xmm12 887 pshufb .rol16(%rip),%xmm12 888 paddd %xmm12,%xmm8 889 pxor %xmm8,%xmm4 890 movdqa %xmm4,%xmm3 891 pslld $12,%xmm3 892 psrld $20,%xmm4 893 pxor %xmm3,%xmm4 894 paddd %xmm4,%xmm0 895 pxor %xmm0,%xmm12 896 pshufb .rol8(%rip),%xmm12 897 paddd %xmm12,%xmm8 898 pxor %xmm8,%xmm4 899 movdqa %xmm4,%xmm3 900 pslld $7,%xmm3 901 psrld $25,%xmm4 902 pxor %xmm3,%xmm4 903 .byte 102,15,58,15,228,4 904 .byte 102,69,15,58,15,192,8 905 .byte 102,69,15,58,15,228,12 906 paddd %xmm5,%xmm1 907 pxor %xmm1,%xmm13 908 pshufb .rol16(%rip),%xmm13 909 paddd %xmm13,%xmm9 910 pxor %xmm9,%xmm5 911 movdqa %xmm5,%xmm3 912 pslld $12,%xmm3 913 psrld $20,%xmm5 914 pxor %xmm3,%xmm5 915 paddd %xmm5,%xmm1 916 pxor %xmm1,%xmm13 917 pshufb .rol8(%rip),%xmm13 918 paddd %xmm13,%xmm9 919 pxor %xmm9,%xmm5 920 movdqa %xmm5,%xmm3 921 pslld $7,%xmm3 922 psrld $25,%xmm5 923 pxor %xmm3,%xmm5 924 .byte 102,15,58,15,237,4 925 .byte 102,69,15,58,15,201,8 926 .byte 102,69,15,58,15,237,12 927 paddd %xmm4,%xmm0 928 pxor %xmm0,%xmm12 929 pshufb .rol16(%rip),%xmm12 930 paddd %xmm12,%xmm8 931 pxor %xmm8,%xmm4 932 movdqa %xmm4,%xmm3 933 pslld $12,%xmm3 934 psrld $20,%xmm4 935 pxor %xmm3,%xmm4 936 paddd %xmm4,%xmm0 937 pxor %xmm0,%xmm12 938 pshufb .rol8(%rip),%xmm12 939 paddd %xmm12,%xmm8 940 pxor %xmm8,%xmm4 941 movdqa %xmm4,%xmm3 942 pslld $7,%xmm3 943 psrld $25,%xmm4 944 pxor %xmm3,%xmm4 945 .byte 102,15,58,15,228,12 946 .byte 102,69,15,58,15,192,8 947 .byte 102,69,15,58,15,228,4 948 paddd %xmm5,%xmm1 949 pxor %xmm1,%xmm13 950 pshufb .rol16(%rip),%xmm13 951 paddd %xmm13,%xmm9 952 pxor %xmm9,%xmm5 953 movdqa %xmm5,%xmm3 954 pslld $12,%xmm3 955 psrld $20,%xmm5 956 pxor %xmm3,%xmm5 957 paddd %xmm5,%xmm1 958 pxor %xmm1,%xmm13 959 pshufb .rol8(%rip),%xmm13 960 
paddd %xmm13,%xmm9 961 pxor %xmm9,%xmm5 962 movdqa %xmm5,%xmm3 963 pslld $7,%xmm3 964 psrld $25,%xmm5 965 pxor %xmm3,%xmm5 966 .byte 102,15,58,15,237,12 967 .byte 102,69,15,58,15,201,8 968 .byte 102,69,15,58,15,237,4 969 970 cmpq %rcx,%r8 971 jb 1b 972 cmpq $160,%r8 973 jne 2b 974 paddd .chacha20_consts(%rip),%xmm1 975 paddd 48(%rbp),%xmm5 976 paddd 64(%rbp),%xmm9 977 paddd 112(%rbp),%xmm13 978 paddd .chacha20_consts(%rip),%xmm0 979 paddd 48(%rbp),%xmm4 980 paddd 64(%rbp),%xmm8 981 paddd 96(%rbp),%xmm12 982 movdqu 0 + 0(%rsi),%xmm3 983 movdqu 16 + 0(%rsi),%xmm7 984 movdqu 32 + 0(%rsi),%xmm11 985 movdqu 48 + 0(%rsi),%xmm15 986 pxor %xmm3,%xmm1 987 pxor %xmm7,%xmm5 988 pxor %xmm11,%xmm9 989 pxor %xmm13,%xmm15 990 movdqu %xmm1,0 + 0(%rdi) 991 movdqu %xmm5,16 + 0(%rdi) 992 movdqu %xmm9,32 + 0(%rdi) 993 movdqu %xmm15,48 + 0(%rdi) 994 995 subq $64,%rbx 996 leaq 64(%rsi),%rsi 997 leaq 64(%rdi),%rdi 998 jmp open_sse_tail_64_dec_loop 999 3: 1000 cmpq $192,%rbx 1001 ja 3f 1002 movdqa .chacha20_consts(%rip),%xmm0 1003 movdqa 48(%rbp),%xmm4 1004 movdqa 64(%rbp),%xmm8 1005 movdqa %xmm0,%xmm1 1006 movdqa %xmm4,%xmm5 1007 movdqa %xmm8,%xmm9 1008 movdqa %xmm0,%xmm2 1009 movdqa %xmm4,%xmm6 1010 movdqa %xmm8,%xmm10 1011 movdqa 96(%rbp),%xmm14 1012 paddd .sse_inc(%rip),%xmm14 1013 movdqa %xmm14,%xmm13 1014 paddd .sse_inc(%rip),%xmm13 1015 movdqa %xmm13,%xmm12 1016 paddd .sse_inc(%rip),%xmm12 1017 movdqa %xmm12,96(%rbp) 1018 movdqa %xmm13,112(%rbp) 1019 movdqa %xmm14,128(%rbp) 1020 1021 movq %rbx,%rcx 1022 movq $160,%r8 1023 cmpq $160,%rcx 1024 cmovgq %r8,%rcx 1025 andq $-16,%rcx 1026 xorq %r8,%r8 1027 1: 1028 addq 0(%rsi,%r8), %r10 1029 adcq 8+0(%rsi,%r8), %r11 1030 adcq $1,%r12 1031 movq 0+0(%rbp),%rax 1032 movq %rax,%r15 1033 mulq %r10 1034 movq %rax,%r13 1035 movq %rdx,%r14 1036 movq 0+0(%rbp),%rax 1037 mulq %r11 1038 imulq %r12,%r15 1039 addq %rax,%r14 1040 adcq %rdx,%r15 1041 movq 8+0(%rbp),%rax 1042 movq %rax,%r9 1043 mulq %r10 1044 addq %rax,%r14 1045 adcq $0,%rdx 1046 movq 
%rdx,%r10 1047 movq 8+0(%rbp),%rax 1048 mulq %r11 1049 addq %rax,%r15 1050 adcq $0,%rdx 1051 imulq %r12,%r9 1052 addq %r10,%r15 1053 adcq %rdx,%r9 1054 movq %r13,%r10 1055 movq %r14,%r11 1056 movq %r15,%r12 1057 andq $3,%r12 1058 movq %r15,%r13 1059 andq $-4,%r13 1060 movq %r9,%r14 1061 shrdq $2,%r9,%r15 1062 shrq $2,%r9 1063 addq %r13,%r10 1064 adcq %r14,%r11 1065 adcq $0,%r12 1066 addq %r15,%r10 1067 adcq %r9,%r11 1068 adcq $0,%r12 1069 1070 2: 1071 addq $16,%r8 1072 paddd %xmm4,%xmm0 1073 pxor %xmm0,%xmm12 1074 pshufb .rol16(%rip),%xmm12 1075 paddd %xmm12,%xmm8 1076 pxor %xmm8,%xmm4 1077 movdqa %xmm4,%xmm3 1078 pslld $12,%xmm3 1079 psrld $20,%xmm4 1080 pxor %xmm3,%xmm4 1081 paddd %xmm4,%xmm0 1082 pxor %xmm0,%xmm12 1083 pshufb .rol8(%rip),%xmm12 1084 paddd %xmm12,%xmm8 1085 pxor %xmm8,%xmm4 1086 movdqa %xmm4,%xmm3 1087 pslld $7,%xmm3 1088 psrld $25,%xmm4 1089 pxor %xmm3,%xmm4 1090 .byte 102,15,58,15,228,4 1091 .byte 102,69,15,58,15,192,8 1092 .byte 102,69,15,58,15,228,12 1093 paddd %xmm5,%xmm1 1094 pxor %xmm1,%xmm13 1095 pshufb .rol16(%rip),%xmm13 1096 paddd %xmm13,%xmm9 1097 pxor %xmm9,%xmm5 1098 movdqa %xmm5,%xmm3 1099 pslld $12,%xmm3 1100 psrld $20,%xmm5 1101 pxor %xmm3,%xmm5 1102 paddd %xmm5,%xmm1 1103 pxor %xmm1,%xmm13 1104 pshufb .rol8(%rip),%xmm13 1105 paddd %xmm13,%xmm9 1106 pxor %xmm9,%xmm5 1107 movdqa %xmm5,%xmm3 1108 pslld $7,%xmm3 1109 psrld $25,%xmm5 1110 pxor %xmm3,%xmm5 1111 .byte 102,15,58,15,237,4 1112 .byte 102,69,15,58,15,201,8 1113 .byte 102,69,15,58,15,237,12 1114 paddd %xmm6,%xmm2 1115 pxor %xmm2,%xmm14 1116 pshufb .rol16(%rip),%xmm14 1117 paddd %xmm14,%xmm10 1118 pxor %xmm10,%xmm6 1119 movdqa %xmm6,%xmm3 1120 pslld $12,%xmm3 1121 psrld $20,%xmm6 1122 pxor %xmm3,%xmm6 1123 paddd %xmm6,%xmm2 1124 pxor %xmm2,%xmm14 1125 pshufb .rol8(%rip),%xmm14 1126 paddd %xmm14,%xmm10 1127 pxor %xmm10,%xmm6 1128 movdqa %xmm6,%xmm3 1129 pslld $7,%xmm3 1130 psrld $25,%xmm6 1131 pxor %xmm3,%xmm6 1132 .byte 102,15,58,15,246,4 1133 .byte 102,69,15,58,15,210,8 
1134 .byte 102,69,15,58,15,246,12 1135 paddd %xmm4,%xmm0 1136 pxor %xmm0,%xmm12 1137 pshufb .rol16(%rip),%xmm12 1138 paddd %xmm12,%xmm8 1139 pxor %xmm8,%xmm4 1140 movdqa %xmm4,%xmm3 1141 pslld $12,%xmm3 1142 psrld $20,%xmm4 1143 pxor %xmm3,%xmm4 1144 paddd %xmm4,%xmm0 1145 pxor %xmm0,%xmm12 1146 pshufb .rol8(%rip),%xmm12 1147 paddd %xmm12,%xmm8 1148 pxor %xmm8,%xmm4 1149 movdqa %xmm4,%xmm3 1150 pslld $7,%xmm3 1151 psrld $25,%xmm4 1152 pxor %xmm3,%xmm4 1153 .byte 102,15,58,15,228,12 1154 .byte 102,69,15,58,15,192,8 1155 .byte 102,69,15,58,15,228,4 1156 paddd %xmm5,%xmm1 1157 pxor %xmm1,%xmm13 1158 pshufb .rol16(%rip),%xmm13 1159 paddd %xmm13,%xmm9 1160 pxor %xmm9,%xmm5 1161 movdqa %xmm5,%xmm3 1162 pslld $12,%xmm3 1163 psrld $20,%xmm5 1164 pxor %xmm3,%xmm5 1165 paddd %xmm5,%xmm1 1166 pxor %xmm1,%xmm13 1167 pshufb .rol8(%rip),%xmm13 1168 paddd %xmm13,%xmm9 1169 pxor %xmm9,%xmm5 1170 movdqa %xmm5,%xmm3 1171 pslld $7,%xmm3 1172 psrld $25,%xmm5 1173 pxor %xmm3,%xmm5 1174 .byte 102,15,58,15,237,12 1175 .byte 102,69,15,58,15,201,8 1176 .byte 102,69,15,58,15,237,4 1177 paddd %xmm6,%xmm2 1178 pxor %xmm2,%xmm14 1179 pshufb .rol16(%rip),%xmm14 1180 paddd %xmm14,%xmm10 1181 pxor %xmm10,%xmm6 1182 movdqa %xmm6,%xmm3 1183 pslld $12,%xmm3 1184 psrld $20,%xmm6 1185 pxor %xmm3,%xmm6 1186 paddd %xmm6,%xmm2 1187 pxor %xmm2,%xmm14 1188 pshufb .rol8(%rip),%xmm14 1189 paddd %xmm14,%xmm10 1190 pxor %xmm10,%xmm6 1191 movdqa %xmm6,%xmm3 1192 pslld $7,%xmm3 1193 psrld $25,%xmm6 1194 pxor %xmm3,%xmm6 1195 .byte 102,15,58,15,246,12 1196 .byte 102,69,15,58,15,210,8 1197 .byte 102,69,15,58,15,246,4 1198 1199 cmpq %rcx,%r8 1200 jb 1b 1201 cmpq $160,%r8 1202 jne 2b 1203 cmpq $176,%rbx 1204 jb 1f 1205 addq 160(%rsi),%r10 1206 adcq 8+160(%rsi),%r11 1207 adcq $1,%r12 1208 movq 0+0(%rbp),%rax 1209 movq %rax,%r15 1210 mulq %r10 1211 movq %rax,%r13 1212 movq %rdx,%r14 1213 movq 0+0(%rbp),%rax 1214 mulq %r11 1215 imulq %r12,%r15 1216 addq %rax,%r14 1217 adcq %rdx,%r15 1218 movq 8+0(%rbp),%rax 1219 movq 
%rax,%r9 1220 mulq %r10 1221 addq %rax,%r14 1222 adcq $0,%rdx 1223 movq %rdx,%r10 1224 movq 8+0(%rbp),%rax 1225 mulq %r11 1226 addq %rax,%r15 1227 adcq $0,%rdx 1228 imulq %r12,%r9 1229 addq %r10,%r15 1230 adcq %rdx,%r9 1231 movq %r13,%r10 1232 movq %r14,%r11 1233 movq %r15,%r12 1234 andq $3,%r12 1235 movq %r15,%r13 1236 andq $-4,%r13 1237 movq %r9,%r14 1238 shrdq $2,%r9,%r15 1239 shrq $2,%r9 1240 addq %r13,%r10 1241 adcq %r14,%r11 1242 adcq $0,%r12 1243 addq %r15,%r10 1244 adcq %r9,%r11 1245 adcq $0,%r12 1246 1247 cmpq $192,%rbx 1248 jb 1f 1249 addq 176(%rsi),%r10 1250 adcq 8+176(%rsi),%r11 1251 adcq $1,%r12 1252 movq 0+0(%rbp),%rax 1253 movq %rax,%r15 1254 mulq %r10 1255 movq %rax,%r13 1256 movq %rdx,%r14 1257 movq 0+0(%rbp),%rax 1258 mulq %r11 1259 imulq %r12,%r15 1260 addq %rax,%r14 1261 adcq %rdx,%r15 1262 movq 8+0(%rbp),%rax 1263 movq %rax,%r9 1264 mulq %r10 1265 addq %rax,%r14 1266 adcq $0,%rdx 1267 movq %rdx,%r10 1268 movq 8+0(%rbp),%rax 1269 mulq %r11 1270 addq %rax,%r15 1271 adcq $0,%rdx 1272 imulq %r12,%r9 1273 addq %r10,%r15 1274 adcq %rdx,%r9 1275 movq %r13,%r10 1276 movq %r14,%r11 1277 movq %r15,%r12 1278 andq $3,%r12 1279 movq %r15,%r13 1280 andq $-4,%r13 1281 movq %r9,%r14 1282 shrdq $2,%r9,%r15 1283 shrq $2,%r9 1284 addq %r13,%r10 1285 adcq %r14,%r11 1286 adcq $0,%r12 1287 addq %r15,%r10 1288 adcq %r9,%r11 1289 adcq $0,%r12 1290 1291 1: 1292 paddd .chacha20_consts(%rip),%xmm2 1293 paddd 48(%rbp),%xmm6 1294 paddd 64(%rbp),%xmm10 1295 paddd 128(%rbp),%xmm14 1296 paddd .chacha20_consts(%rip),%xmm1 1297 paddd 48(%rbp),%xmm5 1298 paddd 64(%rbp),%xmm9 1299 paddd 112(%rbp),%xmm13 1300 paddd .chacha20_consts(%rip),%xmm0 1301 paddd 48(%rbp),%xmm4 1302 paddd 64(%rbp),%xmm8 1303 paddd 96(%rbp),%xmm12 1304 movdqu 0 + 0(%rsi),%xmm3 1305 movdqu 16 + 0(%rsi),%xmm7 1306 movdqu 32 + 0(%rsi),%xmm11 1307 movdqu 48 + 0(%rsi),%xmm15 1308 pxor %xmm3,%xmm2 1309 pxor %xmm7,%xmm6 1310 pxor %xmm11,%xmm10 1311 pxor %xmm14,%xmm15 1312 movdqu %xmm2,0 + 0(%rdi) 1313 movdqu 
%xmm6,16 + 0(%rdi) 1314 movdqu %xmm10,32 + 0(%rdi) 1315 movdqu %xmm15,48 + 0(%rdi) 1316 movdqu 0 + 64(%rsi),%xmm3 1317 movdqu 16 + 64(%rsi),%xmm7 1318 movdqu 32 + 64(%rsi),%xmm11 1319 movdqu 48 + 64(%rsi),%xmm15 1320 pxor %xmm3,%xmm1 1321 pxor %xmm7,%xmm5 1322 pxor %xmm11,%xmm9 1323 pxor %xmm13,%xmm15 1324 movdqu %xmm1,0 + 64(%rdi) 1325 movdqu %xmm5,16 + 64(%rdi) 1326 movdqu %xmm9,32 + 64(%rdi) 1327 movdqu %xmm15,48 + 64(%rdi) 1328 1329 subq $128,%rbx 1330 leaq 128(%rsi),%rsi 1331 leaq 128(%rdi),%rdi 1332 jmp open_sse_tail_64_dec_loop 1333 3: 1334 1335 movdqa .chacha20_consts(%rip),%xmm0 1336 movdqa 48(%rbp),%xmm4 1337 movdqa 64(%rbp),%xmm8 1338 movdqa %xmm0,%xmm1 1339 movdqa %xmm4,%xmm5 1340 movdqa %xmm8,%xmm9 1341 movdqa %xmm0,%xmm2 1342 movdqa %xmm4,%xmm6 1343 movdqa %xmm8,%xmm10 1344 movdqa %xmm0,%xmm3 1345 movdqa %xmm4,%xmm7 1346 movdqa %xmm8,%xmm11 1347 movdqa 96(%rbp),%xmm15 1348 paddd .sse_inc(%rip),%xmm15 1349 movdqa %xmm15,%xmm14 1350 paddd .sse_inc(%rip),%xmm14 1351 movdqa %xmm14,%xmm13 1352 paddd .sse_inc(%rip),%xmm13 1353 movdqa %xmm13,%xmm12 1354 paddd .sse_inc(%rip),%xmm12 1355 movdqa %xmm12,96(%rbp) 1356 movdqa %xmm13,112(%rbp) 1357 movdqa %xmm14,128(%rbp) 1358 movdqa %xmm15,144(%rbp) 1359 1360 xorq %r8,%r8 1361 1: 1362 addq 0(%rsi,%r8), %r10 1363 adcq 8+0(%rsi,%r8), %r11 1364 adcq $1,%r12 1365 movdqa %xmm11,80(%rbp) 1366 paddd %xmm4,%xmm0 1367 pxor %xmm0,%xmm12 1368 pshufb .rol16(%rip),%xmm12 1369 paddd %xmm12,%xmm8 1370 pxor %xmm8,%xmm4 1371 movdqa %xmm4,%xmm11 1372 pslld $12,%xmm11 1373 psrld $20,%xmm4 1374 pxor %xmm11,%xmm4 1375 paddd %xmm4,%xmm0 1376 pxor %xmm0,%xmm12 1377 pshufb .rol8(%rip),%xmm12 1378 paddd %xmm12,%xmm8 1379 pxor %xmm8,%xmm4 1380 movdqa %xmm4,%xmm11 1381 pslld $7,%xmm11 1382 psrld $25,%xmm4 1383 pxor %xmm11,%xmm4 1384 .byte 102,15,58,15,228,4 1385 .byte 102,69,15,58,15,192,8 1386 .byte 102,69,15,58,15,228,12 1387 paddd %xmm5,%xmm1 1388 pxor %xmm1,%xmm13 1389 pshufb .rol16(%rip),%xmm13 1390 paddd %xmm13,%xmm9 1391 pxor 
%xmm9,%xmm5 1392 movdqa %xmm5,%xmm11 1393 pslld $12,%xmm11 1394 psrld $20,%xmm5 1395 pxor %xmm11,%xmm5 1396 paddd %xmm5,%xmm1 1397 pxor %xmm1,%xmm13 1398 pshufb .rol8(%rip),%xmm13 1399 paddd %xmm13,%xmm9 1400 pxor %xmm9,%xmm5 1401 movdqa %xmm5,%xmm11 1402 pslld $7,%xmm11 1403 psrld $25,%xmm5 1404 pxor %xmm11,%xmm5 1405 .byte 102,15,58,15,237,4 1406 .byte 102,69,15,58,15,201,8 1407 .byte 102,69,15,58,15,237,12 1408 paddd %xmm6,%xmm2 1409 pxor %xmm2,%xmm14 1410 pshufb .rol16(%rip),%xmm14 1411 paddd %xmm14,%xmm10 1412 pxor %xmm10,%xmm6 1413 movdqa %xmm6,%xmm11 1414 pslld $12,%xmm11 1415 psrld $20,%xmm6 1416 pxor %xmm11,%xmm6 1417 paddd %xmm6,%xmm2 1418 pxor %xmm2,%xmm14 1419 pshufb .rol8(%rip),%xmm14 1420 paddd %xmm14,%xmm10 1421 pxor %xmm10,%xmm6 1422 movdqa %xmm6,%xmm11 1423 pslld $7,%xmm11 1424 psrld $25,%xmm6 1425 pxor %xmm11,%xmm6 1426 .byte 102,15,58,15,246,4 1427 .byte 102,69,15,58,15,210,8 1428 .byte 102,69,15,58,15,246,12 1429 movdqa 80(%rbp),%xmm11 1430 movq 0+0(%rbp),%rax 1431 movq %rax,%r15 1432 mulq %r10 1433 movq %rax,%r13 1434 movq %rdx,%r14 1435 movq 0+0(%rbp),%rax 1436 mulq %r11 1437 imulq %r12,%r15 1438 addq %rax,%r14 1439 adcq %rdx,%r15 1440 movdqa %xmm9,80(%rbp) 1441 paddd %xmm7,%xmm3 1442 pxor %xmm3,%xmm15 1443 pshufb .rol16(%rip),%xmm15 1444 paddd %xmm15,%xmm11 1445 pxor %xmm11,%xmm7 1446 movdqa %xmm7,%xmm9 1447 pslld $12,%xmm9 1448 psrld $20,%xmm7 1449 pxor %xmm9,%xmm7 1450 paddd %xmm7,%xmm3 1451 pxor %xmm3,%xmm15 1452 pshufb .rol8(%rip),%xmm15 1453 paddd %xmm15,%xmm11 1454 pxor %xmm11,%xmm7 1455 movdqa %xmm7,%xmm9 1456 pslld $7,%xmm9 1457 psrld $25,%xmm7 1458 pxor %xmm9,%xmm7 1459 .byte 102,15,58,15,255,4 1460 .byte 102,69,15,58,15,219,8 1461 .byte 102,69,15,58,15,255,12 1462 movdqa 80(%rbp),%xmm9 1463 movq 8+0(%rbp),%rax 1464 movq %rax,%r9 1465 mulq %r10 1466 addq %rax,%r14 1467 adcq $0,%rdx 1468 movq %rdx,%r10 1469 movq 8+0(%rbp),%rax 1470 mulq %r11 1471 addq %rax,%r15 1472 adcq $0,%rdx 1473 movdqa %xmm11,80(%rbp) 1474 paddd %xmm4,%xmm0 1475 
pxor %xmm0,%xmm12 1476 pshufb .rol16(%rip),%xmm12 1477 paddd %xmm12,%xmm8 1478 pxor %xmm8,%xmm4 1479 movdqa %xmm4,%xmm11 1480 pslld $12,%xmm11 1481 psrld $20,%xmm4 1482 pxor %xmm11,%xmm4 1483 paddd %xmm4,%xmm0 1484 pxor %xmm0,%xmm12 1485 pshufb .rol8(%rip),%xmm12 1486 paddd %xmm12,%xmm8 1487 pxor %xmm8,%xmm4 1488 movdqa %xmm4,%xmm11 1489 pslld $7,%xmm11 1490 psrld $25,%xmm4 1491 pxor %xmm11,%xmm4 1492 .byte 102,15,58,15,228,12 1493 .byte 102,69,15,58,15,192,8 1494 .byte 102,69,15,58,15,228,4 1495 paddd %xmm5,%xmm1 1496 pxor %xmm1,%xmm13 1497 pshufb .rol16(%rip),%xmm13 1498 paddd %xmm13,%xmm9 1499 pxor %xmm9,%xmm5 1500 movdqa %xmm5,%xmm11 1501 pslld $12,%xmm11 1502 psrld $20,%xmm5 1503 pxor %xmm11,%xmm5 1504 paddd %xmm5,%xmm1 1505 pxor %xmm1,%xmm13 1506 pshufb .rol8(%rip),%xmm13 1507 paddd %xmm13,%xmm9 1508 pxor %xmm9,%xmm5 1509 movdqa %xmm5,%xmm11 1510 pslld $7,%xmm11 1511 psrld $25,%xmm5 1512 pxor %xmm11,%xmm5 1513 .byte 102,15,58,15,237,12 1514 .byte 102,69,15,58,15,201,8 1515 .byte 102,69,15,58,15,237,4 1516 imulq %r12,%r9 1517 addq %r10,%r15 1518 adcq %rdx,%r9 1519 paddd %xmm6,%xmm2 1520 pxor %xmm2,%xmm14 1521 pshufb .rol16(%rip),%xmm14 1522 paddd %xmm14,%xmm10 1523 pxor %xmm10,%xmm6 1524 movdqa %xmm6,%xmm11 1525 pslld $12,%xmm11 1526 psrld $20,%xmm6 1527 pxor %xmm11,%xmm6 1528 paddd %xmm6,%xmm2 1529 pxor %xmm2,%xmm14 1530 pshufb .rol8(%rip),%xmm14 1531 paddd %xmm14,%xmm10 1532 pxor %xmm10,%xmm6 1533 movdqa %xmm6,%xmm11 1534 pslld $7,%xmm11 1535 psrld $25,%xmm6 1536 pxor %xmm11,%xmm6 1537 .byte 102,15,58,15,246,12 1538 .byte 102,69,15,58,15,210,8 1539 .byte 102,69,15,58,15,246,4 1540 movdqa 80(%rbp),%xmm11 1541 movq %r13,%r10 1542 movq %r14,%r11 1543 movq %r15,%r12 1544 andq $3,%r12 1545 movq %r15,%r13 1546 andq $-4,%r13 1547 movq %r9,%r14 1548 shrdq $2,%r9,%r15 1549 shrq $2,%r9 1550 addq %r13,%r10 1551 adcq %r14,%r11 1552 adcq $0,%r12 1553 addq %r15,%r10 1554 adcq %r9,%r11 1555 adcq $0,%r12 1556 movdqa %xmm9,80(%rbp) 1557 paddd %xmm7,%xmm3 1558 pxor 
%xmm3,%xmm15 1559 pshufb .rol16(%rip),%xmm15 1560 paddd %xmm15,%xmm11 1561 pxor %xmm11,%xmm7 1562 movdqa %xmm7,%xmm9 1563 pslld $12,%xmm9 1564 psrld $20,%xmm7 1565 pxor %xmm9,%xmm7 1566 paddd %xmm7,%xmm3 1567 pxor %xmm3,%xmm15 1568 pshufb .rol8(%rip),%xmm15 1569 paddd %xmm15,%xmm11 1570 pxor %xmm11,%xmm7 1571 movdqa %xmm7,%xmm9 1572 pslld $7,%xmm9 1573 psrld $25,%xmm7 1574 pxor %xmm9,%xmm7 1575 .byte 102,15,58,15,255,12 1576 .byte 102,69,15,58,15,219,8 1577 .byte 102,69,15,58,15,255,4 1578 movdqa 80(%rbp),%xmm9 1579 1580 addq $16,%r8 1581 cmpq $160,%r8 1582 jb 1b 1583 movq %rbx,%rcx 1584 andq $-16,%rcx 1585 1: 1586 addq 0(%rsi,%r8), %r10 1587 adcq 8+0(%rsi,%r8), %r11 1588 adcq $1,%r12 1589 movq 0+0(%rbp),%rax 1590 movq %rax,%r15 1591 mulq %r10 1592 movq %rax,%r13 1593 movq %rdx,%r14 1594 movq 0+0(%rbp),%rax 1595 mulq %r11 1596 imulq %r12,%r15 1597 addq %rax,%r14 1598 adcq %rdx,%r15 1599 movq 8+0(%rbp),%rax 1600 movq %rax,%r9 1601 mulq %r10 1602 addq %rax,%r14 1603 adcq $0,%rdx 1604 movq %rdx,%r10 1605 movq 8+0(%rbp),%rax 1606 mulq %r11 1607 addq %rax,%r15 1608 adcq $0,%rdx 1609 imulq %r12,%r9 1610 addq %r10,%r15 1611 adcq %rdx,%r9 1612 movq %r13,%r10 1613 movq %r14,%r11 1614 movq %r15,%r12 1615 andq $3,%r12 1616 movq %r15,%r13 1617 andq $-4,%r13 1618 movq %r9,%r14 1619 shrdq $2,%r9,%r15 1620 shrq $2,%r9 1621 addq %r13,%r10 1622 adcq %r14,%r11 1623 adcq $0,%r12 1624 addq %r15,%r10 1625 adcq %r9,%r11 1626 adcq $0,%r12 1627 1628 addq $16,%r8 1629 cmpq %rcx,%r8 1630 jb 1b 1631 paddd .chacha20_consts(%rip),%xmm3 1632 paddd 48(%rbp),%xmm7 1633 paddd 64(%rbp),%xmm11 1634 paddd 144(%rbp),%xmm15 1635 paddd .chacha20_consts(%rip),%xmm2 1636 paddd 48(%rbp),%xmm6 1637 paddd 64(%rbp),%xmm10 1638 paddd 128(%rbp),%xmm14 1639 paddd .chacha20_consts(%rip),%xmm1 1640 paddd 48(%rbp),%xmm5 1641 paddd 64(%rbp),%xmm9 1642 paddd 112(%rbp),%xmm13 1643 paddd .chacha20_consts(%rip),%xmm0 1644 paddd 48(%rbp),%xmm4 1645 paddd 64(%rbp),%xmm8 1646 paddd 96(%rbp),%xmm12 1647 movdqa 
%xmm12,80(%rbp) 1648 movdqu 0 + 0(%rsi),%xmm12 1649 pxor %xmm3,%xmm12 1650 movdqu %xmm12,0 + 0(%rdi) 1651 movdqu 16 + 0(%rsi),%xmm12 1652 pxor %xmm7,%xmm12 1653 movdqu %xmm12,16 + 0(%rdi) 1654 movdqu 32 + 0(%rsi),%xmm12 1655 pxor %xmm11,%xmm12 1656 movdqu %xmm12,32 + 0(%rdi) 1657 movdqu 48 + 0(%rsi),%xmm12 1658 pxor %xmm15,%xmm12 1659 movdqu %xmm12,48 + 0(%rdi) 1660 movdqu 0 + 64(%rsi),%xmm3 1661 movdqu 16 + 64(%rsi),%xmm7 1662 movdqu 32 + 64(%rsi),%xmm11 1663 movdqu 48 + 64(%rsi),%xmm15 1664 pxor %xmm3,%xmm2 1665 pxor %xmm7,%xmm6 1666 pxor %xmm11,%xmm10 1667 pxor %xmm14,%xmm15 1668 movdqu %xmm2,0 + 64(%rdi) 1669 movdqu %xmm6,16 + 64(%rdi) 1670 movdqu %xmm10,32 + 64(%rdi) 1671 movdqu %xmm15,48 + 64(%rdi) 1672 movdqu 0 + 128(%rsi),%xmm3 1673 movdqu 16 + 128(%rsi),%xmm7 1674 movdqu 32 + 128(%rsi),%xmm11 1675 movdqu 48 + 128(%rsi),%xmm15 1676 pxor %xmm3,%xmm1 1677 pxor %xmm7,%xmm5 1678 pxor %xmm11,%xmm9 1679 pxor %xmm13,%xmm15 1680 movdqu %xmm1,0 + 128(%rdi) 1681 movdqu %xmm5,16 + 128(%rdi) 1682 movdqu %xmm9,32 + 128(%rdi) 1683 movdqu %xmm15,48 + 128(%rdi) 1684 1685 movdqa 80(%rbp),%xmm12 1686 subq $192,%rbx 1687 leaq 192(%rsi),%rsi 1688 leaq 192(%rdi),%rdi 1689 1690 1691 open_sse_tail_64_dec_loop: 1692 cmpq $16,%rbx 1693 jb 1f 1694 subq $16,%rbx 1695 movdqu (%rsi),%xmm3 1696 pxor %xmm3,%xmm0 1697 movdqu %xmm0,(%rdi) 1698 leaq 16(%rsi),%rsi 1699 leaq 16(%rdi),%rdi 1700 movdqa %xmm4,%xmm0 1701 movdqa %xmm8,%xmm4 1702 movdqa %xmm12,%xmm8 1703 jmp open_sse_tail_64_dec_loop 1704 1: 1705 movdqa %xmm0,%xmm1 1706 1707 1708 open_sse_tail_16: 1709 testq %rbx,%rbx 1710 jz open_sse_finalize 1711 1712 1713 1714 pxor %xmm3,%xmm3 1715 leaq -1(%rsi,%rbx), %rsi 1716 movq %rbx,%r8 1717 2: 1718 pslldq $1,%xmm3 1719 pinsrb $0,(%rsi),%xmm3 1720 subq $1,%rsi 1721 subq $1,%r8 1722 jnz 2b 1723 1724 3: 1725 .byte 102,73,15,126,221 1726 pextrq $1,%xmm3,%r14 1727 1728 pxor %xmm1,%xmm3 1729 1730 1731 2: 1732 pextrb $0,%xmm3,(%rdi) 1733 psrldq $1,%xmm3 1734 addq $1,%rdi 1735 subq $1,%rbx 1736 
jne 2b 1737 1738 addq %r13,%r10 1739 adcq %r14,%r11 1740 adcq $1,%r12 1741 movq 0+0(%rbp),%rax 1742 movq %rax,%r15 1743 mulq %r10 1744 movq %rax,%r13 1745 movq %rdx,%r14 1746 movq 0+0(%rbp),%rax 1747 mulq %r11 1748 imulq %r12,%r15 1749 addq %rax,%r14 1750 adcq %rdx,%r15 1751 movq 8+0(%rbp),%rax 1752 movq %rax,%r9 1753 mulq %r10 1754 addq %rax,%r14 1755 adcq $0,%rdx 1756 movq %rdx,%r10 1757 movq 8+0(%rbp),%rax 1758 mulq %r11 1759 addq %rax,%r15 1760 adcq $0,%rdx 1761 imulq %r12,%r9 1762 addq %r10,%r15 1763 adcq %rdx,%r9 1764 movq %r13,%r10 1765 movq %r14,%r11 1766 movq %r15,%r12 1767 andq $3,%r12 1768 movq %r15,%r13 1769 andq $-4,%r13 1770 movq %r9,%r14 1771 shrdq $2,%r9,%r15 1772 shrq $2,%r9 1773 addq %r13,%r10 1774 adcq %r14,%r11 1775 adcq $0,%r12 1776 addq %r15,%r10 1777 adcq %r9,%r11 1778 adcq $0,%r12 1779 1780 1781 open_sse_finalize: 1782 addq 32(%rbp),%r10 1783 adcq 8+32(%rbp),%r11 1784 adcq $1,%r12 1785 movq 0+0(%rbp),%rax 1786 movq %rax,%r15 1787 mulq %r10 1788 movq %rax,%r13 1789 movq %rdx,%r14 1790 movq 0+0(%rbp),%rax 1791 mulq %r11 1792 imulq %r12,%r15 1793 addq %rax,%r14 1794 adcq %rdx,%r15 1795 movq 8+0(%rbp),%rax 1796 movq %rax,%r9 1797 mulq %r10 1798 addq %rax,%r14 1799 adcq $0,%rdx 1800 movq %rdx,%r10 1801 movq 8+0(%rbp),%rax 1802 mulq %r11 1803 addq %rax,%r15 1804 adcq $0,%rdx 1805 imulq %r12,%r9 1806 addq %r10,%r15 1807 adcq %rdx,%r9 1808 movq %r13,%r10 1809 movq %r14,%r11 1810 movq %r15,%r12 1811 andq $3,%r12 1812 movq %r15,%r13 1813 andq $-4,%r13 1814 movq %r9,%r14 1815 shrdq $2,%r9,%r15 1816 shrq $2,%r9 1817 addq %r13,%r10 1818 adcq %r14,%r11 1819 adcq $0,%r12 1820 addq %r15,%r10 1821 adcq %r9,%r11 1822 adcq $0,%r12 1823 1824 1825 movq %r10,%r13 1826 movq %r11,%r14 1827 movq %r12,%r15 1828 subq $-5,%r10 1829 sbbq $-1,%r11 1830 sbbq $3,%r12 1831 cmovcq %r13,%r10 1832 cmovcq %r14,%r11 1833 cmovcq %r15,%r12 1834 1835 addq 0+16(%rbp),%r10 1836 adcq 8+16(%rbp),%r11 1837 1838 addq $288 + 32,%rsp 1839 .cfi_adjust_cfa_offset -(288 + 32) 1840 popq %r9 
1841 .cfi_adjust_cfa_offset -8 1842 movq %r10,(%r9) 1843 movq %r11,8(%r9) 1844 1845 popq %r15 1846 .cfi_adjust_cfa_offset -8 1847 popq %r14 1848 .cfi_adjust_cfa_offset -8 1849 popq %r13 1850 .cfi_adjust_cfa_offset -8 1851 popq %r12 1852 .cfi_adjust_cfa_offset -8 1853 popq %rbx 1854 .cfi_adjust_cfa_offset -8 1855 popq %rbp 1856 .cfi_adjust_cfa_offset -8 1857 .byte 0xf3,0xc3 1858 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32 1859 1860 open_sse_128: 1861 movdqu .chacha20_consts(%rip),%xmm0 1862 movdqa %xmm0,%xmm1 1863 movdqa %xmm0,%xmm2 1864 movdqu 0(%r9),%xmm4 1865 movdqa %xmm4,%xmm5 1866 movdqa %xmm4,%xmm6 1867 movdqu 16(%r9),%xmm8 1868 movdqa %xmm8,%xmm9 1869 movdqa %xmm8,%xmm10 1870 movdqu 32(%r9),%xmm12 1871 movdqa %xmm12,%xmm13 1872 paddd .sse_inc(%rip),%xmm13 1873 movdqa %xmm13,%xmm14 1874 paddd .sse_inc(%rip),%xmm14 1875 movdqa %xmm4,%xmm7 1876 movdqa %xmm8,%xmm11 1877 movdqa %xmm13,%xmm15 1878 movq $10,%r10 1879 1: 1880 paddd %xmm4,%xmm0 1881 pxor %xmm0,%xmm12 1882 pshufb .rol16(%rip),%xmm12 1883 paddd %xmm12,%xmm8 1884 pxor %xmm8,%xmm4 1885 movdqa %xmm4,%xmm3 1886 pslld $12,%xmm3 1887 psrld $20,%xmm4 1888 pxor %xmm3,%xmm4 1889 paddd %xmm4,%xmm0 1890 pxor %xmm0,%xmm12 1891 pshufb .rol8(%rip),%xmm12 1892 paddd %xmm12,%xmm8 1893 pxor %xmm8,%xmm4 1894 movdqa %xmm4,%xmm3 1895 pslld $7,%xmm3 1896 psrld $25,%xmm4 1897 pxor %xmm3,%xmm4 1898 .byte 102,15,58,15,228,4 1899 .byte 102,69,15,58,15,192,8 1900 .byte 102,69,15,58,15,228,12 1901 paddd %xmm5,%xmm1 1902 pxor %xmm1,%xmm13 1903 pshufb .rol16(%rip),%xmm13 1904 paddd %xmm13,%xmm9 1905 pxor %xmm9,%xmm5 1906 movdqa %xmm5,%xmm3 1907 pslld $12,%xmm3 1908 psrld $20,%xmm5 1909 pxor %xmm3,%xmm5 1910 paddd %xmm5,%xmm1 1911 pxor %xmm1,%xmm13 1912 pshufb .rol8(%rip),%xmm13 1913 paddd %xmm13,%xmm9 1914 pxor %xmm9,%xmm5 1915 movdqa %xmm5,%xmm3 1916 pslld $7,%xmm3 1917 psrld $25,%xmm5 1918 pxor %xmm3,%xmm5 1919 .byte 102,15,58,15,237,4 1920 .byte 102,69,15,58,15,201,8 1921 .byte 102,69,15,58,15,237,12 1922 paddd %xmm6,%xmm2 1923 
pxor %xmm2,%xmm14 1924 pshufb .rol16(%rip),%xmm14 1925 paddd %xmm14,%xmm10 1926 pxor %xmm10,%xmm6 1927 movdqa %xmm6,%xmm3 1928 pslld $12,%xmm3 1929 psrld $20,%xmm6 1930 pxor %xmm3,%xmm6 1931 paddd %xmm6,%xmm2 1932 pxor %xmm2,%xmm14 1933 pshufb .rol8(%rip),%xmm14 1934 paddd %xmm14,%xmm10 1935 pxor %xmm10,%xmm6 1936 movdqa %xmm6,%xmm3 1937 pslld $7,%xmm3 1938 psrld $25,%xmm6 1939 pxor %xmm3,%xmm6 1940 .byte 102,15,58,15,246,4 1941 .byte 102,69,15,58,15,210,8 1942 .byte 102,69,15,58,15,246,12 1943 paddd %xmm4,%xmm0 1944 pxor %xmm0,%xmm12 1945 pshufb .rol16(%rip),%xmm12 1946 paddd %xmm12,%xmm8 1947 pxor %xmm8,%xmm4 1948 movdqa %xmm4,%xmm3 1949 pslld $12,%xmm3 1950 psrld $20,%xmm4 1951 pxor %xmm3,%xmm4 1952 paddd %xmm4,%xmm0 1953 pxor %xmm0,%xmm12 1954 pshufb .rol8(%rip),%xmm12 1955 paddd %xmm12,%xmm8 1956 pxor %xmm8,%xmm4 1957 movdqa %xmm4,%xmm3 1958 pslld $7,%xmm3 1959 psrld $25,%xmm4 1960 pxor %xmm3,%xmm4 1961 .byte 102,15,58,15,228,12 1962 .byte 102,69,15,58,15,192,8 1963 .byte 102,69,15,58,15,228,4 1964 paddd %xmm5,%xmm1 1965 pxor %xmm1,%xmm13 1966 pshufb .rol16(%rip),%xmm13 1967 paddd %xmm13,%xmm9 1968 pxor %xmm9,%xmm5 1969 movdqa %xmm5,%xmm3 1970 pslld $12,%xmm3 1971 psrld $20,%xmm5 1972 pxor %xmm3,%xmm5 1973 paddd %xmm5,%xmm1 1974 pxor %xmm1,%xmm13 1975 pshufb .rol8(%rip),%xmm13 1976 paddd %xmm13,%xmm9 1977 pxor %xmm9,%xmm5 1978 movdqa %xmm5,%xmm3 1979 pslld $7,%xmm3 1980 psrld $25,%xmm5 1981 pxor %xmm3,%xmm5 1982 .byte 102,15,58,15,237,12 1983 .byte 102,69,15,58,15,201,8 1984 .byte 102,69,15,58,15,237,4 1985 paddd %xmm6,%xmm2 1986 pxor %xmm2,%xmm14 1987 pshufb .rol16(%rip),%xmm14 1988 paddd %xmm14,%xmm10 1989 pxor %xmm10,%xmm6 1990 movdqa %xmm6,%xmm3 1991 pslld $12,%xmm3 1992 psrld $20,%xmm6 1993 pxor %xmm3,%xmm6 1994 paddd %xmm6,%xmm2 1995 pxor %xmm2,%xmm14 1996 pshufb .rol8(%rip),%xmm14 1997 paddd %xmm14,%xmm10 1998 pxor %xmm10,%xmm6 1999 movdqa %xmm6,%xmm3 2000 pslld $7,%xmm3 2001 psrld $25,%xmm6 2002 pxor %xmm3,%xmm6 2003 .byte 102,15,58,15,246,12 2004 
.byte 102,69,15,58,15,210,8 2005 .byte 102,69,15,58,15,246,4 2006 2007 decq %r10 2008 jnz 1b 2009 paddd .chacha20_consts(%rip),%xmm0 2010 paddd .chacha20_consts(%rip),%xmm1 2011 paddd .chacha20_consts(%rip),%xmm2 2012 paddd %xmm7,%xmm4 2013 paddd %xmm7,%xmm5 2014 paddd %xmm7,%xmm6 2015 paddd %xmm11,%xmm9 2016 paddd %xmm11,%xmm10 2017 paddd %xmm15,%xmm13 2018 paddd .sse_inc(%rip),%xmm15 2019 paddd %xmm15,%xmm14 2020 2021 pand .clamp(%rip),%xmm0 2022 movdqa %xmm0,0(%rbp) 2023 movdqa %xmm4,16(%rbp) 2024 2025 movq %r8,%r8 2026 call poly_hash_ad_internal 2027 1: 2028 cmpq $16,%rbx 2029 jb open_sse_tail_16 2030 subq $16,%rbx 2031 addq 0(%rsi),%r10 2032 adcq 8+0(%rsi),%r11 2033 adcq $1,%r12 2034 2035 2036 movdqu 0(%rsi),%xmm3 2037 pxor %xmm3,%xmm1 2038 movdqu %xmm1,0(%rdi) 2039 leaq 16(%rsi),%rsi 2040 leaq 16(%rdi),%rdi 2041 movq 0+0(%rbp),%rax 2042 movq %rax,%r15 2043 mulq %r10 2044 movq %rax,%r13 2045 movq %rdx,%r14 2046 movq 0+0(%rbp),%rax 2047 mulq %r11 2048 imulq %r12,%r15 2049 addq %rax,%r14 2050 adcq %rdx,%r15 2051 movq 8+0(%rbp),%rax 2052 movq %rax,%r9 2053 mulq %r10 2054 addq %rax,%r14 2055 adcq $0,%rdx 2056 movq %rdx,%r10 2057 movq 8+0(%rbp),%rax 2058 mulq %r11 2059 addq %rax,%r15 2060 adcq $0,%rdx 2061 imulq %r12,%r9 2062 addq %r10,%r15 2063 adcq %rdx,%r9 2064 movq %r13,%r10 2065 movq %r14,%r11 2066 movq %r15,%r12 2067 andq $3,%r12 2068 movq %r15,%r13 2069 andq $-4,%r13 2070 movq %r9,%r14 2071 shrdq $2,%r9,%r15 2072 shrq $2,%r9 2073 addq %r13,%r10 2074 adcq %r14,%r11 2075 adcq $0,%r12 2076 addq %r15,%r10 2077 adcq %r9,%r11 2078 adcq $0,%r12 2079 2080 2081 movdqa %xmm5,%xmm1 2082 movdqa %xmm9,%xmm5 2083 movdqa %xmm13,%xmm9 2084 movdqa %xmm2,%xmm13 2085 movdqa %xmm6,%xmm2 2086 movdqa %xmm10,%xmm6 2087 movdqa %xmm14,%xmm10 2088 jmp 1b 2089 jmp open_sse_tail_16 2090 .size chacha20_poly1305_open, .-chacha20_poly1305_open 2091 .cfi_endproc 2092 2093 2094 2095 2096 .globl chacha20_poly1305_seal 2097 .hidden chacha20_poly1305_seal 2098 .type 
chacha20_poly1305_seal,@function 2099 .align 64 2100 chacha20_poly1305_seal: 2101 .cfi_startproc 2102 pushq %rbp 2103 .cfi_adjust_cfa_offset 8 2104 pushq %rbx 2105 .cfi_adjust_cfa_offset 8 2106 pushq %r12 2107 .cfi_adjust_cfa_offset 8 2108 pushq %r13 2109 .cfi_adjust_cfa_offset 8 2110 pushq %r14 2111 .cfi_adjust_cfa_offset 8 2112 pushq %r15 2113 .cfi_adjust_cfa_offset 8 2114 2115 2116 pushq %r9 2117 .cfi_adjust_cfa_offset 8 2118 subq $288 + 32,%rsp 2119 .cfi_adjust_cfa_offset 288 + 32 2120 .cfi_offset rbp, -16 2121 .cfi_offset rbx, -24 2122 .cfi_offset r12, -32 2123 .cfi_offset r13, -40 2124 .cfi_offset r14, -48 2125 .cfi_offset r15, -56 2126 leaq 32(%rsp),%rbp 2127 andq $-32,%rbp 2128 movq 56(%r9),%rbx 2129 addq %rdx,%rbx 2130 movq %rbx,8+32(%rbp) 2131 movq %r8,0+32(%rbp) 2132 movq %rdx,%rbx 2133 2134 movl OPENSSL_ia32cap_P+8(%rip),%eax 2135 andl $288,%eax 2136 xorl $288,%eax 2137 jz chacha20_poly1305_seal_avx2 2138 2139 cmpq $128,%rbx 2140 jbe seal_sse_128 2141 2142 movdqa .chacha20_consts(%rip),%xmm0 2143 movdqu 0(%r9),%xmm4 2144 movdqu 16(%r9),%xmm8 2145 movdqu 32(%r9),%xmm12 2146 movdqa %xmm0,%xmm1 2147 movdqa %xmm0,%xmm2 2148 movdqa %xmm0,%xmm3 2149 movdqa %xmm4,%xmm5 2150 movdqa %xmm4,%xmm6 2151 movdqa %xmm4,%xmm7 2152 movdqa %xmm8,%xmm9 2153 movdqa %xmm8,%xmm10 2154 movdqa %xmm8,%xmm11 2155 movdqa %xmm12,%xmm15 2156 paddd .sse_inc(%rip),%xmm12 2157 movdqa %xmm12,%xmm14 2158 paddd .sse_inc(%rip),%xmm12 2159 movdqa %xmm12,%xmm13 2160 paddd .sse_inc(%rip),%xmm12 2161 2162 movdqa %xmm4,48(%rbp) 2163 movdqa %xmm8,64(%rbp) 2164 movdqa %xmm12,96(%rbp) 2165 movdqa %xmm13,112(%rbp) 2166 movdqa %xmm14,128(%rbp) 2167 movdqa %xmm15,144(%rbp) 2168 movq $10,%r10 2169 1: 2170 movdqa %xmm8,80(%rbp) 2171 movdqa .rol16(%rip),%xmm8 2172 paddd %xmm7,%xmm3 2173 paddd %xmm6,%xmm2 2174 paddd %xmm5,%xmm1 2175 paddd %xmm4,%xmm0 2176 pxor %xmm3,%xmm15 2177 pxor %xmm2,%xmm14 2178 pxor %xmm1,%xmm13 2179 pxor %xmm0,%xmm12 2180 .byte 102,69,15,56,0,248 2181 .byte 102,69,15,56,0,240 2182 
.byte 102,69,15,56,0,232 2183 .byte 102,69,15,56,0,224 2184 movdqa 80(%rbp),%xmm8 2185 paddd %xmm15,%xmm11 2186 paddd %xmm14,%xmm10 2187 paddd %xmm13,%xmm9 2188 paddd %xmm12,%xmm8 2189 pxor %xmm11,%xmm7 2190 pxor %xmm10,%xmm6 2191 pxor %xmm9,%xmm5 2192 pxor %xmm8,%xmm4 2193 movdqa %xmm8,80(%rbp) 2194 movdqa %xmm7,%xmm8 2195 psrld $20,%xmm8 2196 pslld $32-20,%xmm7 2197 pxor %xmm8,%xmm7 2198 movdqa %xmm6,%xmm8 2199 psrld $20,%xmm8 2200 pslld $32-20,%xmm6 2201 pxor %xmm8,%xmm6 2202 movdqa %xmm5,%xmm8 2203 psrld $20,%xmm8 2204 pslld $32-20,%xmm5 2205 pxor %xmm8,%xmm5 2206 movdqa %xmm4,%xmm8 2207 psrld $20,%xmm8 2208 pslld $32-20,%xmm4 2209 pxor %xmm8,%xmm4 2210 movdqa .rol8(%rip),%xmm8 2211 paddd %xmm7,%xmm3 2212 paddd %xmm6,%xmm2 2213 paddd %xmm5,%xmm1 2214 paddd %xmm4,%xmm0 2215 pxor %xmm3,%xmm15 2216 pxor %xmm2,%xmm14 2217 pxor %xmm1,%xmm13 2218 pxor %xmm0,%xmm12 2219 .byte 102,69,15,56,0,248 2220 .byte 102,69,15,56,0,240 2221 .byte 102,69,15,56,0,232 2222 .byte 102,69,15,56,0,224 2223 movdqa 80(%rbp),%xmm8 2224 paddd %xmm15,%xmm11 2225 paddd %xmm14,%xmm10 2226 paddd %xmm13,%xmm9 2227 paddd %xmm12,%xmm8 2228 pxor %xmm11,%xmm7 2229 pxor %xmm10,%xmm6 2230 pxor %xmm9,%xmm5 2231 pxor %xmm8,%xmm4 2232 movdqa %xmm8,80(%rbp) 2233 movdqa %xmm7,%xmm8 2234 psrld $25,%xmm8 2235 pslld $32-25,%xmm7 2236 pxor %xmm8,%xmm7 2237 movdqa %xmm6,%xmm8 2238 psrld $25,%xmm8 2239 pslld $32-25,%xmm6 2240 pxor %xmm8,%xmm6 2241 movdqa %xmm5,%xmm8 2242 psrld $25,%xmm8 2243 pslld $32-25,%xmm5 2244 pxor %xmm8,%xmm5 2245 movdqa %xmm4,%xmm8 2246 psrld $25,%xmm8 2247 pslld $32-25,%xmm4 2248 pxor %xmm8,%xmm4 2249 movdqa 80(%rbp),%xmm8 2250 .byte 102,15,58,15,255,4 2251 .byte 102,69,15,58,15,219,8 2252 .byte 102,69,15,58,15,255,12 2253 .byte 102,15,58,15,246,4 2254 .byte 102,69,15,58,15,210,8 2255 .byte 102,69,15,58,15,246,12 2256 .byte 102,15,58,15,237,4 2257 .byte 102,69,15,58,15,201,8 2258 .byte 102,69,15,58,15,237,12 2259 .byte 102,15,58,15,228,4 2260 .byte 102,69,15,58,15,192,8 2261 .byte 
102,69,15,58,15,228,12 2262 movdqa %xmm8,80(%rbp) 2263 movdqa .rol16(%rip),%xmm8 2264 paddd %xmm7,%xmm3 2265 paddd %xmm6,%xmm2 2266 paddd %xmm5,%xmm1 2267 paddd %xmm4,%xmm0 2268 pxor %xmm3,%xmm15 2269 pxor %xmm2,%xmm14 2270 pxor %xmm1,%xmm13 2271 pxor %xmm0,%xmm12 2272 .byte 102,69,15,56,0,248 2273 .byte 102,69,15,56,0,240 2274 .byte 102,69,15,56,0,232 2275 .byte 102,69,15,56,0,224 2276 movdqa 80(%rbp),%xmm8 2277 paddd %xmm15,%xmm11 2278 paddd %xmm14,%xmm10 2279 paddd %xmm13,%xmm9 2280 paddd %xmm12,%xmm8 2281 pxor %xmm11,%xmm7 2282 pxor %xmm10,%xmm6 2283 pxor %xmm9,%xmm5 2284 pxor %xmm8,%xmm4 2285 movdqa %xmm8,80(%rbp) 2286 movdqa %xmm7,%xmm8 2287 psrld $20,%xmm8 2288 pslld $32-20,%xmm7 2289 pxor %xmm8,%xmm7 2290 movdqa %xmm6,%xmm8 2291 psrld $20,%xmm8 2292 pslld $32-20,%xmm6 2293 pxor %xmm8,%xmm6 2294 movdqa %xmm5,%xmm8 2295 psrld $20,%xmm8 2296 pslld $32-20,%xmm5 2297 pxor %xmm8,%xmm5 2298 movdqa %xmm4,%xmm8 2299 psrld $20,%xmm8 2300 pslld $32-20,%xmm4 2301 pxor %xmm8,%xmm4 2302 movdqa .rol8(%rip),%xmm8 2303 paddd %xmm7,%xmm3 2304 paddd %xmm6,%xmm2 2305 paddd %xmm5,%xmm1 2306 paddd %xmm4,%xmm0 2307 pxor %xmm3,%xmm15 2308 pxor %xmm2,%xmm14 2309 pxor %xmm1,%xmm13 2310 pxor %xmm0,%xmm12 2311 .byte 102,69,15,56,0,248 2312 .byte 102,69,15,56,0,240 2313 .byte 102,69,15,56,0,232 2314 .byte 102,69,15,56,0,224 2315 movdqa 80(%rbp),%xmm8 2316 paddd %xmm15,%xmm11 2317 paddd %xmm14,%xmm10 2318 paddd %xmm13,%xmm9 2319 paddd %xmm12,%xmm8 2320 pxor %xmm11,%xmm7 2321 pxor %xmm10,%xmm6 2322 pxor %xmm9,%xmm5 2323 pxor %xmm8,%xmm4 2324 movdqa %xmm8,80(%rbp) 2325 movdqa %xmm7,%xmm8 2326 psrld $25,%xmm8 2327 pslld $32-25,%xmm7 2328 pxor %xmm8,%xmm7 2329 movdqa %xmm6,%xmm8 2330 psrld $25,%xmm8 2331 pslld $32-25,%xmm6 2332 pxor %xmm8,%xmm6 2333 movdqa %xmm5,%xmm8 2334 psrld $25,%xmm8 2335 pslld $32-25,%xmm5 2336 pxor %xmm8,%xmm5 2337 movdqa %xmm4,%xmm8 2338 psrld $25,%xmm8 2339 pslld $32-25,%xmm4 2340 pxor %xmm8,%xmm4 2341 movdqa 80(%rbp),%xmm8 2342 .byte 102,15,58,15,255,12 2343 .byte 
102,69,15,58,15,219,8 2344 .byte 102,69,15,58,15,255,4 2345 .byte 102,15,58,15,246,12 2346 .byte 102,69,15,58,15,210,8 2347 .byte 102,69,15,58,15,246,4 2348 .byte 102,15,58,15,237,12 2349 .byte 102,69,15,58,15,201,8 2350 .byte 102,69,15,58,15,237,4 2351 .byte 102,15,58,15,228,12 2352 .byte 102,69,15,58,15,192,8 2353 .byte 102,69,15,58,15,228,4 2354 2355 decq %r10 2356 jnz 1b 2357 paddd .chacha20_consts(%rip),%xmm3 2358 paddd 48(%rbp),%xmm7 2359 paddd 64(%rbp),%xmm11 2360 paddd 144(%rbp),%xmm15 2361 paddd .chacha20_consts(%rip),%xmm2 2362 paddd 48(%rbp),%xmm6 2363 paddd 64(%rbp),%xmm10 2364 paddd 128(%rbp),%xmm14 2365 paddd .chacha20_consts(%rip),%xmm1 2366 paddd 48(%rbp),%xmm5 2367 paddd 64(%rbp),%xmm9 2368 paddd 112(%rbp),%xmm13 2369 paddd .chacha20_consts(%rip),%xmm0 2370 paddd 48(%rbp),%xmm4 2371 paddd 64(%rbp),%xmm8 2372 paddd 96(%rbp),%xmm12 2373 2374 2375 pand .clamp(%rip),%xmm3 2376 movdqa %xmm3,0(%rbp) 2377 movdqa %xmm7,16(%rbp) 2378 2379 movq %r8,%r8 2380 call poly_hash_ad_internal 2381 movdqu 0 + 0(%rsi),%xmm3 2382 movdqu 16 + 0(%rsi),%xmm7 2383 movdqu 32 + 0(%rsi),%xmm11 2384 movdqu 48 + 0(%rsi),%xmm15 2385 pxor %xmm3,%xmm2 2386 pxor %xmm7,%xmm6 2387 pxor %xmm11,%xmm10 2388 pxor %xmm14,%xmm15 2389 movdqu %xmm2,0 + 0(%rdi) 2390 movdqu %xmm6,16 + 0(%rdi) 2391 movdqu %xmm10,32 + 0(%rdi) 2392 movdqu %xmm15,48 + 0(%rdi) 2393 movdqu 0 + 64(%rsi),%xmm3 2394 movdqu 16 + 64(%rsi),%xmm7 2395 movdqu 32 + 64(%rsi),%xmm11 2396 movdqu 48 + 64(%rsi),%xmm15 2397 pxor %xmm3,%xmm1 2398 pxor %xmm7,%xmm5 2399 pxor %xmm11,%xmm9 2400 pxor %xmm13,%xmm15 2401 movdqu %xmm1,0 + 64(%rdi) 2402 movdqu %xmm5,16 + 64(%rdi) 2403 movdqu %xmm9,32 + 64(%rdi) 2404 movdqu %xmm15,48 + 64(%rdi) 2405 2406 cmpq $192,%rbx 2407 ja 1f 2408 movq $128,%rcx 2409 subq $128,%rbx 2410 leaq 128(%rsi),%rsi 2411 jmp seal_sse_128_seal_hash 2412 1: 2413 movdqu 0 + 128(%rsi),%xmm3 2414 movdqu 16 + 128(%rsi),%xmm7 2415 movdqu 32 + 128(%rsi),%xmm11 2416 movdqu 48 + 128(%rsi),%xmm15 2417 pxor %xmm3,%xmm0 2418 
pxor %xmm7,%xmm4 2419 pxor %xmm11,%xmm8 2420 pxor %xmm12,%xmm15 2421 movdqu %xmm0,0 + 128(%rdi) 2422 movdqu %xmm4,16 + 128(%rdi) 2423 movdqu %xmm8,32 + 128(%rdi) 2424 movdqu %xmm15,48 + 128(%rdi) 2425 2426 movq $192,%rcx 2427 subq $192,%rbx 2428 leaq 192(%rsi),%rsi 2429 movq $2,%rcx 2430 movq $8,%r8 2431 cmpq $64,%rbx 2432 jbe seal_sse_tail_64 2433 cmpq $128,%rbx 2434 jbe seal_sse_tail_128 2435 cmpq $192,%rbx 2436 jbe seal_sse_tail_192 2437 2438 1: 2439 movdqa .chacha20_consts(%rip),%xmm0 2440 movdqa 48(%rbp),%xmm4 2441 movdqa 64(%rbp),%xmm8 2442 movdqa %xmm0,%xmm1 2443 movdqa %xmm4,%xmm5 2444 movdqa %xmm8,%xmm9 2445 movdqa %xmm0,%xmm2 2446 movdqa %xmm4,%xmm6 2447 movdqa %xmm8,%xmm10 2448 movdqa %xmm0,%xmm3 2449 movdqa %xmm4,%xmm7 2450 movdqa %xmm8,%xmm11 2451 movdqa 96(%rbp),%xmm15 2452 paddd .sse_inc(%rip),%xmm15 2453 movdqa %xmm15,%xmm14 2454 paddd .sse_inc(%rip),%xmm14 2455 movdqa %xmm14,%xmm13 2456 paddd .sse_inc(%rip),%xmm13 2457 movdqa %xmm13,%xmm12 2458 paddd .sse_inc(%rip),%xmm12 2459 movdqa %xmm12,96(%rbp) 2460 movdqa %xmm13,112(%rbp) 2461 movdqa %xmm14,128(%rbp) 2462 movdqa %xmm15,144(%rbp) 2463 2464 2: 2465 movdqa %xmm8,80(%rbp) 2466 movdqa .rol16(%rip),%xmm8 2467 paddd %xmm7,%xmm3 2468 paddd %xmm6,%xmm2 2469 paddd %xmm5,%xmm1 2470 paddd %xmm4,%xmm0 2471 pxor %xmm3,%xmm15 2472 pxor %xmm2,%xmm14 2473 pxor %xmm1,%xmm13 2474 pxor %xmm0,%xmm12 2475 .byte 102,69,15,56,0,248 2476 .byte 102,69,15,56,0,240 2477 .byte 102,69,15,56,0,232 2478 .byte 102,69,15,56,0,224 2479 movdqa 80(%rbp),%xmm8 2480 paddd %xmm15,%xmm11 2481 paddd %xmm14,%xmm10 2482 paddd %xmm13,%xmm9 2483 paddd %xmm12,%xmm8 2484 pxor %xmm11,%xmm7 2485 addq 0(%rdi),%r10 2486 adcq 8+0(%rdi),%r11 2487 adcq $1,%r12 2488 pxor %xmm10,%xmm6 2489 pxor %xmm9,%xmm5 2490 pxor %xmm8,%xmm4 2491 movdqa %xmm8,80(%rbp) 2492 movdqa %xmm7,%xmm8 2493 psrld $20,%xmm8 2494 pslld $32-20,%xmm7 2495 pxor %xmm8,%xmm7 2496 movdqa %xmm6,%xmm8 2497 psrld $20,%xmm8 2498 pslld $32-20,%xmm6 2499 pxor %xmm8,%xmm6 2500 movdqa 
%xmm5,%xmm8 2501 psrld $20,%xmm8 2502 pslld $32-20,%xmm5 2503 pxor %xmm8,%xmm5 2504 movdqa %xmm4,%xmm8 2505 psrld $20,%xmm8 2506 pslld $32-20,%xmm4 2507 pxor %xmm8,%xmm4 2508 movq 0+0(%rbp),%rax 2509 movq %rax,%r15 2510 mulq %r10 2511 movq %rax,%r13 2512 movq %rdx,%r14 2513 movq 0+0(%rbp),%rax 2514 mulq %r11 2515 imulq %r12,%r15 2516 addq %rax,%r14 2517 adcq %rdx,%r15 2518 movdqa .rol8(%rip),%xmm8 2519 paddd %xmm7,%xmm3 2520 paddd %xmm6,%xmm2 2521 paddd %xmm5,%xmm1 2522 paddd %xmm4,%xmm0 2523 pxor %xmm3,%xmm15 2524 pxor %xmm2,%xmm14 2525 pxor %xmm1,%xmm13 2526 pxor %xmm0,%xmm12 2527 .byte 102,69,15,56,0,248 2528 .byte 102,69,15,56,0,240 2529 .byte 102,69,15,56,0,232 2530 .byte 102,69,15,56,0,224 2531 movdqa 80(%rbp),%xmm8 2532 paddd %xmm15,%xmm11 2533 paddd %xmm14,%xmm10 2534 paddd %xmm13,%xmm9 2535 paddd %xmm12,%xmm8 2536 pxor %xmm11,%xmm7 2537 pxor %xmm10,%xmm6 2538 movq 8+0(%rbp),%rax 2539 movq %rax,%r9 2540 mulq %r10 2541 addq %rax,%r14 2542 adcq $0,%rdx 2543 movq %rdx,%r10 2544 movq 8+0(%rbp),%rax 2545 mulq %r11 2546 addq %rax,%r15 2547 adcq $0,%rdx 2548 pxor %xmm9,%xmm5 2549 pxor %xmm8,%xmm4 2550 movdqa %xmm8,80(%rbp) 2551 movdqa %xmm7,%xmm8 2552 psrld $25,%xmm8 2553 pslld $32-25,%xmm7 2554 pxor %xmm8,%xmm7 2555 movdqa %xmm6,%xmm8 2556 psrld $25,%xmm8 2557 pslld $32-25,%xmm6 2558 pxor %xmm8,%xmm6 2559 movdqa %xmm5,%xmm8 2560 psrld $25,%xmm8 2561 pslld $32-25,%xmm5 2562 pxor %xmm8,%xmm5 2563 movdqa %xmm4,%xmm8 2564 psrld $25,%xmm8 2565 pslld $32-25,%xmm4 2566 pxor %xmm8,%xmm4 2567 movdqa 80(%rbp),%xmm8 2568 imulq %r12,%r9 2569 addq %r10,%r15 2570 adcq %rdx,%r9 2571 .byte 102,15,58,15,255,4 2572 .byte 102,69,15,58,15,219,8 2573 .byte 102,69,15,58,15,255,12 2574 .byte 102,15,58,15,246,4 2575 .byte 102,69,15,58,15,210,8 2576 .byte 102,69,15,58,15,246,12 2577 .byte 102,15,58,15,237,4 2578 .byte 102,69,15,58,15,201,8 2579 .byte 102,69,15,58,15,237,12 2580 .byte 102,15,58,15,228,4 2581 .byte 102,69,15,58,15,192,8 2582 .byte 102,69,15,58,15,228,12 2583 movdqa 
%xmm8,80(%rbp) 2584 movdqa .rol16(%rip),%xmm8 2585 paddd %xmm7,%xmm3 2586 paddd %xmm6,%xmm2 2587 paddd %xmm5,%xmm1 2588 paddd %xmm4,%xmm0 2589 pxor %xmm3,%xmm15 2590 pxor %xmm2,%xmm14 2591 movq %r13,%r10 2592 movq %r14,%r11 2593 movq %r15,%r12 2594 andq $3,%r12 2595 movq %r15,%r13 2596 andq $-4,%r13 2597 movq %r9,%r14 2598 shrdq $2,%r9,%r15 2599 shrq $2,%r9 2600 addq %r13,%r10 2601 adcq %r14,%r11 2602 adcq $0,%r12 2603 addq %r15,%r10 2604 adcq %r9,%r11 2605 adcq $0,%r12 2606 pxor %xmm1,%xmm13 2607 pxor %xmm0,%xmm12 2608 .byte 102,69,15,56,0,248 2609 .byte 102,69,15,56,0,240 2610 .byte 102,69,15,56,0,232 2611 .byte 102,69,15,56,0,224 2612 movdqa 80(%rbp),%xmm8 2613 paddd %xmm15,%xmm11 2614 paddd %xmm14,%xmm10 2615 paddd %xmm13,%xmm9 2616 paddd %xmm12,%xmm8 2617 pxor %xmm11,%xmm7 2618 pxor %xmm10,%xmm6 2619 pxor %xmm9,%xmm5 2620 pxor %xmm8,%xmm4 2621 movdqa %xmm8,80(%rbp) 2622 movdqa %xmm7,%xmm8 2623 psrld $20,%xmm8 2624 pslld $32-20,%xmm7 2625 pxor %xmm8,%xmm7 2626 movdqa %xmm6,%xmm8 2627 psrld $20,%xmm8 2628 pslld $32-20,%xmm6 2629 pxor %xmm8,%xmm6 2630 movdqa %xmm5,%xmm8 2631 psrld $20,%xmm8 2632 pslld $32-20,%xmm5 2633 pxor %xmm8,%xmm5 2634 movdqa %xmm4,%xmm8 2635 psrld $20,%xmm8 2636 pslld $32-20,%xmm4 2637 pxor %xmm8,%xmm4 2638 movdqa .rol8(%rip),%xmm8 2639 paddd %xmm7,%xmm3 2640 paddd %xmm6,%xmm2 2641 paddd %xmm5,%xmm1 2642 paddd %xmm4,%xmm0 2643 pxor %xmm3,%xmm15 2644 pxor %xmm2,%xmm14 2645 pxor %xmm1,%xmm13 2646 pxor %xmm0,%xmm12 2647 .byte 102,69,15,56,0,248 2648 .byte 102,69,15,56,0,240 2649 .byte 102,69,15,56,0,232 2650 .byte 102,69,15,56,0,224 2651 movdqa 80(%rbp),%xmm8 2652 paddd %xmm15,%xmm11 2653 paddd %xmm14,%xmm10 2654 paddd %xmm13,%xmm9 2655 paddd %xmm12,%xmm8 2656 pxor %xmm11,%xmm7 2657 pxor %xmm10,%xmm6 2658 pxor %xmm9,%xmm5 2659 pxor %xmm8,%xmm4 2660 movdqa %xmm8,80(%rbp) 2661 movdqa %xmm7,%xmm8 2662 psrld $25,%xmm8 2663 pslld $32-25,%xmm7 2664 pxor %xmm8,%xmm7 2665 movdqa %xmm6,%xmm8 2666 psrld $25,%xmm8 2667 pslld $32-25,%xmm6 2668 pxor 
%xmm8,%xmm6 2669 movdqa %xmm5,%xmm8 2670 psrld $25,%xmm8 2671 pslld $32-25,%xmm5 2672 pxor %xmm8,%xmm5 2673 movdqa %xmm4,%xmm8 2674 psrld $25,%xmm8 2675 pslld $32-25,%xmm4 2676 pxor %xmm8,%xmm4 2677 movdqa 80(%rbp),%xmm8 2678 .byte 102,15,58,15,255,12 2679 .byte 102,69,15,58,15,219,8 2680 .byte 102,69,15,58,15,255,4 2681 .byte 102,15,58,15,246,12 2682 .byte 102,69,15,58,15,210,8 2683 .byte 102,69,15,58,15,246,4 2684 .byte 102,15,58,15,237,12 2685 .byte 102,69,15,58,15,201,8 2686 .byte 102,69,15,58,15,237,4 2687 .byte 102,15,58,15,228,12 2688 .byte 102,69,15,58,15,192,8 2689 .byte 102,69,15,58,15,228,4 2690 2691 leaq 16(%rdi),%rdi 2692 decq %r8 2693 jge 2b 2694 addq 0(%rdi),%r10 2695 adcq 8+0(%rdi),%r11 2696 adcq $1,%r12 2697 movq 0+0(%rbp),%rax 2698 movq %rax,%r15 2699 mulq %r10 2700 movq %rax,%r13 2701 movq %rdx,%r14 2702 movq 0+0(%rbp),%rax 2703 mulq %r11 2704 imulq %r12,%r15 2705 addq %rax,%r14 2706 adcq %rdx,%r15 2707 movq 8+0(%rbp),%rax 2708 movq %rax,%r9 2709 mulq %r10 2710 addq %rax,%r14 2711 adcq $0,%rdx 2712 movq %rdx,%r10 2713 movq 8+0(%rbp),%rax 2714 mulq %r11 2715 addq %rax,%r15 2716 adcq $0,%rdx 2717 imulq %r12,%r9 2718 addq %r10,%r15 2719 adcq %rdx,%r9 2720 movq %r13,%r10 2721 movq %r14,%r11 2722 movq %r15,%r12 2723 andq $3,%r12 2724 movq %r15,%r13 2725 andq $-4,%r13 2726 movq %r9,%r14 2727 shrdq $2,%r9,%r15 2728 shrq $2,%r9 2729 addq %r13,%r10 2730 adcq %r14,%r11 2731 adcq $0,%r12 2732 addq %r15,%r10 2733 adcq %r9,%r11 2734 adcq $0,%r12 2735 2736 leaq 16(%rdi),%rdi 2737 decq %rcx 2738 jg 2b 2739 paddd .chacha20_consts(%rip),%xmm3 2740 paddd 48(%rbp),%xmm7 2741 paddd 64(%rbp),%xmm11 2742 paddd 144(%rbp),%xmm15 2743 paddd .chacha20_consts(%rip),%xmm2 2744 paddd 48(%rbp),%xmm6 2745 paddd 64(%rbp),%xmm10 2746 paddd 128(%rbp),%xmm14 2747 paddd .chacha20_consts(%rip),%xmm1 2748 paddd 48(%rbp),%xmm5 2749 paddd 64(%rbp),%xmm9 2750 paddd 112(%rbp),%xmm13 2751 paddd .chacha20_consts(%rip),%xmm0 2752 paddd 48(%rbp),%xmm4 2753 paddd 64(%rbp),%xmm8 2754 paddd 
96(%rbp),%xmm12 2755 2756 movdqa %xmm14,80(%rbp) 2757 movdqa %xmm14,80(%rbp) 2758 movdqu 0 + 0(%rsi),%xmm14 2759 pxor %xmm3,%xmm14 2760 movdqu %xmm14,0 + 0(%rdi) 2761 movdqu 16 + 0(%rsi),%xmm14 2762 pxor %xmm7,%xmm14 2763 movdqu %xmm14,16 + 0(%rdi) 2764 movdqu 32 + 0(%rsi),%xmm14 2765 pxor %xmm11,%xmm14 2766 movdqu %xmm14,32 + 0(%rdi) 2767 movdqu 48 + 0(%rsi),%xmm14 2768 pxor %xmm15,%xmm14 2769 movdqu %xmm14,48 + 0(%rdi) 2770 2771 movdqa 80(%rbp),%xmm14 2772 movdqu 0 + 64(%rsi),%xmm3 2773 movdqu 16 + 64(%rsi),%xmm7 2774 movdqu 32 + 64(%rsi),%xmm11 2775 movdqu 48 + 64(%rsi),%xmm15 2776 pxor %xmm3,%xmm2 2777 pxor %xmm7,%xmm6 2778 pxor %xmm11,%xmm10 2779 pxor %xmm14,%xmm15 2780 movdqu %xmm2,0 + 64(%rdi) 2781 movdqu %xmm6,16 + 64(%rdi) 2782 movdqu %xmm10,32 + 64(%rdi) 2783 movdqu %xmm15,48 + 64(%rdi) 2784 movdqu 0 + 128(%rsi),%xmm3 2785 movdqu 16 + 128(%rsi),%xmm7 2786 movdqu 32 + 128(%rsi),%xmm11 2787 movdqu 48 + 128(%rsi),%xmm15 2788 pxor %xmm3,%xmm1 2789 pxor %xmm7,%xmm5 2790 pxor %xmm11,%xmm9 2791 pxor %xmm13,%xmm15 2792 movdqu %xmm1,0 + 128(%rdi) 2793 movdqu %xmm5,16 + 128(%rdi) 2794 movdqu %xmm9,32 + 128(%rdi) 2795 movdqu %xmm15,48 + 128(%rdi) 2796 2797 cmpq $256,%rbx 2798 ja 3f 2799 2800 movq $192,%rcx 2801 subq $192,%rbx 2802 leaq 192(%rsi),%rsi 2803 jmp seal_sse_128_seal_hash 2804 3: 2805 movdqu 0 + 192(%rsi),%xmm3 2806 movdqu 16 + 192(%rsi),%xmm7 2807 movdqu 32 + 192(%rsi),%xmm11 2808 movdqu 48 + 192(%rsi),%xmm15 2809 pxor %xmm3,%xmm0 2810 pxor %xmm7,%xmm4 2811 pxor %xmm11,%xmm8 2812 pxor %xmm12,%xmm15 2813 movdqu %xmm0,0 + 192(%rdi) 2814 movdqu %xmm4,16 + 192(%rdi) 2815 movdqu %xmm8,32 + 192(%rdi) 2816 movdqu %xmm15,48 + 192(%rdi) 2817 2818 leaq 256(%rsi),%rsi 2819 subq $256,%rbx 2820 movq $6,%rcx 2821 movq $4,%r8 2822 cmpq $192,%rbx 2823 jg 1b 2824 movq %rbx,%rcx 2825 testq %rbx,%rbx 2826 je seal_sse_128_seal_hash 2827 movq $6,%rcx 2828 cmpq $64,%rbx 2829 jg 3f 2830 2831 seal_sse_tail_64: 2832 movdqa .chacha20_consts(%rip),%xmm0 2833 movdqa 48(%rbp),%xmm4 
2834 movdqa 64(%rbp),%xmm8 2835 movdqa 96(%rbp),%xmm12 2836 paddd .sse_inc(%rip),%xmm12 2837 movdqa %xmm12,96(%rbp) 2838 2839 1: 2840 addq 0(%rdi),%r10 2841 adcq 8+0(%rdi),%r11 2842 adcq $1,%r12 2843 movq 0+0(%rbp),%rax 2844 movq %rax,%r15 2845 mulq %r10 2846 movq %rax,%r13 2847 movq %rdx,%r14 2848 movq 0+0(%rbp),%rax 2849 mulq %r11 2850 imulq %r12,%r15 2851 addq %rax,%r14 2852 adcq %rdx,%r15 2853 movq 8+0(%rbp),%rax 2854 movq %rax,%r9 2855 mulq %r10 2856 addq %rax,%r14 2857 adcq $0,%rdx 2858 movq %rdx,%r10 2859 movq 8+0(%rbp),%rax 2860 mulq %r11 2861 addq %rax,%r15 2862 adcq $0,%rdx 2863 imulq %r12,%r9 2864 addq %r10,%r15 2865 adcq %rdx,%r9 2866 movq %r13,%r10 2867 movq %r14,%r11 2868 movq %r15,%r12 2869 andq $3,%r12 2870 movq %r15,%r13 2871 andq $-4,%r13 2872 movq %r9,%r14 2873 shrdq $2,%r9,%r15 2874 shrq $2,%r9 2875 addq %r13,%r10 2876 adcq %r14,%r11 2877 adcq $0,%r12 2878 addq %r15,%r10 2879 adcq %r9,%r11 2880 adcq $0,%r12 2881 2882 leaq 16(%rdi),%rdi 2883 2: 2884 paddd %xmm4,%xmm0 2885 pxor %xmm0,%xmm12 2886 pshufb .rol16(%rip),%xmm12 2887 paddd %xmm12,%xmm8 2888 pxor %xmm8,%xmm4 2889 movdqa %xmm4,%xmm3 2890 pslld $12,%xmm3 2891 psrld $20,%xmm4 2892 pxor %xmm3,%xmm4 2893 paddd %xmm4,%xmm0 2894 pxor %xmm0,%xmm12 2895 pshufb .rol8(%rip),%xmm12 2896 paddd %xmm12,%xmm8 2897 pxor %xmm8,%xmm4 2898 movdqa %xmm4,%xmm3 2899 pslld $7,%xmm3 2900 psrld $25,%xmm4 2901 pxor %xmm3,%xmm4 2902 .byte 102,15,58,15,228,4 2903 .byte 102,69,15,58,15,192,8 2904 .byte 102,69,15,58,15,228,12 2905 paddd %xmm4,%xmm0 2906 pxor %xmm0,%xmm12 2907 pshufb .rol16(%rip),%xmm12 2908 paddd %xmm12,%xmm8 2909 pxor %xmm8,%xmm4 2910 movdqa %xmm4,%xmm3 2911 pslld $12,%xmm3 2912 psrld $20,%xmm4 2913 pxor %xmm3,%xmm4 2914 paddd %xmm4,%xmm0 2915 pxor %xmm0,%xmm12 2916 pshufb .rol8(%rip),%xmm12 2917 paddd %xmm12,%xmm8 2918 pxor %xmm8,%xmm4 2919 movdqa %xmm4,%xmm3 2920 pslld $7,%xmm3 2921 psrld $25,%xmm4 2922 pxor %xmm3,%xmm4 2923 .byte 102,15,58,15,228,12 2924 .byte 102,69,15,58,15,192,8 2925 .byte 
102,69,15,58,15,228,4 2926 addq 0(%rdi),%r10 2927 adcq 8+0(%rdi),%r11 2928 adcq $1,%r12 2929 movq 0+0(%rbp),%rax 2930 movq %rax,%r15 2931 mulq %r10 2932 movq %rax,%r13 2933 movq %rdx,%r14 2934 movq 0+0(%rbp),%rax 2935 mulq %r11 2936 imulq %r12,%r15 2937 addq %rax,%r14 2938 adcq %rdx,%r15 2939 movq 8+0(%rbp),%rax 2940 movq %rax,%r9 2941 mulq %r10 2942 addq %rax,%r14 2943 adcq $0,%rdx 2944 movq %rdx,%r10 2945 movq 8+0(%rbp),%rax 2946 mulq %r11 2947 addq %rax,%r15 2948 adcq $0,%rdx 2949 imulq %r12,%r9 2950 addq %r10,%r15 2951 adcq %rdx,%r9 2952 movq %r13,%r10 2953 movq %r14,%r11 2954 movq %r15,%r12 2955 andq $3,%r12 2956 movq %r15,%r13 2957 andq $-4,%r13 2958 movq %r9,%r14 2959 shrdq $2,%r9,%r15 2960 shrq $2,%r9 2961 addq %r13,%r10 2962 adcq %r14,%r11 2963 adcq $0,%r12 2964 addq %r15,%r10 2965 adcq %r9,%r11 2966 adcq $0,%r12 2967 2968 leaq 16(%rdi),%rdi 2969 decq %rcx 2970 jg 1b 2971 decq %r8 2972 jge 2b 2973 paddd .chacha20_consts(%rip),%xmm0 2974 paddd 48(%rbp),%xmm4 2975 paddd 64(%rbp),%xmm8 2976 paddd 96(%rbp),%xmm12 2977 2978 jmp seal_sse_128_seal 2979 3: 2980 cmpq $128,%rbx 2981 jg 3f 2982 2983 seal_sse_tail_128: 2984 movdqa .chacha20_consts(%rip),%xmm0 2985 movdqa 48(%rbp),%xmm4 2986 movdqa 64(%rbp),%xmm8 2987 movdqa %xmm0,%xmm1 2988 movdqa %xmm4,%xmm5 2989 movdqa %xmm8,%xmm9 2990 movdqa 96(%rbp),%xmm13 2991 paddd .sse_inc(%rip),%xmm13 2992 movdqa %xmm13,%xmm12 2993 paddd .sse_inc(%rip),%xmm12 2994 movdqa %xmm12,96(%rbp) 2995 movdqa %xmm13,112(%rbp) 2996 2997 1: 2998 addq 0(%rdi),%r10 2999 adcq 8+0(%rdi),%r11 3000 adcq $1,%r12 3001 movq 0+0(%rbp),%rax 3002 movq %rax,%r15 3003 mulq %r10 3004 movq %rax,%r13 3005 movq %rdx,%r14 3006 movq 0+0(%rbp),%rax 3007 mulq %r11 3008 imulq %r12,%r15 3009 addq %rax,%r14 3010 adcq %rdx,%r15 3011 movq 8+0(%rbp),%rax 3012 movq %rax,%r9 3013 mulq %r10 3014 addq %rax,%r14 3015 adcq $0,%rdx 3016 movq %rdx,%r10 3017 movq 8+0(%rbp),%rax 3018 mulq %r11 3019 addq %rax,%r15 3020 adcq $0,%rdx 3021 imulq %r12,%r9 3022 addq %r10,%r15 3023 
adcq %rdx,%r9 3024 movq %r13,%r10 3025 movq %r14,%r11 3026 movq %r15,%r12 3027 andq $3,%r12 3028 movq %r15,%r13 3029 andq $-4,%r13 3030 movq %r9,%r14 3031 shrdq $2,%r9,%r15 3032 shrq $2,%r9 3033 addq %r13,%r10 3034 adcq %r14,%r11 3035 adcq $0,%r12 3036 addq %r15,%r10 3037 adcq %r9,%r11 3038 adcq $0,%r12 3039 3040 leaq 16(%rdi),%rdi 3041 2: 3042 paddd %xmm4,%xmm0 3043 pxor %xmm0,%xmm12 3044 pshufb .rol16(%rip),%xmm12 3045 paddd %xmm12,%xmm8 3046 pxor %xmm8,%xmm4 3047 movdqa %xmm4,%xmm3 3048 pslld $12,%xmm3 3049 psrld $20,%xmm4 3050 pxor %xmm3,%xmm4 3051 paddd %xmm4,%xmm0 3052 pxor %xmm0,%xmm12 3053 pshufb .rol8(%rip),%xmm12 3054 paddd %xmm12,%xmm8 3055 pxor %xmm8,%xmm4 3056 movdqa %xmm4,%xmm3 3057 pslld $7,%xmm3 3058 psrld $25,%xmm4 3059 pxor %xmm3,%xmm4 3060 .byte 102,15,58,15,228,4 3061 .byte 102,69,15,58,15,192,8 3062 .byte 102,69,15,58,15,228,12 3063 paddd %xmm5,%xmm1 3064 pxor %xmm1,%xmm13 3065 pshufb .rol16(%rip),%xmm13 3066 paddd %xmm13,%xmm9 3067 pxor %xmm9,%xmm5 3068 movdqa %xmm5,%xmm3 3069 pslld $12,%xmm3 3070 psrld $20,%xmm5 3071 pxor %xmm3,%xmm5 3072 paddd %xmm5,%xmm1 3073 pxor %xmm1,%xmm13 3074 pshufb .rol8(%rip),%xmm13 3075 paddd %xmm13,%xmm9 3076 pxor %xmm9,%xmm5 3077 movdqa %xmm5,%xmm3 3078 pslld $7,%xmm3 3079 psrld $25,%xmm5 3080 pxor %xmm3,%xmm5 3081 .byte 102,15,58,15,237,4 3082 .byte 102,69,15,58,15,201,8 3083 .byte 102,69,15,58,15,237,12 3084 addq 0(%rdi),%r10 3085 adcq 8+0(%rdi),%r11 3086 adcq $1,%r12 3087 movq 0+0(%rbp),%rax 3088 movq %rax,%r15 3089 mulq %r10 3090 movq %rax,%r13 3091 movq %rdx,%r14 3092 movq 0+0(%rbp),%rax 3093 mulq %r11 3094 imulq %r12,%r15 3095 addq %rax,%r14 3096 adcq %rdx,%r15 3097 movq 8+0(%rbp),%rax 3098 movq %rax,%r9 3099 mulq %r10 3100 addq %rax,%r14 3101 adcq $0,%rdx 3102 movq %rdx,%r10 3103 movq 8+0(%rbp),%rax 3104 mulq %r11 3105 addq %rax,%r15 3106 adcq $0,%rdx 3107 imulq %r12,%r9 3108 addq %r10,%r15 3109 adcq %rdx,%r9 3110 movq %r13,%r10 3111 movq %r14,%r11 3112 movq %r15,%r12 3113 andq $3,%r12 3114 movq %r15,%r13 
3115 andq $-4,%r13 3116 movq %r9,%r14 3117 shrdq $2,%r9,%r15 3118 shrq $2,%r9 3119 addq %r13,%r10 3120 adcq %r14,%r11 3121 adcq $0,%r12 3122 addq %r15,%r10 3123 adcq %r9,%r11 3124 adcq $0,%r12 3125 paddd %xmm4,%xmm0 3126 pxor %xmm0,%xmm12 3127 pshufb .rol16(%rip),%xmm12 3128 paddd %xmm12,%xmm8 3129 pxor %xmm8,%xmm4 3130 movdqa %xmm4,%xmm3 3131 pslld $12,%xmm3 3132 psrld $20,%xmm4 3133 pxor %xmm3,%xmm4 3134 paddd %xmm4,%xmm0 3135 pxor %xmm0,%xmm12 3136 pshufb .rol8(%rip),%xmm12 3137 paddd %xmm12,%xmm8 3138 pxor %xmm8,%xmm4 3139 movdqa %xmm4,%xmm3 3140 pslld $7,%xmm3 3141 psrld $25,%xmm4 3142 pxor %xmm3,%xmm4 3143 .byte 102,15,58,15,228,12 3144 .byte 102,69,15,58,15,192,8 3145 .byte 102,69,15,58,15,228,4 3146 paddd %xmm5,%xmm1 3147 pxor %xmm1,%xmm13 3148 pshufb .rol16(%rip),%xmm13 3149 paddd %xmm13,%xmm9 3150 pxor %xmm9,%xmm5 3151 movdqa %xmm5,%xmm3 3152 pslld $12,%xmm3 3153 psrld $20,%xmm5 3154 pxor %xmm3,%xmm5 3155 paddd %xmm5,%xmm1 3156 pxor %xmm1,%xmm13 3157 pshufb .rol8(%rip),%xmm13 3158 paddd %xmm13,%xmm9 3159 pxor %xmm9,%xmm5 3160 movdqa %xmm5,%xmm3 3161 pslld $7,%xmm3 3162 psrld $25,%xmm5 3163 pxor %xmm3,%xmm5 3164 .byte 102,15,58,15,237,12 3165 .byte 102,69,15,58,15,201,8 3166 .byte 102,69,15,58,15,237,4 3167 3168 leaq 16(%rdi),%rdi 3169 decq %rcx 3170 jg 1b 3171 decq %r8 3172 jge 2b 3173 paddd .chacha20_consts(%rip),%xmm1 3174 paddd 48(%rbp),%xmm5 3175 paddd 64(%rbp),%xmm9 3176 paddd 112(%rbp),%xmm13 3177 paddd .chacha20_consts(%rip),%xmm0 3178 paddd 48(%rbp),%xmm4 3179 paddd 64(%rbp),%xmm8 3180 paddd 96(%rbp),%xmm12 3181 movdqu 0 + 0(%rsi),%xmm3 3182 movdqu 16 + 0(%rsi),%xmm7 3183 movdqu 32 + 0(%rsi),%xmm11 3184 movdqu 48 + 0(%rsi),%xmm15 3185 pxor %xmm3,%xmm1 3186 pxor %xmm7,%xmm5 3187 pxor %xmm11,%xmm9 3188 pxor %xmm13,%xmm15 3189 movdqu %xmm1,0 + 0(%rdi) 3190 movdqu %xmm5,16 + 0(%rdi) 3191 movdqu %xmm9,32 + 0(%rdi) 3192 movdqu %xmm15,48 + 0(%rdi) 3193 3194 movq $64,%rcx 3195 subq $64,%rbx 3196 leaq 64(%rsi),%rsi 3197 jmp seal_sse_128_seal_hash 3198 3: 
3199 3200 seal_sse_tail_192: 3201 movdqa .chacha20_consts(%rip),%xmm0 3202 movdqa 48(%rbp),%xmm4 3203 movdqa 64(%rbp),%xmm8 3204 movdqa %xmm0,%xmm1 3205 movdqa %xmm4,%xmm5 3206 movdqa %xmm8,%xmm9 3207 movdqa %xmm0,%xmm2 3208 movdqa %xmm4,%xmm6 3209 movdqa %xmm8,%xmm10 3210 movdqa 96(%rbp),%xmm14 3211 paddd .sse_inc(%rip),%xmm14 3212 movdqa %xmm14,%xmm13 3213 paddd .sse_inc(%rip),%xmm13 3214 movdqa %xmm13,%xmm12 3215 paddd .sse_inc(%rip),%xmm12 3216 movdqa %xmm12,96(%rbp) 3217 movdqa %xmm13,112(%rbp) 3218 movdqa %xmm14,128(%rbp) 3219 3220 1: 3221 addq 0(%rdi),%r10 3222 adcq 8+0(%rdi),%r11 3223 adcq $1,%r12 3224 movq 0+0(%rbp),%rax 3225 movq %rax,%r15 3226 mulq %r10 3227 movq %rax,%r13 3228 movq %rdx,%r14 3229 movq 0+0(%rbp),%rax 3230 mulq %r11 3231 imulq %r12,%r15 3232 addq %rax,%r14 3233 adcq %rdx,%r15 3234 movq 8+0(%rbp),%rax 3235 movq %rax,%r9 3236 mulq %r10 3237 addq %rax,%r14 3238 adcq $0,%rdx 3239 movq %rdx,%r10 3240 movq 8+0(%rbp),%rax 3241 mulq %r11 3242 addq %rax,%r15 3243 adcq $0,%rdx 3244 imulq %r12,%r9 3245 addq %r10,%r15 3246 adcq %rdx,%r9 3247 movq %r13,%r10 3248 movq %r14,%r11 3249 movq %r15,%r12 3250 andq $3,%r12 3251 movq %r15,%r13 3252 andq $-4,%r13 3253 movq %r9,%r14 3254 shrdq $2,%r9,%r15 3255 shrq $2,%r9 3256 addq %r13,%r10 3257 adcq %r14,%r11 3258 adcq $0,%r12 3259 addq %r15,%r10 3260 adcq %r9,%r11 3261 adcq $0,%r12 3262 3263 leaq 16(%rdi),%rdi 3264 2: 3265 paddd %xmm4,%xmm0 3266 pxor %xmm0,%xmm12 3267 pshufb .rol16(%rip),%xmm12 3268 paddd %xmm12,%xmm8 3269 pxor %xmm8,%xmm4 3270 movdqa %xmm4,%xmm3 3271 pslld $12,%xmm3 3272 psrld $20,%xmm4 3273 pxor %xmm3,%xmm4 3274 paddd %xmm4,%xmm0 3275 pxor %xmm0,%xmm12 3276 pshufb .rol8(%rip),%xmm12 3277 paddd %xmm12,%xmm8 3278 pxor %xmm8,%xmm4 3279 movdqa %xmm4,%xmm3 3280 pslld $7,%xmm3 3281 psrld $25,%xmm4 3282 pxor %xmm3,%xmm4 3283 .byte 102,15,58,15,228,4 3284 .byte 102,69,15,58,15,192,8 3285 .byte 102,69,15,58,15,228,12 3286 paddd %xmm5,%xmm1 3287 pxor %xmm1,%xmm13 3288 pshufb .rol16(%rip),%xmm13 3289 
paddd %xmm13,%xmm9 3290 pxor %xmm9,%xmm5 3291 movdqa %xmm5,%xmm3 3292 pslld $12,%xmm3 3293 psrld $20,%xmm5 3294 pxor %xmm3,%xmm5 3295 paddd %xmm5,%xmm1 3296 pxor %xmm1,%xmm13 3297 pshufb .rol8(%rip),%xmm13 3298 paddd %xmm13,%xmm9 3299 pxor %xmm9,%xmm5 3300 movdqa %xmm5,%xmm3 3301 pslld $7,%xmm3 3302 psrld $25,%xmm5 3303 pxor %xmm3,%xmm5 3304 .byte 102,15,58,15,237,4 3305 .byte 102,69,15,58,15,201,8 3306 .byte 102,69,15,58,15,237,12 3307 paddd %xmm6,%xmm2 3308 pxor %xmm2,%xmm14 3309 pshufb .rol16(%rip),%xmm14 3310 paddd %xmm14,%xmm10 3311 pxor %xmm10,%xmm6 3312 movdqa %xmm6,%xmm3 3313 pslld $12,%xmm3 3314 psrld $20,%xmm6 3315 pxor %xmm3,%xmm6 3316 paddd %xmm6,%xmm2 3317 pxor %xmm2,%xmm14 3318 pshufb .rol8(%rip),%xmm14 3319 paddd %xmm14,%xmm10 3320 pxor %xmm10,%xmm6 3321 movdqa %xmm6,%xmm3 3322 pslld $7,%xmm3 3323 psrld $25,%xmm6 3324 pxor %xmm3,%xmm6 3325 .byte 102,15,58,15,246,4 3326 .byte 102,69,15,58,15,210,8 3327 .byte 102,69,15,58,15,246,12 3328 addq 0(%rdi),%r10 3329 adcq 8+0(%rdi),%r11 3330 adcq $1,%r12 3331 movq 0+0(%rbp),%rax 3332 movq %rax,%r15 3333 mulq %r10 3334 movq %rax,%r13 3335 movq %rdx,%r14 3336 movq 0+0(%rbp),%rax 3337 mulq %r11 3338 imulq %r12,%r15 3339 addq %rax,%r14 3340 adcq %rdx,%r15 3341 movq 8+0(%rbp),%rax 3342 movq %rax,%r9 3343 mulq %r10 3344 addq %rax,%r14 3345 adcq $0,%rdx 3346 movq %rdx,%r10 3347 movq 8+0(%rbp),%rax 3348 mulq %r11 3349 addq %rax,%r15 3350 adcq $0,%rdx 3351 imulq %r12,%r9 3352 addq %r10,%r15 3353 adcq %rdx,%r9 3354 movq %r13,%r10 3355 movq %r14,%r11 3356 movq %r15,%r12 3357 andq $3,%r12 3358 movq %r15,%r13 3359 andq $-4,%r13 3360 movq %r9,%r14 3361 shrdq $2,%r9,%r15 3362 shrq $2,%r9 3363 addq %r13,%r10 3364 adcq %r14,%r11 3365 adcq $0,%r12 3366 addq %r15,%r10 3367 adcq %r9,%r11 3368 adcq $0,%r12 3369 paddd %xmm4,%xmm0 3370 pxor %xmm0,%xmm12 3371 pshufb .rol16(%rip),%xmm12 3372 paddd %xmm12,%xmm8 3373 pxor %xmm8,%xmm4 3374 movdqa %xmm4,%xmm3 3375 pslld $12,%xmm3 3376 psrld $20,%xmm4 3377 pxor %xmm3,%xmm4 3378 paddd 
%xmm4,%xmm0 3379 pxor %xmm0,%xmm12 3380 pshufb .rol8(%rip),%xmm12 3381 paddd %xmm12,%xmm8 3382 pxor %xmm8,%xmm4 3383 movdqa %xmm4,%xmm3 3384 pslld $7,%xmm3 3385 psrld $25,%xmm4 3386 pxor %xmm3,%xmm4 3387 .byte 102,15,58,15,228,12 3388 .byte 102,69,15,58,15,192,8 3389 .byte 102,69,15,58,15,228,4 3390 paddd %xmm5,%xmm1 3391 pxor %xmm1,%xmm13 3392 pshufb .rol16(%rip),%xmm13 3393 paddd %xmm13,%xmm9 3394 pxor %xmm9,%xmm5 3395 movdqa %xmm5,%xmm3 3396 pslld $12,%xmm3 3397 psrld $20,%xmm5 3398 pxor %xmm3,%xmm5 3399 paddd %xmm5,%xmm1 3400 pxor %xmm1,%xmm13 3401 pshufb .rol8(%rip),%xmm13 3402 paddd %xmm13,%xmm9 3403 pxor %xmm9,%xmm5 3404 movdqa %xmm5,%xmm3 3405 pslld $7,%xmm3 3406 psrld $25,%xmm5 3407 pxor %xmm3,%xmm5 3408 .byte 102,15,58,15,237,12 3409 .byte 102,69,15,58,15,201,8 3410 .byte 102,69,15,58,15,237,4 3411 paddd %xmm6,%xmm2 3412 pxor %xmm2,%xmm14 3413 pshufb .rol16(%rip),%xmm14 3414 paddd %xmm14,%xmm10 3415 pxor %xmm10,%xmm6 3416 movdqa %xmm6,%xmm3 3417 pslld $12,%xmm3 3418 psrld $20,%xmm6 3419 pxor %xmm3,%xmm6 3420 paddd %xmm6,%xmm2 3421 pxor %xmm2,%xmm14 3422 pshufb .rol8(%rip),%xmm14 3423 paddd %xmm14,%xmm10 3424 pxor %xmm10,%xmm6 3425 movdqa %xmm6,%xmm3 3426 pslld $7,%xmm3 3427 psrld $25,%xmm6 3428 pxor %xmm3,%xmm6 3429 .byte 102,15,58,15,246,12 3430 .byte 102,69,15,58,15,210,8 3431 .byte 102,69,15,58,15,246,4 3432 3433 leaq 16(%rdi),%rdi 3434 decq %rcx 3435 jg 1b 3436 decq %r8 3437 jge 2b 3438 paddd .chacha20_consts(%rip),%xmm2 3439 paddd 48(%rbp),%xmm6 3440 paddd 64(%rbp),%xmm10 3441 paddd 128(%rbp),%xmm14 3442 paddd .chacha20_consts(%rip),%xmm1 3443 paddd 48(%rbp),%xmm5 3444 paddd 64(%rbp),%xmm9 3445 paddd 112(%rbp),%xmm13 3446 paddd .chacha20_consts(%rip),%xmm0 3447 paddd 48(%rbp),%xmm4 3448 paddd 64(%rbp),%xmm8 3449 paddd 96(%rbp),%xmm12 3450 movdqu 0 + 0(%rsi),%xmm3 3451 movdqu 16 + 0(%rsi),%xmm7 3452 movdqu 32 + 0(%rsi),%xmm11 3453 movdqu 48 + 0(%rsi),%xmm15 3454 pxor %xmm3,%xmm2 3455 pxor %xmm7,%xmm6 3456 pxor %xmm11,%xmm10 3457 pxor %xmm14,%xmm15 
3458 movdqu %xmm2,0 + 0(%rdi) 3459 movdqu %xmm6,16 + 0(%rdi) 3460 movdqu %xmm10,32 + 0(%rdi) 3461 movdqu %xmm15,48 + 0(%rdi) 3462 movdqu 0 + 64(%rsi),%xmm3 3463 movdqu 16 + 64(%rsi),%xmm7 3464 movdqu 32 + 64(%rsi),%xmm11 3465 movdqu 48 + 64(%rsi),%xmm15 3466 pxor %xmm3,%xmm1 3467 pxor %xmm7,%xmm5 3468 pxor %xmm11,%xmm9 3469 pxor %xmm13,%xmm15 3470 movdqu %xmm1,0 + 64(%rdi) 3471 movdqu %xmm5,16 + 64(%rdi) 3472 movdqu %xmm9,32 + 64(%rdi) 3473 movdqu %xmm15,48 + 64(%rdi) 3474 3475 movq $128,%rcx 3476 subq $128,%rbx 3477 leaq 128(%rsi),%rsi 3478 3479 seal_sse_128_seal_hash: 3480 cmpq $16,%rcx 3481 jb seal_sse_128_seal 3482 addq 0(%rdi),%r10 3483 adcq 8+0(%rdi),%r11 3484 adcq $1,%r12 3485 movq 0+0(%rbp),%rax 3486 movq %rax,%r15 3487 mulq %r10 3488 movq %rax,%r13 3489 movq %rdx,%r14 3490 movq 0+0(%rbp),%rax 3491 mulq %r11 3492 imulq %r12,%r15 3493 addq %rax,%r14 3494 adcq %rdx,%r15 3495 movq 8+0(%rbp),%rax 3496 movq %rax,%r9 3497 mulq %r10 3498 addq %rax,%r14 3499 adcq $0,%rdx 3500 movq %rdx,%r10 3501 movq 8+0(%rbp),%rax 3502 mulq %r11 3503 addq %rax,%r15 3504 adcq $0,%rdx 3505 imulq %r12,%r9 3506 addq %r10,%r15 3507 adcq %rdx,%r9 3508 movq %r13,%r10 3509 movq %r14,%r11 3510 movq %r15,%r12 3511 andq $3,%r12 3512 movq %r15,%r13 3513 andq $-4,%r13 3514 movq %r9,%r14 3515 shrdq $2,%r9,%r15 3516 shrq $2,%r9 3517 addq %r13,%r10 3518 adcq %r14,%r11 3519 adcq $0,%r12 3520 addq %r15,%r10 3521 adcq %r9,%r11 3522 adcq $0,%r12 3523 3524 subq $16,%rcx 3525 leaq 16(%rdi),%rdi 3526 jmp seal_sse_128_seal_hash 3527 3528 seal_sse_128_seal: 3529 cmpq $16,%rbx 3530 jb seal_sse_tail_16 3531 subq $16,%rbx 3532 3533 movdqu 0(%rsi),%xmm3 3534 pxor %xmm3,%xmm0 3535 movdqu %xmm0,0(%rdi) 3536 3537 addq 0(%rdi),%r10 3538 adcq 8(%rdi),%r11 3539 adcq $1,%r12 3540 leaq 16(%rsi),%rsi 3541 leaq 16(%rdi),%rdi 3542 movq 0+0(%rbp),%rax 3543 movq %rax,%r15 3544 mulq %r10 3545 movq %rax,%r13 3546 movq %rdx,%r14 3547 movq 0+0(%rbp),%rax 3548 mulq %r11 3549 imulq %r12,%r15 3550 addq %rax,%r14 3551 adcq 
%rdx,%r15 3552 movq 8+0(%rbp),%rax 3553 movq %rax,%r9 3554 mulq %r10 3555 addq %rax,%r14 3556 adcq $0,%rdx 3557 movq %rdx,%r10 3558 movq 8+0(%rbp),%rax 3559 mulq %r11 3560 addq %rax,%r15 3561 adcq $0,%rdx 3562 imulq %r12,%r9 3563 addq %r10,%r15 3564 adcq %rdx,%r9 3565 movq %r13,%r10 3566 movq %r14,%r11 3567 movq %r15,%r12 3568 andq $3,%r12 3569 movq %r15,%r13 3570 andq $-4,%r13 3571 movq %r9,%r14 3572 shrdq $2,%r9,%r15 3573 shrq $2,%r9 3574 addq %r13,%r10 3575 adcq %r14,%r11 3576 adcq $0,%r12 3577 addq %r15,%r10 3578 adcq %r9,%r11 3579 adcq $0,%r12 3580 3581 3582 movdqa %xmm4,%xmm0 3583 movdqa %xmm8,%xmm4 3584 movdqa %xmm12,%xmm8 3585 movdqa %xmm1,%xmm12 3586 movdqa %xmm5,%xmm1 3587 movdqa %xmm9,%xmm5 3588 movdqa %xmm13,%xmm9 3589 jmp seal_sse_128_seal 3590 3591 seal_sse_tail_16: 3592 testq %rbx,%rbx 3593 jz process_blocks_of_extra_in 3594 3595 movq %rbx,%r8 3596 movq %rbx,%rcx 3597 leaq -1(%rsi,%rbx), %rsi 3598 pxor %xmm15,%xmm15 3599 1: 3600 pslldq $1,%xmm15 3601 pinsrb $0,(%rsi),%xmm15 3602 leaq -1(%rsi),%rsi 3603 decq %rcx 3604 jne 1b 3605 3606 3607 pxor %xmm0,%xmm15 3608 3609 3610 movq %rbx,%rcx 3611 movdqu %xmm15,%xmm0 3612 2: 3613 pextrb $0,%xmm0,(%rdi) 3614 psrldq $1,%xmm0 3615 addq $1,%rdi 3616 subq $1,%rcx 3617 jnz 2b 3618 3619 3620 3621 3622 3623 3624 3625 3626 movq 288+32(%rsp),%r9 3627 movq 56(%r9),%r14 3628 movq 48(%r9),%r13 3629 testq %r14,%r14 3630 jz process_partial_block 3631 3632 movq $16,%r15 3633 subq %rbx,%r15 3634 cmpq %r15,%r14 3635 3636 jge load_extra_in 3637 movq %r14,%r15 3638 3639 load_extra_in: 3640 3641 3642 leaq -1(%r13,%r15), %rsi 3643 3644 3645 addq %r15,%r13 3646 subq %r15,%r14 3647 movq %r13,48(%r9) 3648 movq %r14,56(%r9) 3649 3650 3651 3652 addq %r15,%r8 3653 3654 3655 pxor %xmm11,%xmm11 3656 3: 3657 pslldq $1,%xmm11 3658 pinsrb $0,(%rsi),%xmm11 3659 leaq -1(%rsi),%rsi 3660 subq $1,%r15 3661 jnz 3b 3662 3663 3664 3665 3666 movq %rbx,%r15 3667 3668 4: 3669 pslldq $1,%xmm11 3670 subq $1,%r15 3671 jnz 4b 3672 3673 3674 3675 3676 
leaq .and_masks(%rip),%r15 3677 shlq $4,%rbx 3678 pand -16(%r15,%rbx), %xmm15 3679 3680 3681 por %xmm11,%xmm15 3682 3683 3684 3685 .byte 102,77,15,126,253 3686 pextrq $1,%xmm15,%r14 3687 addq %r13,%r10 3688 adcq %r14,%r11 3689 adcq $1,%r12 3690 movq 0+0(%rbp),%rax 3691 movq %rax,%r15 3692 mulq %r10 3693 movq %rax,%r13 3694 movq %rdx,%r14 3695 movq 0+0(%rbp),%rax 3696 mulq %r11 3697 imulq %r12,%r15 3698 addq %rax,%r14 3699 adcq %rdx,%r15 3700 movq 8+0(%rbp),%rax 3701 movq %rax,%r9 3702 mulq %r10 3703 addq %rax,%r14 3704 adcq $0,%rdx 3705 movq %rdx,%r10 3706 movq 8+0(%rbp),%rax 3707 mulq %r11 3708 addq %rax,%r15 3709 adcq $0,%rdx 3710 imulq %r12,%r9 3711 addq %r10,%r15 3712 adcq %rdx,%r9 3713 movq %r13,%r10 3714 movq %r14,%r11 3715 movq %r15,%r12 3716 andq $3,%r12 3717 movq %r15,%r13 3718 andq $-4,%r13 3719 movq %r9,%r14 3720 shrdq $2,%r9,%r15 3721 shrq $2,%r9 3722 addq %r13,%r10 3723 adcq %r14,%r11 3724 adcq $0,%r12 3725 addq %r15,%r10 3726 adcq %r9,%r11 3727 adcq $0,%r12 3728 3729 3730 process_blocks_of_extra_in: 3731 3732 movq 288+32(%rsp),%r9 3733 movq 48(%r9),%rsi 3734 movq 56(%r9),%r8 3735 movq %r8,%rcx 3736 shrq $4,%r8 3737 3738 5: 3739 jz process_extra_in_trailer 3740 addq 0(%rsi),%r10 3741 adcq 8+0(%rsi),%r11 3742 adcq $1,%r12 3743 movq 0+0(%rbp),%rax 3744 movq %rax,%r15 3745 mulq %r10 3746 movq %rax,%r13 3747 movq %rdx,%r14 3748 movq 0+0(%rbp),%rax 3749 mulq %r11 3750 imulq %r12,%r15 3751 addq %rax,%r14 3752 adcq %rdx,%r15 3753 movq 8+0(%rbp),%rax 3754 movq %rax,%r9 3755 mulq %r10 3756 addq %rax,%r14 3757 adcq $0,%rdx 3758 movq %rdx,%r10 3759 movq 8+0(%rbp),%rax 3760 mulq %r11 3761 addq %rax,%r15 3762 adcq $0,%rdx 3763 imulq %r12,%r9 3764 addq %r10,%r15 3765 adcq %rdx,%r9 3766 movq %r13,%r10 3767 movq %r14,%r11 3768 movq %r15,%r12 3769 andq $3,%r12 3770 movq %r15,%r13 3771 andq $-4,%r13 3772 movq %r9,%r14 3773 shrdq $2,%r9,%r15 3774 shrq $2,%r9 3775 addq %r13,%r10 3776 adcq %r14,%r11 3777 adcq $0,%r12 3778 addq %r15,%r10 3779 adcq %r9,%r11 3780 adcq $0,%r12 
3781 3782 leaq 16(%rsi),%rsi 3783 subq $1,%r8 3784 jmp 5b 3785 3786 process_extra_in_trailer: 3787 andq $15,%rcx 3788 movq %rcx,%rbx 3789 jz do_length_block 3790 leaq -1(%rsi,%rcx), %rsi 3791 3792 6: 3793 pslldq $1,%xmm15 3794 pinsrb $0,(%rsi),%xmm15 3795 leaq -1(%rsi),%rsi 3796 subq $1,%rcx 3797 jnz 6b 3798 3799 process_partial_block: 3800 3801 leaq .and_masks(%rip),%r15 3802 shlq $4,%rbx 3803 pand -16(%r15,%rbx), %xmm15 3804 .byte 102,77,15,126,253 3805 pextrq $1,%xmm15,%r14 3806 addq %r13,%r10 3807 adcq %r14,%r11 3808 adcq $1,%r12 3809 movq 0+0(%rbp),%rax 3810 movq %rax,%r15 3811 mulq %r10 3812 movq %rax,%r13 3813 movq %rdx,%r14 3814 movq 0+0(%rbp),%rax 3815 mulq %r11 3816 imulq %r12,%r15 3817 addq %rax,%r14 3818 adcq %rdx,%r15 3819 movq 8+0(%rbp),%rax 3820 movq %rax,%r9 3821 mulq %r10 3822 addq %rax,%r14 3823 adcq $0,%rdx 3824 movq %rdx,%r10 3825 movq 8+0(%rbp),%rax 3826 mulq %r11 3827 addq %rax,%r15 3828 adcq $0,%rdx 3829 imulq %r12,%r9 3830 addq %r10,%r15 3831 adcq %rdx,%r9 3832 movq %r13,%r10 3833 movq %r14,%r11 3834 movq %r15,%r12 3835 andq $3,%r12 3836 movq %r15,%r13 3837 andq $-4,%r13 3838 movq %r9,%r14 3839 shrdq $2,%r9,%r15 3840 shrq $2,%r9 3841 addq %r13,%r10 3842 adcq %r14,%r11 3843 adcq $0,%r12 3844 addq %r15,%r10 3845 adcq %r9,%r11 3846 adcq $0,%r12 3847 3848 3849 do_length_block: 3850 addq 32(%rbp),%r10 3851 adcq 8+32(%rbp),%r11 3852 adcq $1,%r12 3853 movq 0+0(%rbp),%rax 3854 movq %rax,%r15 3855 mulq %r10 3856 movq %rax,%r13 3857 movq %rdx,%r14 3858 movq 0+0(%rbp),%rax 3859 mulq %r11 3860 imulq %r12,%r15 3861 addq %rax,%r14 3862 adcq %rdx,%r15 3863 movq 8+0(%rbp),%rax 3864 movq %rax,%r9 3865 mulq %r10 3866 addq %rax,%r14 3867 adcq $0,%rdx 3868 movq %rdx,%r10 3869 movq 8+0(%rbp),%rax 3870 mulq %r11 3871 addq %rax,%r15 3872 adcq $0,%rdx 3873 imulq %r12,%r9 3874 addq %r10,%r15 3875 adcq %rdx,%r9 3876 movq %r13,%r10 3877 movq %r14,%r11 3878 movq %r15,%r12 3879 andq $3,%r12 3880 movq %r15,%r13 3881 andq $-4,%r13 3882 movq %r9,%r14 3883 shrdq $2,%r9,%r15 
3884 shrq $2,%r9 3885 addq %r13,%r10 3886 adcq %r14,%r11 3887 adcq $0,%r12 3888 addq %r15,%r10 3889 adcq %r9,%r11 3890 adcq $0,%r12 3891 3892 3893 movq %r10,%r13 3894 movq %r11,%r14 3895 movq %r12,%r15 3896 subq $-5,%r10 3897 sbbq $-1,%r11 3898 sbbq $3,%r12 3899 cmovcq %r13,%r10 3900 cmovcq %r14,%r11 3901 cmovcq %r15,%r12 3902 3903 addq 0+16(%rbp),%r10 3904 adcq 8+16(%rbp),%r11 3905 3906 addq $288 + 32,%rsp 3907 .cfi_adjust_cfa_offset -(288 + 32) 3908 popq %r9 3909 .cfi_adjust_cfa_offset -8 3910 movq %r10,0(%r9) 3911 movq %r11,8(%r9) 3912 3913 popq %r15 3914 .cfi_adjust_cfa_offset -8 3915 popq %r14 3916 .cfi_adjust_cfa_offset -8 3917 popq %r13 3918 .cfi_adjust_cfa_offset -8 3919 popq %r12 3920 .cfi_adjust_cfa_offset -8 3921 popq %rbx 3922 .cfi_adjust_cfa_offset -8 3923 popq %rbp 3924 .cfi_adjust_cfa_offset -8 3925 .byte 0xf3,0xc3 3926 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32 3927 3928 seal_sse_128: 3929 movdqu .chacha20_consts(%rip),%xmm0 3930 movdqa %xmm0,%xmm1 3931 movdqa %xmm0,%xmm2 3932 movdqu 0(%r9),%xmm4 3933 movdqa %xmm4,%xmm5 3934 movdqa %xmm4,%xmm6 3935 movdqu 16(%r9),%xmm8 3936 movdqa %xmm8,%xmm9 3937 movdqa %xmm8,%xmm10 3938 movdqu 32(%r9),%xmm14 3939 movdqa %xmm14,%xmm12 3940 paddd .sse_inc(%rip),%xmm12 3941 movdqa %xmm12,%xmm13 3942 paddd .sse_inc(%rip),%xmm13 3943 movdqa %xmm4,%xmm7 3944 movdqa %xmm8,%xmm11 3945 movdqa %xmm12,%xmm15 3946 movq $10,%r10 3947 1: 3948 paddd %xmm4,%xmm0 3949 pxor %xmm0,%xmm12 3950 pshufb .rol16(%rip),%xmm12 3951 paddd %xmm12,%xmm8 3952 pxor %xmm8,%xmm4 3953 movdqa %xmm4,%xmm3 3954 pslld $12,%xmm3 3955 psrld $20,%xmm4 3956 pxor %xmm3,%xmm4 3957 paddd %xmm4,%xmm0 3958 pxor %xmm0,%xmm12 3959 pshufb .rol8(%rip),%xmm12 3960 paddd %xmm12,%xmm8 3961 pxor %xmm8,%xmm4 3962 movdqa %xmm4,%xmm3 3963 pslld $7,%xmm3 3964 psrld $25,%xmm4 3965 pxor %xmm3,%xmm4 3966 .byte 102,15,58,15,228,4 3967 .byte 102,69,15,58,15,192,8 3968 .byte 102,69,15,58,15,228,12 3969 paddd %xmm5,%xmm1 3970 pxor %xmm1,%xmm13 3971 pshufb .rol16(%rip),%xmm13 3972 
paddd %xmm13,%xmm9 3973 pxor %xmm9,%xmm5 3974 movdqa %xmm5,%xmm3 3975 pslld $12,%xmm3 3976 psrld $20,%xmm5 3977 pxor %xmm3,%xmm5 3978 paddd %xmm5,%xmm1 3979 pxor %xmm1,%xmm13 3980 pshufb .rol8(%rip),%xmm13 3981 paddd %xmm13,%xmm9 3982 pxor %xmm9,%xmm5 3983 movdqa %xmm5,%xmm3 3984 pslld $7,%xmm3 3985 psrld $25,%xmm5 3986 pxor %xmm3,%xmm5 3987 .byte 102,15,58,15,237,4 3988 .byte 102,69,15,58,15,201,8 3989 .byte 102,69,15,58,15,237,12 3990 paddd %xmm6,%xmm2 3991 pxor %xmm2,%xmm14 3992 pshufb .rol16(%rip),%xmm14 3993 paddd %xmm14,%xmm10 3994 pxor %xmm10,%xmm6 3995 movdqa %xmm6,%xmm3 3996 pslld $12,%xmm3 3997 psrld $20,%xmm6 3998 pxor %xmm3,%xmm6 3999 paddd %xmm6,%xmm2 4000 pxor %xmm2,%xmm14 4001 pshufb .rol8(%rip),%xmm14 4002 paddd %xmm14,%xmm10 4003 pxor %xmm10,%xmm6 4004 movdqa %xmm6,%xmm3 4005 pslld $7,%xmm3 4006 psrld $25,%xmm6 4007 pxor %xmm3,%xmm6 4008 .byte 102,15,58,15,246,4 4009 .byte 102,69,15,58,15,210,8 4010 .byte 102,69,15,58,15,246,12 4011 paddd %xmm4,%xmm0 4012 pxor %xmm0,%xmm12 4013 pshufb .rol16(%rip),%xmm12 4014 paddd %xmm12,%xmm8 4015 pxor %xmm8,%xmm4 4016 movdqa %xmm4,%xmm3 4017 pslld $12,%xmm3 4018 psrld $20,%xmm4 4019 pxor %xmm3,%xmm4 4020 paddd %xmm4,%xmm0 4021 pxor %xmm0,%xmm12 4022 pshufb .rol8(%rip),%xmm12 4023 paddd %xmm12,%xmm8 4024 pxor %xmm8,%xmm4 4025 movdqa %xmm4,%xmm3 4026 pslld $7,%xmm3 4027 psrld $25,%xmm4 4028 pxor %xmm3,%xmm4 4029 .byte 102,15,58,15,228,12 4030 .byte 102,69,15,58,15,192,8 4031 .byte 102,69,15,58,15,228,4 4032 paddd %xmm5,%xmm1 4033 pxor %xmm1,%xmm13 4034 pshufb .rol16(%rip),%xmm13 4035 paddd %xmm13,%xmm9 4036 pxor %xmm9,%xmm5 4037 movdqa %xmm5,%xmm3 4038 pslld $12,%xmm3 4039 psrld $20,%xmm5 4040 pxor %xmm3,%xmm5 4041 paddd %xmm5,%xmm1 4042 pxor %xmm1,%xmm13 4043 pshufb .rol8(%rip),%xmm13 4044 paddd %xmm13,%xmm9 4045 pxor %xmm9,%xmm5 4046 movdqa %xmm5,%xmm3 4047 pslld $7,%xmm3 4048 psrld $25,%xmm5 4049 pxor %xmm3,%xmm5 4050 .byte 102,15,58,15,237,12 4051 .byte 102,69,15,58,15,201,8 4052 .byte 102,69,15,58,15,237,4 
4053 paddd %xmm6,%xmm2 4054 pxor %xmm2,%xmm14 4055 pshufb .rol16(%rip),%xmm14 4056 paddd %xmm14,%xmm10 4057 pxor %xmm10,%xmm6 4058 movdqa %xmm6,%xmm3 4059 pslld $12,%xmm3 4060 psrld $20,%xmm6 4061 pxor %xmm3,%xmm6 4062 paddd %xmm6,%xmm2 4063 pxor %xmm2,%xmm14 4064 pshufb .rol8(%rip),%xmm14 4065 paddd %xmm14,%xmm10 4066 pxor %xmm10,%xmm6 4067 movdqa %xmm6,%xmm3 4068 pslld $7,%xmm3 4069 psrld $25,%xmm6 4070 pxor %xmm3,%xmm6 4071 .byte 102,15,58,15,246,12 4072 .byte 102,69,15,58,15,210,8 4073 .byte 102,69,15,58,15,246,4 4074 4075 decq %r10 4076 jnz 1b 4077 paddd .chacha20_consts(%rip),%xmm0 4078 paddd .chacha20_consts(%rip),%xmm1 4079 paddd .chacha20_consts(%rip),%xmm2 4080 paddd %xmm7,%xmm4 4081 paddd %xmm7,%xmm5 4082 paddd %xmm7,%xmm6 4083 paddd %xmm11,%xmm8 4084 paddd %xmm11,%xmm9 4085 paddd %xmm15,%xmm12 4086 paddd .sse_inc(%rip),%xmm15 4087 paddd %xmm15,%xmm13 4088 4089 pand .clamp(%rip),%xmm2 4090 movdqa %xmm2,0(%rbp) 4091 movdqa %xmm6,16(%rbp) 4092 4093 movq %r8,%r8 4094 call poly_hash_ad_internal 4095 jmp seal_sse_128_seal 4096 .size chacha20_poly1305_seal, .-chacha20_poly1305_seal 4097 4098 4099 .type chacha20_poly1305_open_avx2,@function 4100 .align 64 4101 chacha20_poly1305_open_avx2: 4102 vzeroupper 4103 vmovdqa .chacha20_consts(%rip),%ymm0 4104 vbroadcasti128 0(%r9),%ymm4 4105 vbroadcasti128 16(%r9),%ymm8 4106 vbroadcasti128 32(%r9),%ymm12 4107 vpaddd .avx2_init(%rip),%ymm12,%ymm12 4108 cmpq $192,%rbx 4109 jbe open_avx2_192 4110 cmpq $320,%rbx 4111 jbe open_avx2_320 4112 4113 vmovdqa %ymm4,64(%rbp) 4114 vmovdqa %ymm8,96(%rbp) 4115 vmovdqa %ymm12,160(%rbp) 4116 movq $10,%r10 4117 1: 4118 vpaddd %ymm4,%ymm0,%ymm0 4119 vpxor %ymm0,%ymm12,%ymm12 4120 vpshufb .rol16(%rip),%ymm12,%ymm12 4121 vpaddd %ymm12,%ymm8,%ymm8 4122 vpxor %ymm8,%ymm4,%ymm4 4123 vpsrld $20,%ymm4,%ymm3 4124 vpslld $12,%ymm4,%ymm4 4125 vpxor %ymm3,%ymm4,%ymm4 4126 vpaddd %ymm4,%ymm0,%ymm0 4127 vpxor %ymm0,%ymm12,%ymm12 4128 vpshufb .rol8(%rip),%ymm12,%ymm12 4129 vpaddd %ymm12,%ymm8,%ymm8 
4130 vpxor %ymm8,%ymm4,%ymm4 4131 vpslld $7,%ymm4,%ymm3 4132 vpsrld $25,%ymm4,%ymm4 4133 vpxor %ymm3,%ymm4,%ymm4 4134 vpalignr $12,%ymm12,%ymm12,%ymm12 4135 vpalignr $8,%ymm8,%ymm8,%ymm8 4136 vpalignr $4,%ymm4,%ymm4,%ymm4 4137 vpaddd %ymm4,%ymm0,%ymm0 4138 vpxor %ymm0,%ymm12,%ymm12 4139 vpshufb .rol16(%rip),%ymm12,%ymm12 4140 vpaddd %ymm12,%ymm8,%ymm8 4141 vpxor %ymm8,%ymm4,%ymm4 4142 vpsrld $20,%ymm4,%ymm3 4143 vpslld $12,%ymm4,%ymm4 4144 vpxor %ymm3,%ymm4,%ymm4 4145 vpaddd %ymm4,%ymm0,%ymm0 4146 vpxor %ymm0,%ymm12,%ymm12 4147 vpshufb .rol8(%rip),%ymm12,%ymm12 4148 vpaddd %ymm12,%ymm8,%ymm8 4149 vpxor %ymm8,%ymm4,%ymm4 4150 vpslld $7,%ymm4,%ymm3 4151 vpsrld $25,%ymm4,%ymm4 4152 vpxor %ymm3,%ymm4,%ymm4 4153 vpalignr $4,%ymm12,%ymm12,%ymm12 4154 vpalignr $8,%ymm8,%ymm8,%ymm8 4155 vpalignr $12,%ymm4,%ymm4,%ymm4 4156 4157 decq %r10 4158 jne 1b 4159 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4160 vpaddd 64(%rbp),%ymm4,%ymm4 4161 vpaddd 96(%rbp),%ymm8,%ymm8 4162 vpaddd 160(%rbp),%ymm12,%ymm12 4163 4164 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4165 4166 vpand .clamp(%rip),%ymm3,%ymm3 4167 vmovdqa %ymm3,0(%rbp) 4168 4169 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4170 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 4171 4172 movq %r8,%r8 4173 call poly_hash_ad_internal 4174 xorq %rcx,%rcx 4175 4176 1: 4177 addq 0(%rsi,%rcx), %r10 4178 adcq 8+0(%rsi,%rcx), %r11 4179 adcq $1,%r12 4180 movq 0+0(%rbp),%rax 4181 movq %rax,%r15 4182 mulq %r10 4183 movq %rax,%r13 4184 movq %rdx,%r14 4185 movq 0+0(%rbp),%rax 4186 mulq %r11 4187 imulq %r12,%r15 4188 addq %rax,%r14 4189 adcq %rdx,%r15 4190 movq 8+0(%rbp),%rax 4191 movq %rax,%r9 4192 mulq %r10 4193 addq %rax,%r14 4194 adcq $0,%rdx 4195 movq %rdx,%r10 4196 movq 8+0(%rbp),%rax 4197 mulq %r11 4198 addq %rax,%r15 4199 adcq $0,%rdx 4200 imulq %r12,%r9 4201 addq %r10,%r15 4202 adcq %rdx,%r9 4203 movq %r13,%r10 4204 movq %r14,%r11 4205 movq %r15,%r12 4206 andq $3,%r12 4207 movq %r15,%r13 4208 andq $-4,%r13 4209 movq %r9,%r14 4210 shrdq $2,%r9,%r15 4211 shrq $2,%r9 
4212 addq %r13,%r10 4213 adcq %r14,%r11 4214 adcq $0,%r12 4215 addq %r15,%r10 4216 adcq %r9,%r11 4217 adcq $0,%r12 4218 4219 addq $16,%rcx 4220 cmpq $64,%rcx 4221 jne 1b 4222 4223 vpxor 0(%rsi),%ymm0,%ymm0 4224 vpxor 32(%rsi),%ymm4,%ymm4 4225 vmovdqu %ymm0,0(%rdi) 4226 vmovdqu %ymm4,32(%rdi) 4227 leaq 64(%rsi),%rsi 4228 leaq 64(%rdi),%rdi 4229 subq $64,%rbx 4230 1: 4231 4232 cmpq $512,%rbx 4233 jb 3f 4234 vmovdqa .chacha20_consts(%rip),%ymm0 4235 vmovdqa 64(%rbp),%ymm4 4236 vmovdqa 96(%rbp),%ymm8 4237 vmovdqa %ymm0,%ymm1 4238 vmovdqa %ymm4,%ymm5 4239 vmovdqa %ymm8,%ymm9 4240 vmovdqa %ymm0,%ymm2 4241 vmovdqa %ymm4,%ymm6 4242 vmovdqa %ymm8,%ymm10 4243 vmovdqa %ymm0,%ymm3 4244 vmovdqa %ymm4,%ymm7 4245 vmovdqa %ymm8,%ymm11 4246 vmovdqa .avx2_inc(%rip),%ymm12 4247 vpaddd 160(%rbp),%ymm12,%ymm15 4248 vpaddd %ymm15,%ymm12,%ymm14 4249 vpaddd %ymm14,%ymm12,%ymm13 4250 vpaddd %ymm13,%ymm12,%ymm12 4251 vmovdqa %ymm15,256(%rbp) 4252 vmovdqa %ymm14,224(%rbp) 4253 vmovdqa %ymm13,192(%rbp) 4254 vmovdqa %ymm12,160(%rbp) 4255 4256 xorq %rcx,%rcx 4257 2: 4258 addq 0*8(%rsi,%rcx), %r10 4259 adcq 8+0*8(%rsi,%rcx), %r11 4260 adcq $1,%r12 4261 vmovdqa %ymm8,128(%rbp) 4262 vmovdqa .rol16(%rip),%ymm8 4263 vpaddd %ymm7,%ymm3,%ymm3 4264 vpaddd %ymm6,%ymm2,%ymm2 4265 vpaddd %ymm5,%ymm1,%ymm1 4266 vpaddd %ymm4,%ymm0,%ymm0 4267 vpxor %ymm3,%ymm15,%ymm15 4268 vpxor %ymm2,%ymm14,%ymm14 4269 vpxor %ymm1,%ymm13,%ymm13 4270 vpxor %ymm0,%ymm12,%ymm12 4271 movq 0+0(%rbp),%rdx 4272 movq %rdx,%r15 4273 mulxq %r10,%r13,%r14 4274 mulxq %r11,%rax,%rdx 4275 imulq %r12,%r15 4276 addq %rax,%r14 4277 adcq %rdx,%r15 4278 vpshufb %ymm8,%ymm15,%ymm15 4279 vpshufb %ymm8,%ymm14,%ymm14 4280 vpshufb %ymm8,%ymm13,%ymm13 4281 vpshufb %ymm8,%ymm12,%ymm12 4282 vmovdqa 128(%rbp),%ymm8 4283 vpaddd %ymm15,%ymm11,%ymm11 4284 vpaddd %ymm14,%ymm10,%ymm10 4285 vpaddd %ymm13,%ymm9,%ymm9 4286 vpaddd %ymm12,%ymm8,%ymm8 4287 movq 8+0(%rbp),%rdx 4288 mulxq %r10,%r10,%rax 4289 addq %r10,%r14 4290 mulxq %r11,%r11,%r9 4291 adcq 
%r11,%r15 4292 adcq $0,%r9 4293 imulq %r12,%rdx 4294 vpxor %ymm11,%ymm7,%ymm7 4295 vpxor %ymm10,%ymm6,%ymm6 4296 vpxor %ymm9,%ymm5,%ymm5 4297 vpxor %ymm8,%ymm4,%ymm4 4298 vmovdqa %ymm8,128(%rbp) 4299 vpsrld $20,%ymm7,%ymm8 4300 vpslld $32-20,%ymm7,%ymm7 4301 vpxor %ymm8,%ymm7,%ymm7 4302 vpsrld $20,%ymm6,%ymm8 4303 vpslld $32-20,%ymm6,%ymm6 4304 vpxor %ymm8,%ymm6,%ymm6 4305 vpsrld $20,%ymm5,%ymm8 4306 addq %rax,%r15 4307 adcq %rdx,%r9 4308 vpslld $32-20,%ymm5,%ymm5 4309 vpxor %ymm8,%ymm5,%ymm5 4310 vpsrld $20,%ymm4,%ymm8 4311 vpslld $32-20,%ymm4,%ymm4 4312 vpxor %ymm8,%ymm4,%ymm4 4313 vmovdqa .rol8(%rip),%ymm8 4314 vpaddd %ymm7,%ymm3,%ymm3 4315 vpaddd %ymm6,%ymm2,%ymm2 4316 vpaddd %ymm5,%ymm1,%ymm1 4317 vpaddd %ymm4,%ymm0,%ymm0 4318 movq %r13,%r10 4319 movq %r14,%r11 4320 movq %r15,%r12 4321 andq $3,%r12 4322 movq %r15,%r13 4323 andq $-4,%r13 4324 movq %r9,%r14 4325 shrdq $2,%r9,%r15 4326 shrq $2,%r9 4327 addq %r13,%r10 4328 adcq %r14,%r11 4329 adcq $0,%r12 4330 addq %r15,%r10 4331 adcq %r9,%r11 4332 adcq $0,%r12 4333 vpxor %ymm3,%ymm15,%ymm15 4334 vpxor %ymm2,%ymm14,%ymm14 4335 vpxor %ymm1,%ymm13,%ymm13 4336 vpxor %ymm0,%ymm12,%ymm12 4337 vpshufb %ymm8,%ymm15,%ymm15 4338 vpshufb %ymm8,%ymm14,%ymm14 4339 vpshufb %ymm8,%ymm13,%ymm13 4340 vpshufb %ymm8,%ymm12,%ymm12 4341 vmovdqa 128(%rbp),%ymm8 4342 addq 2*8(%rsi,%rcx), %r10 4343 adcq 8+2*8(%rsi,%rcx), %r11 4344 adcq $1,%r12 4345 vpaddd %ymm15,%ymm11,%ymm11 4346 vpaddd %ymm14,%ymm10,%ymm10 4347 vpaddd %ymm13,%ymm9,%ymm9 4348 vpaddd %ymm12,%ymm8,%ymm8 4349 vpxor %ymm11,%ymm7,%ymm7 4350 vpxor %ymm10,%ymm6,%ymm6 4351 vpxor %ymm9,%ymm5,%ymm5 4352 vpxor %ymm8,%ymm4,%ymm4 4353 movq 0+0(%rbp),%rdx 4354 movq %rdx,%r15 4355 mulxq %r10,%r13,%r14 4356 mulxq %r11,%rax,%rdx 4357 imulq %r12,%r15 4358 addq %rax,%r14 4359 adcq %rdx,%r15 4360 vmovdqa %ymm8,128(%rbp) 4361 vpsrld $25,%ymm7,%ymm8 4362 vpslld $32-25,%ymm7,%ymm7 4363 vpxor %ymm8,%ymm7,%ymm7 4364 vpsrld $25,%ymm6,%ymm8 4365 vpslld $32-25,%ymm6,%ymm6 4366 vpxor 
%ymm8,%ymm6,%ymm6 4367 vpsrld $25,%ymm5,%ymm8 4368 vpslld $32-25,%ymm5,%ymm5 4369 vpxor %ymm8,%ymm5,%ymm5 4370 vpsrld $25,%ymm4,%ymm8 4371 vpslld $32-25,%ymm4,%ymm4 4372 vpxor %ymm8,%ymm4,%ymm4 4373 vmovdqa 128(%rbp),%ymm8 4374 vpalignr $4,%ymm7,%ymm7,%ymm7 4375 vpalignr $8,%ymm11,%ymm11,%ymm11 4376 vpalignr $12,%ymm15,%ymm15,%ymm15 4377 vpalignr $4,%ymm6,%ymm6,%ymm6 4378 movq 8+0(%rbp),%rdx 4379 mulxq %r10,%r10,%rax 4380 addq %r10,%r14 4381 mulxq %r11,%r11,%r9 4382 adcq %r11,%r15 4383 adcq $0,%r9 4384 imulq %r12,%rdx 4385 vpalignr $8,%ymm10,%ymm10,%ymm10 4386 vpalignr $12,%ymm14,%ymm14,%ymm14 4387 vpalignr $4,%ymm5,%ymm5,%ymm5 4388 vpalignr $8,%ymm9,%ymm9,%ymm9 4389 vpalignr $12,%ymm13,%ymm13,%ymm13 4390 vpalignr $4,%ymm4,%ymm4,%ymm4 4391 vpalignr $8,%ymm8,%ymm8,%ymm8 4392 vpalignr $12,%ymm12,%ymm12,%ymm12 4393 vmovdqa %ymm8,128(%rbp) 4394 vmovdqa .rol16(%rip),%ymm8 4395 vpaddd %ymm7,%ymm3,%ymm3 4396 vpaddd %ymm6,%ymm2,%ymm2 4397 vpaddd %ymm5,%ymm1,%ymm1 4398 vpaddd %ymm4,%ymm0,%ymm0 4399 vpxor %ymm3,%ymm15,%ymm15 4400 vpxor %ymm2,%ymm14,%ymm14 4401 vpxor %ymm1,%ymm13,%ymm13 4402 vpxor %ymm0,%ymm12,%ymm12 4403 addq %rax,%r15 4404 adcq %rdx,%r9 4405 vpshufb %ymm8,%ymm15,%ymm15 4406 vpshufb %ymm8,%ymm14,%ymm14 4407 vpshufb %ymm8,%ymm13,%ymm13 4408 vpshufb %ymm8,%ymm12,%ymm12 4409 vmovdqa 128(%rbp),%ymm8 4410 vpaddd %ymm15,%ymm11,%ymm11 4411 vpaddd %ymm14,%ymm10,%ymm10 4412 vpaddd %ymm13,%ymm9,%ymm9 4413 vpaddd %ymm12,%ymm8,%ymm8 4414 movq %r13,%r10 4415 movq %r14,%r11 4416 movq %r15,%r12 4417 andq $3,%r12 4418 movq %r15,%r13 4419 andq $-4,%r13 4420 movq %r9,%r14 4421 shrdq $2,%r9,%r15 4422 shrq $2,%r9 4423 addq %r13,%r10 4424 adcq %r14,%r11 4425 adcq $0,%r12 4426 addq %r15,%r10 4427 adcq %r9,%r11 4428 adcq $0,%r12 4429 vpxor %ymm11,%ymm7,%ymm7 4430 vpxor %ymm10,%ymm6,%ymm6 4431 vpxor %ymm9,%ymm5,%ymm5 4432 vpxor %ymm8,%ymm4,%ymm4 4433 vmovdqa %ymm8,128(%rbp) 4434 vpsrld $20,%ymm7,%ymm8 4435 vpslld $32-20,%ymm7,%ymm7 4436 vpxor %ymm8,%ymm7,%ymm7 4437 addq 
4*8(%rsi,%rcx), %r10 4438 adcq 8+4*8(%rsi,%rcx), %r11 4439 adcq $1,%r12 4440 4441 leaq 48(%rcx),%rcx 4442 vpsrld $20,%ymm6,%ymm8 4443 vpslld $32-20,%ymm6,%ymm6 4444 vpxor %ymm8,%ymm6,%ymm6 4445 vpsrld $20,%ymm5,%ymm8 4446 vpslld $32-20,%ymm5,%ymm5 4447 vpxor %ymm8,%ymm5,%ymm5 4448 vpsrld $20,%ymm4,%ymm8 4449 vpslld $32-20,%ymm4,%ymm4 4450 vpxor %ymm8,%ymm4,%ymm4 4451 vmovdqa .rol8(%rip),%ymm8 4452 vpaddd %ymm7,%ymm3,%ymm3 4453 vpaddd %ymm6,%ymm2,%ymm2 4454 vpaddd %ymm5,%ymm1,%ymm1 4455 vpaddd %ymm4,%ymm0,%ymm0 4456 vpxor %ymm3,%ymm15,%ymm15 4457 vpxor %ymm2,%ymm14,%ymm14 4458 vpxor %ymm1,%ymm13,%ymm13 4459 vpxor %ymm0,%ymm12,%ymm12 4460 movq 0+0(%rbp),%rdx 4461 movq %rdx,%r15 4462 mulxq %r10,%r13,%r14 4463 mulxq %r11,%rax,%rdx 4464 imulq %r12,%r15 4465 addq %rax,%r14 4466 adcq %rdx,%r15 4467 vpshufb %ymm8,%ymm15,%ymm15 4468 vpshufb %ymm8,%ymm14,%ymm14 4469 vpshufb %ymm8,%ymm13,%ymm13 4470 vpshufb %ymm8,%ymm12,%ymm12 4471 vmovdqa 128(%rbp),%ymm8 4472 vpaddd %ymm15,%ymm11,%ymm11 4473 vpaddd %ymm14,%ymm10,%ymm10 4474 vpaddd %ymm13,%ymm9,%ymm9 4475 movq 8+0(%rbp),%rdx 4476 mulxq %r10,%r10,%rax 4477 addq %r10,%r14 4478 mulxq %r11,%r11,%r9 4479 adcq %r11,%r15 4480 adcq $0,%r9 4481 imulq %r12,%rdx 4482 vpaddd %ymm12,%ymm8,%ymm8 4483 vpxor %ymm11,%ymm7,%ymm7 4484 vpxor %ymm10,%ymm6,%ymm6 4485 vpxor %ymm9,%ymm5,%ymm5 4486 vpxor %ymm8,%ymm4,%ymm4 4487 vmovdqa %ymm8,128(%rbp) 4488 vpsrld $25,%ymm7,%ymm8 4489 vpslld $32-25,%ymm7,%ymm7 4490 addq %rax,%r15 4491 adcq %rdx,%r9 4492 vpxor %ymm8,%ymm7,%ymm7 4493 vpsrld $25,%ymm6,%ymm8 4494 vpslld $32-25,%ymm6,%ymm6 4495 vpxor %ymm8,%ymm6,%ymm6 4496 vpsrld $25,%ymm5,%ymm8 4497 vpslld $32-25,%ymm5,%ymm5 4498 vpxor %ymm8,%ymm5,%ymm5 4499 vpsrld $25,%ymm4,%ymm8 4500 vpslld $32-25,%ymm4,%ymm4 4501 vpxor %ymm8,%ymm4,%ymm4 4502 vmovdqa 128(%rbp),%ymm8 4503 vpalignr $12,%ymm7,%ymm7,%ymm7 4504 vpalignr $8,%ymm11,%ymm11,%ymm11 4505 vpalignr $4,%ymm15,%ymm15,%ymm15 4506 vpalignr $12,%ymm6,%ymm6,%ymm6 4507 vpalignr $8,%ymm10,%ymm10,%ymm10 4508 
vpalignr $4,%ymm14,%ymm14,%ymm14 4509 vpalignr $12,%ymm5,%ymm5,%ymm5 4510 movq %r13,%r10 4511 movq %r14,%r11 4512 movq %r15,%r12 4513 andq $3,%r12 4514 movq %r15,%r13 4515 andq $-4,%r13 4516 movq %r9,%r14 4517 shrdq $2,%r9,%r15 4518 shrq $2,%r9 4519 addq %r13,%r10 4520 adcq %r14,%r11 4521 adcq $0,%r12 4522 addq %r15,%r10 4523 adcq %r9,%r11 4524 adcq $0,%r12 4525 vpalignr $8,%ymm9,%ymm9,%ymm9 4526 vpalignr $4,%ymm13,%ymm13,%ymm13 4527 vpalignr $12,%ymm4,%ymm4,%ymm4 4528 vpalignr $8,%ymm8,%ymm8,%ymm8 4529 vpalignr $4,%ymm12,%ymm12,%ymm12 4530 4531 cmpq $60*8,%rcx 4532 jne 2b 4533 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 4534 vpaddd 64(%rbp),%ymm7,%ymm7 4535 vpaddd 96(%rbp),%ymm11,%ymm11 4536 vpaddd 256(%rbp),%ymm15,%ymm15 4537 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 4538 vpaddd 64(%rbp),%ymm6,%ymm6 4539 vpaddd 96(%rbp),%ymm10,%ymm10 4540 vpaddd 224(%rbp),%ymm14,%ymm14 4541 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 4542 vpaddd 64(%rbp),%ymm5,%ymm5 4543 vpaddd 96(%rbp),%ymm9,%ymm9 4544 vpaddd 192(%rbp),%ymm13,%ymm13 4545 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4546 vpaddd 64(%rbp),%ymm4,%ymm4 4547 vpaddd 96(%rbp),%ymm8,%ymm8 4548 vpaddd 160(%rbp),%ymm12,%ymm12 4549 4550 vmovdqa %ymm0,128(%rbp) 4551 addq 60*8(%rsi),%r10 4552 adcq 8+60*8(%rsi),%r11 4553 adcq $1,%r12 4554 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4555 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4556 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4557 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4558 vpxor 0+0(%rsi),%ymm0,%ymm0 4559 vpxor 32+0(%rsi),%ymm3,%ymm3 4560 vpxor 64+0(%rsi),%ymm7,%ymm7 4561 vpxor 96+0(%rsi),%ymm11,%ymm11 4562 vmovdqu %ymm0,0+0(%rdi) 4563 vmovdqu %ymm3,32+0(%rdi) 4564 vmovdqu %ymm7,64+0(%rdi) 4565 vmovdqu %ymm11,96+0(%rdi) 4566 4567 vmovdqa 128(%rbp),%ymm0 4568 movq 0+0(%rbp),%rax 4569 movq %rax,%r15 4570 mulq %r10 4571 movq %rax,%r13 4572 movq %rdx,%r14 4573 movq 0+0(%rbp),%rax 4574 mulq %r11 4575 imulq %r12,%r15 4576 addq %rax,%r14 4577 adcq %rdx,%r15 4578 movq 8+0(%rbp),%rax 4579 movq %rax,%r9 4580 mulq 
%r10 4581 addq %rax,%r14 4582 adcq $0,%rdx 4583 movq %rdx,%r10 4584 movq 8+0(%rbp),%rax 4585 mulq %r11 4586 addq %rax,%r15 4587 adcq $0,%rdx 4588 imulq %r12,%r9 4589 addq %r10,%r15 4590 adcq %rdx,%r9 4591 movq %r13,%r10 4592 movq %r14,%r11 4593 movq %r15,%r12 4594 andq $3,%r12 4595 movq %r15,%r13 4596 andq $-4,%r13 4597 movq %r9,%r14 4598 shrdq $2,%r9,%r15 4599 shrq $2,%r9 4600 addq %r13,%r10 4601 adcq %r14,%r11 4602 adcq $0,%r12 4603 addq %r15,%r10 4604 adcq %r9,%r11 4605 adcq $0,%r12 4606 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4607 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4608 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4609 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4610 vpxor 0+128(%rsi),%ymm3,%ymm3 4611 vpxor 32+128(%rsi),%ymm2,%ymm2 4612 vpxor 64+128(%rsi),%ymm6,%ymm6 4613 vpxor 96+128(%rsi),%ymm10,%ymm10 4614 vmovdqu %ymm3,0+128(%rdi) 4615 vmovdqu %ymm2,32+128(%rdi) 4616 vmovdqu %ymm6,64+128(%rdi) 4617 vmovdqu %ymm10,96+128(%rdi) 4618 addq 60*8+16(%rsi),%r10 4619 adcq 8+60*8+16(%rsi),%r11 4620 adcq $1,%r12 4621 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4622 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4623 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4624 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4625 vpxor 0+256(%rsi),%ymm3,%ymm3 4626 vpxor 32+256(%rsi),%ymm1,%ymm1 4627 vpxor 64+256(%rsi),%ymm5,%ymm5 4628 vpxor 96+256(%rsi),%ymm9,%ymm9 4629 vmovdqu %ymm3,0+256(%rdi) 4630 vmovdqu %ymm1,32+256(%rdi) 4631 vmovdqu %ymm5,64+256(%rdi) 4632 vmovdqu %ymm9,96+256(%rdi) 4633 movq 0+0(%rbp),%rax 4634 movq %rax,%r15 4635 mulq %r10 4636 movq %rax,%r13 4637 movq %rdx,%r14 4638 movq 0+0(%rbp),%rax 4639 mulq %r11 4640 imulq %r12,%r15 4641 addq %rax,%r14 4642 adcq %rdx,%r15 4643 movq 8+0(%rbp),%rax 4644 movq %rax,%r9 4645 mulq %r10 4646 addq %rax,%r14 4647 adcq $0,%rdx 4648 movq %rdx,%r10 4649 movq 8+0(%rbp),%rax 4650 mulq %r11 4651 addq %rax,%r15 4652 adcq $0,%rdx 4653 imulq %r12,%r9 4654 addq %r10,%r15 4655 adcq %rdx,%r9 4656 movq %r13,%r10 4657 movq %r14,%r11 4658 movq %r15,%r12 4659 andq $3,%r12 4660 movq %r15,%r13 4661 
andq $-4,%r13 4662 movq %r9,%r14 4663 shrdq $2,%r9,%r15 4664 shrq $2,%r9 4665 addq %r13,%r10 4666 adcq %r14,%r11 4667 adcq $0,%r12 4668 addq %r15,%r10 4669 adcq %r9,%r11 4670 adcq $0,%r12 4671 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4672 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4673 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4674 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4675 vpxor 0+384(%rsi),%ymm3,%ymm3 4676 vpxor 32+384(%rsi),%ymm0,%ymm0 4677 vpxor 64+384(%rsi),%ymm4,%ymm4 4678 vpxor 96+384(%rsi),%ymm8,%ymm8 4679 vmovdqu %ymm3,0+384(%rdi) 4680 vmovdqu %ymm0,32+384(%rdi) 4681 vmovdqu %ymm4,64+384(%rdi) 4682 vmovdqu %ymm8,96+384(%rdi) 4683 4684 leaq 512(%rsi),%rsi 4685 leaq 512(%rdi),%rdi 4686 subq $512,%rbx 4687 jmp 1b 4688 3: 4689 testq %rbx,%rbx 4690 vzeroupper 4691 je open_sse_finalize 4692 3: 4693 cmpq $128,%rbx 4694 ja 3f 4695 vmovdqa .chacha20_consts(%rip),%ymm0 4696 vmovdqa 64(%rbp),%ymm4 4697 vmovdqa 96(%rbp),%ymm8 4698 vmovdqa .avx2_inc(%rip),%ymm12 4699 vpaddd 160(%rbp),%ymm12,%ymm12 4700 vmovdqa %ymm12,160(%rbp) 4701 4702 xorq %r8,%r8 4703 movq %rbx,%rcx 4704 andq $-16,%rcx 4705 testq %rcx,%rcx 4706 je 2f 4707 1: 4708 addq 0*8(%rsi,%r8), %r10 4709 adcq 8+0*8(%rsi,%r8), %r11 4710 adcq $1,%r12 4711 movq 0+0(%rbp),%rax 4712 movq %rax,%r15 4713 mulq %r10 4714 movq %rax,%r13 4715 movq %rdx,%r14 4716 movq 0+0(%rbp),%rax 4717 mulq %r11 4718 imulq %r12,%r15 4719 addq %rax,%r14 4720 adcq %rdx,%r15 4721 movq 8+0(%rbp),%rax 4722 movq %rax,%r9 4723 mulq %r10 4724 addq %rax,%r14 4725 adcq $0,%rdx 4726 movq %rdx,%r10 4727 movq 8+0(%rbp),%rax 4728 mulq %r11 4729 addq %rax,%r15 4730 adcq $0,%rdx 4731 imulq %r12,%r9 4732 addq %r10,%r15 4733 adcq %rdx,%r9 4734 movq %r13,%r10 4735 movq %r14,%r11 4736 movq %r15,%r12 4737 andq $3,%r12 4738 movq %r15,%r13 4739 andq $-4,%r13 4740 movq %r9,%r14 4741 shrdq $2,%r9,%r15 4742 shrq $2,%r9 4743 addq %r13,%r10 4744 adcq %r14,%r11 4745 adcq $0,%r12 4746 addq %r15,%r10 4747 adcq %r9,%r11 4748 adcq $0,%r12 4749 4750 2: 4751 addq $16,%r8 4752 vpaddd 
%ymm4,%ymm0,%ymm0 4753 vpxor %ymm0,%ymm12,%ymm12 4754 vpshufb .rol16(%rip),%ymm12,%ymm12 4755 vpaddd %ymm12,%ymm8,%ymm8 4756 vpxor %ymm8,%ymm4,%ymm4 4757 vpsrld $20,%ymm4,%ymm3 4758 vpslld $12,%ymm4,%ymm4 4759 vpxor %ymm3,%ymm4,%ymm4 4760 vpaddd %ymm4,%ymm0,%ymm0 4761 vpxor %ymm0,%ymm12,%ymm12 4762 vpshufb .rol8(%rip),%ymm12,%ymm12 4763 vpaddd %ymm12,%ymm8,%ymm8 4764 vpxor %ymm8,%ymm4,%ymm4 4765 vpslld $7,%ymm4,%ymm3 4766 vpsrld $25,%ymm4,%ymm4 4767 vpxor %ymm3,%ymm4,%ymm4 4768 vpalignr $12,%ymm12,%ymm12,%ymm12 4769 vpalignr $8,%ymm8,%ymm8,%ymm8 4770 vpalignr $4,%ymm4,%ymm4,%ymm4 4771 vpaddd %ymm4,%ymm0,%ymm0 4772 vpxor %ymm0,%ymm12,%ymm12 4773 vpshufb .rol16(%rip),%ymm12,%ymm12 4774 vpaddd %ymm12,%ymm8,%ymm8 4775 vpxor %ymm8,%ymm4,%ymm4 4776 vpsrld $20,%ymm4,%ymm3 4777 vpslld $12,%ymm4,%ymm4 4778 vpxor %ymm3,%ymm4,%ymm4 4779 vpaddd %ymm4,%ymm0,%ymm0 4780 vpxor %ymm0,%ymm12,%ymm12 4781 vpshufb .rol8(%rip),%ymm12,%ymm12 4782 vpaddd %ymm12,%ymm8,%ymm8 4783 vpxor %ymm8,%ymm4,%ymm4 4784 vpslld $7,%ymm4,%ymm3 4785 vpsrld $25,%ymm4,%ymm4 4786 vpxor %ymm3,%ymm4,%ymm4 4787 vpalignr $4,%ymm12,%ymm12,%ymm12 4788 vpalignr $8,%ymm8,%ymm8,%ymm8 4789 vpalignr $12,%ymm4,%ymm4,%ymm4 4790 4791 cmpq %rcx,%r8 4792 jb 1b 4793 cmpq $160,%r8 4794 jne 2b 4795 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 4796 vpaddd 64(%rbp),%ymm4,%ymm4 4797 vpaddd 96(%rbp),%ymm8,%ymm8 4798 vpaddd 160(%rbp),%ymm12,%ymm12 4799 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4800 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4801 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4802 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4803 vmovdqa %ymm3,%ymm8 4804 4805 jmp open_avx2_tail_loop 4806 3: 4807 cmpq $256,%rbx 4808 ja 3f 4809 vmovdqa .chacha20_consts(%rip),%ymm0 4810 vmovdqa 64(%rbp),%ymm4 4811 vmovdqa 96(%rbp),%ymm8 4812 vmovdqa %ymm0,%ymm1 4813 vmovdqa %ymm4,%ymm5 4814 vmovdqa %ymm8,%ymm9 4815 vmovdqa .avx2_inc(%rip),%ymm12 4816 vpaddd 160(%rbp),%ymm12,%ymm13 4817 vpaddd %ymm13,%ymm12,%ymm12 4818 vmovdqa %ymm12,160(%rbp) 4819 vmovdqa %ymm13,192(%rbp) 
4820 4821 movq %rbx,128(%rbp) 4822 movq %rbx,%rcx 4823 subq $128,%rcx 4824 shrq $4,%rcx 4825 movq $10,%r8 4826 cmpq $10,%rcx 4827 cmovgq %r8,%rcx 4828 movq %rsi,%rbx 4829 xorq %r8,%r8 4830 1: 4831 addq 0(%rbx),%r10 4832 adcq 8+0(%rbx),%r11 4833 adcq $1,%r12 4834 movq 0+0(%rbp),%rdx 4835 movq %rdx,%r15 4836 mulxq %r10,%r13,%r14 4837 mulxq %r11,%rax,%rdx 4838 imulq %r12,%r15 4839 addq %rax,%r14 4840 adcq %rdx,%r15 4841 movq 8+0(%rbp),%rdx 4842 mulxq %r10,%r10,%rax 4843 addq %r10,%r14 4844 mulxq %r11,%r11,%r9 4845 adcq %r11,%r15 4846 adcq $0,%r9 4847 imulq %r12,%rdx 4848 addq %rax,%r15 4849 adcq %rdx,%r9 4850 movq %r13,%r10 4851 movq %r14,%r11 4852 movq %r15,%r12 4853 andq $3,%r12 4854 movq %r15,%r13 4855 andq $-4,%r13 4856 movq %r9,%r14 4857 shrdq $2,%r9,%r15 4858 shrq $2,%r9 4859 addq %r13,%r10 4860 adcq %r14,%r11 4861 adcq $0,%r12 4862 addq %r15,%r10 4863 adcq %r9,%r11 4864 adcq $0,%r12 4865 4866 leaq 16(%rbx),%rbx 4867 2: 4868 vpaddd %ymm4,%ymm0,%ymm0 4869 vpxor %ymm0,%ymm12,%ymm12 4870 vpshufb .rol16(%rip),%ymm12,%ymm12 4871 vpaddd %ymm12,%ymm8,%ymm8 4872 vpxor %ymm8,%ymm4,%ymm4 4873 vpsrld $20,%ymm4,%ymm3 4874 vpslld $12,%ymm4,%ymm4 4875 vpxor %ymm3,%ymm4,%ymm4 4876 vpaddd %ymm4,%ymm0,%ymm0 4877 vpxor %ymm0,%ymm12,%ymm12 4878 vpshufb .rol8(%rip),%ymm12,%ymm12 4879 vpaddd %ymm12,%ymm8,%ymm8 4880 vpxor %ymm8,%ymm4,%ymm4 4881 vpslld $7,%ymm4,%ymm3 4882 vpsrld $25,%ymm4,%ymm4 4883 vpxor %ymm3,%ymm4,%ymm4 4884 vpalignr $12,%ymm12,%ymm12,%ymm12 4885 vpalignr $8,%ymm8,%ymm8,%ymm8 4886 vpalignr $4,%ymm4,%ymm4,%ymm4 4887 vpaddd %ymm5,%ymm1,%ymm1 4888 vpxor %ymm1,%ymm13,%ymm13 4889 vpshufb .rol16(%rip),%ymm13,%ymm13 4890 vpaddd %ymm13,%ymm9,%ymm9 4891 vpxor %ymm9,%ymm5,%ymm5 4892 vpsrld $20,%ymm5,%ymm3 4893 vpslld $12,%ymm5,%ymm5 4894 vpxor %ymm3,%ymm5,%ymm5 4895 vpaddd %ymm5,%ymm1,%ymm1 4896 vpxor %ymm1,%ymm13,%ymm13 4897 vpshufb .rol8(%rip),%ymm13,%ymm13 4898 vpaddd %ymm13,%ymm9,%ymm9 4899 vpxor %ymm9,%ymm5,%ymm5 4900 vpslld $7,%ymm5,%ymm3 4901 vpsrld $25,%ymm5,%ymm5 
4902 vpxor %ymm3,%ymm5,%ymm5 4903 vpalignr $12,%ymm13,%ymm13,%ymm13 4904 vpalignr $8,%ymm9,%ymm9,%ymm9 4905 vpalignr $4,%ymm5,%ymm5,%ymm5 4906 4907 incq %r8 4908 vpaddd %ymm4,%ymm0,%ymm0 4909 vpxor %ymm0,%ymm12,%ymm12 4910 vpshufb .rol16(%rip),%ymm12,%ymm12 4911 vpaddd %ymm12,%ymm8,%ymm8 4912 vpxor %ymm8,%ymm4,%ymm4 4913 vpsrld $20,%ymm4,%ymm3 4914 vpslld $12,%ymm4,%ymm4 4915 vpxor %ymm3,%ymm4,%ymm4 4916 vpaddd %ymm4,%ymm0,%ymm0 4917 vpxor %ymm0,%ymm12,%ymm12 4918 vpshufb .rol8(%rip),%ymm12,%ymm12 4919 vpaddd %ymm12,%ymm8,%ymm8 4920 vpxor %ymm8,%ymm4,%ymm4 4921 vpslld $7,%ymm4,%ymm3 4922 vpsrld $25,%ymm4,%ymm4 4923 vpxor %ymm3,%ymm4,%ymm4 4924 vpalignr $4,%ymm12,%ymm12,%ymm12 4925 vpalignr $8,%ymm8,%ymm8,%ymm8 4926 vpalignr $12,%ymm4,%ymm4,%ymm4 4927 vpaddd %ymm5,%ymm1,%ymm1 4928 vpxor %ymm1,%ymm13,%ymm13 4929 vpshufb .rol16(%rip),%ymm13,%ymm13 4930 vpaddd %ymm13,%ymm9,%ymm9 4931 vpxor %ymm9,%ymm5,%ymm5 4932 vpsrld $20,%ymm5,%ymm3 4933 vpslld $12,%ymm5,%ymm5 4934 vpxor %ymm3,%ymm5,%ymm5 4935 vpaddd %ymm5,%ymm1,%ymm1 4936 vpxor %ymm1,%ymm13,%ymm13 4937 vpshufb .rol8(%rip),%ymm13,%ymm13 4938 vpaddd %ymm13,%ymm9,%ymm9 4939 vpxor %ymm9,%ymm5,%ymm5 4940 vpslld $7,%ymm5,%ymm3 4941 vpsrld $25,%ymm5,%ymm5 4942 vpxor %ymm3,%ymm5,%ymm5 4943 vpalignr $4,%ymm13,%ymm13,%ymm13 4944 vpalignr $8,%ymm9,%ymm9,%ymm9 4945 vpalignr $12,%ymm5,%ymm5,%ymm5 4946 vpaddd %ymm6,%ymm2,%ymm2 4947 vpxor %ymm2,%ymm14,%ymm14 4948 vpshufb .rol16(%rip),%ymm14,%ymm14 4949 vpaddd %ymm14,%ymm10,%ymm10 4950 vpxor %ymm10,%ymm6,%ymm6 4951 vpsrld $20,%ymm6,%ymm3 4952 vpslld $12,%ymm6,%ymm6 4953 vpxor %ymm3,%ymm6,%ymm6 4954 vpaddd %ymm6,%ymm2,%ymm2 4955 vpxor %ymm2,%ymm14,%ymm14 4956 vpshufb .rol8(%rip),%ymm14,%ymm14 4957 vpaddd %ymm14,%ymm10,%ymm10 4958 vpxor %ymm10,%ymm6,%ymm6 4959 vpslld $7,%ymm6,%ymm3 4960 vpsrld $25,%ymm6,%ymm6 4961 vpxor %ymm3,%ymm6,%ymm6 4962 vpalignr $4,%ymm14,%ymm14,%ymm14 4963 vpalignr $8,%ymm10,%ymm10,%ymm10 4964 vpalignr $12,%ymm6,%ymm6,%ymm6 4965 4966 cmpq %rcx,%r8 4967 jb 1b 
4968 cmpq $10,%r8 4969 jne 2b 4970 movq %rbx,%r8 4971 subq %rsi,%rbx 4972 movq %rbx,%rcx 4973 movq 128(%rbp),%rbx 4974 1: 4975 addq $16,%rcx 4976 cmpq %rbx,%rcx 4977 jg 1f 4978 addq 0(%r8),%r10 4979 adcq 8+0(%r8),%r11 4980 adcq $1,%r12 4981 movq 0+0(%rbp),%rdx 4982 movq %rdx,%r15 4983 mulxq %r10,%r13,%r14 4984 mulxq %r11,%rax,%rdx 4985 imulq %r12,%r15 4986 addq %rax,%r14 4987 adcq %rdx,%r15 4988 movq 8+0(%rbp),%rdx 4989 mulxq %r10,%r10,%rax 4990 addq %r10,%r14 4991 mulxq %r11,%r11,%r9 4992 adcq %r11,%r15 4993 adcq $0,%r9 4994 imulq %r12,%rdx 4995 addq %rax,%r15 4996 adcq %rdx,%r9 4997 movq %r13,%r10 4998 movq %r14,%r11 4999 movq %r15,%r12 5000 andq $3,%r12 5001 movq %r15,%r13 5002 andq $-4,%r13 5003 movq %r9,%r14 5004 shrdq $2,%r9,%r15 5005 shrq $2,%r9 5006 addq %r13,%r10 5007 adcq %r14,%r11 5008 adcq $0,%r12 5009 addq %r15,%r10 5010 adcq %r9,%r11 5011 adcq $0,%r12 5012 5013 leaq 16(%r8),%r8 5014 jmp 1b 5015 1: 5016 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 5017 vpaddd 64(%rbp),%ymm5,%ymm5 5018 vpaddd 96(%rbp),%ymm9,%ymm9 5019 vpaddd 192(%rbp),%ymm13,%ymm13 5020 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5021 vpaddd 64(%rbp),%ymm4,%ymm4 5022 vpaddd 96(%rbp),%ymm8,%ymm8 5023 vpaddd 160(%rbp),%ymm12,%ymm12 5024 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5025 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5026 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5027 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5028 vpxor 0+0(%rsi),%ymm3,%ymm3 5029 vpxor 32+0(%rsi),%ymm1,%ymm1 5030 vpxor 64+0(%rsi),%ymm5,%ymm5 5031 vpxor 96+0(%rsi),%ymm9,%ymm9 5032 vmovdqu %ymm3,0+0(%rdi) 5033 vmovdqu %ymm1,32+0(%rdi) 5034 vmovdqu %ymm5,64+0(%rdi) 5035 vmovdqu %ymm9,96+0(%rdi) 5036 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5037 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5038 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5039 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5040 vmovdqa %ymm3,%ymm8 5041 5042 leaq 128(%rsi),%rsi 5043 leaq 128(%rdi),%rdi 5044 subq $128,%rbx 5045 jmp open_avx2_tail_loop 5046 3: 5047 cmpq $384,%rbx 5048 ja 3f 5049 vmovdqa 
.chacha20_consts(%rip),%ymm0 5050 vmovdqa 64(%rbp),%ymm4 5051 vmovdqa 96(%rbp),%ymm8 5052 vmovdqa %ymm0,%ymm1 5053 vmovdqa %ymm4,%ymm5 5054 vmovdqa %ymm8,%ymm9 5055 vmovdqa %ymm0,%ymm2 5056 vmovdqa %ymm4,%ymm6 5057 vmovdqa %ymm8,%ymm10 5058 vmovdqa .avx2_inc(%rip),%ymm12 5059 vpaddd 160(%rbp),%ymm12,%ymm14 5060 vpaddd %ymm14,%ymm12,%ymm13 5061 vpaddd %ymm13,%ymm12,%ymm12 5062 vmovdqa %ymm12,160(%rbp) 5063 vmovdqa %ymm13,192(%rbp) 5064 vmovdqa %ymm14,224(%rbp) 5065 5066 movq %rbx,128(%rbp) 5067 movq %rbx,%rcx 5068 subq $256,%rcx 5069 shrq $4,%rcx 5070 addq $6,%rcx 5071 movq $10,%r8 5072 cmpq $10,%rcx 5073 cmovgq %r8,%rcx 5074 movq %rsi,%rbx 5075 xorq %r8,%r8 5076 1: 5077 addq 0(%rbx),%r10 5078 adcq 8+0(%rbx),%r11 5079 adcq $1,%r12 5080 movq 0+0(%rbp),%rdx 5081 movq %rdx,%r15 5082 mulxq %r10,%r13,%r14 5083 mulxq %r11,%rax,%rdx 5084 imulq %r12,%r15 5085 addq %rax,%r14 5086 adcq %rdx,%r15 5087 movq 8+0(%rbp),%rdx 5088 mulxq %r10,%r10,%rax 5089 addq %r10,%r14 5090 mulxq %r11,%r11,%r9 5091 adcq %r11,%r15 5092 adcq $0,%r9 5093 imulq %r12,%rdx 5094 addq %rax,%r15 5095 adcq %rdx,%r9 5096 movq %r13,%r10 5097 movq %r14,%r11 5098 movq %r15,%r12 5099 andq $3,%r12 5100 movq %r15,%r13 5101 andq $-4,%r13 5102 movq %r9,%r14 5103 shrdq $2,%r9,%r15 5104 shrq $2,%r9 5105 addq %r13,%r10 5106 adcq %r14,%r11 5107 adcq $0,%r12 5108 addq %r15,%r10 5109 adcq %r9,%r11 5110 adcq $0,%r12 5111 5112 leaq 16(%rbx),%rbx 5113 2: 5114 vpaddd %ymm6,%ymm2,%ymm2 5115 vpxor %ymm2,%ymm14,%ymm14 5116 vpshufb .rol16(%rip),%ymm14,%ymm14 5117 vpaddd %ymm14,%ymm10,%ymm10 5118 vpxor %ymm10,%ymm6,%ymm6 5119 vpsrld $20,%ymm6,%ymm3 5120 vpslld $12,%ymm6,%ymm6 5121 vpxor %ymm3,%ymm6,%ymm6 5122 vpaddd %ymm6,%ymm2,%ymm2 5123 vpxor %ymm2,%ymm14,%ymm14 5124 vpshufb .rol8(%rip),%ymm14,%ymm14 5125 vpaddd %ymm14,%ymm10,%ymm10 5126 vpxor %ymm10,%ymm6,%ymm6 5127 vpslld $7,%ymm6,%ymm3 5128 vpsrld $25,%ymm6,%ymm6 5129 vpxor %ymm3,%ymm6,%ymm6 5130 vpalignr $12,%ymm14,%ymm14,%ymm14 5131 vpalignr $8,%ymm10,%ymm10,%ymm10 5132 
vpalignr $4,%ymm6,%ymm6,%ymm6 5133 vpaddd %ymm5,%ymm1,%ymm1 5134 vpxor %ymm1,%ymm13,%ymm13 5135 vpshufb .rol16(%rip),%ymm13,%ymm13 5136 vpaddd %ymm13,%ymm9,%ymm9 5137 vpxor %ymm9,%ymm5,%ymm5 5138 vpsrld $20,%ymm5,%ymm3 5139 vpslld $12,%ymm5,%ymm5 5140 vpxor %ymm3,%ymm5,%ymm5 5141 vpaddd %ymm5,%ymm1,%ymm1 5142 vpxor %ymm1,%ymm13,%ymm13 5143 vpshufb .rol8(%rip),%ymm13,%ymm13 5144 vpaddd %ymm13,%ymm9,%ymm9 5145 vpxor %ymm9,%ymm5,%ymm5 5146 vpslld $7,%ymm5,%ymm3 5147 vpsrld $25,%ymm5,%ymm5 5148 vpxor %ymm3,%ymm5,%ymm5 5149 vpalignr $12,%ymm13,%ymm13,%ymm13 5150 vpalignr $8,%ymm9,%ymm9,%ymm9 5151 vpalignr $4,%ymm5,%ymm5,%ymm5 5152 vpaddd %ymm4,%ymm0,%ymm0 5153 vpxor %ymm0,%ymm12,%ymm12 5154 vpshufb .rol16(%rip),%ymm12,%ymm12 5155 vpaddd %ymm12,%ymm8,%ymm8 5156 vpxor %ymm8,%ymm4,%ymm4 5157 vpsrld $20,%ymm4,%ymm3 5158 vpslld $12,%ymm4,%ymm4 5159 vpxor %ymm3,%ymm4,%ymm4 5160 vpaddd %ymm4,%ymm0,%ymm0 5161 vpxor %ymm0,%ymm12,%ymm12 5162 vpshufb .rol8(%rip),%ymm12,%ymm12 5163 vpaddd %ymm12,%ymm8,%ymm8 5164 vpxor %ymm8,%ymm4,%ymm4 5165 vpslld $7,%ymm4,%ymm3 5166 vpsrld $25,%ymm4,%ymm4 5167 vpxor %ymm3,%ymm4,%ymm4 5168 vpalignr $12,%ymm12,%ymm12,%ymm12 5169 vpalignr $8,%ymm8,%ymm8,%ymm8 5170 vpalignr $4,%ymm4,%ymm4,%ymm4 5171 addq 0(%rbx),%r10 5172 adcq 8+0(%rbx),%r11 5173 adcq $1,%r12 5174 movq 0+0(%rbp),%rax 5175 movq %rax,%r15 5176 mulq %r10 5177 movq %rax,%r13 5178 movq %rdx,%r14 5179 movq 0+0(%rbp),%rax 5180 mulq %r11 5181 imulq %r12,%r15 5182 addq %rax,%r14 5183 adcq %rdx,%r15 5184 movq 8+0(%rbp),%rax 5185 movq %rax,%r9 5186 mulq %r10 5187 addq %rax,%r14 5188 adcq $0,%rdx 5189 movq %rdx,%r10 5190 movq 8+0(%rbp),%rax 5191 mulq %r11 5192 addq %rax,%r15 5193 adcq $0,%rdx 5194 imulq %r12,%r9 5195 addq %r10,%r15 5196 adcq %rdx,%r9 5197 movq %r13,%r10 5198 movq %r14,%r11 5199 movq %r15,%r12 5200 andq $3,%r12 5201 movq %r15,%r13 5202 andq $-4,%r13 5203 movq %r9,%r14 5204 shrdq $2,%r9,%r15 5205 shrq $2,%r9 5206 addq %r13,%r10 5207 adcq %r14,%r11 5208 adcq $0,%r12 5209 addq 
%r15,%r10 5210 adcq %r9,%r11 5211 adcq $0,%r12 5212 5213 leaq 16(%rbx),%rbx 5214 incq %r8 5215 vpaddd %ymm6,%ymm2,%ymm2 5216 vpxor %ymm2,%ymm14,%ymm14 5217 vpshufb .rol16(%rip),%ymm14,%ymm14 5218 vpaddd %ymm14,%ymm10,%ymm10 5219 vpxor %ymm10,%ymm6,%ymm6 5220 vpsrld $20,%ymm6,%ymm3 5221 vpslld $12,%ymm6,%ymm6 5222 vpxor %ymm3,%ymm6,%ymm6 5223 vpaddd %ymm6,%ymm2,%ymm2 5224 vpxor %ymm2,%ymm14,%ymm14 5225 vpshufb .rol8(%rip),%ymm14,%ymm14 5226 vpaddd %ymm14,%ymm10,%ymm10 5227 vpxor %ymm10,%ymm6,%ymm6 5228 vpslld $7,%ymm6,%ymm3 5229 vpsrld $25,%ymm6,%ymm6 5230 vpxor %ymm3,%ymm6,%ymm6 5231 vpalignr $4,%ymm14,%ymm14,%ymm14 5232 vpalignr $8,%ymm10,%ymm10,%ymm10 5233 vpalignr $12,%ymm6,%ymm6,%ymm6 5234 vpaddd %ymm5,%ymm1,%ymm1 5235 vpxor %ymm1,%ymm13,%ymm13 5236 vpshufb .rol16(%rip),%ymm13,%ymm13 5237 vpaddd %ymm13,%ymm9,%ymm9 5238 vpxor %ymm9,%ymm5,%ymm5 5239 vpsrld $20,%ymm5,%ymm3 5240 vpslld $12,%ymm5,%ymm5 5241 vpxor %ymm3,%ymm5,%ymm5 5242 vpaddd %ymm5,%ymm1,%ymm1 5243 vpxor %ymm1,%ymm13,%ymm13 5244 vpshufb .rol8(%rip),%ymm13,%ymm13 5245 vpaddd %ymm13,%ymm9,%ymm9 5246 vpxor %ymm9,%ymm5,%ymm5 5247 vpslld $7,%ymm5,%ymm3 5248 vpsrld $25,%ymm5,%ymm5 5249 vpxor %ymm3,%ymm5,%ymm5 5250 vpalignr $4,%ymm13,%ymm13,%ymm13 5251 vpalignr $8,%ymm9,%ymm9,%ymm9 5252 vpalignr $12,%ymm5,%ymm5,%ymm5 5253 vpaddd %ymm4,%ymm0,%ymm0 5254 vpxor %ymm0,%ymm12,%ymm12 5255 vpshufb .rol16(%rip),%ymm12,%ymm12 5256 vpaddd %ymm12,%ymm8,%ymm8 5257 vpxor %ymm8,%ymm4,%ymm4 5258 vpsrld $20,%ymm4,%ymm3 5259 vpslld $12,%ymm4,%ymm4 5260 vpxor %ymm3,%ymm4,%ymm4 5261 vpaddd %ymm4,%ymm0,%ymm0 5262 vpxor %ymm0,%ymm12,%ymm12 5263 vpshufb .rol8(%rip),%ymm12,%ymm12 5264 vpaddd %ymm12,%ymm8,%ymm8 5265 vpxor %ymm8,%ymm4,%ymm4 5266 vpslld $7,%ymm4,%ymm3 5267 vpsrld $25,%ymm4,%ymm4 5268 vpxor %ymm3,%ymm4,%ymm4 5269 vpalignr $4,%ymm12,%ymm12,%ymm12 5270 vpalignr $8,%ymm8,%ymm8,%ymm8 5271 vpalignr $12,%ymm4,%ymm4,%ymm4 5272 5273 cmpq %rcx,%r8 5274 jb 1b 5275 cmpq $10,%r8 5276 jne 2b 5277 movq %rbx,%r8 5278 subq %rsi,%rbx 
5279 movq %rbx,%rcx 5280 movq 128(%rbp),%rbx 5281 1: 5282 addq $16,%rcx 5283 cmpq %rbx,%rcx 5284 jg 1f 5285 addq 0(%r8),%r10 5286 adcq 8+0(%r8),%r11 5287 adcq $1,%r12 5288 movq 0+0(%rbp),%rdx 5289 movq %rdx,%r15 5290 mulxq %r10,%r13,%r14 5291 mulxq %r11,%rax,%rdx 5292 imulq %r12,%r15 5293 addq %rax,%r14 5294 adcq %rdx,%r15 5295 movq 8+0(%rbp),%rdx 5296 mulxq %r10,%r10,%rax 5297 addq %r10,%r14 5298 mulxq %r11,%r11,%r9 5299 adcq %r11,%r15 5300 adcq $0,%r9 5301 imulq %r12,%rdx 5302 addq %rax,%r15 5303 adcq %rdx,%r9 5304 movq %r13,%r10 5305 movq %r14,%r11 5306 movq %r15,%r12 5307 andq $3,%r12 5308 movq %r15,%r13 5309 andq $-4,%r13 5310 movq %r9,%r14 5311 shrdq $2,%r9,%r15 5312 shrq $2,%r9 5313 addq %r13,%r10 5314 adcq %r14,%r11 5315 adcq $0,%r12 5316 addq %r15,%r10 5317 adcq %r9,%r11 5318 adcq $0,%r12 5319 5320 leaq 16(%r8),%r8 5321 jmp 1b 5322 1: 5323 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 5324 vpaddd 64(%rbp),%ymm6,%ymm6 5325 vpaddd 96(%rbp),%ymm10,%ymm10 5326 vpaddd 224(%rbp),%ymm14,%ymm14 5327 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 5328 vpaddd 64(%rbp),%ymm5,%ymm5 5329 vpaddd 96(%rbp),%ymm9,%ymm9 5330 vpaddd 192(%rbp),%ymm13,%ymm13 5331 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5332 vpaddd 64(%rbp),%ymm4,%ymm4 5333 vpaddd 96(%rbp),%ymm8,%ymm8 5334 vpaddd 160(%rbp),%ymm12,%ymm12 5335 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5336 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5337 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5338 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5339 vpxor 0+0(%rsi),%ymm3,%ymm3 5340 vpxor 32+0(%rsi),%ymm2,%ymm2 5341 vpxor 64+0(%rsi),%ymm6,%ymm6 5342 vpxor 96+0(%rsi),%ymm10,%ymm10 5343 vmovdqu %ymm3,0+0(%rdi) 5344 vmovdqu %ymm2,32+0(%rdi) 5345 vmovdqu %ymm6,64+0(%rdi) 5346 vmovdqu %ymm10,96+0(%rdi) 5347 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5348 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5349 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5350 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5351 vpxor 0+128(%rsi),%ymm3,%ymm3 5352 vpxor 32+128(%rsi),%ymm1,%ymm1 5353 vpxor 64+128(%rsi),%ymm5,%ymm5 
5354 vpxor 96+128(%rsi),%ymm9,%ymm9 5355 vmovdqu %ymm3,0+128(%rdi) 5356 vmovdqu %ymm1,32+128(%rdi) 5357 vmovdqu %ymm5,64+128(%rdi) 5358 vmovdqu %ymm9,96+128(%rdi) 5359 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5360 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5361 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5362 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5363 vmovdqa %ymm3,%ymm8 5364 5365 leaq 256(%rsi),%rsi 5366 leaq 256(%rdi),%rdi 5367 subq $256,%rbx 5368 jmp open_avx2_tail_loop 5369 3: 5370 vmovdqa .chacha20_consts(%rip),%ymm0 5371 vmovdqa 64(%rbp),%ymm4 5372 vmovdqa 96(%rbp),%ymm8 5373 vmovdqa %ymm0,%ymm1 5374 vmovdqa %ymm4,%ymm5 5375 vmovdqa %ymm8,%ymm9 5376 vmovdqa %ymm0,%ymm2 5377 vmovdqa %ymm4,%ymm6 5378 vmovdqa %ymm8,%ymm10 5379 vmovdqa %ymm0,%ymm3 5380 vmovdqa %ymm4,%ymm7 5381 vmovdqa %ymm8,%ymm11 5382 vmovdqa .avx2_inc(%rip),%ymm12 5383 vpaddd 160(%rbp),%ymm12,%ymm15 5384 vpaddd %ymm15,%ymm12,%ymm14 5385 vpaddd %ymm14,%ymm12,%ymm13 5386 vpaddd %ymm13,%ymm12,%ymm12 5387 vmovdqa %ymm15,256(%rbp) 5388 vmovdqa %ymm14,224(%rbp) 5389 vmovdqa %ymm13,192(%rbp) 5390 vmovdqa %ymm12,160(%rbp) 5391 5392 xorq %rcx,%rcx 5393 movq %rsi,%r8 5394 1: 5395 addq 0(%r8),%r10 5396 adcq 8+0(%r8),%r11 5397 adcq $1,%r12 5398 movq 0+0(%rbp),%rax 5399 movq %rax,%r15 5400 mulq %r10 5401 movq %rax,%r13 5402 movq %rdx,%r14 5403 movq 0+0(%rbp),%rax 5404 mulq %r11 5405 imulq %r12,%r15 5406 addq %rax,%r14 5407 adcq %rdx,%r15 5408 movq 8+0(%rbp),%rax 5409 movq %rax,%r9 5410 mulq %r10 5411 addq %rax,%r14 5412 adcq $0,%rdx 5413 movq %rdx,%r10 5414 movq 8+0(%rbp),%rax 5415 mulq %r11 5416 addq %rax,%r15 5417 adcq $0,%rdx 5418 imulq %r12,%r9 5419 addq %r10,%r15 5420 adcq %rdx,%r9 5421 movq %r13,%r10 5422 movq %r14,%r11 5423 movq %r15,%r12 5424 andq $3,%r12 5425 movq %r15,%r13 5426 andq $-4,%r13 5427 movq %r9,%r14 5428 shrdq $2,%r9,%r15 5429 shrq $2,%r9 5430 addq %r13,%r10 5431 adcq %r14,%r11 5432 adcq $0,%r12 5433 addq %r15,%r10 5434 adcq %r9,%r11 5435 adcq $0,%r12 5436 5437 leaq 16(%r8),%r8 5438 2: 5439 vmovdqa 
%ymm8,128(%rbp) 5440 vmovdqa .rol16(%rip),%ymm8 5441 vpaddd %ymm7,%ymm3,%ymm3 5442 vpaddd %ymm6,%ymm2,%ymm2 5443 vpaddd %ymm5,%ymm1,%ymm1 5444 vpaddd %ymm4,%ymm0,%ymm0 5445 vpxor %ymm3,%ymm15,%ymm15 5446 vpxor %ymm2,%ymm14,%ymm14 5447 vpxor %ymm1,%ymm13,%ymm13 5448 vpxor %ymm0,%ymm12,%ymm12 5449 vpshufb %ymm8,%ymm15,%ymm15 5450 vpshufb %ymm8,%ymm14,%ymm14 5451 vpshufb %ymm8,%ymm13,%ymm13 5452 vpshufb %ymm8,%ymm12,%ymm12 5453 vmovdqa 128(%rbp),%ymm8 5454 vpaddd %ymm15,%ymm11,%ymm11 5455 vpaddd %ymm14,%ymm10,%ymm10 5456 vpaddd %ymm13,%ymm9,%ymm9 5457 vpaddd %ymm12,%ymm8,%ymm8 5458 vpxor %ymm11,%ymm7,%ymm7 5459 vpxor %ymm10,%ymm6,%ymm6 5460 vpxor %ymm9,%ymm5,%ymm5 5461 vpxor %ymm8,%ymm4,%ymm4 5462 vmovdqa %ymm8,128(%rbp) 5463 vpsrld $20,%ymm7,%ymm8 5464 vpslld $32-20,%ymm7,%ymm7 5465 vpxor %ymm8,%ymm7,%ymm7 5466 vpsrld $20,%ymm6,%ymm8 5467 vpslld $32-20,%ymm6,%ymm6 5468 vpxor %ymm8,%ymm6,%ymm6 5469 vpsrld $20,%ymm5,%ymm8 5470 vpslld $32-20,%ymm5,%ymm5 5471 vpxor %ymm8,%ymm5,%ymm5 5472 vpsrld $20,%ymm4,%ymm8 5473 vpslld $32-20,%ymm4,%ymm4 5474 vpxor %ymm8,%ymm4,%ymm4 5475 vmovdqa .rol8(%rip),%ymm8 5476 addq 0(%r8),%r10 5477 adcq 8+0(%r8),%r11 5478 adcq $1,%r12 5479 movq 0+0(%rbp),%rdx 5480 movq %rdx,%r15 5481 mulxq %r10,%r13,%r14 5482 mulxq %r11,%rax,%rdx 5483 imulq %r12,%r15 5484 addq %rax,%r14 5485 adcq %rdx,%r15 5486 movq 8+0(%rbp),%rdx 5487 mulxq %r10,%r10,%rax 5488 addq %r10,%r14 5489 mulxq %r11,%r11,%r9 5490 adcq %r11,%r15 5491 adcq $0,%r9 5492 imulq %r12,%rdx 5493 addq %rax,%r15 5494 adcq %rdx,%r9 5495 movq %r13,%r10 5496 movq %r14,%r11 5497 movq %r15,%r12 5498 andq $3,%r12 5499 movq %r15,%r13 5500 andq $-4,%r13 5501 movq %r9,%r14 5502 shrdq $2,%r9,%r15 5503 shrq $2,%r9 5504 addq %r13,%r10 5505 adcq %r14,%r11 5506 adcq $0,%r12 5507 addq %r15,%r10 5508 adcq %r9,%r11 5509 adcq $0,%r12 5510 vpaddd %ymm7,%ymm3,%ymm3 5511 vpaddd %ymm6,%ymm2,%ymm2 5512 vpaddd %ymm5,%ymm1,%ymm1 5513 vpaddd %ymm4,%ymm0,%ymm0 5514 vpxor %ymm3,%ymm15,%ymm15 5515 vpxor %ymm2,%ymm14,%ymm14 
5516 vpxor %ymm1,%ymm13,%ymm13 5517 vpxor %ymm0,%ymm12,%ymm12 5518 vpshufb %ymm8,%ymm15,%ymm15 5519 vpshufb %ymm8,%ymm14,%ymm14 5520 vpshufb %ymm8,%ymm13,%ymm13 5521 vpshufb %ymm8,%ymm12,%ymm12 5522 vmovdqa 128(%rbp),%ymm8 5523 vpaddd %ymm15,%ymm11,%ymm11 5524 vpaddd %ymm14,%ymm10,%ymm10 5525 vpaddd %ymm13,%ymm9,%ymm9 5526 vpaddd %ymm12,%ymm8,%ymm8 5527 vpxor %ymm11,%ymm7,%ymm7 5528 vpxor %ymm10,%ymm6,%ymm6 5529 vpxor %ymm9,%ymm5,%ymm5 5530 vpxor %ymm8,%ymm4,%ymm4 5531 vmovdqa %ymm8,128(%rbp) 5532 vpsrld $25,%ymm7,%ymm8 5533 vpslld $32-25,%ymm7,%ymm7 5534 vpxor %ymm8,%ymm7,%ymm7 5535 vpsrld $25,%ymm6,%ymm8 5536 vpslld $32-25,%ymm6,%ymm6 5537 vpxor %ymm8,%ymm6,%ymm6 5538 vpsrld $25,%ymm5,%ymm8 5539 vpslld $32-25,%ymm5,%ymm5 5540 vpxor %ymm8,%ymm5,%ymm5 5541 vpsrld $25,%ymm4,%ymm8 5542 vpslld $32-25,%ymm4,%ymm4 5543 vpxor %ymm8,%ymm4,%ymm4 5544 vmovdqa 128(%rbp),%ymm8 5545 vpalignr $4,%ymm7,%ymm7,%ymm7 5546 vpalignr $8,%ymm11,%ymm11,%ymm11 5547 vpalignr $12,%ymm15,%ymm15,%ymm15 5548 vpalignr $4,%ymm6,%ymm6,%ymm6 5549 vpalignr $8,%ymm10,%ymm10,%ymm10 5550 vpalignr $12,%ymm14,%ymm14,%ymm14 5551 vpalignr $4,%ymm5,%ymm5,%ymm5 5552 vpalignr $8,%ymm9,%ymm9,%ymm9 5553 vpalignr $12,%ymm13,%ymm13,%ymm13 5554 vpalignr $4,%ymm4,%ymm4,%ymm4 5555 vpalignr $8,%ymm8,%ymm8,%ymm8 5556 vpalignr $12,%ymm12,%ymm12,%ymm12 5557 vmovdqa %ymm8,128(%rbp) 5558 addq 16(%r8),%r10 5559 adcq 8+16(%r8),%r11 5560 adcq $1,%r12 5561 movq 0+0(%rbp),%rdx 5562 movq %rdx,%r15 5563 mulxq %r10,%r13,%r14 5564 mulxq %r11,%rax,%rdx 5565 imulq %r12,%r15 5566 addq %rax,%r14 5567 adcq %rdx,%r15 5568 movq 8+0(%rbp),%rdx 5569 mulxq %r10,%r10,%rax 5570 addq %r10,%r14 5571 mulxq %r11,%r11,%r9 5572 adcq %r11,%r15 5573 adcq $0,%r9 5574 imulq %r12,%rdx 5575 addq %rax,%r15 5576 adcq %rdx,%r9 5577 movq %r13,%r10 5578 movq %r14,%r11 5579 movq %r15,%r12 5580 andq $3,%r12 5581 movq %r15,%r13 5582 andq $-4,%r13 5583 movq %r9,%r14 5584 shrdq $2,%r9,%r15 5585 shrq $2,%r9 5586 addq %r13,%r10 5587 adcq %r14,%r11 5588 adcq 
$0,%r12 5589 addq %r15,%r10 5590 adcq %r9,%r11 5591 adcq $0,%r12 5592 5593 leaq 32(%r8),%r8 5594 vmovdqa .rol16(%rip),%ymm8 5595 vpaddd %ymm7,%ymm3,%ymm3 5596 vpaddd %ymm6,%ymm2,%ymm2 5597 vpaddd %ymm5,%ymm1,%ymm1 5598 vpaddd %ymm4,%ymm0,%ymm0 5599 vpxor %ymm3,%ymm15,%ymm15 5600 vpxor %ymm2,%ymm14,%ymm14 5601 vpxor %ymm1,%ymm13,%ymm13 5602 vpxor %ymm0,%ymm12,%ymm12 5603 vpshufb %ymm8,%ymm15,%ymm15 5604 vpshufb %ymm8,%ymm14,%ymm14 5605 vpshufb %ymm8,%ymm13,%ymm13 5606 vpshufb %ymm8,%ymm12,%ymm12 5607 vmovdqa 128(%rbp),%ymm8 5608 vpaddd %ymm15,%ymm11,%ymm11 5609 vpaddd %ymm14,%ymm10,%ymm10 5610 vpaddd %ymm13,%ymm9,%ymm9 5611 vpaddd %ymm12,%ymm8,%ymm8 5612 vpxor %ymm11,%ymm7,%ymm7 5613 vpxor %ymm10,%ymm6,%ymm6 5614 vpxor %ymm9,%ymm5,%ymm5 5615 vpxor %ymm8,%ymm4,%ymm4 5616 vmovdqa %ymm8,128(%rbp) 5617 vpsrld $20,%ymm7,%ymm8 5618 vpslld $32-20,%ymm7,%ymm7 5619 vpxor %ymm8,%ymm7,%ymm7 5620 vpsrld $20,%ymm6,%ymm8 5621 vpslld $32-20,%ymm6,%ymm6 5622 vpxor %ymm8,%ymm6,%ymm6 5623 vpsrld $20,%ymm5,%ymm8 5624 vpslld $32-20,%ymm5,%ymm5 5625 vpxor %ymm8,%ymm5,%ymm5 5626 vpsrld $20,%ymm4,%ymm8 5627 vpslld $32-20,%ymm4,%ymm4 5628 vpxor %ymm8,%ymm4,%ymm4 5629 vmovdqa .rol8(%rip),%ymm8 5630 vpaddd %ymm7,%ymm3,%ymm3 5631 vpaddd %ymm6,%ymm2,%ymm2 5632 vpaddd %ymm5,%ymm1,%ymm1 5633 vpaddd %ymm4,%ymm0,%ymm0 5634 vpxor %ymm3,%ymm15,%ymm15 5635 vpxor %ymm2,%ymm14,%ymm14 5636 vpxor %ymm1,%ymm13,%ymm13 5637 vpxor %ymm0,%ymm12,%ymm12 5638 vpshufb %ymm8,%ymm15,%ymm15 5639 vpshufb %ymm8,%ymm14,%ymm14 5640 vpshufb %ymm8,%ymm13,%ymm13 5641 vpshufb %ymm8,%ymm12,%ymm12 5642 vmovdqa 128(%rbp),%ymm8 5643 vpaddd %ymm15,%ymm11,%ymm11 5644 vpaddd %ymm14,%ymm10,%ymm10 5645 vpaddd %ymm13,%ymm9,%ymm9 5646 vpaddd %ymm12,%ymm8,%ymm8 5647 vpxor %ymm11,%ymm7,%ymm7 5648 vpxor %ymm10,%ymm6,%ymm6 5649 vpxor %ymm9,%ymm5,%ymm5 5650 vpxor %ymm8,%ymm4,%ymm4 5651 vmovdqa %ymm8,128(%rbp) 5652 vpsrld $25,%ymm7,%ymm8 5653 vpslld $32-25,%ymm7,%ymm7 5654 vpxor %ymm8,%ymm7,%ymm7 5655 vpsrld $25,%ymm6,%ymm8 5656 vpslld 
$32-25,%ymm6,%ymm6 5657 vpxor %ymm8,%ymm6,%ymm6 5658 vpsrld $25,%ymm5,%ymm8 5659 vpslld $32-25,%ymm5,%ymm5 5660 vpxor %ymm8,%ymm5,%ymm5 5661 vpsrld $25,%ymm4,%ymm8 5662 vpslld $32-25,%ymm4,%ymm4 5663 vpxor %ymm8,%ymm4,%ymm4 5664 vmovdqa 128(%rbp),%ymm8 5665 vpalignr $12,%ymm7,%ymm7,%ymm7 5666 vpalignr $8,%ymm11,%ymm11,%ymm11 5667 vpalignr $4,%ymm15,%ymm15,%ymm15 5668 vpalignr $12,%ymm6,%ymm6,%ymm6 5669 vpalignr $8,%ymm10,%ymm10,%ymm10 5670 vpalignr $4,%ymm14,%ymm14,%ymm14 5671 vpalignr $12,%ymm5,%ymm5,%ymm5 5672 vpalignr $8,%ymm9,%ymm9,%ymm9 5673 vpalignr $4,%ymm13,%ymm13,%ymm13 5674 vpalignr $12,%ymm4,%ymm4,%ymm4 5675 vpalignr $8,%ymm8,%ymm8,%ymm8 5676 vpalignr $4,%ymm12,%ymm12,%ymm12 5677 5678 incq %rcx 5679 cmpq $4,%rcx 5680 jl 1b 5681 cmpq $10,%rcx 5682 jne 2b 5683 movq %rbx,%rcx 5684 subq $384,%rcx 5685 andq $-16,%rcx 5686 1: 5687 testq %rcx,%rcx 5688 je 1f 5689 addq 0(%r8),%r10 5690 adcq 8+0(%r8),%r11 5691 adcq $1,%r12 5692 movq 0+0(%rbp),%rdx 5693 movq %rdx,%r15 5694 mulxq %r10,%r13,%r14 5695 mulxq %r11,%rax,%rdx 5696 imulq %r12,%r15 5697 addq %rax,%r14 5698 adcq %rdx,%r15 5699 movq 8+0(%rbp),%rdx 5700 mulxq %r10,%r10,%rax 5701 addq %r10,%r14 5702 mulxq %r11,%r11,%r9 5703 adcq %r11,%r15 5704 adcq $0,%r9 5705 imulq %r12,%rdx 5706 addq %rax,%r15 5707 adcq %rdx,%r9 5708 movq %r13,%r10 5709 movq %r14,%r11 5710 movq %r15,%r12 5711 andq $3,%r12 5712 movq %r15,%r13 5713 andq $-4,%r13 5714 movq %r9,%r14 5715 shrdq $2,%r9,%r15 5716 shrq $2,%r9 5717 addq %r13,%r10 5718 adcq %r14,%r11 5719 adcq $0,%r12 5720 addq %r15,%r10 5721 adcq %r9,%r11 5722 adcq $0,%r12 5723 5724 leaq 16(%r8),%r8 5725 subq $16,%rcx 5726 jmp 1b 5727 1: 5728 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 5729 vpaddd 64(%rbp),%ymm7,%ymm7 5730 vpaddd 96(%rbp),%ymm11,%ymm11 5731 vpaddd 256(%rbp),%ymm15,%ymm15 5732 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 5733 vpaddd 64(%rbp),%ymm6,%ymm6 5734 vpaddd 96(%rbp),%ymm10,%ymm10 5735 vpaddd 224(%rbp),%ymm14,%ymm14 5736 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 
5737 vpaddd 64(%rbp),%ymm5,%ymm5 5738 vpaddd 96(%rbp),%ymm9,%ymm9 5739 vpaddd 192(%rbp),%ymm13,%ymm13 5740 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 5741 vpaddd 64(%rbp),%ymm4,%ymm4 5742 vpaddd 96(%rbp),%ymm8,%ymm8 5743 vpaddd 160(%rbp),%ymm12,%ymm12 5744 5745 vmovdqa %ymm0,128(%rbp) 5746 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5747 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5748 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5749 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5750 vpxor 0+0(%rsi),%ymm0,%ymm0 5751 vpxor 32+0(%rsi),%ymm3,%ymm3 5752 vpxor 64+0(%rsi),%ymm7,%ymm7 5753 vpxor 96+0(%rsi),%ymm11,%ymm11 5754 vmovdqu %ymm0,0+0(%rdi) 5755 vmovdqu %ymm3,32+0(%rdi) 5756 vmovdqu %ymm7,64+0(%rdi) 5757 vmovdqu %ymm11,96+0(%rdi) 5758 5759 vmovdqa 128(%rbp),%ymm0 5760 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5761 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5762 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5763 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5764 vpxor 0+128(%rsi),%ymm3,%ymm3 5765 vpxor 32+128(%rsi),%ymm2,%ymm2 5766 vpxor 64+128(%rsi),%ymm6,%ymm6 5767 vpxor 96+128(%rsi),%ymm10,%ymm10 5768 vmovdqu %ymm3,0+128(%rdi) 5769 vmovdqu %ymm2,32+128(%rdi) 5770 vmovdqu %ymm6,64+128(%rdi) 5771 vmovdqu %ymm10,96+128(%rdi) 5772 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5773 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5774 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5775 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5776 vpxor 0+256(%rsi),%ymm3,%ymm3 5777 vpxor 32+256(%rsi),%ymm1,%ymm1 5778 vpxor 64+256(%rsi),%ymm5,%ymm5 5779 vpxor 96+256(%rsi),%ymm9,%ymm9 5780 vmovdqu %ymm3,0+256(%rdi) 5781 vmovdqu %ymm1,32+256(%rdi) 5782 vmovdqu %ymm5,64+256(%rdi) 5783 vmovdqu %ymm9,96+256(%rdi) 5784 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5785 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5786 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5787 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5788 vmovdqa %ymm3,%ymm8 5789 5790 leaq 384(%rsi),%rsi 5791 leaq 384(%rdi),%rdi 5792 subq $384,%rbx 5793 open_avx2_tail_loop: 5794 cmpq $32,%rbx 5795 jb open_avx2_tail 5796 subq $32,%rbx 5797 vpxor (%rsi),%ymm0,%ymm0 5798 
vmovdqu %ymm0,(%rdi) 5799 leaq 32(%rsi),%rsi 5800 leaq 32(%rdi),%rdi 5801 vmovdqa %ymm4,%ymm0 5802 vmovdqa %ymm8,%ymm4 5803 vmovdqa %ymm12,%ymm8 5804 jmp open_avx2_tail_loop 5805 open_avx2_tail: 5806 cmpq $16,%rbx 5807 vmovdqa %xmm0,%xmm1 5808 jb 1f 5809 subq $16,%rbx 5810 5811 vpxor (%rsi),%xmm0,%xmm1 5812 vmovdqu %xmm1,(%rdi) 5813 leaq 16(%rsi),%rsi 5814 leaq 16(%rdi),%rdi 5815 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5816 vmovdqa %xmm0,%xmm1 5817 1: 5818 vzeroupper 5819 jmp open_sse_tail_16 5820 5821 open_avx2_192: 5822 vmovdqa %ymm0,%ymm1 5823 vmovdqa %ymm0,%ymm2 5824 vmovdqa %ymm4,%ymm5 5825 vmovdqa %ymm4,%ymm6 5826 vmovdqa %ymm8,%ymm9 5827 vmovdqa %ymm8,%ymm10 5828 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 5829 vmovdqa %ymm12,%ymm11 5830 vmovdqa %ymm13,%ymm15 5831 movq $10,%r10 5832 1: 5833 vpaddd %ymm4,%ymm0,%ymm0 5834 vpxor %ymm0,%ymm12,%ymm12 5835 vpshufb .rol16(%rip),%ymm12,%ymm12 5836 vpaddd %ymm12,%ymm8,%ymm8 5837 vpxor %ymm8,%ymm4,%ymm4 5838 vpsrld $20,%ymm4,%ymm3 5839 vpslld $12,%ymm4,%ymm4 5840 vpxor %ymm3,%ymm4,%ymm4 5841 vpaddd %ymm4,%ymm0,%ymm0 5842 vpxor %ymm0,%ymm12,%ymm12 5843 vpshufb .rol8(%rip),%ymm12,%ymm12 5844 vpaddd %ymm12,%ymm8,%ymm8 5845 vpxor %ymm8,%ymm4,%ymm4 5846 vpslld $7,%ymm4,%ymm3 5847 vpsrld $25,%ymm4,%ymm4 5848 vpxor %ymm3,%ymm4,%ymm4 5849 vpalignr $12,%ymm12,%ymm12,%ymm12 5850 vpalignr $8,%ymm8,%ymm8,%ymm8 5851 vpalignr $4,%ymm4,%ymm4,%ymm4 5852 vpaddd %ymm5,%ymm1,%ymm1 5853 vpxor %ymm1,%ymm13,%ymm13 5854 vpshufb .rol16(%rip),%ymm13,%ymm13 5855 vpaddd %ymm13,%ymm9,%ymm9 5856 vpxor %ymm9,%ymm5,%ymm5 5857 vpsrld $20,%ymm5,%ymm3 5858 vpslld $12,%ymm5,%ymm5 5859 vpxor %ymm3,%ymm5,%ymm5 5860 vpaddd %ymm5,%ymm1,%ymm1 5861 vpxor %ymm1,%ymm13,%ymm13 5862 vpshufb .rol8(%rip),%ymm13,%ymm13 5863 vpaddd %ymm13,%ymm9,%ymm9 5864 vpxor %ymm9,%ymm5,%ymm5 5865 vpslld $7,%ymm5,%ymm3 5866 vpsrld $25,%ymm5,%ymm5 5867 vpxor %ymm3,%ymm5,%ymm5 5868 vpalignr $12,%ymm13,%ymm13,%ymm13 5869 vpalignr $8,%ymm9,%ymm9,%ymm9 5870 vpalignr $4,%ymm5,%ymm5,%ymm5 5871 
vpaddd %ymm4,%ymm0,%ymm0 5872 vpxor %ymm0,%ymm12,%ymm12 5873 vpshufb .rol16(%rip),%ymm12,%ymm12 5874 vpaddd %ymm12,%ymm8,%ymm8 5875 vpxor %ymm8,%ymm4,%ymm4 5876 vpsrld $20,%ymm4,%ymm3 5877 vpslld $12,%ymm4,%ymm4 5878 vpxor %ymm3,%ymm4,%ymm4 5879 vpaddd %ymm4,%ymm0,%ymm0 5880 vpxor %ymm0,%ymm12,%ymm12 5881 vpshufb .rol8(%rip),%ymm12,%ymm12 5882 vpaddd %ymm12,%ymm8,%ymm8 5883 vpxor %ymm8,%ymm4,%ymm4 5884 vpslld $7,%ymm4,%ymm3 5885 vpsrld $25,%ymm4,%ymm4 5886 vpxor %ymm3,%ymm4,%ymm4 5887 vpalignr $4,%ymm12,%ymm12,%ymm12 5888 vpalignr $8,%ymm8,%ymm8,%ymm8 5889 vpalignr $12,%ymm4,%ymm4,%ymm4 5890 vpaddd %ymm5,%ymm1,%ymm1 5891 vpxor %ymm1,%ymm13,%ymm13 5892 vpshufb .rol16(%rip),%ymm13,%ymm13 5893 vpaddd %ymm13,%ymm9,%ymm9 5894 vpxor %ymm9,%ymm5,%ymm5 5895 vpsrld $20,%ymm5,%ymm3 5896 vpslld $12,%ymm5,%ymm5 5897 vpxor %ymm3,%ymm5,%ymm5 5898 vpaddd %ymm5,%ymm1,%ymm1 5899 vpxor %ymm1,%ymm13,%ymm13 5900 vpshufb .rol8(%rip),%ymm13,%ymm13 5901 vpaddd %ymm13,%ymm9,%ymm9 5902 vpxor %ymm9,%ymm5,%ymm5 5903 vpslld $7,%ymm5,%ymm3 5904 vpsrld $25,%ymm5,%ymm5 5905 vpxor %ymm3,%ymm5,%ymm5 5906 vpalignr $4,%ymm13,%ymm13,%ymm13 5907 vpalignr $8,%ymm9,%ymm9,%ymm9 5908 vpalignr $12,%ymm5,%ymm5,%ymm5 5909 5910 decq %r10 5911 jne 1b 5912 vpaddd %ymm2,%ymm0,%ymm0 5913 vpaddd %ymm2,%ymm1,%ymm1 5914 vpaddd %ymm6,%ymm4,%ymm4 5915 vpaddd %ymm6,%ymm5,%ymm5 5916 vpaddd %ymm10,%ymm8,%ymm8 5917 vpaddd %ymm10,%ymm9,%ymm9 5918 vpaddd %ymm11,%ymm12,%ymm12 5919 vpaddd %ymm15,%ymm13,%ymm13 5920 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5921 5922 vpand .clamp(%rip),%ymm3,%ymm3 5923 vmovdqa %ymm3,0(%rbp) 5924 5925 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5926 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5927 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5928 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5929 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5930 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5931 open_avx2_short: 5932 movq %r8,%r8 5933 call poly_hash_ad_internal 5934 open_avx2_hash_and_xor_loop: 5935 cmpq $32,%rbx 5936 jb open_avx2_short_tail_32 5937 subq 
$32,%rbx 5938 addq 0(%rsi),%r10 5939 adcq 8+0(%rsi),%r11 5940 adcq $1,%r12 5941 movq 0+0(%rbp),%rax 5942 movq %rax,%r15 5943 mulq %r10 5944 movq %rax,%r13 5945 movq %rdx,%r14 5946 movq 0+0(%rbp),%rax 5947 mulq %r11 5948 imulq %r12,%r15 5949 addq %rax,%r14 5950 adcq %rdx,%r15 5951 movq 8+0(%rbp),%rax 5952 movq %rax,%r9 5953 mulq %r10 5954 addq %rax,%r14 5955 adcq $0,%rdx 5956 movq %rdx,%r10 5957 movq 8+0(%rbp),%rax 5958 mulq %r11 5959 addq %rax,%r15 5960 adcq $0,%rdx 5961 imulq %r12,%r9 5962 addq %r10,%r15 5963 adcq %rdx,%r9 5964 movq %r13,%r10 5965 movq %r14,%r11 5966 movq %r15,%r12 5967 andq $3,%r12 5968 movq %r15,%r13 5969 andq $-4,%r13 5970 movq %r9,%r14 5971 shrdq $2,%r9,%r15 5972 shrq $2,%r9 5973 addq %r13,%r10 5974 adcq %r14,%r11 5975 adcq $0,%r12 5976 addq %r15,%r10 5977 adcq %r9,%r11 5978 adcq $0,%r12 5979 addq 16(%rsi),%r10 5980 adcq 8+16(%rsi),%r11 5981 adcq $1,%r12 5982 movq 0+0(%rbp),%rax 5983 movq %rax,%r15 5984 mulq %r10 5985 movq %rax,%r13 5986 movq %rdx,%r14 5987 movq 0+0(%rbp),%rax 5988 mulq %r11 5989 imulq %r12,%r15 5990 addq %rax,%r14 5991 adcq %rdx,%r15 5992 movq 8+0(%rbp),%rax 5993 movq %rax,%r9 5994 mulq %r10 5995 addq %rax,%r14 5996 adcq $0,%rdx 5997 movq %rdx,%r10 5998 movq 8+0(%rbp),%rax 5999 mulq %r11 6000 addq %rax,%r15 6001 adcq $0,%rdx 6002 imulq %r12,%r9 6003 addq %r10,%r15 6004 adcq %rdx,%r9 6005 movq %r13,%r10 6006 movq %r14,%r11 6007 movq %r15,%r12 6008 andq $3,%r12 6009 movq %r15,%r13 6010 andq $-4,%r13 6011 movq %r9,%r14 6012 shrdq $2,%r9,%r15 6013 shrq $2,%r9 6014 addq %r13,%r10 6015 adcq %r14,%r11 6016 adcq $0,%r12 6017 addq %r15,%r10 6018 adcq %r9,%r11 6019 adcq $0,%r12 6020 6021 6022 vpxor (%rsi),%ymm0,%ymm0 6023 vmovdqu %ymm0,(%rdi) 6024 leaq 32(%rsi),%rsi 6025 leaq 32(%rdi),%rdi 6026 6027 vmovdqa %ymm4,%ymm0 6028 vmovdqa %ymm8,%ymm4 6029 vmovdqa %ymm12,%ymm8 6030 vmovdqa %ymm1,%ymm12 6031 vmovdqa %ymm5,%ymm1 6032 vmovdqa %ymm9,%ymm5 6033 vmovdqa %ymm13,%ymm9 6034 vmovdqa %ymm2,%ymm13 6035 vmovdqa %ymm6,%ymm2 6036 jmp 
open_avx2_hash_and_xor_loop 6037 open_avx2_short_tail_32: 6038 cmpq $16,%rbx 6039 vmovdqa %xmm0,%xmm1 6040 jb 1f 6041 subq $16,%rbx 6042 addq 0(%rsi),%r10 6043 adcq 8+0(%rsi),%r11 6044 adcq $1,%r12 6045 movq 0+0(%rbp),%rax 6046 movq %rax,%r15 6047 mulq %r10 6048 movq %rax,%r13 6049 movq %rdx,%r14 6050 movq 0+0(%rbp),%rax 6051 mulq %r11 6052 imulq %r12,%r15 6053 addq %rax,%r14 6054 adcq %rdx,%r15 6055 movq 8+0(%rbp),%rax 6056 movq %rax,%r9 6057 mulq %r10 6058 addq %rax,%r14 6059 adcq $0,%rdx 6060 movq %rdx,%r10 6061 movq 8+0(%rbp),%rax 6062 mulq %r11 6063 addq %rax,%r15 6064 adcq $0,%rdx 6065 imulq %r12,%r9 6066 addq %r10,%r15 6067 adcq %rdx,%r9 6068 movq %r13,%r10 6069 movq %r14,%r11 6070 movq %r15,%r12 6071 andq $3,%r12 6072 movq %r15,%r13 6073 andq $-4,%r13 6074 movq %r9,%r14 6075 shrdq $2,%r9,%r15 6076 shrq $2,%r9 6077 addq %r13,%r10 6078 adcq %r14,%r11 6079 adcq $0,%r12 6080 addq %r15,%r10 6081 adcq %r9,%r11 6082 adcq $0,%r12 6083 6084 vpxor (%rsi),%xmm0,%xmm3 6085 vmovdqu %xmm3,(%rdi) 6086 leaq 16(%rsi),%rsi 6087 leaq 16(%rdi),%rdi 6088 vextracti128 $1,%ymm0,%xmm1 6089 1: 6090 vzeroupper 6091 jmp open_sse_tail_16 6092 6093 open_avx2_320: 6094 vmovdqa %ymm0,%ymm1 6095 vmovdqa %ymm0,%ymm2 6096 vmovdqa %ymm4,%ymm5 6097 vmovdqa %ymm4,%ymm6 6098 vmovdqa %ymm8,%ymm9 6099 vmovdqa %ymm8,%ymm10 6100 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 6101 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 6102 vmovdqa %ymm4,%ymm7 6103 vmovdqa %ymm8,%ymm11 6104 vmovdqa %ymm12,160(%rbp) 6105 vmovdqa %ymm13,192(%rbp) 6106 vmovdqa %ymm14,224(%rbp) 6107 movq $10,%r10 6108 1: 6109 vpaddd %ymm4,%ymm0,%ymm0 6110 vpxor %ymm0,%ymm12,%ymm12 6111 vpshufb .rol16(%rip),%ymm12,%ymm12 6112 vpaddd %ymm12,%ymm8,%ymm8 6113 vpxor %ymm8,%ymm4,%ymm4 6114 vpsrld $20,%ymm4,%ymm3 6115 vpslld $12,%ymm4,%ymm4 6116 vpxor %ymm3,%ymm4,%ymm4 6117 vpaddd %ymm4,%ymm0,%ymm0 6118 vpxor %ymm0,%ymm12,%ymm12 6119 vpshufb .rol8(%rip),%ymm12,%ymm12 6120 vpaddd %ymm12,%ymm8,%ymm8 6121 vpxor %ymm8,%ymm4,%ymm4 6122 vpslld $7,%ymm4,%ymm3 
6123 vpsrld $25,%ymm4,%ymm4 6124 vpxor %ymm3,%ymm4,%ymm4 6125 vpalignr $12,%ymm12,%ymm12,%ymm12 6126 vpalignr $8,%ymm8,%ymm8,%ymm8 6127 vpalignr $4,%ymm4,%ymm4,%ymm4 6128 vpaddd %ymm5,%ymm1,%ymm1 6129 vpxor %ymm1,%ymm13,%ymm13 6130 vpshufb .rol16(%rip),%ymm13,%ymm13 6131 vpaddd %ymm13,%ymm9,%ymm9 6132 vpxor %ymm9,%ymm5,%ymm5 6133 vpsrld $20,%ymm5,%ymm3 6134 vpslld $12,%ymm5,%ymm5 6135 vpxor %ymm3,%ymm5,%ymm5 6136 vpaddd %ymm5,%ymm1,%ymm1 6137 vpxor %ymm1,%ymm13,%ymm13 6138 vpshufb .rol8(%rip),%ymm13,%ymm13 6139 vpaddd %ymm13,%ymm9,%ymm9 6140 vpxor %ymm9,%ymm5,%ymm5 6141 vpslld $7,%ymm5,%ymm3 6142 vpsrld $25,%ymm5,%ymm5 6143 vpxor %ymm3,%ymm5,%ymm5 6144 vpalignr $12,%ymm13,%ymm13,%ymm13 6145 vpalignr $8,%ymm9,%ymm9,%ymm9 6146 vpalignr $4,%ymm5,%ymm5,%ymm5 6147 vpaddd %ymm6,%ymm2,%ymm2 6148 vpxor %ymm2,%ymm14,%ymm14 6149 vpshufb .rol16(%rip),%ymm14,%ymm14 6150 vpaddd %ymm14,%ymm10,%ymm10 6151 vpxor %ymm10,%ymm6,%ymm6 6152 vpsrld $20,%ymm6,%ymm3 6153 vpslld $12,%ymm6,%ymm6 6154 vpxor %ymm3,%ymm6,%ymm6 6155 vpaddd %ymm6,%ymm2,%ymm2 6156 vpxor %ymm2,%ymm14,%ymm14 6157 vpshufb .rol8(%rip),%ymm14,%ymm14 6158 vpaddd %ymm14,%ymm10,%ymm10 6159 vpxor %ymm10,%ymm6,%ymm6 6160 vpslld $7,%ymm6,%ymm3 6161 vpsrld $25,%ymm6,%ymm6 6162 vpxor %ymm3,%ymm6,%ymm6 6163 vpalignr $12,%ymm14,%ymm14,%ymm14 6164 vpalignr $8,%ymm10,%ymm10,%ymm10 6165 vpalignr $4,%ymm6,%ymm6,%ymm6 6166 vpaddd %ymm4,%ymm0,%ymm0 6167 vpxor %ymm0,%ymm12,%ymm12 6168 vpshufb .rol16(%rip),%ymm12,%ymm12 6169 vpaddd %ymm12,%ymm8,%ymm8 6170 vpxor %ymm8,%ymm4,%ymm4 6171 vpsrld $20,%ymm4,%ymm3 6172 vpslld $12,%ymm4,%ymm4 6173 vpxor %ymm3,%ymm4,%ymm4 6174 vpaddd %ymm4,%ymm0,%ymm0 6175 vpxor %ymm0,%ymm12,%ymm12 6176 vpshufb .rol8(%rip),%ymm12,%ymm12 6177 vpaddd %ymm12,%ymm8,%ymm8 6178 vpxor %ymm8,%ymm4,%ymm4 6179 vpslld $7,%ymm4,%ymm3 6180 vpsrld $25,%ymm4,%ymm4 6181 vpxor %ymm3,%ymm4,%ymm4 6182 vpalignr $4,%ymm12,%ymm12,%ymm12 6183 vpalignr $8,%ymm8,%ymm8,%ymm8 6184 vpalignr $12,%ymm4,%ymm4,%ymm4 6185 vpaddd 
%ymm5,%ymm1,%ymm1 6186 vpxor %ymm1,%ymm13,%ymm13 6187 vpshufb .rol16(%rip),%ymm13,%ymm13 6188 vpaddd %ymm13,%ymm9,%ymm9 6189 vpxor %ymm9,%ymm5,%ymm5 6190 vpsrld $20,%ymm5,%ymm3 6191 vpslld $12,%ymm5,%ymm5 6192 vpxor %ymm3,%ymm5,%ymm5 6193 vpaddd %ymm5,%ymm1,%ymm1 6194 vpxor %ymm1,%ymm13,%ymm13 6195 vpshufb .rol8(%rip),%ymm13,%ymm13 6196 vpaddd %ymm13,%ymm9,%ymm9 6197 vpxor %ymm9,%ymm5,%ymm5 6198 vpslld $7,%ymm5,%ymm3 6199 vpsrld $25,%ymm5,%ymm5 6200 vpxor %ymm3,%ymm5,%ymm5 6201 vpalignr $4,%ymm13,%ymm13,%ymm13 6202 vpalignr $8,%ymm9,%ymm9,%ymm9 6203 vpalignr $12,%ymm5,%ymm5,%ymm5 6204 vpaddd %ymm6,%ymm2,%ymm2 6205 vpxor %ymm2,%ymm14,%ymm14 6206 vpshufb .rol16(%rip),%ymm14,%ymm14 6207 vpaddd %ymm14,%ymm10,%ymm10 6208 vpxor %ymm10,%ymm6,%ymm6 6209 vpsrld $20,%ymm6,%ymm3 6210 vpslld $12,%ymm6,%ymm6 6211 vpxor %ymm3,%ymm6,%ymm6 6212 vpaddd %ymm6,%ymm2,%ymm2 6213 vpxor %ymm2,%ymm14,%ymm14 6214 vpshufb .rol8(%rip),%ymm14,%ymm14 6215 vpaddd %ymm14,%ymm10,%ymm10 6216 vpxor %ymm10,%ymm6,%ymm6 6217 vpslld $7,%ymm6,%ymm3 6218 vpsrld $25,%ymm6,%ymm6 6219 vpxor %ymm3,%ymm6,%ymm6 6220 vpalignr $4,%ymm14,%ymm14,%ymm14 6221 vpalignr $8,%ymm10,%ymm10,%ymm10 6222 vpalignr $12,%ymm6,%ymm6,%ymm6 6223 6224 decq %r10 6225 jne 1b 6226 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 6227 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 6228 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 6229 vpaddd %ymm7,%ymm4,%ymm4 6230 vpaddd %ymm7,%ymm5,%ymm5 6231 vpaddd %ymm7,%ymm6,%ymm6 6232 vpaddd %ymm11,%ymm8,%ymm8 6233 vpaddd %ymm11,%ymm9,%ymm9 6234 vpaddd %ymm11,%ymm10,%ymm10 6235 vpaddd 160(%rbp),%ymm12,%ymm12 6236 vpaddd 192(%rbp),%ymm13,%ymm13 6237 vpaddd 224(%rbp),%ymm14,%ymm14 6238 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 6239 6240 vpand .clamp(%rip),%ymm3,%ymm3 6241 vmovdqa %ymm3,0(%rbp) 6242 6243 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 6244 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 6245 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 6246 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 6247 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 6248 vperm2i128 
$0x13,%ymm9,%ymm13,%ymm5 6249 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 6250 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 6251 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 6252 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 6253 jmp open_avx2_short 6254 .size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 6255 6256 6257 .type chacha20_poly1305_seal_avx2,@function 6258 .align 64 6259 chacha20_poly1305_seal_avx2: 6260 vzeroupper 6261 vmovdqa .chacha20_consts(%rip),%ymm0 6262 vbroadcasti128 0(%r9),%ymm4 6263 vbroadcasti128 16(%r9),%ymm8 6264 vbroadcasti128 32(%r9),%ymm12 6265 vpaddd .avx2_init(%rip),%ymm12,%ymm12 6266 cmpq $192,%rbx 6267 jbe seal_avx2_192 6268 cmpq $320,%rbx 6269 jbe seal_avx2_320 6270 vmovdqa %ymm0,%ymm1 6271 vmovdqa %ymm0,%ymm2 6272 vmovdqa %ymm0,%ymm3 6273 vmovdqa %ymm4,%ymm5 6274 vmovdqa %ymm4,%ymm6 6275 vmovdqa %ymm4,%ymm7 6276 vmovdqa %ymm4,64(%rbp) 6277 vmovdqa %ymm8,%ymm9 6278 vmovdqa %ymm8,%ymm10 6279 vmovdqa %ymm8,%ymm11 6280 vmovdqa %ymm8,96(%rbp) 6281 vmovdqa %ymm12,%ymm15 6282 vpaddd .avx2_inc(%rip),%ymm15,%ymm14 6283 vpaddd .avx2_inc(%rip),%ymm14,%ymm13 6284 vpaddd .avx2_inc(%rip),%ymm13,%ymm12 6285 vmovdqa %ymm12,160(%rbp) 6286 vmovdqa %ymm13,192(%rbp) 6287 vmovdqa %ymm14,224(%rbp) 6288 vmovdqa %ymm15,256(%rbp) 6289 movq $10,%r10 6290 1: 6291 vmovdqa %ymm8,128(%rbp) 6292 vmovdqa .rol16(%rip),%ymm8 6293 vpaddd %ymm7,%ymm3,%ymm3 6294 vpaddd %ymm6,%ymm2,%ymm2 6295 vpaddd %ymm5,%ymm1,%ymm1 6296 vpaddd %ymm4,%ymm0,%ymm0 6297 vpxor %ymm3,%ymm15,%ymm15 6298 vpxor %ymm2,%ymm14,%ymm14 6299 vpxor %ymm1,%ymm13,%ymm13 6300 vpxor %ymm0,%ymm12,%ymm12 6301 vpshufb %ymm8,%ymm15,%ymm15 6302 vpshufb %ymm8,%ymm14,%ymm14 6303 vpshufb %ymm8,%ymm13,%ymm13 6304 vpshufb %ymm8,%ymm12,%ymm12 6305 vmovdqa 128(%rbp),%ymm8 6306 vpaddd %ymm15,%ymm11,%ymm11 6307 vpaddd %ymm14,%ymm10,%ymm10 6308 vpaddd %ymm13,%ymm9,%ymm9 6309 vpaddd %ymm12,%ymm8,%ymm8 6310 vpxor %ymm11,%ymm7,%ymm7 6311 vpxor %ymm10,%ymm6,%ymm6 6312 vpxor %ymm9,%ymm5,%ymm5 6313 vpxor %ymm8,%ymm4,%ymm4 6314 vmovdqa 
%ymm8,128(%rbp) 6315 vpsrld $20,%ymm7,%ymm8 6316 vpslld $32-20,%ymm7,%ymm7 6317 vpxor %ymm8,%ymm7,%ymm7 6318 vpsrld $20,%ymm6,%ymm8 6319 vpslld $32-20,%ymm6,%ymm6 6320 vpxor %ymm8,%ymm6,%ymm6 6321 vpsrld $20,%ymm5,%ymm8 6322 vpslld $32-20,%ymm5,%ymm5 6323 vpxor %ymm8,%ymm5,%ymm5 6324 vpsrld $20,%ymm4,%ymm8 6325 vpslld $32-20,%ymm4,%ymm4 6326 vpxor %ymm8,%ymm4,%ymm4 6327 vmovdqa .rol8(%rip),%ymm8 6328 vpaddd %ymm7,%ymm3,%ymm3 6329 vpaddd %ymm6,%ymm2,%ymm2 6330 vpaddd %ymm5,%ymm1,%ymm1 6331 vpaddd %ymm4,%ymm0,%ymm0 6332 vpxor %ymm3,%ymm15,%ymm15 6333 vpxor %ymm2,%ymm14,%ymm14 6334 vpxor %ymm1,%ymm13,%ymm13 6335 vpxor %ymm0,%ymm12,%ymm12 6336 vpshufb %ymm8,%ymm15,%ymm15 6337 vpshufb %ymm8,%ymm14,%ymm14 6338 vpshufb %ymm8,%ymm13,%ymm13 6339 vpshufb %ymm8,%ymm12,%ymm12 6340 vmovdqa 128(%rbp),%ymm8 6341 vpaddd %ymm15,%ymm11,%ymm11 6342 vpaddd %ymm14,%ymm10,%ymm10 6343 vpaddd %ymm13,%ymm9,%ymm9 6344 vpaddd %ymm12,%ymm8,%ymm8 6345 vpxor %ymm11,%ymm7,%ymm7 6346 vpxor %ymm10,%ymm6,%ymm6 6347 vpxor %ymm9,%ymm5,%ymm5 6348 vpxor %ymm8,%ymm4,%ymm4 6349 vmovdqa %ymm8,128(%rbp) 6350 vpsrld $25,%ymm7,%ymm8 6351 vpslld $32-25,%ymm7,%ymm7 6352 vpxor %ymm8,%ymm7,%ymm7 6353 vpsrld $25,%ymm6,%ymm8 6354 vpslld $32-25,%ymm6,%ymm6 6355 vpxor %ymm8,%ymm6,%ymm6 6356 vpsrld $25,%ymm5,%ymm8 6357 vpslld $32-25,%ymm5,%ymm5 6358 vpxor %ymm8,%ymm5,%ymm5 6359 vpsrld $25,%ymm4,%ymm8 6360 vpslld $32-25,%ymm4,%ymm4 6361 vpxor %ymm8,%ymm4,%ymm4 6362 vmovdqa 128(%rbp),%ymm8 6363 vpalignr $4,%ymm7,%ymm7,%ymm7 6364 vpalignr $8,%ymm11,%ymm11,%ymm11 6365 vpalignr $12,%ymm15,%ymm15,%ymm15 6366 vpalignr $4,%ymm6,%ymm6,%ymm6 6367 vpalignr $8,%ymm10,%ymm10,%ymm10 6368 vpalignr $12,%ymm14,%ymm14,%ymm14 6369 vpalignr $4,%ymm5,%ymm5,%ymm5 6370 vpalignr $8,%ymm9,%ymm9,%ymm9 6371 vpalignr $12,%ymm13,%ymm13,%ymm13 6372 vpalignr $4,%ymm4,%ymm4,%ymm4 6373 vpalignr $8,%ymm8,%ymm8,%ymm8 6374 vpalignr $12,%ymm12,%ymm12,%ymm12 6375 vmovdqa %ymm8,128(%rbp) 6376 vmovdqa .rol16(%rip),%ymm8 6377 vpaddd %ymm7,%ymm3,%ymm3 6378 
vpaddd %ymm6,%ymm2,%ymm2 6379 vpaddd %ymm5,%ymm1,%ymm1 6380 vpaddd %ymm4,%ymm0,%ymm0 6381 vpxor %ymm3,%ymm15,%ymm15 6382 vpxor %ymm2,%ymm14,%ymm14 6383 vpxor %ymm1,%ymm13,%ymm13 6384 vpxor %ymm0,%ymm12,%ymm12 6385 vpshufb %ymm8,%ymm15,%ymm15 6386 vpshufb %ymm8,%ymm14,%ymm14 6387 vpshufb %ymm8,%ymm13,%ymm13 6388 vpshufb %ymm8,%ymm12,%ymm12 6389 vmovdqa 128(%rbp),%ymm8 6390 vpaddd %ymm15,%ymm11,%ymm11 6391 vpaddd %ymm14,%ymm10,%ymm10 6392 vpaddd %ymm13,%ymm9,%ymm9 6393 vpaddd %ymm12,%ymm8,%ymm8 6394 vpxor %ymm11,%ymm7,%ymm7 6395 vpxor %ymm10,%ymm6,%ymm6 6396 vpxor %ymm9,%ymm5,%ymm5 6397 vpxor %ymm8,%ymm4,%ymm4 6398 vmovdqa %ymm8,128(%rbp) 6399 vpsrld $20,%ymm7,%ymm8 6400 vpslld $32-20,%ymm7,%ymm7 6401 vpxor %ymm8,%ymm7,%ymm7 6402 vpsrld $20,%ymm6,%ymm8 6403 vpslld $32-20,%ymm6,%ymm6 6404 vpxor %ymm8,%ymm6,%ymm6 6405 vpsrld $20,%ymm5,%ymm8 6406 vpslld $32-20,%ymm5,%ymm5 6407 vpxor %ymm8,%ymm5,%ymm5 6408 vpsrld $20,%ymm4,%ymm8 6409 vpslld $32-20,%ymm4,%ymm4 6410 vpxor %ymm8,%ymm4,%ymm4 6411 vmovdqa .rol8(%rip),%ymm8 6412 vpaddd %ymm7,%ymm3,%ymm3 6413 vpaddd %ymm6,%ymm2,%ymm2 6414 vpaddd %ymm5,%ymm1,%ymm1 6415 vpaddd %ymm4,%ymm0,%ymm0 6416 vpxor %ymm3,%ymm15,%ymm15 6417 vpxor %ymm2,%ymm14,%ymm14 6418 vpxor %ymm1,%ymm13,%ymm13 6419 vpxor %ymm0,%ymm12,%ymm12 6420 vpshufb %ymm8,%ymm15,%ymm15 6421 vpshufb %ymm8,%ymm14,%ymm14 6422 vpshufb %ymm8,%ymm13,%ymm13 6423 vpshufb %ymm8,%ymm12,%ymm12 6424 vmovdqa 128(%rbp),%ymm8 6425 vpaddd %ymm15,%ymm11,%ymm11 6426 vpaddd %ymm14,%ymm10,%ymm10 6427 vpaddd %ymm13,%ymm9,%ymm9 6428 vpaddd %ymm12,%ymm8,%ymm8 6429 vpxor %ymm11,%ymm7,%ymm7 6430 vpxor %ymm10,%ymm6,%ymm6 6431 vpxor %ymm9,%ymm5,%ymm5 6432 vpxor %ymm8,%ymm4,%ymm4 6433 vmovdqa %ymm8,128(%rbp) 6434 vpsrld $25,%ymm7,%ymm8 6435 vpslld $32-25,%ymm7,%ymm7 6436 vpxor %ymm8,%ymm7,%ymm7 6437 vpsrld $25,%ymm6,%ymm8 6438 vpslld $32-25,%ymm6,%ymm6 6439 vpxor %ymm8,%ymm6,%ymm6 6440 vpsrld $25,%ymm5,%ymm8 6441 vpslld $32-25,%ymm5,%ymm5 6442 vpxor %ymm8,%ymm5,%ymm5 6443 vpsrld $25,%ymm4,%ymm8 
6444 vpslld $32-25,%ymm4,%ymm4 6445 vpxor %ymm8,%ymm4,%ymm4 6446 vmovdqa 128(%rbp),%ymm8 6447 vpalignr $12,%ymm7,%ymm7,%ymm7 6448 vpalignr $8,%ymm11,%ymm11,%ymm11 6449 vpalignr $4,%ymm15,%ymm15,%ymm15 6450 vpalignr $12,%ymm6,%ymm6,%ymm6 6451 vpalignr $8,%ymm10,%ymm10,%ymm10 6452 vpalignr $4,%ymm14,%ymm14,%ymm14 6453 vpalignr $12,%ymm5,%ymm5,%ymm5 6454 vpalignr $8,%ymm9,%ymm9,%ymm9 6455 vpalignr $4,%ymm13,%ymm13,%ymm13 6456 vpalignr $12,%ymm4,%ymm4,%ymm4 6457 vpalignr $8,%ymm8,%ymm8,%ymm8 6458 vpalignr $4,%ymm12,%ymm12,%ymm12 6459 6460 decq %r10 6461 jnz 1b 6462 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 6463 vpaddd 64(%rbp),%ymm7,%ymm7 6464 vpaddd 96(%rbp),%ymm11,%ymm11 6465 vpaddd 256(%rbp),%ymm15,%ymm15 6466 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 6467 vpaddd 64(%rbp),%ymm6,%ymm6 6468 vpaddd 96(%rbp),%ymm10,%ymm10 6469 vpaddd 224(%rbp),%ymm14,%ymm14 6470 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 6471 vpaddd 64(%rbp),%ymm5,%ymm5 6472 vpaddd 96(%rbp),%ymm9,%ymm9 6473 vpaddd 192(%rbp),%ymm13,%ymm13 6474 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 6475 vpaddd 64(%rbp),%ymm4,%ymm4 6476 vpaddd 96(%rbp),%ymm8,%ymm8 6477 vpaddd 160(%rbp),%ymm12,%ymm12 6478 6479 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6480 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6481 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6482 vpand .clamp(%rip),%ymm15,%ymm15 6483 vmovdqa %ymm15,0(%rbp) 6484 movq %r8,%r8 6485 call poly_hash_ad_internal 6486 6487 vpxor 0(%rsi),%ymm3,%ymm3 6488 vpxor 32(%rsi),%ymm11,%ymm11 6489 vmovdqu %ymm3,0(%rdi) 6490 vmovdqu %ymm11,32(%rdi) 6491 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6492 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6493 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6494 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6495 vpxor 0+64(%rsi),%ymm15,%ymm15 6496 vpxor 32+64(%rsi),%ymm2,%ymm2 6497 vpxor 64+64(%rsi),%ymm6,%ymm6 6498 vpxor 96+64(%rsi),%ymm10,%ymm10 6499 vmovdqu %ymm15,0+64(%rdi) 6500 vmovdqu %ymm2,32+64(%rdi) 6501 vmovdqu %ymm6,64+64(%rdi) 6502 vmovdqu %ymm10,96+64(%rdi) 6503 vperm2i128 
$0x02,%ymm1,%ymm5,%ymm15 6504 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6505 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6506 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6507 vpxor 0+192(%rsi),%ymm15,%ymm15 6508 vpxor 32+192(%rsi),%ymm1,%ymm1 6509 vpxor 64+192(%rsi),%ymm5,%ymm5 6510 vpxor 96+192(%rsi),%ymm9,%ymm9 6511 vmovdqu %ymm15,0+192(%rdi) 6512 vmovdqu %ymm1,32+192(%rdi) 6513 vmovdqu %ymm5,64+192(%rdi) 6514 vmovdqu %ymm9,96+192(%rdi) 6515 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6516 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6517 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6518 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6519 vmovdqa %ymm15,%ymm8 6520 6521 leaq 320(%rsi),%rsi 6522 subq $320,%rbx 6523 movq $320,%rcx 6524 cmpq $128,%rbx 6525 jbe seal_avx2_hash 6526 vpxor 0(%rsi),%ymm0,%ymm0 6527 vpxor 32(%rsi),%ymm4,%ymm4 6528 vpxor 64(%rsi),%ymm8,%ymm8 6529 vpxor 96(%rsi),%ymm12,%ymm12 6530 vmovdqu %ymm0,320(%rdi) 6531 vmovdqu %ymm4,352(%rdi) 6532 vmovdqu %ymm8,384(%rdi) 6533 vmovdqu %ymm12,416(%rdi) 6534 leaq 128(%rsi),%rsi 6535 subq $128,%rbx 6536 movq $8,%rcx 6537 movq $2,%r8 6538 cmpq $128,%rbx 6539 jbe seal_avx2_tail_128 6540 cmpq $256,%rbx 6541 jbe seal_avx2_tail_256 6542 cmpq $384,%rbx 6543 jbe seal_avx2_tail_384 6544 cmpq $512,%rbx 6545 jbe seal_avx2_tail_512 6546 vmovdqa .chacha20_consts(%rip),%ymm0 6547 vmovdqa 64(%rbp),%ymm4 6548 vmovdqa 96(%rbp),%ymm8 6549 vmovdqa %ymm0,%ymm1 6550 vmovdqa %ymm4,%ymm5 6551 vmovdqa %ymm8,%ymm9 6552 vmovdqa %ymm0,%ymm2 6553 vmovdqa %ymm4,%ymm6 6554 vmovdqa %ymm8,%ymm10 6555 vmovdqa %ymm0,%ymm3 6556 vmovdqa %ymm4,%ymm7 6557 vmovdqa %ymm8,%ymm11 6558 vmovdqa .avx2_inc(%rip),%ymm12 6559 vpaddd 160(%rbp),%ymm12,%ymm15 6560 vpaddd %ymm15,%ymm12,%ymm14 6561 vpaddd %ymm14,%ymm12,%ymm13 6562 vpaddd %ymm13,%ymm12,%ymm12 6563 vmovdqa %ymm15,256(%rbp) 6564 vmovdqa %ymm14,224(%rbp) 6565 vmovdqa %ymm13,192(%rbp) 6566 vmovdqa %ymm12,160(%rbp) 6567 vmovdqa %ymm8,128(%rbp) 6568 vmovdqa .rol16(%rip),%ymm8 6569 vpaddd %ymm7,%ymm3,%ymm3 6570 vpaddd %ymm6,%ymm2,%ymm2 6571 vpaddd 
%ymm5,%ymm1,%ymm1 6572 vpaddd %ymm4,%ymm0,%ymm0 6573 vpxor %ymm3,%ymm15,%ymm15 6574 vpxor %ymm2,%ymm14,%ymm14 6575 vpxor %ymm1,%ymm13,%ymm13 6576 vpxor %ymm0,%ymm12,%ymm12 6577 vpshufb %ymm8,%ymm15,%ymm15 6578 vpshufb %ymm8,%ymm14,%ymm14 6579 vpshufb %ymm8,%ymm13,%ymm13 6580 vpshufb %ymm8,%ymm12,%ymm12 6581 vmovdqa 128(%rbp),%ymm8 6582 vpaddd %ymm15,%ymm11,%ymm11 6583 vpaddd %ymm14,%ymm10,%ymm10 6584 vpaddd %ymm13,%ymm9,%ymm9 6585 vpaddd %ymm12,%ymm8,%ymm8 6586 vpxor %ymm11,%ymm7,%ymm7 6587 vpxor %ymm10,%ymm6,%ymm6 6588 vpxor %ymm9,%ymm5,%ymm5 6589 vpxor %ymm8,%ymm4,%ymm4 6590 vmovdqa %ymm8,128(%rbp) 6591 vpsrld $20,%ymm7,%ymm8 6592 vpslld $32-20,%ymm7,%ymm7 6593 vpxor %ymm8,%ymm7,%ymm7 6594 vpsrld $20,%ymm6,%ymm8 6595 vpslld $32-20,%ymm6,%ymm6 6596 vpxor %ymm8,%ymm6,%ymm6 6597 vpsrld $20,%ymm5,%ymm8 6598 vpslld $32-20,%ymm5,%ymm5 6599 vpxor %ymm8,%ymm5,%ymm5 6600 vpsrld $20,%ymm4,%ymm8 6601 vpslld $32-20,%ymm4,%ymm4 6602 vpxor %ymm8,%ymm4,%ymm4 6603 vmovdqa .rol8(%rip),%ymm8 6604 vpaddd %ymm7,%ymm3,%ymm3 6605 vpaddd %ymm6,%ymm2,%ymm2 6606 vpaddd %ymm5,%ymm1,%ymm1 6607 vpaddd %ymm4,%ymm0,%ymm0 6608 vpxor %ymm3,%ymm15,%ymm15 6609 vpxor %ymm2,%ymm14,%ymm14 6610 vpxor %ymm1,%ymm13,%ymm13 6611 vpxor %ymm0,%ymm12,%ymm12 6612 vpshufb %ymm8,%ymm15,%ymm15 6613 vpshufb %ymm8,%ymm14,%ymm14 6614 vpshufb %ymm8,%ymm13,%ymm13 6615 vpshufb %ymm8,%ymm12,%ymm12 6616 vmovdqa 128(%rbp),%ymm8 6617 vpaddd %ymm15,%ymm11,%ymm11 6618 vpaddd %ymm14,%ymm10,%ymm10 6619 vpaddd %ymm13,%ymm9,%ymm9 6620 vpaddd %ymm12,%ymm8,%ymm8 6621 vpxor %ymm11,%ymm7,%ymm7 6622 vpxor %ymm10,%ymm6,%ymm6 6623 vpxor %ymm9,%ymm5,%ymm5 6624 vpxor %ymm8,%ymm4,%ymm4 6625 vmovdqa %ymm8,128(%rbp) 6626 vpsrld $25,%ymm7,%ymm8 6627 vpslld $32-25,%ymm7,%ymm7 6628 vpxor %ymm8,%ymm7,%ymm7 6629 vpsrld $25,%ymm6,%ymm8 6630 vpslld $32-25,%ymm6,%ymm6 6631 vpxor %ymm8,%ymm6,%ymm6 6632 vpsrld $25,%ymm5,%ymm8 6633 vpslld $32-25,%ymm5,%ymm5 6634 vpxor %ymm8,%ymm5,%ymm5 6635 vpsrld $25,%ymm4,%ymm8 6636 vpslld $32-25,%ymm4,%ymm4 6637 
vpxor %ymm8,%ymm4,%ymm4 6638 vmovdqa 128(%rbp),%ymm8 6639 vpalignr $4,%ymm7,%ymm7,%ymm7 6640 vpalignr $8,%ymm11,%ymm11,%ymm11 6641 vpalignr $12,%ymm15,%ymm15,%ymm15 6642 vpalignr $4,%ymm6,%ymm6,%ymm6 6643 vpalignr $8,%ymm10,%ymm10,%ymm10 6644 vpalignr $12,%ymm14,%ymm14,%ymm14 6645 vpalignr $4,%ymm5,%ymm5,%ymm5 6646 vpalignr $8,%ymm9,%ymm9,%ymm9 6647 vpalignr $12,%ymm13,%ymm13,%ymm13 6648 vpalignr $4,%ymm4,%ymm4,%ymm4 6649 vpalignr $8,%ymm8,%ymm8,%ymm8 6650 vpalignr $12,%ymm12,%ymm12,%ymm12 6651 vmovdqa %ymm8,128(%rbp) 6652 vmovdqa .rol16(%rip),%ymm8 6653 vpaddd %ymm7,%ymm3,%ymm3 6654 vpaddd %ymm6,%ymm2,%ymm2 6655 vpaddd %ymm5,%ymm1,%ymm1 6656 vpaddd %ymm4,%ymm0,%ymm0 6657 vpxor %ymm3,%ymm15,%ymm15 6658 vpxor %ymm2,%ymm14,%ymm14 6659 vpxor %ymm1,%ymm13,%ymm13 6660 vpxor %ymm0,%ymm12,%ymm12 6661 vpshufb %ymm8,%ymm15,%ymm15 6662 vpshufb %ymm8,%ymm14,%ymm14 6663 vpshufb %ymm8,%ymm13,%ymm13 6664 vpshufb %ymm8,%ymm12,%ymm12 6665 vmovdqa 128(%rbp),%ymm8 6666 vpaddd %ymm15,%ymm11,%ymm11 6667 vpaddd %ymm14,%ymm10,%ymm10 6668 vpaddd %ymm13,%ymm9,%ymm9 6669 vpaddd %ymm12,%ymm8,%ymm8 6670 vpxor %ymm11,%ymm7,%ymm7 6671 vpxor %ymm10,%ymm6,%ymm6 6672 vpxor %ymm9,%ymm5,%ymm5 6673 vpxor %ymm8,%ymm4,%ymm4 6674 vmovdqa %ymm8,128(%rbp) 6675 vpsrld $20,%ymm7,%ymm8 6676 vpslld $32-20,%ymm7,%ymm7 6677 vpxor %ymm8,%ymm7,%ymm7 6678 vpsrld $20,%ymm6,%ymm8 6679 vpslld $32-20,%ymm6,%ymm6 6680 vpxor %ymm8,%ymm6,%ymm6 6681 vpsrld $20,%ymm5,%ymm8 6682 vpslld $32-20,%ymm5,%ymm5 6683 vpxor %ymm8,%ymm5,%ymm5 6684 vpsrld $20,%ymm4,%ymm8 6685 vpslld $32-20,%ymm4,%ymm4 6686 vpxor %ymm8,%ymm4,%ymm4 6687 vmovdqa .rol8(%rip),%ymm8 6688 vpaddd %ymm7,%ymm3,%ymm3 6689 vpaddd %ymm6,%ymm2,%ymm2 6690 vpaddd %ymm5,%ymm1,%ymm1 6691 vpaddd %ymm4,%ymm0,%ymm0 6692 vpxor %ymm3,%ymm15,%ymm15 6693 vpxor %ymm2,%ymm14,%ymm14 6694 vpxor %ymm1,%ymm13,%ymm13 6695 vpxor %ymm0,%ymm12,%ymm12 6696 vpshufb %ymm8,%ymm15,%ymm15 6697 vpshufb %ymm8,%ymm14,%ymm14 6698 vpshufb %ymm8,%ymm13,%ymm13 6699 vpshufb %ymm8,%ymm12,%ymm12 6700 
vmovdqa 128(%rbp),%ymm8 6701 vpaddd %ymm15,%ymm11,%ymm11 6702 vpaddd %ymm14,%ymm10,%ymm10 6703 vpaddd %ymm13,%ymm9,%ymm9 6704 vpaddd %ymm12,%ymm8,%ymm8 6705 vpxor %ymm11,%ymm7,%ymm7 6706 vpxor %ymm10,%ymm6,%ymm6 6707 vpxor %ymm9,%ymm5,%ymm5 6708 vpxor %ymm8,%ymm4,%ymm4 6709 vmovdqa %ymm8,128(%rbp) 6710 vpsrld $25,%ymm7,%ymm8 6711 vpslld $32-25,%ymm7,%ymm7 6712 vpxor %ymm8,%ymm7,%ymm7 6713 vpsrld $25,%ymm6,%ymm8 6714 vpslld $32-25,%ymm6,%ymm6 6715 vpxor %ymm8,%ymm6,%ymm6 6716 vpsrld $25,%ymm5,%ymm8 6717 vpslld $32-25,%ymm5,%ymm5 6718 vpxor %ymm8,%ymm5,%ymm5 6719 vpsrld $25,%ymm4,%ymm8 6720 vpslld $32-25,%ymm4,%ymm4 6721 vpxor %ymm8,%ymm4,%ymm4 6722 vmovdqa 128(%rbp),%ymm8 6723 vpalignr $12,%ymm7,%ymm7,%ymm7 6724 vpalignr $8,%ymm11,%ymm11,%ymm11 6725 vpalignr $4,%ymm15,%ymm15,%ymm15 6726 vpalignr $12,%ymm6,%ymm6,%ymm6 6727 vpalignr $8,%ymm10,%ymm10,%ymm10 6728 vpalignr $4,%ymm14,%ymm14,%ymm14 6729 vpalignr $12,%ymm5,%ymm5,%ymm5 6730 vpalignr $8,%ymm9,%ymm9,%ymm9 6731 vpalignr $4,%ymm13,%ymm13,%ymm13 6732 vpalignr $12,%ymm4,%ymm4,%ymm4 6733 vpalignr $8,%ymm8,%ymm8,%ymm8 6734 vpalignr $4,%ymm12,%ymm12,%ymm12 6735 vmovdqa %ymm8,128(%rbp) 6736 vmovdqa .rol16(%rip),%ymm8 6737 vpaddd %ymm7,%ymm3,%ymm3 6738 vpaddd %ymm6,%ymm2,%ymm2 6739 vpaddd %ymm5,%ymm1,%ymm1 6740 vpaddd %ymm4,%ymm0,%ymm0 6741 vpxor %ymm3,%ymm15,%ymm15 6742 vpxor %ymm2,%ymm14,%ymm14 6743 vpxor %ymm1,%ymm13,%ymm13 6744 vpxor %ymm0,%ymm12,%ymm12 6745 vpshufb %ymm8,%ymm15,%ymm15 6746 vpshufb %ymm8,%ymm14,%ymm14 6747 vpshufb %ymm8,%ymm13,%ymm13 6748 vpshufb %ymm8,%ymm12,%ymm12 6749 vmovdqa 128(%rbp),%ymm8 6750 vpaddd %ymm15,%ymm11,%ymm11 6751 vpaddd %ymm14,%ymm10,%ymm10 6752 vpaddd %ymm13,%ymm9,%ymm9 6753 vpaddd %ymm12,%ymm8,%ymm8 6754 vpxor %ymm11,%ymm7,%ymm7 6755 vpxor %ymm10,%ymm6,%ymm6 6756 vpxor %ymm9,%ymm5,%ymm5 6757 vpxor %ymm8,%ymm4,%ymm4 6758 vmovdqa %ymm8,128(%rbp) 6759 vpsrld $20,%ymm7,%ymm8 6760 vpslld $32-20,%ymm7,%ymm7 6761 vpxor %ymm8,%ymm7,%ymm7 6762 vpsrld $20,%ymm6,%ymm8 6763 vpslld 
$32-20,%ymm6,%ymm6 6764 vpxor %ymm8,%ymm6,%ymm6 6765 vpsrld $20,%ymm5,%ymm8 6766 vpslld $32-20,%ymm5,%ymm5 6767 vpxor %ymm8,%ymm5,%ymm5 6768 vpsrld $20,%ymm4,%ymm8 6769 vpslld $32-20,%ymm4,%ymm4 6770 vpxor %ymm8,%ymm4,%ymm4 6771 vmovdqa .rol8(%rip),%ymm8 6772 vpaddd %ymm7,%ymm3,%ymm3 6773 vpaddd %ymm6,%ymm2,%ymm2 6774 vpaddd %ymm5,%ymm1,%ymm1 6775 vpaddd %ymm4,%ymm0,%ymm0 6776 6777 subq $16,%rdi 6778 movq $9,%rcx 6779 jmp 4f 6780 1: 6781 vmovdqa .chacha20_consts(%rip),%ymm0 6782 vmovdqa 64(%rbp),%ymm4 6783 vmovdqa 96(%rbp),%ymm8 6784 vmovdqa %ymm0,%ymm1 6785 vmovdqa %ymm4,%ymm5 6786 vmovdqa %ymm8,%ymm9 6787 vmovdqa %ymm0,%ymm2 6788 vmovdqa %ymm4,%ymm6 6789 vmovdqa %ymm8,%ymm10 6790 vmovdqa %ymm0,%ymm3 6791 vmovdqa %ymm4,%ymm7 6792 vmovdqa %ymm8,%ymm11 6793 vmovdqa .avx2_inc(%rip),%ymm12 6794 vpaddd 160(%rbp),%ymm12,%ymm15 6795 vpaddd %ymm15,%ymm12,%ymm14 6796 vpaddd %ymm14,%ymm12,%ymm13 6797 vpaddd %ymm13,%ymm12,%ymm12 6798 vmovdqa %ymm15,256(%rbp) 6799 vmovdqa %ymm14,224(%rbp) 6800 vmovdqa %ymm13,192(%rbp) 6801 vmovdqa %ymm12,160(%rbp) 6802 6803 movq $10,%rcx 6804 2: 6805 addq 0(%rdi),%r10 6806 adcq 8+0(%rdi),%r11 6807 adcq $1,%r12 6808 vmovdqa %ymm8,128(%rbp) 6809 vmovdqa .rol16(%rip),%ymm8 6810 vpaddd %ymm7,%ymm3,%ymm3 6811 vpaddd %ymm6,%ymm2,%ymm2 6812 vpaddd %ymm5,%ymm1,%ymm1 6813 vpaddd %ymm4,%ymm0,%ymm0 6814 vpxor %ymm3,%ymm15,%ymm15 6815 vpxor %ymm2,%ymm14,%ymm14 6816 vpxor %ymm1,%ymm13,%ymm13 6817 vpxor %ymm0,%ymm12,%ymm12 6818 movq 0+0(%rbp),%rdx 6819 movq %rdx,%r15 6820 mulxq %r10,%r13,%r14 6821 mulxq %r11,%rax,%rdx 6822 imulq %r12,%r15 6823 addq %rax,%r14 6824 adcq %rdx,%r15 6825 vpshufb %ymm8,%ymm15,%ymm15 6826 vpshufb %ymm8,%ymm14,%ymm14 6827 vpshufb %ymm8,%ymm13,%ymm13 6828 vpshufb %ymm8,%ymm12,%ymm12 6829 vmovdqa 128(%rbp),%ymm8 6830 vpaddd %ymm15,%ymm11,%ymm11 6831 vpaddd %ymm14,%ymm10,%ymm10 6832 vpaddd %ymm13,%ymm9,%ymm9 6833 vpaddd %ymm12,%ymm8,%ymm8 6834 movq 8+0(%rbp),%rdx 6835 mulxq %r10,%r10,%rax 6836 addq %r10,%r14 6837 mulxq %r11,%r11,%r9 
6838 adcq %r11,%r15 6839 adcq $0,%r9 6840 imulq %r12,%rdx 6841 vpxor %ymm11,%ymm7,%ymm7 6842 vpxor %ymm10,%ymm6,%ymm6 6843 vpxor %ymm9,%ymm5,%ymm5 6844 vpxor %ymm8,%ymm4,%ymm4 6845 vmovdqa %ymm8,128(%rbp) 6846 vpsrld $20,%ymm7,%ymm8 6847 vpslld $32-20,%ymm7,%ymm7 6848 vpxor %ymm8,%ymm7,%ymm7 6849 vpsrld $20,%ymm6,%ymm8 6850 vpslld $32-20,%ymm6,%ymm6 6851 vpxor %ymm8,%ymm6,%ymm6 6852 vpsrld $20,%ymm5,%ymm8 6853 addq %rax,%r15 6854 adcq %rdx,%r9 6855 vpslld $32-20,%ymm5,%ymm5 6856 vpxor %ymm8,%ymm5,%ymm5 6857 vpsrld $20,%ymm4,%ymm8 6858 vpslld $32-20,%ymm4,%ymm4 6859 vpxor %ymm8,%ymm4,%ymm4 6860 vmovdqa .rol8(%rip),%ymm8 6861 vpaddd %ymm7,%ymm3,%ymm3 6862 vpaddd %ymm6,%ymm2,%ymm2 6863 vpaddd %ymm5,%ymm1,%ymm1 6864 vpaddd %ymm4,%ymm0,%ymm0 6865 movq %r13,%r10 6866 movq %r14,%r11 6867 movq %r15,%r12 6868 andq $3,%r12 6869 movq %r15,%r13 6870 andq $-4,%r13 6871 movq %r9,%r14 6872 shrdq $2,%r9,%r15 6873 shrq $2,%r9 6874 addq %r13,%r10 6875 adcq %r14,%r11 6876 adcq $0,%r12 6877 addq %r15,%r10 6878 adcq %r9,%r11 6879 adcq $0,%r12 6880 6881 4: 6882 vpxor %ymm3,%ymm15,%ymm15 6883 vpxor %ymm2,%ymm14,%ymm14 6884 vpxor %ymm1,%ymm13,%ymm13 6885 vpxor %ymm0,%ymm12,%ymm12 6886 vpshufb %ymm8,%ymm15,%ymm15 6887 vpshufb %ymm8,%ymm14,%ymm14 6888 vpshufb %ymm8,%ymm13,%ymm13 6889 vpshufb %ymm8,%ymm12,%ymm12 6890 vmovdqa 128(%rbp),%ymm8 6891 addq 16(%rdi),%r10 6892 adcq 8+16(%rdi),%r11 6893 adcq $1,%r12 6894 vpaddd %ymm15,%ymm11,%ymm11 6895 vpaddd %ymm14,%ymm10,%ymm10 6896 vpaddd %ymm13,%ymm9,%ymm9 6897 vpaddd %ymm12,%ymm8,%ymm8 6898 vpxor %ymm11,%ymm7,%ymm7 6899 vpxor %ymm10,%ymm6,%ymm6 6900 vpxor %ymm9,%ymm5,%ymm5 6901 vpxor %ymm8,%ymm4,%ymm4 6902 movq 0+0(%rbp),%rdx 6903 movq %rdx,%r15 6904 mulxq %r10,%r13,%r14 6905 mulxq %r11,%rax,%rdx 6906 imulq %r12,%r15 6907 addq %rax,%r14 6908 adcq %rdx,%r15 6909 vmovdqa %ymm8,128(%rbp) 6910 vpsrld $25,%ymm7,%ymm8 6911 vpslld $32-25,%ymm7,%ymm7 6912 vpxor %ymm8,%ymm7,%ymm7 6913 vpsrld $25,%ymm6,%ymm8 6914 vpslld $32-25,%ymm6,%ymm6 6915 vpxor 
%ymm8,%ymm6,%ymm6 6916 vpsrld $25,%ymm5,%ymm8 6917 vpslld $32-25,%ymm5,%ymm5 6918 vpxor %ymm8,%ymm5,%ymm5 6919 vpsrld $25,%ymm4,%ymm8 6920 vpslld $32-25,%ymm4,%ymm4 6921 vpxor %ymm8,%ymm4,%ymm4 6922 vmovdqa 128(%rbp),%ymm8 6923 vpalignr $4,%ymm7,%ymm7,%ymm7 6924 vpalignr $8,%ymm11,%ymm11,%ymm11 6925 vpalignr $12,%ymm15,%ymm15,%ymm15 6926 vpalignr $4,%ymm6,%ymm6,%ymm6 6927 movq 8+0(%rbp),%rdx 6928 mulxq %r10,%r10,%rax 6929 addq %r10,%r14 6930 mulxq %r11,%r11,%r9 6931 adcq %r11,%r15 6932 adcq $0,%r9 6933 imulq %r12,%rdx 6934 vpalignr $8,%ymm10,%ymm10,%ymm10 6935 vpalignr $12,%ymm14,%ymm14,%ymm14 6936 vpalignr $4,%ymm5,%ymm5,%ymm5 6937 vpalignr $8,%ymm9,%ymm9,%ymm9 6938 vpalignr $12,%ymm13,%ymm13,%ymm13 6939 vpalignr $4,%ymm4,%ymm4,%ymm4 6940 vpalignr $8,%ymm8,%ymm8,%ymm8 6941 vpalignr $12,%ymm12,%ymm12,%ymm12 6942 vmovdqa %ymm8,128(%rbp) 6943 vmovdqa .rol16(%rip),%ymm8 6944 vpaddd %ymm7,%ymm3,%ymm3 6945 vpaddd %ymm6,%ymm2,%ymm2 6946 vpaddd %ymm5,%ymm1,%ymm1 6947 vpaddd %ymm4,%ymm0,%ymm0 6948 vpxor %ymm3,%ymm15,%ymm15 6949 vpxor %ymm2,%ymm14,%ymm14 6950 vpxor %ymm1,%ymm13,%ymm13 6951 vpxor %ymm0,%ymm12,%ymm12 6952 addq %rax,%r15 6953 adcq %rdx,%r9 6954 vpshufb %ymm8,%ymm15,%ymm15 6955 vpshufb %ymm8,%ymm14,%ymm14 6956 vpshufb %ymm8,%ymm13,%ymm13 6957 vpshufb %ymm8,%ymm12,%ymm12 6958 vmovdqa 128(%rbp),%ymm8 6959 vpaddd %ymm15,%ymm11,%ymm11 6960 vpaddd %ymm14,%ymm10,%ymm10 6961 vpaddd %ymm13,%ymm9,%ymm9 6962 vpaddd %ymm12,%ymm8,%ymm8 6963 movq %r13,%r10 6964 movq %r14,%r11 6965 movq %r15,%r12 6966 andq $3,%r12 6967 movq %r15,%r13 6968 andq $-4,%r13 6969 movq %r9,%r14 6970 shrdq $2,%r9,%r15 6971 shrq $2,%r9 6972 addq %r13,%r10 6973 adcq %r14,%r11 6974 adcq $0,%r12 6975 addq %r15,%r10 6976 adcq %r9,%r11 6977 adcq $0,%r12 6978 vpxor %ymm11,%ymm7,%ymm7 6979 vpxor %ymm10,%ymm6,%ymm6 6980 vpxor %ymm9,%ymm5,%ymm5 6981 vpxor %ymm8,%ymm4,%ymm4 6982 vmovdqa %ymm8,128(%rbp) 6983 vpsrld $20,%ymm7,%ymm8 6984 vpslld $32-20,%ymm7,%ymm7 6985 vpxor %ymm8,%ymm7,%ymm7 6986 addq 
32(%rdi),%r10 6987 adcq 8+32(%rdi),%r11 6988 adcq $1,%r12 6989 6990 leaq 48(%rdi),%rdi 6991 vpsrld $20,%ymm6,%ymm8 6992 vpslld $32-20,%ymm6,%ymm6 6993 vpxor %ymm8,%ymm6,%ymm6 6994 vpsrld $20,%ymm5,%ymm8 6995 vpslld $32-20,%ymm5,%ymm5 6996 vpxor %ymm8,%ymm5,%ymm5 6997 vpsrld $20,%ymm4,%ymm8 6998 vpslld $32-20,%ymm4,%ymm4 6999 vpxor %ymm8,%ymm4,%ymm4 7000 vmovdqa .rol8(%rip),%ymm8 7001 vpaddd %ymm7,%ymm3,%ymm3 7002 vpaddd %ymm6,%ymm2,%ymm2 7003 vpaddd %ymm5,%ymm1,%ymm1 7004 vpaddd %ymm4,%ymm0,%ymm0 7005 vpxor %ymm3,%ymm15,%ymm15 7006 vpxor %ymm2,%ymm14,%ymm14 7007 vpxor %ymm1,%ymm13,%ymm13 7008 vpxor %ymm0,%ymm12,%ymm12 7009 movq 0+0(%rbp),%rdx 7010 movq %rdx,%r15 7011 mulxq %r10,%r13,%r14 7012 mulxq %r11,%rax,%rdx 7013 imulq %r12,%r15 7014 addq %rax,%r14 7015 adcq %rdx,%r15 7016 vpshufb %ymm8,%ymm15,%ymm15 7017 vpshufb %ymm8,%ymm14,%ymm14 7018 vpshufb %ymm8,%ymm13,%ymm13 7019 vpshufb %ymm8,%ymm12,%ymm12 7020 vmovdqa 128(%rbp),%ymm8 7021 vpaddd %ymm15,%ymm11,%ymm11 7022 vpaddd %ymm14,%ymm10,%ymm10 7023 vpaddd %ymm13,%ymm9,%ymm9 7024 movq 8+0(%rbp),%rdx 7025 mulxq %r10,%r10,%rax 7026 addq %r10,%r14 7027 mulxq %r11,%r11,%r9 7028 adcq %r11,%r15 7029 adcq $0,%r9 7030 imulq %r12,%rdx 7031 vpaddd %ymm12,%ymm8,%ymm8 7032 vpxor %ymm11,%ymm7,%ymm7 7033 vpxor %ymm10,%ymm6,%ymm6 7034 vpxor %ymm9,%ymm5,%ymm5 7035 vpxor %ymm8,%ymm4,%ymm4 7036 vmovdqa %ymm8,128(%rbp) 7037 vpsrld $25,%ymm7,%ymm8 7038 vpslld $32-25,%ymm7,%ymm7 7039 addq %rax,%r15 7040 adcq %rdx,%r9 7041 vpxor %ymm8,%ymm7,%ymm7 7042 vpsrld $25,%ymm6,%ymm8 7043 vpslld $32-25,%ymm6,%ymm6 7044 vpxor %ymm8,%ymm6,%ymm6 7045 vpsrld $25,%ymm5,%ymm8 7046 vpslld $32-25,%ymm5,%ymm5 7047 vpxor %ymm8,%ymm5,%ymm5 7048 vpsrld $25,%ymm4,%ymm8 7049 vpslld $32-25,%ymm4,%ymm4 7050 vpxor %ymm8,%ymm4,%ymm4 7051 vmovdqa 128(%rbp),%ymm8 7052 vpalignr $12,%ymm7,%ymm7,%ymm7 7053 vpalignr $8,%ymm11,%ymm11,%ymm11 7054 vpalignr $4,%ymm15,%ymm15,%ymm15 7055 vpalignr $12,%ymm6,%ymm6,%ymm6 7056 vpalignr $8,%ymm10,%ymm10,%ymm10 7057 vpalignr 
$4,%ymm14,%ymm14,%ymm14 7058 vpalignr $12,%ymm5,%ymm5,%ymm5 7059 movq %r13,%r10 7060 movq %r14,%r11 7061 movq %r15,%r12 7062 andq $3,%r12 7063 movq %r15,%r13 7064 andq $-4,%r13 7065 movq %r9,%r14 7066 shrdq $2,%r9,%r15 7067 shrq $2,%r9 7068 addq %r13,%r10 7069 adcq %r14,%r11 7070 adcq $0,%r12 7071 addq %r15,%r10 7072 adcq %r9,%r11 7073 adcq $0,%r12 7074 vpalignr $8,%ymm9,%ymm9,%ymm9 7075 vpalignr $4,%ymm13,%ymm13,%ymm13 7076 vpalignr $12,%ymm4,%ymm4,%ymm4 7077 vpalignr $8,%ymm8,%ymm8,%ymm8 7078 vpalignr $4,%ymm12,%ymm12,%ymm12 7079 7080 decq %rcx 7081 jne 2b 7082 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 7083 vpaddd 64(%rbp),%ymm7,%ymm7 7084 vpaddd 96(%rbp),%ymm11,%ymm11 7085 vpaddd 256(%rbp),%ymm15,%ymm15 7086 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 7087 vpaddd 64(%rbp),%ymm6,%ymm6 7088 vpaddd 96(%rbp),%ymm10,%ymm10 7089 vpaddd 224(%rbp),%ymm14,%ymm14 7090 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 7091 vpaddd 64(%rbp),%ymm5,%ymm5 7092 vpaddd 96(%rbp),%ymm9,%ymm9 7093 vpaddd 192(%rbp),%ymm13,%ymm13 7094 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7095 vpaddd 64(%rbp),%ymm4,%ymm4 7096 vpaddd 96(%rbp),%ymm8,%ymm8 7097 vpaddd 160(%rbp),%ymm12,%ymm12 7098 7099 leaq 32(%rdi),%rdi 7100 vmovdqa %ymm0,128(%rbp) 7101 addq -32(%rdi),%r10 7102 adcq 8+-32(%rdi),%r11 7103 adcq $1,%r12 7104 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 7105 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 7106 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 7107 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 7108 vpxor 0+0(%rsi),%ymm0,%ymm0 7109 vpxor 32+0(%rsi),%ymm3,%ymm3 7110 vpxor 64+0(%rsi),%ymm7,%ymm7 7111 vpxor 96+0(%rsi),%ymm11,%ymm11 7112 vmovdqu %ymm0,0+0(%rdi) 7113 vmovdqu %ymm3,32+0(%rdi) 7114 vmovdqu %ymm7,64+0(%rdi) 7115 vmovdqu %ymm11,96+0(%rdi) 7116 7117 vmovdqa 128(%rbp),%ymm0 7118 movq 0+0(%rbp),%rax 7119 movq %rax,%r15 7120 mulq %r10 7121 movq %rax,%r13 7122 movq %rdx,%r14 7123 movq 0+0(%rbp),%rax 7124 mulq %r11 7125 imulq %r12,%r15 7126 addq %rax,%r14 7127 adcq %rdx,%r15 7128 movq 8+0(%rbp),%rax 7129 movq %rax,%r9 7130 
mulq %r10 7131 addq %rax,%r14 7132 adcq $0,%rdx 7133 movq %rdx,%r10 7134 movq 8+0(%rbp),%rax 7135 mulq %r11 7136 addq %rax,%r15 7137 adcq $0,%rdx 7138 imulq %r12,%r9 7139 addq %r10,%r15 7140 adcq %rdx,%r9 7141 movq %r13,%r10 7142 movq %r14,%r11 7143 movq %r15,%r12 7144 andq $3,%r12 7145 movq %r15,%r13 7146 andq $-4,%r13 7147 movq %r9,%r14 7148 shrdq $2,%r9,%r15 7149 shrq $2,%r9 7150 addq %r13,%r10 7151 adcq %r14,%r11 7152 adcq $0,%r12 7153 addq %r15,%r10 7154 adcq %r9,%r11 7155 adcq $0,%r12 7156 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7157 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7158 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7159 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7160 vpxor 0+128(%rsi),%ymm3,%ymm3 7161 vpxor 32+128(%rsi),%ymm2,%ymm2 7162 vpxor 64+128(%rsi),%ymm6,%ymm6 7163 vpxor 96+128(%rsi),%ymm10,%ymm10 7164 vmovdqu %ymm3,0+128(%rdi) 7165 vmovdqu %ymm2,32+128(%rdi) 7166 vmovdqu %ymm6,64+128(%rdi) 7167 vmovdqu %ymm10,96+128(%rdi) 7168 addq -16(%rdi),%r10 7169 adcq 8+-16(%rdi),%r11 7170 adcq $1,%r12 7171 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7172 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7173 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7174 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7175 vpxor 0+256(%rsi),%ymm3,%ymm3 7176 vpxor 32+256(%rsi),%ymm1,%ymm1 7177 vpxor 64+256(%rsi),%ymm5,%ymm5 7178 vpxor 96+256(%rsi),%ymm9,%ymm9 7179 vmovdqu %ymm3,0+256(%rdi) 7180 vmovdqu %ymm1,32+256(%rdi) 7181 vmovdqu %ymm5,64+256(%rdi) 7182 vmovdqu %ymm9,96+256(%rdi) 7183 movq 0+0(%rbp),%rax 7184 movq %rax,%r15 7185 mulq %r10 7186 movq %rax,%r13 7187 movq %rdx,%r14 7188 movq 0+0(%rbp),%rax 7189 mulq %r11 7190 imulq %r12,%r15 7191 addq %rax,%r14 7192 adcq %rdx,%r15 7193 movq 8+0(%rbp),%rax 7194 movq %rax,%r9 7195 mulq %r10 7196 addq %rax,%r14 7197 adcq $0,%rdx 7198 movq %rdx,%r10 7199 movq 8+0(%rbp),%rax 7200 mulq %r11 7201 addq %rax,%r15 7202 adcq $0,%rdx 7203 imulq %r12,%r9 7204 addq %r10,%r15 7205 adcq %rdx,%r9 7206 movq %r13,%r10 7207 movq %r14,%r11 7208 movq %r15,%r12 7209 andq $3,%r12 7210 movq %r15,%r13 7211 andq 
$-4,%r13 7212 movq %r9,%r14 7213 shrdq $2,%r9,%r15 7214 shrq $2,%r9 7215 addq %r13,%r10 7216 adcq %r14,%r11 7217 adcq $0,%r12 7218 addq %r15,%r10 7219 adcq %r9,%r11 7220 adcq $0,%r12 7221 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7222 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7223 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7224 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7225 vpxor 0+384(%rsi),%ymm3,%ymm3 7226 vpxor 32+384(%rsi),%ymm0,%ymm0 7227 vpxor 64+384(%rsi),%ymm4,%ymm4 7228 vpxor 96+384(%rsi),%ymm8,%ymm8 7229 vmovdqu %ymm3,0+384(%rdi) 7230 vmovdqu %ymm0,32+384(%rdi) 7231 vmovdqu %ymm4,64+384(%rdi) 7232 vmovdqu %ymm8,96+384(%rdi) 7233 7234 leaq 512(%rsi),%rsi 7235 subq $512,%rbx 7236 cmpq $512,%rbx 7237 jg 1b 7238 addq 0(%rdi),%r10 7239 adcq 8+0(%rdi),%r11 7240 adcq $1,%r12 7241 movq 0+0(%rbp),%rax 7242 movq %rax,%r15 7243 mulq %r10 7244 movq %rax,%r13 7245 movq %rdx,%r14 7246 movq 0+0(%rbp),%rax 7247 mulq %r11 7248 imulq %r12,%r15 7249 addq %rax,%r14 7250 adcq %rdx,%r15 7251 movq 8+0(%rbp),%rax 7252 movq %rax,%r9 7253 mulq %r10 7254 addq %rax,%r14 7255 adcq $0,%rdx 7256 movq %rdx,%r10 7257 movq 8+0(%rbp),%rax 7258 mulq %r11 7259 addq %rax,%r15 7260 adcq $0,%rdx 7261 imulq %r12,%r9 7262 addq %r10,%r15 7263 adcq %rdx,%r9 7264 movq %r13,%r10 7265 movq %r14,%r11 7266 movq %r15,%r12 7267 andq $3,%r12 7268 movq %r15,%r13 7269 andq $-4,%r13 7270 movq %r9,%r14 7271 shrdq $2,%r9,%r15 7272 shrq $2,%r9 7273 addq %r13,%r10 7274 adcq %r14,%r11 7275 adcq $0,%r12 7276 addq %r15,%r10 7277 adcq %r9,%r11 7278 adcq $0,%r12 7279 addq 16(%rdi),%r10 7280 adcq 8+16(%rdi),%r11 7281 adcq $1,%r12 7282 movq 0+0(%rbp),%rax 7283 movq %rax,%r15 7284 mulq %r10 7285 movq %rax,%r13 7286 movq %rdx,%r14 7287 movq 0+0(%rbp),%rax 7288 mulq %r11 7289 imulq %r12,%r15 7290 addq %rax,%r14 7291 adcq %rdx,%r15 7292 movq 8+0(%rbp),%rax 7293 movq %rax,%r9 7294 mulq %r10 7295 addq %rax,%r14 7296 adcq $0,%rdx 7297 movq %rdx,%r10 7298 movq 8+0(%rbp),%rax 7299 mulq %r11 7300 addq %rax,%r15 7301 adcq $0,%rdx 7302 imulq %r12,%r9 7303 
addq %r10,%r15 7304 adcq %rdx,%r9 7305 movq %r13,%r10 7306 movq %r14,%r11 7307 movq %r15,%r12 7308 andq $3,%r12 7309 movq %r15,%r13 7310 andq $-4,%r13 7311 movq %r9,%r14 7312 shrdq $2,%r9,%r15 7313 shrq $2,%r9 7314 addq %r13,%r10 7315 adcq %r14,%r11 7316 adcq $0,%r12 7317 addq %r15,%r10 7318 adcq %r9,%r11 7319 adcq $0,%r12 7320 7321 leaq 32(%rdi),%rdi 7322 movq $10,%rcx 7323 xorq %r8,%r8 7324 cmpq $128,%rbx 7325 ja 3f 7326 7327 seal_avx2_tail_128: 7328 vmovdqa .chacha20_consts(%rip),%ymm0 7329 vmovdqa 64(%rbp),%ymm4 7330 vmovdqa 96(%rbp),%ymm8 7331 vmovdqa .avx2_inc(%rip),%ymm12 7332 vpaddd 160(%rbp),%ymm12,%ymm12 7333 vmovdqa %ymm12,160(%rbp) 7334 7335 1: 7336 addq 0(%rdi),%r10 7337 adcq 8+0(%rdi),%r11 7338 adcq $1,%r12 7339 movq 0+0(%rbp),%rax 7340 movq %rax,%r15 7341 mulq %r10 7342 movq %rax,%r13 7343 movq %rdx,%r14 7344 movq 0+0(%rbp),%rax 7345 mulq %r11 7346 imulq %r12,%r15 7347 addq %rax,%r14 7348 adcq %rdx,%r15 7349 movq 8+0(%rbp),%rax 7350 movq %rax,%r9 7351 mulq %r10 7352 addq %rax,%r14 7353 adcq $0,%rdx 7354 movq %rdx,%r10 7355 movq 8+0(%rbp),%rax 7356 mulq %r11 7357 addq %rax,%r15 7358 adcq $0,%rdx 7359 imulq %r12,%r9 7360 addq %r10,%r15 7361 adcq %rdx,%r9 7362 movq %r13,%r10 7363 movq %r14,%r11 7364 movq %r15,%r12 7365 andq $3,%r12 7366 movq %r15,%r13 7367 andq $-4,%r13 7368 movq %r9,%r14 7369 shrdq $2,%r9,%r15 7370 shrq $2,%r9 7371 addq %r13,%r10 7372 adcq %r14,%r11 7373 adcq $0,%r12 7374 addq %r15,%r10 7375 adcq %r9,%r11 7376 adcq $0,%r12 7377 7378 leaq 16(%rdi),%rdi 7379 2: 7380 vpaddd %ymm4,%ymm0,%ymm0 7381 vpxor %ymm0,%ymm12,%ymm12 7382 vpshufb .rol16(%rip),%ymm12,%ymm12 7383 vpaddd %ymm12,%ymm8,%ymm8 7384 vpxor %ymm8,%ymm4,%ymm4 7385 vpsrld $20,%ymm4,%ymm3 7386 vpslld $12,%ymm4,%ymm4 7387 vpxor %ymm3,%ymm4,%ymm4 7388 vpaddd %ymm4,%ymm0,%ymm0 7389 vpxor %ymm0,%ymm12,%ymm12 7390 vpshufb .rol8(%rip),%ymm12,%ymm12 7391 vpaddd %ymm12,%ymm8,%ymm8 7392 vpxor %ymm8,%ymm4,%ymm4 7393 vpslld $7,%ymm4,%ymm3 7394 vpsrld $25,%ymm4,%ymm4 7395 vpxor 
%ymm3,%ymm4,%ymm4 7396 vpalignr $12,%ymm12,%ymm12,%ymm12 7397 vpalignr $8,%ymm8,%ymm8,%ymm8 7398 vpalignr $4,%ymm4,%ymm4,%ymm4 7399 addq 0(%rdi),%r10 7400 adcq 8+0(%rdi),%r11 7401 adcq $1,%r12 7402 movq 0+0(%rbp),%rax 7403 movq %rax,%r15 7404 mulq %r10 7405 movq %rax,%r13 7406 movq %rdx,%r14 7407 movq 0+0(%rbp),%rax 7408 mulq %r11 7409 imulq %r12,%r15 7410 addq %rax,%r14 7411 adcq %rdx,%r15 7412 movq 8+0(%rbp),%rax 7413 movq %rax,%r9 7414 mulq %r10 7415 addq %rax,%r14 7416 adcq $0,%rdx 7417 movq %rdx,%r10 7418 movq 8+0(%rbp),%rax 7419 mulq %r11 7420 addq %rax,%r15 7421 adcq $0,%rdx 7422 imulq %r12,%r9 7423 addq %r10,%r15 7424 adcq %rdx,%r9 7425 movq %r13,%r10 7426 movq %r14,%r11 7427 movq %r15,%r12 7428 andq $3,%r12 7429 movq %r15,%r13 7430 andq $-4,%r13 7431 movq %r9,%r14 7432 shrdq $2,%r9,%r15 7433 shrq $2,%r9 7434 addq %r13,%r10 7435 adcq %r14,%r11 7436 adcq $0,%r12 7437 addq %r15,%r10 7438 adcq %r9,%r11 7439 adcq $0,%r12 7440 vpaddd %ymm4,%ymm0,%ymm0 7441 vpxor %ymm0,%ymm12,%ymm12 7442 vpshufb .rol16(%rip),%ymm12,%ymm12 7443 vpaddd %ymm12,%ymm8,%ymm8 7444 vpxor %ymm8,%ymm4,%ymm4 7445 vpsrld $20,%ymm4,%ymm3 7446 vpslld $12,%ymm4,%ymm4 7447 vpxor %ymm3,%ymm4,%ymm4 7448 vpaddd %ymm4,%ymm0,%ymm0 7449 vpxor %ymm0,%ymm12,%ymm12 7450 vpshufb .rol8(%rip),%ymm12,%ymm12 7451 vpaddd %ymm12,%ymm8,%ymm8 7452 vpxor %ymm8,%ymm4,%ymm4 7453 vpslld $7,%ymm4,%ymm3 7454 vpsrld $25,%ymm4,%ymm4 7455 vpxor %ymm3,%ymm4,%ymm4 7456 vpalignr $4,%ymm12,%ymm12,%ymm12 7457 vpalignr $8,%ymm8,%ymm8,%ymm8 7458 vpalignr $12,%ymm4,%ymm4,%ymm4 7459 addq 16(%rdi),%r10 7460 adcq 8+16(%rdi),%r11 7461 adcq $1,%r12 7462 movq 0+0(%rbp),%rax 7463 movq %rax,%r15 7464 mulq %r10 7465 movq %rax,%r13 7466 movq %rdx,%r14 7467 movq 0+0(%rbp),%rax 7468 mulq %r11 7469 imulq %r12,%r15 7470 addq %rax,%r14 7471 adcq %rdx,%r15 7472 movq 8+0(%rbp),%rax 7473 movq %rax,%r9 7474 mulq %r10 7475 addq %rax,%r14 7476 adcq $0,%rdx 7477 movq %rdx,%r10 7478 movq 8+0(%rbp),%rax 7479 mulq %r11 7480 addq %rax,%r15 7481 adcq 
$0,%rdx 7482 imulq %r12,%r9 7483 addq %r10,%r15 7484 adcq %rdx,%r9 7485 movq %r13,%r10 7486 movq %r14,%r11 7487 movq %r15,%r12 7488 andq $3,%r12 7489 movq %r15,%r13 7490 andq $-4,%r13 7491 movq %r9,%r14 7492 shrdq $2,%r9,%r15 7493 shrq $2,%r9 7494 addq %r13,%r10 7495 adcq %r14,%r11 7496 adcq $0,%r12 7497 addq %r15,%r10 7498 adcq %r9,%r11 7499 adcq $0,%r12 7500 7501 leaq 32(%rdi),%rdi 7502 decq %rcx 7503 jg 1b 7504 decq %r8 7505 jge 2b 7506 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7507 vpaddd 64(%rbp),%ymm4,%ymm4 7508 vpaddd 96(%rbp),%ymm8,%ymm8 7509 vpaddd 160(%rbp),%ymm12,%ymm12 7510 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7511 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7512 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7513 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7514 vmovdqa %ymm3,%ymm8 7515 7516 jmp seal_avx2_short_loop 7517 3: 7518 cmpq $256,%rbx 7519 ja 3f 7520 7521 seal_avx2_tail_256: 7522 vmovdqa .chacha20_consts(%rip),%ymm0 7523 vmovdqa 64(%rbp),%ymm4 7524 vmovdqa 96(%rbp),%ymm8 7525 vmovdqa %ymm0,%ymm1 7526 vmovdqa %ymm4,%ymm5 7527 vmovdqa %ymm8,%ymm9 7528 vmovdqa .avx2_inc(%rip),%ymm12 7529 vpaddd 160(%rbp),%ymm12,%ymm13 7530 vpaddd %ymm13,%ymm12,%ymm12 7531 vmovdqa %ymm12,160(%rbp) 7532 vmovdqa %ymm13,192(%rbp) 7533 7534 1: 7535 addq 0(%rdi),%r10 7536 adcq 8+0(%rdi),%r11 7537 adcq $1,%r12 7538 movq 0+0(%rbp),%rax 7539 movq %rax,%r15 7540 mulq %r10 7541 movq %rax,%r13 7542 movq %rdx,%r14 7543 movq 0+0(%rbp),%rax 7544 mulq %r11 7545 imulq %r12,%r15 7546 addq %rax,%r14 7547 adcq %rdx,%r15 7548 movq 8+0(%rbp),%rax 7549 movq %rax,%r9 7550 mulq %r10 7551 addq %rax,%r14 7552 adcq $0,%rdx 7553 movq %rdx,%r10 7554 movq 8+0(%rbp),%rax 7555 mulq %r11 7556 addq %rax,%r15 7557 adcq $0,%rdx 7558 imulq %r12,%r9 7559 addq %r10,%r15 7560 adcq %rdx,%r9 7561 movq %r13,%r10 7562 movq %r14,%r11 7563 movq %r15,%r12 7564 andq $3,%r12 7565 movq %r15,%r13 7566 andq $-4,%r13 7567 movq %r9,%r14 7568 shrdq $2,%r9,%r15 7569 shrq $2,%r9 7570 addq %r13,%r10 7571 adcq %r14,%r11 7572 adcq $0,%r12 7573 addq 
%r15,%r10 7574 adcq %r9,%r11 7575 adcq $0,%r12 7576 7577 leaq 16(%rdi),%rdi 7578 2: 7579 vpaddd %ymm4,%ymm0,%ymm0 7580 vpxor %ymm0,%ymm12,%ymm12 7581 vpshufb .rol16(%rip),%ymm12,%ymm12 7582 vpaddd %ymm12,%ymm8,%ymm8 7583 vpxor %ymm8,%ymm4,%ymm4 7584 vpsrld $20,%ymm4,%ymm3 7585 vpslld $12,%ymm4,%ymm4 7586 vpxor %ymm3,%ymm4,%ymm4 7587 vpaddd %ymm4,%ymm0,%ymm0 7588 vpxor %ymm0,%ymm12,%ymm12 7589 vpshufb .rol8(%rip),%ymm12,%ymm12 7590 vpaddd %ymm12,%ymm8,%ymm8 7591 vpxor %ymm8,%ymm4,%ymm4 7592 vpslld $7,%ymm4,%ymm3 7593 vpsrld $25,%ymm4,%ymm4 7594 vpxor %ymm3,%ymm4,%ymm4 7595 vpalignr $12,%ymm12,%ymm12,%ymm12 7596 vpalignr $8,%ymm8,%ymm8,%ymm8 7597 vpalignr $4,%ymm4,%ymm4,%ymm4 7598 vpaddd %ymm5,%ymm1,%ymm1 7599 vpxor %ymm1,%ymm13,%ymm13 7600 vpshufb .rol16(%rip),%ymm13,%ymm13 7601 vpaddd %ymm13,%ymm9,%ymm9 7602 vpxor %ymm9,%ymm5,%ymm5 7603 vpsrld $20,%ymm5,%ymm3 7604 vpslld $12,%ymm5,%ymm5 7605 vpxor %ymm3,%ymm5,%ymm5 7606 vpaddd %ymm5,%ymm1,%ymm1 7607 vpxor %ymm1,%ymm13,%ymm13 7608 vpshufb .rol8(%rip),%ymm13,%ymm13 7609 vpaddd %ymm13,%ymm9,%ymm9 7610 vpxor %ymm9,%ymm5,%ymm5 7611 vpslld $7,%ymm5,%ymm3 7612 vpsrld $25,%ymm5,%ymm5 7613 vpxor %ymm3,%ymm5,%ymm5 7614 vpalignr $12,%ymm13,%ymm13,%ymm13 7615 vpalignr $8,%ymm9,%ymm9,%ymm9 7616 vpalignr $4,%ymm5,%ymm5,%ymm5 7617 addq 0(%rdi),%r10 7618 adcq 8+0(%rdi),%r11 7619 adcq $1,%r12 7620 movq 0+0(%rbp),%rax 7621 movq %rax,%r15 7622 mulq %r10 7623 movq %rax,%r13 7624 movq %rdx,%r14 7625 movq 0+0(%rbp),%rax 7626 mulq %r11 7627 imulq %r12,%r15 7628 addq %rax,%r14 7629 adcq %rdx,%r15 7630 movq 8+0(%rbp),%rax 7631 movq %rax,%r9 7632 mulq %r10 7633 addq %rax,%r14 7634 adcq $0,%rdx 7635 movq %rdx,%r10 7636 movq 8+0(%rbp),%rax 7637 mulq %r11 7638 addq %rax,%r15 7639 adcq $0,%rdx 7640 imulq %r12,%r9 7641 addq %r10,%r15 7642 adcq %rdx,%r9 7643 movq %r13,%r10 7644 movq %r14,%r11 7645 movq %r15,%r12 7646 andq $3,%r12 7647 movq %r15,%r13 7648 andq $-4,%r13 7649 movq %r9,%r14 7650 shrdq $2,%r9,%r15 7651 shrq $2,%r9 7652 addq %r13,%r10 
7653 adcq %r14,%r11 7654 adcq $0,%r12 7655 addq %r15,%r10 7656 adcq %r9,%r11 7657 adcq $0,%r12 7658 vpaddd %ymm4,%ymm0,%ymm0 7659 vpxor %ymm0,%ymm12,%ymm12 7660 vpshufb .rol16(%rip),%ymm12,%ymm12 7661 vpaddd %ymm12,%ymm8,%ymm8 7662 vpxor %ymm8,%ymm4,%ymm4 7663 vpsrld $20,%ymm4,%ymm3 7664 vpslld $12,%ymm4,%ymm4 7665 vpxor %ymm3,%ymm4,%ymm4 7666 vpaddd %ymm4,%ymm0,%ymm0 7667 vpxor %ymm0,%ymm12,%ymm12 7668 vpshufb .rol8(%rip),%ymm12,%ymm12 7669 vpaddd %ymm12,%ymm8,%ymm8 7670 vpxor %ymm8,%ymm4,%ymm4 7671 vpslld $7,%ymm4,%ymm3 7672 vpsrld $25,%ymm4,%ymm4 7673 vpxor %ymm3,%ymm4,%ymm4 7674 vpalignr $4,%ymm12,%ymm12,%ymm12 7675 vpalignr $8,%ymm8,%ymm8,%ymm8 7676 vpalignr $12,%ymm4,%ymm4,%ymm4 7677 vpaddd %ymm5,%ymm1,%ymm1 7678 vpxor %ymm1,%ymm13,%ymm13 7679 vpshufb .rol16(%rip),%ymm13,%ymm13 7680 vpaddd %ymm13,%ymm9,%ymm9 7681 vpxor %ymm9,%ymm5,%ymm5 7682 vpsrld $20,%ymm5,%ymm3 7683 vpslld $12,%ymm5,%ymm5 7684 vpxor %ymm3,%ymm5,%ymm5 7685 vpaddd %ymm5,%ymm1,%ymm1 7686 vpxor %ymm1,%ymm13,%ymm13 7687 vpshufb .rol8(%rip),%ymm13,%ymm13 7688 vpaddd %ymm13,%ymm9,%ymm9 7689 vpxor %ymm9,%ymm5,%ymm5 7690 vpslld $7,%ymm5,%ymm3 7691 vpsrld $25,%ymm5,%ymm5 7692 vpxor %ymm3,%ymm5,%ymm5 7693 vpalignr $4,%ymm13,%ymm13,%ymm13 7694 vpalignr $8,%ymm9,%ymm9,%ymm9 7695 vpalignr $12,%ymm5,%ymm5,%ymm5 7696 addq 16(%rdi),%r10 7697 adcq 8+16(%rdi),%r11 7698 adcq $1,%r12 7699 movq 0+0(%rbp),%rax 7700 movq %rax,%r15 7701 mulq %r10 7702 movq %rax,%r13 7703 movq %rdx,%r14 7704 movq 0+0(%rbp),%rax 7705 mulq %r11 7706 imulq %r12,%r15 7707 addq %rax,%r14 7708 adcq %rdx,%r15 7709 movq 8+0(%rbp),%rax 7710 movq %rax,%r9 7711 mulq %r10 7712 addq %rax,%r14 7713 adcq $0,%rdx 7714 movq %rdx,%r10 7715 movq 8+0(%rbp),%rax 7716 mulq %r11 7717 addq %rax,%r15 7718 adcq $0,%rdx 7719 imulq %r12,%r9 7720 addq %r10,%r15 7721 adcq %rdx,%r9 7722 movq %r13,%r10 7723 movq %r14,%r11 7724 movq %r15,%r12 7725 andq $3,%r12 7726 movq %r15,%r13 7727 andq $-4,%r13 7728 movq %r9,%r14 7729 shrdq $2,%r9,%r15 7730 shrq $2,%r9 7731 
addq %r13,%r10 7732 adcq %r14,%r11 7733 adcq $0,%r12 7734 addq %r15,%r10 7735 adcq %r9,%r11 7736 adcq $0,%r12 7737 7738 leaq 32(%rdi),%rdi 7739 decq %rcx 7740 jg 1b 7741 decq %r8 7742 jge 2b 7743 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 7744 vpaddd 64(%rbp),%ymm5,%ymm5 7745 vpaddd 96(%rbp),%ymm9,%ymm9 7746 vpaddd 192(%rbp),%ymm13,%ymm13 7747 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 7748 vpaddd 64(%rbp),%ymm4,%ymm4 7749 vpaddd 96(%rbp),%ymm8,%ymm8 7750 vpaddd 160(%rbp),%ymm12,%ymm12 7751 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7752 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7753 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7754 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7755 vpxor 0+0(%rsi),%ymm3,%ymm3 7756 vpxor 32+0(%rsi),%ymm1,%ymm1 7757 vpxor 64+0(%rsi),%ymm5,%ymm5 7758 vpxor 96+0(%rsi),%ymm9,%ymm9 7759 vmovdqu %ymm3,0+0(%rdi) 7760 vmovdqu %ymm1,32+0(%rdi) 7761 vmovdqu %ymm5,64+0(%rdi) 7762 vmovdqu %ymm9,96+0(%rdi) 7763 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7764 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7765 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7766 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7767 vmovdqa %ymm3,%ymm8 7768 7769 movq $128,%rcx 7770 leaq 128(%rsi),%rsi 7771 subq $128,%rbx 7772 jmp seal_avx2_hash 7773 3: 7774 cmpq $384,%rbx 7775 ja seal_avx2_tail_512 7776 7777 seal_avx2_tail_384: 7778 vmovdqa .chacha20_consts(%rip),%ymm0 7779 vmovdqa 64(%rbp),%ymm4 7780 vmovdqa 96(%rbp),%ymm8 7781 vmovdqa %ymm0,%ymm1 7782 vmovdqa %ymm4,%ymm5 7783 vmovdqa %ymm8,%ymm9 7784 vmovdqa %ymm0,%ymm2 7785 vmovdqa %ymm4,%ymm6 7786 vmovdqa %ymm8,%ymm10 7787 vmovdqa .avx2_inc(%rip),%ymm12 7788 vpaddd 160(%rbp),%ymm12,%ymm14 7789 vpaddd %ymm14,%ymm12,%ymm13 7790 vpaddd %ymm13,%ymm12,%ymm12 7791 vmovdqa %ymm12,160(%rbp) 7792 vmovdqa %ymm13,192(%rbp) 7793 vmovdqa %ymm14,224(%rbp) 7794 7795 1: 7796 addq 0(%rdi),%r10 7797 adcq 8+0(%rdi),%r11 7798 adcq $1,%r12 7799 movq 0+0(%rbp),%rax 7800 movq %rax,%r15 7801 mulq %r10 7802 movq %rax,%r13 7803 movq %rdx,%r14 7804 movq 0+0(%rbp),%rax 7805 mulq %r11 7806 imulq %r12,%r15 7807 addq 
%rax,%r14 7808 adcq %rdx,%r15 7809 movq 8+0(%rbp),%rax 7810 movq %rax,%r9 7811 mulq %r10 7812 addq %rax,%r14 7813 adcq $0,%rdx 7814 movq %rdx,%r10 7815 movq 8+0(%rbp),%rax 7816 mulq %r11 7817 addq %rax,%r15 7818 adcq $0,%rdx 7819 imulq %r12,%r9 7820 addq %r10,%r15 7821 adcq %rdx,%r9 7822 movq %r13,%r10 7823 movq %r14,%r11 7824 movq %r15,%r12 7825 andq $3,%r12 7826 movq %r15,%r13 7827 andq $-4,%r13 7828 movq %r9,%r14 7829 shrdq $2,%r9,%r15 7830 shrq $2,%r9 7831 addq %r13,%r10 7832 adcq %r14,%r11 7833 adcq $0,%r12 7834 addq %r15,%r10 7835 adcq %r9,%r11 7836 adcq $0,%r12 7837 7838 leaq 16(%rdi),%rdi 7839 2: 7840 vpaddd %ymm4,%ymm0,%ymm0 7841 vpxor %ymm0,%ymm12,%ymm12 7842 vpshufb .rol16(%rip),%ymm12,%ymm12 7843 vpaddd %ymm12,%ymm8,%ymm8 7844 vpxor %ymm8,%ymm4,%ymm4 7845 vpsrld $20,%ymm4,%ymm3 7846 vpslld $12,%ymm4,%ymm4 7847 vpxor %ymm3,%ymm4,%ymm4 7848 vpaddd %ymm4,%ymm0,%ymm0 7849 vpxor %ymm0,%ymm12,%ymm12 7850 vpshufb .rol8(%rip),%ymm12,%ymm12 7851 vpaddd %ymm12,%ymm8,%ymm8 7852 vpxor %ymm8,%ymm4,%ymm4 7853 vpslld $7,%ymm4,%ymm3 7854 vpsrld $25,%ymm4,%ymm4 7855 vpxor %ymm3,%ymm4,%ymm4 7856 vpalignr $12,%ymm12,%ymm12,%ymm12 7857 vpalignr $8,%ymm8,%ymm8,%ymm8 7858 vpalignr $4,%ymm4,%ymm4,%ymm4 7859 vpaddd %ymm5,%ymm1,%ymm1 7860 vpxor %ymm1,%ymm13,%ymm13 7861 vpshufb .rol16(%rip),%ymm13,%ymm13 7862 vpaddd %ymm13,%ymm9,%ymm9 7863 vpxor %ymm9,%ymm5,%ymm5 7864 vpsrld $20,%ymm5,%ymm3 7865 vpslld $12,%ymm5,%ymm5 7866 vpxor %ymm3,%ymm5,%ymm5 7867 vpaddd %ymm5,%ymm1,%ymm1 7868 vpxor %ymm1,%ymm13,%ymm13 7869 vpshufb .rol8(%rip),%ymm13,%ymm13 7870 vpaddd %ymm13,%ymm9,%ymm9 7871 vpxor %ymm9,%ymm5,%ymm5 7872 vpslld $7,%ymm5,%ymm3 7873 vpsrld $25,%ymm5,%ymm5 7874 vpxor %ymm3,%ymm5,%ymm5 7875 vpalignr $12,%ymm13,%ymm13,%ymm13 7876 vpalignr $8,%ymm9,%ymm9,%ymm9 7877 vpalignr $4,%ymm5,%ymm5,%ymm5 7878 addq 0(%rdi),%r10 7879 adcq 8+0(%rdi),%r11 7880 adcq $1,%r12 7881 movq 0+0(%rbp),%rax 7882 movq %rax,%r15 7883 mulq %r10 7884 movq %rax,%r13 7885 movq %rdx,%r14 7886 movq 
0+0(%rbp),%rax 7887 mulq %r11 7888 imulq %r12,%r15 7889 addq %rax,%r14 7890 adcq %rdx,%r15 7891 movq 8+0(%rbp),%rax 7892 movq %rax,%r9 7893 mulq %r10 7894 addq %rax,%r14 7895 adcq $0,%rdx 7896 movq %rdx,%r10 7897 movq 8+0(%rbp),%rax 7898 mulq %r11 7899 addq %rax,%r15 7900 adcq $0,%rdx 7901 imulq %r12,%r9 7902 addq %r10,%r15 7903 adcq %rdx,%r9 7904 movq %r13,%r10 7905 movq %r14,%r11 7906 movq %r15,%r12 7907 andq $3,%r12 7908 movq %r15,%r13 7909 andq $-4,%r13 7910 movq %r9,%r14 7911 shrdq $2,%r9,%r15 7912 shrq $2,%r9 7913 addq %r13,%r10 7914 adcq %r14,%r11 7915 adcq $0,%r12 7916 addq %r15,%r10 7917 adcq %r9,%r11 7918 adcq $0,%r12 7919 vpaddd %ymm6,%ymm2,%ymm2 7920 vpxor %ymm2,%ymm14,%ymm14 7921 vpshufb .rol16(%rip),%ymm14,%ymm14 7922 vpaddd %ymm14,%ymm10,%ymm10 7923 vpxor %ymm10,%ymm6,%ymm6 7924 vpsrld $20,%ymm6,%ymm3 7925 vpslld $12,%ymm6,%ymm6 7926 vpxor %ymm3,%ymm6,%ymm6 7927 vpaddd %ymm6,%ymm2,%ymm2 7928 vpxor %ymm2,%ymm14,%ymm14 7929 vpshufb .rol8(%rip),%ymm14,%ymm14 7930 vpaddd %ymm14,%ymm10,%ymm10 7931 vpxor %ymm10,%ymm6,%ymm6 7932 vpslld $7,%ymm6,%ymm3 7933 vpsrld $25,%ymm6,%ymm6 7934 vpxor %ymm3,%ymm6,%ymm6 7935 vpalignr $12,%ymm14,%ymm14,%ymm14 7936 vpalignr $8,%ymm10,%ymm10,%ymm10 7937 vpalignr $4,%ymm6,%ymm6,%ymm6 7938 vpaddd %ymm4,%ymm0,%ymm0 7939 vpxor %ymm0,%ymm12,%ymm12 7940 vpshufb .rol16(%rip),%ymm12,%ymm12 7941 vpaddd %ymm12,%ymm8,%ymm8 7942 vpxor %ymm8,%ymm4,%ymm4 7943 vpsrld $20,%ymm4,%ymm3 7944 vpslld $12,%ymm4,%ymm4 7945 vpxor %ymm3,%ymm4,%ymm4 7946 vpaddd %ymm4,%ymm0,%ymm0 7947 vpxor %ymm0,%ymm12,%ymm12 7948 vpshufb .rol8(%rip),%ymm12,%ymm12 7949 vpaddd %ymm12,%ymm8,%ymm8 7950 vpxor %ymm8,%ymm4,%ymm4 7951 vpslld $7,%ymm4,%ymm3 7952 vpsrld $25,%ymm4,%ymm4 7953 vpxor %ymm3,%ymm4,%ymm4 7954 vpalignr $4,%ymm12,%ymm12,%ymm12 7955 vpalignr $8,%ymm8,%ymm8,%ymm8 7956 vpalignr $12,%ymm4,%ymm4,%ymm4 7957 addq 16(%rdi),%r10 7958 adcq 8+16(%rdi),%r11 7959 adcq $1,%r12 7960 movq 0+0(%rbp),%rax 7961 movq %rax,%r15 7962 mulq %r10 7963 movq %rax,%r13 7964 
movq %rdx,%r14 7965 movq 0+0(%rbp),%rax 7966 mulq %r11 7967 imulq %r12,%r15 7968 addq %rax,%r14 7969 adcq %rdx,%r15 7970 movq 8+0(%rbp),%rax 7971 movq %rax,%r9 7972 mulq %r10 7973 addq %rax,%r14 7974 adcq $0,%rdx 7975 movq %rdx,%r10 7976 movq 8+0(%rbp),%rax 7977 mulq %r11 7978 addq %rax,%r15 7979 adcq $0,%rdx 7980 imulq %r12,%r9 7981 addq %r10,%r15 7982 adcq %rdx,%r9 7983 movq %r13,%r10 7984 movq %r14,%r11 7985 movq %r15,%r12 7986 andq $3,%r12 7987 movq %r15,%r13 7988 andq $-4,%r13 7989 movq %r9,%r14 7990 shrdq $2,%r9,%r15 7991 shrq $2,%r9 7992 addq %r13,%r10 7993 adcq %r14,%r11 7994 adcq $0,%r12 7995 addq %r15,%r10 7996 adcq %r9,%r11 7997 adcq $0,%r12 7998 vpaddd %ymm5,%ymm1,%ymm1 7999 vpxor %ymm1,%ymm13,%ymm13 8000 vpshufb .rol16(%rip),%ymm13,%ymm13 8001 vpaddd %ymm13,%ymm9,%ymm9 8002 vpxor %ymm9,%ymm5,%ymm5 8003 vpsrld $20,%ymm5,%ymm3 8004 vpslld $12,%ymm5,%ymm5 8005 vpxor %ymm3,%ymm5,%ymm5 8006 vpaddd %ymm5,%ymm1,%ymm1 8007 vpxor %ymm1,%ymm13,%ymm13 8008 vpshufb .rol8(%rip),%ymm13,%ymm13 8009 vpaddd %ymm13,%ymm9,%ymm9 8010 vpxor %ymm9,%ymm5,%ymm5 8011 vpslld $7,%ymm5,%ymm3 8012 vpsrld $25,%ymm5,%ymm5 8013 vpxor %ymm3,%ymm5,%ymm5 8014 vpalignr $4,%ymm13,%ymm13,%ymm13 8015 vpalignr $8,%ymm9,%ymm9,%ymm9 8016 vpalignr $12,%ymm5,%ymm5,%ymm5 8017 vpaddd %ymm6,%ymm2,%ymm2 8018 vpxor %ymm2,%ymm14,%ymm14 8019 vpshufb .rol16(%rip),%ymm14,%ymm14 8020 vpaddd %ymm14,%ymm10,%ymm10 8021 vpxor %ymm10,%ymm6,%ymm6 8022 vpsrld $20,%ymm6,%ymm3 8023 vpslld $12,%ymm6,%ymm6 8024 vpxor %ymm3,%ymm6,%ymm6 8025 vpaddd %ymm6,%ymm2,%ymm2 8026 vpxor %ymm2,%ymm14,%ymm14 8027 vpshufb .rol8(%rip),%ymm14,%ymm14 8028 vpaddd %ymm14,%ymm10,%ymm10 8029 vpxor %ymm10,%ymm6,%ymm6 8030 vpslld $7,%ymm6,%ymm3 8031 vpsrld $25,%ymm6,%ymm6 8032 vpxor %ymm3,%ymm6,%ymm6 8033 vpalignr $4,%ymm14,%ymm14,%ymm14 8034 vpalignr $8,%ymm10,%ymm10,%ymm10 8035 vpalignr $12,%ymm6,%ymm6,%ymm6 8036 8037 leaq 32(%rdi),%rdi 8038 decq %rcx 8039 jg 1b 8040 decq %r8 8041 jge 2b 8042 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 
8043 vpaddd 64(%rbp),%ymm6,%ymm6 8044 vpaddd 96(%rbp),%ymm10,%ymm10 8045 vpaddd 224(%rbp),%ymm14,%ymm14 8046 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8047 vpaddd 64(%rbp),%ymm5,%ymm5 8048 vpaddd 96(%rbp),%ymm9,%ymm9 8049 vpaddd 192(%rbp),%ymm13,%ymm13 8050 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8051 vpaddd 64(%rbp),%ymm4,%ymm4 8052 vpaddd 96(%rbp),%ymm8,%ymm8 8053 vpaddd 160(%rbp),%ymm12,%ymm12 8054 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8055 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8056 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8057 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8058 vpxor 0+0(%rsi),%ymm3,%ymm3 8059 vpxor 32+0(%rsi),%ymm2,%ymm2 8060 vpxor 64+0(%rsi),%ymm6,%ymm6 8061 vpxor 96+0(%rsi),%ymm10,%ymm10 8062 vmovdqu %ymm3,0+0(%rdi) 8063 vmovdqu %ymm2,32+0(%rdi) 8064 vmovdqu %ymm6,64+0(%rdi) 8065 vmovdqu %ymm10,96+0(%rdi) 8066 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8067 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8068 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8069 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8070 vpxor 0+128(%rsi),%ymm3,%ymm3 8071 vpxor 32+128(%rsi),%ymm1,%ymm1 8072 vpxor 64+128(%rsi),%ymm5,%ymm5 8073 vpxor 96+128(%rsi),%ymm9,%ymm9 8074 vmovdqu %ymm3,0+128(%rdi) 8075 vmovdqu %ymm1,32+128(%rdi) 8076 vmovdqu %ymm5,64+128(%rdi) 8077 vmovdqu %ymm9,96+128(%rdi) 8078 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8079 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8080 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8081 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8082 vmovdqa %ymm3,%ymm8 8083 8084 movq $256,%rcx 8085 leaq 256(%rsi),%rsi 8086 subq $256,%rbx 8087 jmp seal_avx2_hash 8088 8089 seal_avx2_tail_512: 8090 vmovdqa .chacha20_consts(%rip),%ymm0 8091 vmovdqa 64(%rbp),%ymm4 8092 vmovdqa 96(%rbp),%ymm8 8093 vmovdqa %ymm0,%ymm1 8094 vmovdqa %ymm4,%ymm5 8095 vmovdqa %ymm8,%ymm9 8096 vmovdqa %ymm0,%ymm2 8097 vmovdqa %ymm4,%ymm6 8098 vmovdqa %ymm8,%ymm10 8099 vmovdqa %ymm0,%ymm3 8100 vmovdqa %ymm4,%ymm7 8101 vmovdqa %ymm8,%ymm11 8102 vmovdqa .avx2_inc(%rip),%ymm12 8103 vpaddd 160(%rbp),%ymm12,%ymm15 8104 vpaddd %ymm15,%ymm12,%ymm14 
8105 vpaddd %ymm14,%ymm12,%ymm13 8106 vpaddd %ymm13,%ymm12,%ymm12 8107 vmovdqa %ymm15,256(%rbp) 8108 vmovdqa %ymm14,224(%rbp) 8109 vmovdqa %ymm13,192(%rbp) 8110 vmovdqa %ymm12,160(%rbp) 8111 8112 1: 8113 addq 0(%rdi),%r10 8114 adcq 8+0(%rdi),%r11 8115 adcq $1,%r12 8116 movq 0+0(%rbp),%rdx 8117 movq %rdx,%r15 8118 mulxq %r10,%r13,%r14 8119 mulxq %r11,%rax,%rdx 8120 imulq %r12,%r15 8121 addq %rax,%r14 8122 adcq %rdx,%r15 8123 movq 8+0(%rbp),%rdx 8124 mulxq %r10,%r10,%rax 8125 addq %r10,%r14 8126 mulxq %r11,%r11,%r9 8127 adcq %r11,%r15 8128 adcq $0,%r9 8129 imulq %r12,%rdx 8130 addq %rax,%r15 8131 adcq %rdx,%r9 8132 movq %r13,%r10 8133 movq %r14,%r11 8134 movq %r15,%r12 8135 andq $3,%r12 8136 movq %r15,%r13 8137 andq $-4,%r13 8138 movq %r9,%r14 8139 shrdq $2,%r9,%r15 8140 shrq $2,%r9 8141 addq %r13,%r10 8142 adcq %r14,%r11 8143 adcq $0,%r12 8144 addq %r15,%r10 8145 adcq %r9,%r11 8146 adcq $0,%r12 8147 8148 leaq 16(%rdi),%rdi 8149 2: 8150 vmovdqa %ymm8,128(%rbp) 8151 vmovdqa .rol16(%rip),%ymm8 8152 vpaddd %ymm7,%ymm3,%ymm3 8153 vpaddd %ymm6,%ymm2,%ymm2 8154 vpaddd %ymm5,%ymm1,%ymm1 8155 vpaddd %ymm4,%ymm0,%ymm0 8156 vpxor %ymm3,%ymm15,%ymm15 8157 vpxor %ymm2,%ymm14,%ymm14 8158 vpxor %ymm1,%ymm13,%ymm13 8159 vpxor %ymm0,%ymm12,%ymm12 8160 vpshufb %ymm8,%ymm15,%ymm15 8161 vpshufb %ymm8,%ymm14,%ymm14 8162 vpshufb %ymm8,%ymm13,%ymm13 8163 vpshufb %ymm8,%ymm12,%ymm12 8164 vmovdqa 128(%rbp),%ymm8 8165 vpaddd %ymm15,%ymm11,%ymm11 8166 vpaddd %ymm14,%ymm10,%ymm10 8167 vpaddd %ymm13,%ymm9,%ymm9 8168 vpaddd %ymm12,%ymm8,%ymm8 8169 vpxor %ymm11,%ymm7,%ymm7 8170 addq 0(%rdi),%r10 8171 adcq 8+0(%rdi),%r11 8172 adcq $1,%r12 8173 vpxor %ymm10,%ymm6,%ymm6 8174 vpxor %ymm9,%ymm5,%ymm5 8175 vpxor %ymm8,%ymm4,%ymm4 8176 vmovdqa %ymm8,128(%rbp) 8177 vpsrld $20,%ymm7,%ymm8 8178 vpslld $32-20,%ymm7,%ymm7 8179 vpxor %ymm8,%ymm7,%ymm7 8180 vpsrld $20,%ymm6,%ymm8 8181 vpslld $32-20,%ymm6,%ymm6 8182 vpxor %ymm8,%ymm6,%ymm6 8183 vpsrld $20,%ymm5,%ymm8 8184 vpslld $32-20,%ymm5,%ymm5 8185 vpxor 
%ymm8,%ymm5,%ymm5 8186 vpsrld $20,%ymm4,%ymm8 8187 vpslld $32-20,%ymm4,%ymm4 8188 vpxor %ymm8,%ymm4,%ymm4 8189 vmovdqa .rol8(%rip),%ymm8 8190 vpaddd %ymm7,%ymm3,%ymm3 8191 vpaddd %ymm6,%ymm2,%ymm2 8192 vpaddd %ymm5,%ymm1,%ymm1 8193 movq 0+0(%rbp),%rdx 8194 movq %rdx,%r15 8195 mulxq %r10,%r13,%r14 8196 mulxq %r11,%rax,%rdx 8197 imulq %r12,%r15 8198 addq %rax,%r14 8199 adcq %rdx,%r15 8200 vpaddd %ymm4,%ymm0,%ymm0 8201 vpxor %ymm3,%ymm15,%ymm15 8202 vpxor %ymm2,%ymm14,%ymm14 8203 vpxor %ymm1,%ymm13,%ymm13 8204 vpxor %ymm0,%ymm12,%ymm12 8205 vpshufb %ymm8,%ymm15,%ymm15 8206 vpshufb %ymm8,%ymm14,%ymm14 8207 vpshufb %ymm8,%ymm13,%ymm13 8208 vpshufb %ymm8,%ymm12,%ymm12 8209 vmovdqa 128(%rbp),%ymm8 8210 vpaddd %ymm15,%ymm11,%ymm11 8211 vpaddd %ymm14,%ymm10,%ymm10 8212 vpaddd %ymm13,%ymm9,%ymm9 8213 vpaddd %ymm12,%ymm8,%ymm8 8214 vpxor %ymm11,%ymm7,%ymm7 8215 vpxor %ymm10,%ymm6,%ymm6 8216 vpxor %ymm9,%ymm5,%ymm5 8217 vpxor %ymm8,%ymm4,%ymm4 8218 vmovdqa %ymm8,128(%rbp) 8219 vpsrld $25,%ymm7,%ymm8 8220 movq 8+0(%rbp),%rdx 8221 mulxq %r10,%r10,%rax 8222 addq %r10,%r14 8223 mulxq %r11,%r11,%r9 8224 adcq %r11,%r15 8225 adcq $0,%r9 8226 imulq %r12,%rdx 8227 vpslld $32-25,%ymm7,%ymm7 8228 vpxor %ymm8,%ymm7,%ymm7 8229 vpsrld $25,%ymm6,%ymm8 8230 vpslld $32-25,%ymm6,%ymm6 8231 vpxor %ymm8,%ymm6,%ymm6 8232 vpsrld $25,%ymm5,%ymm8 8233 vpslld $32-25,%ymm5,%ymm5 8234 vpxor %ymm8,%ymm5,%ymm5 8235 vpsrld $25,%ymm4,%ymm8 8236 vpslld $32-25,%ymm4,%ymm4 8237 vpxor %ymm8,%ymm4,%ymm4 8238 vmovdqa 128(%rbp),%ymm8 8239 vpalignr $4,%ymm7,%ymm7,%ymm7 8240 vpalignr $8,%ymm11,%ymm11,%ymm11 8241 vpalignr $12,%ymm15,%ymm15,%ymm15 8242 vpalignr $4,%ymm6,%ymm6,%ymm6 8243 vpalignr $8,%ymm10,%ymm10,%ymm10 8244 vpalignr $12,%ymm14,%ymm14,%ymm14 8245 vpalignr $4,%ymm5,%ymm5,%ymm5 8246 vpalignr $8,%ymm9,%ymm9,%ymm9 8247 addq %rax,%r15 8248 adcq %rdx,%r9 8249 vpalignr $12,%ymm13,%ymm13,%ymm13 8250 vpalignr $4,%ymm4,%ymm4,%ymm4 8251 vpalignr $8,%ymm8,%ymm8,%ymm8 8252 vpalignr $12,%ymm12,%ymm12,%ymm12 8253 
vmovdqa %ymm8,128(%rbp) 8254 vmovdqa .rol16(%rip),%ymm8 8255 vpaddd %ymm7,%ymm3,%ymm3 8256 vpaddd %ymm6,%ymm2,%ymm2 8257 vpaddd %ymm5,%ymm1,%ymm1 8258 vpaddd %ymm4,%ymm0,%ymm0 8259 vpxor %ymm3,%ymm15,%ymm15 8260 vpxor %ymm2,%ymm14,%ymm14 8261 vpxor %ymm1,%ymm13,%ymm13 8262 vpxor %ymm0,%ymm12,%ymm12 8263 vpshufb %ymm8,%ymm15,%ymm15 8264 vpshufb %ymm8,%ymm14,%ymm14 8265 vpshufb %ymm8,%ymm13,%ymm13 8266 vpshufb %ymm8,%ymm12,%ymm12 8267 vmovdqa 128(%rbp),%ymm8 8268 vpaddd %ymm15,%ymm11,%ymm11 8269 movq %r13,%r10 8270 movq %r14,%r11 8271 movq %r15,%r12 8272 andq $3,%r12 8273 movq %r15,%r13 8274 andq $-4,%r13 8275 movq %r9,%r14 8276 shrdq $2,%r9,%r15 8277 shrq $2,%r9 8278 addq %r13,%r10 8279 adcq %r14,%r11 8280 adcq $0,%r12 8281 addq %r15,%r10 8282 adcq %r9,%r11 8283 adcq $0,%r12 8284 vpaddd %ymm14,%ymm10,%ymm10 8285 vpaddd %ymm13,%ymm9,%ymm9 8286 vpaddd %ymm12,%ymm8,%ymm8 8287 vpxor %ymm11,%ymm7,%ymm7 8288 vpxor %ymm10,%ymm6,%ymm6 8289 vpxor %ymm9,%ymm5,%ymm5 8290 vpxor %ymm8,%ymm4,%ymm4 8291 vmovdqa %ymm8,128(%rbp) 8292 vpsrld $20,%ymm7,%ymm8 8293 vpslld $32-20,%ymm7,%ymm7 8294 vpxor %ymm8,%ymm7,%ymm7 8295 vpsrld $20,%ymm6,%ymm8 8296 vpslld $32-20,%ymm6,%ymm6 8297 vpxor %ymm8,%ymm6,%ymm6 8298 vpsrld $20,%ymm5,%ymm8 8299 vpslld $32-20,%ymm5,%ymm5 8300 vpxor %ymm8,%ymm5,%ymm5 8301 vpsrld $20,%ymm4,%ymm8 8302 vpslld $32-20,%ymm4,%ymm4 8303 vpxor %ymm8,%ymm4,%ymm4 8304 addq 16(%rdi),%r10 8305 adcq 8+16(%rdi),%r11 8306 adcq $1,%r12 8307 vmovdqa .rol8(%rip),%ymm8 8308 vpaddd %ymm7,%ymm3,%ymm3 8309 vpaddd %ymm6,%ymm2,%ymm2 8310 vpaddd %ymm5,%ymm1,%ymm1 8311 vpaddd %ymm4,%ymm0,%ymm0 8312 vpxor %ymm3,%ymm15,%ymm15 8313 vpxor %ymm2,%ymm14,%ymm14 8314 vpxor %ymm1,%ymm13,%ymm13 8315 vpxor %ymm0,%ymm12,%ymm12 8316 vpshufb %ymm8,%ymm15,%ymm15 8317 vpshufb %ymm8,%ymm14,%ymm14 8318 vpshufb %ymm8,%ymm13,%ymm13 8319 vpshufb %ymm8,%ymm12,%ymm12 8320 vmovdqa 128(%rbp),%ymm8 8321 vpaddd %ymm15,%ymm11,%ymm11 8322 vpaddd %ymm14,%ymm10,%ymm10 8323 vpaddd %ymm13,%ymm9,%ymm9 8324 vpaddd 
%ymm12,%ymm8,%ymm8 8325 vpxor %ymm11,%ymm7,%ymm7 8326 vpxor %ymm10,%ymm6,%ymm6 8327 movq 0+0(%rbp),%rdx 8328 movq %rdx,%r15 8329 mulxq %r10,%r13,%r14 8330 mulxq %r11,%rax,%rdx 8331 imulq %r12,%r15 8332 addq %rax,%r14 8333 adcq %rdx,%r15 8334 vpxor %ymm9,%ymm5,%ymm5 8335 vpxor %ymm8,%ymm4,%ymm4 8336 vmovdqa %ymm8,128(%rbp) 8337 vpsrld $25,%ymm7,%ymm8 8338 vpslld $32-25,%ymm7,%ymm7 8339 vpxor %ymm8,%ymm7,%ymm7 8340 vpsrld $25,%ymm6,%ymm8 8341 vpslld $32-25,%ymm6,%ymm6 8342 vpxor %ymm8,%ymm6,%ymm6 8343 vpsrld $25,%ymm5,%ymm8 8344 vpslld $32-25,%ymm5,%ymm5 8345 vpxor %ymm8,%ymm5,%ymm5 8346 vpsrld $25,%ymm4,%ymm8 8347 vpslld $32-25,%ymm4,%ymm4 8348 vpxor %ymm8,%ymm4,%ymm4 8349 vmovdqa 128(%rbp),%ymm8 8350 vpalignr $12,%ymm7,%ymm7,%ymm7 8351 vpalignr $8,%ymm11,%ymm11,%ymm11 8352 vpalignr $4,%ymm15,%ymm15,%ymm15 8353 vpalignr $12,%ymm6,%ymm6,%ymm6 8354 movq 8+0(%rbp),%rdx 8355 mulxq %r10,%r10,%rax 8356 addq %r10,%r14 8357 mulxq %r11,%r11,%r9 8358 adcq %r11,%r15 8359 adcq $0,%r9 8360 imulq %r12,%rdx 8361 vpalignr $8,%ymm10,%ymm10,%ymm10 8362 vpalignr $4,%ymm14,%ymm14,%ymm14 8363 vpalignr $12,%ymm5,%ymm5,%ymm5 8364 vpalignr $8,%ymm9,%ymm9,%ymm9 8365 vpalignr $4,%ymm13,%ymm13,%ymm13 8366 vpalignr $12,%ymm4,%ymm4,%ymm4 8367 vpalignr $8,%ymm8,%ymm8,%ymm8 8368 vpalignr $4,%ymm12,%ymm12,%ymm12 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 addq %rax,%r15 8382 adcq %rdx,%r9 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 movq %r13,%r10 8404 movq %r14,%r11 8405 movq %r15,%r12 8406 andq $3,%r12 8407 movq %r15,%r13 8408 andq $-4,%r13 8409 movq %r9,%r14 8410 shrdq $2,%r9,%r15 8411 shrq $2,%r9 8412 addq %r13,%r10 8413 adcq %r14,%r11 8414 adcq $0,%r12 8415 addq %r15,%r10 8416 adcq %r9,%r11 8417 adcq $0,%r12 8418 8419 leaq 32(%rdi),%rdi 8420 decq %rcx 8421 jg 1b 8422 decq %r8 8423 jge 2b 8424 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 8425 vpaddd 64(%rbp),%ymm7,%ymm7 8426 vpaddd 96(%rbp),%ymm11,%ymm11 8427 vpaddd 
256(%rbp),%ymm15,%ymm15 8428 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 8429 vpaddd 64(%rbp),%ymm6,%ymm6 8430 vpaddd 96(%rbp),%ymm10,%ymm10 8431 vpaddd 224(%rbp),%ymm14,%ymm14 8432 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8433 vpaddd 64(%rbp),%ymm5,%ymm5 8434 vpaddd 96(%rbp),%ymm9,%ymm9 8435 vpaddd 192(%rbp),%ymm13,%ymm13 8436 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8437 vpaddd 64(%rbp),%ymm4,%ymm4 8438 vpaddd 96(%rbp),%ymm8,%ymm8 8439 vpaddd 160(%rbp),%ymm12,%ymm12 8440 8441 vmovdqa %ymm0,128(%rbp) 8442 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8443 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8444 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8445 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8446 vpxor 0+0(%rsi),%ymm0,%ymm0 8447 vpxor 32+0(%rsi),%ymm3,%ymm3 8448 vpxor 64+0(%rsi),%ymm7,%ymm7 8449 vpxor 96+0(%rsi),%ymm11,%ymm11 8450 vmovdqu %ymm0,0+0(%rdi) 8451 vmovdqu %ymm3,32+0(%rdi) 8452 vmovdqu %ymm7,64+0(%rdi) 8453 vmovdqu %ymm11,96+0(%rdi) 8454 8455 vmovdqa 128(%rbp),%ymm0 8456 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8457 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8458 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8459 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8460 vpxor 0+128(%rsi),%ymm3,%ymm3 8461 vpxor 32+128(%rsi),%ymm2,%ymm2 8462 vpxor 64+128(%rsi),%ymm6,%ymm6 8463 vpxor 96+128(%rsi),%ymm10,%ymm10 8464 vmovdqu %ymm3,0+128(%rdi) 8465 vmovdqu %ymm2,32+128(%rdi) 8466 vmovdqu %ymm6,64+128(%rdi) 8467 vmovdqu %ymm10,96+128(%rdi) 8468 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8469 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8470 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8471 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8472 vpxor 0+256(%rsi),%ymm3,%ymm3 8473 vpxor 32+256(%rsi),%ymm1,%ymm1 8474 vpxor 64+256(%rsi),%ymm5,%ymm5 8475 vpxor 96+256(%rsi),%ymm9,%ymm9 8476 vmovdqu %ymm3,0+256(%rdi) 8477 vmovdqu %ymm1,32+256(%rdi) 8478 vmovdqu %ymm5,64+256(%rdi) 8479 vmovdqu %ymm9,96+256(%rdi) 8480 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8481 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8482 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8483 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 
8484 vmovdqa %ymm3,%ymm8 8485 8486 movq $384,%rcx 8487 leaq 384(%rsi),%rsi 8488 subq $384,%rbx 8489 jmp seal_avx2_hash 8490 8491 seal_avx2_320: 8492 vmovdqa %ymm0,%ymm1 8493 vmovdqa %ymm0,%ymm2 8494 vmovdqa %ymm4,%ymm5 8495 vmovdqa %ymm4,%ymm6 8496 vmovdqa %ymm8,%ymm9 8497 vmovdqa %ymm8,%ymm10 8498 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 8499 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 8500 vmovdqa %ymm4,%ymm7 8501 vmovdqa %ymm8,%ymm11 8502 vmovdqa %ymm12,160(%rbp) 8503 vmovdqa %ymm13,192(%rbp) 8504 vmovdqa %ymm14,224(%rbp) 8505 movq $10,%r10 8506 1: 8507 vpaddd %ymm4,%ymm0,%ymm0 8508 vpxor %ymm0,%ymm12,%ymm12 8509 vpshufb .rol16(%rip),%ymm12,%ymm12 8510 vpaddd %ymm12,%ymm8,%ymm8 8511 vpxor %ymm8,%ymm4,%ymm4 8512 vpsrld $20,%ymm4,%ymm3 8513 vpslld $12,%ymm4,%ymm4 8514 vpxor %ymm3,%ymm4,%ymm4 8515 vpaddd %ymm4,%ymm0,%ymm0 8516 vpxor %ymm0,%ymm12,%ymm12 8517 vpshufb .rol8(%rip),%ymm12,%ymm12 8518 vpaddd %ymm12,%ymm8,%ymm8 8519 vpxor %ymm8,%ymm4,%ymm4 8520 vpslld $7,%ymm4,%ymm3 8521 vpsrld $25,%ymm4,%ymm4 8522 vpxor %ymm3,%ymm4,%ymm4 8523 vpalignr $12,%ymm12,%ymm12,%ymm12 8524 vpalignr $8,%ymm8,%ymm8,%ymm8 8525 vpalignr $4,%ymm4,%ymm4,%ymm4 8526 vpaddd %ymm5,%ymm1,%ymm1 8527 vpxor %ymm1,%ymm13,%ymm13 8528 vpshufb .rol16(%rip),%ymm13,%ymm13 8529 vpaddd %ymm13,%ymm9,%ymm9 8530 vpxor %ymm9,%ymm5,%ymm5 8531 vpsrld $20,%ymm5,%ymm3 8532 vpslld $12,%ymm5,%ymm5 8533 vpxor %ymm3,%ymm5,%ymm5 8534 vpaddd %ymm5,%ymm1,%ymm1 8535 vpxor %ymm1,%ymm13,%ymm13 8536 vpshufb .rol8(%rip),%ymm13,%ymm13 8537 vpaddd %ymm13,%ymm9,%ymm9 8538 vpxor %ymm9,%ymm5,%ymm5 8539 vpslld $7,%ymm5,%ymm3 8540 vpsrld $25,%ymm5,%ymm5 8541 vpxor %ymm3,%ymm5,%ymm5 8542 vpalignr $12,%ymm13,%ymm13,%ymm13 8543 vpalignr $8,%ymm9,%ymm9,%ymm9 8544 vpalignr $4,%ymm5,%ymm5,%ymm5 8545 vpaddd %ymm6,%ymm2,%ymm2 8546 vpxor %ymm2,%ymm14,%ymm14 8547 vpshufb .rol16(%rip),%ymm14,%ymm14 8548 vpaddd %ymm14,%ymm10,%ymm10 8549 vpxor %ymm10,%ymm6,%ymm6 8550 vpsrld $20,%ymm6,%ymm3 8551 vpslld $12,%ymm6,%ymm6 8552 vpxor %ymm3,%ymm6,%ymm6 8553 
vpaddd %ymm6,%ymm2,%ymm2 8554 vpxor %ymm2,%ymm14,%ymm14 8555 vpshufb .rol8(%rip),%ymm14,%ymm14 8556 vpaddd %ymm14,%ymm10,%ymm10 8557 vpxor %ymm10,%ymm6,%ymm6 8558 vpslld $7,%ymm6,%ymm3 8559 vpsrld $25,%ymm6,%ymm6 8560 vpxor %ymm3,%ymm6,%ymm6 8561 vpalignr $12,%ymm14,%ymm14,%ymm14 8562 vpalignr $8,%ymm10,%ymm10,%ymm10 8563 vpalignr $4,%ymm6,%ymm6,%ymm6 8564 vpaddd %ymm4,%ymm0,%ymm0 8565 vpxor %ymm0,%ymm12,%ymm12 8566 vpshufb .rol16(%rip),%ymm12,%ymm12 8567 vpaddd %ymm12,%ymm8,%ymm8 8568 vpxor %ymm8,%ymm4,%ymm4 8569 vpsrld $20,%ymm4,%ymm3 8570 vpslld $12,%ymm4,%ymm4 8571 vpxor %ymm3,%ymm4,%ymm4 8572 vpaddd %ymm4,%ymm0,%ymm0 8573 vpxor %ymm0,%ymm12,%ymm12 8574 vpshufb .rol8(%rip),%ymm12,%ymm12 8575 vpaddd %ymm12,%ymm8,%ymm8 8576 vpxor %ymm8,%ymm4,%ymm4 8577 vpslld $7,%ymm4,%ymm3 8578 vpsrld $25,%ymm4,%ymm4 8579 vpxor %ymm3,%ymm4,%ymm4 8580 vpalignr $4,%ymm12,%ymm12,%ymm12 8581 vpalignr $8,%ymm8,%ymm8,%ymm8 8582 vpalignr $12,%ymm4,%ymm4,%ymm4 8583 vpaddd %ymm5,%ymm1,%ymm1 8584 vpxor %ymm1,%ymm13,%ymm13 8585 vpshufb .rol16(%rip),%ymm13,%ymm13 8586 vpaddd %ymm13,%ymm9,%ymm9 8587 vpxor %ymm9,%ymm5,%ymm5 8588 vpsrld $20,%ymm5,%ymm3 8589 vpslld $12,%ymm5,%ymm5 8590 vpxor %ymm3,%ymm5,%ymm5 8591 vpaddd %ymm5,%ymm1,%ymm1 8592 vpxor %ymm1,%ymm13,%ymm13 8593 vpshufb .rol8(%rip),%ymm13,%ymm13 8594 vpaddd %ymm13,%ymm9,%ymm9 8595 vpxor %ymm9,%ymm5,%ymm5 8596 vpslld $7,%ymm5,%ymm3 8597 vpsrld $25,%ymm5,%ymm5 8598 vpxor %ymm3,%ymm5,%ymm5 8599 vpalignr $4,%ymm13,%ymm13,%ymm13 8600 vpalignr $8,%ymm9,%ymm9,%ymm9 8601 vpalignr $12,%ymm5,%ymm5,%ymm5 8602 vpaddd %ymm6,%ymm2,%ymm2 8603 vpxor %ymm2,%ymm14,%ymm14 8604 vpshufb .rol16(%rip),%ymm14,%ymm14 8605 vpaddd %ymm14,%ymm10,%ymm10 8606 vpxor %ymm10,%ymm6,%ymm6 8607 vpsrld $20,%ymm6,%ymm3 8608 vpslld $12,%ymm6,%ymm6 8609 vpxor %ymm3,%ymm6,%ymm6 8610 vpaddd %ymm6,%ymm2,%ymm2 8611 vpxor %ymm2,%ymm14,%ymm14 8612 vpshufb .rol8(%rip),%ymm14,%ymm14 8613 vpaddd %ymm14,%ymm10,%ymm10 8614 vpxor %ymm10,%ymm6,%ymm6 8615 vpslld $7,%ymm6,%ymm3 8616 
vpsrld $25,%ymm6,%ymm6 8617 vpxor %ymm3,%ymm6,%ymm6 8618 vpalignr $4,%ymm14,%ymm14,%ymm14 8619 vpalignr $8,%ymm10,%ymm10,%ymm10 8620 vpalignr $12,%ymm6,%ymm6,%ymm6 8621 8622 decq %r10 8623 jne 1b 8624 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 8625 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 8626 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 8627 vpaddd %ymm7,%ymm4,%ymm4 8628 vpaddd %ymm7,%ymm5,%ymm5 8629 vpaddd %ymm7,%ymm6,%ymm6 8630 vpaddd %ymm11,%ymm8,%ymm8 8631 vpaddd %ymm11,%ymm9,%ymm9 8632 vpaddd %ymm11,%ymm10,%ymm10 8633 vpaddd 160(%rbp),%ymm12,%ymm12 8634 vpaddd 192(%rbp),%ymm13,%ymm13 8635 vpaddd 224(%rbp),%ymm14,%ymm14 8636 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8637 8638 vpand .clamp(%rip),%ymm3,%ymm3 8639 vmovdqa %ymm3,0(%rbp) 8640 8641 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8642 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8643 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8644 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8645 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8646 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8647 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8648 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8649 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8650 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8651 jmp seal_avx2_short 8652 8653 seal_avx2_192: 8654 vmovdqa %ymm0,%ymm1 8655 vmovdqa %ymm0,%ymm2 8656 vmovdqa %ymm4,%ymm5 8657 vmovdqa %ymm4,%ymm6 8658 vmovdqa %ymm8,%ymm9 8659 vmovdqa %ymm8,%ymm10 8660 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 8661 vmovdqa %ymm12,%ymm11 8662 vmovdqa %ymm13,%ymm15 8663 movq $10,%r10 8664 1: 8665 vpaddd %ymm4,%ymm0,%ymm0 8666 vpxor %ymm0,%ymm12,%ymm12 8667 vpshufb .rol16(%rip),%ymm12,%ymm12 8668 vpaddd %ymm12,%ymm8,%ymm8 8669 vpxor %ymm8,%ymm4,%ymm4 8670 vpsrld $20,%ymm4,%ymm3 8671 vpslld $12,%ymm4,%ymm4 8672 vpxor %ymm3,%ymm4,%ymm4 8673 vpaddd %ymm4,%ymm0,%ymm0 8674 vpxor %ymm0,%ymm12,%ymm12 8675 vpshufb .rol8(%rip),%ymm12,%ymm12 8676 vpaddd %ymm12,%ymm8,%ymm8 8677 vpxor %ymm8,%ymm4,%ymm4 8678 vpslld $7,%ymm4,%ymm3 8679 vpsrld $25,%ymm4,%ymm4 8680 vpxor %ymm3,%ymm4,%ymm4 8681 vpalignr 
$12,%ymm12,%ymm12,%ymm12 8682 vpalignr $8,%ymm8,%ymm8,%ymm8 8683 vpalignr $4,%ymm4,%ymm4,%ymm4 8684 vpaddd %ymm5,%ymm1,%ymm1 8685 vpxor %ymm1,%ymm13,%ymm13 8686 vpshufb .rol16(%rip),%ymm13,%ymm13 8687 vpaddd %ymm13,%ymm9,%ymm9 8688 vpxor %ymm9,%ymm5,%ymm5 8689 vpsrld $20,%ymm5,%ymm3 8690 vpslld $12,%ymm5,%ymm5 8691 vpxor %ymm3,%ymm5,%ymm5 8692 vpaddd %ymm5,%ymm1,%ymm1 8693 vpxor %ymm1,%ymm13,%ymm13 8694 vpshufb .rol8(%rip),%ymm13,%ymm13 8695 vpaddd %ymm13,%ymm9,%ymm9 8696 vpxor %ymm9,%ymm5,%ymm5 8697 vpslld $7,%ymm5,%ymm3 8698 vpsrld $25,%ymm5,%ymm5 8699 vpxor %ymm3,%ymm5,%ymm5 8700 vpalignr $12,%ymm13,%ymm13,%ymm13 8701 vpalignr $8,%ymm9,%ymm9,%ymm9 8702 vpalignr $4,%ymm5,%ymm5,%ymm5 8703 vpaddd %ymm4,%ymm0,%ymm0 8704 vpxor %ymm0,%ymm12,%ymm12 8705 vpshufb .rol16(%rip),%ymm12,%ymm12 8706 vpaddd %ymm12,%ymm8,%ymm8 8707 vpxor %ymm8,%ymm4,%ymm4 8708 vpsrld $20,%ymm4,%ymm3 8709 vpslld $12,%ymm4,%ymm4 8710 vpxor %ymm3,%ymm4,%ymm4 8711 vpaddd %ymm4,%ymm0,%ymm0 8712 vpxor %ymm0,%ymm12,%ymm12 8713 vpshufb .rol8(%rip),%ymm12,%ymm12 8714 vpaddd %ymm12,%ymm8,%ymm8 8715 vpxor %ymm8,%ymm4,%ymm4 8716 vpslld $7,%ymm4,%ymm3 8717 vpsrld $25,%ymm4,%ymm4 8718 vpxor %ymm3,%ymm4,%ymm4 8719 vpalignr $4,%ymm12,%ymm12,%ymm12 8720 vpalignr $8,%ymm8,%ymm8,%ymm8 8721 vpalignr $12,%ymm4,%ymm4,%ymm4 8722 vpaddd %ymm5,%ymm1,%ymm1 8723 vpxor %ymm1,%ymm13,%ymm13 8724 vpshufb .rol16(%rip),%ymm13,%ymm13 8725 vpaddd %ymm13,%ymm9,%ymm9 8726 vpxor %ymm9,%ymm5,%ymm5 8727 vpsrld $20,%ymm5,%ymm3 8728 vpslld $12,%ymm5,%ymm5 8729 vpxor %ymm3,%ymm5,%ymm5 8730 vpaddd %ymm5,%ymm1,%ymm1 8731 vpxor %ymm1,%ymm13,%ymm13 8732 vpshufb .rol8(%rip),%ymm13,%ymm13 8733 vpaddd %ymm13,%ymm9,%ymm9 8734 vpxor %ymm9,%ymm5,%ymm5 8735 vpslld $7,%ymm5,%ymm3 8736 vpsrld $25,%ymm5,%ymm5 8737 vpxor %ymm3,%ymm5,%ymm5 8738 vpalignr $4,%ymm13,%ymm13,%ymm13 8739 vpalignr $8,%ymm9,%ymm9,%ymm9 8740 vpalignr $12,%ymm5,%ymm5,%ymm5 8741 8742 decq %r10 8743 jne 1b 8744 vpaddd %ymm2,%ymm0,%ymm0 8745 vpaddd %ymm2,%ymm1,%ymm1 8746 vpaddd 
%ymm6,%ymm4,%ymm4 8747 vpaddd %ymm6,%ymm5,%ymm5 8748 vpaddd %ymm10,%ymm8,%ymm8 8749 vpaddd %ymm10,%ymm9,%ymm9 8750 vpaddd %ymm11,%ymm12,%ymm12 8751 vpaddd %ymm15,%ymm13,%ymm13 8752 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8753 8754 vpand .clamp(%rip),%ymm3,%ymm3 8755 vmovdqa %ymm3,0(%rbp) 8756 8757 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8758 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8759 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8760 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8761 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8762 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8763 seal_avx2_short: 8764 movq %r8,%r8 8765 call poly_hash_ad_internal 8766 xorq %rcx,%rcx 8767 seal_avx2_hash: 8768 cmpq $16,%rcx 8769 jb seal_avx2_short_loop 8770 addq 0(%rdi),%r10 8771 adcq 8+0(%rdi),%r11 8772 adcq $1,%r12 8773 movq 0+0(%rbp),%rax 8774 movq %rax,%r15 8775 mulq %r10 8776 movq %rax,%r13 8777 movq %rdx,%r14 8778 movq 0+0(%rbp),%rax 8779 mulq %r11 8780 imulq %r12,%r15 8781 addq %rax,%r14 8782 adcq %rdx,%r15 8783 movq 8+0(%rbp),%rax 8784 movq %rax,%r9 8785 mulq %r10 8786 addq %rax,%r14 8787 adcq $0,%rdx 8788 movq %rdx,%r10 8789 movq 8+0(%rbp),%rax 8790 mulq %r11 8791 addq %rax,%r15 8792 adcq $0,%rdx 8793 imulq %r12,%r9 8794 addq %r10,%r15 8795 adcq %rdx,%r9 8796 movq %r13,%r10 8797 movq %r14,%r11 8798 movq %r15,%r12 8799 andq $3,%r12 8800 movq %r15,%r13 8801 andq $-4,%r13 8802 movq %r9,%r14 8803 shrdq $2,%r9,%r15 8804 shrq $2,%r9 8805 addq %r13,%r10 8806 adcq %r14,%r11 8807 adcq $0,%r12 8808 addq %r15,%r10 8809 adcq %r9,%r11 8810 adcq $0,%r12 8811 8812 subq $16,%rcx 8813 addq $16,%rdi 8814 jmp seal_avx2_hash 8815 seal_avx2_short_loop: 8816 cmpq $32,%rbx 8817 jb seal_avx2_short_tail 8818 subq $32,%rbx 8819 8820 vpxor (%rsi),%ymm0,%ymm0 8821 vmovdqu %ymm0,(%rdi) 8822 leaq 32(%rsi),%rsi 8823 8824 addq 0(%rdi),%r10 8825 adcq 8+0(%rdi),%r11 8826 adcq $1,%r12 8827 movq 0+0(%rbp),%rax 8828 movq %rax,%r15 8829 mulq %r10 8830 movq %rax,%r13 8831 movq %rdx,%r14 8832 movq 0+0(%rbp),%rax 8833 mulq %r11 8834 imulq %r12,%r15 8835 addq 
%rax,%r14 8836 adcq %rdx,%r15 8837 movq 8+0(%rbp),%rax 8838 movq %rax,%r9 8839 mulq %r10 8840 addq %rax,%r14 8841 adcq $0,%rdx 8842 movq %rdx,%r10 8843 movq 8+0(%rbp),%rax 8844 mulq %r11 8845 addq %rax,%r15 8846 adcq $0,%rdx 8847 imulq %r12,%r9 8848 addq %r10,%r15 8849 adcq %rdx,%r9 8850 movq %r13,%r10 8851 movq %r14,%r11 8852 movq %r15,%r12 8853 andq $3,%r12 8854 movq %r15,%r13 8855 andq $-4,%r13 8856 movq %r9,%r14 8857 shrdq $2,%r9,%r15 8858 shrq $2,%r9 8859 addq %r13,%r10 8860 adcq %r14,%r11 8861 adcq $0,%r12 8862 addq %r15,%r10 8863 adcq %r9,%r11 8864 adcq $0,%r12 8865 addq 16(%rdi),%r10 8866 adcq 8+16(%rdi),%r11 8867 adcq $1,%r12 8868 movq 0+0(%rbp),%rax 8869 movq %rax,%r15 8870 mulq %r10 8871 movq %rax,%r13 8872 movq %rdx,%r14 8873 movq 0+0(%rbp),%rax 8874 mulq %r11 8875 imulq %r12,%r15 8876 addq %rax,%r14 8877 adcq %rdx,%r15 8878 movq 8+0(%rbp),%rax 8879 movq %rax,%r9 8880 mulq %r10 8881 addq %rax,%r14 8882 adcq $0,%rdx 8883 movq %rdx,%r10 8884 movq 8+0(%rbp),%rax 8885 mulq %r11 8886 addq %rax,%r15 8887 adcq $0,%rdx 8888 imulq %r12,%r9 8889 addq %r10,%r15 8890 adcq %rdx,%r9 8891 movq %r13,%r10 8892 movq %r14,%r11 8893 movq %r15,%r12 8894 andq $3,%r12 8895 movq %r15,%r13 8896 andq $-4,%r13 8897 movq %r9,%r14 8898 shrdq $2,%r9,%r15 8899 shrq $2,%r9 8900 addq %r13,%r10 8901 adcq %r14,%r11 8902 adcq $0,%r12 8903 addq %r15,%r10 8904 adcq %r9,%r11 8905 adcq $0,%r12 8906 8907 leaq 32(%rdi),%rdi 8908 8909 vmovdqa %ymm4,%ymm0 8910 vmovdqa %ymm8,%ymm4 8911 vmovdqa %ymm12,%ymm8 8912 vmovdqa %ymm1,%ymm12 8913 vmovdqa %ymm5,%ymm1 8914 vmovdqa %ymm9,%ymm5 8915 vmovdqa %ymm13,%ymm9 8916 vmovdqa %ymm2,%ymm13 8917 vmovdqa %ymm6,%ymm2 8918 jmp seal_avx2_short_loop 8919 seal_avx2_short_tail: 8920 cmpq $16,%rbx 8921 jb 1f 8922 subq $16,%rbx 8923 vpxor (%rsi),%xmm0,%xmm3 8924 vmovdqu %xmm3,(%rdi) 8925 leaq 16(%rsi),%rsi 8926 addq 0(%rdi),%r10 8927 adcq 8+0(%rdi),%r11 8928 adcq $1,%r12 8929 movq 0+0(%rbp),%rax 8930 movq %rax,%r15 8931 mulq %r10 8932 movq %rax,%r13 8933 movq 
%rdx,%r14 8934 movq 0+0(%rbp),%rax 8935 mulq %r11 8936 imulq %r12,%r15 8937 addq %rax,%r14 8938 adcq %rdx,%r15 8939 movq 8+0(%rbp),%rax 8940 movq %rax,%r9 8941 mulq %r10 8942 addq %rax,%r14 8943 adcq $0,%rdx 8944 movq %rdx,%r10 8945 movq 8+0(%rbp),%rax 8946 mulq %r11 8947 addq %rax,%r15 8948 adcq $0,%rdx 8949 imulq %r12,%r9 8950 addq %r10,%r15 8951 adcq %rdx,%r9 8952 movq %r13,%r10 8953 movq %r14,%r11 8954 movq %r15,%r12 8955 andq $3,%r12 8956 movq %r15,%r13 8957 andq $-4,%r13 8958 movq %r9,%r14 8959 shrdq $2,%r9,%r15 8960 shrq $2,%r9 8961 addq %r13,%r10 8962 adcq %r14,%r11 8963 adcq $0,%r12 8964 addq %r15,%r10 8965 adcq %r9,%r11 8966 adcq $0,%r12 8967 8968 leaq 16(%rdi),%rdi 8969 vextracti128 $1,%ymm0,%xmm0 8970 1: 8971 vzeroupper 8972 jmp seal_sse_tail_16 8973 .cfi_endproc 8974 #endif 8975