; ChaCha20 stream cipher for 32-bit x86 (NASM syntax, generated code).
;
; Exports:
;   _ChaCha20_ctr32 - dispatcher + scalar (integer-register) implementation
;   _ChaCha20_ssse3 - SSSE3 implementation (also reached from _ChaCha20_ctr32)
;
; Both entry points read five dword arguments from the stack.  After the
; four register pushes of the prologue they live at:
;   [20+esp]  output pointer           (written)
;   [24+esp]  input pointer            (read)
;   [28+esp]  length in bytes
;   [32+esp]  pointer to 32 bytes of key material
;   [36+esp]  pointer to a 16-byte counter block (dword 0 is incremented
;             once per 64-byte block)
; ebp, ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7
; and flags are clobbered.

%ifidn __OUTPUT_FORMAT__,obj
section code    use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text   code align=64
%else
section .text   code
%endif

;-----------------------------------------------------------------------
; _ChaCha20_ctr32
;   Returns immediately when the length argument is zero.  Otherwise it
;   tests two bits of _OPENSSL_ia32cap_P and either jumps into the SSSE3
;   code (L$ssse3_shortcut, with eax = address of L$pic_point) or runs
;   the scalar code below.
;
; Scalar stack frame (after "sub esp,132"):
;   [0..60+esp]     16-dword working state (words kept in registers are
;                   spilled here as needed)
;   [80..108+esp]   copy of the 8 key dwords
;   [112..124+esp]  copy of the 4 counter-block dwords
;   [128+esp]       remaining double-round count
;   [152+esp]       output pointer   (argument slot, updated per block)
;   [156+esp]       input pointer    (argument slot, updated per block)
;   [160+esp]       remaining length (argument slot, updated per block)
;-----------------------------------------------------------------------
global  _ChaCha20_ctr32
align   16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        xor     eax,eax
        cmp     eax,DWORD [28+esp]              ; length == 0 ?
        je      NEAR L$000no_data
        call    L$pic_point                     ; call/pop: eax = runtime address of L$pic_point
L$pic_point:
        pop     eax
        lea     ebp,[_OPENSSL_ia32cap_P]
        ; Bit 24 of dword 0 and bit 9 of dword 1 must both be set to take
        ; the SSSE3 path (presumably the FXSR and SSSE3 feature bits -
        ; confirm against the definition of _OPENSSL_ia32cap_P).
        test    DWORD [ebp],16777216
        jz      NEAR L$001x86
        test    DWORD [4+ebp],512
        jz      NEAR L$001x86
        jmp     NEAR L$ssse3_shortcut
L$001x86:
        mov     esi,DWORD [32+esp]              ; key pointer
        mov     edi,DWORD [36+esp]              ; counter-block pointer
        sub     esp,132
        ; Copy the 8 key dwords to [80..108+esp].
        mov     eax,DWORD [esi]
        mov     ebx,DWORD [4+esi]
        mov     ecx,DWORD [8+esi]
        mov     edx,DWORD [12+esi]
        mov     DWORD [80+esp],eax
        mov     DWORD [84+esp],ebx
        mov     DWORD [88+esp],ecx
        mov     DWORD [92+esp],edx
        mov     eax,DWORD [16+esi]
        mov     ebx,DWORD [20+esi]
        mov     ecx,DWORD [24+esi]
        mov     edx,DWORD [28+esi]
        mov     DWORD [96+esp],eax
        mov     DWORD [100+esp],ebx
        mov     DWORD [104+esp],ecx
        mov     DWORD [108+esp],edx
        ; Copy the 4 counter-block dwords to [112..124+esp].  Dword 0 is
        ; stored minus one because L$002entry adds one before every block.
        mov     eax,DWORD [edi]
        mov     ebx,DWORD [4+edi]
        mov     ecx,DWORD [8+edi]
        mov     edx,DWORD [12+edi]
        sub     eax,1
        mov     DWORD [112+esp],eax
        mov     DWORD [116+esp],ebx
        mov     DWORD [120+esp],ecx
        mov     DWORD [124+esp],edx
        jmp     NEAR L$002entry
align   16
L$003outer_loop:
        ; Entered with ebx = next input, eax = next output, ecx = bytes left.
        mov     DWORD [156+esp],ebx
        mov     DWORD [152+esp],eax
        mov     DWORD [160+esp],ecx
L$002entry:
        ; Build the working state: the four constants below (the same
        ; values are stored in L$ssse3_data), then key and counter dwords.
        mov     eax,1634760805
        mov     DWORD [4+esp],857760878
        mov     DWORD [8+esp],2036477234
        mov     DWORD [12+esp],1797285236
        mov     ebx,DWORD [84+esp]
        mov     ebp,DWORD [88+esp]
        mov     ecx,DWORD [104+esp]
        mov     esi,DWORD [108+esp]
        mov     edx,DWORD [116+esp]
        mov     edi,DWORD [120+esp]
        mov     DWORD [20+esp],ebx
        mov     DWORD [24+esp],ebp
        mov     DWORD [40+esp],ecx
        mov     DWORD [44+esp],esi
        mov     DWORD [52+esp],edx
        mov     DWORD [56+esp],edi
        mov     ebx,DWORD [92+esp]
        mov     edi,DWORD [124+esp]
        mov     edx,DWORD [112+esp]
        mov     ebp,DWORD [80+esp]
        mov     ecx,DWORD [96+esp]
        mov     esi,DWORD [100+esp]
        add     edx,1                           ; advance block counter
        mov     DWORD [28+esp],ebx
        mov     DWORD [60+esp],edi
        mov     DWORD [112+esp],edx
        mov     ebx,10                          ; 10 passes through L$004loop
        jmp     NEAR L$004loop
align   16
L$004loop:
        ; One pass = eight add/xor/rotate groups using rotations
        ; 16, 12, 8 and 7.  ebx (pass counter) is parked at [128+esp]
        ; because every general register is needed for state words.
        add     eax,ebp
        mov     DWORD [128+esp],ebx
        mov     ebx,ebp
        xor     edx,eax
        rol     edx,16
        add     ecx,edx
        xor     ebx,ecx
        mov     edi,DWORD [52+esp]
        rol     ebx,12
        mov     ebp,DWORD [20+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [esp],eax
        rol     edx,8
        mov     eax,DWORD [4+esp]
        add     ecx,edx
        mov     DWORD [48+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        mov     DWORD [32+esp],ecx
        rol     edi,16
        mov     DWORD [16+esp],ebx
        add     esi,edi
        mov     ecx,DWORD [40+esp]
        xor     ebp,esi
        mov     edx,DWORD [56+esp]
        rol     ebp,12
        mov     ebx,DWORD [24+esp]
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [4+esp],eax
        rol     edi,8
        mov     eax,DWORD [8+esp]
        add     esi,edi
        mov     DWORD [52+esp],edi
        xor     ebp,esi
        add     eax,ebx
        rol     ebp,7
        xor     edx,eax
        mov     DWORD [36+esp],esi
        rol     edx,16
        mov     DWORD [20+esp],ebp
        add     ecx,edx
        mov     esi,DWORD [44+esp]
        xor     ebx,ecx
        mov     edi,DWORD [60+esp]
        rol     ebx,12
        mov     ebp,DWORD [28+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [8+esp],eax
        rol     edx,8
        mov     eax,DWORD [12+esp]
        add     ecx,edx
        mov     DWORD [56+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        rol     edi,16
        mov     DWORD [24+esp],ebx
        add     esi,edi
        xor     ebp,esi
        rol     ebp,12
        mov     ebx,DWORD [20+esp]
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [12+esp],eax
        rol     edi,8
        mov     eax,DWORD [esp]
        add     esi,edi
        mov     edx,edi
        xor     ebp,esi
        add     eax,ebx
        rol     ebp,7
        xor     edx,eax
        rol     edx,16
        mov     DWORD [28+esp],ebp
        add     ecx,edx
        xor     ebx,ecx
        mov     edi,DWORD [48+esp]
        rol     ebx,12
        mov     ebp,DWORD [24+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [esp],eax
        rol     edx,8
        mov     eax,DWORD [4+esp]
        add     ecx,edx
        mov     DWORD [60+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        mov     DWORD [40+esp],ecx
        rol     edi,16
        mov     DWORD [20+esp],ebx
        add     esi,edi
        mov     ecx,DWORD [32+esp]
        xor     ebp,esi
        mov     edx,DWORD [52+esp]
        rol     ebp,12
        mov     ebx,DWORD [28+esp]
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [4+esp],eax
        rol     edi,8
        mov     eax,DWORD [8+esp]
        add     esi,edi
        mov     DWORD [48+esp],edi
        xor     ebp,esi
        add     eax,ebx
        rol     ebp,7
        xor     edx,eax
        mov     DWORD [44+esp],esi
        rol     edx,16
        mov     DWORD [24+esp],ebp
        add     ecx,edx
        mov     esi,DWORD [36+esp]
        xor     ebx,ecx
        mov     edi,DWORD [56+esp]
        rol     ebx,12
        mov     ebp,DWORD [16+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [8+esp],eax
        rol     edx,8
        mov     eax,DWORD [12+esp]
        add     ecx,edx
        mov     DWORD [52+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        rol     edi,16
        mov     DWORD [28+esp],ebx
        add     esi,edi
        xor     ebp,esi
        mov     edx,DWORD [48+esp]
        rol     ebp,12
        mov     ebx,DWORD [128+esp]
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [12+esp],eax
        rol     edi,8
        mov     eax,DWORD [esp]
        add     esi,edi
        mov     DWORD [56+esp],edi
        xor     ebp,esi
        rol     ebp,7
        dec     ebx
        jnz     NEAR L$004loop
        ; Add the input state back in, then XOR a full 64-byte block with
        ; the input, or fall to L$005tail when fewer than 64 bytes remain.
        mov     ebx,DWORD [160+esp]             ; bytes remaining
        add     eax,1634760805
        add     ebp,DWORD [80+esp]
        add     ecx,DWORD [96+esp]
        add     esi,DWORD [100+esp]
        cmp     ebx,64
        jb      NEAR L$005tail
        mov     ebx,DWORD [156+esp]             ; input pointer
        add     edx,DWORD [112+esp]
        add     edi,DWORD [120+esp]
        xor     eax,DWORD [ebx]
        xor     ebp,DWORD [16+ebx]
        mov     DWORD [esp],eax
        mov     eax,DWORD [152+esp]             ; output pointer
        xor     ecx,DWORD [32+ebx]
        xor     esi,DWORD [36+ebx]
        xor     edx,DWORD [48+ebx]
        xor     edi,DWORD [56+ebx]
        mov     DWORD [16+eax],ebp
        mov     DWORD [32+eax],ecx
        mov     DWORD [36+eax],esi
        mov     DWORD [48+eax],edx
        mov     DWORD [56+eax],edi
        mov     ebp,DWORD [4+esp]
        mov     ecx,DWORD [8+esp]
        mov     esi,DWORD [12+esp]
        mov     edx,DWORD [20+esp]
        mov     edi,DWORD [24+esp]
        add     ebp,857760878
        add     ecx,2036477234
        add     esi,1797285236
        add     edx,DWORD [84+esp]
        add     edi,DWORD [88+esp]
        xor     ebp,DWORD [4+ebx]
        xor     ecx,DWORD [8+ebx]
        xor     esi,DWORD [12+ebx]
        xor     edx,DWORD [20+ebx]
        xor     edi,DWORD [24+ebx]
        mov     DWORD [4+eax],ebp
        mov     DWORD [8+eax],ecx
        mov     DWORD [12+eax],esi
        mov     DWORD [20+eax],edx
        mov     DWORD [24+eax],edi
        mov     ebp,DWORD [28+esp]
        mov     ecx,DWORD [40+esp]
        mov     esi,DWORD [44+esp]
        mov     edx,DWORD [52+esp]
        mov     edi,DWORD [60+esp]
        add     ebp,DWORD [92+esp]
        add     ecx,DWORD [104+esp]
        add     esi,DWORD [108+esp]
        add     edx,DWORD [116+esp]
        add     edi,DWORD [124+esp]
        xor     ebp,DWORD [28+ebx]
        xor     ecx,DWORD [40+ebx]
        xor     esi,DWORD [44+ebx]
        xor     edx,DWORD [52+ebx]
        xor     edi,DWORD [60+ebx]
        lea     ebx,[64+ebx]                    ; input  += 64
        mov     DWORD [28+eax],ebp
        mov     ebp,DWORD [esp]
        mov     DWORD [40+eax],ecx
        mov     ecx,DWORD [160+esp]
        mov     DWORD [44+eax],esi
        mov     DWORD [52+eax],edx
        mov     DWORD [60+eax],edi
        mov     DWORD [eax],ebp
        lea     eax,[64+eax]                    ; output += 64
        sub     ecx,64
        jnz     NEAR L$003outer_loop
        jmp     NEAR L$006done
L$005tail:
        ; Fewer than 64 bytes left (ebx = count, non-zero here): finish the
        ; key-stream block in [0..60+esp] and XOR it byte by byte.
        add     edx,DWORD [112+esp]
        add     edi,DWORD [120+esp]
        mov     DWORD [esp],eax
        mov     DWORD [16+esp],ebp
        mov     DWORD [32+esp],ecx
        mov     DWORD [36+esp],esi
        mov     DWORD [48+esp],edx
        mov     DWORD [56+esp],edi
        mov     ebp,DWORD [4+esp]
        mov     ecx,DWORD [8+esp]
        mov     esi,DWORD [12+esp]
        mov     edx,DWORD [20+esp]
        mov     edi,DWORD [24+esp]
        add     ebp,857760878
        add     ecx,2036477234
        add     esi,1797285236
        add     edx,DWORD [84+esp]
        add     edi,DWORD [88+esp]
        mov     DWORD [4+esp],ebp
        mov     DWORD [8+esp],ecx
        mov     DWORD [12+esp],esi
        mov     DWORD [20+esp],edx
        mov     DWORD [24+esp],edi
        mov     ebp,DWORD [28+esp]
        mov     ecx,DWORD [40+esp]
        mov     esi,DWORD [44+esp]
        mov     edx,DWORD [52+esp]
        mov     edi,DWORD [60+esp]
        add     ebp,DWORD [92+esp]
        add     ecx,DWORD [104+esp]
        add     esi,DWORD [108+esp]
        add     edx,DWORD [116+esp]
        add     edi,DWORD [124+esp]
        mov     DWORD [28+esp],ebp
        mov     ebp,DWORD [156+esp]             ; input pointer
        mov     DWORD [40+esp],ecx
        mov     ecx,DWORD [152+esp]             ; output pointer
        mov     DWORD [44+esp],esi
        xor     esi,esi                         ; byte index
        mov     DWORD [52+esp],edx
        mov     DWORD [60+esp],edi
        xor     eax,eax
        xor     edx,edx
L$007tail_loop:
        mov     al,BYTE [ebp*1+esi]
        mov     dl,BYTE [esi*1+esp]
        lea     esi,[1+esi]
        xor     al,dl
        mov     BYTE [esi*1+ecx-1],al
        dec     ebx
        jnz     NEAR L$007tail_loop
L$006done:
        add     esp,132
L$000no_data:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _ChaCha20_ssse3
;   Same stack arguments as _ChaCha20_ctr32.  L$ssse3_shortcut expects
;   eax = runtime address of L$pic_point; the direct entry establishes
;   that itself and, like _ChaCha20_ctr32, returns at once for length 0
;   (the 1x tail loop below counts ecx down with dec/jnz and must never
;   be entered with ecx == 0).
;
; Frame: esp is lowered by 524 and rounded down to a multiple of 64.
;   [0..63+esp]     1x path: input state (4 x 16 bytes)
;   ebx = esp+128   4x path: working state, addressed [ebx-128..ebx+127]
;   ebp = esp+384   4x path: input state,   addressed [ebp-128..ebp+127]
;   [512+esp]       caller's esp
;   [516+esp]       key pointer      (4x path only)
;   [520+esp]       counter pointer  (4x path only)
;-----------------------------------------------------------------------
global  _ChaCha20_ssse3
align   16
_ChaCha20_ssse3:
L$_ChaCha20_ssse3_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        xor     eax,eax
        cmp     eax,DWORD [28+esp]              ; length == 0 ?
        je      NEAR L$000no_data               ; shared 4-pop epilogue
        call    L$ssse3_pic_point
L$ssse3_pic_point:
        pop     eax
        ; Rebase so eax holds the address of L$pic_point, which is what
        ; the lea at L$ssse3_shortcut assumes.
        lea     eax,[(L$pic_point-L$ssse3_pic_point)+eax]
L$ssse3_shortcut:
        mov     edi,DWORD [20+esp]              ; output
        mov     esi,DWORD [24+esp]              ; input
        mov     ecx,DWORD [28+esp]              ; length
        mov     edx,DWORD [32+esp]              ; key
        mov     ebx,DWORD [36+esp]              ; counter block
        mov     ebp,esp
        sub     esp,524
        and     esp,-64
        mov     DWORD [512+esp],ebp             ; remember caller's esp
        lea     eax,[(L$ssse3_data-L$pic_point)+eax] ; eax = &L$ssse3_data
        movdqu  xmm3,[ebx]
        cmp     ecx,256
        jb      NEAR L$0081x
        ; ---- 4x path: four blocks per iteration, one state word per
        ; ---- xmm register with one block per 32-bit lane.
        mov     DWORD [516+esp],edx
        mov     DWORD [520+esp],ebx
        sub     ecx,256
        lea     ebp,[384+esp]
        movdqu  xmm7,[edx]
        pshufd  xmm0,xmm3,0
        pshufd  xmm1,xmm3,85
        pshufd  xmm2,xmm3,170
        pshufd  xmm3,xmm3,255
        paddd   xmm0,[48+eax]                   ; per-lane counters +0,+1,+2,+3
        pshufd  xmm4,xmm7,0
        pshufd  xmm5,xmm7,85
        psubd   xmm0,[64+eax]                   ; -4: the outer loop adds 4 first
        pshufd  xmm6,xmm7,170
        pshufd  xmm7,xmm7,255
        movdqa  [64+ebp],xmm0
        movdqa  [80+ebp],xmm1
        movdqa  [96+ebp],xmm2
        movdqa  [112+ebp],xmm3
        movdqu  xmm3,[16+edx]
        movdqa  [ebp-64],xmm4
        movdqa  [ebp-48],xmm5
        movdqa  [ebp-32],xmm6
        movdqa  [ebp-16],xmm7
        movdqa  xmm7,[32+eax]
        lea     ebx,[128+esp]
        pshufd  xmm0,xmm3,0
        pshufd  xmm1,xmm3,85
        pshufd  xmm2,xmm3,170
        pshufd  xmm3,xmm3,255
        pshufd  xmm4,xmm7,0
        pshufd  xmm5,xmm7,85
        pshufd  xmm6,xmm7,170
        pshufd  xmm7,xmm7,255
        movdqa  [ebp],xmm0
        movdqa  [16+ebp],xmm1
        movdqa  [32+ebp],xmm2
        movdqa  [48+ebp],xmm3
        movdqa  [ebp-128],xmm4
        movdqa  [ebp-112],xmm5
        movdqa  [ebp-96],xmm6
        movdqa  [ebp-80],xmm7
        lea     esi,[128+esi]                   ; bias pointers by +128 so the
        lea     edi,[128+edi]                   ; block is addressed -128..+127
        jmp     NEAR L$009outer_loop
align   16
L$009outer_loop:
        ; Copy the input state (ebp) to the working area (ebx), bumping
        ; the four lane counters by 4, and load the first column.
        movdqa  xmm1,[ebp-112]
        movdqa  xmm2,[ebp-96]
        movdqa  xmm3,[ebp-80]
        movdqa  xmm5,[ebp-48]
        movdqa  xmm6,[ebp-32]
        movdqa  xmm7,[ebp-16]
        movdqa  [ebx-112],xmm1
        movdqa  [ebx-96],xmm2
        movdqa  [ebx-80],xmm3
        movdqa  [ebx-48],xmm5
        movdqa  [ebx-32],xmm6
        movdqa  [ebx-16],xmm7
        movdqa  xmm2,[32+ebp]
        movdqa  xmm3,[48+ebp]
        movdqa  xmm4,[64+ebp]
        movdqa  xmm5,[80+ebp]
        movdqa  xmm6,[96+ebp]
        movdqa  xmm7,[112+ebp]
        paddd   xmm4,[64+eax]
        movdqa  [32+ebx],xmm2
        movdqa  [48+ebx],xmm3
        movdqa  [64+ebx],xmm4
        movdqa  [80+ebx],xmm5
        movdqa  [96+ebx],xmm6
        movdqa  [112+ebx],xmm7
        movdqa  [64+ebp],xmm4
        movdqa  xmm0,[ebp-128]
        movdqa  xmm6,xmm4
        movdqa  xmm3,[ebp-64]
        movdqa  xmm4,[ebp]
        movdqa  xmm5,[16+ebp]
        mov     edx,10                          ; 10 passes through L$010loop
        nop
align   16
L$010loop:
        ; pshufb with [eax] rotates each dword left by 16, with [16+eax]
        ; by 8; the pslld/psrld/por triples rotate left by 12 and by 7.
        paddd   xmm0,xmm3
        movdqa  xmm2,xmm3
        pxor    xmm6,xmm0
        pshufb  xmm6,[eax]
        paddd   xmm4,xmm6
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-48]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-112]
        paddd   xmm0,xmm2
        movdqa  xmm7,[80+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-128],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [64+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        movdqa  [ebx],xmm4
        pshufb  xmm7,[eax]
        movdqa  [ebx-64],xmm2
        paddd   xmm5,xmm7
        movdqa  xmm4,[32+ebx]
        pxor    xmm3,xmm5
        movdqa  xmm2,[ebx-32]
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-96]
        paddd   xmm1,xmm3
        movdqa  xmm6,[96+ebx]
        pxor    xmm7,xmm1
        movdqa  [ebx-112],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  [80+ebx],xmm7
        pxor    xmm3,xmm5
        paddd   xmm0,xmm2
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        pxor    xmm6,xmm0
        por     xmm3,xmm1
        movdqa  [16+ebx],xmm5
        pshufb  xmm6,[eax]
        movdqa  [ebx-48],xmm3
        paddd   xmm4,xmm6
        movdqa  xmm5,[48+ebx]
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-16]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-80]
        paddd   xmm0,xmm2
        movdqa  xmm7,[112+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-96],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [96+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        pshufb  xmm7,[eax]
        movdqa  [ebx-32],xmm2
        paddd   xmm5,xmm7
        pxor    xmm3,xmm5
        movdqa  xmm2,[ebx-48]
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-128]
        paddd   xmm1,xmm3
        pxor    xmm7,xmm1
        movdqa  [ebx-80],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  xmm6,xmm7
        pxor    xmm3,xmm5
        paddd   xmm0,xmm2
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        pxor    xmm6,xmm0
        por     xmm3,xmm1
        pshufb  xmm6,[eax]
        movdqa  [ebx-16],xmm3
        paddd   xmm4,xmm6
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-32]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-112]
        paddd   xmm0,xmm2
        movdqa  xmm7,[64+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-128],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [112+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        movdqa  [32+ebx],xmm4
        pshufb  xmm7,[eax]
        movdqa  [ebx-48],xmm2
        paddd   xmm5,xmm7
        movdqa  xmm4,[ebx]
        pxor    xmm3,xmm5
        movdqa  xmm2,[ebx-16]
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-96]
        paddd   xmm1,xmm3
        movdqa  xmm6,[80+ebx]
        pxor    xmm7,xmm1
        movdqa  [ebx-112],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  [64+ebx],xmm7
        pxor    xmm3,xmm5
        paddd   xmm0,xmm2
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        pxor    xmm6,xmm0
        por     xmm3,xmm1
        movdqa  [48+ebx],xmm5
        pshufb  xmm6,[eax]
        movdqa  [ebx-32],xmm3
        paddd   xmm4,xmm6
        movdqa  xmm5,[16+ebx]
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-64]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-80]
        paddd   xmm0,xmm2
        movdqa  xmm7,[96+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-96],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [80+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        pshufb  xmm7,[eax]
        movdqa  [ebx-16],xmm2
        paddd   xmm5,xmm7
        pxor    xmm3,xmm5
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-128]
        paddd   xmm1,xmm3
        movdqa  xmm6,[64+ebx]
        pxor    xmm7,xmm1
        movdqa  [ebx-80],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  [96+ebx],xmm7
        pxor    xmm3,xmm5
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        por     xmm3,xmm1
        dec     edx
        jnz     NEAR L$010loop
        ; Spill the registers still holding state words, then for each
        ; group of four words: add the input state, regroup the lanes with
        ; punpck* and XOR 16 bytes into each of the four 64-byte blocks.
        movdqa  [ebx-64],xmm3
        movdqa  [ebx],xmm4
        movdqa  [16+ebx],xmm5
        movdqa  [64+ebx],xmm6
        movdqa  [96+ebx],xmm7
        movdqa  xmm1,[ebx-112]
        movdqa  xmm2,[ebx-96]
        movdqa  xmm3,[ebx-80]
        ; words 0-3
        paddd   xmm0,[ebp-128]
        paddd   xmm1,[ebp-112]
        paddd   xmm2,[ebp-96]
        paddd   xmm3,[ebp-80]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[16+esi]
        pxor    xmm4,xmm0
        movdqa  xmm0,[ebx-64]
        pxor    xmm5,xmm1
        movdqa  xmm1,[ebx-48]
        pxor    xmm6,xmm2
        movdqa  xmm2,[ebx-32]
        pxor    xmm7,xmm3
        movdqa  xmm3,[ebx-16]
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[16+edi]
        ; words 4-7
        paddd   xmm0,[ebp-64]
        paddd   xmm1,[ebp-48]
        paddd   xmm2,[ebp-32]
        paddd   xmm3,[ebp-16]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[16+esi]
        pxor    xmm4,xmm0
        movdqa  xmm0,[ebx]
        pxor    xmm5,xmm1
        movdqa  xmm1,[16+ebx]
        pxor    xmm6,xmm2
        movdqa  xmm2,[32+ebx]
        pxor    xmm7,xmm3
        movdqa  xmm3,[48+ebx]
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[16+edi]
        ; words 8-11
        paddd   xmm0,[ebp]
        paddd   xmm1,[16+ebp]
        paddd   xmm2,[32+ebp]
        paddd   xmm3,[48+ebp]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[16+esi]
        pxor    xmm4,xmm0
        movdqa  xmm0,[64+ebx]
        pxor    xmm5,xmm1
        movdqa  xmm1,[80+ebx]
        pxor    xmm6,xmm2
        movdqa  xmm2,[96+ebx]
        pxor    xmm7,xmm3
        movdqa  xmm3,[112+ebx]
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[16+edi]
        ; words 12-15
        paddd   xmm0,[64+ebp]
        paddd   xmm1,[80+ebp]
        paddd   xmm2,[96+ebp]
        paddd   xmm3,[112+ebp]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[208+esi]                   ; 3*16 + 208 = 256 bytes consumed
        pxor    xmm4,xmm0
        pxor    xmm5,xmm1
        pxor    xmm6,xmm2
        pxor    xmm7,xmm3
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[208+edi]
        sub     ecx,256
        jnc     NEAR L$009outer_loop
        add     ecx,256                         ; ecx = leftover bytes (< 256)
        jz      NEAR L$011done
        ; Leftover after the 4x loop: undo the +128 pointer bias and build
        ; the counter block for the 1x path from the lane-0 counter + 4
        ; and the remaining three dwords of the caller's counter block.
        mov     ebx,DWORD [520+esp]
        lea     esi,[esi-128]
        mov     edx,DWORD [516+esp]
        lea     edi,[edi-128]
        movd    xmm2,DWORD [64+ebp]
        movdqu  xmm3,[ebx]
        paddd   xmm2,[96+eax]
        pand    xmm3,[112+eax]
        por     xmm3,xmm2
L$0081x:
        ; ---- 1x path: one block per iteration, xmm0..xmm3 = the four
        ; ---- state rows, xmm6/xmm7 = the two pshufb rotate masks.
        movdqa  xmm0,[32+eax]
        movdqu  xmm1,[edx]
        movdqu  xmm2,[16+edx]
        movdqa  xmm6,[eax]
        movdqa  xmm7,[16+eax]
        ; NOTE(review): this dword store is overwritten by the 16-byte
        ; store to [48+esp] three instructions below.
        mov     DWORD [48+esp],ebp
        movdqa  [esp],xmm0
        movdqa  [16+esp],xmm1
        movdqa  [32+esp],xmm2
        movdqa  [48+esp],xmm3
        mov     edx,10
        jmp     NEAR L$012loop1x
align   16
L$013outer1x:
        ; Reload the input state and add 1 to the counter dword.
        movdqa  xmm3,[80+eax]
        movdqa  xmm0,[esp]
        movdqa  xmm1,[16+esp]
        movdqa  xmm2,[32+esp]
        paddd   xmm3,[48+esp]
        mov     edx,10
        movdqa  [48+esp],xmm3
        jmp     NEAR L$012loop1x
align   16
L$012loop1x:
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm6                       ; rotate left 16
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,20
        pslld   xmm4,12
        por     xmm1,xmm4
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm7                       ; rotate left 8
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,25
        pslld   xmm4,7
        por     xmm1,xmm4
        ; Rotate rows 1-3 by one, two and three lanes before the second half.
        pshufd  xmm2,xmm2,78
        pshufd  xmm1,xmm1,57
        pshufd  xmm3,xmm3,147
        nop
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm6                       ; rotate left 16
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,20
        pslld   xmm4,12
        por     xmm1,xmm4
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm7                       ; rotate left 8
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,25
        pslld   xmm4,7
        por     xmm1,xmm4
        ; Undo the lane rotation.
        pshufd  xmm2,xmm2,78
        pshufd  xmm1,xmm1,147
        pshufd  xmm3,xmm3,57
        dec     edx
        jnz     NEAR L$012loop1x
        paddd   xmm0,[esp]
        paddd   xmm1,[16+esp]
        paddd   xmm2,[32+esp]
        paddd   xmm3,[48+esp]
        cmp     ecx,64
        jb      NEAR L$014tail
        movdqu  xmm4,[esi]
        movdqu  xmm5,[16+esi]
        pxor    xmm0,xmm4
        movdqu  xmm4,[32+esi]
        pxor    xmm1,xmm5
        movdqu  xmm5,[48+esi]
        pxor    xmm2,xmm4
        pxor    xmm3,xmm5
        lea     esi,[64+esi]
        movdqu  [edi],xmm0
        movdqu  [16+edi],xmm1
        movdqu  [32+edi],xmm2
        movdqu  [48+edi],xmm3
        lea     edi,[64+edi]
        sub     ecx,64
        jnz     NEAR L$013outer1x
        jmp     NEAR L$011done
L$014tail:
        ; Fewer than 64 bytes left (ecx is non-zero here): spill the
        ; key-stream block to [0..63+esp] and XOR it byte by byte.
        movdqa  [esp],xmm0
        movdqa  [16+esp],xmm1
        movdqa  [32+esp],xmm2
        movdqa  [48+esp],xmm3
        xor     eax,eax
        xor     edx,edx
        xor     ebp,ebp                         ; byte index
L$015tail_loop:
        mov     al,BYTE [ebp*1+esp]
        mov     dl,BYTE [ebp*1+esi]
        lea     ebp,[1+ebp]
        xor     al,dl
        mov     BYTE [ebp*1+edi-1],al
        dec     ecx
        jnz     NEAR L$015tail_loop
L$011done:
        mov     esp,DWORD [512+esp]             ; restore caller's esp
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

; Constant table addressed through eax in the SSSE3 code.
align   64
L$ssse3_data:
        ; +0:   pshufb mask, rotate each dword left by 16
        db      2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
        ; +16:  pshufb mask, rotate each dword left by 8
        db      3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
        ; +32:  first four state words (constants)
        dd      1634760805,857760878,2036477234,1797285236
        ; +48:  per-lane counter offsets for the 4x path
        dd      0,1,2,3
        ; +64:  4x-path counter step
        dd      4,4,4,4
        ; +80:  1x-path counter step
        dd      1,0,0,0
        ; +96:  counter adjustment when dropping from the 4x to the 1x path
        dd      4,0,0,0
        ; +112: mask that clears dword 0 of the counter block
        dd      0,-1,-1,-1
align   64
        ; "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
        db      67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
        db      44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
        db      60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
        db      114,103,62,0
segment .bss
common  _OPENSSL_ia32cap_P 16