/*
 * GHASH for 32-bit x86, GAS/AT&T syntax (op src,dst), Mach-O symbol naming
 * (leading underscore, .private_extern).  The identification string at the
 * end of the file decodes to
 *     "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Layout restored to one statement per line.  Instructions, operands,
 * labels and data are unchanged and in their original order.
 *
 * NOTE(review): the numbered local labels (L000x86_loop, L005pic_point, ...)
 * suggest this is generator output.  If so, change the generator rather
 * than this file -- confirm before hand-editing.
 *
 * Conventions visible in the code:
 *   - All arguments are read from the stack above the return address.
 *   - The routines that use %ebp/%ebx/%esi/%edi push and pop all four.
 *   - The *_mmx routines execute emms before returning.
 *   - Position-independent access to the constant tables is done with a
 *     call/pop pair that recovers the current instruction address.
 *
 * NOTE(review): the C prototypes quoted in the headers below are inferred
 * from how the arguments are used here -- confirm against the C header.
 */
#if defined(__i386__)
.file "ghash-x86.S"
.text

/*
 * _gcm_gmult_4bit_x86 -- presumably
 *     void gcm_gmult_4bit_x86(uint8_t Xi[16], const void *Htable);
 *
 * In:    arg0 (104(%esp) after the prologue) = pointer to a 16-byte state
 *        arg1 (108(%esp))                    = base of a table addressed as
 *                                              16-byte entries, index * 16
 * Out:   the 16 bytes at arg0 are overwritten with the result.
 * Stack: 84 bytes: 0..15(%esp) = copy of the input state,
 *        16..79(%esp) = 16 dwords holding the same values as the non-zero
 *        dwords of Lrem_4bit.
 * Uses only integer instructions (no MMX/SSE).
 */
.globl _gcm_gmult_4bit_x86
.private_extern _gcm_gmult_4bit_x86
.align 4
_gcm_gmult_4bit_x86:
L_gcm_gmult_4bit_x86_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	subl $84,%esp
	movl 104(%esp),%edi
	movl 108(%esp),%esi
	movl (%edi),%ebp
	movl 4(%edi),%edx
	movl 8(%edi),%ecx
	movl 12(%edi),%ebx
	/* Build the 4-bit reduction table on the stack. */
	movl $0,16(%esp)
	movl $471859200,20(%esp)
	movl $943718400,24(%esp)
	movl $610271232,28(%esp)
	movl $1887436800,32(%esp)
	movl $1822425088,36(%esp)
	movl $1220542464,40(%esp)
	movl $1423966208,44(%esp)
	movl $3774873600,48(%esp)
	movl $4246732800,52(%esp)
	movl $3644850176,56(%esp)
	movl $3311403008,60(%esp)
	movl $2441084928,64(%esp)
	movl $2376073216,68(%esp)
	movl $2847932416,72(%esp)
	movl $3051356160,76(%esp)
	/* Keep a byte-addressable copy of the state at 0..15(%esp). */
	movl %ebp,(%esp)
	movl %edx,4(%esp)
	movl %ecx,8(%esp)
	movl %ebx,12(%esp)
	/* (dword 3 >> 20) & 0xF0 = (low nibble of state byte 15) * 16. */
	shrl $20,%ebx
	andl $240,%ebx
	/* Accumulator ebp:edx:ecx:ebx (ebx least significant) = that entry. */
	movl 4(%esi,%ebx,1),%ebp
	movl (%esi,%ebx,1),%edx
	movl 12(%esi,%ebx,1),%ecx
	movl 8(%esi,%ebx,1),%ebx
	xorl %eax,%eax
	movl $15,%edi			/* edi = index of the state byte in use */
	jmp L000x86_loop
.align 4,0x90
L000x86_loop:
	/*
	 * Half-step for the high nibble of state byte edi: remember the
	 * nibble about to be shifted out (bl & 15), shift the 128-bit
	 * accumulator right by 4, fold the stack-table dword selected by
	 * that nibble into the top dword, then XOR in the table entry at
	 * byte offset (state[edi] & 0xF0).
	 */
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	andb $240,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	decl %edi
	js L001x86_break		/* byte 0 consumed: finished */
	/* Same half-step for the low nibble of the next lower byte. */
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	shlb $4,%al			/* (low nibble) * 16 */
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	jmp L000x86_loop
.align 4,0x90
L001x86_break:
	/* Byte-swap each dword and write the result back to arg0. */
	bswap %ebx
	bswap %ecx
	bswap %edx
	bswap %ebp
	movl 104(%esp),%edi
	movl %ebx,12(%edi)
	movl %ecx,8(%edi)
	movl %edx,4(%edi)
	movl %ebp,(%edi)
	addl $84,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * _gcm_ghash_4bit_x86 -- presumably
 *     void gcm_ghash_4bit_x86(uint8_t Xi[16], const void *Htable,
 *                             const uint8_t *inp, size_t len);
 *
 * In:    arg0 (104(%esp)) = pointer to the 16-byte state
 *        arg1 (108(%esp)) = table base (16-byte entries)
 *        arg2 (112(%esp)) = input pointer
 *        arg3 (116(%esp)) = length in bytes; converted in place to the
 *                           end pointer (inp + len)
 * Out:   state at arg0 updated after all blocks are absorbed.
 *
 * For each 16-byte block: state ^= block, then the same nibble-wise
 * multiply as _gcm_gmult_4bit_x86.
 * NOTE(review): the loop body runs before the bound is tested and the
 * bound test is "inp < end", so callers presumably must pass a non-zero
 * multiple of 16 -- confirm against the callers.
 */
.globl _gcm_ghash_4bit_x86
.private_extern _gcm_ghash_4bit_x86
.align 4
_gcm_ghash_4bit_x86:
L_gcm_ghash_4bit_x86_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	subl $84,%esp
	movl 104(%esp),%ebx
	movl 108(%esp),%esi
	movl 112(%esp),%edi
	movl 116(%esp),%ecx
	addl %edi,%ecx
	movl %ecx,116(%esp)		/* arg3 slot now holds inp + len */
	movl (%ebx),%ebp
	movl 4(%ebx),%edx
	movl 8(%ebx),%ecx
	movl 12(%ebx),%ebx
	/* Build the 4-bit reduction table on the stack. */
	movl $0,16(%esp)
	movl $471859200,20(%esp)
	movl $943718400,24(%esp)
	movl $610271232,28(%esp)
	movl $1887436800,32(%esp)
	movl $1822425088,36(%esp)
	movl $1220542464,40(%esp)
	movl $1423966208,44(%esp)
	movl $3774873600,48(%esp)
	movl $4246732800,52(%esp)
	movl $3644850176,56(%esp)
	movl $3311403008,60(%esp)
	movl $2441084928,64(%esp)
	movl $2376073216,68(%esp)
	movl $2847932416,72(%esp)
	movl $3051356160,76(%esp)
.align 4,0x90
L002x86_outer_loop:
	/* state ^= next 16 input bytes; keep a byte-addressable copy. */
	xorl 12(%edi),%ebx
	xorl 8(%edi),%ecx
	xorl 4(%edi),%edx
	xorl (%edi),%ebp
	movl %ebx,12(%esp)
	movl %ecx,8(%esp)
	movl %edx,4(%esp)
	movl %ebp,(%esp)
	shrl $20,%ebx
	andl $240,%ebx
	movl 4(%esi,%ebx,1),%ebp
	movl (%esi,%ebx,1),%edx
	movl 12(%esi,%ebx,1),%ecx
	movl 8(%esi,%ebx,1),%ebx
	xorl %eax,%eax
	movl $15,%edi			/* edi is reused as the byte index */
	jmp L003x86_loop
.align 4,0x90
L003x86_loop:
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	andb $240,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	decl %edi
	js L004x86_break
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	shlb $4,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	jmp L003x86_loop
.align 4,0x90
L004x86_break:
	bswap %ebx
	bswap %ecx
	bswap %edx
	bswap %ebp
	/* Advance the saved input pointer by one block. */
	movl 112(%esp),%edi
	leal 16(%edi),%edi
	cmpl 116(%esp),%edi
	movl %edi,112(%esp)		/* mov leaves the cmpl flags intact */
	jb L002x86_outer_loop		/* unsigned: inp < end */
	movl 104(%esp),%edi
	movl %ebx,12(%edi)
	movl %ecx,8(%edi)
	movl %edx,4(%edi)
	movl %ebp,(%edi)
	addl $84,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * _gcm_gmult_4bit_mmx -- presumably
 *     void gcm_gmult_4bit_mmx(uint8_t Xi[16], const void *Htable);
 *
 * In:    arg0 (20(%esp)) = pointer to the 16-byte state
 *        arg1 (24(%esp)) = table base (16-byte entries)
 * Out:   state at arg0 overwritten with the result.
 *
 * Same nibble-at-a-time scheme as _gcm_gmult_4bit_x86, but the
 * accumulator lives in mm0 (from entry offset 8) and mm1 (from entry
 * offset 0), and the reduction values come from Lrem_4bit (8-byte
 * entries) instead of a stack copy.  Requires MMX.
 */
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
.align 4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	/* PIC: eax = runtime address of Lrem_4bit. */
	call L005pic_point
L005pic_point:
	popl %eax
	leal Lrem_4bit-L005pic_point(%eax),%eax
	movzbl 15(%edi),%ebx
	xorl %ecx,%ecx
	movl %ebx,%edx
	movb %dl,%cl
	movl $14,%ebp			/* ebp = index of the next state byte */
	shlb $4,%cl			/* ecx = (low nibble of byte 15) * 16 */
	andl $240,%edx			/* edx = (high nibble of byte 15) * 16 */
	movq 8(%esi,%ecx,1),%mm0
	movq (%esi,%ecx,1),%mm1
	movd %mm0,%ebx
	jmp L006mmx_loop
.align 4,0x90
L006mmx_loop:
	psrlq $4,%mm0
	andl $15,%ebx			/* nibble shifted out of the accumulator */
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb (%edi,%ebp,1),%cl		/* fetch the next state byte */
	psllq $60,%mm2			/* bits carried from mm1 into mm0 */
	pxor (%eax,%ebx,8),%mm1
	decl %ebp
	movd %mm0,%ebx
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	pxor %mm2,%mm0
	/* js tests the flags from decl; movd/pxor/movl do not alter them. */
	js L007mmx_break
	shlb $4,%cl
	andl $15,%ebx
	psrlq $4,%mm0
	andl $240,%edx
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	movd %mm0,%ebx
	pxor (%esi,%ecx,1),%mm1
	pxor %mm2,%mm0
	jmp L006mmx_loop
.align 4,0x90
L007mmx_break:
	/* Final two half-steps for state byte 0 (low nibble, then high). */
	shlb $4,%cl
	andl $15,%ebx
	psrlq $4,%mm0
	andl $240,%edx
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	movd %mm0,%ebx
	pxor (%esi,%ecx,1),%mm1
	pxor %mm2,%mm0
	psrlq $4,%mm0
	andl $15,%ebx
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	movd %mm0,%ebx
	pxor (%esi,%edx,1),%mm1
	pxor %mm2,%mm0
	/* Split mm0/mm1 into four dwords, byte-swap, store to arg0. */
	psrlq $32,%mm0
	movd %mm1,%edx
	psrlq $32,%mm1
	movd %mm0,%ecx
	movd %mm1,%ebp
	bswap %ebx
	bswap %edx
	bswap %ecx
	bswap %ebp
	emms
	movl %ebx,12(%edi)
	movl %edx,4(%edi)
	movl %ecx,8(%edi)
	movl %ebp,(%edi)
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * _gcm_ghash_4bit_mmx -- presumably
 *     void gcm_ghash_4bit_mmx(uint8_t Xi[16], const void *Htable,
 *                             const uint8_t *inp, size_t len);
 *
 * In:    arg0 (20(%esp)) -> eax = pointer to the 16-byte state
 *        arg1 (24(%esp)) -> ebx = table base (16 entries of 16 bytes read)
 *        arg2 (28(%esp)) -> ecx = input pointer
 *        arg3 (32(%esp)) -> edx = length in bytes
 * Out:   state at arg0 updated after all blocks are absorbed.
 *
 * Uses pinsrw and pshufw on MMX registers, so it needs the SSE-era MMX
 * extensions in addition to base MMX.
 *
 * Frame (esp is re-aligned to 64 bytes, then lowered by 16):
 *     0..15(%esp)     one byte per table entry: low byte of
 *                     (dword at entry+8) << 4
 *    16..143(%esp)    qword at offset 8 of each of the 16 entries
 *   144..271(%esp)    qword at offset 0 of each of the 16 entries
 *   272..399(%esp)    low  qword of each entry shifted right by 4 bits
 *   400..527(%esp)    high qword of each entry shifted right by 4 bits
 *   528..539(%esp)    first 12 bytes of (state ^ input block)
 *   544(%esp)         saved state pointer (arg0)
 *   548(%esp)         current input pointer
 *   552(%esp)         end pointer (inp + len)
 *   556(%esp)         caller's esp, restored before the pops
 *
 * NOTE(review): the loop ends on "inp == end" (jne), so callers
 * presumably must pass a non-zero multiple of 16 -- confirm.
 */
.globl _gcm_ghash_4bit_mmx
.private_extern _gcm_ghash_4bit_mmx
.align 4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%eax
	movl 24(%esp),%ebx
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl %esp,%ebp			/* remember esp before re-alignment */
	/* PIC: esi = runtime address of Lrem_8bit. */
	call L008pic_point
L008pic_point:
	popl %esi
	leal Lrem_8bit-L008pic_point(%esi),%esi
	subl $544,%esp
	andl $-64,%esp
	subl $16,%esp
	addl %ecx,%edx			/* edx = inp + len */
	movl %eax,544(%esp)
	movl %edx,552(%esp)
	movl %ebp,556(%esp)
	/* Bias the pointers by 128 so every displacement fits in a byte. */
	addl $128,%ebx
	leal 144(%esp),%edi
	leal 400(%esp),%ebp
	/*
	 * Table build, software-pipelined over the 16 entries: copy both
	 * qwords of each entry, store the 4-bit right-shifted copy (with
	 * the 4 bits carried from the high qword into the low one), and
	 * record the byte that the shift drops at N(%esp).
	 */
	movl -120(%ebx),%edx
	movq -120(%ebx),%mm0
	movq -128(%ebx),%mm3
	shll $4,%edx
	movb %dl,(%esp)
	movl -104(%ebx),%edx
	movq -104(%ebx),%mm2
	movq -112(%ebx),%mm5
	movq %mm0,-128(%edi)
	psrlq $4,%mm0
	movq %mm3,(%edi)
	movq %mm3,%mm7
	psrlq $4,%mm3
	shll $4,%edx
	movb %dl,1(%esp)
	movl -88(%ebx),%edx
	movq -88(%ebx),%mm1
	psllq $60,%mm7
	movq -96(%ebx),%mm4
	por %mm7,%mm0
	movq %mm2,-120(%edi)
	psrlq $4,%mm2
	movq %mm5,8(%edi)
	movq %mm5,%mm6
	movq %mm0,-128(%ebp)
	psrlq $4,%mm5
	movq %mm3,(%ebp)
	shll $4,%edx
	movb %dl,2(%esp)
	movl -72(%ebx),%edx
	movq -72(%ebx),%mm0
	psllq $60,%mm6
	movq -80(%ebx),%mm3
	por %mm6,%mm2
	movq %mm1,-112(%edi)
	psrlq $4,%mm1
	movq %mm4,16(%edi)
	movq %mm4,%mm7
	movq %mm2,-120(%ebp)
	psrlq $4,%mm4
	movq %mm5,8(%ebp)
	shll $4,%edx
	movb %dl,3(%esp)
	movl -56(%ebx),%edx
	movq -56(%ebx),%mm2
	psllq $60,%mm7
	movq -64(%ebx),%mm5
	por %mm7,%mm1
	movq %mm0,-104(%edi)
	psrlq $4,%mm0
	movq %mm3,24(%edi)
	movq %mm3,%mm6
	movq %mm1,-112(%ebp)
	psrlq $4,%mm3
	movq %mm4,16(%ebp)
	shll $4,%edx
	movb %dl,4(%esp)
	movl -40(%ebx),%edx
	movq -40(%ebx),%mm1
	psllq $60,%mm6
	movq -48(%ebx),%mm4
	por %mm6,%mm0
	movq %mm2,-96(%edi)
	psrlq $4,%mm2
	movq %mm5,32(%edi)
	movq %mm5,%mm7
	movq %mm0,-104(%ebp)
	psrlq $4,%mm5
	movq %mm3,24(%ebp)
	shll $4,%edx
	movb %dl,5(%esp)
	movl -24(%ebx),%edx
	movq -24(%ebx),%mm0
	psllq $60,%mm7
	movq -32(%ebx),%mm3
	por %mm7,%mm2
	movq %mm1,-88(%edi)
	psrlq $4,%mm1
	movq %mm4,40(%edi)
	movq %mm4,%mm6
	movq %mm2,-96(%ebp)
	psrlq $4,%mm4
	movq %mm5,32(%ebp)
	shll $4,%edx
	movb %dl,6(%esp)
	movl -8(%ebx),%edx
	movq -8(%ebx),%mm2
	psllq $60,%mm6
	movq -16(%ebx),%mm5
	por %mm6,%mm1
	movq %mm0,-80(%edi)
	psrlq $4,%mm0
	movq %mm3,48(%edi)
	movq %mm3,%mm7
	movq %mm1,-88(%ebp)
	psrlq $4,%mm3
	movq %mm4,40(%ebp)
	shll $4,%edx
	movb %dl,7(%esp)
	movl 8(%ebx),%edx
	movq 8(%ebx),%mm1
	psllq $60,%mm7
	movq (%ebx),%mm4
	por %mm7,%mm0
	movq %mm2,-72(%edi)
	psrlq $4,%mm2
	movq %mm5,56(%edi)
	movq %mm5,%mm6
	movq %mm0,-80(%ebp)
	psrlq $4,%mm5
	movq %mm3,48(%ebp)
	shll $4,%edx
	movb %dl,8(%esp)
	movl 24(%ebx),%edx
	movq 24(%ebx),%mm0
	psllq $60,%mm6
	movq 16(%ebx),%mm3
	por %mm6,%mm2
	movq %mm1,-64(%edi)
	psrlq $4,%mm1
	movq %mm4,64(%edi)
	movq %mm4,%mm7
	movq %mm2,-72(%ebp)
	psrlq $4,%mm4
	movq %mm5,56(%ebp)
	shll $4,%edx
	movb %dl,9(%esp)
	movl 40(%ebx),%edx
	movq 40(%ebx),%mm2
	psllq $60,%mm7
	movq 32(%ebx),%mm5
	por %mm7,%mm1
	movq %mm0,-56(%edi)
	psrlq $4,%mm0
	movq %mm3,72(%edi)
	movq %mm3,%mm6
	movq %mm1,-64(%ebp)
	psrlq $4,%mm3
	movq %mm4,64(%ebp)
	shll $4,%edx
	movb %dl,10(%esp)
	movl 56(%ebx),%edx
	movq 56(%ebx),%mm1
	psllq $60,%mm6
	movq 48(%ebx),%mm4
	por %mm6,%mm0
	movq %mm2,-48(%edi)
	psrlq $4,%mm2
	movq %mm5,80(%edi)
	movq %mm5,%mm7
	movq %mm0,-56(%ebp)
	psrlq $4,%mm5
	movq %mm3,72(%ebp)
	shll $4,%edx
	movb %dl,11(%esp)
	movl 72(%ebx),%edx
	movq 72(%ebx),%mm0
	psllq $60,%mm7
	movq 64(%ebx),%mm3
	por %mm7,%mm2
	movq %mm1,-40(%edi)
	psrlq $4,%mm1
	movq %mm4,88(%edi)
	movq %mm4,%mm6
	movq %mm2,-48(%ebp)
	psrlq $4,%mm4
	movq %mm5,80(%ebp)
	shll $4,%edx
	movb %dl,12(%esp)
	movl 88(%ebx),%edx
	movq 88(%ebx),%mm2
	psllq $60,%mm6
	movq 80(%ebx),%mm5
	por %mm6,%mm1
	movq %mm0,-32(%edi)
	psrlq $4,%mm0
	movq %mm3,96(%edi)
	movq %mm3,%mm7
	movq %mm1,-40(%ebp)
	psrlq $4,%mm3
	movq %mm4,88(%ebp)
	shll $4,%edx
	movb %dl,13(%esp)
	movl 104(%ebx),%edx
	movq 104(%ebx),%mm1
	psllq $60,%mm7
	movq 96(%ebx),%mm4
	por %mm7,%mm0
	movq %mm2,-24(%edi)
	psrlq $4,%mm2
	movq %mm5,104(%edi)
	movq %mm5,%mm6
	movq %mm0,-32(%ebp)
	psrlq $4,%mm5
	movq %mm3,96(%ebp)
	shll $4,%edx
	movb %dl,14(%esp)
	movl 120(%ebx),%edx
	movq 120(%ebx),%mm0
	psllq $60,%mm6
	movq 112(%ebx),%mm3
	por %mm6,%mm2
	movq %mm1,-16(%edi)
	psrlq $4,%mm1
	movq %mm4,112(%edi)
	movq %mm4,%mm7
	movq %mm2,-24(%ebp)
	psrlq $4,%mm4
	movq %mm5,104(%ebp)
	shll $4,%edx
	movb %dl,15(%esp)
	psllq $60,%mm7
	por %mm7,%mm1
	movq %mm0,-8(%edi)
	psrlq $4,%mm0
	movq %mm3,120(%edi)
	movq %mm3,%mm6
	movq %mm1,-16(%ebp)
	psrlq $4,%mm3
	movq %mm4,112(%ebp)
	psllq $60,%mm6
	por %mm6,%mm0
	movq %mm0,-8(%ebp)
	movq %mm3,120(%ebp)
	/* Load the state: bytes 0..7 -> mm6, 8..11 -> ebx, 12..15 -> edx. */
	movq (%eax),%mm6
	movl 8(%eax),%ebx
	movl 12(%eax),%edx
.align 4,0x90
L009outer:
	/*
	 * state ^= next input block.  The block is then consumed one byte
	 * at a time, starting from byte 15: "roll $8,%edx / movb %dl,%al"
	 * extracts the byte, "andb $15" and "shrl $4" split it into
	 * nibbles.  The low nibble indexes the tables at 16/144(%esp), the
	 * high nibble the pre-shifted tables at 272/400(%esp).  The
	 * accumulator mm6:mm7 moves right by 8 bits per byte; the byte
	 * shifted out, corrected by the byte table at 0(%esp), indexes
	 * Lrem_8bit through pinsrw.  The steps are unrolled and interleaved.
	 */
	xorl 12(%ecx),%edx
	xorl 8(%ecx),%ebx
	pxor (%ecx),%mm6
	leal 16(%ecx),%ecx
	movl %ebx,536(%esp)
	movq %mm6,528(%esp)
	movl %ecx,548(%esp)
	xorl %eax,%eax
	roll $8,%edx
	movb %dl,%al
	movl %eax,%ebp
	andb $15,%al
	shrl $4,%ebp
	pxor %mm0,%mm0
	roll $8,%edx
	pxor %mm1,%mm1
	pxor %mm2,%mm2
	movq 16(%esp,%eax,8),%mm7
	movq 144(%esp,%eax,8),%mm6
	movb %dl,%al
	movd %mm7,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	shrl $4,%edi
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movl 536(%esp),%edx		/* next dword: bytes 8..11 */
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movl 532(%esp),%edx		/* next dword: bytes 4..7 */
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movl 528(%esp),%edx		/* next dword: bytes 0..3 */
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	/*
	 * NOTE(review): the value loaded here is never read; edx is next
	 * written by "movd %mm7,%edx" below.  524(%esp) lies inside the
	 * table area built above, so the load itself is harmless.
	 */
	movl 524(%esp),%edx
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm1
	/* Last byte: the final nibble uses a 4-bit shift instead of 8. */
	pxor 16(%esp,%eax,8),%mm7
	pxor 144(%esp,%eax,8),%mm6
	xorb (%esp,%ebp,1),%bl
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	movzbl %bl,%ebx
	pxor %mm2,%mm2
	psllq $4,%mm1
	movd %mm7,%ecx
	psrlq $4,%mm7
	movq %mm6,%mm3
	psrlq $4,%mm6
	shll $4,%ecx
	pxor 16(%esp,%edi,8),%mm7
	psllq $60,%mm3
	movzbl %cl,%ecx
	pxor %mm3,%mm7
	pxor 144(%esp,%edi,8),%mm6
	pinsrw $2,(%esi,%ebx,2),%mm0
	pxor %mm1,%mm6
	movd %mm7,%edx
	pinsrw $3,(%esi,%ecx,2),%mm2
	psllq $12,%mm0
	pxor %mm0,%mm6
	psrlq $32,%mm7
	pxor %mm2,%mm6
	movl 548(%esp),%ecx		/* reload the input pointer */
	movd %mm7,%ebx
	/* Byte-reverse mm6: swap bytes within words, then reverse words. */
	movq %mm6,%mm3
	psllw $8,%mm6
	psrlw $8,%mm3
	por %mm3,%mm6
	bswap %edx
	pshufw $27,%mm6,%mm6
	bswap %ebx
	cmpl 552(%esp),%ecx
	jne L009outer
	movl 544(%esp),%eax
	movl %edx,12(%eax)
	movl %ebx,8(%eax)
	movq %mm6,(%eax)
	movl 556(%esp),%esp		/* undo the aligned frame */
	emms
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * _gcm_init_clmul -- presumably
 *     void gcm_init_clmul(void *Htable, const uint64_t H[2]);
 *
 * In:    arg0 (4(%esp)) -> edx = output table (48 bytes are written)
 *        arg1 (8(%esp)) -> eax = 16-byte input value
 * Out:   0(%edx)  = input with its qwords swapped, shifted left by one
 *                   bit across 128 bits and conditionally XORed with the
 *                   constant at Lbswap+16
 *        16(%edx) = that value multiplied by itself and reduced
 *        32(%edx) = for each of the two values above, the XOR of its two
 *                   qword halves, packed by palignr
 *
 * Uses SSE2 plus the .byte-encoded pclmulqdq and palignr instructions;
 * clobbers eax, ecx, edx, xmm0-xmm5 and flags.
 */
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
	movl 4(%esp),%edx
	movl 8(%esp),%eax
	/* PIC: ecx = runtime address of Lbswap. */
	call L010pic
L010pic:
	popl %ecx
	leal Lbswap-L010pic(%ecx),%ecx
	movdqu (%eax),%xmm2
	pshufd $78,%xmm2,%xmm2		/* swap the two qwords */
	pshufd $255,%xmm2,%xmm4		/* broadcast the top dword */
	movdqa %xmm2,%xmm3
	psllq $1,%xmm2
	pxor %xmm5,%xmm5
	psrlq $63,%xmm3
	pcmpgtd %xmm4,%xmm5		/* all-ones if the top bit was set */
	pslldq $8,%xmm3
	por %xmm3,%xmm2			/* 128-bit shift left by 1 */
	/* Lbswap is 64-byte aligned, so this memory operand is aligned. */
	pand 16(%ecx),%xmm5
	pxor %xmm5,%xmm2
	movdqa %xmm2,%xmm0
	/* Three carry-less multiplies (lo*lo, hi*hi, (lo^hi)*(lo^hi)). */
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm2,%xmm4
	.byte 102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
	.byte 102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
	.byte 102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps %xmm0,%xmm3
	xorps %xmm1,%xmm3
	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	/* Reduce the 256-bit product xmm1:xmm0 to 128 bits in xmm0. */
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm2,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm2,%xmm3
	movdqu %xmm2,(%edx)
	pxor %xmm0,%xmm4
	movdqu %xmm0,16(%edx)
	.byte 102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
	movdqu %xmm4,32(%edx)
	ret

/*
 * _gcm_gmult_clmul -- presumably
 *     void gcm_gmult_clmul(uint8_t Xi[16], const void *Htable);
 *
 * In:    arg0 (4(%esp)) -> eax = pointer to the 16-byte state
 *        arg1 (8(%esp)) -> edx = table in the layout written by
 *                                _gcm_init_clmul (reads 0(%edx), 32(%edx))
 * Out:   state at arg0 overwritten with the reduced product.
 *
 * The state is byte-reversed with the Lbswap mask on load and again
 * before the store.  Uses SSE2 plus .byte-encoded pshufb and pclmulqdq;
 * clobbers eax, ecx, edx, xmm0-xmm5 and flags.
 */
.globl _gcm_gmult_clmul
.private_extern _gcm_gmult_clmul
.align 4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
	movl 4(%esp),%eax
	movl 8(%esp),%edx
	/* PIC: ecx = runtime address of Lbswap. */
	call L011pic
L011pic:
	popl %ecx
	leal Lbswap-L011pic(%ecx),%ecx
	movdqu (%eax),%xmm0
	movdqa (%ecx),%xmm5		/* byte-reversal mask */
	movups (%edx),%xmm2
	.byte 102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	movups 32(%edx),%xmm4
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
	.byte 102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
	.byte 102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
	.byte 102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps %xmm0,%xmm3
	xorps %xmm1,%xmm3
	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	/* Reduce the 256-bit product xmm1:xmm0 to 128 bits in xmm0. */
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	.byte 102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	movdqu %xmm0,(%eax)
	ret

/*
 * _gcm_ghash_clmul -- presumably
 *     void gcm_ghash_clmul(uint8_t Xi[16], const void *Htable,
 *                          const uint8_t *inp, size_t len);
 *
 * In:    arg0 (20(%esp)) -> eax = pointer to the 16-byte state
 *        arg1 (24(%esp)) -> edx = table in the layout written by
 *                                 _gcm_init_clmul (reads 0, 16, 32(%edx))
 *        arg2 (28(%esp)) -> esi = input pointer
 *        arg3 (32(%esp)) -> ebx = length in bytes
 * Out:   state at arg0 updated after all blocks are absorbed.
 *
 * Control flow on the length:
 *   len == 16        -> L013odd_tail (one block)
 *   otherwise        -> two blocks per pass in L015mod_loop, finished by
 *                       L014even_tail; if exactly one block remains
 *                       after that (ebx == 0) fall into L013odd_tail.
 * NOTE(review): len == 0 or a length that is not a multiple of 16 is not
 * special-cased here; callers presumably never pass one -- confirm.
 *
 * Uses SSE2 plus .byte-encoded pshufb and pclmulqdq; xmm0-xmm7 are used.
 */
.globl _gcm_ghash_clmul
.private_extern _gcm_ghash_clmul
.align 4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%eax
	movl 24(%esp),%edx
	movl 28(%esp),%esi
	movl 32(%esp),%ebx
	/* PIC: ecx = runtime address of Lbswap. */
	call L012pic
L012pic:
	popl %ecx
	leal Lbswap-L012pic(%ecx),%ecx
	movdqu (%eax),%xmm0
	movdqa (%ecx),%xmm5		/* byte-reversal mask */
	movdqu (%edx),%xmm2
	.byte 102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	subl $16,%ebx
	jz L013odd_tail			/* exactly one block */
	/* Prime the pipeline with the first two blocks. */
	movdqu (%esi),%xmm3
	movdqu 16(%esi),%xmm6
	.byte 102,15,56,0,221		/* pshufb %xmm5,%xmm3 */
	.byte 102,15,56,0,245		/* pshufb %xmm5,%xmm6 */
	movdqu 32(%edx),%xmm5
	pxor %xmm3,%xmm0
	pshufd $78,%xmm6,%xmm3
	movdqa %xmm6,%xmm7
	pxor %xmm6,%xmm3
	leal 32(%esi),%esi
	.byte 102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	.byte 102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	.byte 102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	movups 16(%edx),%xmm2		/* second table entry (the square) */
	nop
	subl $32,%ebx
	jbe L014even_tail
	jmp L015mod_loop
.align 5,0x90
L015mod_loop:
	/*
	 * One pass: multiply the accumulated state by the entry at
	 * 16(%edx), fold in the product of the previously loaded block,
	 * reduce, and in parallel start the multiply for the next pair.
	 */
	pshufd $78,%xmm0,%xmm4
	movdqa %xmm0,%xmm1
	pxor %xmm0,%xmm4
	nop
	.byte 102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
	.byte 102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
	.byte 102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movups (%edx),%xmm2
	xorps %xmm6,%xmm0
	movdqa (%ecx),%xmm5
	xorps %xmm7,%xmm1
	movdqu (%esi),%xmm7
	pxor %xmm0,%xmm3
	movdqu 16(%esi),%xmm6
	pxor %xmm1,%xmm3
	.byte 102,15,56,0,253		/* pshufb %xmm5,%xmm7 */
	pxor %xmm3,%xmm4
	movdqa %xmm4,%xmm3
	psrldq $8,%xmm4
	pslldq $8,%xmm3
	pxor %xmm4,%xmm1
	pxor %xmm3,%xmm0
	.byte 102,15,56,0,245		/* pshufb %xmm5,%xmm6 */
	pxor %xmm7,%xmm1
	movdqa %xmm6,%xmm7
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	.byte 102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups 32(%edx),%xmm5
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	pshufd $78,%xmm7,%xmm3
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm7,%xmm3
	pxor %xmm4,%xmm1
	.byte 102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups 16(%edx),%xmm2
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	.byte 102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal 32(%esi),%esi
	subl $32,%ebx
	ja L015mod_loop
L014even_tail:
	/* Finish the pending pair: multiply, combine and reduce. */
	pshufd $78,%xmm0,%xmm4
	movdqa %xmm0,%xmm1
	pxor %xmm0,%xmm4
	.byte 102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
	.byte 102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
	.byte 102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movdqa (%ecx),%xmm5
	xorps %xmm6,%xmm0
	xorps %xmm7,%xmm1
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3
	pxor %xmm3,%xmm4
	movdqa %xmm4,%xmm3
	psrldq $8,%xmm4
	pslldq $8,%xmm3
	pxor %xmm4,%xmm1
	pxor %xmm3,%xmm0
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	testl %ebx,%ebx
	jnz L016done			/* non-zero: no block left over */
	movups (%edx),%xmm2		/* one block left: reload entry 0 */
L013odd_tail:
	/* Single block: state ^= block, multiply by entry 0, reduce. */
	movdqu (%esi),%xmm3
	.byte 102,15,56,0,221		/* pshufb %xmm5,%xmm3 */
	pxor %xmm3,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm2,%xmm4
	.byte 102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
	.byte 102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
	.byte 102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps %xmm0,%xmm3
	xorps %xmm1,%xmm3
	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
L016done:
	.byte 102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	movdqu %xmm0,(%eax)
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * Constant data, kept in the text section and reached PC-relatively.
 *
 * Lbswap:    16-byte shuffle mask that reverses byte order, followed by
 *            the 16-byte constant that _gcm_init_clmul masks in with
 *            "pand 16(%ecx)".  64-byte aligned, which also satisfies the
 *            aligned loads (movdqa / pand) used on it.
 */
.align 6,0x90
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
/*
 * Lrem_8bit: 256 16-bit entries, indexed by a byte value scaled by 2
 *            (pinsrw in _gcm_ghash_4bit_mmx).
 */
.align 6,0x90
Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
/*
 * Lrem_4bit: 16 entries of 8 bytes (low dword 0, high dword = value),
 *            indexed by a nibble scaled by 8 in _gcm_gmult_4bit_mmx.
 *            The high dwords match the table the *_4bit_x86 routines
 *            build on their stack.
 */
.align 6,0x90
Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* ASCII, NUL-terminated: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif