#if defined(__i386__)
# ghash-x86.S -- GHASH (the GF(2^128) universal hash used by AES-GCM) for
# 32-bit x86.  AT&T syntax, Apple/Mach-O symbol conventions (leading
# underscores, `L`-prefixed assembler-local labels, `.private_extern`).
#
# NOTE(review): this is machine-generated CRYPTOGAMS-style code -- the
# signature string at the end of the file decodes to
# "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" -- presumably emitted
# by OpenSSL's ghash-x86.pl perlasm script; confirm against the build
# system.  Instruction order is deliberate scheduling: do not hand-reorder;
# fix the generator instead.
#
# Three implementations are provided:
#   * _gcm_gmult_4bit_mmx / _gcm_ghash_4bit_mmx -- MMX, table-driven
#     ("4-bit"/"8-bit" window, Lrem_4bit/Lrem_8bit reduction tables).
#   * _gcm_init_clmul / _gcm_gmult_clmul / _gcm_ghash_clmul -- PCLMULQDQ
#     (carry-less multiply), with the PCLMULQDQ/PSHUFB/PALIGNR opcodes
#     hand-encoded as .byte sequences (decoded in comments below).
.file "ghash-x86.S"
.text
#------------------------------------------------------------------------
# void gcm_gmult_4bit_mmx(Xi, Htable)
#   20(%esp) -> %edi : 16-byte GHASH state, multiplied by H in place
#   24(%esp) -> %esi : 256-byte table of precomputed multiples of H
# Clobbers: eax, ecx, edx, mm0-mm2, flags; saves/restores ebp/ebx/esi/edi.
# (Parameter names follow the usual GHASH convention -- TODO confirm
# against the C prototype in the enclosing project.)
#------------------------------------------------------------------------
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
.align 4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%edi                  # %edi = Xi (state)
    movl 24(%esp),%esi                  # %esi = Htable
    call L000pic_point                  # classic i386 PIC idiom:
L000pic_point:
    popl %eax                           # %eax = address of L000pic_point
    leal Lrem_4bit-L000pic_point(%eax),%eax # %eax = &Lrem_4bit
    movzbl 15(%edi),%ebx                # start from the last byte of Xi
    xorl %ecx,%ecx
    movl %ebx,%edx
    movb %dl,%cl
    movl $14,%ebp                       # %ebp = index of next Xi byte
    shlb $4,%cl                         # low nibble -> table offset *16
    andl $240,%edx                      # high nibble, already *16
    movq 8(%esi,%ecx,1),%mm0            # mm1:mm0 = Htable[low nibble]
    movq (%esi,%ecx,1),%mm1
    movd %mm0,%ebx
    jmp L001mmx_loop
.align 4,0x90
# Main nibble loop: two table lookups per Xi byte, folding the 4 bits
# shifted out through the Lrem_4bit reduction table.
L001mmx_loop:
    psrlq $4,%mm0
    andl $15,%ebx                       # bits shifted out -> rem table index
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%edx,1),%mm0            # fold in Htable[high nibble]
    movb (%edi,%ebp,1),%cl              # fetch next Xi byte
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1             # fold in Lrem_4bit[rem]
    decl %ebp
    movd %mm0,%ebx
    pxor (%esi,%edx,1),%mm1
    movl %ecx,%edx
    pxor %mm2,%mm0
    js L002mmx_break                    # all 16 bytes consumed
    shlb $4,%cl
    andl $15,%ebx
    psrlq $4,%mm0
    andl $240,%edx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%ecx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx
    pxor (%esi,%ecx,1),%mm1
    pxor %mm2,%mm0
    jmp L001mmx_loop
.align 4,0x90
# Loop tail: process the final two nibbles without fetching another byte.
L002mmx_break:
    shlb $4,%cl
    andl $15,%ebx
    psrlq $4,%mm0
    andl $240,%edx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%ecx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx
    pxor (%esi,%ecx,1),%mm1
    pxor %mm2,%mm0
    psrlq $4,%mm0
    andl $15,%ebx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%edx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx
    pxor (%esi,%edx,1),%mm1
    pxor %mm2,%mm0
    # Store the 128-bit result back to Xi, byte-swapping each 32-bit word.
    psrlq $32,%mm0
    movd %mm1,%edx
    psrlq $32,%mm1
    movd %mm0,%ecx
    movd %mm1,%ebp
    bswap %ebx
    bswap %edx
    bswap %ecx
    bswap %ebp
    emms                                # leave MMX state clean for the FPU
    movl %ebx,12(%edi)
    movl %edx,4(%edi)
    movl %ecx,8(%edi)
    movl %ebp,(%edi)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#------------------------------------------------------------------------
# void gcm_ghash_4bit_mmx(Xi, Htable, inp, len)
#   20(%esp) = Xi (16-byte state), 24(%esp) = Htable,
#   28(%esp) = input pointer, 32(%esp) = byte length.
# Builds four 128-byte lookup tables plus a 16-byte nibble table on a
# 64-byte-aligned stack area, then hashes the input 16 bytes per L004outer
# iteration using byte-wide lookups and the Lrem_8bit reduction table.
#
# Stack frame after alignment (relative to %esp):
#     0..15   : 16 one-byte entries (low byte of each Htable word << 4)
#    16..143  : table #1 (8-byte entries, indexed *8)
#   144..271  : table #2
#   272..399  : table #3 (table #1 entries shifted right 4 bits)
#   400..527  : table #4 (table #2 entries shifted right 4 bits)
#   524..543  : current Xi words (mm6 at 528, %ebx at 536)
#   544       : saved Xi pointer, 552: input end, 556: caller's %esp
#------------------------------------------------------------------------
.globl _gcm_ghash_4bit_mmx
.private_extern _gcm_ghash_4bit_mmx
.align 4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%eax                  # %eax = Xi
    movl 24(%esp),%ebx                  # %ebx = Htable
    movl 28(%esp),%ecx                  # %ecx = inp
    movl 32(%esp),%edx                  # %edx = len
    movl %esp,%ebp                      # remember caller's %esp
    call L003pic_point
L003pic_point:
    popl %esi
    leal Lrem_8bit-L003pic_point(%esi),%esi # %esi = &Lrem_8bit (PIC)
    subl $544,%esp
    andl $-64,%esp                      # 64-byte-align the table area
    subl $16,%esp
    addl %ecx,%edx                      # %edx = end of input
    movl %eax,544(%esp)
    movl %edx,552(%esp)
    movl %ebp,556(%esp)                 # saved %esp for epilogue
    # ---- table construction: expand Htable into the four stack tables,
    # interleaving plain and >>4 (with <<60 carry) copies of each entry.
    addl $128,%ebx
    leal 144(%esp),%edi
    leal 400(%esp),%ebp
    movl -120(%ebx),%edx
    movq -120(%ebx),%mm0
    movq -128(%ebx),%mm3
    shll $4,%edx
    movb %dl,(%esp)
    movl -104(%ebx),%edx
    movq -104(%ebx),%mm2
    movq -112(%ebx),%mm5
    movq %mm0,-128(%edi)
    psrlq $4,%mm0
    movq %mm3,(%edi)
    movq %mm3,%mm7
    psrlq $4,%mm3
    shll $4,%edx
    movb %dl,1(%esp)
    movl -88(%ebx),%edx
    movq -88(%ebx),%mm1
    psllq $60,%mm7
    movq -96(%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-120(%edi)
    psrlq $4,%mm2
    movq %mm5,8(%edi)
    movq %mm5,%mm6
    movq %mm0,-128(%ebp)
    psrlq $4,%mm5
    movq %mm3,(%ebp)
    shll $4,%edx
    movb %dl,2(%esp)
    movl -72(%ebx),%edx
    movq -72(%ebx),%mm0
    psllq $60,%mm6
    movq -80(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-112(%edi)
    psrlq $4,%mm1
    movq %mm4,16(%edi)
    movq %mm4,%mm7
    movq %mm2,-120(%ebp)
    psrlq $4,%mm4
    movq %mm5,8(%ebp)
    shll $4,%edx
    movb %dl,3(%esp)
    movl -56(%ebx),%edx
    movq -56(%ebx),%mm2
    psllq $60,%mm7
    movq -64(%ebx),%mm5
    por %mm7,%mm1
    movq %mm0,-104(%edi)
    psrlq $4,%mm0
    movq %mm3,24(%edi)
    movq %mm3,%mm6
    movq %mm1,-112(%ebp)
    psrlq $4,%mm3
    movq %mm4,16(%ebp)
    shll $4,%edx
    movb %dl,4(%esp)
    movl -40(%ebx),%edx
    movq -40(%ebx),%mm1
    psllq $60,%mm6
    movq -48(%ebx),%mm4
    por %mm6,%mm0
    movq %mm2,-96(%edi)
    psrlq $4,%mm2
    movq %mm5,32(%edi)
    movq %mm5,%mm7
    movq %mm0,-104(%ebp)
    psrlq $4,%mm5
    movq %mm3,24(%ebp)
    shll $4,%edx
    movb %dl,5(%esp)
    movl -24(%ebx),%edx
    movq -24(%ebx),%mm0
    psllq $60,%mm7
    movq -32(%ebx),%mm3
    por %mm7,%mm2
    movq %mm1,-88(%edi)
    psrlq $4,%mm1
    movq %mm4,40(%edi)
    movq %mm4,%mm6
    movq %mm2,-96(%ebp)
    psrlq $4,%mm4
    movq %mm5,32(%ebp)
    shll $4,%edx
    movb %dl,6(%esp)
    movl -8(%ebx),%edx
    movq -8(%ebx),%mm2
    psllq $60,%mm6
    movq -16(%ebx),%mm5
    por %mm6,%mm1
    movq %mm0,-80(%edi)
    psrlq $4,%mm0
    movq %mm3,48(%edi)
    movq %mm3,%mm7
    movq %mm1,-88(%ebp)
    psrlq $4,%mm3
    movq %mm4,40(%ebp)
    shll $4,%edx
    movb %dl,7(%esp)
    movl 8(%ebx),%edx
    movq 8(%ebx),%mm1
    psllq $60,%mm7
    movq (%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-72(%edi)
    psrlq $4,%mm2
    movq %mm5,56(%edi)
    movq %mm5,%mm6
    movq %mm0,-80(%ebp)
    psrlq $4,%mm5
    movq %mm3,48(%ebp)
    shll $4,%edx
    movb %dl,8(%esp)
    movl 24(%ebx),%edx
    movq 24(%ebx),%mm0
    psllq $60,%mm6
    movq 16(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-64(%edi)
    psrlq $4,%mm1
    movq %mm4,64(%edi)
    movq %mm4,%mm7
    movq %mm2,-72(%ebp)
    psrlq $4,%mm4
    movq %mm5,56(%ebp)
    shll $4,%edx
    movb %dl,9(%esp)
    movl 40(%ebx),%edx
    movq 40(%ebx),%mm2
    psllq $60,%mm7
    movq 32(%ebx),%mm5
    por %mm7,%mm1
    movq %mm0,-56(%edi)
    psrlq $4,%mm0
    movq %mm3,72(%edi)
    movq %mm3,%mm6
    movq %mm1,-64(%ebp)
    psrlq $4,%mm3
    movq %mm4,64(%ebp)
    shll $4,%edx
    movb %dl,10(%esp)
    movl 56(%ebx),%edx
    movq 56(%ebx),%mm1
    psllq $60,%mm6
    movq 48(%ebx),%mm4
    por %mm6,%mm0
    movq %mm2,-48(%edi)
    psrlq $4,%mm2
    movq %mm5,80(%edi)
    movq %mm5,%mm7
    movq %mm0,-56(%ebp)
    psrlq $4,%mm5
    movq %mm3,72(%ebp)
    shll $4,%edx
    movb %dl,11(%esp)
    movl 72(%ebx),%edx
    movq 72(%ebx),%mm0
    psllq $60,%mm7
    movq 64(%ebx),%mm3
    por %mm7,%mm2
    movq %mm1,-40(%edi)
    psrlq $4,%mm1
    movq %mm4,88(%edi)
    movq %mm4,%mm6
    movq %mm2,-48(%ebp)
    psrlq $4,%mm4
    movq %mm5,80(%ebp)
    shll $4,%edx
    movb %dl,12(%esp)
    movl 88(%ebx),%edx
    movq 88(%ebx),%mm2
    psllq $60,%mm6
    movq 80(%ebx),%mm5
    por %mm6,%mm1
    movq %mm0,-32(%edi)
    psrlq $4,%mm0
    movq %mm3,96(%edi)
    movq %mm3,%mm7
    movq %mm1,-40(%ebp)
    psrlq $4,%mm3
    movq %mm4,88(%ebp)
    shll $4,%edx
    movb %dl,13(%esp)
    movl 104(%ebx),%edx
    movq 104(%ebx),%mm1
    psllq $60,%mm7
    movq 96(%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-24(%edi)
    psrlq $4,%mm2
    movq %mm5,104(%edi)
    movq %mm5,%mm6
    movq %mm0,-32(%ebp)
    psrlq $4,%mm5
    movq %mm3,96(%ebp)
    shll $4,%edx
    movb %dl,14(%esp)
    movl 120(%ebx),%edx
    movq 120(%ebx),%mm0
    psllq $60,%mm6
    movq 112(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-16(%edi)
    psrlq $4,%mm1
    movq %mm4,112(%edi)
    movq %mm4,%mm7
    movq %mm2,-24(%ebp)
    psrlq $4,%mm4
    movq %mm5,104(%ebp)
    shll $4,%edx
    movb %dl,15(%esp)
    psllq $60,%mm7
    por %mm7,%mm1
    movq %mm0,-8(%edi)
    psrlq $4,%mm0
    movq %mm3,120(%edi)
    movq %mm3,%mm6
    movq %mm1,-16(%ebp)
    psrlq $4,%mm3
    movq %mm4,112(%ebp)
    psllq $60,%mm6
    por %mm6,%mm0
    movq %mm0,-8(%ebp)
    movq %mm3,120(%ebp)
    # ---- load initial Xi
    movq (%eax),%mm6
    movl 8(%eax),%ebx
    movl 12(%eax),%edx
.align 4,0x90
# One 16-byte input block per iteration; the body is the byte-wise
# multiply fully unrolled (16 nearly identical stanzas), folding spilled
# bits through Lrem_8bit via pinsrw and the byte table at (%esp).
L004outer:
    xorl 12(%ecx),%edx                  # Xi ^= input block
    xorl 8(%ecx),%ebx
    pxor (%ecx),%mm6
    leal 16(%ecx),%ecx                  # advance input pointer
    movl %ebx,536(%esp)
    movq %mm6,528(%esp)
    movl %ecx,548(%esp)
    xorl %eax,%eax
    roll $8,%edx
    movb %dl,%al
    movl %eax,%ebp
    andb $15,%al
    shrl $4,%ebp
    pxor %mm0,%mm0
    roll $8,%edx
    pxor %mm1,%mm1
    pxor %mm2,%mm2
    movq 16(%esp,%eax,8),%mm7
    movq 144(%esp,%eax,8),%mm6
    movb %dl,%al
    movd %mm7,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    shrl $4,%edi
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2        # Lrem_8bit[spilled byte]
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movl 536(%esp),%edx                 # next 32-bit word of Xi
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movl 532(%esp),%edx                 # next 32-bit word of Xi
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movl 528(%esp),%edx                 # next 32-bit word of Xi
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movl 524(%esp),%edx                 # final word feeding the last stanzas
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    pxor 144(%esp,%eax,8),%mm6
    xorb (%esp,%ebp,1),%bl
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    movzbl %bl,%ebx
    pxor %mm2,%mm2
    psllq $4,%mm1
    movd %mm7,%ecx
    psrlq $4,%mm7
    movq %mm6,%mm3
    psrlq $4,%mm6
    shll $4,%ecx
    pxor 16(%esp,%edi,8),%mm7
    psllq $60,%mm3
    movzbl %cl,%ecx
    pxor %mm3,%mm7
    pxor 144(%esp,%edi,8),%mm6
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor %mm1,%mm6
    movd %mm7,%edx
    pinsrw $3,(%esi,%ecx,2),%mm2
    psllq $12,%mm0
    pxor %mm0,%mm6
    psrlq $32,%mm7
    pxor %mm2,%mm6
    movl 548(%esp),%ecx                 # restore input pointer
    movd %mm7,%ebx
    # byte-swap the 64-bit accumulator: psllw/psrlw/por swaps bytes within
    # words, pshufw $27 (0b00011011) reverses the four words.
    movq %mm6,%mm3
    psllw $8,%mm6
    psrlw $8,%mm3
    por %mm3,%mm6
    bswap %edx
    pshufw $27,%mm6,%mm6
    bswap %ebx
    cmpl 552(%esp),%ecx                 # reached end of input?
    jne L004outer
    # ---- write back Xi and restore the caller's stack
    movl 544(%esp),%eax
    movl %edx,12(%eax)
    movl %ebx,8(%eax)
    movq %mm6,(%eax)
    movl 556(%esp),%esp
    emms
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#------------------------------------------------------------------------
# void gcm_init_clmul(Htable, H)
#   4(%esp) -> %edx : output table, 8(%esp) -> %eax : 16-byte hash key H.
# Byte-reflects/doubles H (the pcmpgtd/pand trick propagates the carry
# with the 0xc2... constant at Lbswap+16), computes H^2 with PCLMULQDQ,
# and stores H, H^2 and their Karatsuba "hi^lo" halves at 0/16/32(%edx).
#------------------------------------------------------------------------
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
    movl 4(%esp),%edx
    movl 8(%esp),%eax
    call L005pic
L005pic:
    popl %ecx
    leal Lbswap-L005pic(%ecx),%ecx      # %ecx = &Lbswap (PIC)
    movdqu (%eax),%xmm2
    pshufd $78,%xmm2,%xmm2              # swap 64-bit halves
    pshufd $255,%xmm2,%xmm4
    movdqa %xmm2,%xmm3
    psllq $1,%xmm2                      # H <<= 1 ...
    pxor %xmm5,%xmm5
    psrlq $63,%xmm3
    pcmpgtd %xmm4,%xmm5                 # ... with conditional reduction:
    pslldq $8,%xmm3
    por %xmm3,%xmm2                     # propagate cross-lane carry
    pand 16(%ecx),%xmm5                 # mask = poly constant if H was "negative"
    pxor %xmm5,%xmm2
    movdqa %xmm2,%xmm0
    # ---- Karatsuba multiply: xmm1:xmm0 = H * H
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pshufd $78,%xmm2,%xmm4
    pxor %xmm0,%xmm3
    pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0                # pclmulqdq $0x00,%xmm4,%xmm3
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    # ---- reduce the 256-bit product modulo the GHASH polynomial
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0                    # xmm0 = H^2
    # ---- store H, H^2 and packed Karatsuba halves
    pshufd $78,%xmm2,%xmm3
    pshufd $78,%xmm0,%xmm4
    pxor %xmm2,%xmm3
    movdqu %xmm2,(%edx)
    pxor %xmm0,%xmm4
    movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8                # palignr $8,%xmm3,%xmm4
    movdqu %xmm4,32(%edx)
    ret
#------------------------------------------------------------------------
# void gcm_gmult_clmul(Xi, Htable)
#   4(%esp) -> %eax : state Xi (updated in place), 8(%esp) -> %edx : Htable
# Single Karatsuba multiply by H followed by reduction; Lbswap supplies the
# byte-reversal mask for pshufb on load and store.
#------------------------------------------------------------------------
.globl _gcm_gmult_clmul
.private_extern _gcm_gmult_clmul
.align 4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
    movl 4(%esp),%eax
    movl 8(%esp),%edx
    call L006pic
L006pic:
    popl %ecx
    leal Lbswap-L006pic(%ecx),%ecx
    movdqu (%eax),%xmm0
    movdqa (%ecx),%xmm5                 # byte-swap mask
    movups (%edx),%xmm2                 # H
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    movups 32(%edx),%xmm4               # packed (hi^lo) halves
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0                # pclmulqdq $0x00,%xmm4,%xmm3
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    # reduction (same sequence as in gcm_init_clmul)
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    movdqu %xmm0,(%eax)
    ret
#------------------------------------------------------------------------
# void gcm_ghash_clmul(Xi, Htable, inp, len)
#   20(%esp) = Xi, 24(%esp) = Htable, 28(%esp) = input, 32(%esp) = bytes.
# Processes two blocks per L010mod_loop iteration: Xi is multiplied by H^2
# (16(%edx)) while the trailing block is multiplied by H ((%edx)), then the
# products are combined; single-block tails via L008odd_tail/L009even_tail.
#------------------------------------------------------------------------
.globl _gcm_ghash_clmul
.private_extern _gcm_ghash_clmul
.align 4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%eax                  # %eax = Xi
    movl 24(%esp),%edx                  # %edx = Htable
    movl 28(%esp),%esi                  # %esi = inp
    movl 32(%esp),%ebx                  # %ebx = len
    call L007pic
L007pic:
    popl %ecx
    leal Lbswap-L007pic(%ecx),%ecx
    movdqu (%eax),%xmm0
    movdqa (%ecx),%xmm5
    movdqu (%edx),%xmm2
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    subl $16,%ebx
    jz L008odd_tail                     # exactly one block
    # ---- first pair of blocks
    movdqu (%esi),%xmm3
    movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221                   # pshufb %xmm5,%xmm3
.byte 102,15,56,0,245                   # pshufb %xmm5,%xmm6
    movdqu 32(%edx),%xmm5
    pxor %xmm3,%xmm0                    # Xi ^= first block
    pshufd $78,%xmm6,%xmm3
    movdqa %xmm6,%xmm7
    pxor %xmm6,%xmm3
    leal 32(%esi),%esi
.byte 102,15,58,68,242,0                # pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,250,17               # pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,221,0                # pclmulqdq $0x00,%xmm5,%xmm3
    movups 16(%edx),%xmm2               # H^2
    nop
    subl $32,%ebx
    jbe L009even_tail
    jmp L010mod_loop
.align 5,0x90
L010mod_loop:
    pshufd $78,%xmm0,%xmm4
    movdqa %xmm0,%xmm1
    pxor %xmm0,%xmm4
    nop
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16               # pclmulqdq $0x10,%xmm5,%xmm4
    movups (%edx),%xmm2                 # reload H
    xorps %xmm6,%xmm0                   # combine with previous block's product
    movdqa (%ecx),%xmm5                 # reload byte-swap mask
    xorps %xmm7,%xmm1
    movdqu (%esi),%xmm7                 # prefetch next two blocks
    pxor %xmm0,%xmm3
    movdqu 16(%esi),%xmm6
    pxor %xmm1,%xmm3
.byte 102,15,56,0,253                   # pshufb %xmm5,%xmm7
    pxor %xmm3,%xmm4
    movdqa %xmm4,%xmm3
    psrldq $8,%xmm4
    pslldq $8,%xmm3
    pxor %xmm4,%xmm1
    pxor %xmm3,%xmm0
.byte 102,15,56,0,245                   # pshufb %xmm5,%xmm6
    pxor %xmm7,%xmm1
    movdqa %xmm6,%xmm7
    # reduction interleaved with the next block's multiply by H
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0                # pclmulqdq $0x00,%xmm2,%xmm6
    movups 32(%edx),%xmm5
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    pshufd $78,%xmm7,%xmm3
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm7,%xmm3
    pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17               # pclmulqdq $0x11,%xmm2,%xmm7
    movups 16(%edx),%xmm2               # H^2 again
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0                # pclmulqdq $0x00,%xmm5,%xmm3
    leal 32(%esi),%esi
    subl $32,%ebx
    ja L010mod_loop
L009even_tail:
    # fold in the last in-flight pair of blocks, then reduce
    pshufd $78,%xmm0,%xmm4
    movdqa %xmm0,%xmm1
    pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16               # pclmulqdq $0x10,%xmm5,%xmm4
    movdqa (%ecx),%xmm5
    xorps %xmm6,%xmm0
    xorps %xmm7,%xmm1
    pxor %xmm0,%xmm3
    pxor %xmm1,%xmm3
    pxor %xmm3,%xmm4
    movdqa %xmm4,%xmm3
    psrldq $8,%xmm4
    pslldq $8,%xmm3
    pxor %xmm4,%xmm1
    pxor %xmm3,%xmm0
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
    testl %ebx,%ebx
    jnz L011done                        # length was a multiple of 32
    movups (%edx),%xmm2                 # odd trailing block: multiply by H
L008odd_tail:
    movdqu (%esi),%xmm3
.byte 102,15,56,0,221                   # pshufb %xmm5,%xmm3
    pxor %xmm3,%xmm0                    # Xi ^= last block
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pshufd $78,%xmm2,%xmm4
    pxor %xmm0,%xmm3
    pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0                # pclmulqdq $0x00,%xmm4,%xmm3
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
L011done:
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    movdqu %xmm0,(%eax)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#------------------------------------------------------------------------
# Constant data.
#------------------------------------------------------------------------
.align 6,0x90
Lbswap:
# 16-byte pshufb mask that reverses byte order within an xmm register,
# followed (at Lbswap+16, used via pand in gcm_init_clmul) by the GHASH
# reduction constant 0x...c2 || 1.
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 6,0x90
Lrem_8bit:
# 256 x 16-bit reduction table for the 8-bit-window MMX path
# (indexed by the byte shifted out of the accumulator).
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 6,0x90
Lrem_4bit:
# 16 x 64-bit reduction constants for the 4-bit-window MMX path.
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
# ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif