#if defined(__i386__)
// 32-bit x86 routines, AT&T syntax, assembled through the C preprocessor.
// Symbols carry a leading underscore and use .private_extern (Mach-O style).
// The ID string at the end of this file reads
// "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".
.text
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
.align 4
// _gcm_gmult_4bit_mmx(arg1, arg2) -- arguments on the stack, MMX registers.
//   arg1 (%edi): 16-byte buffer. It is consumed from byte 15 down to byte 0,
//                one nibble at a time, and then overwritten with the result.
//   arg2 (%esi): table of 16-byte entries addressed by nibble*16. The qword
//                at +0 of an entry is used as the upper 64 bits and the qword
//                at +8 as the lower 64 bits of a 128-bit value.
//   NOTE(review): presumably arg1 is the GHASH state (Xi) and arg2 the
//   per-key 4-bit table (Htable) -- confirm against the C prototype.
// Saves and restores %ebp/%ebx/%esi/%edi; modifies %eax/%ecx/%edx, EFLAGS and
// %mm0-%mm2. emms is executed before returning.
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    // Four registers were pushed, so the first argument is at 20(%esp).
    movl 20(%esp),%edi
    movl 24(%esp),%esi
    // Position-independent address of Lrem_4bit: call/pop yields the address
    // of L000pic_point, and the label difference is added to it.
    call L000pic_point
L000pic_point:
    popl %eax
    leal Lrem_4bit-L000pic_point(%eax),%eax
    movzbl 15(%edi),%ebx            // start with the last byte of the buffer
    xorl %ecx,%ecx
    movl %ebx,%edx
    movb %dl,%cl
    movl $14,%ebp                   // index of the next byte to fetch
    shlb $4,%cl                     // %ecx = low nibble * 16
    andl $240,%edx                  // %edx = high nibble * 16
    movq 8(%esi,%ecx,1),%mm0        // accumulator, lower 64 bits
    movq (%esi,%ecx,1),%mm1         // accumulator, upper 64 bits
    movd %mm0,%ebx
    jmp L001mmx_loop
.align 4,0x90
// Each pass handles two nibbles: first the high nibble of the byte already
// held in %edx, then the low nibble of the byte just fetched into %cl.
// A nibble step shifts the 128-bit accumulator %mm1:%mm0 right by 4 bits,
// xors in the table entry for the nibble, and xors the Lrem_4bit entry
// selected by the 4 bits that were shifted out into the upper half.
L001mmx_loop:
    psrlq $4,%mm0
    andl $15,%ebx                   // the 4 bits about to be shifted out
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%edx,1),%mm0
    movb (%edi,%ebp,1),%cl          // fetch the next buffer byte
    psllq $60,%mm2                  // bits crossing from upper to lower qword
    pxor (%eax,%ebx,8),%mm1
    decl %ebp                       // sets SF once byte 0 has been fetched
    movd %mm0,%ebx
    pxor (%esi,%edx,1),%mm1
    movl %ecx,%edx
    pxor %mm2,%mm0
    // None of movd/pxor/movl modify EFLAGS, so js still tests the decl above.
    js L002mmx_break
    shlb $4,%cl
    andl $15,%ebx
    psrlq $4,%mm0
    andl $240,%edx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%ecx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx
    pxor (%esi,%ecx,1),%mm1
    pxor %mm2,%mm0
    jmp L001mmx_loop
.align 4,0x90
// Tail: the byte fetched last (byte 0) still needs both of its nibbles.
L002mmx_break:
    shlb $4,%cl
    andl $15,%ebx
    psrlq $4,%mm0
    andl $240,%edx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%ecx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx
    pxor (%esi,%ecx,1),%mm1
    pxor %mm2,%mm0
    psrlq $4,%mm0
    andl $15,%ebx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%edx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    // %mm2 only carries bits 60..63, so taking the low dword of %mm0 before
    // the final pxor below yields the same value.
    movd %mm0,%ebx
    pxor (%esi,%edx,1),%mm1
    pxor %mm2,%mm0
    // Split the accumulator into four dwords and byte-swap each of them.
    psrlq $32,%mm0
    movd %mm1,%edx
    psrlq $32,%mm1
    movd %mm0,%ecx
    movd %mm1,%ebp
    bswap %ebx
    bswap %edx
    bswap %ecx
    bswap %ebp
    emms                            // leave MMX state before returning
    // Write the result back over the input buffer; the upper half of the
    // accumulator (%mm1) lands in bytes 0..7.
    movl %ebx,12(%edi)
    movl %edx,4(%edi)
    movl %ecx,8(%edi)
    movl %ebp,(%edi)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
// Export directives for the routine that follows.
.globl _gcm_ghash_4bit_mmx
.private_extern _gcm_ghash_4bit_mmx
.align 4
// _gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4) -- stack arguments, MMX.
//   arg1 (%eax): 16-byte state buffer, read on entry and rewritten on exit.
//   arg2 (%ebx): table of sixteen 16-byte entries (qword at +0 is the upper
//                half, qword at +8 the lower half of each entry).
//   arg3 (%ecx): input pointer, consumed 16 bytes per iteration.
//   arg4 (%edx): byte count; arg3 + arg4 is used as the end pointer.
//   NOTE(review): presumably (Xi, Htable, inp, len) -- confirm against the
//   C prototype.
// The main loop is bottom-tested and exits only when the input pointer
// equals the end pointer, so it always runs at least once and arg4 has to be
// a non-zero multiple of 16.
//
// Stack frame after realignment (offsets from %esp):
//     0..15   one byte per table entry: low byte of the +8 qword, shifted
//             left by 4
//    16..143  +8 qwords of the 16 entries
//   144..271  +0 qwords of the 16 entries
//   272..399  lower qwords of the entries shifted right by 4 bits (128-bit)
//   400..527  upper qwords of the entries shifted right by 4 bits
//   528..535  state bytes 0..7 (after xor with the input block)
//   536       state bytes 8..11 (after xor with the input block)
//   544       arg1
//   548       current input pointer
//   552       end pointer
//   556       caller %esp
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%eax
    movl 24(%esp),%ebx
    movl 28(%esp),%ecx
    movl 32(%esp),%edx
    movl %esp,%ebp                  // remember %esp for the epilogue
    // Position-independent address of Lrem_8bit in %esi.
    call L003pic_point
L003pic_point:
    popl %esi
    leal Lrem_8bit-L003pic_point(%esi),%esi
    // Reserve the scratch frame and align it down to 64 bytes.
    subl $544,%esp
    andl $-64,%esp
    subl $16,%esp
    addl %ecx,%edx                  // %edx = end of input
    movl %eax,544(%esp)
    movl %edx,552(%esp)
    movl %ebp,556(%esp)
    // Bias the pointers by 128 so every table slot is reachable with an
    // 8-bit displacement: %ebx = arg2+128, %edi = %esp+144, %ebp = %esp+400.
    addl $128,%ebx
    leal 144(%esp),%edi
    leal 400(%esp),%ebp
    // Unrolled and interleaved over the 16 table entries: copy each entry
    // to the stack, store a copy shifted right by 4 bits, and record the
    // byte table at 0..15(%esp).
    movl -120(%ebx),%edx
    movq -120(%ebx),%mm0
    movq -128(%ebx),%mm3
    shll $4,%edx
    movb %dl,(%esp)
    movl -104(%ebx),%edx
    movq -104(%ebx),%mm2
    movq -112(%ebx),%mm5
    movq %mm0,-128(%edi)
    psrlq $4,%mm0
    movq %mm3,(%edi)
    movq %mm3,%mm7
    psrlq $4,%mm3
    shll $4,%edx
    movb %dl,1(%esp)
    movl -88(%ebx),%edx
    movq -88(%ebx),%mm1
    psllq $60,%mm7
    movq -96(%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-120(%edi)
    psrlq $4,%mm2
    movq %mm5,8(%edi)
    movq %mm5,%mm6
    movq %mm0,-128(%ebp)
    psrlq $4,%mm5
    movq %mm3,(%ebp)
    shll $4,%edx
    movb %dl,2(%esp)
    movl -72(%ebx),%edx
    movq -72(%ebx),%mm0
    psllq $60,%mm6
    movq -80(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-112(%edi)
    psrlq $4,%mm1
    movq %mm4,16(%edi)
    movq %mm4,%mm7
    movq %mm2,-120(%ebp)
    psrlq $4,%mm4
    movq %mm5,8(%ebp)
    shll $4,%edx
    movb %dl,3(%esp)
    movl -56(%ebx),%edx
    movq -56(%ebx),%mm2
    psllq $60,%mm7
    movq -64(%ebx),%mm5
    por %mm7,%mm1
    movq %mm0,-104(%edi)
    psrlq $4,%mm0
    movq %mm3,24(%edi)
    movq %mm3,%mm6
    movq %mm1,-112(%ebp)
    psrlq $4,%mm3
    movq %mm4,16(%ebp)
    shll $4,%edx
    movb %dl,4(%esp)
    movl -40(%ebx),%edx
    movq -40(%ebx),%mm1
    psllq $60,%mm6
    movq -48(%ebx),%mm4
    por %mm6,%mm0
    movq %mm2,-96(%edi)
    psrlq $4,%mm2
    movq %mm5,32(%edi)
    movq %mm5,%mm7
    movq %mm0,-104(%ebp)
    psrlq $4,%mm5
    movq %mm3,24(%ebp)
    shll $4,%edx
    movb %dl,5(%esp)
    movl -24(%ebx),%edx
    movq -24(%ebx),%mm0
    psllq $60,%mm7
    movq -32(%ebx),%mm3
    por %mm7,%mm2
    movq %mm1,-88(%edi)
    psrlq $4,%mm1
    movq %mm4,40(%edi)
    movq %mm4,%mm6
    movq %mm2,-96(%ebp)
    psrlq $4,%mm4
    movq %mm5,32(%ebp)
    shll $4,%edx
    movb %dl,6(%esp)
    movl -8(%ebx),%edx
    movq -8(%ebx),%mm2
    psllq $60,%mm6
    movq -16(%ebx),%mm5
    por %mm6,%mm1
    movq %mm0,-80(%edi)
    psrlq $4,%mm0
    movq %mm3,48(%edi)
    movq %mm3,%mm7
    movq %mm1,-88(%ebp)
    psrlq $4,%mm3
    movq %mm4,40(%ebp)
    shll $4,%edx
    movb %dl,7(%esp)
    movl 8(%ebx),%edx
    movq 8(%ebx),%mm1
    psllq $60,%mm7
    movq (%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-72(%edi)
    psrlq $4,%mm2
    movq %mm5,56(%edi)
    movq %mm5,%mm6
    movq %mm0,-80(%ebp)
    psrlq $4,%mm5
    movq %mm3,48(%ebp)
    shll $4,%edx
    movb %dl,8(%esp)
    movl 24(%ebx),%edx
    movq 24(%ebx),%mm0
    psllq $60,%mm6
    movq 16(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-64(%edi)
    psrlq $4,%mm1
    movq %mm4,64(%edi)
    movq %mm4,%mm7
    movq %mm2,-72(%ebp)
    psrlq $4,%mm4
    movq %mm5,56(%ebp)
    shll $4,%edx
    movb %dl,9(%esp)
    movl 40(%ebx),%edx
    movq 40(%ebx),%mm2
    psllq $60,%mm7
    movq 32(%ebx),%mm5
    por %mm7,%mm1
    movq %mm0,-56(%edi)
    psrlq $4,%mm0
    movq %mm3,72(%edi)
    movq %mm3,%mm6
    movq %mm1,-64(%ebp)
    psrlq $4,%mm3
    movq %mm4,64(%ebp)
    shll $4,%edx
    movb %dl,10(%esp)
    movl 56(%ebx),%edx
    movq 56(%ebx),%mm1
    psllq $60,%mm6
    movq 48(%ebx),%mm4
    por %mm6,%mm0
    movq %mm2,-48(%edi)
    psrlq $4,%mm2
    movq %mm5,80(%edi)
    movq %mm5,%mm7
    movq %mm0,-56(%ebp)
    psrlq $4,%mm5
    movq %mm3,72(%ebp)
    shll $4,%edx
    movb %dl,11(%esp)
    movl 72(%ebx),%edx
    movq 72(%ebx),%mm0
    psllq $60,%mm7
    movq 64(%ebx),%mm3
    por %mm7,%mm2
    movq %mm1,-40(%edi)
    psrlq $4,%mm1
    movq %mm4,88(%edi)
    movq %mm4,%mm6
    movq %mm2,-48(%ebp)
    psrlq $4,%mm4
    movq %mm5,80(%ebp)
    shll $4,%edx
    movb %dl,12(%esp)
    movl 88(%ebx),%edx
    movq 88(%ebx),%mm2
    psllq $60,%mm6
    movq 80(%ebx),%mm5
    por %mm6,%mm1
    movq %mm0,-32(%edi)
    psrlq $4,%mm0
    movq %mm3,96(%edi)
    movq %mm3,%mm7
    movq %mm1,-40(%ebp)
    psrlq $4,%mm3
    movq %mm4,88(%ebp)
    shll $4,%edx
    movb %dl,13(%esp)
    movl 104(%ebx),%edx
    movq 104(%ebx),%mm1
    psllq $60,%mm7
    movq 96(%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-24(%edi)
    psrlq $4,%mm2
    movq %mm5,104(%edi)
    movq %mm5,%mm6
    movq %mm0,-32(%ebp)
    psrlq $4,%mm5
    movq %mm3,96(%ebp)
    shll $4,%edx
    movb %dl,14(%esp)
    movl 120(%ebx),%edx
    movq 120(%ebx),%mm0
    psllq $60,%mm6
    movq 112(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-16(%edi)
    psrlq $4,%mm1
    movq %mm4,112(%edi)
    movq %mm4,%mm7
    movq %mm2,-24(%ebp)
    psrlq $4,%mm4
    movq %mm5,104(%ebp)
    shll $4,%edx
    movb %dl,15(%esp)
    psllq $60,%mm7
    por %mm7,%mm1
    movq %mm0,-8(%edi)
    psrlq $4,%mm0
    movq %mm3,120(%edi)
    movq %mm3,%mm6
    movq %mm1,-16(%ebp)
    psrlq $4,%mm3
    movq %mm4,112(%ebp)
    psllq $60,%mm6
    por %mm6,%mm0
    movq %mm0,-8(%ebp)
    movq %mm3,120(%ebp)
    // Load the current state: bytes 0..7 in %mm6, 8..11 in %ebx, 12..15 in
    // %edx.
    movq (%eax),%mm6
    movl 8(%eax),%ebx
    movl 12(%eax),%edx
.align 4,0x90
// One iteration per 16-byte input block. The state is xored with the block
// and then consumed a byte at a time, starting with byte 15 (roll $8 brings
// the next byte into %dl). For every byte the accumulator %mm6:%mm7 is
// shifted right by 8 bits, the low nibble indexes the tables at 16/144 and
// the high nibble the pre-shifted tables at 272/400; the byte shifted out is
// turned into a 16-bit correction fetched from Lrem_8bit (%esi) via pinsrw.
// The steps are software-pipelined, so the work for consecutive bytes
// overlaps and alternates between the %ebx/%ecx and %ebp/%edi register pairs.
L004outer:
    xorl 12(%ecx),%edx
    xorl 8(%ecx),%ebx
    pxor (%ecx),%mm6
    leal 16(%ecx),%ecx
    movl %ebx,536(%esp)
    movq %mm6,528(%esp)
    movl %ecx,548(%esp)
    xorl %eax,%eax                  // %eax is reused as a nibble index
    roll $8,%edx
    movb %dl,%al
    movl %eax,%ebp
    andb $15,%al
    shrl $4,%ebp
    pxor %mm0,%mm0
    roll $8,%edx
    pxor %mm1,%mm1
    pxor %mm2,%mm2
    movq 16(%esp,%eax,8),%mm7
    movq 144(%esp,%eax,8),%mm6
    movb %dl,%al
    movd %mm7,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    shrl $4,%edi
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movl 536(%esp),%edx             // next dword of state: bytes 8..11
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movl 532(%esp),%edx             // next dword of state: bytes 4..7
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movl 528(%esp),%edx             // last dword of state: bytes 0..3
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    movb %dl,%al
    // NOTE(review): this load looks dead -- all 16 state bytes have been
    // extracted and %edx is overwritten by the movd below before it is read
    // again. 524(%esp) lies inside the table area, so the access is in bounds.
    movl 524(%esp),%edx
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    pxor 144(%esp,%eax,8),%mm6
    xorb (%esp,%ebp,1),%bl
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    movzbl %bl,%ebx
    // Final step: only 4 more bits are shifted out (psrlq $4), and the
    // pending Lrem_8bit corrections are merged with adjusted shifts.
    pxor %mm2,%mm2
    psllq $4,%mm1
    movd %mm7,%ecx
    psrlq $4,%mm7
    movq %mm6,%mm3
    psrlq $4,%mm6
    shll $4,%ecx
    pxor 16(%esp,%edi,8),%mm7
    psllq $60,%mm3
    movzbl %cl,%ecx
    pxor %mm3,%mm7
    pxor 144(%esp,%edi,8),%mm6
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor %mm1,%mm6
    movd %mm7,%edx
    pinsrw $3,(%esi,%ecx,2),%mm2
    psllq $12,%mm0
    pxor %mm0,%mm6
    psrlq $32,%mm7
    pxor %mm2,%mm6
    movl 548(%esp),%ecx             // reload the input pointer
    movd %mm7,%ebx
    // Byte-reverse the new state: swap bytes within each 16-bit word
    // (psllw/psrlw/por), reverse the word order (pshufw $27), and bswap the
    // two dwords held in %edx/%ebx.
    movq %mm6,%mm3
    psllw $8,%mm6
    psrlw $8,%mm3
    por %mm3,%mm6
    bswap %edx
    pshufw $27,%mm6,%mm6
    bswap %ebx
    cmpl 552(%esp),%ecx
    jne L004outer
    // Store the state back through arg1 and restore the caller stack pointer.
    movl 544(%esp),%eax
    movl %edx,12(%eax)
    movl %ebx,8(%eax)
    movq %mm6,(%eax)
    movl 556(%esp),%esp
    emms
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
// _gcm_init_clmul(arg1, arg2) -- stack arguments, SSE2/SSSE3/PCLMULQDQ.
//   arg1 (%edx): output buffer, 48 bytes are written.
//   arg2 (%eax): 16-byte input, loaded unaligned.
//   NOTE(review): presumably (Htable, H) -- confirm against the C prototype.
// Writes three 16-byte values: at +0 the input with its 64-bit halves
// swapped, shifted left by one bit and conditionally xored with the constant
// at Lbswap+16; at +16 the reduced carry-less square of that value; at +32
// the xor-of-halves of both values packed into one register.
// Leaf routine: modifies %eax/%ecx/%edx and %xmm0-%xmm5 only. The .byte
// sequences are hand-encoded instructions; decodings are given beside them.
_gcm_init_clmul:
L_gcm_init_clmul_begin:
    movl 4(%esp),%edx
    movl 8(%esp),%eax
    // Position-independent address of Lbswap in %ecx.
    call L005pic
L005pic:
    popl %ecx
    leal Lbswap-L005pic(%ecx),%ecx
    movdqu (%eax),%xmm2
    pshufd $78,%xmm2,%xmm2          // swap the two 64-bit halves
    pshufd $255,%xmm2,%xmm4         // broadcast the top dword
    // 128-bit shift left by one: shift both qwords and carry bit 63 of the
    // low qword into the high qword.
    movdqa %xmm2,%xmm3
    psllq $1,%xmm2
    pxor %xmm5,%xmm5
    psrlq $63,%xmm3
    pcmpgtd %xmm4,%xmm5             // all-ones when the top bit was set
    pslldq $8,%xmm3
    por %xmm3,%xmm2
    pand 16(%ecx),%xmm5             // select the constant stored after Lbswap
    pxor %xmm5,%xmm2
    // Square the value using three carry-less multiplies: low*low,
    // high*high and (low^high)*(low^high).
    movdqa %xmm2,%xmm0
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pshufd $78,%xmm2,%xmm4
    pxor %xmm0,%xmm3
    pxor %xmm2,%xmm4
    .byte 102,15,58,68,194,0        // pclmulqdq $0x00,%xmm2,%xmm0
    .byte 102,15,58,68,202,17       // pclmulqdq $0x11,%xmm2,%xmm1
    .byte 102,15,58,68,220,0        // pclmulqdq $0x00,%xmm4,%xmm3
    // Fold the low and high products into the middle term and distribute
    // it over the 256-bit result held in %xmm1:%xmm0.
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    // Shift/xor sequence that folds the 256-bit product back to 128 bits.
    // NOTE(review): presumably the reduction modulo the GCM polynomial --
    // confirm against the generating script.
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
    // Store both values plus their precomputed xor-of-halves.
    pshufd $78,%xmm2,%xmm3
    pshufd $78,%xmm0,%xmm4
    pxor %xmm2,%xmm3
    movdqu %xmm2,(%edx)
    pxor %xmm0,%xmm4
    movdqu %xmm0,16(%edx)
    .byte 102,15,58,15,227,8        // palignr $8,%xmm3,%xmm4
    movdqu %xmm4,32(%edx)
    ret
.globl _gcm_gmult_clmul
.private_extern _gcm_gmult_clmul
.align 4
// _gcm_gmult_clmul(arg1, arg2) -- stack arguments, SSE2/SSSE3/PCLMULQDQ.
//   arg1 (%eax): 16-byte buffer, updated in place.
//   arg2 (%edx): table; the 16-byte values at +0 and +32 are read.
//   NOTE(review): presumably (Xi, Htable) with the table laid out as written
//   by _gcm_init_clmul -- confirm against the C prototype.
// The buffer is byte-reversed with the Lbswap mask, multiplied carry-lessly
// by the value at arg2+0, reduced, byte-reversed again and stored back.
// Leaf routine: modifies %eax/%ecx/%edx and %xmm0-%xmm5 only.
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
    movl 4(%esp),%eax
    movl 8(%esp),%edx
    // Position-independent address of Lbswap in %ecx.
    call L006pic
L006pic:
    popl %ecx
    leal Lbswap-L006pic(%ecx),%ecx
    movdqu (%eax),%xmm0
    movdqa (%ecx),%xmm5             // aligned load: Lbswap follows an .align
    movups (%edx),%xmm2
    .byte 102,15,56,0,197           // pshufb %xmm5,%xmm0
    movups 32(%edx),%xmm4
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pxor %xmm0,%xmm3
    .byte 102,15,58,68,194,0        // pclmulqdq $0x00,%xmm2,%xmm0
    .byte 102,15,58,68,202,17       // pclmulqdq $0x11,%xmm2,%xmm1
    .byte 102,15,58,68,220,0        // pclmulqdq $0x00,%xmm4,%xmm3
    // Combine the three partial products into %xmm1:%xmm0.
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    // Fold the 256-bit product back to 128 bits (same shift/xor sequence as
    // in the other clmul routines of this file).
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
    .byte 102,15,56,0,197           // pshufb %xmm5,%xmm0
    movdqu %xmm0,(%eax)
    ret
.globl _gcm_ghash_clmul
.private_extern _gcm_ghash_clmul
.align 4
// _gcm_ghash_clmul(arg1, arg2, arg3, arg4) -- stack arguments,
// SSE2/SSSE3/PCLMULQDQ.
//   arg1 (%eax): 16-byte state buffer, updated in place.
//   arg2 (%edx): table; the 16-byte values at +0, +16 and +32 are read.
//   arg3 (%esi): input pointer.
//   arg4 (%ebx): byte count.
//   NOTE(review): presumably (Xi, Htable, inp, len) -- confirm against the
//   C prototype.
// Length handling: exactly 16 bytes goes straight to the single-block tail;
// otherwise blocks are consumed in pairs, with one trailing block handled by
// the same tail. A count of 0 is not special-cased (16 is subtracted first,
// so the two-block path would be taken), and the branch logic only lines up
// for multiples of 16 -- callers apparently must pass a non-zero multiple
// of 16.
// %ebp/%ebx/%esi/%edi are saved and restored; %xmm0-%xmm7 are modified.
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%eax
    movl 24(%esp),%edx
    movl 28(%esp),%esi
    movl 32(%esp),%ebx
    // Position-independent address of Lbswap in %ecx.
    call L007pic
L007pic:
    popl %ecx
    leal Lbswap-L007pic(%ecx),%ecx
    movdqu (%eax),%xmm0
    movdqa (%ecx),%xmm5             // byte-reversal mask
    movdqu (%edx),%xmm2
    .byte 102,15,56,0,197           // pshufb %xmm5,%xmm0
    subl $16,%ebx
    jz L008odd_tail                 // exactly one block
    // Prime the pipeline with the first two blocks: the first is xored into
    // the state, the second is multiplied by the value at arg2+0.
    movdqu (%esi),%xmm3
    movdqu 16(%esi),%xmm6
    .byte 102,15,56,0,221           // pshufb %xmm5,%xmm3
    .byte 102,15,56,0,245           // pshufb %xmm5,%xmm6
    movdqu 32(%edx),%xmm5           // %xmm5 now holds the value at arg2+32
    pxor %xmm3,%xmm0
    pshufd $78,%xmm6,%xmm3
    movdqa %xmm6,%xmm7
    pxor %xmm6,%xmm3
    leal 32(%esi),%esi
    .byte 102,15,58,68,242,0        // pclmulqdq $0x00,%xmm2,%xmm6
    .byte 102,15,58,68,250,17       // pclmulqdq $0x11,%xmm2,%xmm7
    .byte 102,15,58,68,221,0        // pclmulqdq $0x00,%xmm5,%xmm3
    movups 16(%edx),%xmm2
    nop
    subl $32,%ebx
    jbe L009even_tail               // no further pair to load
    jmp L010mod_loop
.align 5,0x90
// Two blocks per iteration: the state is multiplied by the value at arg2+16
// while the products of the next block pair are started, with the reduction
// of the current result interleaved between them.
L010mod_loop:
    pshufd $78,%xmm0,%xmm4
    movdqa %xmm0,%xmm1
    pxor %xmm0,%xmm4
    nop
    .byte 102,15,58,68,194,0        // pclmulqdq $0x00,%xmm2,%xmm0
    .byte 102,15,58,68,202,17       // pclmulqdq $0x11,%xmm2,%xmm1
    .byte 102,15,58,68,229,16       // pclmulqdq $0x10,%xmm5,%xmm4
    movups (%edx),%xmm2
    xorps %xmm6,%xmm0
    movdqa (%ecx),%xmm5             // reload the byte-reversal mask
    xorps %xmm7,%xmm1
    movdqu (%esi),%xmm7
    pxor %xmm0,%xmm3
    movdqu 16(%esi),%xmm6
    pxor %xmm1,%xmm3
    .byte 102,15,56,0,253           // pshufb %xmm5,%xmm7
    pxor %xmm3,%xmm4
    movdqa %xmm4,%xmm3
    psrldq $8,%xmm4
    pslldq $8,%xmm3
    pxor %xmm4,%xmm1
    pxor %xmm3,%xmm0
    .byte 102,15,56,0,245           // pshufb %xmm5,%xmm6
    pxor %xmm7,%xmm1
    movdqa %xmm6,%xmm7
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    .byte 102,15,58,68,242,0        // pclmulqdq $0x00,%xmm2,%xmm6
    movups 32(%edx),%xmm5
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    pshufd $78,%xmm7,%xmm3
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm7,%xmm3
    pxor %xmm4,%xmm1
    .byte 102,15,58,68,250,17       // pclmulqdq $0x11,%xmm2,%xmm7
    movups 16(%edx),%xmm2
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
    .byte 102,15,58,68,221,0        // pclmulqdq $0x00,%xmm5,%xmm3
    leal 32(%esi),%esi
    subl $32,%ebx
    ja L010mod_loop
// Finish the pair whose products are already in flight in %xmm6/%xmm7/%xmm3.
L009even_tail:
    pshufd $78,%xmm0,%xmm4
    movdqa %xmm0,%xmm1
    pxor %xmm0,%xmm4
    .byte 102,15,58,68,194,0        // pclmulqdq $0x00,%xmm2,%xmm0
    .byte 102,15,58,68,202,17       // pclmulqdq $0x11,%xmm2,%xmm1
    .byte 102,15,58,68,229,16       // pclmulqdq $0x10,%xmm5,%xmm4
    movdqa (%ecx),%xmm5
    xorps %xmm6,%xmm0
    xorps %xmm7,%xmm1
    pxor %xmm0,%xmm3
    pxor %xmm1,%xmm3
    pxor %xmm3,%xmm4
    movdqa %xmm4,%xmm3
    psrldq $8,%xmm4
    pslldq $8,%xmm3
    pxor %xmm4,%xmm1
    pxor %xmm3,%xmm0
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
    // %ebx is zero here only when exactly one more block remains.
    testl %ebx,%ebx
    jnz L011done
    movups (%edx),%xmm2
// Single remaining block: xor it into the state and multiply by the value
// at arg2+0 (expected in %xmm2).
L008odd_tail:
    movdqu (%esi),%xmm3
    .byte 102,15,56,0,221           // pshufb %xmm5,%xmm3
    pxor %xmm3,%xmm0
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pshufd $78,%xmm2,%xmm4
    pxor %xmm0,%xmm3
    pxor %xmm2,%xmm4
    .byte 102,15,58,68,194,0        // pclmulqdq $0x00,%xmm2,%xmm0
    .byte 102,15,58,68,202,17       // pclmulqdq $0x11,%xmm2,%xmm1
    .byte 102,15,58,68,220,0        // pclmulqdq $0x00,%xmm4,%xmm3
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
// Byte-reverse the state back and store it through arg1.
L011done:
    .byte 102,15,56,0,197           // pshufb %xmm5,%xmm0
    movdqu %xmm0,(%eax)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
// Constant data. It is emitted without a section switch, so it follows the
// code in the same section and is reached via the PIC label differences
// computed in the routines of this file.
.align 6,0x90
// Lbswap+0:  pshufb mask that reverses the 16 bytes of a register.
// Lbswap+16: 128-bit constant (low byte 0x01, top byte 0xc2) that
//            _gcm_init_clmul conditionally xors into the shifted input.
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 6,0x90
// 256 16-bit entries, indexed by a byte value times 2 in
// _gcm_ghash_4bit_mmx (pinsrw from (%esi,idx,2)).
Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 6,0x90
// 16 eight-byte entries (low dword zero, value in the high dword), indexed
// by a 4-bit value times 8 in _gcm_gmult_4bit_mmx.
Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
// NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif