/*
 * GHASH for x86_64 -- GAS (AT&T) syntax, ELF directives.
 *
 * Exported entry points (arguments arrive in rdi, rsi, rdx, rcx, i.e. the
 * System V AMD64 integer-argument order):
 *
 *   gcm_gmult_4bit   rdi = state (16 bytes, in/out), rsi = table
 *   gcm_ghash_4bit   rdi = state, rsi = table, rdx = input, rcx = length
 *   gcm_init_clmul   rdi = table (32 bytes written), rsi = key value (16 bytes)
 *   gcm_gmult_clmul  rdi = state (16 bytes, in/out), rsi = table
 *   gcm_ghash_clmul  rdi = state, rsi = table, rdx = input, rcx = length
 *
 * NOTE(review): the C prototypes live outside this file; the byte counts
 * above are what the code below actually reads and writes.
 *
 * The *_clmul routines need PCLMULQDQ and PSHUFB (SSSE3). Those instructions
 * are emitted as raw .byte sequences, presumably so that assemblers without
 * the mnemonics still accept the file; each one is decoded in a comment.
 * ".byte 0xf3,0xc3" is "rep ret".
 */
.text

/*
 * gcm_gmult_4bit
 *   In:    rdi = 16-byte state, consumed one byte at a time from offset 15
 *                down to offset 0 and overwritten with the result
 *          rsi = table of sixteen 16-byte entries, addressed by nibble*16;
 *                the qword at +0 feeds r9, the qword at +8 feeds r8
 *   Out:   16 bytes stored at (rdi); both halves are byte-swapped first
 *   Regs:  r9:r8 = 128-bit accumulator, r10 = nibble carried from r9 to r8,
 *          rdx = low bits of r8 used to index .Lrem_4bit (via r11),
 *          rax = (low nibble << 4), rbx = (byte & 0xf0), rcx = byte index
 *   Clobb: rax, rcx, rdx, r8-r11, flags
 *   Stack: rbx, rbp and r12 are pushed. Only rbx is modified, so only rbx
 *          is reloaded before the three slots are dropped.
 */
.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
.Lgmult_prologue:

	movzbq	15(%rdi),%r8			# last state byte
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al				# rax = low nibble * 16
	movq	$14,%rcx			# next byte index
	movq	8(%rsi,%rax,1),%r8		# accumulator = table[low nibble]
	movq	(%rsi,%rax,1),%r9
	andb	$240,%bl			# rbx = high nibble * 16
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	/* Shift the accumulator right by 4 and fold in table[high nibble]. */
	shrq	$4,%r8
	andq	$15,%rdx			# the four bits being shifted out
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al		# fetch the next state byte
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10			# bits moving from r9 into r8
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9		# correction for the shifted-out bits
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1			# index went below zero: byte 0 is in al/bl

	/* Same step for the low nibble of the byte just fetched. */
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	/* Final byte: low nibble first ... */
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	/* ... then its high nibble. */
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	movq	16(%rsp),%rbx			# slot written by the first push
	leaq	24(%rsp),%rsp			# drop all three pushed slots
.Lgmult_epilogue:
	.byte	0xf3,0xc3			# rep ret
.size	gcm_gmult_4bit,.-gcm_gmult_4bit

/*
 * gcm_ghash_4bit
 *   In:    rdi = 16-byte state (in/out)
 *          rsi = table of sixteen 16-byte entries (same layout as above)
 *          rdx = input pointer (kept in r14)
 *          rcx = input length in bytes (r15 becomes the end pointer)
 *   Out:   16 bytes stored at (rdi)
 *   Clobb: rax, rcx, rdx, rsi, r8-r11, flags (rbx, rbp, r12-r15 are saved
 *          on entry and restored on exit)
 *
 *   The loop is bottom-tested (cmpq/jb after the body), so one 16-byte
 *   block is always consumed, and the input pointer advances by 16 per
 *   iteration. NOTE(review): callers presumably pass a non-zero multiple
 *   of 16 -- confirm.
 *
 *   Stack frame (280 bytes below the six saved registers):
 *     0(%rsp)    .. 15(%rsp)    byte i = (low byte of entry i qword +8) << 4
 *     16(%rsp)   .. 143(%rsp)   qword i = (entry.q8 >> 4) | (entry.q0 << 60)
 *     144(%rsp)  .. 271(%rsp)   qword i = entry.q0 >> 4
 *   rbp points at 144(%rsp), so the two qword arrays are addressed as
 *   -128(%rbp,idx,8) and (%rbp,idx,8). Together they hold every table
 *   entry shifted right by four bits, with the bits that fell off kept
 *   in the byte array.
 */
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$280,%rsp
.Lghash_prologue:
	movq	%rdx,%r14			# r14 = input pointer
	movq	%rcx,%r15			# r15 = length (end pointer later)
	subq	$-128,%rsi			# bias rsi by +128; undone after the table build
	leaq	16+128(%rsp),%rbp
	xorl	%edx,%edx

	/*
	 * Build the shifted copy of the table. Entries are processed in an
	 * interleaved fashion, alternating between (r8,rax) and (r9,rbx).
	 */
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)

	addq	$-128,%rsi			# undo the +128 bias
	movq	8(%rdi),%r8			# r9:r8 = current state
	movq	0(%rdi),%r9
	addq	%r14,%r15			# r15 = input end pointer
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop

.align	16
.Louter_loop:
	/*
	 * state ^= next 16 input bytes. The result is written back to (rdi)
	 * so that it can be re-read in 32-bit pieces further down.
	 */
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx			# edx = state bytes 12..15
	xorq	%rax,%rax

	/*
	 * Each "roll $8,%edx" brings the next state byte into dl, starting
	 * with byte 15. Per byte: rax = low nibble * 16 (offset into the
	 * table at rsi), rbx or rcx = high nibble (index into the shifted
	 * copy on the stack), and r12 or r13 = index into .Lrem_8bit.
	 * The byte steps overlap, so the groups below are not independent.
	 */
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx			# edx = state bytes 8..11
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx			# edx = state bytes 4..7
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx			# edx = state bytes 0..3
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9

	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9

	/* Last byte (state byte 0): its high nibble is kept as nibble*16. */
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx			# rcx = high nibble * 16 (offset into rsi table)
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	/*
	 * A "movl -4(%rdi),%edx" used to sit here. edx is not read again
	 * before the loop top reloads rdx (or the function returns), so the
	 * load was dead, and it touched four bytes in front of the state
	 * buffer. It has been removed.
	 */
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9

	/* Final 4-bit shift, folding in the table entry for the last nibble. */
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop			# unsigned compare: more input left
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280(%rsp),%rsi			# rsi -> saved registers
	movq	0(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lghash_epilogue:
	.byte	0xf3,0xc3			# rep ret
.size	gcm_ghash_4bit,.-gcm_ghash_4bit

/*
 * gcm_init_clmul
 *   In:    rsi = 16-byte input value (unaligned load)
 *          rdi = output buffer; 32 bytes are written
 *   Out:   (rdi)   = input with its 64-bit halves swapped, shifted left by
 *                    one bit, and conditionally XORed with
 *                    .L0x1c2_polynomial when the top bit was set
 *          16(rdi) = that value multiplied by itself and folded back to
 *                    128 bits
 *   Clobb: xmm0-xmm5. No general-purpose registers or stack are used.
 */
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2			# swap the two 64-bit halves


	pshufd	$255,%xmm2,%xmm4		# broadcast the top dword
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3			# bit shifted out of each qword
	pcmpgtd	%xmm4,%xmm5			# all-ones when the top bit was set
	pslldq	$8,%xmm3			# carry low qword -> high qword
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	/* xmm1:xmm0 = xmm2 * xmm2, Karatsuba style (three carry-less multiplies). */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0			# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17			# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,220,0			# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	/* Fold the 256-bit product back to 128 bits: left-shift phase. */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	/* Right-shift phase. */
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	movdqu	%xmm2,(%rdi)
	movdqu	%xmm0,16(%rdi)
	.byte	0xf3,0xc3			# rep ret
.size	gcm_init_clmul,.-gcm_init_clmul

/*
 * gcm_gmult_clmul
 *   In:    rdi = 16-byte state (in/out, unaligned access)
 *          rsi = table; only the first 16 bytes are read
 *   Out:   (rdi) = state multiplied by (rsi) and folded back to 128 bits.
 *          The state is byte-reversed with .Lbswap_mask on the way in and
 *          again on the way out.
 *   Clobb: xmm0-xmm5
 */
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197				# pshufb %xmm5,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0			# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17			# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,220,0			# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	/* Fold back to 128 bits: left-shift phase. */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	/* Right-shift phase. */
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,197				# pshufb %xmm5,%xmm0
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3			# rep ret
.size	gcm_gmult_clmul,.-gcm_gmult_clmul

/*
 * gcm_ghash_clmul
 *   In:    rdi = 16-byte state (in/out)
 *          rsi = table; (rsi) is kept in xmm2 and 16(rsi) in xmm8
 *          rdx = input pointer
 *          rcx = input length in bytes
 *   Out:   (rdi) = updated state
 *   Clobb: rcx, rdx, xmm0-xmm10, flags
 *
 *   A length of exactly 16 goes straight to .Lodd_tail. Any other length
 *   takes the two-blocks-per-iteration path, which reads 32 bytes up front.
 *   NOTE(review): a zero length would therefore read past the input;
 *   callers presumably pass a non-zero multiple of 16 -- confirm.
 */
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm5

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197				# pshufb %xmm5,%xmm0

	subq	$16,%rcx
	jz	.Lodd_tail			# exactly one block

	movdqu	16(%rsi),%xmm8





	/*
	 * First pair of blocks: the first is XORed into the state, the second
	 * is multiplied by xmm2 right away (product kept in xmm7:xmm6).
	 */
	movdqu	(%rdx),%xmm3
	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221				# pshufb %xmm5,%xmm3
.byte	102,15,56,0,245				# pshufb %xmm5,%xmm6
	pxor	%xmm3,%xmm0
	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm6,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0			# pclmulqdq $0x00,%xmm2,%xmm6
.byte	102,15,58,68,250,17			# pclmulqdq $0x11,%xmm2,%xmm7
.byte	102,15,58,68,220,0			# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm7
	pxor	%xmm4,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	jbe	.Leven_tail			# at most one more block remains

.Lmod_loop:
	/* state * xmm8, then add the pending product from xmm7:xmm6. */
.byte	102,65,15,58,68,192,0			# pclmulqdq $0x00,%xmm8,%xmm0
.byte	102,65,15,58,68,200,17			# pclmulqdq $0x11,%xmm8,%xmm1
.byte	102,15,58,68,220,0			# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqu	(%rdx),%xmm3
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221				# pshufb %xmm5,%xmm3
.byte	102,15,56,0,245				# pshufb %xmm5,%xmm6

	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm9
	pshufd	$78,%xmm2,%xmm10
	pxor	%xmm6,%xmm9
	pxor	%xmm2,%xmm10
	pxor	%xmm3,%xmm1

	/* Folding of xmm1:xmm0, interleaved with the next block multiplies. */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0			# pclmulqdq $0x00,%xmm2,%xmm6
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

.byte	102,15,58,68,250,17			# pclmulqdq $0x11,%xmm2,%xmm7
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0

.byte	102,69,15,58,68,202,0			# pclmulqdq $0x00,%xmm10,%xmm9
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	pxor	%xmm6,%xmm9
	pxor	%xmm7,%xmm9
	movdqa	%xmm9,%xmm10
	psrldq	$8,%xmm9
	pslldq	$8,%xmm10
	pxor	%xmm9,%xmm7
	pxor	%xmm10,%xmm6

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	ja	.Lmod_loop

.Leven_tail:
	/* Finish the pending pair: state * xmm8 plus the product in xmm7:xmm6. */
.byte	102,65,15,58,68,192,0			# pclmulqdq $0x00,%xmm8,%xmm0
.byte	102,65,15,58,68,200,17			# pclmulqdq $0x11,%xmm8,%xmm1
.byte	102,15,58,68,220,0			# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone				# rcx == 0 means one block is still pending

.Lodd_tail:
	/* Single remaining block: state = (state ^ block) * xmm2. */
	movdqu	(%rdx),%xmm3
.byte	102,15,56,0,221				# pshufb %xmm5,%xmm3
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0			# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17			# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,220,0			# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.Ldone:
.byte	102,15,56,0,197				# pshufb %xmm5,%xmm0
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3			# rep ret
.LSEH_end_gcm_ghash_clmul:
.size	gcm_ghash_clmul,.-gcm_ghash_clmul

/*
 * Constants. The 64-byte alignment keeps .Lbswap_mask and the polynomial
 * (16 bytes further on) 16-byte aligned, which the movdqa/pand memory
 * operands above require.
 */
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	# pshufb control: reverse 16 bytes
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.align	64
/* Sixteen qwords (stored as low/high .long pairs), indexed (%r11,%rdx,8). */
.type	.Lrem_4bit,@object
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* 256 16-bit entries, indexed (%r11,idx,2); shifted left by 48 before use. */
.type	.Lrem_8bit,@object
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

/* NUL-terminated ASCII: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64

/* Tell GNU linkers that this object does not need an executable stack. */
.section	.note.GNU-stack,"",@progbits