# ghash-x86_64.s -- GHASH for x86_64, AT&T syntax (perlasm output, CRYPTOGAMS).
      1 .text
      2 
.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
#-----------------------------------------------------------------------
# void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
# ABI:   SysV AMD64 (per OpenSSL gcm128 convention -- confirm against caller)
# In:    %rdi = Xi, the 128-bit hash value (big-endian in memory)
#        %rsi = Htable, 16 precomputed 128-bit multiples of H, indexed by
#               nibble*16 (low half at 8(%rsi,idx), high half at (%rsi,idx))
# Out:   Xi <- Xi * H in GF(2^128); result byte-swapped back to memory
# Uses:  .Lrem_4bit reduction table via %r11; clobbers rax,rbx,rcx,rdx,
#        r8,r9,r10,r11, flags
# 4-bit "Shoup" method: consume Xi one byte (two nibbles) per iteration,
# from byte 15 down to byte 0, folding in a table entry per nibble and
# reducing with .Lrem_4bit.
#-----------------------------------------------------------------------
gcm_gmult_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
.Lgmult_prologue:

	movzbq	15(%rdi),%r8		# r8 = last byte of Xi (first processed)
	leaq	.Lrem_4bit(%rip),%r11	# r11 -> reduction constants
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al			# al = low nibble * 16 (table offset)
	movq	$14,%rcx		# rcx = index of next Xi byte to load
	movq	8(%rsi,%rax,1),%r8	# r8:r9 = Htable[low nibble]
	movq	(%rsi,%rax,1),%r9
	andb	$240,%bl		# bl = high nibble * 16
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	# First half-step: shift accumulator right 4, fold in Htable[bl]
	# (high nibble of previous byte) and the 4-bit remainder constant.
	shrq	$4,%r8
	andq	$15,%rdx		# rdx = bits shifted out (reduction index)
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al	# fetch next Xi byte
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10		# carry 4 bits from high to low qword
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9	# fold reduction constant
	movq	%r8,%rdx
	shlb	$4,%al			# al = low-nibble offset of new byte
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1		# all 16 bytes consumed -> finish

	# Second half-step: same dance with Htable[al] (low nibble).
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl		# bl = high-nibble offset for next pass
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	# Final two half-steps for byte 0 (loop exited before doing them).
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8			# back to big-endian memory layout
	bswapq	%r9
	movq	%r8,8(%rdi)		# store updated Xi
	movq	%r9,(%rdi)

	# NOTE: only %rbx is restored -- the body never modifies %rbp/%r12;
	# they are pushed solely so the unwind info matches gcm_ghash_4bit.
	movq	16(%rsp),%rbx
	leaq	24(%rsp),%rsp
.Lgmult_epilogue:
	.byte	0xf3,0xc3		# rep ret (branch-predictor-friendly)
.size	gcm_gmult_4bit,.-gcm_gmult_4bit
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
#-----------------------------------------------------------------------
# void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
#                     const u8 *inp, size_t len)
# ABI:   SysV AMD64 (per OpenSSL gcm128 convention -- confirm against caller)
# In:    %rdi = Xi (running hash), %rsi = Htable, %rdx = input, %rcx = len
#        len is presumably a multiple of 16 -- the loop consumes whole
#        16-byte blocks until inp reaches inp+len (TODO confirm in caller).
# Out:   Xi <- (...((Xi^B0)*H ^ B1)*H ... )*H for each input block Bi
# Stack: 280 bytes of locals: a 16-byte nibble table at (%rsp) and a
#        rescaled 256-byte copy of Htable around %rbp = 16+128(%rsp).
# Uses:  .Lrem_8bit reduction table via %r11; all callee-saved GPRs are
#        pushed and restored.
# 8-bit variant: each outer iteration absorbs one 16-byte block, eating
# the accumulator one byte at a time and reducing via .Lrem_8bit.
#-----------------------------------------------------------------------
gcm_ghash_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$280,%rsp
.Lghash_prologue:
	movq	%rdx,%r14		# r14 = input pointer
	movq	%rcx,%r15		# r15 = len (becomes end pointer below)
	subq	$-128,%rsi		# rsi += 128 so Htable is at rsi-128
	leaq	16+128(%rsp),%rbp	# rbp = middle of on-stack table copy
	xorl	%edx,%edx
	# ---- Pre-process Htable: for each of the 16 entries, store a
	# 4-bit-shifted copy (low half at off-128(%rbp), high at off(%rbp))
	# and stash the shifted-out low nibble (<<4) in the 16-byte array at
	# (%rsp).  This lets the main loop do byte-indexed lookups.
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi		# undo the earlier rsi += 128
	movq	8(%rdi),%r8		# r8:r9 = current Xi
	movq	0(%rdi),%r9
	addq	%r14,%r15		# r15 = end-of-input pointer
	leaq	.Lrem_8bit(%rip),%r11	# r11 -> 8-bit reduction table
	jmp	.Louter_loop
.align	16
.Louter_loop:
	# ---- Absorb one 16-byte block: Xi ^= block, then multiply by H.
	# The xored Xi is spilled back to (%rdi) so the unrolled byte loop
	# can re-load 32-bit chunks of it (movl 8/4/0/-4(%rdi) below).
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14		# advance input pointer
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	# ---- Fully unrolled 16-byte multiply; two interleaved dependency
	# chains (r12/ebx and r13/ecx) hide the table-lookup latency.
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx		# refill edx with next 4 bytes of Xi
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx		# next 4 bytes of Xi
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx		# next 4 bytes of Xi
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx		# last 4 bytes of Xi
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8			# back to big-endian
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14		# more input blocks?
	jb	.Louter_loop
	movq	%r8,8(%rdi)		# store final Xi
	movq	%r9,(%rdi)

	# Epilogue: locals are 280 bytes, saved registers sit just above.
	leaq	280(%rsp),%rsi
	movq	0(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lghash_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	gcm_ghash_4bit,.-gcm_ghash_4bit
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
#-----------------------------------------------------------------------
# void gcm_init_clmul(u128 Htable[], const u64 Xi[2])
# ABI:   SysV AMD64 (per OpenSSL gcm128 convention -- confirm against caller)
# In:    %rsi -> raw hash subkey H (two 64-bit halves)
#        %rdi -> output table
# Out:   (%rdi)   = H', the bit-adjusted subkey (qword-swapped, doubled
#                   and reduced mod the 0x1c2 polynomial)
#        16(%rdi) = H'^2, computed by one carry-less Karatsuba multiply
# Requires PCLMULQDQ + SSSE3 (the .byte sequences below encode
# pclmulqdq, which older assemblers could not emit symbolically).
#-----------------------------------------------------------------------
gcm_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2		# swap the two 64-bit halves


	# H <<= 1 (128-bit), conditionally xor-ing in the field polynomial
	# when the top bit was set (branch-free via pcmpgtd sign trick).
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5		# xmm5 = all-ones if H's MSB set
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2		# carry bit across the qword boundary


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2		# conditional reduction


	# Compute H^2 = H * H via Karatsuba (3 carry-less multiplies).
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3		# hi^lo halves for the middle product
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		# pclmulqdq $0x00,%xmm2,%xmm0 (lo*lo)
.byte	102,15,58,68,202,17		# pclmulqdq $0x11,%xmm2,%xmm1 (hi*hi)
.byte	102,15,58,68,220,0		# pclmulqdq $0x00,%xmm4,%xmm3 (mid)
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3		# xmm3 = middle 128 bits

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1		# fold middle into hi:lo 256-bit product
	pxor	%xmm4,%xmm0

	# Reduce the 256-bit product mod x^128 + x^7 + x^2 + x + 1.
	# Phase 1: multiply low half by x^63 + x^62 + x^57.
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

	# Phase 2: shift-and-xor to finish the reduction.
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	movdqu	%xmm2,(%rdi)		# store H'
	movdqu	%xmm0,16(%rdi)		# store H'^2
	.byte	0xf3,0xc3		# rep ret
.size	gcm_init_clmul,.-gcm_init_clmul
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
#-----------------------------------------------------------------------
# void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[])
# ABI:   SysV AMD64 (per OpenSSL gcm128 convention -- confirm against caller)
# In:    %rdi -> Xi (big-endian in memory), %rsi -> Htable (H' at offset 0)
# Out:   Xi <- Xi * H in GF(2^128), written back to (%rdi)
# Requires PCLMULQDQ + SSSE3.  One Karatsuba carry-less multiply
# followed by the standard two-phase reduction.
#-----------------------------------------------------------------------
gcm_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0 (byte-reverse Xi)
	# Karatsuba: xmm0 = lo*lo, xmm1 = hi*hi, xmm3 = (hi^lo)*(hi^lo).
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17		# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,220,0		# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3		# xmm3 = middle term

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1		# xmm1:xmm0 = 256-bit product
	pxor	%xmm4,%xmm0

	# Reduction phase 1: multiply low half by x^63 + x^62 + x^57.
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

	# Reduction phase 2.
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0 (back to BE)
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3		# rep ret
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
#-----------------------------------------------------------------------
# void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[],
#                      const u8 *inp, size_t len)
# ABI:   SysV AMD64 (per OpenSSL gcm128 convention -- confirm against caller)
# In:    %rdi = Xi, %rsi = Htable (H' at 0, H'^2 at 16),
#        %rdx = input, %rcx = len (presumably a multiple of 16 -- TODO
#        confirm in caller)
# Out:   Xi updated over all input blocks
# Requires PCLMULQDQ + SSSE3.  Processes TWO blocks per .Lmod_loop
# iteration using H^2 (for the older block) and H (for the newer one),
# so only one reduction is needed per pair; single-block tail handled
# at .Lodd_tail.
#-----------------------------------------------------------------------
gcm_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm5

	movdqu	(%rdi),%xmm0		# xmm0 = Xi
	movdqu	(%rsi),%xmm2		# xmm2 = H
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0

	subq	$16,%rcx
	jz	.Lodd_tail		# exactly one block -> tail path

	movdqu	16(%rsi),%xmm8		# xmm8 = H^2


	# Prime the pipeline: load first two blocks; start B1*H while the
	# (Xi^B0)*H^2 multiply is set up.


	movdqu	(%rdx),%xmm3
	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221			# pshufb %xmm5,%xmm3
.byte	102,15,56,0,245			# pshufb %xmm5,%xmm6
	pxor	%xmm3,%xmm0		# Xi ^= B0
	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm6,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0		# pclmulqdq $0x00,%xmm2,%xmm6
.byte	102,15,58,68,250,17		# pclmulqdq $0x11,%xmm2,%xmm7
.byte	102,15,58,68,220,0		# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm7		# xmm7:xmm6 = unreduced B1*H
	pxor	%xmm4,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	jbe	.Leven_tail		# <= 0 bytes left after this pair

.Lmod_loop:
	# (Xi^B_even)*H^2 -- Karatsuba with xmm8.
.byte	102,65,15,58,68,192,0		# pclmulqdq $0x00,%xmm8,%xmm0
.byte	102,65,15,58,68,200,17		# pclmulqdq $0x11,%xmm8,%xmm1
.byte	102,15,58,68,220,0		# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqu	(%rdx),%xmm3		# prefetch next even block
	pxor	%xmm6,%xmm0		# accumulate the pending B_odd*H
	pxor	%xmm7,%xmm1

	movdqu	16(%rdx),%xmm6		# prefetch next odd block
.byte	102,15,56,0,221			# pshufb %xmm5,%xmm3
.byte	102,15,56,0,245			# pshufb %xmm5,%xmm6

	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm9
	pshufd	$78,%xmm2,%xmm10
	pxor	%xmm6,%xmm9
	pxor	%xmm2,%xmm10
	pxor	%xmm3,%xmm1		# fold next even block into the high half

	# Reduction phase 1, interleaved with the next B_odd*H multiply.
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0		# pclmulqdq $0x00,%xmm2,%xmm6
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

	# Reduction phase 2, likewise interleaved.
.byte	102,15,58,68,250,17		# pclmulqdq $0x11,%xmm2,%xmm7
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0

.byte	102,69,15,58,68,202,0		# pclmulqdq $0x00,%xmm10,%xmm9
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	pxor	%xmm6,%xmm9		# finish Karatsuba for B_odd*H
	pxor	%xmm7,%xmm9
	movdqa	%xmm9,%xmm10
	psrldq	$8,%xmm9
	pslldq	$8,%xmm10
	pxor	%xmm9,%xmm7		# xmm7:xmm6 carried into next iteration
	pxor	%xmm10,%xmm6

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	ja	.Lmod_loop

.Leven_tail:
	# Finish the last buffered pair: (Xi')*H^2 plus the pending B*H.
.byte	102,65,15,58,68,192,0		# pclmulqdq $0x00,%xmm8,%xmm0
.byte	102,65,15,58,68,200,17		# pclmulqdq $0x11,%xmm8,%xmm1
.byte	102,15,58,68,220,0		# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	# Standard two-phase reduction.
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	testq	%rcx,%rcx		# one straggler block left?
	jnz	.Ldone

.Lodd_tail:
	# Single remaining block: Xi = (Xi ^ B) * H.
	movdqu	(%rdx),%xmm3
.byte	102,15,56,0,221			# pshufb %xmm5,%xmm3
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17		# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,220,0		# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	# Two-phase reduction (same as above).
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.Ldone:
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0 (back to BE)
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3		# rep ret
.LSEH_end_gcm_ghash_clmul:
.size	gcm_ghash_clmul,.-gcm_ghash_clmul
.align	64
# Constant data for the routines above (read-only; lives in .text here,
# as emitted by the perlasm generator).
.Lbswap_mask:
# pshufb control: full 16-byte byte reversal (big-endian <-> little-endian).
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
# GHASH field polynomial constant; 0xc2 in the last byte = x^7+x^2+x (+1
# via the leading 1), used by gcm_init_clmul's conditional reduction.
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.align	64
.type	.Lrem_4bit,@object
# 16-entry reduction table for the 4-bit path: each pair of .longs is one
# 64-bit value (low dword 0) xored in per shifted-out nibble.
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type	.Lrem_8bit,@object
# 256-entry 16-bit reduction table for the 8-bit path (indexed by the
# byte shifted out of the accumulator in gcm_ghash_4bit).
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

# NUL-terminated identification string:
# "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
   1027