#if defined(__x86_64__)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

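/*
 * gcm_gmult_4bit(Xi, Htable): one GHASH multiplication, Xi = Xi * H, using
 * the classic 4-bit table-driven method. Per the System V AMD64 calling
 * convention this apparently takes %rdi = Xi (16 bytes, stored big-endian)
 * and %rsi = the table of precomputed multiples of H; .Lrem_4bit supplies
 * the reduction constants for the field polynomial.
 */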
.globl	gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
.Lgmult_prologue:

	movzbq	15(%rdi),%r8
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al
	movq	$14,%rcx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	movq	16(%rsp),%rbx
	leaq	24(%rsp),%rsp
.Lgmult_epilogue:
	.byte	0xf3,0xc3
.size	gcm_gmult_4bit,.-gcm_gmult_4bit
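/*
 * gcm_ghash_4bit(Xi, Htable, inp, len): bulk GHASH over len bytes of input
 * (%rdx = inp, %rcx = len, apparently a multiple of 16). It first expands
 * Htable into a 256-byte shadow table at (%rbp) on the stack, plus a
 * 16-byte nibble table at 0(%rsp), then hashes one 16-byte block per
 * .Louter_loop iteration: Xi = (Xi ^ block) * H, with byte-wide lookups
 * reduced through the .Lrem_8bit remainder table.
 */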
.globl	gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$280,%rsp
.Lghash_prologue:
	movq	%rdx,%r14
	movq	%rcx,%r15
	subq	$-128,%rsi
	leaq	16+128(%rsp),%rbp
	xorl	%edx,%edx
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi
	movq	8(%rdi),%r8
	movq	0(%rdi),%r9
	addq	%r14,%r15
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
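/*
 * Main per-block loop: the 128-bit accumulator lives in %r8:%r9, %rdx walks
 * the current block one byte at a time (roll $8), and each byte costs two
 * table lookups plus a 16-bit remainder fetch from .Lrem_8bit.
 */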
.align	16
.Louter_loop:
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280(%rsp),%rsi
	movq	0(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lghash_epilogue:
	.byte	0xf3,0xc3
.size	gcm_ghash_4bit,.-gcm_ghash_4bit
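/*
 * gcm_init_clmul(Htable, Xi): PCLMULQDQ key setup. The raw key's halves
 * are swapped (pshufd $78) and converted to the shifted representation
 * (psllq $1 with conditional reduction by .L0x1c2_polynomial); H^2, H^3
 * and H^4 are then computed by carry-less multiplication. The layout
 * appears to be H at 0(%rdi), H^2 at 16, H^3 at 48, H^4 at 64, with the
 * xor-ed Karatsuba halves cached at 32 and 80. The .byte 102,15,58,68,...
 * sequences are hand-encoded pclmulqdq instructions (0x66 0x0F 0x3A 0x44)
 * for assemblers that predate the instruction.
 */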
.globl	gcm_init_clmul
.hidden gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	.byte	0xf3,0xc3
.size	gcm_init_clmul,.-gcm_init_clmul
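/*
 * gcm_gmult_clmul(Xi, Htable): single multiplication Xi = Xi * H with
 * PCLMULQDQ. Xi is byte-swapped on load and store via .Lbswap_mask (the
 * encoded pshufb, .byte 102,15,56,0,197); in between is the usual Karatsuba
 * recombination followed by the two-phase reduction modulo
 * x^128 + x^7 + x^2 + x + 1.
 */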
.globl	gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
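/*
 * gcm_ghash_clmul(Xi, Htable, inp, len): bulk GHASH with PCLMULQDQ
 * (%rdx = inp, %rcx = len). After a capability probe of OPENSSL_ia32cap_P,
 * inputs of 64 bytes or more apparently take the 4x aggregated path
 * (.Lmod4_loop, using the stored powers H..H^4); shorter inputs fall
 * through to .Lskip4x, .Leven_tail and .Lodd_tail.
 */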
.globl	gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$16,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	movl	OPENSSL_ia32cap_P+4(%rip),%eax
	cmpq	$48,%rcx
	jb	.Lskip4x

	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$48,%rcx
	movq	$11547335547999543296,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$64,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
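/*
 * 4x aggregated loop: each iteration folds 64 bytes of input. The four
 * carry-less products against H^4..H are accumulated before a single
 * reduction, with the next blocks' loads and pshufb byte swaps interleaved
 * to hide latency.
 */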
.align	32
.Lmod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$64,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$64,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$16,%rcx
	jz	.Lodd_tail
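/*
 * Fewer than four blocks remain (or the 4x path was skipped): process the
 * input two blocks at a time using the stored powers H and H^2.
 */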
.Lskip4x:




	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$32,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

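/*
 * Two-block loop: multiply the accumulator by H^2 and the freshly loaded
 * block by H, combine the products, and reduce once per iteration.
 */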
.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$32,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.size	gcm_ghash_clmul,.-gcm_ghash_clmul
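/*
 * On this build the AVX entry points are plain aliases: gcm_init_avx,
 * gcm_gmult_avx and gcm_ghash_avx simply tail-jump to the CLMUL
 * implementations above.
 */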
.globl	gcm_init_avx
.hidden gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
	jmp	.L_init_clmul
.size	gcm_init_avx,.-gcm_init_avx
.globl	gcm_gmult_avx
.hidden gcm_gmult_avx
.type	gcm_gmult_avx,@function
.align	32
gcm_gmult_avx:
	jmp	.L_gmult_clmul
.size	gcm_gmult_avx,.-gcm_gmult_avx
.globl	gcm_ghash_avx
.hidden gcm_ghash_avx
.type	gcm_ghash_avx,@function
.align	32
gcm_ghash_avx:
	jmp	.L_ghash_clmul
.size	gcm_ghash_avx,.-gcm_ghash_avx
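/*
 * Constant pool: .Lbswap_mask is the pshufb byte-reversal mask,
 * .L0x1c2_polynomial the GHASH reduction constant, .L7_mask /
 * .L7_mask_poly masks for the CLMUL reduction, and .Lrem_4bit /
 * .Lrem_8bit the remainder tables used by the table-driven routines.
 * The trailing .byte string spells the CRYPTOGAMS banner
 * "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>".
 */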
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long	7,0,7,0
.L7_mask_poly:
.long	7,0,450,0
.align	64
.type	.Lrem_4bit,@object
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type	.Lrem_8bit,@object
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#endif