      1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
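/*
 * GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org> (see the .byte
 * attribution string at the end of the file); pre-generated perlasm output.
 * Three implementations of the GCM universal hash are provided: a 4-bit
 * table version (gcm_gmult_4bit / gcm_ghash_4bit), a PCLMULQDQ version
 * (gcm_init_clmul / gcm_gmult_clmul / gcm_ghash_clmul) and an AVX variant
 * (gcm_init_avx / gcm_gmult_avx / gcm_ghash_avx).  Selection between them
 * is done by the C GCM glue code using OPENSSL_ia32cap_P.
 */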
      2 .text
      3 .extern	OPENSSL_ia32cap_P
      4 .hidden OPENSSL_ia32cap_P
      5 
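/*
 * gcm_gmult_4bit(Xi, Htable): one GHASH multiplication Xi = Xi * H using
 * the 4-bit table method.  Htable holds the sixteen nibble multiples of H
 * precomputed by the C code and .Lrem_4bit supplies the reduction
 * constants; the nibbles of Xi are consumed from byte 15 down to byte 0
 * and the byte-swapped result is written back.  Arguments (SysV ABI,
 * prototype as used by the GCM glue code): %rdi = Xi (u64[2]),
 * %rsi = Htable.
 */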
      6 .globl	gcm_gmult_4bit
      7 .hidden gcm_gmult_4bit
      8 .type	gcm_gmult_4bit,@function
      9 .align	16
     10 gcm_gmult_4bit:
     11 	pushq	%rbx
     12 	pushq	%rbp
     13 	pushq	%r12
     14 	pushq	%r13
     15 	pushq	%r14
     16 	pushq	%r15
     17 	subq	$280,%rsp
     18 .Lgmult_prologue:
     19 
     20 	movzbq	15(%rdi),%r8
     21 	leaq	.Lrem_4bit(%rip),%r11
     22 	xorq	%rax,%rax
     23 	xorq	%rbx,%rbx
     24 	movb	%r8b,%al
     25 	movb	%r8b,%bl
     26 	shlb	$4,%al
     27 	movq	$14,%rcx
     28 	movq	8(%rsi,%rax,1),%r8
     29 	movq	(%rsi,%rax,1),%r9
     30 	andb	$0xf0,%bl
     31 	movq	%r8,%rdx
     32 	jmp	.Loop1
     33 
     34 .align	16
     35 .Loop1:
     36 	shrq	$4,%r8
     37 	andq	$0xf,%rdx
     38 	movq	%r9,%r10
     39 	movb	(%rdi,%rcx,1),%al
     40 	shrq	$4,%r9
     41 	xorq	8(%rsi,%rbx,1),%r8
     42 	shlq	$60,%r10
     43 	xorq	(%rsi,%rbx,1),%r9
     44 	movb	%al,%bl
     45 	xorq	(%r11,%rdx,8),%r9
     46 	movq	%r8,%rdx
     47 	shlb	$4,%al
     48 	xorq	%r10,%r8
     49 	decq	%rcx
     50 	js	.Lbreak1
     51 
     52 	shrq	$4,%r8
     53 	andq	$0xf,%rdx
     54 	movq	%r9,%r10
     55 	shrq	$4,%r9
     56 	xorq	8(%rsi,%rax,1),%r8
     57 	shlq	$60,%r10
     58 	xorq	(%rsi,%rax,1),%r9
     59 	andb	$0xf0,%bl
     60 	xorq	(%r11,%rdx,8),%r9
     61 	movq	%r8,%rdx
     62 	xorq	%r10,%r8
     63 	jmp	.Loop1
     64 
     65 .align	16
     66 .Lbreak1:
     67 	shrq	$4,%r8
     68 	andq	$0xf,%rdx
     69 	movq	%r9,%r10
     70 	shrq	$4,%r9
     71 	xorq	8(%rsi,%rax,1),%r8
     72 	shlq	$60,%r10
     73 	xorq	(%rsi,%rax,1),%r9
     74 	andb	$0xf0,%bl
     75 	xorq	(%r11,%rdx,8),%r9
     76 	movq	%r8,%rdx
     77 	xorq	%r10,%r8
     78 
     79 	shrq	$4,%r8
     80 	andq	$0xf,%rdx
     81 	movq	%r9,%r10
     82 	shrq	$4,%r9
     83 	xorq	8(%rsi,%rbx,1),%r8
     84 	shlq	$60,%r10
     85 	xorq	(%rsi,%rbx,1),%r9
     86 	xorq	%r10,%r8
     87 	xorq	(%r11,%rdx,8),%r9
     88 
     89 	bswapq	%r8
     90 	bswapq	%r9
     91 	movq	%r8,8(%rdi)
     92 	movq	%r9,(%rdi)
     93 
     94 	leaq	280+48(%rsp),%rsi
     95 	movq	-8(%rsi),%rbx
     96 	leaq	(%rsi),%rsp
     97 .Lgmult_epilogue:
     98 	.byte	0xf3,0xc3
     99 .size	gcm_gmult_4bit,.-gcm_gmult_4bit
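/*
 * gcm_ghash_4bit(Xi, Htable, inp, len): bulk GHASH with the 4-bit tables.
 * For each 16-byte block of inp, Xi = (Xi ^ block) * H; the prologue builds
 * a pre-shifted copy of Htable in the 280-byte stack frame and the inner
 * loop also uses the 8-bit reduction table .Lrem_8bit.  %rdi = Xi,
 * %rsi = Htable, %rdx = inp, %rcx = len (a multiple of 16 bytes).
 */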
    100 .globl	gcm_ghash_4bit
    101 .hidden gcm_ghash_4bit
    102 .type	gcm_ghash_4bit,@function
    103 .align	16
    104 gcm_ghash_4bit:
    105 	pushq	%rbx
    106 	pushq	%rbp
    107 	pushq	%r12
    108 	pushq	%r13
    109 	pushq	%r14
    110 	pushq	%r15
    111 	subq	$280,%rsp
    112 .Lghash_prologue:
    113 	movq	%rdx,%r14
    114 	movq	%rcx,%r15
    115 	subq	$-128,%rsi
    116 	leaq	16+128(%rsp),%rbp
    117 	xorl	%edx,%edx
    118 	movq	0+0-128(%rsi),%r8
    119 	movq	0+8-128(%rsi),%rax
    120 	movb	%al,%dl
    121 	shrq	$4,%rax
    122 	movq	%r8,%r10
    123 	shrq	$4,%r8
    124 	movq	16+0-128(%rsi),%r9
    125 	shlb	$4,%dl
    126 	movq	16+8-128(%rsi),%rbx
    127 	shlq	$60,%r10
    128 	movb	%dl,0(%rsp)
    129 	orq	%r10,%rax
    130 	movb	%bl,%dl
    131 	shrq	$4,%rbx
    132 	movq	%r9,%r10
    133 	shrq	$4,%r9
    134 	movq	%r8,0(%rbp)
    135 	movq	32+0-128(%rsi),%r8
    136 	shlb	$4,%dl
    137 	movq	%rax,0-128(%rbp)
    138 	movq	32+8-128(%rsi),%rax
    139 	shlq	$60,%r10
    140 	movb	%dl,1(%rsp)
    141 	orq	%r10,%rbx
    142 	movb	%al,%dl
    143 	shrq	$4,%rax
    144 	movq	%r8,%r10
    145 	shrq	$4,%r8
    146 	movq	%r9,8(%rbp)
    147 	movq	48+0-128(%rsi),%r9
    148 	shlb	$4,%dl
    149 	movq	%rbx,8-128(%rbp)
    150 	movq	48+8-128(%rsi),%rbx
    151 	shlq	$60,%r10
    152 	movb	%dl,2(%rsp)
    153 	orq	%r10,%rax
    154 	movb	%bl,%dl
    155 	shrq	$4,%rbx
    156 	movq	%r9,%r10
    157 	shrq	$4,%r9
    158 	movq	%r8,16(%rbp)
    159 	movq	64+0-128(%rsi),%r8
    160 	shlb	$4,%dl
    161 	movq	%rax,16-128(%rbp)
    162 	movq	64+8-128(%rsi),%rax
    163 	shlq	$60,%r10
    164 	movb	%dl,3(%rsp)
    165 	orq	%r10,%rbx
    166 	movb	%al,%dl
    167 	shrq	$4,%rax
    168 	movq	%r8,%r10
    169 	shrq	$4,%r8
    170 	movq	%r9,24(%rbp)
    171 	movq	80+0-128(%rsi),%r9
    172 	shlb	$4,%dl
    173 	movq	%rbx,24-128(%rbp)
    174 	movq	80+8-128(%rsi),%rbx
    175 	shlq	$60,%r10
    176 	movb	%dl,4(%rsp)
    177 	orq	%r10,%rax
    178 	movb	%bl,%dl
    179 	shrq	$4,%rbx
    180 	movq	%r9,%r10
    181 	shrq	$4,%r9
    182 	movq	%r8,32(%rbp)
    183 	movq	96+0-128(%rsi),%r8
    184 	shlb	$4,%dl
    185 	movq	%rax,32-128(%rbp)
    186 	movq	96+8-128(%rsi),%rax
    187 	shlq	$60,%r10
    188 	movb	%dl,5(%rsp)
    189 	orq	%r10,%rbx
    190 	movb	%al,%dl
    191 	shrq	$4,%rax
    192 	movq	%r8,%r10
    193 	shrq	$4,%r8
    194 	movq	%r9,40(%rbp)
    195 	movq	112+0-128(%rsi),%r9
    196 	shlb	$4,%dl
    197 	movq	%rbx,40-128(%rbp)
    198 	movq	112+8-128(%rsi),%rbx
    199 	shlq	$60,%r10
    200 	movb	%dl,6(%rsp)
    201 	orq	%r10,%rax
    202 	movb	%bl,%dl
    203 	shrq	$4,%rbx
    204 	movq	%r9,%r10
    205 	shrq	$4,%r9
    206 	movq	%r8,48(%rbp)
    207 	movq	128+0-128(%rsi),%r8
    208 	shlb	$4,%dl
    209 	movq	%rax,48-128(%rbp)
    210 	movq	128+8-128(%rsi),%rax
    211 	shlq	$60,%r10
    212 	movb	%dl,7(%rsp)
    213 	orq	%r10,%rbx
    214 	movb	%al,%dl
    215 	shrq	$4,%rax
    216 	movq	%r8,%r10
    217 	shrq	$4,%r8
    218 	movq	%r9,56(%rbp)
    219 	movq	144+0-128(%rsi),%r9
    220 	shlb	$4,%dl
    221 	movq	%rbx,56-128(%rbp)
    222 	movq	144+8-128(%rsi),%rbx
    223 	shlq	$60,%r10
    224 	movb	%dl,8(%rsp)
    225 	orq	%r10,%rax
    226 	movb	%bl,%dl
    227 	shrq	$4,%rbx
    228 	movq	%r9,%r10
    229 	shrq	$4,%r9
    230 	movq	%r8,64(%rbp)
    231 	movq	160+0-128(%rsi),%r8
    232 	shlb	$4,%dl
    233 	movq	%rax,64-128(%rbp)
    234 	movq	160+8-128(%rsi),%rax
    235 	shlq	$60,%r10
    236 	movb	%dl,9(%rsp)
    237 	orq	%r10,%rbx
    238 	movb	%al,%dl
    239 	shrq	$4,%rax
    240 	movq	%r8,%r10
    241 	shrq	$4,%r8
    242 	movq	%r9,72(%rbp)
    243 	movq	176+0-128(%rsi),%r9
    244 	shlb	$4,%dl
    245 	movq	%rbx,72-128(%rbp)
    246 	movq	176+8-128(%rsi),%rbx
    247 	shlq	$60,%r10
    248 	movb	%dl,10(%rsp)
    249 	orq	%r10,%rax
    250 	movb	%bl,%dl
    251 	shrq	$4,%rbx
    252 	movq	%r9,%r10
    253 	shrq	$4,%r9
    254 	movq	%r8,80(%rbp)
    255 	movq	192+0-128(%rsi),%r8
    256 	shlb	$4,%dl
    257 	movq	%rax,80-128(%rbp)
    258 	movq	192+8-128(%rsi),%rax
    259 	shlq	$60,%r10
    260 	movb	%dl,11(%rsp)
    261 	orq	%r10,%rbx
    262 	movb	%al,%dl
    263 	shrq	$4,%rax
    264 	movq	%r8,%r10
    265 	shrq	$4,%r8
    266 	movq	%r9,88(%rbp)
    267 	movq	208+0-128(%rsi),%r9
    268 	shlb	$4,%dl
    269 	movq	%rbx,88-128(%rbp)
    270 	movq	208+8-128(%rsi),%rbx
    271 	shlq	$60,%r10
    272 	movb	%dl,12(%rsp)
    273 	orq	%r10,%rax
    274 	movb	%bl,%dl
    275 	shrq	$4,%rbx
    276 	movq	%r9,%r10
    277 	shrq	$4,%r9
    278 	movq	%r8,96(%rbp)
    279 	movq	224+0-128(%rsi),%r8
    280 	shlb	$4,%dl
    281 	movq	%rax,96-128(%rbp)
    282 	movq	224+8-128(%rsi),%rax
    283 	shlq	$60,%r10
    284 	movb	%dl,13(%rsp)
    285 	orq	%r10,%rbx
    286 	movb	%al,%dl
    287 	shrq	$4,%rax
    288 	movq	%r8,%r10
    289 	shrq	$4,%r8
    290 	movq	%r9,104(%rbp)
    291 	movq	240+0-128(%rsi),%r9
    292 	shlb	$4,%dl
    293 	movq	%rbx,104-128(%rbp)
    294 	movq	240+8-128(%rsi),%rbx
    295 	shlq	$60,%r10
    296 	movb	%dl,14(%rsp)
    297 	orq	%r10,%rax
    298 	movb	%bl,%dl
    299 	shrq	$4,%rbx
    300 	movq	%r9,%r10
    301 	shrq	$4,%r9
    302 	movq	%r8,112(%rbp)
    303 	shlb	$4,%dl
    304 	movq	%rax,112-128(%rbp)
    305 	shlq	$60,%r10
    306 	movb	%dl,15(%rsp)
    307 	orq	%r10,%rbx
    308 	movq	%r9,120(%rbp)
    309 	movq	%rbx,120-128(%rbp)
    310 	addq	$-128,%rsi
    311 	movq	8(%rdi),%r8
    312 	movq	0(%rdi),%r9
    313 	addq	%r14,%r15
    314 	leaq	.Lrem_8bit(%rip),%r11
    315 	jmp	.Louter_loop
    316 .align	16
    317 .Louter_loop:
    318 	xorq	(%r14),%r9
    319 	movq	8(%r14),%rdx
    320 	leaq	16(%r14),%r14
    321 	xorq	%r8,%rdx
    322 	movq	%r9,(%rdi)
    323 	movq	%rdx,8(%rdi)
    324 	shrq	$32,%rdx
    325 	xorq	%rax,%rax
    326 	roll	$8,%edx
    327 	movb	%dl,%al
    328 	movzbl	%dl,%ebx
    329 	shlb	$4,%al
    330 	shrl	$4,%ebx
    331 	roll	$8,%edx
    332 	movq	8(%rsi,%rax,1),%r8
    333 	movq	(%rsi,%rax,1),%r9
    334 	movb	%dl,%al
    335 	movzbl	%dl,%ecx
    336 	shlb	$4,%al
    337 	movzbq	(%rsp,%rbx,1),%r12
    338 	shrl	$4,%ecx
    339 	xorq	%r8,%r12
    340 	movq	%r9,%r10
    341 	shrq	$8,%r8
    342 	movzbq	%r12b,%r12
    343 	shrq	$8,%r9
    344 	xorq	-128(%rbp,%rbx,8),%r8
    345 	shlq	$56,%r10
    346 	xorq	(%rbp,%rbx,8),%r9
    347 	roll	$8,%edx
    348 	xorq	8(%rsi,%rax,1),%r8
    349 	xorq	(%rsi,%rax,1),%r9
    350 	movb	%dl,%al
    351 	xorq	%r10,%r8
    352 	movzwq	(%r11,%r12,2),%r12
    353 	movzbl	%dl,%ebx
    354 	shlb	$4,%al
    355 	movzbq	(%rsp,%rcx,1),%r13
    356 	shrl	$4,%ebx
    357 	shlq	$48,%r12
    358 	xorq	%r8,%r13
    359 	movq	%r9,%r10
    360 	xorq	%r12,%r9
    361 	shrq	$8,%r8
    362 	movzbq	%r13b,%r13
    363 	shrq	$8,%r9
    364 	xorq	-128(%rbp,%rcx,8),%r8
    365 	shlq	$56,%r10
    366 	xorq	(%rbp,%rcx,8),%r9
    367 	roll	$8,%edx
    368 	xorq	8(%rsi,%rax,1),%r8
    369 	xorq	(%rsi,%rax,1),%r9
    370 	movb	%dl,%al
    371 	xorq	%r10,%r8
    372 	movzwq	(%r11,%r13,2),%r13
    373 	movzbl	%dl,%ecx
    374 	shlb	$4,%al
    375 	movzbq	(%rsp,%rbx,1),%r12
    376 	shrl	$4,%ecx
    377 	shlq	$48,%r13
    378 	xorq	%r8,%r12
    379 	movq	%r9,%r10
    380 	xorq	%r13,%r9
    381 	shrq	$8,%r8
    382 	movzbq	%r12b,%r12
    383 	movl	8(%rdi),%edx
    384 	shrq	$8,%r9
    385 	xorq	-128(%rbp,%rbx,8),%r8
    386 	shlq	$56,%r10
    387 	xorq	(%rbp,%rbx,8),%r9
    388 	roll	$8,%edx
    389 	xorq	8(%rsi,%rax,1),%r8
    390 	xorq	(%rsi,%rax,1),%r9
    391 	movb	%dl,%al
    392 	xorq	%r10,%r8
    393 	movzwq	(%r11,%r12,2),%r12
    394 	movzbl	%dl,%ebx
    395 	shlb	$4,%al
    396 	movzbq	(%rsp,%rcx,1),%r13
    397 	shrl	$4,%ebx
    398 	shlq	$48,%r12
    399 	xorq	%r8,%r13
    400 	movq	%r9,%r10
    401 	xorq	%r12,%r9
    402 	shrq	$8,%r8
    403 	movzbq	%r13b,%r13
    404 	shrq	$8,%r9
    405 	xorq	-128(%rbp,%rcx,8),%r8
    406 	shlq	$56,%r10
    407 	xorq	(%rbp,%rcx,8),%r9
    408 	roll	$8,%edx
    409 	xorq	8(%rsi,%rax,1),%r8
    410 	xorq	(%rsi,%rax,1),%r9
    411 	movb	%dl,%al
    412 	xorq	%r10,%r8
    413 	movzwq	(%r11,%r13,2),%r13
    414 	movzbl	%dl,%ecx
    415 	shlb	$4,%al
    416 	movzbq	(%rsp,%rbx,1),%r12
    417 	shrl	$4,%ecx
    418 	shlq	$48,%r13
    419 	xorq	%r8,%r12
    420 	movq	%r9,%r10
    421 	xorq	%r13,%r9
    422 	shrq	$8,%r8
    423 	movzbq	%r12b,%r12
    424 	shrq	$8,%r9
    425 	xorq	-128(%rbp,%rbx,8),%r8
    426 	shlq	$56,%r10
    427 	xorq	(%rbp,%rbx,8),%r9
    428 	roll	$8,%edx
    429 	xorq	8(%rsi,%rax,1),%r8
    430 	xorq	(%rsi,%rax,1),%r9
    431 	movb	%dl,%al
    432 	xorq	%r10,%r8
    433 	movzwq	(%r11,%r12,2),%r12
    434 	movzbl	%dl,%ebx
    435 	shlb	$4,%al
    436 	movzbq	(%rsp,%rcx,1),%r13
    437 	shrl	$4,%ebx
    438 	shlq	$48,%r12
    439 	xorq	%r8,%r13
    440 	movq	%r9,%r10
    441 	xorq	%r12,%r9
    442 	shrq	$8,%r8
    443 	movzbq	%r13b,%r13
    444 	shrq	$8,%r9
    445 	xorq	-128(%rbp,%rcx,8),%r8
    446 	shlq	$56,%r10
    447 	xorq	(%rbp,%rcx,8),%r9
    448 	roll	$8,%edx
    449 	xorq	8(%rsi,%rax,1),%r8
    450 	xorq	(%rsi,%rax,1),%r9
    451 	movb	%dl,%al
    452 	xorq	%r10,%r8
    453 	movzwq	(%r11,%r13,2),%r13
    454 	movzbl	%dl,%ecx
    455 	shlb	$4,%al
    456 	movzbq	(%rsp,%rbx,1),%r12
    457 	shrl	$4,%ecx
    458 	shlq	$48,%r13
    459 	xorq	%r8,%r12
    460 	movq	%r9,%r10
    461 	xorq	%r13,%r9
    462 	shrq	$8,%r8
    463 	movzbq	%r12b,%r12
    464 	movl	4(%rdi),%edx
    465 	shrq	$8,%r9
    466 	xorq	-128(%rbp,%rbx,8),%r8
    467 	shlq	$56,%r10
    468 	xorq	(%rbp,%rbx,8),%r9
    469 	roll	$8,%edx
    470 	xorq	8(%rsi,%rax,1),%r8
    471 	xorq	(%rsi,%rax,1),%r9
    472 	movb	%dl,%al
    473 	xorq	%r10,%r8
    474 	movzwq	(%r11,%r12,2),%r12
    475 	movzbl	%dl,%ebx
    476 	shlb	$4,%al
    477 	movzbq	(%rsp,%rcx,1),%r13
    478 	shrl	$4,%ebx
    479 	shlq	$48,%r12
    480 	xorq	%r8,%r13
    481 	movq	%r9,%r10
    482 	xorq	%r12,%r9
    483 	shrq	$8,%r8
    484 	movzbq	%r13b,%r13
    485 	shrq	$8,%r9
    486 	xorq	-128(%rbp,%rcx,8),%r8
    487 	shlq	$56,%r10
    488 	xorq	(%rbp,%rcx,8),%r9
    489 	roll	$8,%edx
    490 	xorq	8(%rsi,%rax,1),%r8
    491 	xorq	(%rsi,%rax,1),%r9
    492 	movb	%dl,%al
    493 	xorq	%r10,%r8
    494 	movzwq	(%r11,%r13,2),%r13
    495 	movzbl	%dl,%ecx
    496 	shlb	$4,%al
    497 	movzbq	(%rsp,%rbx,1),%r12
    498 	shrl	$4,%ecx
    499 	shlq	$48,%r13
    500 	xorq	%r8,%r12
    501 	movq	%r9,%r10
    502 	xorq	%r13,%r9
    503 	shrq	$8,%r8
    504 	movzbq	%r12b,%r12
    505 	shrq	$8,%r9
    506 	xorq	-128(%rbp,%rbx,8),%r8
    507 	shlq	$56,%r10
    508 	xorq	(%rbp,%rbx,8),%r9
    509 	roll	$8,%edx
    510 	xorq	8(%rsi,%rax,1),%r8
    511 	xorq	(%rsi,%rax,1),%r9
    512 	movb	%dl,%al
    513 	xorq	%r10,%r8
    514 	movzwq	(%r11,%r12,2),%r12
    515 	movzbl	%dl,%ebx
    516 	shlb	$4,%al
    517 	movzbq	(%rsp,%rcx,1),%r13
    518 	shrl	$4,%ebx
    519 	shlq	$48,%r12
    520 	xorq	%r8,%r13
    521 	movq	%r9,%r10
    522 	xorq	%r12,%r9
    523 	shrq	$8,%r8
    524 	movzbq	%r13b,%r13
    525 	shrq	$8,%r9
    526 	xorq	-128(%rbp,%rcx,8),%r8
    527 	shlq	$56,%r10
    528 	xorq	(%rbp,%rcx,8),%r9
    529 	roll	$8,%edx
    530 	xorq	8(%rsi,%rax,1),%r8
    531 	xorq	(%rsi,%rax,1),%r9
    532 	movb	%dl,%al
    533 	xorq	%r10,%r8
    534 	movzwq	(%r11,%r13,2),%r13
    535 	movzbl	%dl,%ecx
    536 	shlb	$4,%al
    537 	movzbq	(%rsp,%rbx,1),%r12
    538 	shrl	$4,%ecx
    539 	shlq	$48,%r13
    540 	xorq	%r8,%r12
    541 	movq	%r9,%r10
    542 	xorq	%r13,%r9
    543 	shrq	$8,%r8
    544 	movzbq	%r12b,%r12
    545 	movl	0(%rdi),%edx
    546 	shrq	$8,%r9
    547 	xorq	-128(%rbp,%rbx,8),%r8
    548 	shlq	$56,%r10
    549 	xorq	(%rbp,%rbx,8),%r9
    550 	roll	$8,%edx
    551 	xorq	8(%rsi,%rax,1),%r8
    552 	xorq	(%rsi,%rax,1),%r9
    553 	movb	%dl,%al
    554 	xorq	%r10,%r8
    555 	movzwq	(%r11,%r12,2),%r12
    556 	movzbl	%dl,%ebx
    557 	shlb	$4,%al
    558 	movzbq	(%rsp,%rcx,1),%r13
    559 	shrl	$4,%ebx
    560 	shlq	$48,%r12
    561 	xorq	%r8,%r13
    562 	movq	%r9,%r10
    563 	xorq	%r12,%r9
    564 	shrq	$8,%r8
    565 	movzbq	%r13b,%r13
    566 	shrq	$8,%r9
    567 	xorq	-128(%rbp,%rcx,8),%r8
    568 	shlq	$56,%r10
    569 	xorq	(%rbp,%rcx,8),%r9
    570 	roll	$8,%edx
    571 	xorq	8(%rsi,%rax,1),%r8
    572 	xorq	(%rsi,%rax,1),%r9
    573 	movb	%dl,%al
    574 	xorq	%r10,%r8
    575 	movzwq	(%r11,%r13,2),%r13
    576 	movzbl	%dl,%ecx
    577 	shlb	$4,%al
    578 	movzbq	(%rsp,%rbx,1),%r12
    579 	shrl	$4,%ecx
    580 	shlq	$48,%r13
    581 	xorq	%r8,%r12
    582 	movq	%r9,%r10
    583 	xorq	%r13,%r9
    584 	shrq	$8,%r8
    585 	movzbq	%r12b,%r12
    586 	shrq	$8,%r9
    587 	xorq	-128(%rbp,%rbx,8),%r8
    588 	shlq	$56,%r10
    589 	xorq	(%rbp,%rbx,8),%r9
    590 	roll	$8,%edx
    591 	xorq	8(%rsi,%rax,1),%r8
    592 	xorq	(%rsi,%rax,1),%r9
    593 	movb	%dl,%al
    594 	xorq	%r10,%r8
    595 	movzwq	(%r11,%r12,2),%r12
    596 	movzbl	%dl,%ebx
    597 	shlb	$4,%al
    598 	movzbq	(%rsp,%rcx,1),%r13
    599 	shrl	$4,%ebx
    600 	shlq	$48,%r12
    601 	xorq	%r8,%r13
    602 	movq	%r9,%r10
    603 	xorq	%r12,%r9
    604 	shrq	$8,%r8
    605 	movzbq	%r13b,%r13
    606 	shrq	$8,%r9
    607 	xorq	-128(%rbp,%rcx,8),%r8
    608 	shlq	$56,%r10
    609 	xorq	(%rbp,%rcx,8),%r9
    610 	roll	$8,%edx
    611 	xorq	8(%rsi,%rax,1),%r8
    612 	xorq	(%rsi,%rax,1),%r9
    613 	movb	%dl,%al
    614 	xorq	%r10,%r8
    615 	movzwq	(%r11,%r13,2),%r13
    616 	movzbl	%dl,%ecx
    617 	shlb	$4,%al
    618 	movzbq	(%rsp,%rbx,1),%r12
    619 	andl	$240,%ecx
    620 	shlq	$48,%r13
    621 	xorq	%r8,%r12
    622 	movq	%r9,%r10
    623 	xorq	%r13,%r9
    624 	shrq	$8,%r8
    625 	movzbq	%r12b,%r12
    626 	movl	-4(%rdi),%edx
    627 	shrq	$8,%r9
    628 	xorq	-128(%rbp,%rbx,8),%r8
    629 	shlq	$56,%r10
    630 	xorq	(%rbp,%rbx,8),%r9
    631 	movzwq	(%r11,%r12,2),%r12
    632 	xorq	8(%rsi,%rax,1),%r8
    633 	xorq	(%rsi,%rax,1),%r9
    634 	shlq	$48,%r12
    635 	xorq	%r10,%r8
    636 	xorq	%r12,%r9
    637 	movzbq	%r8b,%r13
    638 	shrq	$4,%r8
    639 	movq	%r9,%r10
    640 	shlb	$4,%r13b
    641 	shrq	$4,%r9
    642 	xorq	8(%rsi,%rcx,1),%r8
    643 	movzwq	(%r11,%r13,2),%r13
    644 	shlq	$60,%r10
    645 	xorq	(%rsi,%rcx,1),%r9
    646 	xorq	%r10,%r8
    647 	shlq	$48,%r13
    648 	bswapq	%r8
    649 	xorq	%r13,%r9
    650 	bswapq	%r9
    651 	cmpq	%r15,%r14
    652 	jb	.Louter_loop
    653 	movq	%r8,8(%rdi)
    654 	movq	%r9,(%rdi)
    655 
    656 	leaq	280+48(%rsp),%rsi
    657 	movq	-48(%rsi),%r15
    658 	movq	-40(%rsi),%r14
    659 	movq	-32(%rsi),%r13
    660 	movq	-24(%rsi),%r12
    661 	movq	-16(%rsi),%rbp
    662 	movq	-8(%rsi),%rbx
    663 	leaq	0(%rsi),%rsp
    664 .Lghash_epilogue:
    665 	.byte	0xf3,0xc3
    666 .size	gcm_ghash_4bit,.-gcm_ghash_4bit
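/*
 * gcm_init_clmul(Htable, Xi): precompute the key powers for the PCLMULQDQ
 * paths.  H (passed in via Xi) is first multiplied by x, i.e. the <<1 mod
 * P(x) adjustment against .L0x1c2_polynomial that compensates for GHASH's
 * bit order, then H^1, H^2, H^3 and H^4 are computed and stored together
 * with the xor of their halves used by the Karatsuba-style multiplies.
 * %rdi = Htable (output), %rsi = Xi (input H).
 */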
    667 .globl	gcm_init_clmul
    668 .hidden gcm_init_clmul
    669 .type	gcm_init_clmul,@function
    670 .align	16
    671 gcm_init_clmul:
    672 .L_init_clmul:
    673 	movdqu	(%rsi),%xmm2
    674 	pshufd	$78,%xmm2,%xmm2
    675 
    676 
    677 	pshufd	$255,%xmm2,%xmm4
    678 	movdqa	%xmm2,%xmm3
    679 	psllq	$1,%xmm2
    680 	pxor	%xmm5,%xmm5
    681 	psrlq	$63,%xmm3
    682 	pcmpgtd	%xmm4,%xmm5
    683 	pslldq	$8,%xmm3
    684 	por	%xmm3,%xmm2
    685 
    686 
    687 	pand	.L0x1c2_polynomial(%rip),%xmm5
    688 	pxor	%xmm5,%xmm2
    689 
    690 
    691 	pshufd	$78,%xmm2,%xmm6
    692 	movdqa	%xmm2,%xmm0
    693 	pxor	%xmm2,%xmm6
    694 	movdqa	%xmm0,%xmm1
    695 	pshufd	$78,%xmm0,%xmm3
    696 	pxor	%xmm0,%xmm3
    697 .byte	102,15,58,68,194,0
    698 .byte	102,15,58,68,202,17
    699 .byte	102,15,58,68,222,0
    700 	pxor	%xmm0,%xmm3
    701 	pxor	%xmm1,%xmm3
    702 
    703 	movdqa	%xmm3,%xmm4
    704 	psrldq	$8,%xmm3
    705 	pslldq	$8,%xmm4
    706 	pxor	%xmm3,%xmm1
    707 	pxor	%xmm4,%xmm0
    708 
    709 	movdqa	%xmm0,%xmm4
    710 	movdqa	%xmm0,%xmm3
    711 	psllq	$5,%xmm0
    712 	pxor	%xmm0,%xmm3
    713 	psllq	$1,%xmm0
    714 	pxor	%xmm3,%xmm0
    715 	psllq	$57,%xmm0
    716 	movdqa	%xmm0,%xmm3
    717 	pslldq	$8,%xmm0
    718 	psrldq	$8,%xmm3
    719 	pxor	%xmm4,%xmm0
    720 	pxor	%xmm3,%xmm1
    721 
    722 
    723 	movdqa	%xmm0,%xmm4
    724 	psrlq	$1,%xmm0
    725 	pxor	%xmm4,%xmm1
    726 	pxor	%xmm0,%xmm4
    727 	psrlq	$5,%xmm0
    728 	pxor	%xmm4,%xmm0
    729 	psrlq	$1,%xmm0
    730 	pxor	%xmm1,%xmm0
    731 	pshufd	$78,%xmm2,%xmm3
    732 	pshufd	$78,%xmm0,%xmm4
    733 	pxor	%xmm2,%xmm3
    734 	movdqu	%xmm2,0(%rdi)
    735 	pxor	%xmm0,%xmm4
    736 	movdqu	%xmm0,16(%rdi)
    737 .byte	102,15,58,15,227,8
    738 	movdqu	%xmm4,32(%rdi)
    739 	movdqa	%xmm0,%xmm1
    740 	pshufd	$78,%xmm0,%xmm3
    741 	pxor	%xmm0,%xmm3
    742 .byte	102,15,58,68,194,0
    743 .byte	102,15,58,68,202,17
    744 .byte	102,15,58,68,222,0
    745 	pxor	%xmm0,%xmm3
    746 	pxor	%xmm1,%xmm3
    747 
    748 	movdqa	%xmm3,%xmm4
    749 	psrldq	$8,%xmm3
    750 	pslldq	$8,%xmm4
    751 	pxor	%xmm3,%xmm1
    752 	pxor	%xmm4,%xmm0
    753 
    754 	movdqa	%xmm0,%xmm4
    755 	movdqa	%xmm0,%xmm3
    756 	psllq	$5,%xmm0
    757 	pxor	%xmm0,%xmm3
    758 	psllq	$1,%xmm0
    759 	pxor	%xmm3,%xmm0
    760 	psllq	$57,%xmm0
    761 	movdqa	%xmm0,%xmm3
    762 	pslldq	$8,%xmm0
    763 	psrldq	$8,%xmm3
    764 	pxor	%xmm4,%xmm0
    765 	pxor	%xmm3,%xmm1
    766 
    767 
    768 	movdqa	%xmm0,%xmm4
    769 	psrlq	$1,%xmm0
    770 	pxor	%xmm4,%xmm1
    771 	pxor	%xmm0,%xmm4
    772 	psrlq	$5,%xmm0
    773 	pxor	%xmm4,%xmm0
    774 	psrlq	$1,%xmm0
    775 	pxor	%xmm1,%xmm0
    776 	movdqa	%xmm0,%xmm5
    777 	movdqa	%xmm0,%xmm1
    778 	pshufd	$78,%xmm0,%xmm3
    779 	pxor	%xmm0,%xmm3
    780 .byte	102,15,58,68,194,0
    781 .byte	102,15,58,68,202,17
    782 .byte	102,15,58,68,222,0
    783 	pxor	%xmm0,%xmm3
    784 	pxor	%xmm1,%xmm3
    785 
    786 	movdqa	%xmm3,%xmm4
    787 	psrldq	$8,%xmm3
    788 	pslldq	$8,%xmm4
    789 	pxor	%xmm3,%xmm1
    790 	pxor	%xmm4,%xmm0
    791 
    792 	movdqa	%xmm0,%xmm4
    793 	movdqa	%xmm0,%xmm3
    794 	psllq	$5,%xmm0
    795 	pxor	%xmm0,%xmm3
    796 	psllq	$1,%xmm0
    797 	pxor	%xmm3,%xmm0
    798 	psllq	$57,%xmm0
    799 	movdqa	%xmm0,%xmm3
    800 	pslldq	$8,%xmm0
    801 	psrldq	$8,%xmm3
    802 	pxor	%xmm4,%xmm0
    803 	pxor	%xmm3,%xmm1
    804 
    805 
    806 	movdqa	%xmm0,%xmm4
    807 	psrlq	$1,%xmm0
    808 	pxor	%xmm4,%xmm1
    809 	pxor	%xmm0,%xmm4
    810 	psrlq	$5,%xmm0
    811 	pxor	%xmm4,%xmm0
    812 	psrlq	$1,%xmm0
    813 	pxor	%xmm1,%xmm0
    814 	pshufd	$78,%xmm5,%xmm3
    815 	pshufd	$78,%xmm0,%xmm4
    816 	pxor	%xmm5,%xmm3
    817 	movdqu	%xmm5,48(%rdi)
    818 	pxor	%xmm0,%xmm4
    819 	movdqu	%xmm0,64(%rdi)
    820 .byte	102,15,58,15,227,8
    821 	movdqu	%xmm4,80(%rdi)
    822 	.byte	0xf3,0xc3
    823 .size	gcm_init_clmul,.-gcm_init_clmul
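/*
 * gcm_gmult_clmul(Xi, Htable): single GHASH multiplication with carry-less
 * multiply.  The .byte 102,15,58,68,... sequences encode pclmulqdq and the
 * .byte 102,15,56,0,... sequences encode pshufb (emitted as raw bytes so
 * older assemblers can build the file): byte-swap Xi, multiply by H, reduce
 * modulo the GHASH polynomial, byte-swap back and store.
 * %rdi = Xi, %rsi = Htable.
 */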
    824 .globl	gcm_gmult_clmul
    825 .hidden gcm_gmult_clmul
    826 .type	gcm_gmult_clmul,@function
    827 .align	16
    828 gcm_gmult_clmul:
    829 .L_gmult_clmul:
    830 	movdqu	(%rdi),%xmm0
    831 	movdqa	.Lbswap_mask(%rip),%xmm5
    832 	movdqu	(%rsi),%xmm2
    833 	movdqu	32(%rsi),%xmm4
    834 .byte	102,15,56,0,197
    835 	movdqa	%xmm0,%xmm1
    836 	pshufd	$78,%xmm0,%xmm3
    837 	pxor	%xmm0,%xmm3
    838 .byte	102,15,58,68,194,0
    839 .byte	102,15,58,68,202,17
    840 .byte	102,15,58,68,220,0
    841 	pxor	%xmm0,%xmm3
    842 	pxor	%xmm1,%xmm3
    843 
    844 	movdqa	%xmm3,%xmm4
    845 	psrldq	$8,%xmm3
    846 	pslldq	$8,%xmm4
    847 	pxor	%xmm3,%xmm1
    848 	pxor	%xmm4,%xmm0
    849 
    850 	movdqa	%xmm0,%xmm4
    851 	movdqa	%xmm0,%xmm3
    852 	psllq	$5,%xmm0
    853 	pxor	%xmm0,%xmm3
    854 	psllq	$1,%xmm0
    855 	pxor	%xmm3,%xmm0
    856 	psllq	$57,%xmm0
    857 	movdqa	%xmm0,%xmm3
    858 	pslldq	$8,%xmm0
    859 	psrldq	$8,%xmm3
    860 	pxor	%xmm4,%xmm0
    861 	pxor	%xmm3,%xmm1
    862 
    863 
    864 	movdqa	%xmm0,%xmm4
    865 	psrlq	$1,%xmm0
    866 	pxor	%xmm4,%xmm1
    867 	pxor	%xmm0,%xmm4
    868 	psrlq	$5,%xmm0
    869 	pxor	%xmm4,%xmm0
    870 	psrlq	$1,%xmm0
    871 	pxor	%xmm1,%xmm0
    872 .byte	102,15,56,0,197
    873 	movdqu	%xmm0,(%rdi)
    874 	.byte	0xf3,0xc3
    875 .size	gcm_gmult_clmul,.-gcm_gmult_clmul
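/*
 * gcm_ghash_clmul(Xi, Htable, inp, len): bulk GHASH with PCLMULQDQ.
 * Depending on len and the CPU feature bits read from OPENSSL_ia32cap_P it
 * uses 4-block aggregation (.Lmod4_loop, with the H^1..H^4 powers from
 * gcm_init_clmul), 2-block aggregation (.Lmod_loop / .Leven_tail) or a
 * single trailing block (.Lodd_tail).  %rdi = Xi, %rsi = Htable,
 * %rdx = inp, %rcx = len.
 */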
    876 .globl	gcm_ghash_clmul
    877 .hidden gcm_ghash_clmul
    878 .type	gcm_ghash_clmul,@function
    879 .align	32
    880 gcm_ghash_clmul:
    881 .L_ghash_clmul:
    882 	movdqa	.Lbswap_mask(%rip),%xmm10
    883 
    884 	movdqu	(%rdi),%xmm0
    885 	movdqu	(%rsi),%xmm2
    886 	movdqu	32(%rsi),%xmm7
    887 .byte	102,65,15,56,0,194
    888 
    889 	subq	$0x10,%rcx
    890 	jz	.Lodd_tail
    891 
    892 	movdqu	16(%rsi),%xmm6
    893 	leaq	OPENSSL_ia32cap_P(%rip),%rax
    894 	movl	4(%rax),%eax
    895 	cmpq	$0x30,%rcx
    896 	jb	.Lskip4x
    897 
    898 	andl	$71303168,%eax
    899 	cmpl	$4194304,%eax
    900 	je	.Lskip4x
    901 
    902 	subq	$0x30,%rcx
    903 	movq	$0xA040608020C0E000,%rax
    904 	movdqu	48(%rsi),%xmm14
    905 	movdqu	64(%rsi),%xmm15
    906 
    907 
    908 
    909 
    910 	movdqu	48(%rdx),%xmm3
    911 	movdqu	32(%rdx),%xmm11
    912 .byte	102,65,15,56,0,218
    913 .byte	102,69,15,56,0,218
    914 	movdqa	%xmm3,%xmm5
    915 	pshufd	$78,%xmm3,%xmm4
    916 	pxor	%xmm3,%xmm4
    917 .byte	102,15,58,68,218,0
    918 .byte	102,15,58,68,234,17
    919 .byte	102,15,58,68,231,0
    920 
    921 	movdqa	%xmm11,%xmm13
    922 	pshufd	$78,%xmm11,%xmm12
    923 	pxor	%xmm11,%xmm12
    924 .byte	102,68,15,58,68,222,0
    925 .byte	102,68,15,58,68,238,17
    926 .byte	102,68,15,58,68,231,16
    927 	xorps	%xmm11,%xmm3
    928 	xorps	%xmm13,%xmm5
    929 	movups	80(%rsi),%xmm7
    930 	xorps	%xmm12,%xmm4
    931 
    932 	movdqu	16(%rdx),%xmm11
    933 	movdqu	0(%rdx),%xmm8
    934 .byte	102,69,15,56,0,218
    935 .byte	102,69,15,56,0,194
    936 	movdqa	%xmm11,%xmm13
    937 	pshufd	$78,%xmm11,%xmm12
    938 	pxor	%xmm8,%xmm0
    939 	pxor	%xmm11,%xmm12
    940 .byte	102,69,15,58,68,222,0
    941 	movdqa	%xmm0,%xmm1
    942 	pshufd	$78,%xmm0,%xmm8
    943 	pxor	%xmm0,%xmm8
    944 .byte	102,69,15,58,68,238,17
    945 .byte	102,68,15,58,68,231,0
    946 	xorps	%xmm11,%xmm3
    947 	xorps	%xmm13,%xmm5
    948 
    949 	leaq	64(%rdx),%rdx
    950 	subq	$0x40,%rcx
    951 	jc	.Ltail4x
    952 
    953 	jmp	.Lmod4_loop
    954 .align	32
    955 .Lmod4_loop:
    956 .byte	102,65,15,58,68,199,0
    957 	xorps	%xmm12,%xmm4
    958 	movdqu	48(%rdx),%xmm11
    959 .byte	102,69,15,56,0,218
    960 .byte	102,65,15,58,68,207,17
    961 	xorps	%xmm3,%xmm0
    962 	movdqu	32(%rdx),%xmm3
    963 	movdqa	%xmm11,%xmm13
    964 .byte	102,68,15,58,68,199,16
    965 	pshufd	$78,%xmm11,%xmm12
    966 	xorps	%xmm5,%xmm1
    967 	pxor	%xmm11,%xmm12
    968 .byte	102,65,15,56,0,218
    969 	movups	32(%rsi),%xmm7
    970 	xorps	%xmm4,%xmm8
    971 .byte	102,68,15,58,68,218,0
    972 	pshufd	$78,%xmm3,%xmm4
    973 
    974 	pxor	%xmm0,%xmm8
    975 	movdqa	%xmm3,%xmm5
    976 	pxor	%xmm1,%xmm8
    977 	pxor	%xmm3,%xmm4
    978 	movdqa	%xmm8,%xmm9
    979 .byte	102,68,15,58,68,234,17
    980 	pslldq	$8,%xmm8
    981 	psrldq	$8,%xmm9
    982 	pxor	%xmm8,%xmm0
    983 	movdqa	.L7_mask(%rip),%xmm8
    984 	pxor	%xmm9,%xmm1
    985 .byte	102,76,15,110,200
    986 
    987 	pand	%xmm0,%xmm8
    988 .byte	102,69,15,56,0,200
    989 	pxor	%xmm0,%xmm9
    990 .byte	102,68,15,58,68,231,0
    991 	psllq	$57,%xmm9
    992 	movdqa	%xmm9,%xmm8
    993 	pslldq	$8,%xmm9
    994 .byte	102,15,58,68,222,0
    995 	psrldq	$8,%xmm8
    996 	pxor	%xmm9,%xmm0
    997 	pxor	%xmm8,%xmm1
    998 	movdqu	0(%rdx),%xmm8
    999 
   1000 	movdqa	%xmm0,%xmm9
   1001 	psrlq	$1,%xmm0
   1002 .byte	102,15,58,68,238,17
   1003 	xorps	%xmm11,%xmm3
   1004 	movdqu	16(%rdx),%xmm11
   1005 .byte	102,69,15,56,0,218
   1006 .byte	102,15,58,68,231,16
   1007 	xorps	%xmm13,%xmm5
   1008 	movups	80(%rsi),%xmm7
   1009 .byte	102,69,15,56,0,194
   1010 	pxor	%xmm9,%xmm1
   1011 	pxor	%xmm0,%xmm9
   1012 	psrlq	$5,%xmm0
   1013 
   1014 	movdqa	%xmm11,%xmm13
   1015 	pxor	%xmm12,%xmm4
   1016 	pshufd	$78,%xmm11,%xmm12
   1017 	pxor	%xmm9,%xmm0
   1018 	pxor	%xmm8,%xmm1
   1019 	pxor	%xmm11,%xmm12
   1020 .byte	102,69,15,58,68,222,0
   1021 	psrlq	$1,%xmm0
   1022 	pxor	%xmm1,%xmm0
   1023 	movdqa	%xmm0,%xmm1
   1024 .byte	102,69,15,58,68,238,17
   1025 	xorps	%xmm11,%xmm3
   1026 	pshufd	$78,%xmm0,%xmm8
   1027 	pxor	%xmm0,%xmm8
   1028 
   1029 .byte	102,68,15,58,68,231,0
   1030 	xorps	%xmm13,%xmm5
   1031 
   1032 	leaq	64(%rdx),%rdx
   1033 	subq	$0x40,%rcx
   1034 	jnc	.Lmod4_loop
   1035 
   1036 .Ltail4x:
   1037 .byte	102,65,15,58,68,199,0
   1038 .byte	102,65,15,58,68,207,17
   1039 .byte	102,68,15,58,68,199,16
   1040 	xorps	%xmm12,%xmm4
   1041 	xorps	%xmm3,%xmm0
   1042 	xorps	%xmm5,%xmm1
   1043 	pxor	%xmm0,%xmm1
   1044 	pxor	%xmm4,%xmm8
   1045 
   1046 	pxor	%xmm1,%xmm8
   1047 	pxor	%xmm0,%xmm1
   1048 
   1049 	movdqa	%xmm8,%xmm9
   1050 	psrldq	$8,%xmm8
   1051 	pslldq	$8,%xmm9
   1052 	pxor	%xmm8,%xmm1
   1053 	pxor	%xmm9,%xmm0
   1054 
   1055 	movdqa	%xmm0,%xmm4
   1056 	movdqa	%xmm0,%xmm3
   1057 	psllq	$5,%xmm0
   1058 	pxor	%xmm0,%xmm3
   1059 	psllq	$1,%xmm0
   1060 	pxor	%xmm3,%xmm0
   1061 	psllq	$57,%xmm0
   1062 	movdqa	%xmm0,%xmm3
   1063 	pslldq	$8,%xmm0
   1064 	psrldq	$8,%xmm3
   1065 	pxor	%xmm4,%xmm0
   1066 	pxor	%xmm3,%xmm1
   1067 
   1068 
   1069 	movdqa	%xmm0,%xmm4
   1070 	psrlq	$1,%xmm0
   1071 	pxor	%xmm4,%xmm1
   1072 	pxor	%xmm0,%xmm4
   1073 	psrlq	$5,%xmm0
   1074 	pxor	%xmm4,%xmm0
   1075 	psrlq	$1,%xmm0
   1076 	pxor	%xmm1,%xmm0
   1077 	addq	$0x40,%rcx
   1078 	jz	.Ldone
   1079 	movdqu	32(%rsi),%xmm7
   1080 	subq	$0x10,%rcx
   1081 	jz	.Lodd_tail
   1082 .Lskip4x:
   1083 
   1084 
   1085 
   1086 
   1087 
   1088 	movdqu	(%rdx),%xmm8
   1089 	movdqu	16(%rdx),%xmm3
   1090 .byte	102,69,15,56,0,194
   1091 .byte	102,65,15,56,0,218
   1092 	pxor	%xmm8,%xmm0
   1093 
   1094 	movdqa	%xmm3,%xmm5
   1095 	pshufd	$78,%xmm3,%xmm4
   1096 	pxor	%xmm3,%xmm4
   1097 .byte	102,15,58,68,218,0
   1098 .byte	102,15,58,68,234,17
   1099 .byte	102,15,58,68,231,0
   1100 
   1101 	leaq	32(%rdx),%rdx
   1102 	nop
   1103 	subq	$0x20,%rcx
   1104 	jbe	.Leven_tail
   1105 	nop
   1106 	jmp	.Lmod_loop
   1107 
   1108 .align	32
   1109 .Lmod_loop:
   1110 	movdqa	%xmm0,%xmm1
   1111 	movdqa	%xmm4,%xmm8
   1112 	pshufd	$78,%xmm0,%xmm4
   1113 	pxor	%xmm0,%xmm4
   1114 
   1115 .byte	102,15,58,68,198,0
   1116 .byte	102,15,58,68,206,17
   1117 .byte	102,15,58,68,231,16
   1118 
   1119 	pxor	%xmm3,%xmm0
   1120 	pxor	%xmm5,%xmm1
   1121 	movdqu	(%rdx),%xmm9
   1122 	pxor	%xmm0,%xmm8
   1123 .byte	102,69,15,56,0,202
   1124 	movdqu	16(%rdx),%xmm3
   1125 
   1126 	pxor	%xmm1,%xmm8
   1127 	pxor	%xmm9,%xmm1
   1128 	pxor	%xmm8,%xmm4
   1129 .byte	102,65,15,56,0,218
   1130 	movdqa	%xmm4,%xmm8
   1131 	psrldq	$8,%xmm8
   1132 	pslldq	$8,%xmm4
   1133 	pxor	%xmm8,%xmm1
   1134 	pxor	%xmm4,%xmm0
   1135 
   1136 	movdqa	%xmm3,%xmm5
   1137 
   1138 	movdqa	%xmm0,%xmm9
   1139 	movdqa	%xmm0,%xmm8
   1140 	psllq	$5,%xmm0
   1141 	pxor	%xmm0,%xmm8
   1142 .byte	102,15,58,68,218,0
   1143 	psllq	$1,%xmm0
   1144 	pxor	%xmm8,%xmm0
   1145 	psllq	$57,%xmm0
   1146 	movdqa	%xmm0,%xmm8
   1147 	pslldq	$8,%xmm0
   1148 	psrldq	$8,%xmm8
   1149 	pxor	%xmm9,%xmm0
   1150 	pshufd	$78,%xmm5,%xmm4
   1151 	pxor	%xmm8,%xmm1
   1152 	pxor	%xmm5,%xmm4
   1153 
   1154 	movdqa	%xmm0,%xmm9
   1155 	psrlq	$1,%xmm0
   1156 .byte	102,15,58,68,234,17
   1157 	pxor	%xmm9,%xmm1
   1158 	pxor	%xmm0,%xmm9
   1159 	psrlq	$5,%xmm0
   1160 	pxor	%xmm9,%xmm0
   1161 	leaq	32(%rdx),%rdx
   1162 	psrlq	$1,%xmm0
   1163 .byte	102,15,58,68,231,0
   1164 	pxor	%xmm1,%xmm0
   1165 
   1166 	subq	$0x20,%rcx
   1167 	ja	.Lmod_loop
   1168 
   1169 .Leven_tail:
   1170 	movdqa	%xmm0,%xmm1
   1171 	movdqa	%xmm4,%xmm8
   1172 	pshufd	$78,%xmm0,%xmm4
   1173 	pxor	%xmm0,%xmm4
   1174 
   1175 .byte	102,15,58,68,198,0
   1176 .byte	102,15,58,68,206,17
   1177 .byte	102,15,58,68,231,16
   1178 
   1179 	pxor	%xmm3,%xmm0
   1180 	pxor	%xmm5,%xmm1
   1181 	pxor	%xmm0,%xmm8
   1182 	pxor	%xmm1,%xmm8
   1183 	pxor	%xmm8,%xmm4
   1184 	movdqa	%xmm4,%xmm8
   1185 	psrldq	$8,%xmm8
   1186 	pslldq	$8,%xmm4
   1187 	pxor	%xmm8,%xmm1
   1188 	pxor	%xmm4,%xmm0
   1189 
   1190 	movdqa	%xmm0,%xmm4
   1191 	movdqa	%xmm0,%xmm3
   1192 	psllq	$5,%xmm0
   1193 	pxor	%xmm0,%xmm3
   1194 	psllq	$1,%xmm0
   1195 	pxor	%xmm3,%xmm0
   1196 	psllq	$57,%xmm0
   1197 	movdqa	%xmm0,%xmm3
   1198 	pslldq	$8,%xmm0
   1199 	psrldq	$8,%xmm3
   1200 	pxor	%xmm4,%xmm0
   1201 	pxor	%xmm3,%xmm1
   1202 
   1203 
   1204 	movdqa	%xmm0,%xmm4
   1205 	psrlq	$1,%xmm0
   1206 	pxor	%xmm4,%xmm1
   1207 	pxor	%xmm0,%xmm4
   1208 	psrlq	$5,%xmm0
   1209 	pxor	%xmm4,%xmm0
   1210 	psrlq	$1,%xmm0
   1211 	pxor	%xmm1,%xmm0
   1212 	testq	%rcx,%rcx
   1213 	jnz	.Ldone
   1214 
   1215 .Lodd_tail:
   1216 	movdqu	(%rdx),%xmm8
   1217 .byte	102,69,15,56,0,194
   1218 	pxor	%xmm8,%xmm0
   1219 	movdqa	%xmm0,%xmm1
   1220 	pshufd	$78,%xmm0,%xmm3
   1221 	pxor	%xmm0,%xmm3
   1222 .byte	102,15,58,68,194,0
   1223 .byte	102,15,58,68,202,17
   1224 .byte	102,15,58,68,223,0
   1225 	pxor	%xmm0,%xmm3
   1226 	pxor	%xmm1,%xmm3
   1227 
   1228 	movdqa	%xmm3,%xmm4
   1229 	psrldq	$8,%xmm3
   1230 	pslldq	$8,%xmm4
   1231 	pxor	%xmm3,%xmm1
   1232 	pxor	%xmm4,%xmm0
   1233 
   1234 	movdqa	%xmm0,%xmm4
   1235 	movdqa	%xmm0,%xmm3
   1236 	psllq	$5,%xmm0
   1237 	pxor	%xmm0,%xmm3
   1238 	psllq	$1,%xmm0
   1239 	pxor	%xmm3,%xmm0
   1240 	psllq	$57,%xmm0
   1241 	movdqa	%xmm0,%xmm3
   1242 	pslldq	$8,%xmm0
   1243 	psrldq	$8,%xmm3
   1244 	pxor	%xmm4,%xmm0
   1245 	pxor	%xmm3,%xmm1
   1246 
   1247 
   1248 	movdqa	%xmm0,%xmm4
   1249 	psrlq	$1,%xmm0
   1250 	pxor	%xmm4,%xmm1
   1251 	pxor	%xmm0,%xmm4
   1252 	psrlq	$5,%xmm0
   1253 	pxor	%xmm4,%xmm0
   1254 	psrlq	$1,%xmm0
   1255 	pxor	%xmm1,%xmm0
   1256 .Ldone:
   1257 .byte	102,65,15,56,0,194
   1258 	movdqu	%xmm0,(%rdi)
   1259 	.byte	0xf3,0xc3
   1260 .size	gcm_ghash_clmul,.-gcm_ghash_clmul
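/*
 * gcm_init_avx(Htable, Xi): AVX variant of gcm_init_clmul.  Performs the
 * same <<1 mod P(x) adjustment of H and then precomputes the powers of H
 * (up to H^8, plus the folded halves) with vpclmulqdq for the 8-block
 * aggregated loop in gcm_ghash_avx.  %rdi = Htable, %rsi = Xi.
 */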
   1261 .globl	gcm_init_avx
   1262 .hidden gcm_init_avx
   1263 .type	gcm_init_avx,@function
   1264 .align	32
   1265 gcm_init_avx:
   1266 	vzeroupper
   1267 
   1268 	vmovdqu	(%rsi),%xmm2
   1269 	vpshufd	$78,%xmm2,%xmm2
   1270 
   1271 
   1272 	vpshufd	$255,%xmm2,%xmm4
   1273 	vpsrlq	$63,%xmm2,%xmm3
   1274 	vpsllq	$1,%xmm2,%xmm2
   1275 	vpxor	%xmm5,%xmm5,%xmm5
   1276 	vpcmpgtd	%xmm4,%xmm5,%xmm5
   1277 	vpslldq	$8,%xmm3,%xmm3
   1278 	vpor	%xmm3,%xmm2,%xmm2
   1279 
   1280 
   1281 	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
   1282 	vpxor	%xmm5,%xmm2,%xmm2
   1283 
   1284 	vpunpckhqdq	%xmm2,%xmm2,%xmm6
   1285 	vmovdqa	%xmm2,%xmm0
   1286 	vpxor	%xmm2,%xmm6,%xmm6
   1287 	movq	$4,%r10
   1288 	jmp	.Linit_start_avx
   1289 .align	32
   1290 .Linit_loop_avx:
   1291 	vpalignr	$8,%xmm3,%xmm4,%xmm5
   1292 	vmovdqu	%xmm5,-16(%rdi)
   1293 	vpunpckhqdq	%xmm0,%xmm0,%xmm3
   1294 	vpxor	%xmm0,%xmm3,%xmm3
   1295 	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
   1296 	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
   1297 	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
   1298 	vpxor	%xmm0,%xmm1,%xmm4
   1299 	vpxor	%xmm4,%xmm3,%xmm3
   1300 
   1301 	vpslldq	$8,%xmm3,%xmm4
   1302 	vpsrldq	$8,%xmm3,%xmm3
   1303 	vpxor	%xmm4,%xmm0,%xmm0
   1304 	vpxor	%xmm3,%xmm1,%xmm1
   1305 	vpsllq	$57,%xmm0,%xmm3
   1306 	vpsllq	$62,%xmm0,%xmm4
   1307 	vpxor	%xmm3,%xmm4,%xmm4
   1308 	vpsllq	$63,%xmm0,%xmm3
   1309 	vpxor	%xmm3,%xmm4,%xmm4
   1310 	vpslldq	$8,%xmm4,%xmm3
   1311 	vpsrldq	$8,%xmm4,%xmm4
   1312 	vpxor	%xmm3,%xmm0,%xmm0
   1313 	vpxor	%xmm4,%xmm1,%xmm1
   1314 
   1315 	vpsrlq	$1,%xmm0,%xmm4
   1316 	vpxor	%xmm0,%xmm1,%xmm1
   1317 	vpxor	%xmm4,%xmm0,%xmm0
   1318 	vpsrlq	$5,%xmm4,%xmm4
   1319 	vpxor	%xmm4,%xmm0,%xmm0
   1320 	vpsrlq	$1,%xmm0,%xmm0
   1321 	vpxor	%xmm1,%xmm0,%xmm0
   1322 .Linit_start_avx:
   1323 	vmovdqa	%xmm0,%xmm5
   1324 	vpunpckhqdq	%xmm0,%xmm0,%xmm3
   1325 	vpxor	%xmm0,%xmm3,%xmm3
   1326 	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
   1327 	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
   1328 	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
   1329 	vpxor	%xmm0,%xmm1,%xmm4
   1330 	vpxor	%xmm4,%xmm3,%xmm3
   1331 
   1332 	vpslldq	$8,%xmm3,%xmm4
   1333 	vpsrldq	$8,%xmm3,%xmm3
   1334 	vpxor	%xmm4,%xmm0,%xmm0
   1335 	vpxor	%xmm3,%xmm1,%xmm1
   1336 	vpsllq	$57,%xmm0,%xmm3
   1337 	vpsllq	$62,%xmm0,%xmm4
   1338 	vpxor	%xmm3,%xmm4,%xmm4
   1339 	vpsllq	$63,%xmm0,%xmm3
   1340 	vpxor	%xmm3,%xmm4,%xmm4
   1341 	vpslldq	$8,%xmm4,%xmm3
   1342 	vpsrldq	$8,%xmm4,%xmm4
   1343 	vpxor	%xmm3,%xmm0,%xmm0
   1344 	vpxor	%xmm4,%xmm1,%xmm1
   1345 
   1346 	vpsrlq	$1,%xmm0,%xmm4
   1347 	vpxor	%xmm0,%xmm1,%xmm1
   1348 	vpxor	%xmm4,%xmm0,%xmm0
   1349 	vpsrlq	$5,%xmm4,%xmm4
   1350 	vpxor	%xmm4,%xmm0,%xmm0
   1351 	vpsrlq	$1,%xmm0,%xmm0
   1352 	vpxor	%xmm1,%xmm0,%xmm0
   1353 	vpshufd	$78,%xmm5,%xmm3
   1354 	vpshufd	$78,%xmm0,%xmm4
   1355 	vpxor	%xmm5,%xmm3,%xmm3
   1356 	vmovdqu	%xmm5,0(%rdi)
   1357 	vpxor	%xmm0,%xmm4,%xmm4
   1358 	vmovdqu	%xmm0,16(%rdi)
   1359 	leaq	48(%rdi),%rdi
   1360 	subq	$1,%r10
   1361 	jnz	.Linit_loop_avx
   1362 
   1363 	vpalignr	$8,%xmm4,%xmm3,%xmm5
   1364 	vmovdqu	%xmm5,-16(%rdi)
   1365 
   1366 	vzeroupper
   1367 	.byte	0xf3,0xc3
   1368 .size	gcm_init_avx,.-gcm_init_avx
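/*
 * gcm_gmult_avx(Xi, Htable): a single multiplication gains nothing from
 * AVX, so this is just an alias that jumps to .L_gmult_clmul.
 */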
   1369 .globl	gcm_gmult_avx
   1370 .hidden gcm_gmult_avx
   1371 .type	gcm_gmult_avx,@function
   1372 .align	32
   1373 gcm_gmult_avx:
   1374 	jmp	.L_gmult_clmul
   1375 .size	gcm_gmult_avx,.-gcm_gmult_avx
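/*
 * gcm_ghash_avx(Xi, Htable, inp, len): bulk GHASH using the VEX-encoded
 * carry-less multiply, aggregating up to 8 blocks per iteration
 * (.Loop8x_avx) with the powers precomputed by gcm_init_avx; .Lshort_avx /
 * .Ltail_avx handle the remaining blocks.  %rdi = Xi, %rsi = Htable,
 * %rdx = inp, %rcx = len.
 */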
   1376 .globl	gcm_ghash_avx
   1377 .hidden gcm_ghash_avx
   1378 .type	gcm_ghash_avx,@function
   1379 .align	32
   1380 gcm_ghash_avx:
   1381 	vzeroupper
   1382 
   1383 	vmovdqu	(%rdi),%xmm10
   1384 	leaq	.L0x1c2_polynomial(%rip),%r10
   1385 	leaq	64(%rsi),%rsi
   1386 	vmovdqu	.Lbswap_mask(%rip),%xmm13
   1387 	vpshufb	%xmm13,%xmm10,%xmm10
   1388 	cmpq	$0x80,%rcx
   1389 	jb	.Lshort_avx
   1390 	subq	$0x80,%rcx
   1391 
   1392 	vmovdqu	112(%rdx),%xmm14
   1393 	vmovdqu	0-64(%rsi),%xmm6
   1394 	vpshufb	%xmm13,%xmm14,%xmm14
   1395 	vmovdqu	32-64(%rsi),%xmm7
   1396 
   1397 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1398 	vmovdqu	96(%rdx),%xmm15
   1399 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1400 	vpxor	%xmm14,%xmm9,%xmm9
   1401 	vpshufb	%xmm13,%xmm15,%xmm15
   1402 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1403 	vmovdqu	16-64(%rsi),%xmm6
   1404 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1405 	vmovdqu	80(%rdx),%xmm14
   1406 	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
   1407 	vpxor	%xmm15,%xmm8,%xmm8
   1408 
   1409 	vpshufb	%xmm13,%xmm14,%xmm14
   1410 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
   1411 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1412 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
   1413 	vmovdqu	48-64(%rsi),%xmm6
   1414 	vpxor	%xmm14,%xmm9,%xmm9
   1415 	vmovdqu	64(%rdx),%xmm15
   1416 	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
   1417 	vmovdqu	80-64(%rsi),%xmm7
   1418 
   1419 	vpshufb	%xmm13,%xmm15,%xmm15
   1420 	vpxor	%xmm0,%xmm3,%xmm3
   1421 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1422 	vpxor	%xmm1,%xmm4,%xmm4
   1423 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1424 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1425 	vmovdqu	64-64(%rsi),%xmm6
   1426 	vpxor	%xmm2,%xmm5,%xmm5
   1427 	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
   1428 	vpxor	%xmm15,%xmm8,%xmm8
   1429 
   1430 	vmovdqu	48(%rdx),%xmm14
   1431 	vpxor	%xmm3,%xmm0,%xmm0
   1432 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
   1433 	vpxor	%xmm4,%xmm1,%xmm1
   1434 	vpshufb	%xmm13,%xmm14,%xmm14
   1435 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
   1436 	vmovdqu	96-64(%rsi),%xmm6
   1437 	vpxor	%xmm5,%xmm2,%xmm2
   1438 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1439 	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
   1440 	vmovdqu	128-64(%rsi),%xmm7
   1441 	vpxor	%xmm14,%xmm9,%xmm9
   1442 
   1443 	vmovdqu	32(%rdx),%xmm15
   1444 	vpxor	%xmm0,%xmm3,%xmm3
   1445 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1446 	vpxor	%xmm1,%xmm4,%xmm4
   1447 	vpshufb	%xmm13,%xmm15,%xmm15
   1448 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1449 	vmovdqu	112-64(%rsi),%xmm6
   1450 	vpxor	%xmm2,%xmm5,%xmm5
   1451 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1452 	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
   1453 	vpxor	%xmm15,%xmm8,%xmm8
   1454 
   1455 	vmovdqu	16(%rdx),%xmm14
   1456 	vpxor	%xmm3,%xmm0,%xmm0
   1457 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
   1458 	vpxor	%xmm4,%xmm1,%xmm1
   1459 	vpshufb	%xmm13,%xmm14,%xmm14
   1460 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
   1461 	vmovdqu	144-64(%rsi),%xmm6
   1462 	vpxor	%xmm5,%xmm2,%xmm2
   1463 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1464 	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
   1465 	vmovdqu	176-64(%rsi),%xmm7
   1466 	vpxor	%xmm14,%xmm9,%xmm9
   1467 
   1468 	vmovdqu	(%rdx),%xmm15
   1469 	vpxor	%xmm0,%xmm3,%xmm3
   1470 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1471 	vpxor	%xmm1,%xmm4,%xmm4
   1472 	vpshufb	%xmm13,%xmm15,%xmm15
   1473 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1474 	vmovdqu	160-64(%rsi),%xmm6
   1475 	vpxor	%xmm2,%xmm5,%xmm5
   1476 	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
   1477 
   1478 	leaq	128(%rdx),%rdx
   1479 	cmpq	$0x80,%rcx
   1480 	jb	.Ltail_avx
   1481 
   1482 	vpxor	%xmm10,%xmm15,%xmm15
   1483 	subq	$0x80,%rcx
   1484 	jmp	.Loop8x_avx
   1485 
   1486 .align	32
   1487 .Loop8x_avx:
   1488 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1489 	vmovdqu	112(%rdx),%xmm14
   1490 	vpxor	%xmm0,%xmm3,%xmm3
   1491 	vpxor	%xmm15,%xmm8,%xmm8
   1492 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
   1493 	vpshufb	%xmm13,%xmm14,%xmm14
   1494 	vpxor	%xmm1,%xmm4,%xmm4
   1495 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
   1496 	vmovdqu	0-64(%rsi),%xmm6
   1497 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1498 	vpxor	%xmm2,%xmm5,%xmm5
   1499 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
   1500 	vmovdqu	32-64(%rsi),%xmm7
   1501 	vpxor	%xmm14,%xmm9,%xmm9
   1502 
   1503 	vmovdqu	96(%rdx),%xmm15
   1504 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1505 	vpxor	%xmm3,%xmm10,%xmm10
   1506 	vpshufb	%xmm13,%xmm15,%xmm15
   1507 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1508 	vxorps	%xmm4,%xmm11,%xmm11
   1509 	vmovdqu	16-64(%rsi),%xmm6
   1510 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1511 	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
   1512 	vpxor	%xmm5,%xmm12,%xmm12
   1513 	vxorps	%xmm15,%xmm8,%xmm8
   1514 
   1515 	vmovdqu	80(%rdx),%xmm14
   1516 	vpxor	%xmm10,%xmm12,%xmm12
   1517 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
   1518 	vpxor	%xmm11,%xmm12,%xmm12
   1519 	vpslldq	$8,%xmm12,%xmm9
   1520 	vpxor	%xmm0,%xmm3,%xmm3
   1521 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
   1522 	vpsrldq	$8,%xmm12,%xmm12
   1523 	vpxor	%xmm9,%xmm10,%xmm10
   1524 	vmovdqu	48-64(%rsi),%xmm6
   1525 	vpshufb	%xmm13,%xmm14,%xmm14
   1526 	vxorps	%xmm12,%xmm11,%xmm11
   1527 	vpxor	%xmm1,%xmm4,%xmm4
   1528 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1529 	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
   1530 	vmovdqu	80-64(%rsi),%xmm7
   1531 	vpxor	%xmm14,%xmm9,%xmm9
   1532 	vpxor	%xmm2,%xmm5,%xmm5
   1533 
   1534 	vmovdqu	64(%rdx),%xmm15
   1535 	vpalignr	$8,%xmm10,%xmm10,%xmm12
   1536 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1537 	vpshufb	%xmm13,%xmm15,%xmm15
   1538 	vpxor	%xmm3,%xmm0,%xmm0
   1539 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1540 	vmovdqu	64-64(%rsi),%xmm6
   1541 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1542 	vpxor	%xmm4,%xmm1,%xmm1
   1543 	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
   1544 	vxorps	%xmm15,%xmm8,%xmm8
   1545 	vpxor	%xmm5,%xmm2,%xmm2
   1546 
   1547 	vmovdqu	48(%rdx),%xmm14
   1548 	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
   1549 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
   1550 	vpshufb	%xmm13,%xmm14,%xmm14
   1551 	vpxor	%xmm0,%xmm3,%xmm3
   1552 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
   1553 	vmovdqu	96-64(%rsi),%xmm6
   1554 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1555 	vpxor	%xmm1,%xmm4,%xmm4
   1556 	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
   1557 	vmovdqu	128-64(%rsi),%xmm7
   1558 	vpxor	%xmm14,%xmm9,%xmm9
   1559 	vpxor	%xmm2,%xmm5,%xmm5
   1560 
   1561 	vmovdqu	32(%rdx),%xmm15
   1562 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1563 	vpshufb	%xmm13,%xmm15,%xmm15
   1564 	vpxor	%xmm3,%xmm0,%xmm0
   1565 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1566 	vmovdqu	112-64(%rsi),%xmm6
   1567 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1568 	vpxor	%xmm4,%xmm1,%xmm1
   1569 	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
   1570 	vpxor	%xmm15,%xmm8,%xmm8
   1571 	vpxor	%xmm5,%xmm2,%xmm2
   1572 	vxorps	%xmm12,%xmm10,%xmm10
   1573 
   1574 	vmovdqu	16(%rdx),%xmm14
   1575 	vpalignr	$8,%xmm10,%xmm10,%xmm12
   1576 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
   1577 	vpshufb	%xmm13,%xmm14,%xmm14
   1578 	vpxor	%xmm0,%xmm3,%xmm3
   1579 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
   1580 	vmovdqu	144-64(%rsi),%xmm6
   1581 	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
   1582 	vxorps	%xmm11,%xmm12,%xmm12
   1583 	vpunpckhqdq	%xmm14,%xmm14,%xmm9
   1584 	vpxor	%xmm1,%xmm4,%xmm4
   1585 	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
   1586 	vmovdqu	176-64(%rsi),%xmm7
   1587 	vpxor	%xmm14,%xmm9,%xmm9
   1588 	vpxor	%xmm2,%xmm5,%xmm5
   1589 
   1590 	vmovdqu	(%rdx),%xmm15
   1591 	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
   1592 	vpshufb	%xmm13,%xmm15,%xmm15
   1593 	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
   1594 	vmovdqu	160-64(%rsi),%xmm6
   1595 	vpxor	%xmm12,%xmm15,%xmm15
   1596 	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
   1597 	vpxor	%xmm10,%xmm15,%xmm15
   1598 
   1599 	leaq	128(%rdx),%rdx
   1600 	subq	$0x80,%rcx
   1601 	jnc	.Loop8x_avx
   1602 
   1603 	addq	$0x80,%rcx
   1604 	jmp	.Ltail_no_xor_avx
   1605 
   1606 .align	32
   1607 .Lshort_avx:
   1608 	vmovdqu	-16(%rdx,%rcx,1),%xmm14
   1609 	leaq	(%rdx,%rcx,1),%rdx
   1610 	vmovdqu	0-64(%rsi),%xmm6
   1611 	vmovdqu	32-64(%rsi),%xmm7
   1612 	vpshufb	%xmm13,%xmm14,%xmm15
   1613 
   1614 	vmovdqa	%xmm0,%xmm3
   1615 	vmovdqa	%xmm1,%xmm4
   1616 	vmovdqa	%xmm2,%xmm5
   1617 	subq	$0x10,%rcx
   1618 	jz	.Ltail_avx
   1619 
   1620 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1621 	vpxor	%xmm0,%xmm3,%xmm3
   1622 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
   1623 	vpxor	%xmm15,%xmm8,%xmm8
   1624 	vmovdqu	-32(%rdx),%xmm14
   1625 	vpxor	%xmm1,%xmm4,%xmm4
   1626 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
   1627 	vmovdqu	16-64(%rsi),%xmm6
   1628 	vpshufb	%xmm13,%xmm14,%xmm15
   1629 	vpxor	%xmm2,%xmm5,%xmm5
   1630 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
   1631 	vpsrldq	$8,%xmm7,%xmm7
   1632 	subq	$0x10,%rcx
   1633 	jz	.Ltail_avx
   1634 
   1635 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1636 	vpxor	%xmm0,%xmm3,%xmm3
   1637 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
   1638 	vpxor	%xmm15,%xmm8,%xmm8
   1639 	vmovdqu	-48(%rdx),%xmm14
   1640 	vpxor	%xmm1,%xmm4,%xmm4
   1641 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
   1642 	vmovdqu	48-64(%rsi),%xmm6
   1643 	vpshufb	%xmm13,%xmm14,%xmm15
   1644 	vpxor	%xmm2,%xmm5,%xmm5
   1645 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
   1646 	vmovdqu	80-64(%rsi),%xmm7
   1647 	subq	$0x10,%rcx
   1648 	jz	.Ltail_avx
   1649 
   1650 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1651 	vpxor	%xmm0,%xmm3,%xmm3
   1652 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
   1653 	vpxor	%xmm15,%xmm8,%xmm8
   1654 	vmovdqu	-64(%rdx),%xmm14
   1655 	vpxor	%xmm1,%xmm4,%xmm4
   1656 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
   1657 	vmovdqu	64-64(%rsi),%xmm6
   1658 	vpshufb	%xmm13,%xmm14,%xmm15
   1659 	vpxor	%xmm2,%xmm5,%xmm5
   1660 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
   1661 	vpsrldq	$8,%xmm7,%xmm7
   1662 	subq	$0x10,%rcx
   1663 	jz	.Ltail_avx
   1664 
   1665 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1666 	vpxor	%xmm0,%xmm3,%xmm3
   1667 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
   1668 	vpxor	%xmm15,%xmm8,%xmm8
   1669 	vmovdqu	-80(%rdx),%xmm14
   1670 	vpxor	%xmm1,%xmm4,%xmm4
   1671 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
   1672 	vmovdqu	96-64(%rsi),%xmm6
   1673 	vpshufb	%xmm13,%xmm14,%xmm15
   1674 	vpxor	%xmm2,%xmm5,%xmm5
   1675 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
   1676 	vmovdqu	128-64(%rsi),%xmm7
   1677 	subq	$0x10,%rcx
   1678 	jz	.Ltail_avx
   1679 
   1680 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1681 	vpxor	%xmm0,%xmm3,%xmm3
   1682 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
   1683 	vpxor	%xmm15,%xmm8,%xmm8
   1684 	vmovdqu	-96(%rdx),%xmm14
   1685 	vpxor	%xmm1,%xmm4,%xmm4
   1686 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
   1687 	vmovdqu	112-64(%rsi),%xmm6
   1688 	vpshufb	%xmm13,%xmm14,%xmm15
   1689 	vpxor	%xmm2,%xmm5,%xmm5
   1690 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
   1691 	vpsrldq	$8,%xmm7,%xmm7
   1692 	subq	$0x10,%rcx
   1693 	jz	.Ltail_avx
   1694 
   1695 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1696 	vpxor	%xmm0,%xmm3,%xmm3
   1697 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
   1698 	vpxor	%xmm15,%xmm8,%xmm8
   1699 	vmovdqu	-112(%rdx),%xmm14
   1700 	vpxor	%xmm1,%xmm4,%xmm4
   1701 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
   1702 	vmovdqu	144-64(%rsi),%xmm6
   1703 	vpshufb	%xmm13,%xmm14,%xmm15
   1704 	vpxor	%xmm2,%xmm5,%xmm5
   1705 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
   1706 	vmovq	184-64(%rsi),%xmm7
   1707 	subq	$0x10,%rcx
   1708 	jmp	.Ltail_avx
   1709 
   1710 .align	32
   1711 .Ltail_avx:
   1712 	vpxor	%xmm10,%xmm15,%xmm15
   1713 .Ltail_no_xor_avx:
   1714 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
   1715 	vpxor	%xmm0,%xmm3,%xmm3
   1716 	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
   1717 	vpxor	%xmm15,%xmm8,%xmm8
   1718 	vpxor	%xmm1,%xmm4,%xmm4
   1719 	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
   1720 	vpxor	%xmm2,%xmm5,%xmm5
   1721 	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
   1722 
   1723 	vmovdqu	(%r10),%xmm12
   1724 
   1725 	vpxor	%xmm0,%xmm3,%xmm10
   1726 	vpxor	%xmm1,%xmm4,%xmm11
   1727 	vpxor	%xmm2,%xmm5,%xmm5
   1728 
   1729 	vpxor	%xmm10,%xmm5,%xmm5
   1730 	vpxor	%xmm11,%xmm5,%xmm5
   1731 	vpslldq	$8,%xmm5,%xmm9
   1732 	vpsrldq	$8,%xmm5,%xmm5
   1733 	vpxor	%xmm9,%xmm10,%xmm10
   1734 	vpxor	%xmm5,%xmm11,%xmm11
   1735 
   1736 	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
   1737 	vpalignr	$8,%xmm10,%xmm10,%xmm10
   1738 	vpxor	%xmm9,%xmm10,%xmm10
   1739 
   1740 	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
   1741 	vpalignr	$8,%xmm10,%xmm10,%xmm10
   1742 	vpxor	%xmm11,%xmm10,%xmm10
   1743 	vpxor	%xmm9,%xmm10,%xmm10
   1744 
   1745 	cmpq	$0,%rcx
   1746 	jne	.Lshort_avx
   1747 
   1748 	vpshufb	%xmm13,%xmm10,%xmm10
   1749 	vmovdqu	%xmm10,(%rdi)
   1750 	vzeroupper
   1751 	.byte	0xf3,0xc3
   1752 .size	gcm_ghash_avx,.-gcm_ghash_avx
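/*
 * Constant data: .Lbswap_mask is the pshufb byte-reversal mask,
 * .L0x1c2_polynomial holds the 0xc2 reduction constant for the GHASH
 * polynomial, .L7_mask / .L7_mask_poly are auxiliary masks for the CLMUL
 * reduction, and .Lrem_4bit / .Lrem_8bit are the reduction tables used by
 * the table-driven code.  The trailing .byte string is the CRYPTOGAMS
 * attribution ("GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>").
 */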
   1753 .align	64
   1754 .Lbswap_mask:
   1755 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   1756 .L0x1c2_polynomial:
   1757 .byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
   1758 .L7_mask:
   1759 .long	7,0,7,0
   1760 .L7_mask_poly:
   1761 .long	7,0,450,0
   1762 .align	64
   1763 .type	.Lrem_4bit,@object
   1764 .Lrem_4bit:
   1765 .long	0,0,0,471859200,0,943718400,0,610271232
   1766 .long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
   1767 .long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
   1768 .long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
   1769 .type	.Lrem_8bit,@object
   1770 .Lrem_8bit:
   1771 .value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
   1772 .value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
   1773 .value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
   1774 .value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
   1775 .value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
   1776 .value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
   1777 .value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
   1778 .value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
   1779 .value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
   1780 .value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
   1781 .value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
   1782 .value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
   1783 .value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
   1784 .value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
   1785 .value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
   1786 .value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
   1787 .value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
   1788 .value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
   1789 .value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
   1790 .value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
   1791 .value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
   1792 .value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
   1793 .value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
   1794 .value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
   1795 .value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
   1796 .value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
   1797 .value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
   1798 .value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
   1799 .value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
   1800 .value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
   1801 .value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
   1802 .value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
   1803 
   1804 .byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
   1805 .align	64
   1806 #endif
   1807