/* ghash-x86.S - machine-generated GHASH (GCM) routines for 32-bit x86.
 * Generated output; do not edit by hand. */
      1 #if defined(__i386__)
      2 .file	"ghash-x86.S"
      3 .text
      4 .globl	gcm_gmult_4bit_x86
      5 .hidden	gcm_gmult_4bit_x86
      6 .type	gcm_gmult_4bit_x86,@function
      7 .align	16
      8 gcm_gmult_4bit_x86:
      9 .L_gcm_gmult_4bit_x86_begin:
     10 	pushl	%ebp
     11 	pushl	%ebx
     12 	pushl	%esi
     13 	pushl	%edi
     14 	subl	$84,%esp
     15 	movl	104(%esp),%edi
     16 	movl	108(%esp),%esi
     17 	movl	(%edi),%ebp
     18 	movl	4(%edi),%edx
     19 	movl	8(%edi),%ecx
     20 	movl	12(%edi),%ebx
     21 	movl	$0,16(%esp)
     22 	movl	$471859200,20(%esp)
     23 	movl	$943718400,24(%esp)
     24 	movl	$610271232,28(%esp)
     25 	movl	$1887436800,32(%esp)
     26 	movl	$1822425088,36(%esp)
     27 	movl	$1220542464,40(%esp)
     28 	movl	$1423966208,44(%esp)
     29 	movl	$3774873600,48(%esp)
     30 	movl	$4246732800,52(%esp)
     31 	movl	$3644850176,56(%esp)
     32 	movl	$3311403008,60(%esp)
     33 	movl	$2441084928,64(%esp)
     34 	movl	$2376073216,68(%esp)
     35 	movl	$2847932416,72(%esp)
     36 	movl	$3051356160,76(%esp)
     37 	movl	%ebp,(%esp)
     38 	movl	%edx,4(%esp)
     39 	movl	%ecx,8(%esp)
     40 	movl	%ebx,12(%esp)
     41 	shrl	$20,%ebx
     42 	andl	$240,%ebx
     43 	movl	4(%esi,%ebx,1),%ebp
     44 	movl	(%esi,%ebx,1),%edx
     45 	movl	12(%esi,%ebx,1),%ecx
     46 	movl	8(%esi,%ebx,1),%ebx
     47 	xorl	%eax,%eax
     48 	movl	$15,%edi
     49 	jmp	.L000x86_loop
     50 .align	16
     51 .L000x86_loop:
     52 	movb	%bl,%al
     53 	shrdl	$4,%ecx,%ebx
     54 	andb	$15,%al
     55 	shrdl	$4,%edx,%ecx
     56 	shrdl	$4,%ebp,%edx
     57 	shrl	$4,%ebp
     58 	xorl	16(%esp,%eax,4),%ebp
     59 	movb	(%esp,%edi,1),%al
     60 	andb	$240,%al
     61 	xorl	8(%esi,%eax,1),%ebx
     62 	xorl	12(%esi,%eax,1),%ecx
     63 	xorl	(%esi,%eax,1),%edx
     64 	xorl	4(%esi,%eax,1),%ebp
     65 	decl	%edi
     66 	js	.L001x86_break
     67 	movb	%bl,%al
     68 	shrdl	$4,%ecx,%ebx
     69 	andb	$15,%al
     70 	shrdl	$4,%edx,%ecx
     71 	shrdl	$4,%ebp,%edx
     72 	shrl	$4,%ebp
     73 	xorl	16(%esp,%eax,4),%ebp
     74 	movb	(%esp,%edi,1),%al
     75 	shlb	$4,%al
     76 	xorl	8(%esi,%eax,1),%ebx
     77 	xorl	12(%esi,%eax,1),%ecx
     78 	xorl	(%esi,%eax,1),%edx
     79 	xorl	4(%esi,%eax,1),%ebp
     80 	jmp	.L000x86_loop
     81 .align	16
     82 .L001x86_break:
     83 	bswap	%ebx
     84 	bswap	%ecx
     85 	bswap	%edx
     86 	bswap	%ebp
     87 	movl	104(%esp),%edi
     88 	movl	%ebx,12(%edi)
     89 	movl	%ecx,8(%edi)
     90 	movl	%edx,4(%edi)
     91 	movl	%ebp,(%edi)
     92 	addl	$84,%esp
     93 	popl	%edi
     94 	popl	%esi
     95 	popl	%ebx
     96 	popl	%ebp
     97 	ret
     98 .size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
     99 .globl	gcm_ghash_4bit_x86
    100 .hidden	gcm_ghash_4bit_x86
    101 .type	gcm_ghash_4bit_x86,@function
    102 .align	16
    103 gcm_ghash_4bit_x86:
    104 .L_gcm_ghash_4bit_x86_begin:
    105 	pushl	%ebp
    106 	pushl	%ebx
    107 	pushl	%esi
    108 	pushl	%edi
    109 	subl	$84,%esp
    110 	movl	104(%esp),%ebx
    111 	movl	108(%esp),%esi
    112 	movl	112(%esp),%edi
    113 	movl	116(%esp),%ecx
    114 	addl	%edi,%ecx
    115 	movl	%ecx,116(%esp)
    116 	movl	(%ebx),%ebp
    117 	movl	4(%ebx),%edx
    118 	movl	8(%ebx),%ecx
    119 	movl	12(%ebx),%ebx
    120 	movl	$0,16(%esp)
    121 	movl	$471859200,20(%esp)
    122 	movl	$943718400,24(%esp)
    123 	movl	$610271232,28(%esp)
    124 	movl	$1887436800,32(%esp)
    125 	movl	$1822425088,36(%esp)
    126 	movl	$1220542464,40(%esp)
    127 	movl	$1423966208,44(%esp)
    128 	movl	$3774873600,48(%esp)
    129 	movl	$4246732800,52(%esp)
    130 	movl	$3644850176,56(%esp)
    131 	movl	$3311403008,60(%esp)
    132 	movl	$2441084928,64(%esp)
    133 	movl	$2376073216,68(%esp)
    134 	movl	$2847932416,72(%esp)
    135 	movl	$3051356160,76(%esp)
    136 .align	16
    137 .L002x86_outer_loop:
    138 	xorl	12(%edi),%ebx
    139 	xorl	8(%edi),%ecx
    140 	xorl	4(%edi),%edx
    141 	xorl	(%edi),%ebp
    142 	movl	%ebx,12(%esp)
    143 	movl	%ecx,8(%esp)
    144 	movl	%edx,4(%esp)
    145 	movl	%ebp,(%esp)
    146 	shrl	$20,%ebx
    147 	andl	$240,%ebx
    148 	movl	4(%esi,%ebx,1),%ebp
    149 	movl	(%esi,%ebx,1),%edx
    150 	movl	12(%esi,%ebx,1),%ecx
    151 	movl	8(%esi,%ebx,1),%ebx
    152 	xorl	%eax,%eax
    153 	movl	$15,%edi
    154 	jmp	.L003x86_loop
    155 .align	16
    156 .L003x86_loop:
    157 	movb	%bl,%al
    158 	shrdl	$4,%ecx,%ebx
    159 	andb	$15,%al
    160 	shrdl	$4,%edx,%ecx
    161 	shrdl	$4,%ebp,%edx
    162 	shrl	$4,%ebp
    163 	xorl	16(%esp,%eax,4),%ebp
    164 	movb	(%esp,%edi,1),%al
    165 	andb	$240,%al
    166 	xorl	8(%esi,%eax,1),%ebx
    167 	xorl	12(%esi,%eax,1),%ecx
    168 	xorl	(%esi,%eax,1),%edx
    169 	xorl	4(%esi,%eax,1),%ebp
    170 	decl	%edi
    171 	js	.L004x86_break
    172 	movb	%bl,%al
    173 	shrdl	$4,%ecx,%ebx
    174 	andb	$15,%al
    175 	shrdl	$4,%edx,%ecx
    176 	shrdl	$4,%ebp,%edx
    177 	shrl	$4,%ebp
    178 	xorl	16(%esp,%eax,4),%ebp
    179 	movb	(%esp,%edi,1),%al
    180 	shlb	$4,%al
    181 	xorl	8(%esi,%eax,1),%ebx
    182 	xorl	12(%esi,%eax,1),%ecx
    183 	xorl	(%esi,%eax,1),%edx
    184 	xorl	4(%esi,%eax,1),%ebp
    185 	jmp	.L003x86_loop
    186 .align	16
    187 .L004x86_break:
    188 	bswap	%ebx
    189 	bswap	%ecx
    190 	bswap	%edx
    191 	bswap	%ebp
    192 	movl	112(%esp),%edi
    193 	leal	16(%edi),%edi
    194 	cmpl	116(%esp),%edi
    195 	movl	%edi,112(%esp)
    196 	jb	.L002x86_outer_loop
    197 	movl	104(%esp),%edi
    198 	movl	%ebx,12(%edi)
    199 	movl	%ecx,8(%edi)
    200 	movl	%edx,4(%edi)
    201 	movl	%ebp,(%edi)
    202 	addl	$84,%esp
    203 	popl	%edi
    204 	popl	%esi
    205 	popl	%ebx
    206 	popl	%ebp
    207 	ret
    208 .size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
    209 .globl	gcm_gmult_4bit_mmx
    210 .hidden	gcm_gmult_4bit_mmx
    211 .type	gcm_gmult_4bit_mmx,@function
    212 .align	16
    213 gcm_gmult_4bit_mmx:
    214 .L_gcm_gmult_4bit_mmx_begin:
    215 	pushl	%ebp
    216 	pushl	%ebx
    217 	pushl	%esi
    218 	pushl	%edi
    219 	movl	20(%esp),%edi
    220 	movl	24(%esp),%esi
    221 	call	.L005pic_point
    222 .L005pic_point:
    223 	popl	%eax
    224 	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
    225 	movzbl	15(%edi),%ebx
    226 	xorl	%ecx,%ecx
    227 	movl	%ebx,%edx
    228 	movb	%dl,%cl
    229 	movl	$14,%ebp
    230 	shlb	$4,%cl
    231 	andl	$240,%edx
    232 	movq	8(%esi,%ecx,1),%mm0
    233 	movq	(%esi,%ecx,1),%mm1
    234 	movd	%mm0,%ebx
    235 	jmp	.L006mmx_loop
    236 .align	16
    237 .L006mmx_loop:
    238 	psrlq	$4,%mm0
    239 	andl	$15,%ebx
    240 	movq	%mm1,%mm2
    241 	psrlq	$4,%mm1
    242 	pxor	8(%esi,%edx,1),%mm0
    243 	movb	(%edi,%ebp,1),%cl
    244 	psllq	$60,%mm2
    245 	pxor	(%eax,%ebx,8),%mm1
    246 	decl	%ebp
    247 	movd	%mm0,%ebx
    248 	pxor	(%esi,%edx,1),%mm1
    249 	movl	%ecx,%edx
    250 	pxor	%mm2,%mm0
    251 	js	.L007mmx_break
    252 	shlb	$4,%cl
    253 	andl	$15,%ebx
    254 	psrlq	$4,%mm0
    255 	andl	$240,%edx
    256 	movq	%mm1,%mm2
    257 	psrlq	$4,%mm1
    258 	pxor	8(%esi,%ecx,1),%mm0
    259 	psllq	$60,%mm2
    260 	pxor	(%eax,%ebx,8),%mm1
    261 	movd	%mm0,%ebx
    262 	pxor	(%esi,%ecx,1),%mm1
    263 	pxor	%mm2,%mm0
    264 	jmp	.L006mmx_loop
    265 .align	16
    266 .L007mmx_break:
    267 	shlb	$4,%cl
    268 	andl	$15,%ebx
    269 	psrlq	$4,%mm0
    270 	andl	$240,%edx
    271 	movq	%mm1,%mm2
    272 	psrlq	$4,%mm1
    273 	pxor	8(%esi,%ecx,1),%mm0
    274 	psllq	$60,%mm2
    275 	pxor	(%eax,%ebx,8),%mm1
    276 	movd	%mm0,%ebx
    277 	pxor	(%esi,%ecx,1),%mm1
    278 	pxor	%mm2,%mm0
    279 	psrlq	$4,%mm0
    280 	andl	$15,%ebx
    281 	movq	%mm1,%mm2
    282 	psrlq	$4,%mm1
    283 	pxor	8(%esi,%edx,1),%mm0
    284 	psllq	$60,%mm2
    285 	pxor	(%eax,%ebx,8),%mm1
    286 	movd	%mm0,%ebx
    287 	pxor	(%esi,%edx,1),%mm1
    288 	pxor	%mm2,%mm0
    289 	psrlq	$32,%mm0
    290 	movd	%mm1,%edx
    291 	psrlq	$32,%mm1
    292 	movd	%mm0,%ecx
    293 	movd	%mm1,%ebp
    294 	bswap	%ebx
    295 	bswap	%edx
    296 	bswap	%ecx
    297 	bswap	%ebp
    298 	emms
    299 	movl	%ebx,12(%edi)
    300 	movl	%edx,4(%edi)
    301 	movl	%ecx,8(%edi)
    302 	movl	%ebp,(%edi)
    303 	popl	%edi
    304 	popl	%esi
    305 	popl	%ebx
    306 	popl	%ebp
    307 	ret
    308 .size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
    309 .globl	gcm_ghash_4bit_mmx
    310 .hidden	gcm_ghash_4bit_mmx
    311 .type	gcm_ghash_4bit_mmx,@function
    312 .align	16
    313 gcm_ghash_4bit_mmx:
    314 .L_gcm_ghash_4bit_mmx_begin:
    315 	pushl	%ebp
    316 	pushl	%ebx
    317 	pushl	%esi
    318 	pushl	%edi
    319 	movl	20(%esp),%eax
    320 	movl	24(%esp),%ebx
    321 	movl	28(%esp),%ecx
    322 	movl	32(%esp),%edx
    323 	movl	%esp,%ebp
    324 	call	.L008pic_point
    325 .L008pic_point:
    326 	popl	%esi
    327 	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
    328 	subl	$544,%esp
    329 	andl	$-64,%esp
    330 	subl	$16,%esp
    331 	addl	%ecx,%edx
    332 	movl	%eax,544(%esp)
    333 	movl	%edx,552(%esp)
    334 	movl	%ebp,556(%esp)
    335 	addl	$128,%ebx
    336 	leal	144(%esp),%edi
    337 	leal	400(%esp),%ebp
    338 	movl	-120(%ebx),%edx
    339 	movq	-120(%ebx),%mm0
    340 	movq	-128(%ebx),%mm3
    341 	shll	$4,%edx
    342 	movb	%dl,(%esp)
    343 	movl	-104(%ebx),%edx
    344 	movq	-104(%ebx),%mm2
    345 	movq	-112(%ebx),%mm5
    346 	movq	%mm0,-128(%edi)
    347 	psrlq	$4,%mm0
    348 	movq	%mm3,(%edi)
    349 	movq	%mm3,%mm7
    350 	psrlq	$4,%mm3
    351 	shll	$4,%edx
    352 	movb	%dl,1(%esp)
    353 	movl	-88(%ebx),%edx
    354 	movq	-88(%ebx),%mm1
    355 	psllq	$60,%mm7
    356 	movq	-96(%ebx),%mm4
    357 	por	%mm7,%mm0
    358 	movq	%mm2,-120(%edi)
    359 	psrlq	$4,%mm2
    360 	movq	%mm5,8(%edi)
    361 	movq	%mm5,%mm6
    362 	movq	%mm0,-128(%ebp)
    363 	psrlq	$4,%mm5
    364 	movq	%mm3,(%ebp)
    365 	shll	$4,%edx
    366 	movb	%dl,2(%esp)
    367 	movl	-72(%ebx),%edx
    368 	movq	-72(%ebx),%mm0
    369 	psllq	$60,%mm6
    370 	movq	-80(%ebx),%mm3
    371 	por	%mm6,%mm2
    372 	movq	%mm1,-112(%edi)
    373 	psrlq	$4,%mm1
    374 	movq	%mm4,16(%edi)
    375 	movq	%mm4,%mm7
    376 	movq	%mm2,-120(%ebp)
    377 	psrlq	$4,%mm4
    378 	movq	%mm5,8(%ebp)
    379 	shll	$4,%edx
    380 	movb	%dl,3(%esp)
    381 	movl	-56(%ebx),%edx
    382 	movq	-56(%ebx),%mm2
    383 	psllq	$60,%mm7
    384 	movq	-64(%ebx),%mm5
    385 	por	%mm7,%mm1
    386 	movq	%mm0,-104(%edi)
    387 	psrlq	$4,%mm0
    388 	movq	%mm3,24(%edi)
    389 	movq	%mm3,%mm6
    390 	movq	%mm1,-112(%ebp)
    391 	psrlq	$4,%mm3
    392 	movq	%mm4,16(%ebp)
    393 	shll	$4,%edx
    394 	movb	%dl,4(%esp)
    395 	movl	-40(%ebx),%edx
    396 	movq	-40(%ebx),%mm1
    397 	psllq	$60,%mm6
    398 	movq	-48(%ebx),%mm4
    399 	por	%mm6,%mm0
    400 	movq	%mm2,-96(%edi)
    401 	psrlq	$4,%mm2
    402 	movq	%mm5,32(%edi)
    403 	movq	%mm5,%mm7
    404 	movq	%mm0,-104(%ebp)
    405 	psrlq	$4,%mm5
    406 	movq	%mm3,24(%ebp)
    407 	shll	$4,%edx
    408 	movb	%dl,5(%esp)
    409 	movl	-24(%ebx),%edx
    410 	movq	-24(%ebx),%mm0
    411 	psllq	$60,%mm7
    412 	movq	-32(%ebx),%mm3
    413 	por	%mm7,%mm2
    414 	movq	%mm1,-88(%edi)
    415 	psrlq	$4,%mm1
    416 	movq	%mm4,40(%edi)
    417 	movq	%mm4,%mm6
    418 	movq	%mm2,-96(%ebp)
    419 	psrlq	$4,%mm4
    420 	movq	%mm5,32(%ebp)
    421 	shll	$4,%edx
    422 	movb	%dl,6(%esp)
    423 	movl	-8(%ebx),%edx
    424 	movq	-8(%ebx),%mm2
    425 	psllq	$60,%mm6
    426 	movq	-16(%ebx),%mm5
    427 	por	%mm6,%mm1
    428 	movq	%mm0,-80(%edi)
    429 	psrlq	$4,%mm0
    430 	movq	%mm3,48(%edi)
    431 	movq	%mm3,%mm7
    432 	movq	%mm1,-88(%ebp)
    433 	psrlq	$4,%mm3
    434 	movq	%mm4,40(%ebp)
    435 	shll	$4,%edx
    436 	movb	%dl,7(%esp)
    437 	movl	8(%ebx),%edx
    438 	movq	8(%ebx),%mm1
    439 	psllq	$60,%mm7
    440 	movq	(%ebx),%mm4
    441 	por	%mm7,%mm0
    442 	movq	%mm2,-72(%edi)
    443 	psrlq	$4,%mm2
    444 	movq	%mm5,56(%edi)
    445 	movq	%mm5,%mm6
    446 	movq	%mm0,-80(%ebp)
    447 	psrlq	$4,%mm5
    448 	movq	%mm3,48(%ebp)
    449 	shll	$4,%edx
    450 	movb	%dl,8(%esp)
    451 	movl	24(%ebx),%edx
    452 	movq	24(%ebx),%mm0
    453 	psllq	$60,%mm6
    454 	movq	16(%ebx),%mm3
    455 	por	%mm6,%mm2
    456 	movq	%mm1,-64(%edi)
    457 	psrlq	$4,%mm1
    458 	movq	%mm4,64(%edi)
    459 	movq	%mm4,%mm7
    460 	movq	%mm2,-72(%ebp)
    461 	psrlq	$4,%mm4
    462 	movq	%mm5,56(%ebp)
    463 	shll	$4,%edx
    464 	movb	%dl,9(%esp)
    465 	movl	40(%ebx),%edx
    466 	movq	40(%ebx),%mm2
    467 	psllq	$60,%mm7
    468 	movq	32(%ebx),%mm5
    469 	por	%mm7,%mm1
    470 	movq	%mm0,-56(%edi)
    471 	psrlq	$4,%mm0
    472 	movq	%mm3,72(%edi)
    473 	movq	%mm3,%mm6
    474 	movq	%mm1,-64(%ebp)
    475 	psrlq	$4,%mm3
    476 	movq	%mm4,64(%ebp)
    477 	shll	$4,%edx
    478 	movb	%dl,10(%esp)
    479 	movl	56(%ebx),%edx
    480 	movq	56(%ebx),%mm1
    481 	psllq	$60,%mm6
    482 	movq	48(%ebx),%mm4
    483 	por	%mm6,%mm0
    484 	movq	%mm2,-48(%edi)
    485 	psrlq	$4,%mm2
    486 	movq	%mm5,80(%edi)
    487 	movq	%mm5,%mm7
    488 	movq	%mm0,-56(%ebp)
    489 	psrlq	$4,%mm5
    490 	movq	%mm3,72(%ebp)
    491 	shll	$4,%edx
    492 	movb	%dl,11(%esp)
    493 	movl	72(%ebx),%edx
    494 	movq	72(%ebx),%mm0
    495 	psllq	$60,%mm7
    496 	movq	64(%ebx),%mm3
    497 	por	%mm7,%mm2
    498 	movq	%mm1,-40(%edi)
    499 	psrlq	$4,%mm1
    500 	movq	%mm4,88(%edi)
    501 	movq	%mm4,%mm6
    502 	movq	%mm2,-48(%ebp)
    503 	psrlq	$4,%mm4
    504 	movq	%mm5,80(%ebp)
    505 	shll	$4,%edx
    506 	movb	%dl,12(%esp)
    507 	movl	88(%ebx),%edx
    508 	movq	88(%ebx),%mm2
    509 	psllq	$60,%mm6
    510 	movq	80(%ebx),%mm5
    511 	por	%mm6,%mm1
    512 	movq	%mm0,-32(%edi)
    513 	psrlq	$4,%mm0
    514 	movq	%mm3,96(%edi)
    515 	movq	%mm3,%mm7
    516 	movq	%mm1,-40(%ebp)
    517 	psrlq	$4,%mm3
    518 	movq	%mm4,88(%ebp)
    519 	shll	$4,%edx
    520 	movb	%dl,13(%esp)
    521 	movl	104(%ebx),%edx
    522 	movq	104(%ebx),%mm1
    523 	psllq	$60,%mm7
    524 	movq	96(%ebx),%mm4
    525 	por	%mm7,%mm0
    526 	movq	%mm2,-24(%edi)
    527 	psrlq	$4,%mm2
    528 	movq	%mm5,104(%edi)
    529 	movq	%mm5,%mm6
    530 	movq	%mm0,-32(%ebp)
    531 	psrlq	$4,%mm5
    532 	movq	%mm3,96(%ebp)
    533 	shll	$4,%edx
    534 	movb	%dl,14(%esp)
    535 	movl	120(%ebx),%edx
    536 	movq	120(%ebx),%mm0
    537 	psllq	$60,%mm6
    538 	movq	112(%ebx),%mm3
    539 	por	%mm6,%mm2
    540 	movq	%mm1,-16(%edi)
    541 	psrlq	$4,%mm1
    542 	movq	%mm4,112(%edi)
    543 	movq	%mm4,%mm7
    544 	movq	%mm2,-24(%ebp)
    545 	psrlq	$4,%mm4
    546 	movq	%mm5,104(%ebp)
    547 	shll	$4,%edx
    548 	movb	%dl,15(%esp)
    549 	psllq	$60,%mm7
    550 	por	%mm7,%mm1
    551 	movq	%mm0,-8(%edi)
    552 	psrlq	$4,%mm0
    553 	movq	%mm3,120(%edi)
    554 	movq	%mm3,%mm6
    555 	movq	%mm1,-16(%ebp)
    556 	psrlq	$4,%mm3
    557 	movq	%mm4,112(%ebp)
    558 	psllq	$60,%mm6
    559 	por	%mm6,%mm0
    560 	movq	%mm0,-8(%ebp)
    561 	movq	%mm3,120(%ebp)
    562 	movq	(%eax),%mm6
    563 	movl	8(%eax),%ebx
    564 	movl	12(%eax),%edx
    565 .align	16
    566 .L009outer:
    567 	xorl	12(%ecx),%edx
    568 	xorl	8(%ecx),%ebx
    569 	pxor	(%ecx),%mm6
    570 	leal	16(%ecx),%ecx
    571 	movl	%ebx,536(%esp)
    572 	movq	%mm6,528(%esp)
    573 	movl	%ecx,548(%esp)
    574 	xorl	%eax,%eax
    575 	roll	$8,%edx
    576 	movb	%dl,%al
    577 	movl	%eax,%ebp
    578 	andb	$15,%al
    579 	shrl	$4,%ebp
    580 	pxor	%mm0,%mm0
    581 	roll	$8,%edx
    582 	pxor	%mm1,%mm1
    583 	pxor	%mm2,%mm2
    584 	movq	16(%esp,%eax,8),%mm7
    585 	movq	144(%esp,%eax,8),%mm6
    586 	movb	%dl,%al
    587 	movd	%mm7,%ebx
    588 	psrlq	$8,%mm7
    589 	movq	%mm6,%mm3
    590 	movl	%eax,%edi
    591 	psrlq	$8,%mm6
    592 	pxor	272(%esp,%ebp,8),%mm7
    593 	andb	$15,%al
    594 	psllq	$56,%mm3
    595 	shrl	$4,%edi
    596 	pxor	16(%esp,%eax,8),%mm7
    597 	roll	$8,%edx
    598 	pxor	144(%esp,%eax,8),%mm6
    599 	pxor	%mm3,%mm7
    600 	pxor	400(%esp,%ebp,8),%mm6
    601 	xorb	(%esp,%ebp,1),%bl
    602 	movb	%dl,%al
    603 	movd	%mm7,%ecx
    604 	movzbl	%bl,%ebx
    605 	psrlq	$8,%mm7
    606 	movq	%mm6,%mm3
    607 	movl	%eax,%ebp
    608 	psrlq	$8,%mm6
    609 	pxor	272(%esp,%edi,8),%mm7
    610 	andb	$15,%al
    611 	psllq	$56,%mm3
    612 	shrl	$4,%ebp
    613 	pinsrw	$2,(%esi,%ebx,2),%mm2
    614 	pxor	16(%esp,%eax,8),%mm7
    615 	roll	$8,%edx
    616 	pxor	144(%esp,%eax,8),%mm6
    617 	pxor	%mm3,%mm7
    618 	pxor	400(%esp,%edi,8),%mm6
    619 	xorb	(%esp,%edi,1),%cl
    620 	movb	%dl,%al
    621 	movl	536(%esp),%edx
    622 	movd	%mm7,%ebx
    623 	movzbl	%cl,%ecx
    624 	psrlq	$8,%mm7
    625 	movq	%mm6,%mm3
    626 	movl	%eax,%edi
    627 	psrlq	$8,%mm6
    628 	pxor	272(%esp,%ebp,8),%mm7
    629 	andb	$15,%al
    630 	psllq	$56,%mm3
    631 	pxor	%mm2,%mm6
    632 	shrl	$4,%edi
    633 	pinsrw	$2,(%esi,%ecx,2),%mm1
    634 	pxor	16(%esp,%eax,8),%mm7
    635 	roll	$8,%edx
    636 	pxor	144(%esp,%eax,8),%mm6
    637 	pxor	%mm3,%mm7
    638 	pxor	400(%esp,%ebp,8),%mm6
    639 	xorb	(%esp,%ebp,1),%bl
    640 	movb	%dl,%al
    641 	movd	%mm7,%ecx
    642 	movzbl	%bl,%ebx
    643 	psrlq	$8,%mm7
    644 	movq	%mm6,%mm3
    645 	movl	%eax,%ebp
    646 	psrlq	$8,%mm6
    647 	pxor	272(%esp,%edi,8),%mm7
    648 	andb	$15,%al
    649 	psllq	$56,%mm3
    650 	pxor	%mm1,%mm6
    651 	shrl	$4,%ebp
    652 	pinsrw	$2,(%esi,%ebx,2),%mm0
    653 	pxor	16(%esp,%eax,8),%mm7
    654 	roll	$8,%edx
    655 	pxor	144(%esp,%eax,8),%mm6
    656 	pxor	%mm3,%mm7
    657 	pxor	400(%esp,%edi,8),%mm6
    658 	xorb	(%esp,%edi,1),%cl
    659 	movb	%dl,%al
    660 	movd	%mm7,%ebx
    661 	movzbl	%cl,%ecx
    662 	psrlq	$8,%mm7
    663 	movq	%mm6,%mm3
    664 	movl	%eax,%edi
    665 	psrlq	$8,%mm6
    666 	pxor	272(%esp,%ebp,8),%mm7
    667 	andb	$15,%al
    668 	psllq	$56,%mm3
    669 	pxor	%mm0,%mm6
    670 	shrl	$4,%edi
    671 	pinsrw	$2,(%esi,%ecx,2),%mm2
    672 	pxor	16(%esp,%eax,8),%mm7
    673 	roll	$8,%edx
    674 	pxor	144(%esp,%eax,8),%mm6
    675 	pxor	%mm3,%mm7
    676 	pxor	400(%esp,%ebp,8),%mm6
    677 	xorb	(%esp,%ebp,1),%bl
    678 	movb	%dl,%al
    679 	movd	%mm7,%ecx
    680 	movzbl	%bl,%ebx
    681 	psrlq	$8,%mm7
    682 	movq	%mm6,%mm3
    683 	movl	%eax,%ebp
    684 	psrlq	$8,%mm6
    685 	pxor	272(%esp,%edi,8),%mm7
    686 	andb	$15,%al
    687 	psllq	$56,%mm3
    688 	pxor	%mm2,%mm6
    689 	shrl	$4,%ebp
    690 	pinsrw	$2,(%esi,%ebx,2),%mm1
    691 	pxor	16(%esp,%eax,8),%mm7
    692 	roll	$8,%edx
    693 	pxor	144(%esp,%eax,8),%mm6
    694 	pxor	%mm3,%mm7
    695 	pxor	400(%esp,%edi,8),%mm6
    696 	xorb	(%esp,%edi,1),%cl
    697 	movb	%dl,%al
    698 	movl	532(%esp),%edx
    699 	movd	%mm7,%ebx
    700 	movzbl	%cl,%ecx
    701 	psrlq	$8,%mm7
    702 	movq	%mm6,%mm3
    703 	movl	%eax,%edi
    704 	psrlq	$8,%mm6
    705 	pxor	272(%esp,%ebp,8),%mm7
    706 	andb	$15,%al
    707 	psllq	$56,%mm3
    708 	pxor	%mm1,%mm6
    709 	shrl	$4,%edi
    710 	pinsrw	$2,(%esi,%ecx,2),%mm0
    711 	pxor	16(%esp,%eax,8),%mm7
    712 	roll	$8,%edx
    713 	pxor	144(%esp,%eax,8),%mm6
    714 	pxor	%mm3,%mm7
    715 	pxor	400(%esp,%ebp,8),%mm6
    716 	xorb	(%esp,%ebp,1),%bl
    717 	movb	%dl,%al
    718 	movd	%mm7,%ecx
    719 	movzbl	%bl,%ebx
    720 	psrlq	$8,%mm7
    721 	movq	%mm6,%mm3
    722 	movl	%eax,%ebp
    723 	psrlq	$8,%mm6
    724 	pxor	272(%esp,%edi,8),%mm7
    725 	andb	$15,%al
    726 	psllq	$56,%mm3
    727 	pxor	%mm0,%mm6
    728 	shrl	$4,%ebp
    729 	pinsrw	$2,(%esi,%ebx,2),%mm2
    730 	pxor	16(%esp,%eax,8),%mm7
    731 	roll	$8,%edx
    732 	pxor	144(%esp,%eax,8),%mm6
    733 	pxor	%mm3,%mm7
    734 	pxor	400(%esp,%edi,8),%mm6
    735 	xorb	(%esp,%edi,1),%cl
    736 	movb	%dl,%al
    737 	movd	%mm7,%ebx
    738 	movzbl	%cl,%ecx
    739 	psrlq	$8,%mm7
    740 	movq	%mm6,%mm3
    741 	movl	%eax,%edi
    742 	psrlq	$8,%mm6
    743 	pxor	272(%esp,%ebp,8),%mm7
    744 	andb	$15,%al
    745 	psllq	$56,%mm3
    746 	pxor	%mm2,%mm6
    747 	shrl	$4,%edi
    748 	pinsrw	$2,(%esi,%ecx,2),%mm1
    749 	pxor	16(%esp,%eax,8),%mm7
    750 	roll	$8,%edx
    751 	pxor	144(%esp,%eax,8),%mm6
    752 	pxor	%mm3,%mm7
    753 	pxor	400(%esp,%ebp,8),%mm6
    754 	xorb	(%esp,%ebp,1),%bl
    755 	movb	%dl,%al
    756 	movd	%mm7,%ecx
    757 	movzbl	%bl,%ebx
    758 	psrlq	$8,%mm7
    759 	movq	%mm6,%mm3
    760 	movl	%eax,%ebp
    761 	psrlq	$8,%mm6
    762 	pxor	272(%esp,%edi,8),%mm7
    763 	andb	$15,%al
    764 	psllq	$56,%mm3
    765 	pxor	%mm1,%mm6
    766 	shrl	$4,%ebp
    767 	pinsrw	$2,(%esi,%ebx,2),%mm0
    768 	pxor	16(%esp,%eax,8),%mm7
    769 	roll	$8,%edx
    770 	pxor	144(%esp,%eax,8),%mm6
    771 	pxor	%mm3,%mm7
    772 	pxor	400(%esp,%edi,8),%mm6
    773 	xorb	(%esp,%edi,1),%cl
    774 	movb	%dl,%al
    775 	movl	528(%esp),%edx
    776 	movd	%mm7,%ebx
    777 	movzbl	%cl,%ecx
    778 	psrlq	$8,%mm7
    779 	movq	%mm6,%mm3
    780 	movl	%eax,%edi
    781 	psrlq	$8,%mm6
    782 	pxor	272(%esp,%ebp,8),%mm7
    783 	andb	$15,%al
    784 	psllq	$56,%mm3
    785 	pxor	%mm0,%mm6
    786 	shrl	$4,%edi
    787 	pinsrw	$2,(%esi,%ecx,2),%mm2
    788 	pxor	16(%esp,%eax,8),%mm7
    789 	roll	$8,%edx
    790 	pxor	144(%esp,%eax,8),%mm6
    791 	pxor	%mm3,%mm7
    792 	pxor	400(%esp,%ebp,8),%mm6
    793 	xorb	(%esp,%ebp,1),%bl
    794 	movb	%dl,%al
    795 	movd	%mm7,%ecx
    796 	movzbl	%bl,%ebx
    797 	psrlq	$8,%mm7
    798 	movq	%mm6,%mm3
    799 	movl	%eax,%ebp
    800 	psrlq	$8,%mm6
    801 	pxor	272(%esp,%edi,8),%mm7
    802 	andb	$15,%al
    803 	psllq	$56,%mm3
    804 	pxor	%mm2,%mm6
    805 	shrl	$4,%ebp
    806 	pinsrw	$2,(%esi,%ebx,2),%mm1
    807 	pxor	16(%esp,%eax,8),%mm7
    808 	roll	$8,%edx
    809 	pxor	144(%esp,%eax,8),%mm6
    810 	pxor	%mm3,%mm7
    811 	pxor	400(%esp,%edi,8),%mm6
    812 	xorb	(%esp,%edi,1),%cl
    813 	movb	%dl,%al
    814 	movd	%mm7,%ebx
    815 	movzbl	%cl,%ecx
    816 	psrlq	$8,%mm7
    817 	movq	%mm6,%mm3
    818 	movl	%eax,%edi
    819 	psrlq	$8,%mm6
    820 	pxor	272(%esp,%ebp,8),%mm7
    821 	andb	$15,%al
    822 	psllq	$56,%mm3
    823 	pxor	%mm1,%mm6
    824 	shrl	$4,%edi
    825 	pinsrw	$2,(%esi,%ecx,2),%mm0
    826 	pxor	16(%esp,%eax,8),%mm7
    827 	roll	$8,%edx
    828 	pxor	144(%esp,%eax,8),%mm6
    829 	pxor	%mm3,%mm7
    830 	pxor	400(%esp,%ebp,8),%mm6
    831 	xorb	(%esp,%ebp,1),%bl
    832 	movb	%dl,%al
    833 	movd	%mm7,%ecx
    834 	movzbl	%bl,%ebx
    835 	psrlq	$8,%mm7
    836 	movq	%mm6,%mm3
    837 	movl	%eax,%ebp
    838 	psrlq	$8,%mm6
    839 	pxor	272(%esp,%edi,8),%mm7
    840 	andb	$15,%al
    841 	psllq	$56,%mm3
    842 	pxor	%mm0,%mm6
    843 	shrl	$4,%ebp
    844 	pinsrw	$2,(%esi,%ebx,2),%mm2
    845 	pxor	16(%esp,%eax,8),%mm7
    846 	roll	$8,%edx
    847 	pxor	144(%esp,%eax,8),%mm6
    848 	pxor	%mm3,%mm7
    849 	pxor	400(%esp,%edi,8),%mm6
    850 	xorb	(%esp,%edi,1),%cl
    851 	movb	%dl,%al
    852 	movl	524(%esp),%edx
    853 	movd	%mm7,%ebx
    854 	movzbl	%cl,%ecx
    855 	psrlq	$8,%mm7
    856 	movq	%mm6,%mm3
    857 	movl	%eax,%edi
    858 	psrlq	$8,%mm6
    859 	pxor	272(%esp,%ebp,8),%mm7
    860 	andb	$15,%al
    861 	psllq	$56,%mm3
    862 	pxor	%mm2,%mm6
    863 	shrl	$4,%edi
    864 	pinsrw	$2,(%esi,%ecx,2),%mm1
    865 	pxor	16(%esp,%eax,8),%mm7
    866 	pxor	144(%esp,%eax,8),%mm6
    867 	xorb	(%esp,%ebp,1),%bl
    868 	pxor	%mm3,%mm7
    869 	pxor	400(%esp,%ebp,8),%mm6
    870 	movzbl	%bl,%ebx
    871 	pxor	%mm2,%mm2
    872 	psllq	$4,%mm1
    873 	movd	%mm7,%ecx
    874 	psrlq	$4,%mm7
    875 	movq	%mm6,%mm3
    876 	psrlq	$4,%mm6
    877 	shll	$4,%ecx
    878 	pxor	16(%esp,%edi,8),%mm7
    879 	psllq	$60,%mm3
    880 	movzbl	%cl,%ecx
    881 	pxor	%mm3,%mm7
    882 	pxor	144(%esp,%edi,8),%mm6
    883 	pinsrw	$2,(%esi,%ebx,2),%mm0
    884 	pxor	%mm1,%mm6
    885 	movd	%mm7,%edx
    886 	pinsrw	$3,(%esi,%ecx,2),%mm2
    887 	psllq	$12,%mm0
    888 	pxor	%mm0,%mm6
    889 	psrlq	$32,%mm7
    890 	pxor	%mm2,%mm6
    891 	movl	548(%esp),%ecx
    892 	movd	%mm7,%ebx
    893 	movq	%mm6,%mm3
    894 	psllw	$8,%mm6
    895 	psrlw	$8,%mm3
    896 	por	%mm3,%mm6
    897 	bswap	%edx
    898 	pshufw	$27,%mm6,%mm6
    899 	bswap	%ebx
    900 	cmpl	552(%esp),%ecx
    901 	jne	.L009outer
    902 	movl	544(%esp),%eax
    903 	movl	%edx,12(%eax)
    904 	movl	%ebx,8(%eax)
    905 	movq	%mm6,(%eax)
    906 	movl	556(%esp),%esp
    907 	emms
    908 	popl	%edi
    909 	popl	%esi
    910 	popl	%ebx
    911 	popl	%ebp
    912 	ret
    913 .size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
    914 .globl	gcm_init_clmul
    915 .hidden	gcm_init_clmul
    916 .type	gcm_init_clmul,@function
    917 .align	16
    918 gcm_init_clmul:
    919 .L_gcm_init_clmul_begin:
    920 	movl	4(%esp),%edx
    921 	movl	8(%esp),%eax
    922 	call	.L010pic
    923 .L010pic:
    924 	popl	%ecx
    925 	leal	.Lbswap-.L010pic(%ecx),%ecx
    926 	movdqu	(%eax),%xmm2
    927 	pshufd	$78,%xmm2,%xmm2
    928 	pshufd	$255,%xmm2,%xmm4
    929 	movdqa	%xmm2,%xmm3
    930 	psllq	$1,%xmm2
    931 	pxor	%xmm5,%xmm5
    932 	psrlq	$63,%xmm3
    933 	pcmpgtd	%xmm4,%xmm5
    934 	pslldq	$8,%xmm3
    935 	por	%xmm3,%xmm2
    936 	pand	16(%ecx),%xmm5
    937 	pxor	%xmm5,%xmm2
    938 	movdqa	%xmm2,%xmm0
    939 	movdqa	%xmm0,%xmm1
    940 	pshufd	$78,%xmm0,%xmm3
    941 	pshufd	$78,%xmm2,%xmm4
    942 	pxor	%xmm0,%xmm3
    943 	pxor	%xmm2,%xmm4
    944 .byte	102,15,58,68,194,0
    945 .byte	102,15,58,68,202,17
    946 .byte	102,15,58,68,220,0
    947 	xorps	%xmm0,%xmm3
    948 	xorps	%xmm1,%xmm3
    949 	movdqa	%xmm3,%xmm4
    950 	psrldq	$8,%xmm3
    951 	pslldq	$8,%xmm4
    952 	pxor	%xmm3,%xmm1
    953 	pxor	%xmm4,%xmm0
    954 	movdqa	%xmm0,%xmm4
    955 	movdqa	%xmm0,%xmm3
    956 	psllq	$5,%xmm0
    957 	pxor	%xmm0,%xmm3
    958 	psllq	$1,%xmm0
    959 	pxor	%xmm3,%xmm0
    960 	psllq	$57,%xmm0
    961 	movdqa	%xmm0,%xmm3
    962 	pslldq	$8,%xmm0
    963 	psrldq	$8,%xmm3
    964 	pxor	%xmm4,%xmm0
    965 	pxor	%xmm3,%xmm1
    966 	movdqa	%xmm0,%xmm4
    967 	psrlq	$1,%xmm0
    968 	pxor	%xmm4,%xmm1
    969 	pxor	%xmm0,%xmm4
    970 	psrlq	$5,%xmm0
    971 	pxor	%xmm4,%xmm0
    972 	psrlq	$1,%xmm0
    973 	pxor	%xmm1,%xmm0
    974 	pshufd	$78,%xmm2,%xmm3
    975 	pshufd	$78,%xmm0,%xmm4
    976 	pxor	%xmm2,%xmm3
    977 	movdqu	%xmm2,(%edx)
    978 	pxor	%xmm0,%xmm4
    979 	movdqu	%xmm0,16(%edx)
    980 .byte	102,15,58,15,227,8
    981 	movdqu	%xmm4,32(%edx)
    982 	ret
    983 .size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
    984 .globl	gcm_gmult_clmul
    985 .hidden	gcm_gmult_clmul
    986 .type	gcm_gmult_clmul,@function
    987 .align	16
    988 gcm_gmult_clmul:
    989 .L_gcm_gmult_clmul_begin:
    990 	movl	4(%esp),%eax
    991 	movl	8(%esp),%edx
    992 	call	.L011pic
    993 .L011pic:
    994 	popl	%ecx
    995 	leal	.Lbswap-.L011pic(%ecx),%ecx
    996 	movdqu	(%eax),%xmm0
    997 	movdqa	(%ecx),%xmm5
    998 	movups	(%edx),%xmm2
    999 .byte	102,15,56,0,197
   1000 	movups	32(%edx),%xmm4
   1001 	movdqa	%xmm0,%xmm1
   1002 	pshufd	$78,%xmm0,%xmm3
   1003 	pxor	%xmm0,%xmm3
   1004 .byte	102,15,58,68,194,0
   1005 .byte	102,15,58,68,202,17
   1006 .byte	102,15,58,68,220,0
   1007 	xorps	%xmm0,%xmm3
   1008 	xorps	%xmm1,%xmm3
   1009 	movdqa	%xmm3,%xmm4
   1010 	psrldq	$8,%xmm3
   1011 	pslldq	$8,%xmm4
   1012 	pxor	%xmm3,%xmm1
   1013 	pxor	%xmm4,%xmm0
   1014 	movdqa	%xmm0,%xmm4
   1015 	movdqa	%xmm0,%xmm3
   1016 	psllq	$5,%xmm0
   1017 	pxor	%xmm0,%xmm3
   1018 	psllq	$1,%xmm0
   1019 	pxor	%xmm3,%xmm0
   1020 	psllq	$57,%xmm0
   1021 	movdqa	%xmm0,%xmm3
   1022 	pslldq	$8,%xmm0
   1023 	psrldq	$8,%xmm3
   1024 	pxor	%xmm4,%xmm0
   1025 	pxor	%xmm3,%xmm1
   1026 	movdqa	%xmm0,%xmm4
   1027 	psrlq	$1,%xmm0
   1028 	pxor	%xmm4,%xmm1
   1029 	pxor	%xmm0,%xmm4
   1030 	psrlq	$5,%xmm0
   1031 	pxor	%xmm4,%xmm0
   1032 	psrlq	$1,%xmm0
   1033 	pxor	%xmm1,%xmm0
   1034 .byte	102,15,56,0,197
   1035 	movdqu	%xmm0,(%eax)
   1036 	ret
   1037 .size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
   1038 .globl	gcm_ghash_clmul
   1039 .hidden	gcm_ghash_clmul
   1040 .type	gcm_ghash_clmul,@function
   1041 .align	16
   1042 gcm_ghash_clmul:
   1043 .L_gcm_ghash_clmul_begin:
   1044 	pushl	%ebp
   1045 	pushl	%ebx
   1046 	pushl	%esi
   1047 	pushl	%edi
   1048 	movl	20(%esp),%eax
   1049 	movl	24(%esp),%edx
   1050 	movl	28(%esp),%esi
   1051 	movl	32(%esp),%ebx
   1052 	call	.L012pic
   1053 .L012pic:
   1054 	popl	%ecx
   1055 	leal	.Lbswap-.L012pic(%ecx),%ecx
   1056 	movdqu	(%eax),%xmm0
   1057 	movdqa	(%ecx),%xmm5
   1058 	movdqu	(%edx),%xmm2
   1059 .byte	102,15,56,0,197
   1060 	subl	$16,%ebx
   1061 	jz	.L013odd_tail
   1062 	movdqu	(%esi),%xmm3
   1063 	movdqu	16(%esi),%xmm6
   1064 .byte	102,15,56,0,221
   1065 .byte	102,15,56,0,245
   1066 	movdqu	32(%edx),%xmm5
   1067 	pxor	%xmm3,%xmm0
   1068 	pshufd	$78,%xmm6,%xmm3
   1069 	movdqa	%xmm6,%xmm7
   1070 	pxor	%xmm6,%xmm3
   1071 	leal	32(%esi),%esi
   1072 .byte	102,15,58,68,242,0
   1073 .byte	102,15,58,68,250,17
   1074 .byte	102,15,58,68,221,0
   1075 	movups	16(%edx),%xmm2
   1076 	nop
   1077 	subl	$32,%ebx
   1078 	jbe	.L014even_tail
   1079 	jmp	.L015mod_loop
   1080 .align	32
   1081 .L015mod_loop:
   1082 	pshufd	$78,%xmm0,%xmm4
   1083 	movdqa	%xmm0,%xmm1
   1084 	pxor	%xmm0,%xmm4
   1085 	nop
   1086 .byte	102,15,58,68,194,0
   1087 .byte	102,15,58,68,202,17
   1088 .byte	102,15,58,68,229,16
   1089 	movups	(%edx),%xmm2
   1090 	xorps	%xmm6,%xmm0
   1091 	movdqa	(%ecx),%xmm5
   1092 	xorps	%xmm7,%xmm1
   1093 	movdqu	(%esi),%xmm7
   1094 	pxor	%xmm0,%xmm3
   1095 	movdqu	16(%esi),%xmm6
   1096 	pxor	%xmm1,%xmm3
   1097 .byte	102,15,56,0,253
   1098 	pxor	%xmm3,%xmm4
   1099 	movdqa	%xmm4,%xmm3
   1100 	psrldq	$8,%xmm4
   1101 	pslldq	$8,%xmm3
   1102 	pxor	%xmm4,%xmm1
   1103 	pxor	%xmm3,%xmm0
   1104 .byte	102,15,56,0,245
   1105 	pxor	%xmm7,%xmm1
   1106 	movdqa	%xmm6,%xmm7
   1107 	movdqa	%xmm0,%xmm4
   1108 	movdqa	%xmm0,%xmm3
   1109 	psllq	$5,%xmm0
   1110 	pxor	%xmm0,%xmm3
   1111 	psllq	$1,%xmm0
   1112 	pxor	%xmm3,%xmm0
   1113 .byte	102,15,58,68,242,0
   1114 	movups	32(%edx),%xmm5
   1115 	psllq	$57,%xmm0
   1116 	movdqa	%xmm0,%xmm3
   1117 	pslldq	$8,%xmm0
   1118 	psrldq	$8,%xmm3
   1119 	pxor	%xmm4,%xmm0
   1120 	pxor	%xmm3,%xmm1
   1121 	pshufd	$78,%xmm7,%xmm3
   1122 	movdqa	%xmm0,%xmm4
   1123 	psrlq	$1,%xmm0
   1124 	pxor	%xmm7,%xmm3
   1125 	pxor	%xmm4,%xmm1
   1126 .byte	102,15,58,68,250,17
   1127 	movups	16(%edx),%xmm2
   1128 	pxor	%xmm0,%xmm4
   1129 	psrlq	$5,%xmm0
   1130 	pxor	%xmm4,%xmm0
   1131 	psrlq	$1,%xmm0
   1132 	pxor	%xmm1,%xmm0
   1133 .byte	102,15,58,68,221,0
   1134 	leal	32(%esi),%esi
   1135 	subl	$32,%ebx
   1136 	ja	.L015mod_loop
   1137 .L014even_tail:
   1138 	pshufd	$78,%xmm0,%xmm4
   1139 	movdqa	%xmm0,%xmm1
   1140 	pxor	%xmm0,%xmm4
   1141 .byte	102,15,58,68,194,0
   1142 .byte	102,15,58,68,202,17
   1143 .byte	102,15,58,68,229,16
   1144 	movdqa	(%ecx),%xmm5
   1145 	xorps	%xmm6,%xmm0
   1146 	xorps	%xmm7,%xmm1
   1147 	pxor	%xmm0,%xmm3
   1148 	pxor	%xmm1,%xmm3
   1149 	pxor	%xmm3,%xmm4
   1150 	movdqa	%xmm4,%xmm3
   1151 	psrldq	$8,%xmm4
   1152 	pslldq	$8,%xmm3
   1153 	pxor	%xmm4,%xmm1
   1154 	pxor	%xmm3,%xmm0
   1155 	movdqa	%xmm0,%xmm4
   1156 	movdqa	%xmm0,%xmm3
   1157 	psllq	$5,%xmm0
   1158 	pxor	%xmm0,%xmm3
   1159 	psllq	$1,%xmm0
   1160 	pxor	%xmm3,%xmm0
   1161 	psllq	$57,%xmm0
   1162 	movdqa	%xmm0,%xmm3
   1163 	pslldq	$8,%xmm0
   1164 	psrldq	$8,%xmm3
   1165 	pxor	%xmm4,%xmm0
   1166 	pxor	%xmm3,%xmm1
   1167 	movdqa	%xmm0,%xmm4
   1168 	psrlq	$1,%xmm0
   1169 	pxor	%xmm4,%xmm1
   1170 	pxor	%xmm0,%xmm4
   1171 	psrlq	$5,%xmm0
   1172 	pxor	%xmm4,%xmm0
   1173 	psrlq	$1,%xmm0
   1174 	pxor	%xmm1,%xmm0
   1175 	testl	%ebx,%ebx
   1176 	jnz	.L016done
   1177 	movups	(%edx),%xmm2
   1178 .L013odd_tail:
   1179 	movdqu	(%esi),%xmm3
   1180 .byte	102,15,56,0,221
   1181 	pxor	%xmm3,%xmm0
   1182 	movdqa	%xmm0,%xmm1
   1183 	pshufd	$78,%xmm0,%xmm3
   1184 	pshufd	$78,%xmm2,%xmm4
   1185 	pxor	%xmm0,%xmm3
   1186 	pxor	%xmm2,%xmm4
   1187 .byte	102,15,58,68,194,0
   1188 .byte	102,15,58,68,202,17
   1189 .byte	102,15,58,68,220,0
   1190 	xorps	%xmm0,%xmm3
   1191 	xorps	%xmm1,%xmm3
   1192 	movdqa	%xmm3,%xmm4
   1193 	psrldq	$8,%xmm3
   1194 	pslldq	$8,%xmm4
   1195 	pxor	%xmm3,%xmm1
   1196 	pxor	%xmm4,%xmm0
   1197 	movdqa	%xmm0,%xmm4
   1198 	movdqa	%xmm0,%xmm3
   1199 	psllq	$5,%xmm0
   1200 	pxor	%xmm0,%xmm3
   1201 	psllq	$1,%xmm0
   1202 	pxor	%xmm3,%xmm0
   1203 	psllq	$57,%xmm0
   1204 	movdqa	%xmm0,%xmm3
   1205 	pslldq	$8,%xmm0
   1206 	psrldq	$8,%xmm3
   1207 	pxor	%xmm4,%xmm0
   1208 	pxor	%xmm3,%xmm1
   1209 	movdqa	%xmm0,%xmm4
   1210 	psrlq	$1,%xmm0
   1211 	pxor	%xmm4,%xmm1
   1212 	pxor	%xmm0,%xmm4
   1213 	psrlq	$5,%xmm0
   1214 	pxor	%xmm4,%xmm0
   1215 	psrlq	$1,%xmm0
   1216 	pxor	%xmm1,%xmm0
   1217 .L016done:
   1218 .byte	102,15,56,0,197
   1219 	movdqu	%xmm0,(%eax)
   1220 	popl	%edi
   1221 	popl	%esi
   1222 	popl	%ebx
   1223 	popl	%ebp
   1224 	ret
   1225 .size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
   1226 .align	64
   1227 .Lbswap:
   1228 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   1229 .byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
   1230 .align	64
   1231 .Lrem_8bit:
   1232 .value	0,450,900,582,1800,1738,1164,1358
   1233 .value	3600,4050,3476,3158,2328,2266,2716,2910
   1234 .value	7200,7650,8100,7782,6952,6890,6316,6510
   1235 .value	4656,5106,4532,4214,5432,5370,5820,6014
   1236 .value	14400,14722,15300,14854,16200,16010,15564,15630
   1237 .value	13904,14226,13780,13334,12632,12442,13020,13086
   1238 .value	9312,9634,10212,9766,9064,8874,8428,8494
   1239 .value	10864,11186,10740,10294,11640,11450,12028,12094
   1240 .value	28800,28994,29444,29382,30600,30282,29708,30158
   1241 .value	32400,32594,32020,31958,31128,30810,31260,31710
   1242 .value	27808,28002,28452,28390,27560,27242,26668,27118
   1243 .value	25264,25458,24884,24822,26040,25722,26172,26622
   1244 .value	18624,18690,19268,19078,20424,19978,19532,19854
   1245 .value	18128,18194,17748,17558,16856,16410,16988,17310
   1246 .value	21728,21794,22372,22182,21480,21034,20588,20910
   1247 .value	23280,23346,22900,22710,24056,23610,24188,24510
   1248 .value	57600,57538,57988,58182,58888,59338,58764,58446
   1249 .value	61200,61138,60564,60758,59416,59866,60316,59998
   1250 .value	64800,64738,65188,65382,64040,64490,63916,63598
   1251 .value	62256,62194,61620,61814,62520,62970,63420,63102
   1252 .value	55616,55426,56004,56070,56904,57226,56780,56334
   1253 .value	55120,54930,54484,54550,53336,53658,54236,53790
   1254 .value	50528,50338,50916,50982,49768,50090,49644,49198
   1255 .value	52080,51890,51444,51510,52344,52666,53244,52798
   1256 .value	37248,36930,37380,37830,38536,38730,38156,38094
   1257 .value	40848,40530,39956,40406,39064,39258,39708,39646
   1258 .value	36256,35938,36388,36838,35496,35690,35116,35054
   1259 .value	33712,33394,32820,33270,33976,34170,34620,34558
   1260 .value	43456,43010,43588,43910,44744,44810,44364,44174
   1261 .value	42960,42514,42068,42390,41176,41242,41820,41630
   1262 .value	46560,46114,46692,47014,45800,45866,45420,45230
   1263 .value	48112,47666,47220,47542,48376,48442,49020,48830
   1264 .align	64
   1265 .Lrem_4bit:
   1266 .long	0,0,0,471859200,0,943718400,0,610271232
   1267 .long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
   1268 .long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
   1269 .long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
   1270 .byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
   1271 .byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
   1272 .byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
   1273 .byte	0
   1274 #endif
   1275