Home | History | Annotate | Download | only in fipsmodule
      1 #if defined(__i386__)
      2 .text
      3 .globl	_gcm_gmult_4bit_mmx
      4 .private_extern	_gcm_gmult_4bit_mmx
      5 .align	4
        // _gcm_gmult_4bit_mmx(arg0, arg1) -- i386, AT&T syntax, MMX,
        // arguments passed on the stack.
        //
        // In:   arg0 -> %edi  16-byte block. It is consumed one byte at a
        //                     time from offset 15 down to offset 0 and is
        //                     overwritten with the 16-byte result.
        //       arg1 -> %esi  table of 16-byte entries addressed as
        //                     nibble*16 (qword halves at +0 and +8).
        //       NOTE(review): presumably the GHASH state Xi and Htable[16];
        //       confirm against the C prototype.
        // Regs: %eax       = address of Lrem_4bit (PC-relative, call/pop)
        //       %ebp       = index of the next byte to fetch (14 down to 0)
        //       %ecx, %edx = table offsets for the two nibbles of a byte
        //                    (low nibble << 4, and byte & 0xF0)
        //       %mm1:%mm0  = 128-bit accumulator; %mm1 is the half loaded
        //                    from entry offset +0, %mm0 the half from +8
        //       %ebx       = low dword of %mm0; its low 4 bits index
        //                    Lrem_4bit (8-byte entries)
        // Saves and restores %ebp, %ebx, %esi, %edi and executes emms
        // before the final integer stores.
      6 _gcm_gmult_4bit_mmx:
      7 L_gcm_gmult_4bit_mmx_begin:
      8 	pushl	%ebp
      9 	pushl	%ebx
     10 	pushl	%esi
     11 	pushl	%edi
     12 	movl	20(%esp),%edi
     13 	movl	24(%esp),%esi
        // Position-independent address of Lrem_4bit.
     14 	call	L000pic_point
     15 L000pic_point:
     16 	popl	%eax
     17 	leal	Lrem_4bit-L000pic_point(%eax),%eax
        // Seed the accumulator from the last byte (offset 15):
        // %ecx = low nibble << 4, %edx = byte & 0xF0.
     18 	movzbl	15(%edi),%ebx
     19 	xorl	%ecx,%ecx
     20 	movl	%ebx,%edx
     21 	movb	%dl,%cl
     22 	movl	$14,%ebp
     23 	shlb	$4,%cl
     24 	andl	$240,%edx
     25 	movq	8(%esi,%ecx,1),%mm0
     26 	movq	(%esi,%ecx,1),%mm1
     27 	movd	%mm0,%ebx
     28 	jmp	L001mmx_loop
     29 .align	4,0x90
        // Each half of the loop shifts the 128-bit accumulator right by
        // 4 bits (the low 4 bits of %mm1 are carried into the top of %mm0
        // through %mm2), XORs in the table entry for the next nibble and
        // XORs Lrem_4bit[bits shifted out] into %mm1.
        // First half: high nibble of the previous byte (%edx), and fetch
        // of the next byte into %cl. The sign flag set by decl is tested
        // by js below; the instructions in between do not modify flags.
     30 L001mmx_loop:
     31 	psrlq	$4,%mm0
     32 	andl	$15,%ebx
     33 	movq	%mm1,%mm2
     34 	psrlq	$4,%mm1
     35 	pxor	8(%esi,%edx,1),%mm0
     36 	movb	(%edi,%ebp,1),%cl
     37 	psllq	$60,%mm2
     38 	pxor	(%eax,%ebx,8),%mm1
     39 	decl	%ebp
     40 	movd	%mm0,%ebx
     41 	pxor	(%esi,%edx,1),%mm1
     42 	movl	%ecx,%edx
     43 	pxor	%mm2,%mm0
     44 	js	L002mmx_break
        // Second half: low nibble of the byte just fetched.
     45 	shlb	$4,%cl
     46 	andl	$15,%ebx
     47 	psrlq	$4,%mm0
     48 	andl	$240,%edx
     49 	movq	%mm1,%mm2
     50 	psrlq	$4,%mm1
     51 	pxor	8(%esi,%ecx,1),%mm0
     52 	psllq	$60,%mm2
     53 	pxor	(%eax,%ebx,8),%mm1
     54 	movd	%mm0,%ebx
     55 	pxor	(%esi,%ecx,1),%mm1
     56 	pxor	%mm2,%mm0
     57 	jmp	L001mmx_loop
     58 .align	4,0x90
        // Reached once byte 0 has been fetched: process its low nibble,
        // then its high nibble, without fetching further input.
     59 L002mmx_break:
     60 	shlb	$4,%cl
     61 	andl	$15,%ebx
     62 	psrlq	$4,%mm0
     63 	andl	$240,%edx
     64 	movq	%mm1,%mm2
     65 	psrlq	$4,%mm1
     66 	pxor	8(%esi,%ecx,1),%mm0
     67 	psllq	$60,%mm2
     68 	pxor	(%eax,%ebx,8),%mm1
     69 	movd	%mm0,%ebx
     70 	pxor	(%esi,%ecx,1),%mm1
     71 	pxor	%mm2,%mm0
     72 	psrlq	$4,%mm0
     73 	andl	$15,%ebx
     74 	movq	%mm1,%mm2
     75 	psrlq	$4,%mm1
     76 	pxor	8(%esi,%edx,1),%mm0
     77 	psllq	$60,%mm2
     78 	pxor	(%eax,%ebx,8),%mm1
     79 	movd	%mm0,%ebx
     80 	pxor	(%esi,%edx,1),%mm1
     81 	pxor	%mm2,%mm0
        // Split the accumulator into four dwords, byte-swap each and
        // store: %mm1 -> bytes 0..7, %mm0 -> bytes 8..15 of the block.
        // (%ebx was taken before the last pxor into %mm0, which only
        // affects the top 4 bits of %mm0.)
     82 	psrlq	$32,%mm0
     83 	movd	%mm1,%edx
     84 	psrlq	$32,%mm1
     85 	movd	%mm0,%ecx
     86 	movd	%mm1,%ebp
     87 	bswap	%ebx
     88 	bswap	%edx
     89 	bswap	%ecx
     90 	bswap	%ebp
     91 	emms
     92 	movl	%ebx,12(%edi)
     93 	movl	%edx,4(%edi)
     94 	movl	%ecx,8(%edi)
     95 	movl	%ebp,(%edi)
     96 	popl	%edi
     97 	popl	%esi
     98 	popl	%ebx
     99 	popl	%ebp
    100 	ret
    101 .globl	_gcm_ghash_4bit_mmx
    102 .private_extern	_gcm_ghash_4bit_mmx
    103 .align	4
        // _gcm_ghash_4bit_mmx(arg0, arg1, arg2, arg3) -- i386, AT&T syntax,
        // MMX plus pinsrw/pshufw on MMX registers; arguments on the stack.
        //
        // In:   arg0 -> %eax  16-byte state: read before the loop, written
        //                     back after the loop.
        //       arg1 -> %ebx  table of sixteen 16-byte entries (read only).
        //       arg2 -> %ecx  input pointer, advanced 16 bytes per pass.
        //       arg3 -> %edx  byte count; arg2+arg3 becomes the end pointer.
        //       NOTE(review): presumably (Xi, Htable, inp, len) of GHASH;
        //       confirm against the C prototype.
        // The outer loop is bottom-tested and exits only when the input
        // pointer equals the end pointer, so at least one 16-byte block is
        // always processed and the byte count must be a nonzero multiple
        // of 16.
        //
        // Stack frame (64-byte aligned, then lowered by 16), offsets from
        // %esp, i = table entry index 0..15:
        //     0.. 15  byte i   = (low byte of entry[i] qword at +8) << 4
        //    16..143  qword i  = entry[i] qword at +8
        //   144..271  qword i  = entry[i] qword at +0
        //   272..399  qword i  = low  half of the 128-bit entry[i] >> 4
        //   400..527  qword i  = high half of the 128-bit entry[i] >> 4
        //   528..539  bytes 0..11 of (state XOR input) for this block
        //   544       arg0
        //   548       current input pointer
        //   552       end pointer
        //   556       %esp as it was after the four pushes
        // %esi = address of Lrem_8bit (16-bit entries) for the whole
        // function. Saves and restores %ebp, %ebx, %esi, %edi.
    104 _gcm_ghash_4bit_mmx:
    105 L_gcm_ghash_4bit_mmx_begin:
    106 	pushl	%ebp
    107 	pushl	%ebx
    108 	pushl	%esi
    109 	pushl	%edi
    110 	movl	20(%esp),%eax
    111 	movl	24(%esp),%ebx
    112 	movl	28(%esp),%ecx
    113 	movl	32(%esp),%edx
    114 	movl	%esp,%ebp
    115 	call	L003pic_point
    116 L003pic_point:
    117 	popl	%esi
    118 	leal	Lrem_8bit-L003pic_point(%esi),%esi
    119 	subl	$544,%esp
    120 	andl	$-64,%esp
    121 	subl	$16,%esp
    122 	addl	%ecx,%edx
    123 	movl	%eax,544(%esp)
    124 	movl	%edx,552(%esp)
    125 	movl	%ebp,556(%esp)
        // Bias the table pointer by 128 so all 16 entries are reachable
        // with 8-bit displacements; %edi / %ebp point into the middle of
        // the unshifted / shifted copies for the same reason.
    126 	addl	$128,%ebx
    127 	leal	144(%esp),%edi
    128 	leal	400(%esp),%ebp
        // Fully unrolled, software-pipelined expansion of the 16 table
        // entries into the stack frame described in the header comment.
    129 	movl	-120(%ebx),%edx
    130 	movq	-120(%ebx),%mm0
    131 	movq	-128(%ebx),%mm3
    132 	shll	$4,%edx
    133 	movb	%dl,(%esp)
    134 	movl	-104(%ebx),%edx
    135 	movq	-104(%ebx),%mm2
    136 	movq	-112(%ebx),%mm5
    137 	movq	%mm0,-128(%edi)
    138 	psrlq	$4,%mm0
    139 	movq	%mm3,(%edi)
    140 	movq	%mm3,%mm7
    141 	psrlq	$4,%mm3
    142 	shll	$4,%edx
    143 	movb	%dl,1(%esp)
    144 	movl	-88(%ebx),%edx
    145 	movq	-88(%ebx),%mm1
    146 	psllq	$60,%mm7
    147 	movq	-96(%ebx),%mm4
    148 	por	%mm7,%mm0
    149 	movq	%mm2,-120(%edi)
    150 	psrlq	$4,%mm2
    151 	movq	%mm5,8(%edi)
    152 	movq	%mm5,%mm6
    153 	movq	%mm0,-128(%ebp)
    154 	psrlq	$4,%mm5
    155 	movq	%mm3,(%ebp)
    156 	shll	$4,%edx
    157 	movb	%dl,2(%esp)
    158 	movl	-72(%ebx),%edx
    159 	movq	-72(%ebx),%mm0
    160 	psllq	$60,%mm6
    161 	movq	-80(%ebx),%mm3
    162 	por	%mm6,%mm2
    163 	movq	%mm1,-112(%edi)
    164 	psrlq	$4,%mm1
    165 	movq	%mm4,16(%edi)
    166 	movq	%mm4,%mm7
    167 	movq	%mm2,-120(%ebp)
    168 	psrlq	$4,%mm4
    169 	movq	%mm5,8(%ebp)
    170 	shll	$4,%edx
    171 	movb	%dl,3(%esp)
    172 	movl	-56(%ebx),%edx
    173 	movq	-56(%ebx),%mm2
    174 	psllq	$60,%mm7
    175 	movq	-64(%ebx),%mm5
    176 	por	%mm7,%mm1
    177 	movq	%mm0,-104(%edi)
    178 	psrlq	$4,%mm0
    179 	movq	%mm3,24(%edi)
    180 	movq	%mm3,%mm6
    181 	movq	%mm1,-112(%ebp)
    182 	psrlq	$4,%mm3
    183 	movq	%mm4,16(%ebp)
    184 	shll	$4,%edx
    185 	movb	%dl,4(%esp)
    186 	movl	-40(%ebx),%edx
    187 	movq	-40(%ebx),%mm1
    188 	psllq	$60,%mm6
    189 	movq	-48(%ebx),%mm4
    190 	por	%mm6,%mm0
    191 	movq	%mm2,-96(%edi)
    192 	psrlq	$4,%mm2
    193 	movq	%mm5,32(%edi)
    194 	movq	%mm5,%mm7
    195 	movq	%mm0,-104(%ebp)
    196 	psrlq	$4,%mm5
    197 	movq	%mm3,24(%ebp)
    198 	shll	$4,%edx
    199 	movb	%dl,5(%esp)
    200 	movl	-24(%ebx),%edx
    201 	movq	-24(%ebx),%mm0
    202 	psllq	$60,%mm7
    203 	movq	-32(%ebx),%mm3
    204 	por	%mm7,%mm2
    205 	movq	%mm1,-88(%edi)
    206 	psrlq	$4,%mm1
    207 	movq	%mm4,40(%edi)
    208 	movq	%mm4,%mm6
    209 	movq	%mm2,-96(%ebp)
    210 	psrlq	$4,%mm4
    211 	movq	%mm5,32(%ebp)
    212 	shll	$4,%edx
    213 	movb	%dl,6(%esp)
    214 	movl	-8(%ebx),%edx
    215 	movq	-8(%ebx),%mm2
    216 	psllq	$60,%mm6
    217 	movq	-16(%ebx),%mm5
    218 	por	%mm6,%mm1
    219 	movq	%mm0,-80(%edi)
    220 	psrlq	$4,%mm0
    221 	movq	%mm3,48(%edi)
    222 	movq	%mm3,%mm7
    223 	movq	%mm1,-88(%ebp)
    224 	psrlq	$4,%mm3
    225 	movq	%mm4,40(%ebp)
    226 	shll	$4,%edx
    227 	movb	%dl,7(%esp)
    228 	movl	8(%ebx),%edx
    229 	movq	8(%ebx),%mm1
    230 	psllq	$60,%mm7
    231 	movq	(%ebx),%mm4
    232 	por	%mm7,%mm0
    233 	movq	%mm2,-72(%edi)
    234 	psrlq	$4,%mm2
    235 	movq	%mm5,56(%edi)
    236 	movq	%mm5,%mm6
    237 	movq	%mm0,-80(%ebp)
    238 	psrlq	$4,%mm5
    239 	movq	%mm3,48(%ebp)
    240 	shll	$4,%edx
    241 	movb	%dl,8(%esp)
    242 	movl	24(%ebx),%edx
    243 	movq	24(%ebx),%mm0
    244 	psllq	$60,%mm6
    245 	movq	16(%ebx),%mm3
    246 	por	%mm6,%mm2
    247 	movq	%mm1,-64(%edi)
    248 	psrlq	$4,%mm1
    249 	movq	%mm4,64(%edi)
    250 	movq	%mm4,%mm7
    251 	movq	%mm2,-72(%ebp)
    252 	psrlq	$4,%mm4
    253 	movq	%mm5,56(%ebp)
    254 	shll	$4,%edx
    255 	movb	%dl,9(%esp)
    256 	movl	40(%ebx),%edx
    257 	movq	40(%ebx),%mm2
    258 	psllq	$60,%mm7
    259 	movq	32(%ebx),%mm5
    260 	por	%mm7,%mm1
    261 	movq	%mm0,-56(%edi)
    262 	psrlq	$4,%mm0
    263 	movq	%mm3,72(%edi)
    264 	movq	%mm3,%mm6
    265 	movq	%mm1,-64(%ebp)
    266 	psrlq	$4,%mm3
    267 	movq	%mm4,64(%ebp)
    268 	shll	$4,%edx
    269 	movb	%dl,10(%esp)
    270 	movl	56(%ebx),%edx
    271 	movq	56(%ebx),%mm1
    272 	psllq	$60,%mm6
    273 	movq	48(%ebx),%mm4
    274 	por	%mm6,%mm0
    275 	movq	%mm2,-48(%edi)
    276 	psrlq	$4,%mm2
    277 	movq	%mm5,80(%edi)
    278 	movq	%mm5,%mm7
    279 	movq	%mm0,-56(%ebp)
    280 	psrlq	$4,%mm5
    281 	movq	%mm3,72(%ebp)
    282 	shll	$4,%edx
    283 	movb	%dl,11(%esp)
    284 	movl	72(%ebx),%edx
    285 	movq	72(%ebx),%mm0
    286 	psllq	$60,%mm7
    287 	movq	64(%ebx),%mm3
    288 	por	%mm7,%mm2
    289 	movq	%mm1,-40(%edi)
    290 	psrlq	$4,%mm1
    291 	movq	%mm4,88(%edi)
    292 	movq	%mm4,%mm6
    293 	movq	%mm2,-48(%ebp)
    294 	psrlq	$4,%mm4
    295 	movq	%mm5,80(%ebp)
    296 	shll	$4,%edx
    297 	movb	%dl,12(%esp)
    298 	movl	88(%ebx),%edx
    299 	movq	88(%ebx),%mm2
    300 	psllq	$60,%mm6
    301 	movq	80(%ebx),%mm5
    302 	por	%mm6,%mm1
    303 	movq	%mm0,-32(%edi)
    304 	psrlq	$4,%mm0
    305 	movq	%mm3,96(%edi)
    306 	movq	%mm3,%mm7
    307 	movq	%mm1,-40(%ebp)
    308 	psrlq	$4,%mm3
    309 	movq	%mm4,88(%ebp)
    310 	shll	$4,%edx
    311 	movb	%dl,13(%esp)
    312 	movl	104(%ebx),%edx
    313 	movq	104(%ebx),%mm1
    314 	psllq	$60,%mm7
    315 	movq	96(%ebx),%mm4
    316 	por	%mm7,%mm0
    317 	movq	%mm2,-24(%edi)
    318 	psrlq	$4,%mm2
    319 	movq	%mm5,104(%edi)
    320 	movq	%mm5,%mm6
    321 	movq	%mm0,-32(%ebp)
    322 	psrlq	$4,%mm5
    323 	movq	%mm3,96(%ebp)
    324 	shll	$4,%edx
    325 	movb	%dl,14(%esp)
    326 	movl	120(%ebx),%edx
    327 	movq	120(%ebx),%mm0
    328 	psllq	$60,%mm6
    329 	movq	112(%ebx),%mm3
    330 	por	%mm6,%mm2
    331 	movq	%mm1,-16(%edi)
    332 	psrlq	$4,%mm1
    333 	movq	%mm4,112(%edi)
    334 	movq	%mm4,%mm7
    335 	movq	%mm2,-24(%ebp)
    336 	psrlq	$4,%mm4
    337 	movq	%mm5,104(%ebp)
    338 	shll	$4,%edx
    339 	movb	%dl,15(%esp)
    340 	psllq	$60,%mm7
    341 	por	%mm7,%mm1
    342 	movq	%mm0,-8(%edi)
    343 	psrlq	$4,%mm0
    344 	movq	%mm3,120(%edi)
    345 	movq	%mm3,%mm6
    346 	movq	%mm1,-16(%ebp)
    347 	psrlq	$4,%mm3
    348 	movq	%mm4,112(%ebp)
    349 	psllq	$60,%mm6
    350 	por	%mm6,%mm0
    351 	movq	%mm0,-8(%ebp)
    352 	movq	%mm3,120(%ebp)
        // Load the 16-byte state: bytes 0..7 in %mm6, 8..11 in %ebx,
        // 12..15 in %edx.
    353 	movq	(%eax),%mm6
    354 	movl	8(%eax),%ebx
    355 	movl	12(%eax),%edx
    356 .align	4,0x90
        // One pass per 16-byte input block: XOR the block into the state,
        // spill bytes 0..11 to 528(%esp), then consume the 16 bytes from
        // offset 15 down to offset 0.
    357 L004outer:
    358 	xorl	12(%ecx),%edx
    359 	xorl	8(%ecx),%ebx
    360 	pxor	(%ecx),%mm6
    361 	leal	16(%ecx),%ecx
    362 	movl	%ebx,536(%esp)
    363 	movq	%mm6,528(%esp)
    364 	movl	%ecx,548(%esp)
        // Per byte: roll $8 brings the next byte of the current dword into
        // %dl; %eax = its low nibble (index into the unshifted copies at
        // 16/144), %ebp or %edi (alternating) = its high nibble (index
        // into the >>4 copies at 272/400 and the byte table at 0).
        // %mm6:%mm7 is the accumulator, shifted right 8 bits per byte with
        // %mm3 carrying bits from %mm6 into %mm7. The byte shifted out of
        // %mm7 (via %ebx/%ecx) selects a 16-bit Lrem_8bit entry that is
        // inserted into %mm0/%mm1/%mm2 in rotation and XORed into %mm6 a
        // few steps later.
    365 	xorl	%eax,%eax
    366 	roll	$8,%edx
    367 	movb	%dl,%al
    368 	movl	%eax,%ebp
    369 	andb	$15,%al
    370 	shrl	$4,%ebp
    371 	pxor	%mm0,%mm0
    372 	roll	$8,%edx
    373 	pxor	%mm1,%mm1
    374 	pxor	%mm2,%mm2
    375 	movq	16(%esp,%eax,8),%mm7
    376 	movq	144(%esp,%eax,8),%mm6
    377 	movb	%dl,%al
    378 	movd	%mm7,%ebx
    379 	psrlq	$8,%mm7
    380 	movq	%mm6,%mm3
    381 	movl	%eax,%edi
    382 	psrlq	$8,%mm6
    383 	pxor	272(%esp,%ebp,8),%mm7
    384 	andb	$15,%al
    385 	psllq	$56,%mm3
    386 	shrl	$4,%edi
    387 	pxor	16(%esp,%eax,8),%mm7
    388 	roll	$8,%edx
    389 	pxor	144(%esp,%eax,8),%mm6
    390 	pxor	%mm3,%mm7
    391 	pxor	400(%esp,%ebp,8),%mm6
    392 	xorb	(%esp,%ebp,1),%bl
    393 	movb	%dl,%al
    394 	movd	%mm7,%ecx
    395 	movzbl	%bl,%ebx
    396 	psrlq	$8,%mm7
    397 	movq	%mm6,%mm3
    398 	movl	%eax,%ebp
    399 	psrlq	$8,%mm6
    400 	pxor	272(%esp,%edi,8),%mm7
    401 	andb	$15,%al
    402 	psllq	$56,%mm3
    403 	shrl	$4,%ebp
    404 	pinsrw	$2,(%esi,%ebx,2),%mm2
    405 	pxor	16(%esp,%eax,8),%mm7
    406 	roll	$8,%edx
    407 	pxor	144(%esp,%eax,8),%mm6
    408 	pxor	%mm3,%mm7
    409 	pxor	400(%esp,%edi,8),%mm6
    410 	xorb	(%esp,%edi,1),%cl
    411 	movb	%dl,%al
        // Bytes 12..15 are exhausted; fetch bytes 8..11.
    412 	movl	536(%esp),%edx
    413 	movd	%mm7,%ebx
    414 	movzbl	%cl,%ecx
    415 	psrlq	$8,%mm7
    416 	movq	%mm6,%mm3
    417 	movl	%eax,%edi
    418 	psrlq	$8,%mm6
    419 	pxor	272(%esp,%ebp,8),%mm7
    420 	andb	$15,%al
    421 	psllq	$56,%mm3
    422 	pxor	%mm2,%mm6
    423 	shrl	$4,%edi
    424 	pinsrw	$2,(%esi,%ecx,2),%mm1
    425 	pxor	16(%esp,%eax,8),%mm7
    426 	roll	$8,%edx
    427 	pxor	144(%esp,%eax,8),%mm6
    428 	pxor	%mm3,%mm7
    429 	pxor	400(%esp,%ebp,8),%mm6
    430 	xorb	(%esp,%ebp,1),%bl
    431 	movb	%dl,%al
    432 	movd	%mm7,%ecx
    433 	movzbl	%bl,%ebx
    434 	psrlq	$8,%mm7
    435 	movq	%mm6,%mm3
    436 	movl	%eax,%ebp
    437 	psrlq	$8,%mm6
    438 	pxor	272(%esp,%edi,8),%mm7
    439 	andb	$15,%al
    440 	psllq	$56,%mm3
    441 	pxor	%mm1,%mm6
    442 	shrl	$4,%ebp
    443 	pinsrw	$2,(%esi,%ebx,2),%mm0
    444 	pxor	16(%esp,%eax,8),%mm7
    445 	roll	$8,%edx
    446 	pxor	144(%esp,%eax,8),%mm6
    447 	pxor	%mm3,%mm7
    448 	pxor	400(%esp,%edi,8),%mm6
    449 	xorb	(%esp,%edi,1),%cl
    450 	movb	%dl,%al
    451 	movd	%mm7,%ebx
    452 	movzbl	%cl,%ecx
    453 	psrlq	$8,%mm7
    454 	movq	%mm6,%mm3
    455 	movl	%eax,%edi
    456 	psrlq	$8,%mm6
    457 	pxor	272(%esp,%ebp,8),%mm7
    458 	andb	$15,%al
    459 	psllq	$56,%mm3
    460 	pxor	%mm0,%mm6
    461 	shrl	$4,%edi
    462 	pinsrw	$2,(%esi,%ecx,2),%mm2
    463 	pxor	16(%esp,%eax,8),%mm7
    464 	roll	$8,%edx
    465 	pxor	144(%esp,%eax,8),%mm6
    466 	pxor	%mm3,%mm7
    467 	pxor	400(%esp,%ebp,8),%mm6
    468 	xorb	(%esp,%ebp,1),%bl
    469 	movb	%dl,%al
    470 	movd	%mm7,%ecx
    471 	movzbl	%bl,%ebx
    472 	psrlq	$8,%mm7
    473 	movq	%mm6,%mm3
    474 	movl	%eax,%ebp
    475 	psrlq	$8,%mm6
    476 	pxor	272(%esp,%edi,8),%mm7
    477 	andb	$15,%al
    478 	psllq	$56,%mm3
    479 	pxor	%mm2,%mm6
    480 	shrl	$4,%ebp
    481 	pinsrw	$2,(%esi,%ebx,2),%mm1
    482 	pxor	16(%esp,%eax,8),%mm7
    483 	roll	$8,%edx
    484 	pxor	144(%esp,%eax,8),%mm6
    485 	pxor	%mm3,%mm7
    486 	pxor	400(%esp,%edi,8),%mm6
    487 	xorb	(%esp,%edi,1),%cl
    488 	movb	%dl,%al
        // Fetch bytes 4..7.
    489 	movl	532(%esp),%edx
    490 	movd	%mm7,%ebx
    491 	movzbl	%cl,%ecx
    492 	psrlq	$8,%mm7
    493 	movq	%mm6,%mm3
    494 	movl	%eax,%edi
    495 	psrlq	$8,%mm6
    496 	pxor	272(%esp,%ebp,8),%mm7
    497 	andb	$15,%al
    498 	psllq	$56,%mm3
    499 	pxor	%mm1,%mm6
    500 	shrl	$4,%edi
    501 	pinsrw	$2,(%esi,%ecx,2),%mm0
    502 	pxor	16(%esp,%eax,8),%mm7
    503 	roll	$8,%edx
    504 	pxor	144(%esp,%eax,8),%mm6
    505 	pxor	%mm3,%mm7
    506 	pxor	400(%esp,%ebp,8),%mm6
    507 	xorb	(%esp,%ebp,1),%bl
    508 	movb	%dl,%al
    509 	movd	%mm7,%ecx
    510 	movzbl	%bl,%ebx
    511 	psrlq	$8,%mm7
    512 	movq	%mm6,%mm3
    513 	movl	%eax,%ebp
    514 	psrlq	$8,%mm6
    515 	pxor	272(%esp,%edi,8),%mm7
    516 	andb	$15,%al
    517 	psllq	$56,%mm3
    518 	pxor	%mm0,%mm6
    519 	shrl	$4,%ebp
    520 	pinsrw	$2,(%esi,%ebx,2),%mm2
    521 	pxor	16(%esp,%eax,8),%mm7
    522 	roll	$8,%edx
    523 	pxor	144(%esp,%eax,8),%mm6
    524 	pxor	%mm3,%mm7
    525 	pxor	400(%esp,%edi,8),%mm6
    526 	xorb	(%esp,%edi,1),%cl
    527 	movb	%dl,%al
    528 	movd	%mm7,%ebx
    529 	movzbl	%cl,%ecx
    530 	psrlq	$8,%mm7
    531 	movq	%mm6,%mm3
    532 	movl	%eax,%edi
    533 	psrlq	$8,%mm6
    534 	pxor	272(%esp,%ebp,8),%mm7
    535 	andb	$15,%al
    536 	psllq	$56,%mm3
    537 	pxor	%mm2,%mm6
    538 	shrl	$4,%edi
    539 	pinsrw	$2,(%esi,%ecx,2),%mm1
    540 	pxor	16(%esp,%eax,8),%mm7
    541 	roll	$8,%edx
    542 	pxor	144(%esp,%eax,8),%mm6
    543 	pxor	%mm3,%mm7
    544 	pxor	400(%esp,%ebp,8),%mm6
    545 	xorb	(%esp,%ebp,1),%bl
    546 	movb	%dl,%al
    547 	movd	%mm7,%ecx
    548 	movzbl	%bl,%ebx
    549 	psrlq	$8,%mm7
    550 	movq	%mm6,%mm3
    551 	movl	%eax,%ebp
    552 	psrlq	$8,%mm6
    553 	pxor	272(%esp,%edi,8),%mm7
    554 	andb	$15,%al
    555 	psllq	$56,%mm3
    556 	pxor	%mm1,%mm6
    557 	shrl	$4,%ebp
    558 	pinsrw	$2,(%esi,%ebx,2),%mm0
    559 	pxor	16(%esp,%eax,8),%mm7
    560 	roll	$8,%edx
    561 	pxor	144(%esp,%eax,8),%mm6
    562 	pxor	%mm3,%mm7
    563 	pxor	400(%esp,%edi,8),%mm6
    564 	xorb	(%esp,%edi,1),%cl
    565 	movb	%dl,%al
        // Fetch bytes 0..3.
    566 	movl	528(%esp),%edx
    567 	movd	%mm7,%ebx
    568 	movzbl	%cl,%ecx
    569 	psrlq	$8,%mm7
    570 	movq	%mm6,%mm3
    571 	movl	%eax,%edi
    572 	psrlq	$8,%mm6
    573 	pxor	272(%esp,%ebp,8),%mm7
    574 	andb	$15,%al
    575 	psllq	$56,%mm3
    576 	pxor	%mm0,%mm6
    577 	shrl	$4,%edi
    578 	pinsrw	$2,(%esi,%ecx,2),%mm2
    579 	pxor	16(%esp,%eax,8),%mm7
    580 	roll	$8,%edx
    581 	pxor	144(%esp,%eax,8),%mm6
    582 	pxor	%mm3,%mm7
    583 	pxor	400(%esp,%ebp,8),%mm6
    584 	xorb	(%esp,%ebp,1),%bl
    585 	movb	%dl,%al
    586 	movd	%mm7,%ecx
    587 	movzbl	%bl,%ebx
    588 	psrlq	$8,%mm7
    589 	movq	%mm6,%mm3
    590 	movl	%eax,%ebp
    591 	psrlq	$8,%mm6
    592 	pxor	272(%esp,%edi,8),%mm7
    593 	andb	$15,%al
    594 	psllq	$56,%mm3
    595 	pxor	%mm2,%mm6
    596 	shrl	$4,%ebp
    597 	pinsrw	$2,(%esi,%ebx,2),%mm1
    598 	pxor	16(%esp,%eax,8),%mm7
    599 	roll	$8,%edx
    600 	pxor	144(%esp,%eax,8),%mm6
    601 	pxor	%mm3,%mm7
    602 	pxor	400(%esp,%edi,8),%mm6
    603 	xorb	(%esp,%edi,1),%cl
    604 	movb	%dl,%al
    605 	movd	%mm7,%ebx
    606 	movzbl	%cl,%ecx
    607 	psrlq	$8,%mm7
    608 	movq	%mm6,%mm3
    609 	movl	%eax,%edi
    610 	psrlq	$8,%mm6
    611 	pxor	272(%esp,%ebp,8),%mm7
    612 	andb	$15,%al
    613 	psllq	$56,%mm3
    614 	pxor	%mm1,%mm6
    615 	shrl	$4,%edi
    616 	pinsrw	$2,(%esi,%ecx,2),%mm0
    617 	pxor	16(%esp,%eax,8),%mm7
    618 	roll	$8,%edx
    619 	pxor	144(%esp,%eax,8),%mm6
    620 	pxor	%mm3,%mm7
    621 	pxor	400(%esp,%ebp,8),%mm6
    622 	xorb	(%esp,%ebp,1),%bl
    623 	movb	%dl,%al
    624 	movd	%mm7,%ecx
    625 	movzbl	%bl,%ebx
    626 	psrlq	$8,%mm7
    627 	movq	%mm6,%mm3
    628 	movl	%eax,%ebp
    629 	psrlq	$8,%mm6
    630 	pxor	272(%esp,%edi,8),%mm7
    631 	andb	$15,%al
    632 	psllq	$56,%mm3
    633 	pxor	%mm0,%mm6
    634 	shrl	$4,%ebp
    635 	pinsrw	$2,(%esi,%ebx,2),%mm2
    636 	pxor	16(%esp,%eax,8),%mm7
    637 	roll	$8,%edx
    638 	pxor	144(%esp,%eax,8),%mm6
    639 	pxor	%mm3,%mm7
    640 	pxor	400(%esp,%edi,8),%mm6
    641 	xorb	(%esp,%edi,1),%cl
    642 	movb	%dl,%al
        // This load reads from inside the 400..527 table area (the last
        // byte was already copied to %al above). The loaded value is never
        // used: %edx is next written by "movd %mm7,%edx" further down.
    643 	movl	524(%esp),%edx
    644 	movd	%mm7,%ebx
    645 	movzbl	%cl,%ecx
    646 	psrlq	$8,%mm7
    647 	movq	%mm6,%mm3
    648 	movl	%eax,%edi
    649 	psrlq	$8,%mm6
    650 	pxor	272(%esp,%ebp,8),%mm7
    651 	andb	$15,%al
    652 	psllq	$56,%mm3
    653 	pxor	%mm2,%mm6
    654 	shrl	$4,%edi
    655 	pinsrw	$2,(%esi,%ecx,2),%mm1
    656 	pxor	16(%esp,%eax,8),%mm7
    657 	pxor	144(%esp,%eax,8),%mm6
    658 	xorb	(%esp,%ebp,1),%bl
    659 	pxor	%mm3,%mm7
    660 	pxor	400(%esp,%ebp,8),%mm6
    661 	movzbl	%bl,%ebx
        // Final nibble (high nibble of byte 0, index %edi): the
        // accumulator is shifted by 4 bits instead of 8 and the unshifted
        // copies at 16/144 are used. The outstanding Lrem_8bit words are
        // aligned with psllq $4 / psllq $12 / pinsrw $3 before being
        // XORed into %mm6.
    662 	pxor	%mm2,%mm2
    663 	psllq	$4,%mm1
    664 	movd	%mm7,%ecx
    665 	psrlq	$4,%mm7
    666 	movq	%mm6,%mm3
    667 	psrlq	$4,%mm6
    668 	shll	$4,%ecx
    669 	pxor	16(%esp,%edi,8),%mm7
    670 	psllq	$60,%mm3
    671 	movzbl	%cl,%ecx
    672 	pxor	%mm3,%mm7
    673 	pxor	144(%esp,%edi,8),%mm6
    674 	pinsrw	$2,(%esi,%ebx,2),%mm0
    675 	pxor	%mm1,%mm6
    676 	movd	%mm7,%edx
    677 	pinsrw	$3,(%esi,%ecx,2),%mm2
    678 	psllq	$12,%mm0
    679 	pxor	%mm0,%mm6
    680 	psrlq	$32,%mm7
    681 	pxor	%mm2,%mm6
    682 	movl	548(%esp),%ecx
    683 	movd	%mm7,%ebx
        // Convert back to the in-memory byte order: psllw/psrlw/por swaps
        // the bytes inside each 16-bit word of %mm6 and pshufw $27
        // reverses the four words (a full 64-bit byte reversal); the two
        // dwords of %mm7 are byte-swapped with bswap.
    684 	movq	%mm6,%mm3
    685 	psllw	$8,%mm6
    686 	psrlw	$8,%mm3
    687 	por	%mm3,%mm6
    688 	bswap	%edx
    689 	pshufw	$27,%mm6,%mm6
    690 	bswap	%ebx
        // Loop until the input pointer equals the end pointer.
    691 	cmpl	552(%esp),%ecx
    692 	jne	L004outer
        // Write the state back, restore the caller's stack pointer and
        // the callee-saved registers.
    693 	movl	544(%esp),%eax
    694 	movl	%edx,12(%eax)
    695 	movl	%ebx,8(%eax)
    696 	movq	%mm6,(%eax)
    697 	movl	556(%esp),%esp
    698 	emms
    699 	popl	%edi
    700 	popl	%esi
    701 	popl	%ebx
    702 	popl	%ebp
    703 	ret
    704 .globl	_gcm_init_clmul
    705 .private_extern	_gcm_init_clmul
    706 .align	4
        // _gcm_init_clmul(arg0, arg1) -- i386, AT&T syntax, SSE2 plus
        // pclmulqdq and palignr (both emitted as raw .byte sequences).
        //
        // In:   arg0 -> %edx  output buffer; 48 bytes are written
        //                     (offsets 0, 16 and 32).
        //       arg1 -> %eax  16-byte input, loaded unaligned.
        //       NOTE(review): presumably (Htable, H) of GHASH; confirm
        //       against the C prototype.
        // Out:  0(%edx)  = input with its 64-bit halves swapped, shifted
        //                  left by one bit across all 128 bits, and XORed
        //                  with the constant at Lbswap+16 when the top bit
        //                  of the swapped value was set
        //       16(%edx) = that value multiplied by itself (carry-less)
        //                  and reduced to 128 bits
        //       32(%edx) = low qword: hi^lo of the first value,
        //                  high qword: hi^lo of the second value
        // Leaf function: uses only %eax, %ecx, %edx and %xmm0-%xmm5, no
        // stack frame.
    707 _gcm_init_clmul:
    708 L_gcm_init_clmul_begin:
    709 	movl	4(%esp),%edx
    710 	movl	8(%esp),%eax
    711 	call	L005pic
    712 L005pic:
    713 	popl	%ecx
    714 	leal	Lbswap-L005pic(%ecx),%ecx
    715 	movdqu	(%eax),%xmm2
        // $78 swaps the two 64-bit halves; $255 broadcasts the top dword.
    716 	pshufd	$78,%xmm2,%xmm2
    717 	pshufd	$255,%xmm2,%xmm4
        // 128-bit left shift by 1: per-qword shift plus the carry bit
        // moved from the low qword into the high qword.
    718 	movdqa	%xmm2,%xmm3
    719 	psllq	$1,%xmm2
    720 	pxor	%xmm5,%xmm5
    721 	psrlq	$63,%xmm3
        // %xmm5 = all ones if the broadcast top dword is negative (top
        // bit set), else zero; it masks the constant at Lbswap+16.
    722 	pcmpgtd	%xmm4,%xmm5
    723 	pslldq	$8,%xmm3
    724 	por	%xmm3,%xmm2
    725 	pand	16(%ecx),%xmm5
    726 	pxor	%xmm5,%xmm2
        // Carry-less multiply of %xmm2 by itself using three 64x64
        // products (low, high, and (hi^lo)*(hi^lo) for the middle term).
    727 	movdqa	%xmm2,%xmm0
    728 	movdqa	%xmm0,%xmm1
    729 	pshufd	$78,%xmm0,%xmm3
    730 	pshufd	$78,%xmm2,%xmm4
    731 	pxor	%xmm0,%xmm3
    732 	pxor	%xmm2,%xmm4
        // pclmulqdq $0x00,%xmm2,%xmm0
    733 .byte	102,15,58,68,194,0
        // pclmulqdq $0x11,%xmm2,%xmm1
    734 .byte	102,15,58,68,202,17
        // pclmulqdq $0x00,%xmm4,%xmm3
    735 .byte	102,15,58,68,220,0
        // Fold the middle term into the 256-bit product %xmm1:%xmm0.
    736 	xorps	%xmm0,%xmm3
    737 	xorps	%xmm1,%xmm3
    738 	movdqa	%xmm3,%xmm4
    739 	psrldq	$8,%xmm3
    740 	pslldq	$8,%xmm4
    741 	pxor	%xmm3,%xmm1
    742 	pxor	%xmm4,%xmm0
        // Two-phase shift-and-XOR reduction of %xmm1:%xmm0 to 128 bits in
        // %xmm0 (presumably modulo the GCM polynomial -- confirm).
    743 	movdqa	%xmm0,%xmm4
    744 	movdqa	%xmm0,%xmm3
    745 	psllq	$5,%xmm0
    746 	pxor	%xmm0,%xmm3
    747 	psllq	$1,%xmm0
    748 	pxor	%xmm3,%xmm0
    749 	psllq	$57,%xmm0
    750 	movdqa	%xmm0,%xmm3
    751 	pslldq	$8,%xmm0
    752 	psrldq	$8,%xmm3
    753 	pxor	%xmm4,%xmm0
    754 	pxor	%xmm3,%xmm1
    755 	movdqa	%xmm0,%xmm4
    756 	psrlq	$1,%xmm0
    757 	pxor	%xmm4,%xmm1
    758 	pxor	%xmm0,%xmm4
    759 	psrlq	$5,%xmm0
    760 	pxor	%xmm4,%xmm0
    761 	psrlq	$1,%xmm0
    762 	pxor	%xmm1,%xmm0
        // Store both values and the packed hi^lo halves of each.
    763 	pshufd	$78,%xmm2,%xmm3
    764 	pshufd	$78,%xmm0,%xmm4
    765 	pxor	%xmm2,%xmm3
    766 	movdqu	%xmm2,(%edx)
    767 	pxor	%xmm0,%xmm4
    768 	movdqu	%xmm0,16(%edx)
        // palignr $8,%xmm3,%xmm4
    769 .byte	102,15,58,15,227,8
    770 	movdqu	%xmm4,32(%edx)
    771 	ret
    772 .globl	_gcm_gmult_clmul
    773 .private_extern	_gcm_gmult_clmul
    774 .align	4
        // _gcm_gmult_clmul(arg0, arg1) -- i386, AT&T syntax, SSE2 plus
        // pshufb and pclmulqdq (emitted as raw .byte sequences).
        //
        // In:   arg0 -> %eax  16-byte block, loaded unaligned, multiplied
        //                     and stored back in place.
        //       arg1 -> %edx  table; reads 16 bytes at offset 0 and 16
        //                     bytes at offset 32 (the layout written by
        //                     _gcm_init_clmul).
        //       NOTE(review): presumably (Xi, Htable) of GHASH; confirm
        //       against the C prototype.
        // The block is byte-reversed with the Lbswap mask on load and
        // again before the store. Leaf function: uses %eax, %ecx, %edx
        // and %xmm0-%xmm5, no stack frame.
    775 _gcm_gmult_clmul:
    776 L_gcm_gmult_clmul_begin:
    777 	movl	4(%esp),%eax
    778 	movl	8(%esp),%edx
    779 	call	L006pic
    780 L006pic:
    781 	popl	%ecx
    782 	leal	Lbswap-L006pic(%ecx),%ecx
    783 	movdqu	(%eax),%xmm0
    784 	movdqa	(%ecx),%xmm5
    785 	movups	(%edx),%xmm2
        // pshufb %xmm5,%xmm0
    786 .byte	102,15,56,0,197
    787 	movups	32(%edx),%xmm4
        // Three 64x64 carry-less products: low, high, and the middle term
        // from (hi^lo of %xmm0) times the low qword of 32(%edx).
    788 	movdqa	%xmm0,%xmm1
    789 	pshufd	$78,%xmm0,%xmm3
    790 	pxor	%xmm0,%xmm3
        // pclmulqdq $0x00,%xmm2,%xmm0
    791 .byte	102,15,58,68,194,0
        // pclmulqdq $0x11,%xmm2,%xmm1
    792 .byte	102,15,58,68,202,17
        // pclmulqdq $0x00,%xmm4,%xmm3
    793 .byte	102,15,58,68,220,0
        // Fold the middle term into the 256-bit product %xmm1:%xmm0.
    794 	xorps	%xmm0,%xmm3
    795 	xorps	%xmm1,%xmm3
    796 	movdqa	%xmm3,%xmm4
    797 	psrldq	$8,%xmm3
    798 	pslldq	$8,%xmm4
    799 	pxor	%xmm3,%xmm1
    800 	pxor	%xmm4,%xmm0
        // Two-phase shift-and-XOR reduction of %xmm1:%xmm0 to 128 bits in
        // %xmm0 (same sequence as in _gcm_init_clmul).
    801 	movdqa	%xmm0,%xmm4
    802 	movdqa	%xmm0,%xmm3
    803 	psllq	$5,%xmm0
    804 	pxor	%xmm0,%xmm3
    805 	psllq	$1,%xmm0
    806 	pxor	%xmm3,%xmm0
    807 	psllq	$57,%xmm0
    808 	movdqa	%xmm0,%xmm3
    809 	pslldq	$8,%xmm0
    810 	psrldq	$8,%xmm3
    811 	pxor	%xmm4,%xmm0
    812 	pxor	%xmm3,%xmm1
    813 	movdqa	%xmm0,%xmm4
    814 	psrlq	$1,%xmm0
    815 	pxor	%xmm4,%xmm1
    816 	pxor	%xmm0,%xmm4
    817 	psrlq	$5,%xmm0
    818 	pxor	%xmm4,%xmm0
    819 	psrlq	$1,%xmm0
    820 	pxor	%xmm1,%xmm0
        // pshufb %xmm5,%xmm0
    821 .byte	102,15,56,0,197
    822 	movdqu	%xmm0,(%eax)
    823 	ret
    824 .globl	_gcm_ghash_clmul
    825 .private_extern	_gcm_ghash_clmul
    826 .align	4
        // _gcm_ghash_clmul(arg0, arg1, arg2, arg3) -- i386, AT&T syntax,
        // SSE2 plus pshufb and pclmulqdq (emitted as raw .byte sequences).
        //
        // In:   arg0 -> %eax  16-byte state, loaded unaligned on entry and
        //                     stored back on exit.
        //       arg1 -> %edx  table; 16-byte reads at offsets 0, 16, 32.
        //       arg2 -> %esi  input pointer.
        //       arg3 -> %ebx  byte count.
        //       NOTE(review): presumably (Xi, Htable, inp, len) of GHASH;
        //       confirm against the C prototype.
        // Length handling: exactly 16 bytes goes straight to the
        // single-block tail; anything else enters the two-block path,
        // which reads 32 input bytes unconditionally. A count of 0
        // therefore also reads 32 bytes; the code expects a nonzero
        // multiple of 16.
        // %xmm5 alternates between the Lbswap byte-reversal mask and the
        // table entry at 32(%edx); %ecx holds the address of Lbswap.
        // Saves and restores %ebp, %ebx, %esi, %edi.
    827 _gcm_ghash_clmul:
    828 L_gcm_ghash_clmul_begin:
    829 	pushl	%ebp
    830 	pushl	%ebx
    831 	pushl	%esi
    832 	pushl	%edi
    833 	movl	20(%esp),%eax
    834 	movl	24(%esp),%edx
    835 	movl	28(%esp),%esi
    836 	movl	32(%esp),%ebx
    837 	call	L007pic
    838 L007pic:
    839 	popl	%ecx
    840 	leal	Lbswap-L007pic(%ecx),%ecx
    841 	movdqu	(%eax),%xmm0
    842 	movdqa	(%ecx),%xmm5
    843 	movdqu	(%edx),%xmm2
        // pshufb %xmm5,%xmm0
    844 .byte	102,15,56,0,197
    845 	subl	$16,%ebx
    846 	jz	L008odd_tail
        // Two-block path: load and byte-reverse two input blocks, XOR the
        // first into the state, and start multiplying the second by the
        // entry at 0(%edx) while the state will use the entry at 16(%edx).
    847 	movdqu	(%esi),%xmm3
    848 	movdqu	16(%esi),%xmm6
        // pshufb %xmm5,%xmm3
    849 .byte	102,15,56,0,221
        // pshufb %xmm5,%xmm6
    850 .byte	102,15,56,0,245
    851 	movdqu	32(%edx),%xmm5
    852 	pxor	%xmm3,%xmm0
    853 	pshufd	$78,%xmm6,%xmm3
    854 	movdqa	%xmm6,%xmm7
    855 	pxor	%xmm6,%xmm3
    856 	leal	32(%esi),%esi
        // pclmulqdq $0x00,%xmm2,%xmm6
    857 .byte	102,15,58,68,242,0
        // pclmulqdq $0x11,%xmm2,%xmm7
    858 .byte	102,15,58,68,250,17
        // pclmulqdq $0x00,%xmm5,%xmm3
    859 .byte	102,15,58,68,221,0
    860 	movups	16(%edx),%xmm2
    861 	nop
    862 	subl	$32,%ebx
    863 	jbe	L009even_tail
    864 	jmp	L010mod_loop
    865 .align	5,0x90
        // Main loop, 32 input bytes per iteration: finish the pair in
        // flight (state times 16(%edx), combined with the pending partial
        // products in %xmm6/%xmm7/%xmm3), reduce, and interleave the load
        // and first multiply of the next pair.
    866 L010mod_loop:
    867 	pshufd	$78,%xmm0,%xmm4
    868 	movdqa	%xmm0,%xmm1
    869 	pxor	%xmm0,%xmm4
    870 	nop
        // pclmulqdq $0x00,%xmm2,%xmm0
    871 .byte	102,15,58,68,194,0
        // pclmulqdq $0x11,%xmm2,%xmm1
    872 .byte	102,15,58,68,202,17
        // pclmulqdq $0x10,%xmm5,%xmm4
    873 .byte	102,15,58,68,229,16
    874 	movups	(%edx),%xmm2
    875 	xorps	%xmm6,%xmm0
    876 	movdqa	(%ecx),%xmm5
    877 	xorps	%xmm7,%xmm1
    878 	movdqu	(%esi),%xmm7
    879 	pxor	%xmm0,%xmm3
    880 	movdqu	16(%esi),%xmm6
    881 	pxor	%xmm1,%xmm3
        // pshufb %xmm5,%xmm7
    882 .byte	102,15,56,0,253
    883 	pxor	%xmm3,%xmm4
    884 	movdqa	%xmm4,%xmm3
    885 	psrldq	$8,%xmm4
    886 	pslldq	$8,%xmm3
    887 	pxor	%xmm4,%xmm1
    888 	pxor	%xmm3,%xmm0
        // pshufb %xmm5,%xmm6
    889 .byte	102,15,56,0,245
    890 	pxor	%xmm7,%xmm1
    891 	movdqa	%xmm6,%xmm7
    892 	movdqa	%xmm0,%xmm4
    893 	movdqa	%xmm0,%xmm3
    894 	psllq	$5,%xmm0
    895 	pxor	%xmm0,%xmm3
    896 	psllq	$1,%xmm0
    897 	pxor	%xmm3,%xmm0
        // pclmulqdq $0x00,%xmm2,%xmm6
    898 .byte	102,15,58,68,242,0
    899 	movups	32(%edx),%xmm5
    900 	psllq	$57,%xmm0
    901 	movdqa	%xmm0,%xmm3
    902 	pslldq	$8,%xmm0
    903 	psrldq	$8,%xmm3
    904 	pxor	%xmm4,%xmm0
    905 	pxor	%xmm3,%xmm1
    906 	pshufd	$78,%xmm7,%xmm3
    907 	movdqa	%xmm0,%xmm4
    908 	psrlq	$1,%xmm0
    909 	pxor	%xmm7,%xmm3
    910 	pxor	%xmm4,%xmm1
        // pclmulqdq $0x11,%xmm2,%xmm7
    911 .byte	102,15,58,68,250,17
    912 	movups	16(%edx),%xmm2
    913 	pxor	%xmm0,%xmm4
    914 	psrlq	$5,%xmm0
    915 	pxor	%xmm4,%xmm0
    916 	psrlq	$1,%xmm0
    917 	pxor	%xmm1,%xmm0
        // pclmulqdq $0x00,%xmm5,%xmm3
    918 .byte	102,15,58,68,221,0
    919 	leal	32(%esi),%esi
    920 	subl	$32,%ebx
    921 	ja	L010mod_loop
        // Finish the last pair in flight and reduce. Afterwards %ebx == 0
        // means exactly one more 16-byte block remains; any other value
        // means the input is exhausted.
    922 L009even_tail:
    923 	pshufd	$78,%xmm0,%xmm4
    924 	movdqa	%xmm0,%xmm1
    925 	pxor	%xmm0,%xmm4
        // pclmulqdq $0x00,%xmm2,%xmm0
    926 .byte	102,15,58,68,194,0
        // pclmulqdq $0x11,%xmm2,%xmm1
    927 .byte	102,15,58,68,202,17
        // pclmulqdq $0x10,%xmm5,%xmm4
    928 .byte	102,15,58,68,229,16
    929 	movdqa	(%ecx),%xmm5
    930 	xorps	%xmm6,%xmm0
    931 	xorps	%xmm7,%xmm1
    932 	pxor	%xmm0,%xmm3
    933 	pxor	%xmm1,%xmm3
    934 	pxor	%xmm3,%xmm4
    935 	movdqa	%xmm4,%xmm3
    936 	psrldq	$8,%xmm4
    937 	pslldq	$8,%xmm3
    938 	pxor	%xmm4,%xmm1
    939 	pxor	%xmm3,%xmm0
    940 	movdqa	%xmm0,%xmm4
    941 	movdqa	%xmm0,%xmm3
    942 	psllq	$5,%xmm0
    943 	pxor	%xmm0,%xmm3
    944 	psllq	$1,%xmm0
    945 	pxor	%xmm3,%xmm0
    946 	psllq	$57,%xmm0
    947 	movdqa	%xmm0,%xmm3
    948 	pslldq	$8,%xmm0
    949 	psrldq	$8,%xmm3
    950 	pxor	%xmm4,%xmm0
    951 	pxor	%xmm3,%xmm1
    952 	movdqa	%xmm0,%xmm4
    953 	psrlq	$1,%xmm0
    954 	pxor	%xmm4,%xmm1
    955 	pxor	%xmm0,%xmm4
    956 	psrlq	$5,%xmm0
    957 	pxor	%xmm4,%xmm0
    958 	psrlq	$1,%xmm0
    959 	pxor	%xmm1,%xmm0
    960 	testl	%ebx,%ebx
    961 	jnz	L011done
    962 	movups	(%edx),%xmm2
        // Single remaining block: XOR it into the state and multiply by
        // the entry at 0(%edx) (in %xmm2), then reduce.
    963 L008odd_tail:
    964 	movdqu	(%esi),%xmm3
        // pshufb %xmm5,%xmm3
    965 .byte	102,15,56,0,221
    966 	pxor	%xmm3,%xmm0
    967 	movdqa	%xmm0,%xmm1
    968 	pshufd	$78,%xmm0,%xmm3
    969 	pshufd	$78,%xmm2,%xmm4
    970 	pxor	%xmm0,%xmm3
    971 	pxor	%xmm2,%xmm4
        // pclmulqdq $0x00,%xmm2,%xmm0
    972 .byte	102,15,58,68,194,0
        // pclmulqdq $0x11,%xmm2,%xmm1
    973 .byte	102,15,58,68,202,17
        // pclmulqdq $0x00,%xmm4,%xmm3
    974 .byte	102,15,58,68,220,0
    975 	xorps	%xmm0,%xmm3
    976 	xorps	%xmm1,%xmm3
    977 	movdqa	%xmm3,%xmm4
    978 	psrldq	$8,%xmm3
    979 	pslldq	$8,%xmm4
    980 	pxor	%xmm3,%xmm1
    981 	pxor	%xmm4,%xmm0
    982 	movdqa	%xmm0,%xmm4
    983 	movdqa	%xmm0,%xmm3
    984 	psllq	$5,%xmm0
    985 	pxor	%xmm0,%xmm3
    986 	psllq	$1,%xmm0
    987 	pxor	%xmm3,%xmm0
    988 	psllq	$57,%xmm0
    989 	movdqa	%xmm0,%xmm3
    990 	pslldq	$8,%xmm0
    991 	psrldq	$8,%xmm3
    992 	pxor	%xmm4,%xmm0
    993 	pxor	%xmm3,%xmm1
    994 	movdqa	%xmm0,%xmm4
    995 	psrlq	$1,%xmm0
    996 	pxor	%xmm4,%xmm1
    997 	pxor	%xmm0,%xmm4
    998 	psrlq	$5,%xmm0
    999 	pxor	%xmm4,%xmm0
   1000 	psrlq	$1,%xmm0
   1001 	pxor	%xmm1,%xmm0
        // Byte-reverse the state back and store it.
   1002 L011done:
        // pshufb %xmm5,%xmm0
   1003 .byte	102,15,56,0,197
   1004 	movdqu	%xmm0,(%eax)
   1005 	popl	%edi
   1006 	popl	%esi
   1007 	popl	%ebx
   1008 	popl	%ebp
   1009 	ret
   1010 .align	6,0x90
        // Constant tables shared by the routines above. They live in the
        // same section as the code and are addressed PC-relatively.
        //
        // Lbswap+0 : pshufb mask 15,14,...,0 = reverse all 16 bytes.
        //            Loaded with movdqa, hence the 64-byte alignment.
        // Lbswap+16: 128-bit constant with lowest byte 0x01 and highest
        //            byte 0xC2 (194); used via pand in _gcm_init_clmul.
   1011 Lbswap:
   1012 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   1013 .byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
   1014 .align	6,0x90
        // Lrem_8bit: 256 16-bit entries, indexed by a byte value scaled by
        // 2 ("(%esi,%reg,2)") in _gcm_ghash_4bit_mmx.
   1015 Lrem_8bit:
   1016 .value	0,450,900,582,1800,1738,1164,1358
   1017 .value	3600,4050,3476,3158,2328,2266,2716,2910
   1018 .value	7200,7650,8100,7782,6952,6890,6316,6510
   1019 .value	4656,5106,4532,4214,5432,5370,5820,6014
   1020 .value	14400,14722,15300,14854,16200,16010,15564,15630
   1021 .value	13904,14226,13780,13334,12632,12442,13020,13086
   1022 .value	9312,9634,10212,9766,9064,8874,8428,8494
   1023 .value	10864,11186,10740,10294,11640,11450,12028,12094
   1024 .value	28800,28994,29444,29382,30600,30282,29708,30158
   1025 .value	32400,32594,32020,31958,31128,30810,31260,31710
   1026 .value	27808,28002,28452,28390,27560,27242,26668,27118
   1027 .value	25264,25458,24884,24822,26040,25722,26172,26622
   1028 .value	18624,18690,19268,19078,20424,19978,19532,19854
   1029 .value	18128,18194,17748,17558,16856,16410,16988,17310
   1030 .value	21728,21794,22372,22182,21480,21034,20588,20910
   1031 .value	23280,23346,22900,22710,24056,23610,24188,24510
   1032 .value	57600,57538,57988,58182,58888,59338,58764,58446
   1033 .value	61200,61138,60564,60758,59416,59866,60316,59998
   1034 .value	64800,64738,65188,65382,64040,64490,63916,63598
   1035 .value	62256,62194,61620,61814,62520,62970,63420,63102
   1036 .value	55616,55426,56004,56070,56904,57226,56780,56334
   1037 .value	55120,54930,54484,54550,53336,53658,54236,53790
   1038 .value	50528,50338,50916,50982,49768,50090,49644,49198
   1039 .value	52080,51890,51444,51510,52344,52666,53244,52798
   1040 .value	37248,36930,37380,37830,38536,38730,38156,38094
   1041 .value	40848,40530,39956,40406,39064,39258,39708,39646
   1042 .value	36256,35938,36388,36838,35496,35690,35116,35054
   1043 .value	33712,33394,32820,33270,33976,34170,34620,34558
   1044 .value	43456,43010,43588,43910,44744,44810,44364,44174
   1045 .value	42960,42514,42068,42390,41176,41242,41820,41630
   1046 .value	46560,46114,46692,47014,45800,45866,45420,45230
   1047 .value	48112,47666,47220,47542,48376,48442,49020,48830
   1048 .align	6,0x90
        // Lrem_4bit: 16 eight-byte entries (low dword always 0, value in
        // the high dword), indexed by a 4-bit value scaled by 8
        // ("(%eax,%ebx,8)") in _gcm_gmult_4bit_mmx.
   1049 Lrem_4bit:
   1050 .long	0,0,0,471859200,0,943718400,0,610271232
   1051 .long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
   1052 .long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
   1053 .long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
        // NUL-terminated ASCII identification string:
        // "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
   1054 .byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
   1055 .byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
   1056 .byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
   1057 .byte	0
   1058 #endif
   1059