Home | History | Annotate | Download | only in modes
      1 #if defined(__i386__)
      2 .file	"ghash-x86.S"
      3 .text
      4 .globl	_gcm_gmult_4bit_mmx
      5 .private_extern	_gcm_gmult_4bit_mmx
      6 .align	4
        /*
         * _gcm_gmult_4bit_mmx(arg1, arg2) -- 32-bit x86, AT&T syntax, MMX.
         *
         * Both arguments are read from the stack: arg1 -> %edi, arg2 -> %esi.
         *   arg1: 16-byte buffer, read byte by byte (offset 15 down to 0) and
         *         overwritten with the 16-byte result.
         *   arg2: table of 16-byte entries; a nibble value times 16 is used as
         *         the byte offset, so nibble values 0..15 each select one entry.
         * Presumably this is the GHASH "Xi = Xi * H" step with a 4-bit table
         * (see the identification string at the end of the file) -- confirm
         * against the C prototype.
         *
         * Preserves %ebp, %ebx, %esi, %edi (pushed/popped here).
         * Clobbers  %eax, %ecx, %edx, %mm0-%mm2, flags; runs emms before the
         * result is stored.
         */
      7 _gcm_gmult_4bit_mmx:
      8 L_gcm_gmult_4bit_mmx_begin:
      9 	pushl	%ebp
     10 	pushl	%ebx
     11 	pushl	%esi
     12 	pushl	%edi
        /* Four registers were pushed, so the stack arguments are at 20 and 24(%esp). */
     13 	movl	20(%esp),%edi
     14 	movl	24(%esp),%esi
        /* Position-independent address of Lrem_4bit: call/pop yields the address
           of L000pic_point, and the leal adds the label difference. */
     15 	call	L000pic_point
     16 L000pic_point:
     17 	popl	%eax
     18 	leal	Lrem_4bit-L000pic_point(%eax),%eax
        /* Byte 15 first: %ecx = low nibble * 16, %edx = high nibble * 16,
           %ebp = index of the next byte to fetch (14). */
     19 	movzbl	15(%edi),%ebx
     20 	xorl	%ecx,%ecx
     21 	movl	%ebx,%edx
     22 	movb	%dl,%cl
     23 	movl	$14,%ebp
     24 	shlb	$4,%cl
     25 	andl	$240,%edx
        /* mm1:mm0 (mm1 = upper 64 bits) starts as the table entry of the low nibble;
           %ebx keeps the low 32 bits of mm0 so that the 4 bits about to be shifted
           out can be used as an Lrem_4bit index. */
     26 	movq	8(%esi,%ecx,1),%mm0
     27 	movq	(%esi,%ecx,1),%mm1
     28 	movd	%mm0,%ebx
     29 	jmp	L001mmx_loop
     30 .align	4,0x90
        /*
         * Each pass handles two nibbles: the pending high nibble (%edx) and the
         * low nibble of the next byte (%cl).  For each nibble, mm1:mm0 is shifted
         * right by 4 bits (mm2 carries the bits crossing from mm1 into mm0), the
         * 8-byte Lrem_4bit entry selected by the shifted-out bits is XORed into
         * mm1, and the table entry for the nibble is XORed into mm1:mm0.
         */
     31 L001mmx_loop:
     32 	psrlq	$4,%mm0
     33 	andl	$15,%ebx
     34 	movq	%mm1,%mm2
     35 	psrlq	$4,%mm1
     36 	pxor	8(%esi,%edx,1),%mm0
     37 	movb	(%edi,%ebp,1),%cl
     38 	psllq	$60,%mm2
     39 	pxor	(%eax,%ebx,8),%mm1
     40 	decl	%ebp
     41 	movd	%mm0,%ebx
     42 	pxor	(%esi,%edx,1),%mm1
     43 	movl	%ecx,%edx
     44 	pxor	%mm2,%mm0
        /* %ebp went negative: byte 0 has just been fetched, finish outside the loop. */
     45 	js	L002mmx_break
     46 	shlb	$4,%cl
     47 	andl	$15,%ebx
     48 	psrlq	$4,%mm0
     49 	andl	$240,%edx
     50 	movq	%mm1,%mm2
     51 	psrlq	$4,%mm1
     52 	pxor	8(%esi,%ecx,1),%mm0
     53 	psllq	$60,%mm2
     54 	pxor	(%eax,%ebx,8),%mm1
     55 	movd	%mm0,%ebx
     56 	pxor	(%esi,%ecx,1),%mm1
     57 	pxor	%mm2,%mm0
     58 	jmp	L001mmx_loop
     59 .align	4,0x90
        /* Tail: low nibble of byte 0 (%ecx), then its high nibble (%edx). */
     60 L002mmx_break:
     61 	shlb	$4,%cl
     62 	andl	$15,%ebx
     63 	psrlq	$4,%mm0
     64 	andl	$240,%edx
     65 	movq	%mm1,%mm2
     66 	psrlq	$4,%mm1
     67 	pxor	8(%esi,%ecx,1),%mm0
     68 	psllq	$60,%mm2
     69 	pxor	(%eax,%ebx,8),%mm1
     70 	movd	%mm0,%ebx
     71 	pxor	(%esi,%ecx,1),%mm1
     72 	pxor	%mm2,%mm0
     73 	psrlq	$4,%mm0
     74 	andl	$15,%ebx
     75 	movq	%mm1,%mm2
     76 	psrlq	$4,%mm1
     77 	pxor	8(%esi,%edx,1),%mm0
     78 	psllq	$60,%mm2
     79 	pxor	(%eax,%ebx,8),%mm1
     80 	movd	%mm0,%ebx
     81 	pxor	(%esi,%edx,1),%mm1
     82 	pxor	%mm2,%mm0
        /* Split mm1:mm0 into four 32-bit words, byte-swap each one and store:
           mm1 high -> 0, mm1 low -> 4, mm0 high -> 8, mm0 low -> 12 of arg1. */
     83 	psrlq	$32,%mm0
     84 	movd	%mm1,%edx
     85 	psrlq	$32,%mm1
     86 	movd	%mm0,%ecx
     87 	movd	%mm1,%ebp
     88 	bswap	%ebx
     89 	bswap	%edx
     90 	bswap	%ecx
     91 	bswap	%ebp
     92 	emms
     93 	movl	%ebx,12(%edi)
     94 	movl	%edx,4(%edi)
     95 	movl	%ecx,8(%edi)
     96 	movl	%ebp,(%edi)
     97 	popl	%edi
     98 	popl	%esi
     99 	popl	%ebx
    100 	popl	%ebp
    101 	ret
    102 .globl	_gcm_ghash_4bit_mmx
    103 .private_extern	_gcm_ghash_4bit_mmx
    104 .align	4
        /*
         * _gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4) -- 32-bit x86, AT&T syntax,
         * MMX plus pinsrw/pshufw.
         *
         * Stack arguments as read below:
         *   arg1 -> %eax : 16-byte running value, loaded at entry and stored back
         *                  at exit.
         *   arg2 -> %ebx : table of sixteen 16-byte entries (256 bytes are read).
         *   arg3 -> %ecx : input pointer, consumed 16 bytes per outer iteration.
         *   arg4 -> %edx : byte count; arg3 + arg4 is the end pointer.
         * The outer loop is bottom-tested with an equality compare against the end
         * pointer, so the count must be a non-zero multiple of 16 for the loop to
         * stop there.
         * Presumably this is the multi-block GHASH update with a 4-bit table
         * (name and identification string) -- confirm against the C prototype.
         *
         * Stack frame after re-alignment (offsets from the new %esp):
         *     0.. 15  one byte per table entry: low byte of the entry's second
         *             qword, shifted left by 4
         *    16..143  second qword (offset 8) of each entry
         *   144..271  first qword (offset 0) of each entry
         *   272..399  (second qword >> 4) | (first qword << 60) of each entry
         *   400..527  first qword >> 4 of each entry
         *   528..539  spill of the first 12 bytes of the running value
         *   544       arg1        548  current input pointer
         *   552       end pointer 556  caller's %esp
         *
         * Preserves %ebp, %ebx, %esi, %edi.  Clobbers %eax, %ecx, %edx,
         * %mm0-%mm7, flags; runs emms before returning.
         */
    105 _gcm_ghash_4bit_mmx:
    106 L_gcm_ghash_4bit_mmx_begin:
    107 	pushl	%ebp
    108 	pushl	%ebx
    109 	pushl	%esi
    110 	pushl	%edi
    111 	movl	20(%esp),%eax
    112 	movl	24(%esp),%ebx
    113 	movl	28(%esp),%ecx
    114 	movl	32(%esp),%edx
        /* Remember the caller's %esp; it is restored from 556(%esp) at exit. */
    115 	movl	%esp,%ebp
        /* Position-independent address of Lrem_8bit in %esi. */
    116 	call	L003pic_point
    117 L003pic_point:
    118 	popl	%esi
    119 	leal	Lrem_8bit-L003pic_point(%esi),%esi
        /* Carve out the frame: at least 560 bytes, with %esp+16 aligned to 64. */
    120 	subl	$544,%esp
    121 	andl	$-64,%esp
    122 	subl	$16,%esp
    123 	addl	%ecx,%edx
    124 	movl	%eax,544(%esp)
    125 	movl	%edx,552(%esp)
    126 	movl	%ebp,556(%esp)
        /* Bias the table pointer by 128 so all 256 bytes are reachable with
           8-bit displacements; %edi/%ebp are biased the same way into the frame. */
    127 	addl	$128,%ebx
    128 	leal	144(%esp),%edi
    129 	leal	400(%esp),%ebp
        /* Expand the 16 table entries into the frame areas described above.
           The loads, shifts and stores of neighbouring entries are interleaved. */
    130 	movl	-120(%ebx),%edx
    131 	movq	-120(%ebx),%mm0
    132 	movq	-128(%ebx),%mm3
    133 	shll	$4,%edx
    134 	movb	%dl,(%esp)
    135 	movl	-104(%ebx),%edx
    136 	movq	-104(%ebx),%mm2
    137 	movq	-112(%ebx),%mm5
    138 	movq	%mm0,-128(%edi)
    139 	psrlq	$4,%mm0
    140 	movq	%mm3,(%edi)
    141 	movq	%mm3,%mm7
    142 	psrlq	$4,%mm3
    143 	shll	$4,%edx
    144 	movb	%dl,1(%esp)
    145 	movl	-88(%ebx),%edx
    146 	movq	-88(%ebx),%mm1
    147 	psllq	$60,%mm7
    148 	movq	-96(%ebx),%mm4
    149 	por	%mm7,%mm0
    150 	movq	%mm2,-120(%edi)
    151 	psrlq	$4,%mm2
    152 	movq	%mm5,8(%edi)
    153 	movq	%mm5,%mm6
    154 	movq	%mm0,-128(%ebp)
    155 	psrlq	$4,%mm5
    156 	movq	%mm3,(%ebp)
    157 	shll	$4,%edx
    158 	movb	%dl,2(%esp)
    159 	movl	-72(%ebx),%edx
    160 	movq	-72(%ebx),%mm0
    161 	psllq	$60,%mm6
    162 	movq	-80(%ebx),%mm3
    163 	por	%mm6,%mm2
    164 	movq	%mm1,-112(%edi)
    165 	psrlq	$4,%mm1
    166 	movq	%mm4,16(%edi)
    167 	movq	%mm4,%mm7
    168 	movq	%mm2,-120(%ebp)
    169 	psrlq	$4,%mm4
    170 	movq	%mm5,8(%ebp)
    171 	shll	$4,%edx
    172 	movb	%dl,3(%esp)
    173 	movl	-56(%ebx),%edx
    174 	movq	-56(%ebx),%mm2
    175 	psllq	$60,%mm7
    176 	movq	-64(%ebx),%mm5
    177 	por	%mm7,%mm1
    178 	movq	%mm0,-104(%edi)
    179 	psrlq	$4,%mm0
    180 	movq	%mm3,24(%edi)
    181 	movq	%mm3,%mm6
    182 	movq	%mm1,-112(%ebp)
    183 	psrlq	$4,%mm3
    184 	movq	%mm4,16(%ebp)
    185 	shll	$4,%edx
    186 	movb	%dl,4(%esp)
    187 	movl	-40(%ebx),%edx
    188 	movq	-40(%ebx),%mm1
    189 	psllq	$60,%mm6
    190 	movq	-48(%ebx),%mm4
    191 	por	%mm6,%mm0
    192 	movq	%mm2,-96(%edi)
    193 	psrlq	$4,%mm2
    194 	movq	%mm5,32(%edi)
    195 	movq	%mm5,%mm7
    196 	movq	%mm0,-104(%ebp)
    197 	psrlq	$4,%mm5
    198 	movq	%mm3,24(%ebp)
    199 	shll	$4,%edx
    200 	movb	%dl,5(%esp)
    201 	movl	-24(%ebx),%edx
    202 	movq	-24(%ebx),%mm0
    203 	psllq	$60,%mm7
    204 	movq	-32(%ebx),%mm3
    205 	por	%mm7,%mm2
    206 	movq	%mm1,-88(%edi)
    207 	psrlq	$4,%mm1
    208 	movq	%mm4,40(%edi)
    209 	movq	%mm4,%mm6
    210 	movq	%mm2,-96(%ebp)
    211 	psrlq	$4,%mm4
    212 	movq	%mm5,32(%ebp)
    213 	shll	$4,%edx
    214 	movb	%dl,6(%esp)
    215 	movl	-8(%ebx),%edx
    216 	movq	-8(%ebx),%mm2
    217 	psllq	$60,%mm6
    218 	movq	-16(%ebx),%mm5
    219 	por	%mm6,%mm1
    220 	movq	%mm0,-80(%edi)
    221 	psrlq	$4,%mm0
    222 	movq	%mm3,48(%edi)
    223 	movq	%mm3,%mm7
    224 	movq	%mm1,-88(%ebp)
    225 	psrlq	$4,%mm3
    226 	movq	%mm4,40(%ebp)
    227 	shll	$4,%edx
    228 	movb	%dl,7(%esp)
    229 	movl	8(%ebx),%edx
    230 	movq	8(%ebx),%mm1
    231 	psllq	$60,%mm7
    232 	movq	(%ebx),%mm4
    233 	por	%mm7,%mm0
    234 	movq	%mm2,-72(%edi)
    235 	psrlq	$4,%mm2
    236 	movq	%mm5,56(%edi)
    237 	movq	%mm5,%mm6
    238 	movq	%mm0,-80(%ebp)
    239 	psrlq	$4,%mm5
    240 	movq	%mm3,48(%ebp)
    241 	shll	$4,%edx
    242 	movb	%dl,8(%esp)
    243 	movl	24(%ebx),%edx
    244 	movq	24(%ebx),%mm0
    245 	psllq	$60,%mm6
    246 	movq	16(%ebx),%mm3
    247 	por	%mm6,%mm2
    248 	movq	%mm1,-64(%edi)
    249 	psrlq	$4,%mm1
    250 	movq	%mm4,64(%edi)
    251 	movq	%mm4,%mm7
    252 	movq	%mm2,-72(%ebp)
    253 	psrlq	$4,%mm4
    254 	movq	%mm5,56(%ebp)
    255 	shll	$4,%edx
    256 	movb	%dl,9(%esp)
    257 	movl	40(%ebx),%edx
    258 	movq	40(%ebx),%mm2
    259 	psllq	$60,%mm7
    260 	movq	32(%ebx),%mm5
    261 	por	%mm7,%mm1
    262 	movq	%mm0,-56(%edi)
    263 	psrlq	$4,%mm0
    264 	movq	%mm3,72(%edi)
    265 	movq	%mm3,%mm6
    266 	movq	%mm1,-64(%ebp)
    267 	psrlq	$4,%mm3
    268 	movq	%mm4,64(%ebp)
    269 	shll	$4,%edx
    270 	movb	%dl,10(%esp)
    271 	movl	56(%ebx),%edx
    272 	movq	56(%ebx),%mm1
    273 	psllq	$60,%mm6
    274 	movq	48(%ebx),%mm4
    275 	por	%mm6,%mm0
    276 	movq	%mm2,-48(%edi)
    277 	psrlq	$4,%mm2
    278 	movq	%mm5,80(%edi)
    279 	movq	%mm5,%mm7
    280 	movq	%mm0,-56(%ebp)
    281 	psrlq	$4,%mm5
    282 	movq	%mm3,72(%ebp)
    283 	shll	$4,%edx
    284 	movb	%dl,11(%esp)
    285 	movl	72(%ebx),%edx
    286 	movq	72(%ebx),%mm0
    287 	psllq	$60,%mm7
    288 	movq	64(%ebx),%mm3
    289 	por	%mm7,%mm2
    290 	movq	%mm1,-40(%edi)
    291 	psrlq	$4,%mm1
    292 	movq	%mm4,88(%edi)
    293 	movq	%mm4,%mm6
    294 	movq	%mm2,-48(%ebp)
    295 	psrlq	$4,%mm4
    296 	movq	%mm5,80(%ebp)
    297 	shll	$4,%edx
    298 	movb	%dl,12(%esp)
    299 	movl	88(%ebx),%edx
    300 	movq	88(%ebx),%mm2
    301 	psllq	$60,%mm6
    302 	movq	80(%ebx),%mm5
    303 	por	%mm6,%mm1
    304 	movq	%mm0,-32(%edi)
    305 	psrlq	$4,%mm0
    306 	movq	%mm3,96(%edi)
    307 	movq	%mm3,%mm7
    308 	movq	%mm1,-40(%ebp)
    309 	psrlq	$4,%mm3
    310 	movq	%mm4,88(%ebp)
    311 	shll	$4,%edx
    312 	movb	%dl,13(%esp)
    313 	movl	104(%ebx),%edx
    314 	movq	104(%ebx),%mm1
    315 	psllq	$60,%mm7
    316 	movq	96(%ebx),%mm4
    317 	por	%mm7,%mm0
    318 	movq	%mm2,-24(%edi)
    319 	psrlq	$4,%mm2
    320 	movq	%mm5,104(%edi)
    321 	movq	%mm5,%mm6
    322 	movq	%mm0,-32(%ebp)
    323 	psrlq	$4,%mm5
    324 	movq	%mm3,96(%ebp)
    325 	shll	$4,%edx
    326 	movb	%dl,14(%esp)
    327 	movl	120(%ebx),%edx
    328 	movq	120(%ebx),%mm0
    329 	psllq	$60,%mm6
    330 	movq	112(%ebx),%mm3
    331 	por	%mm6,%mm2
    332 	movq	%mm1,-16(%edi)
    333 	psrlq	$4,%mm1
    334 	movq	%mm4,112(%edi)
    335 	movq	%mm4,%mm7
    336 	movq	%mm2,-24(%ebp)
    337 	psrlq	$4,%mm4
    338 	movq	%mm5,104(%ebp)
    339 	shll	$4,%edx
    340 	movb	%dl,15(%esp)
    341 	psllq	$60,%mm7
    342 	por	%mm7,%mm1
    343 	movq	%mm0,-8(%edi)
    344 	psrlq	$4,%mm0
    345 	movq	%mm3,120(%edi)
    346 	movq	%mm3,%mm6
    347 	movq	%mm1,-16(%ebp)
    348 	psrlq	$4,%mm3
    349 	movq	%mm4,112(%ebp)
    350 	psllq	$60,%mm6
    351 	por	%mm6,%mm0
    352 	movq	%mm0,-8(%ebp)
    353 	movq	%mm3,120(%ebp)
        /* Load the 16-byte running value: bytes 0..7 -> %mm6, 8..11 -> %ebx,
           12..15 -> %edx. */
    354 	movq	(%eax),%mm6
    355 	movl	8(%eax),%ebx
    356 	movl	12(%eax),%edx
    357 .align	4,0x90
        /*
         * One iteration per 16 input bytes: XOR the input block into the running
         * value, spill its first 12 bytes to 528(%esp) and save the advanced
         * input pointer at 548(%esp).  The value is then consumed a byte at a
         * time (roll $8,%edx / movb %dl,%al), each byte split into two nibbles
         * that index the stack tables, while 16-bit Lrem_8bit entries are fetched
         * with pinsrw through (%esi,index,2).  The steps are unrolled and
         * interleaved, so neighbouring instructions usually belong to different
         * steps.
         */
    358 L004outer:
    359 	xorl	12(%ecx),%edx
    360 	xorl	8(%ecx),%ebx
    361 	pxor	(%ecx),%mm6
    362 	leal	16(%ecx),%ecx
    363 	movl	%ebx,536(%esp)
    364 	movq	%mm6,528(%esp)
    365 	movl	%ecx,548(%esp)
    366 	xorl	%eax,%eax
    367 	roll	$8,%edx
    368 	movb	%dl,%al
    369 	movl	%eax,%ebp
    370 	andb	$15,%al
    371 	shrl	$4,%ebp
    372 	pxor	%mm0,%mm0
    373 	roll	$8,%edx
    374 	pxor	%mm1,%mm1
    375 	pxor	%mm2,%mm2
    376 	movq	16(%esp,%eax,8),%mm7
    377 	movq	144(%esp,%eax,8),%mm6
    378 	movb	%dl,%al
    379 	movd	%mm7,%ebx
    380 	psrlq	$8,%mm7
    381 	movq	%mm6,%mm3
    382 	movl	%eax,%edi
    383 	psrlq	$8,%mm6
    384 	pxor	272(%esp,%ebp,8),%mm7
    385 	andb	$15,%al
    386 	psllq	$56,%mm3
    387 	shrl	$4,%edi
    388 	pxor	16(%esp,%eax,8),%mm7
    389 	roll	$8,%edx
    390 	pxor	144(%esp,%eax,8),%mm6
    391 	pxor	%mm3,%mm7
    392 	pxor	400(%esp,%ebp,8),%mm6
    393 	xorb	(%esp,%ebp,1),%bl
    394 	movb	%dl,%al
    395 	movd	%mm7,%ecx
    396 	movzbl	%bl,%ebx
    397 	psrlq	$8,%mm7
    398 	movq	%mm6,%mm3
    399 	movl	%eax,%ebp
    400 	psrlq	$8,%mm6
    401 	pxor	272(%esp,%edi,8),%mm7
    402 	andb	$15,%al
    403 	psllq	$56,%mm3
    404 	shrl	$4,%ebp
    405 	pinsrw	$2,(%esi,%ebx,2),%mm2
    406 	pxor	16(%esp,%eax,8),%mm7
    407 	roll	$8,%edx
    408 	pxor	144(%esp,%eax,8),%mm6
    409 	pxor	%mm3,%mm7
    410 	pxor	400(%esp,%edi,8),%mm6
    411 	xorb	(%esp,%edi,1),%cl
    412 	movb	%dl,%al
        /* Bytes 12..15 are consumed; continue with the spilled bytes 8..11. */
    413 	movl	536(%esp),%edx
    414 	movd	%mm7,%ebx
    415 	movzbl	%cl,%ecx
    416 	psrlq	$8,%mm7
    417 	movq	%mm6,%mm3
    418 	movl	%eax,%edi
    419 	psrlq	$8,%mm6
    420 	pxor	272(%esp,%ebp,8),%mm7
    421 	andb	$15,%al
    422 	psllq	$56,%mm3
    423 	pxor	%mm2,%mm6
    424 	shrl	$4,%edi
    425 	pinsrw	$2,(%esi,%ecx,2),%mm1
    426 	pxor	16(%esp,%eax,8),%mm7
    427 	roll	$8,%edx
    428 	pxor	144(%esp,%eax,8),%mm6
    429 	pxor	%mm3,%mm7
    430 	pxor	400(%esp,%ebp,8),%mm6
    431 	xorb	(%esp,%ebp,1),%bl
    432 	movb	%dl,%al
    433 	movd	%mm7,%ecx
    434 	movzbl	%bl,%ebx
    435 	psrlq	$8,%mm7
    436 	movq	%mm6,%mm3
    437 	movl	%eax,%ebp
    438 	psrlq	$8,%mm6
    439 	pxor	272(%esp,%edi,8),%mm7
    440 	andb	$15,%al
    441 	psllq	$56,%mm3
    442 	pxor	%mm1,%mm6
    443 	shrl	$4,%ebp
    444 	pinsrw	$2,(%esi,%ebx,2),%mm0
    445 	pxor	16(%esp,%eax,8),%mm7
    446 	roll	$8,%edx
    447 	pxor	144(%esp,%eax,8),%mm6
    448 	pxor	%mm3,%mm7
    449 	pxor	400(%esp,%edi,8),%mm6
    450 	xorb	(%esp,%edi,1),%cl
    451 	movb	%dl,%al
    452 	movd	%mm7,%ebx
    453 	movzbl	%cl,%ecx
    454 	psrlq	$8,%mm7
    455 	movq	%mm6,%mm3
    456 	movl	%eax,%edi
    457 	psrlq	$8,%mm6
    458 	pxor	272(%esp,%ebp,8),%mm7
    459 	andb	$15,%al
    460 	psllq	$56,%mm3
    461 	pxor	%mm0,%mm6
    462 	shrl	$4,%edi
    463 	pinsrw	$2,(%esi,%ecx,2),%mm2
    464 	pxor	16(%esp,%eax,8),%mm7
    465 	roll	$8,%edx
    466 	pxor	144(%esp,%eax,8),%mm6
    467 	pxor	%mm3,%mm7
    468 	pxor	400(%esp,%ebp,8),%mm6
    469 	xorb	(%esp,%ebp,1),%bl
    470 	movb	%dl,%al
    471 	movd	%mm7,%ecx
    472 	movzbl	%bl,%ebx
    473 	psrlq	$8,%mm7
    474 	movq	%mm6,%mm3
    475 	movl	%eax,%ebp
    476 	psrlq	$8,%mm6
    477 	pxor	272(%esp,%edi,8),%mm7
    478 	andb	$15,%al
    479 	psllq	$56,%mm3
    480 	pxor	%mm2,%mm6
    481 	shrl	$4,%ebp
    482 	pinsrw	$2,(%esi,%ebx,2),%mm1
    483 	pxor	16(%esp,%eax,8),%mm7
    484 	roll	$8,%edx
    485 	pxor	144(%esp,%eax,8),%mm6
    486 	pxor	%mm3,%mm7
    487 	pxor	400(%esp,%edi,8),%mm6
    488 	xorb	(%esp,%edi,1),%cl
    489 	movb	%dl,%al
        /* Continue with the spilled bytes 4..7. */
    490 	movl	532(%esp),%edx
    491 	movd	%mm7,%ebx
    492 	movzbl	%cl,%ecx
    493 	psrlq	$8,%mm7
    494 	movq	%mm6,%mm3
    495 	movl	%eax,%edi
    496 	psrlq	$8,%mm6
    497 	pxor	272(%esp,%ebp,8),%mm7
    498 	andb	$15,%al
    499 	psllq	$56,%mm3
    500 	pxor	%mm1,%mm6
    501 	shrl	$4,%edi
    502 	pinsrw	$2,(%esi,%ecx,2),%mm0
    503 	pxor	16(%esp,%eax,8),%mm7
    504 	roll	$8,%edx
    505 	pxor	144(%esp,%eax,8),%mm6
    506 	pxor	%mm3,%mm7
    507 	pxor	400(%esp,%ebp,8),%mm6
    508 	xorb	(%esp,%ebp,1),%bl
    509 	movb	%dl,%al
    510 	movd	%mm7,%ecx
    511 	movzbl	%bl,%ebx
    512 	psrlq	$8,%mm7
    513 	movq	%mm6,%mm3
    514 	movl	%eax,%ebp
    515 	psrlq	$8,%mm6
    516 	pxor	272(%esp,%edi,8),%mm7
    517 	andb	$15,%al
    518 	psllq	$56,%mm3
    519 	pxor	%mm0,%mm6
    520 	shrl	$4,%ebp
    521 	pinsrw	$2,(%esi,%ebx,2),%mm2
    522 	pxor	16(%esp,%eax,8),%mm7
    523 	roll	$8,%edx
    524 	pxor	144(%esp,%eax,8),%mm6
    525 	pxor	%mm3,%mm7
    526 	pxor	400(%esp,%edi,8),%mm6
    527 	xorb	(%esp,%edi,1),%cl
    528 	movb	%dl,%al
    529 	movd	%mm7,%ebx
    530 	movzbl	%cl,%ecx
    531 	psrlq	$8,%mm7
    532 	movq	%mm6,%mm3
    533 	movl	%eax,%edi
    534 	psrlq	$8,%mm6
    535 	pxor	272(%esp,%ebp,8),%mm7
    536 	andb	$15,%al
    537 	psllq	$56,%mm3
    538 	pxor	%mm2,%mm6
    539 	shrl	$4,%edi
    540 	pinsrw	$2,(%esi,%ecx,2),%mm1
    541 	pxor	16(%esp,%eax,8),%mm7
    542 	roll	$8,%edx
    543 	pxor	144(%esp,%eax,8),%mm6
    544 	pxor	%mm3,%mm7
    545 	pxor	400(%esp,%ebp,8),%mm6
    546 	xorb	(%esp,%ebp,1),%bl
    547 	movb	%dl,%al
    548 	movd	%mm7,%ecx
    549 	movzbl	%bl,%ebx
    550 	psrlq	$8,%mm7
    551 	movq	%mm6,%mm3
    552 	movl	%eax,%ebp
    553 	psrlq	$8,%mm6
    554 	pxor	272(%esp,%edi,8),%mm7
    555 	andb	$15,%al
    556 	psllq	$56,%mm3
    557 	pxor	%mm1,%mm6
    558 	shrl	$4,%ebp
    559 	pinsrw	$2,(%esi,%ebx,2),%mm0
    560 	pxor	16(%esp,%eax,8),%mm7
    561 	roll	$8,%edx
    562 	pxor	144(%esp,%eax,8),%mm6
    563 	pxor	%mm3,%mm7
    564 	pxor	400(%esp,%edi,8),%mm6
    565 	xorb	(%esp,%edi,1),%cl
    566 	movb	%dl,%al
        /* Continue with the spilled bytes 0..3. */
    567 	movl	528(%esp),%edx
    568 	movd	%mm7,%ebx
    569 	movzbl	%cl,%ecx
    570 	psrlq	$8,%mm7
    571 	movq	%mm6,%mm3
    572 	movl	%eax,%edi
    573 	psrlq	$8,%mm6
    574 	pxor	272(%esp,%ebp,8),%mm7
    575 	andb	$15,%al
    576 	psllq	$56,%mm3
    577 	pxor	%mm0,%mm6
    578 	shrl	$4,%edi
    579 	pinsrw	$2,(%esi,%ecx,2),%mm2
    580 	pxor	16(%esp,%eax,8),%mm7
    581 	roll	$8,%edx
    582 	pxor	144(%esp,%eax,8),%mm6
    583 	pxor	%mm3,%mm7
    584 	pxor	400(%esp,%ebp,8),%mm6
    585 	xorb	(%esp,%ebp,1),%bl
    586 	movb	%dl,%al
    587 	movd	%mm7,%ecx
    588 	movzbl	%bl,%ebx
    589 	psrlq	$8,%mm7
    590 	movq	%mm6,%mm3
    591 	movl	%eax,%ebp
    592 	psrlq	$8,%mm6
    593 	pxor	272(%esp,%edi,8),%mm7
    594 	andb	$15,%al
    595 	psllq	$56,%mm3
    596 	pxor	%mm2,%mm6
    597 	shrl	$4,%ebp
    598 	pinsrw	$2,(%esi,%ebx,2),%mm1
    599 	pxor	16(%esp,%eax,8),%mm7
    600 	roll	$8,%edx
    601 	pxor	144(%esp,%eax,8),%mm6
    602 	pxor	%mm3,%mm7
    603 	pxor	400(%esp,%edi,8),%mm6
    604 	xorb	(%esp,%edi,1),%cl
    605 	movb	%dl,%al
    606 	movd	%mm7,%ebx
    607 	movzbl	%cl,%ecx
    608 	psrlq	$8,%mm7
    609 	movq	%mm6,%mm3
    610 	movl	%eax,%edi
    611 	psrlq	$8,%mm6
    612 	pxor	272(%esp,%ebp,8),%mm7
    613 	andb	$15,%al
    614 	psllq	$56,%mm3
    615 	pxor	%mm1,%mm6
    616 	shrl	$4,%edi
    617 	pinsrw	$2,(%esi,%ecx,2),%mm0
    618 	pxor	16(%esp,%eax,8),%mm7
    619 	roll	$8,%edx
    620 	pxor	144(%esp,%eax,8),%mm6
    621 	pxor	%mm3,%mm7
    622 	pxor	400(%esp,%ebp,8),%mm6
    623 	xorb	(%esp,%ebp,1),%bl
    624 	movb	%dl,%al
    625 	movd	%mm7,%ecx
    626 	movzbl	%bl,%ebx
    627 	psrlq	$8,%mm7
    628 	movq	%mm6,%mm3
    629 	movl	%eax,%ebp
    630 	psrlq	$8,%mm6
    631 	pxor	272(%esp,%edi,8),%mm7
    632 	andb	$15,%al
    633 	psllq	$56,%mm3
    634 	pxor	%mm0,%mm6
    635 	shrl	$4,%ebp
    636 	pinsrw	$2,(%esi,%ebx,2),%mm2
    637 	pxor	16(%esp,%eax,8),%mm7
    638 	roll	$8,%edx
    639 	pxor	144(%esp,%eax,8),%mm6
    640 	pxor	%mm3,%mm7
    641 	pxor	400(%esp,%edi,8),%mm6
    642 	xorb	(%esp,%edi,1),%cl
    643 	movb	%dl,%al
        /* NOTE(review): this load reads just below the spill area (the last four
           bytes of the 400..527 table) and %edx is overwritten at "movd %mm7,%edx"
           further down before it is read again -- looks like a leftover of the
           unrolling pattern; confirm against the generator script. */
    644 	movl	524(%esp),%edx
    645 	movd	%mm7,%ebx
    646 	movzbl	%cl,%ecx
    647 	psrlq	$8,%mm7
    648 	movq	%mm6,%mm3
    649 	movl	%eax,%edi
    650 	psrlq	$8,%mm6
    651 	pxor	272(%esp,%ebp,8),%mm7
    652 	andb	$15,%al
    653 	psllq	$56,%mm3
    654 	pxor	%mm2,%mm6
    655 	shrl	$4,%edi
    656 	pinsrw	$2,(%esi,%ecx,2),%mm1
    657 	pxor	16(%esp,%eax,8),%mm7
    658 	pxor	144(%esp,%eax,8),%mm6
    659 	xorb	(%esp,%ebp,1),%bl
    660 	pxor	%mm3,%mm7
    661 	pxor	400(%esp,%ebp,8),%mm6
    662 	movzbl	%bl,%ebx
    663 	pxor	%mm2,%mm2
    664 	psllq	$4,%mm1
        /* Final half-step: shift by 4 bits only (instead of 8) and fold in the
           entry for the last nibble index (%edi). */
    665 	movd	%mm7,%ecx
    666 	psrlq	$4,%mm7
    667 	movq	%mm6,%mm3
    668 	psrlq	$4,%mm6
    669 	shll	$4,%ecx
    670 	pxor	16(%esp,%edi,8),%mm7
    671 	psllq	$60,%mm3
    672 	movzbl	%cl,%ecx
    673 	pxor	%mm3,%mm7
    674 	pxor	144(%esp,%edi,8),%mm6
    675 	pinsrw	$2,(%esi,%ebx,2),%mm0
    676 	pxor	%mm1,%mm6
    677 	movd	%mm7,%edx
    678 	pinsrw	$3,(%esi,%ecx,2),%mm2
    679 	psllq	$12,%mm0
    680 	pxor	%mm0,%mm6
    681 	psrlq	$32,%mm7
    682 	pxor	%mm2,%mm6
        /* Reload the input pointer and put the value back into the layout used at
           loop entry: %edx/%ebx byte-swapped, %mm6 byte-reversed (bytes swapped
           within each word, then the four words reversed by pshufw $27). */
    683 	movl	548(%esp),%ecx
    684 	movd	%mm7,%ebx
    685 	movq	%mm6,%mm3
    686 	psllw	$8,%mm6
    687 	psrlw	$8,%mm3
    688 	por	%mm3,%mm6
    689 	bswap	%edx
    690 	pshufw	$27,%mm6,%mm6
    691 	bswap	%ebx
        /* Loop until the input pointer reaches the end pointer exactly. */
    692 	cmpl	552(%esp),%ecx
    693 	jne	L004outer
        /* Store the result through arg1, restore the caller's %esp, clear MMX state. */
    694 	movl	544(%esp),%eax
    695 	movl	%edx,12(%eax)
    696 	movl	%ebx,8(%eax)
    697 	movq	%mm6,(%eax)
    698 	movl	556(%esp),%esp
    699 	emms
    700 	popl	%edi
    701 	popl	%esi
    702 	popl	%ebx
    703 	popl	%ebp
    704 	ret
    705 .globl	_gcm_init_clmul
    706 .private_extern	_gcm_init_clmul
    707 .align	4
        /*
         * _gcm_init_clmul(arg1, arg2) -- 32-bit x86, AT&T syntax, SSE2/SSSE3 plus
         * carry-less multiply.  The PCLMULQDQ and PALIGNR instructions are emitted
         * as raw .byte sequences; each group is decoded in the comment above it.
         *
         * Stack arguments: arg1 -> %edx (output, 48 bytes written),
         *                  arg2 -> %eax (input, 16 bytes read).
         * Output layout as written below:
         *    0(arg1)  the adjusted input value (halves swapped, shifted left by
         *             one bit, conditionally XORed with the constant at Lbswap+16)
         *   16(arg1)  that value multiplied by itself (carry-less) and folded by
         *             the shift/XOR sequence -- presumably reduction modulo the
         *             GHASH polynomial, i.e. H^2; confirm against the reference
         *   32(arg1)  one qword each of (high half XOR low half) for the two
         *             values above, packed by palignr
         *
         * Leaf routine: pushes nothing.  Clobbers %eax, %ecx, %edx, %xmm0-%xmm5,
         * flags.
         */
    708 _gcm_init_clmul:
    709 L_gcm_init_clmul_begin:
    710 	movl	4(%esp),%edx
    711 	movl	8(%esp),%eax
        /* Position-independent address of Lbswap in %ecx. */
    712 	call	L005pic
    713 L005pic:
    714 	popl	%ecx
    715 	leal	Lbswap-L005pic(%ecx),%ecx
    716 	movdqu	(%eax),%xmm2
        /* Swap the two 64-bit halves. */
    717 	pshufd	$78,%xmm2,%xmm2
        /* 128-bit shift left by one: per-qword shift plus the carried bit (xmm3).
           xmm5 becomes an all-ones mask per dword when the top dword (broadcast
           into xmm4) is negative, and then selects the constant at 16(%ecx). */
    718 	pshufd	$255,%xmm2,%xmm4
    719 	movdqa	%xmm2,%xmm3
    720 	psllq	$1,%xmm2
    721 	pxor	%xmm5,%xmm5
    722 	psrlq	$63,%xmm3
    723 	pcmpgtd	%xmm4,%xmm5
    724 	pslldq	$8,%xmm3
    725 	por	%xmm3,%xmm2
    726 	pand	16(%ecx),%xmm5
    727 	pxor	%xmm5,%xmm2
        /* Multiply xmm2 by itself using three carry-less products: low halves,
           high halves, and the XORed halves (xmm3, xmm4). */
    728 	movdqa	%xmm2,%xmm0
    729 	movdqa	%xmm0,%xmm1
    730 	pshufd	$78,%xmm0,%xmm3
    731 	pshufd	$78,%xmm2,%xmm4
    732 	pxor	%xmm0,%xmm3
    733 	pxor	%xmm2,%xmm4
        /* pclmulqdq $0x00,%xmm2,%xmm0 ; pclmulqdq $0x11,%xmm2,%xmm1 ;
           pclmulqdq $0x00,%xmm4,%xmm3 */
    734 .byte	102,15,58,68,194,0
    735 .byte	102,15,58,68,202,17
    736 .byte	102,15,58,68,220,0
        /* Combine the middle product into the 256-bit result xmm1:xmm0. */
    737 	xorps	%xmm0,%xmm3
    738 	xorps	%xmm1,%xmm3
    739 	movdqa	%xmm3,%xmm4
    740 	psrldq	$8,%xmm3
    741 	pslldq	$8,%xmm4
    742 	pxor	%xmm3,%xmm1
    743 	pxor	%xmm4,%xmm0
        /* Fold xmm1:xmm0 down to 128 bits in xmm0 (shifts by 5, 1, 57 and then
           1, 5, 1). */
    744 	movdqa	%xmm0,%xmm4
    745 	movdqa	%xmm0,%xmm3
    746 	psllq	$5,%xmm0
    747 	pxor	%xmm0,%xmm3
    748 	psllq	$1,%xmm0
    749 	pxor	%xmm3,%xmm0
    750 	psllq	$57,%xmm0
    751 	movdqa	%xmm0,%xmm3
    752 	pslldq	$8,%xmm0
    753 	psrldq	$8,%xmm3
    754 	pxor	%xmm4,%xmm0
    755 	pxor	%xmm3,%xmm1
    756 	movdqa	%xmm0,%xmm4
    757 	psrlq	$1,%xmm0
    758 	pxor	%xmm4,%xmm1
    759 	pxor	%xmm0,%xmm4
    760 	psrlq	$5,%xmm0
    761 	pxor	%xmm4,%xmm0
    762 	psrlq	$1,%xmm0
    763 	pxor	%xmm1,%xmm0
        /* Store both values and their XORed halves. */
    764 	pshufd	$78,%xmm2,%xmm3
    765 	pshufd	$78,%xmm0,%xmm4
    766 	pxor	%xmm2,%xmm3
    767 	movdqu	%xmm2,(%edx)
    768 	pxor	%xmm0,%xmm4
    769 	movdqu	%xmm0,16(%edx)
        /* palignr $8,%xmm3,%xmm4 */
    770 .byte	102,15,58,15,227,8
    771 	movdqu	%xmm4,32(%edx)
    772 	ret
    773 .globl	_gcm_gmult_clmul
    774 .private_extern	_gcm_gmult_clmul
    775 .align	4
        /*
         * _gcm_gmult_clmul(arg1, arg2) -- 32-bit x86, AT&T syntax, SSE2/SSSE3 plus
         * carry-less multiply (PCLMULQDQ and PSHUFB are emitted as raw .byte
         * sequences, decoded in the comments above them).
         *
         * Stack arguments: arg1 -> %eax : 16-byte value, read and overwritten
         *                                 with the result;
         *                  arg2 -> %edx : table; 16 bytes are read at offset 0
         *                                 and 16 bytes at offset 32.
         * The value is byte-reversed with the Lbswap mask, multiplied (carry-less)
         * by the entry at 0(arg2), folded to 128 bits by the shift/XOR sequence
         * (presumably reduction modulo the GHASH polynomial -- confirm), and
         * byte-reversed again before the store.
         *
         * Leaf routine: pushes nothing.  Clobbers %eax, %ecx, %edx, %xmm0-%xmm5,
         * flags.
         */
    776 _gcm_gmult_clmul:
    777 L_gcm_gmult_clmul_begin:
    778 	movl	4(%esp),%eax
    779 	movl	8(%esp),%edx
        /* Position-independent address of Lbswap in %ecx. */
    780 	call	L006pic
    781 L006pic:
    782 	popl	%ecx
    783 	leal	Lbswap-L006pic(%ecx),%ecx
    784 	movdqu	(%eax),%xmm0
        /* Aligned load: relies on Lbswap being at least 16-byte aligned. */
    785 	movdqa	(%ecx),%xmm5
    786 	movups	(%edx),%xmm2
        /* pshufb %xmm5,%xmm0 */
    787 .byte	102,15,56,0,197
    788 	movups	32(%edx),%xmm4
        /* Three carry-less products: low halves, high halves, and the XORed
           halves of the value (xmm3) times the low qword of the entry at 32(arg2). */
    789 	movdqa	%xmm0,%xmm1
    790 	pshufd	$78,%xmm0,%xmm3
    791 	pxor	%xmm0,%xmm3
        /* pclmulqdq $0x00,%xmm2,%xmm0 ; pclmulqdq $0x11,%xmm2,%xmm1 ;
           pclmulqdq $0x00,%xmm4,%xmm3 */
    792 .byte	102,15,58,68,194,0
    793 .byte	102,15,58,68,202,17
    794 .byte	102,15,58,68,220,0
        /* Combine the middle product into the 256-bit result xmm1:xmm0. */
    795 	xorps	%xmm0,%xmm3
    796 	xorps	%xmm1,%xmm3
    797 	movdqa	%xmm3,%xmm4
    798 	psrldq	$8,%xmm3
    799 	pslldq	$8,%xmm4
    800 	pxor	%xmm3,%xmm1
    801 	pxor	%xmm4,%xmm0
        /* Fold xmm1:xmm0 down to 128 bits in xmm0. */
    802 	movdqa	%xmm0,%xmm4
    803 	movdqa	%xmm0,%xmm3
    804 	psllq	$5,%xmm0
    805 	pxor	%xmm0,%xmm3
    806 	psllq	$1,%xmm0
    807 	pxor	%xmm3,%xmm0
    808 	psllq	$57,%xmm0
    809 	movdqa	%xmm0,%xmm3
    810 	pslldq	$8,%xmm0
    811 	psrldq	$8,%xmm3
    812 	pxor	%xmm4,%xmm0
    813 	pxor	%xmm3,%xmm1
    814 	movdqa	%xmm0,%xmm4
    815 	psrlq	$1,%xmm0
    816 	pxor	%xmm4,%xmm1
    817 	pxor	%xmm0,%xmm4
    818 	psrlq	$5,%xmm0
    819 	pxor	%xmm4,%xmm0
    820 	psrlq	$1,%xmm0
    821 	pxor	%xmm1,%xmm0
        /* pshufb %xmm5,%xmm0 -- back to the in-memory byte order */
    822 .byte	102,15,56,0,197
    823 	movdqu	%xmm0,(%eax)
    824 	ret
    825 .globl	_gcm_ghash_clmul
    826 .private_extern	_gcm_ghash_clmul
    827 .align	4
        /*
         * _gcm_ghash_clmul(arg1, arg2, arg3, arg4) -- 32-bit x86, AT&T syntax,
         * SSE2/SSSE3 plus carry-less multiply (PCLMULQDQ and PSHUFB are emitted
         * as raw .byte sequences, decoded in the comments above them).
         *
         * Stack arguments as read below:
         *   arg1 -> %eax : 16-byte running value, loaded at entry, stored at exit
         *   arg2 -> %edx : table; 16-byte entries at offsets 0, 16 and 32 are read
         *   arg3 -> %esi : input pointer
         *   arg4 -> %ebx : byte count
         * Control flow by byte count:
         *   exactly 16      -> L008odd_tail only (single-block multiply)
         *   otherwise       -> two blocks are loaded up front, L010mod_loop
         *                      consumes 32 bytes per pass, L009even_tail finishes
         *                      the pending pair, and L008odd_tail handles one
         *                      remaining block when the count is an odd number
         *                      of blocks.
         * A count of 0 is not special-cased (it takes the two-block path), and
         * only whole 16-byte blocks are handled, so callers presumably pass a
         * non-zero multiple of 16 -- confirm at the call sites.
         *
         * %xmm5 alternates between the Lbswap byte-reversal mask and the table
         * entry at 32(arg2); it is reloaded from (%ecx) before the mask is needed
         * again.
         *
         * Preserves %ebp, %ebx, %esi, %edi (pushed/popped here).
         * Clobbers  %eax, %ecx, %edx, %xmm0-%xmm7, flags.
         */
    828 _gcm_ghash_clmul:
    829 L_gcm_ghash_clmul_begin:
    830 	pushl	%ebp
    831 	pushl	%ebx
    832 	pushl	%esi
    833 	pushl	%edi
    834 	movl	20(%esp),%eax
    835 	movl	24(%esp),%edx
    836 	movl	28(%esp),%esi
    837 	movl	32(%esp),%ebx
        /* Position-independent address of Lbswap in %ecx. */
    838 	call	L007pic
    839 L007pic:
    840 	popl	%ecx
    841 	leal	Lbswap-L007pic(%ecx),%ecx
    842 	movdqu	(%eax),%xmm0
    843 	movdqa	(%ecx),%xmm5
    844 	movdqu	(%edx),%xmm2
        /* pshufb %xmm5,%xmm0 -- byte-reverse the running value */
    845 .byte	102,15,56,0,197
        /* Exactly one block: go straight to the single-block path. */
    846 	subl	$16,%ebx
    847 	jz	L008odd_tail
        /* Load and byte-reverse two input blocks; the first is XORed into the
           running value, the second (xmm6, copy in xmm7) is multiplied by the
           entry at 0(arg2) right away. */
    848 	movdqu	(%esi),%xmm3
    849 	movdqu	16(%esi),%xmm6
        /* pshufb %xmm5,%xmm3 ; pshufb %xmm5,%xmm6 */
    850 .byte	102,15,56,0,221
    851 .byte	102,15,56,0,245
    852 	movdqu	32(%edx),%xmm5
    853 	pxor	%xmm3,%xmm0
    854 	pshufd	$78,%xmm6,%xmm3
    855 	movdqa	%xmm6,%xmm7
    856 	pxor	%xmm6,%xmm3
    857 	leal	32(%esi),%esi
        /* pclmulqdq $0x00,%xmm2,%xmm6 ; pclmulqdq $0x11,%xmm2,%xmm7 ;
           pclmulqdq $0x00,%xmm5,%xmm3 */
    858 .byte	102,15,58,68,242,0
    859 .byte	102,15,58,68,250,17
    860 .byte	102,15,58,68,221,0
        /* From here on xmm2 holds the entry at 16(arg2) for the running value. */
    861 	movups	16(%edx),%xmm2
    862 	nop
        /* Unsigned test: leave for the tail when no further full pair remains. */
    863 	subl	$32,%ebx
    864 	jbe	L009even_tail
    865 	jmp	L010mod_loop
    866 .align	5,0x90
        /*
         * Main loop, 32 bytes per pass.  The multiply of the running value by the
         * entry at 16(arg2), its folding to 128 bits, and the loading, reversing
         * and multiplying of the next two blocks are interleaved, so adjacent
         * instructions often belong to different logical steps.
         */
    867 L010mod_loop:
    868 	pshufd	$78,%xmm0,%xmm4
    869 	movdqa	%xmm0,%xmm1
    870 	pxor	%xmm0,%xmm4
    871 	nop
        /* pclmulqdq $0x00,%xmm2,%xmm0 ; pclmulqdq $0x11,%xmm2,%xmm1 ;
           pclmulqdq $0x10,%xmm5,%xmm4 */
    872 .byte	102,15,58,68,194,0
    873 .byte	102,15,58,68,202,17
    874 .byte	102,15,58,68,229,16
    875 	movups	(%edx),%xmm2
    876 	xorps	%xmm6,%xmm0
        /* xmm5 becomes the byte-reversal mask again. */
    877 	movdqa	(%ecx),%xmm5
    878 	xorps	%xmm7,%xmm1
    879 	movdqu	(%esi),%xmm7
    880 	pxor	%xmm0,%xmm3
    881 	movdqu	16(%esi),%xmm6
    882 	pxor	%xmm1,%xmm3
        /* pshufb %xmm5,%xmm7 */
    883 .byte	102,15,56,0,253
    884 	pxor	%xmm3,%xmm4
    885 	movdqa	%xmm4,%xmm3
    886 	psrldq	$8,%xmm4
    887 	pslldq	$8,%xmm3
    888 	pxor	%xmm4,%xmm1
    889 	pxor	%xmm3,%xmm0
        /* pshufb %xmm5,%xmm6 */
    890 .byte	102,15,56,0,245
    891 	pxor	%xmm7,%xmm1
    892 	movdqa	%xmm6,%xmm7
    893 	movdqa	%xmm0,%xmm4
    894 	movdqa	%xmm0,%xmm3
    895 	psllq	$5,%xmm0
    896 	pxor	%xmm0,%xmm3
    897 	psllq	$1,%xmm0
    898 	pxor	%xmm3,%xmm0
        /* pclmulqdq $0x00,%xmm2,%xmm6 */
    899 .byte	102,15,58,68,242,0
        /* xmm5 switches back to the entry at 32(arg2). */
    900 	movups	32(%edx),%xmm5
    901 	psllq	$57,%xmm0
    902 	movdqa	%xmm0,%xmm3
    903 	pslldq	$8,%xmm0
    904 	psrldq	$8,%xmm3
    905 	pxor	%xmm4,%xmm0
    906 	pxor	%xmm3,%xmm1
    907 	pshufd	$78,%xmm7,%xmm3
    908 	movdqa	%xmm0,%xmm4
    909 	psrlq	$1,%xmm0
    910 	pxor	%xmm7,%xmm3
    911 	pxor	%xmm4,%xmm1
        /* pclmulqdq $0x11,%xmm2,%xmm7 */
    912 .byte	102,15,58,68,250,17
    913 	movups	16(%edx),%xmm2
    914 	pxor	%xmm0,%xmm4
    915 	psrlq	$5,%xmm0
    916 	pxor	%xmm4,%xmm0
    917 	psrlq	$1,%xmm0
    918 	pxor	%xmm1,%xmm0
        /* pclmulqdq $0x00,%xmm5,%xmm3 */
    919 .byte	102,15,58,68,221,0
    920 	leal	32(%esi),%esi
        /* Unsigned test: loop while more than 32 bytes were left before this
           subtraction. */
    921 	subl	$32,%ebx
    922 	ja	L010mod_loop
        /* Finish the pending pair: multiply the running value by the entry at
           16(arg2), add the products already computed for the second block
           (xmm6, xmm7, xmm3), and fold to 128 bits. */
    923 L009even_tail:
    924 	pshufd	$78,%xmm0,%xmm4
    925 	movdqa	%xmm0,%xmm1
    926 	pxor	%xmm0,%xmm4
        /* pclmulqdq $0x00,%xmm2,%xmm0 ; pclmulqdq $0x11,%xmm2,%xmm1 ;
           pclmulqdq $0x10,%xmm5,%xmm4 */
    927 .byte	102,15,58,68,194,0
    928 .byte	102,15,58,68,202,17
    929 .byte	102,15,58,68,229,16
    930 	movdqa	(%ecx),%xmm5
    931 	xorps	%xmm6,%xmm0
    932 	xorps	%xmm7,%xmm1
    933 	pxor	%xmm0,%xmm3
    934 	pxor	%xmm1,%xmm3
    935 	pxor	%xmm3,%xmm4
    936 	movdqa	%xmm4,%xmm3
    937 	psrldq	$8,%xmm4
    938 	pslldq	$8,%xmm3
    939 	pxor	%xmm4,%xmm1
    940 	pxor	%xmm3,%xmm0
    941 	movdqa	%xmm0,%xmm4
    942 	movdqa	%xmm0,%xmm3
    943 	psllq	$5,%xmm0
    944 	pxor	%xmm0,%xmm3
    945 	psllq	$1,%xmm0
    946 	pxor	%xmm3,%xmm0
    947 	psllq	$57,%xmm0
    948 	movdqa	%xmm0,%xmm3
    949 	pslldq	$8,%xmm0
    950 	psrldq	$8,%xmm3
    951 	pxor	%xmm4,%xmm0
    952 	pxor	%xmm3,%xmm1
    953 	movdqa	%xmm0,%xmm4
    954 	psrlq	$1,%xmm0
    955 	pxor	%xmm4,%xmm1
    956 	pxor	%xmm0,%xmm4
    957 	psrlq	$5,%xmm0
    958 	pxor	%xmm4,%xmm0
    959 	psrlq	$1,%xmm0
    960 	pxor	%xmm1,%xmm0
        /* %ebx == 0 here means exactly one more 16-byte block remains. */
    961 	testl	%ebx,%ebx
    962 	jnz	L011done
    963 	movups	(%edx),%xmm2
        /* Single block: XOR it into the running value and multiply by the entry
           at 0(arg2); the XORed halves of that entry are recomputed here (xmm4). */
    964 L008odd_tail:
    965 	movdqu	(%esi),%xmm3
        /* pshufb %xmm5,%xmm3 */
    966 .byte	102,15,56,0,221
    967 	pxor	%xmm3,%xmm0
    968 	movdqa	%xmm0,%xmm1
    969 	pshufd	$78,%xmm0,%xmm3
    970 	pshufd	$78,%xmm2,%xmm4
    971 	pxor	%xmm0,%xmm3
    972 	pxor	%xmm2,%xmm4
        /* pclmulqdq $0x00,%xmm2,%xmm0 ; pclmulqdq $0x11,%xmm2,%xmm1 ;
           pclmulqdq $0x00,%xmm4,%xmm3 */
    973 .byte	102,15,58,68,194,0
    974 .byte	102,15,58,68,202,17
    975 .byte	102,15,58,68,220,0
    976 	xorps	%xmm0,%xmm3
    977 	xorps	%xmm1,%xmm3
    978 	movdqa	%xmm3,%xmm4
    979 	psrldq	$8,%xmm3
    980 	pslldq	$8,%xmm4
    981 	pxor	%xmm3,%xmm1
    982 	pxor	%xmm4,%xmm0
    983 	movdqa	%xmm0,%xmm4
    984 	movdqa	%xmm0,%xmm3
    985 	psllq	$5,%xmm0
    986 	pxor	%xmm0,%xmm3
    987 	psllq	$1,%xmm0
    988 	pxor	%xmm3,%xmm0
    989 	psllq	$57,%xmm0
    990 	movdqa	%xmm0,%xmm3
    991 	pslldq	$8,%xmm0
    992 	psrldq	$8,%xmm3
    993 	pxor	%xmm4,%xmm0
    994 	pxor	%xmm3,%xmm1
    995 	movdqa	%xmm0,%xmm4
    996 	psrlq	$1,%xmm0
    997 	pxor	%xmm4,%xmm1
    998 	pxor	%xmm0,%xmm4
    999 	psrlq	$5,%xmm0
   1000 	pxor	%xmm4,%xmm0
   1001 	psrlq	$1,%xmm0
   1002 	pxor	%xmm1,%xmm0
        /* Byte-reverse the result back to memory order and store it through arg1. */
   1003 L011done:
        /* pshufb %xmm5,%xmm0 */
   1004 .byte	102,15,56,0,197
   1005 	movdqu	%xmm0,(%eax)
   1006 	popl	%edi
   1007 	popl	%esi
   1008 	popl	%ebx
   1009 	popl	%ebp
   1010 	ret
   1011 .align	6,0x90
        /*
         * Constant data, placed in the text section after the code.
         * NOTE(review): the directives in this file (.private_extern, `_`-prefixed
         * globals) suggest a Mach-O assembler, where .align takes a power of two,
         * so ".align 6" would mean 64-byte alignment -- confirm for the toolchain
         * in use.  The movdqa loads from Lbswap need at least 16-byte alignment.
         *
         * Lbswap:    16-byte shuffle mask 15..0 (reverses byte order when used
         *            with pshufb), followed at Lbswap+16 by the 16-byte constant
         *            that _gcm_init_clmul masks in with pand (first byte 1, last
         *            byte 194 = 0xC2, zeros in between).
         */
   1012 Lbswap:
   1013 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   1014 .byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
   1015 .align	6,0x90
        /* Lrem_8bit: 256 16-bit entries, fetched by _gcm_ghash_4bit_mmx with
           pinsrw through (%esi,index,2). */
   1016 Lrem_8bit:
   1017 .value	0,450,900,582,1800,1738,1164,1358
   1018 .value	3600,4050,3476,3158,2328,2266,2716,2910
   1019 .value	7200,7650,8100,7782,6952,6890,6316,6510
   1020 .value	4656,5106,4532,4214,5432,5370,5820,6014
   1021 .value	14400,14722,15300,14854,16200,16010,15564,15630
   1022 .value	13904,14226,13780,13334,12632,12442,13020,13086
   1023 .value	9312,9634,10212,9766,9064,8874,8428,8494
   1024 .value	10864,11186,10740,10294,11640,11450,12028,12094
   1025 .value	28800,28994,29444,29382,30600,30282,29708,30158
   1026 .value	32400,32594,32020,31958,31128,30810,31260,31710
   1027 .value	27808,28002,28452,28390,27560,27242,26668,27118
   1028 .value	25264,25458,24884,24822,26040,25722,26172,26622
   1029 .value	18624,18690,19268,19078,20424,19978,19532,19854
   1030 .value	18128,18194,17748,17558,16856,16410,16988,17310
   1031 .value	21728,21794,22372,22182,21480,21034,20588,20910
   1032 .value	23280,23346,22900,22710,24056,23610,24188,24510
   1033 .value	57600,57538,57988,58182,58888,59338,58764,58446
   1034 .value	61200,61138,60564,60758,59416,59866,60316,59998
   1035 .value	64800,64738,65188,65382,64040,64490,63916,63598
   1036 .value	62256,62194,61620,61814,62520,62970,63420,63102
   1037 .value	55616,55426,56004,56070,56904,57226,56780,56334
   1038 .value	55120,54930,54484,54550,53336,53658,54236,53790
   1039 .value	50528,50338,50916,50982,49768,50090,49644,49198
   1040 .value	52080,51890,51444,51510,52344,52666,53244,52798
   1041 .value	37248,36930,37380,37830,38536,38730,38156,38094
   1042 .value	40848,40530,39956,40406,39064,39258,39708,39646
   1043 .value	36256,35938,36388,36838,35496,35690,35116,35054
   1044 .value	33712,33394,32820,33270,33976,34170,34620,34558
   1045 .value	43456,43010,43588,43910,44744,44810,44364,44174
   1046 .value	42960,42514,42068,42390,41176,41242,41820,41630
   1047 .value	46560,46114,46692,47014,45800,45866,45420,45230
   1048 .value	48112,47666,47220,47542,48376,48442,49020,48830
   1049 .align	6,0x90
        /* Lrem_4bit: 16 eight-byte entries (low dword always 0), indexed by
           _gcm_gmult_4bit_mmx through (%eax,index,8). */
   1050 Lrem_4bit:
   1051 .long	0,0,0,471859200,0,943718400,0,610271232
   1052 .long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
   1053 .long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
   1054 .long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
        /* NUL-terminated ASCII identification string:
           "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
   1055 .byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
   1056 .byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
   1057 .byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
   1058 .byte	0
   1059 #endif
   1060