#if defined(__i386__)
.file	"ghash-x86.S"
.text
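# Note: this file looks like perlasm output (ghash-x86.pl) targeting 32-bit
# x86 on Darwin/Mac OS X; it provides the GHASH primitives used by GCM mode
# in plain-x86, MMX and PCLMULQDQ (CLMUL) flavours.
#
# gcm_gmult_4bit_x86: multiply the 128-bit hash value Xi by H in GF(2^128)
# using the 4-bit lookup table Htable. The immediates stored at
# 16(%esp)..76(%esp) below are the rem_4bit reduction constants (shifted left
# by 16) built up on the stack. Presumed C prototype (an assumption, based on
# the matching GCM code):
#   void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);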
.globl	_gcm_gmult_4bit_x86
.private_extern	_gcm_gmult_4bit_x86
.align	4
_gcm_gmult_4bit_x86:
L_gcm_gmult_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%edi
	movl	108(%esp),%esi
	movl	(%edi),%ebp
	movl	4(%edi),%edx
	movl	8(%edi),%ecx
	movl	12(%edi),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
	movl	%ebp,(%esp)
	movl	%edx,4(%esp)
	movl	%ecx,8(%esp)
	movl	%ebx,12(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	L000x86_loop
.align	4,0x90
L000x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	L001x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	L000x86_loop
.align	4,0x90
L001x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
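# gcm_ghash_4bit_x86: hash `len` bytes at `inp` into Xi, one 16-byte block per
# pass of L002x86_outer_loop (the block is XORed into Xi, then the same 4-bit
# table walk as gcm_gmult_4bit_x86 is applied). Presumed C prototype (an
# assumption, based on the matching GCM code):
#   void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
#                           const uint8_t *inp, size_t len);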
.globl	_gcm_ghash_4bit_x86
.private_extern	_gcm_ghash_4bit_x86
.align	4
_gcm_ghash_4bit_x86:
L_gcm_ghash_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%ebx
	movl	108(%esp),%esi
	movl	112(%esp),%edi
	movl	116(%esp),%ecx
	addl	%edi,%ecx
	movl	%ecx,116(%esp)
	movl	(%ebx),%ebp
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	4,0x90
L002x86_outer_loop:
	xorl	12(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	L003x86_loop
.align	4,0x90
L003x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	L004x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	L003x86_loop
.align	4,0x90
L004x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi
	leal	16(%edi),%edi
	cmpl	116(%esp),%edi
	movl	%edi,112(%esp)
	jb	L002x86_outer_loop
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
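# gcm_gmult_4bit_mmx: same single-block GF(2^128) multiply as
# gcm_gmult_4bit_x86, but keeping the 128-bit accumulator in MMX registers and
# reducing via the Lrem_4bit table, located PC-relatively through the
# L005pic_point call/pop pair. Presumed C prototype (an assumption):
#   void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);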
.globl	_gcm_gmult_4bit_mmx
.private_extern	_gcm_gmult_4bit_mmx
.align	4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	call	L005pic_point
L005pic_point:
	popl	%eax
	leal	Lrem_4bit-L005pic_point(%eax),%eax
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	L006mmx_loop
.align	4,0x90
L006mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	L007mmx_break
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	L006mmx_loop
.align	4,0x90
L007mmx_break:
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
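# gcm_ghash_4bit_mmx: bulk GHASH over `len` bytes using MMX. The long prologue
# below appears to expand Htable into pre-shifted copies on the (64-byte
# aligned) stack frame so that the per-byte L009outer loop can combine table
# lookups with the Lrem_8bit reduction table reached via L008pic_point.
# Presumed C prototype (an assumption):
#   void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
#                           const uint8_t *inp, size_t len);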
.globl	_gcm_ghash_4bit_mmx
.private_extern	_gcm_ghash_4bit_mmx
.align	4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp
	call	L008pic_point
L008pic_point:
	popl	%esi
	leal	Lrem_8bit-L008pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	4,0x90
L009outer:
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	L009outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
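# gcm_init_clmul: precompute the PCLMULQDQ key schedule. H is loaded,
# converted to the bit-reflected representation used by the CLMUL path, and
# then H, H^2, and their packed Karatsuba pre-xor halves are stored at offsets
# 0, 16 and 32 of Htable. The raw .byte sequences below encode SSSE3/CLMUL
# instructions for assemblers that lack the mnemonics; for example,
# ".byte 102,15,58,68,194,0" reads as "pclmulqdq $0x00,%xmm2,%xmm0" and
# ".byte 102,15,58,15,227,8" as "palignr $8,%xmm3,%xmm4" (my decoding of the
# opcode bytes). Presumed C prototype (an assumption):
#   void gcm_init_clmul(u128 Htable[16], const uint64_t H[2]);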
.globl	_gcm_init_clmul
.private_extern	_gcm_init_clmul
.align	4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	call	L010pic
L010pic:
	popl	%ecx
	leal	Lbswap-L010pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%edx)
	ret
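# gcm_gmult_clmul: single-block GHASH multiply with PCLMULQDQ. Xi is
# byte-swapped through the Lbswap mask (the ".byte 102,15,56,0,197" lines are
# "pshufb %xmm5,%xmm0"), multiplied by H from Htable using a Karatsuba-style
# three-multiplication schedule, reduced modulo the GHASH polynomial, and
# byte-swapped back. Presumed C prototype (an assumption):
#   void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);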
.globl	_gcm_gmult_clmul
.private_extern	_gcm_gmult_clmul
.align	4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	call	L011pic
L011pic:
	popl	%ecx
	leal	Lbswap-L011pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	ret
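# gcm_ghash_clmul: bulk GHASH with PCLMULQDQ. The main L015mod_loop consumes
# two 16-byte blocks per iteration using H and H^2 from Htable, with
# L014even_tail / L013odd_tail handling the remainder. Presumed C prototype
# (an assumption):
#   void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16],
#                        const uint8_t *inp, size_t len);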
.globl	_gcm_ghash_clmul
.private_extern	_gcm_ghash_clmul
.align	4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	call	L012pic
L012pic:
	popl	%ecx
	leal	Lbswap-L012pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197
	subl	$16,%ebx
	jz	L013odd_tail
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,221,0
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	L014even_tail
	jmp	L015mod_loop
.align	5,0x90
L015mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	L015mod_loop
L014even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	L016done
	movups	(%edx),%xmm2
L013odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
L016done:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
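# Constant data: Lbswap holds the byte-reversal mask for pshufb followed by
# the 0xc2...01 reduction constant; Lrem_8bit and Lrem_4bit are the reduction
# tables used by the MMX and 4-bit code paths; the trailing .byte string is
# the "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" credit.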
.align	6,0x90
Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	6,0x90
Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	6,0x90
Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif