# (source-browser header: Home | History | Annotate | Download | only in asm)
.file	"ghash-x86.s"
.text
#----------------------------------------------------------------------
# void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16])
#
# Single-block GHASH multiplication, Xi = Xi * H in GF(2^128), using
# the 4-bit (nibble-indexed) table method.  Plain IA-32 integer code,
# no MMX/SSE.  Machine-generated from OpenSSL's ghash-x86.pl
# (CRYPTOGAMS) — do not hand-edit; regenerate instead.
#
# Stack frame (84 bytes, after the four pushes):
#    0(%esp)..12(%esp)   byte-addressable working copy of Xi
#   16(%esp)..76(%esp)   16-entry, 32-bit reduction table built from
#                        immediates (rem_4bit-style constants,
#                        pre-scaled for this 32-bit variant)
#  104(%esp) = Xi argument, 108(%esp) = Htable argument
#
# The 128-bit accumulator Z lives in %ebp:%edx:%ecx:%ebx,
# %ebx least-significant word, %ebp most-significant word.
# Nibbles are consumed low-then-high, from Xi byte 15 down to byte 0.
#----------------------------------------------------------------------
.globl	gcm_gmult_4bit_x86
.type	gcm_gmult_4bit_x86,@function
.align	16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp		# frame described above
	movl	104(%esp),%edi		# %edi = Xi
	movl	108(%esp),%esi		# %esi = Htable
	movl	(%edi),%ebp		# load Xi words into %ebp:%edx:%ecx:%ebx
	movl	4(%edi),%edx
	movl	8(%edi),%ecx
	movl	12(%edi),%ebx
	movl	$0,16(%esp)		# build the 16-entry reduction table
	movl	$471859200,20(%esp)	# (indexed by the nibble shifted out of Z)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
	movl	%ebp,(%esp)		# byte-addressable copy of Xi at (%esp)
	movl	%edx,4(%esp)
	movl	%ecx,8(%esp)
	movl	%ebx,12(%esp)
	shrl	$20,%ebx		# low nibble of Xi byte 15,
	andl	$240,%ebx		# pre-scaled *16 (Htable stride)
	movl	4(%esi,%ebx,1),%ebp	# Z = Htable[nibble]
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax		# %eax stays zero-extended for %al indexing
	movl	$15,%edi		# %edi = Xi byte index, counts 15..0
	jmp	.L000x86_loop
.align	16
.L000x86_loop:
	movb	%bl,%al			# %al = low nibble of Z about to fall off
	shrdl	$4,%ecx,%ebx		# Z >>= 4 (128-bit shift across 4 regs)
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp	# fold reduction constant for shifted-out nibble
	movb	(%esp,%edi,1),%al	# current data byte of Xi
	andb	$240,%al		# its HIGH nibble, already *16
	xorl	8(%esi,%eax,1),%ebx	# Z ^= Htable[nibble]
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L001x86_break		# done after byte 0's high nibble
	movb	%bl,%al			# second half-step: same as above but
	shrdl	$4,%ecx,%ebx		# consuming the LOW nibble of the next byte
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al			# low nibble, scaled *16
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L000x86_loop
.align	16
.L001x86_break:
	bswap	%ebx			# convert result to big-endian byte order
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	104(%esp),%edi		# %edi = Xi again (was the loop counter)
	movl	%ebx,12(%edi)		# store Z back into Xi
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
#----------------------------------------------------------------------
# void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
#                         const u8 *inp, size_t len)
#
# GHASH over a buffer: for each 16-byte block, Xi = (Xi ^ block) * H.
# Same 4-bit table core as gcm_gmult_4bit_x86 above, wrapped in an
# outer per-block loop.  The loop body always executes at least once
# and advances by 16; len is assumed to be a positive multiple of 16
# (caller's contract — TODO confirm against callers).
#
# Arguments at 104/108/112/116(%esp) = Xi, Htable, inp, len.
# 116(%esp) is overwritten with the end-of-input pointer inp+len;
# 112(%esp) tracks the advancing input pointer between blocks.
#----------------------------------------------------------------------
.globl	gcm_ghash_4bit_x86
.type	gcm_ghash_4bit_x86,@function
.align	16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp		# same frame layout as gcm_gmult_4bit_x86
	movl	104(%esp),%ebx		# %ebx = Xi (temporarily)
	movl	108(%esp),%esi		# %esi = Htable
	movl	112(%esp),%edi		# %edi = inp
	movl	116(%esp),%ecx		# %ecx = len
	addl	%edi,%ecx
	movl	%ecx,116(%esp)		# 116(%esp) = inp + len (end pointer)
	movl	(%ebx),%ebp		# load Xi into %ebp:%edx:%ecx:%ebx
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	movl	$0,16(%esp)		# reduction table, identical to the one
	movl	$471859200,20(%esp)	# built in gcm_gmult_4bit_x86
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	16
.L002x86_outer_loop:
	xorl	12(%edi),%ebx		# Xi ^= current 16-byte input block
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)		# byte-addressable copy at (%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	shrl	$20,%ebx		# low nibble of byte 15, *16
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp	# Z = Htable[nibble]
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi		# %edi reused as byte index 15..0
	jmp	.L003x86_loop
.align	16
.L003x86_loop:				# identical structure to .L000x86_loop
	movb	%bl,%al			# nibble of Z being shifted out
	shrdl	$4,%ecx,%ebx		# Z >>= 4
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp	# fold reduction constant
	movb	(%esp,%edi,1),%al
	andb	$240,%al		# high nibble of current byte
	xorl	8(%esi,%eax,1),%ebx	# Z ^= Htable[nibble]
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L004x86_break
	movb	%bl,%al			# low-nibble half-step
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L003x86_loop
.align	16
.L004x86_break:
	bswap	%ebx			# back to big-endian representation
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi		# advance input pointer by one block
	leal	16(%edi),%edi
	cmpl	116(%esp),%edi		# reached inp+len?
	movl	%edi,112(%esp)
	jb	.L002x86_outer_loop
	movl	104(%esp),%edi		# %edi = Xi; store final digest state
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
#----------------------------------------------------------------------
# _mmx_gmult_4bit_inner — MMX core of the 4-bit GHASH multiply.
# Internal helper with a CUSTOM register calling convention (not C):
#   In:   %esi = Htable
#         %edi = pointer to the 16-byte block to multiply
#         %ebx = byte 15 of that block (zero-extended by the caller)
#         %eax = address of .Lrem_4bit (PIC-resolved by the caller)
#   Out:  result in %ebp:%edx:%ecx:%ebp order as stored by callers:
#         %ebp -> Xi[0..3], %edx -> Xi[4..7], %ecx -> Xi[8..11],
#         %ebx -> Xi[12..15]; all four already byte-swapped.
#   Clobbers: %mm0-%mm2, flags.  Leaves MMX state dirty — the CALLER
#         must execute emms before returning to C code.
#
# The 128-bit accumulator Z is kept in %mm1 (high 64 bits) and
# %mm0 (low 64 bits).  Each 4-bit step shifts Z right by 4 (psrlq on
# both halves, psllq $60 on a copy to carry bits across), XORs in a
# Htable entry selected by the next nibble, and folds the 64-bit
# rem_4bit constant for the nibble that was shifted out (the rem
# lookup is software-pipelined: the index is extracted one step
# before the pxor that uses it).  Fully unrolled: two nibble steps
# per input byte, bytes 15 down to 0, low nibble before high nibble.
#----------------------------------------------------------------------
.type	_mmx_gmult_4bit_inner,@function
.align	16
_mmx_gmult_4bit_inner:
	xorl	%ecx,%ecx		# %cl holds scaled nibble indices
	movl	%ebx,%edx		# %ebx = Xi byte 15 on entry
	movb	%dl,%cl
	shlb	$4,%cl			# %cl = low nibble of byte 15, *16
	andl	$240,%edx		# %edx = high nibble of byte 15, *16
	movq	8(%esi,%ecx,1),%mm0	# Z.lo = Htable[lo nibble] low half
	movq	(%esi,%ecx,1),%mm1	# Z.hi = Htable[lo nibble] high half
	movd	%mm0,%ebp		# capture nibble about to be shifted out
	psrlq	$4,%mm0			# Z >>= 4:
	movq	%mm1,%mm2		#   copy Z.hi to carry its low nibble
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0	# Z ^= Htable[hi nibble]
	movb	14(%edi),%cl		# fetch Xi byte 14
	psllq	$60,%mm2		# carried bits: Z.hi nibble -> top of Z.lo
	andl	$15,%ebp		# rem_4bit index for the dropped nibble
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx		# pipeline: next dropped-nibble candidate
	pxor	%mm2,%mm0		# complete the 128-bit shift
	shlb	$4,%cl			# low nibble of byte 14, *16
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx		# high nibble of byte 14, *16
	pxor	(%eax,%ebp,8),%mm1	# fold rem_4bit[dropped nibble] into Z.hi
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 13 (the same 24-instruction pattern repeats per byte) ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	13(%edi),%cl		# fetch Xi byte 13
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 12 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	12(%edi),%cl		# fetch Xi byte 12
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 11 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	11(%edi),%cl		# fetch Xi byte 11
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 10 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	10(%edi),%cl		# fetch Xi byte 10
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 9 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	9(%edi),%cl		# fetch Xi byte 9
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 8 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	8(%edi),%cl		# fetch Xi byte 8
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 7 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	7(%edi),%cl		# fetch Xi byte 7
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 6 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	6(%edi),%cl		# fetch Xi byte 6
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 5 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	5(%edi),%cl		# fetch Xi byte 5
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 4 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	4(%edi),%cl		# fetch Xi byte 4
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 3 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	3(%edi),%cl		# fetch Xi byte 3
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 2 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	2(%edi),%cl		# fetch Xi byte 2
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 1 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	1(%edi),%cl		# fetch Xi byte 1
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- byte 0 ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi),%cl		# fetch Xi byte 0 (last one)
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	movd	%mm0,%ebx
	pxor	%mm2,%mm0
	shlb	$4,%cl
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	andl	$240,%edx
	pxor	(%eax,%ebp,8),%mm1
	andl	$15,%ebx
	pxor	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebp
	pxor	%mm2,%mm0
	# --- final nibble step + drain the pipeline and unpack Z ---
	psrlq	$4,%mm0
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	andl	$15,%ebp
	pxor	(%esi,%edx,1),%mm1
	movd	%mm0,%ebx		# %ebx = low dword of Z.lo
	pxor	%mm2,%mm0
	movl	4(%eax,%ebp,8),%edi	# high dword of rem_4bit[last nibble]
	psrlq	$32,%mm0
	movd	%mm1,%edx		# %edx = low dword of Z.hi
	psrlq	$32,%mm1
	movd	%mm0,%ecx		# %ecx = high dword of Z.lo
	movd	%mm1,%ebp		# %ebp = high dword of Z.hi
	shll	$4,%edi			# scale last reduction constant
	bswap	%ebx			# byte-swap all four result words
	bswap	%edx
	bswap	%ecx
	xorl	%edi,%ebp		# apply final reduction to the top word
	bswap	%ebp
	ret
.size	_mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner
#----------------------------------------------------------------------
# void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16])
#
# MMX flavour of the single-block GHASH multiply.  Resolves the
# position-independent address of .Lrem_4bit with the classic i386
# call/pop trick, seeds %ebx with Xi byte 15, delegates the actual
# multiplication to _mmx_gmult_4bit_inner, then stores its
# %ebp:%edx:%ecx:%ebx result back into Xi.
#----------------------------------------------------------------------
.globl	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		# %edi = Xi
	movl	24(%esp),%esi		# %esi = Htable
	call	.L005pic_point		# call/pop: %eax = this address
.L005pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L005pic_point(%eax),%eax	# %eax = &.Lrem_4bit (PIC)
	movzbl	15(%edi),%ebx		# seed for the inner routine
	call	_mmx_gmult_4bit_inner
	movl	20(%esp),%edi		# %edi = Xi (result destination)
	emms				# clear MMX state before returning to C
	movl	%ebx,12(%edi)		# store the already byte-swapped result
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
#----------------------------------------------------------------------
# void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
#                         const u8 *inp, size_t len)
#
# MMX flavour of GHASH over a buffer of 16-byte blocks; one call to
# _mmx_gmult_4bit_inner per block.  The loop always runs at least
# once; len is assumed to be a positive multiple of 16 (caller's
# contract — TODO confirm against callers).
#
# After the four pushes, args sit at 20/24/28/32(%esp).  The end
# pointer inp+len is written back to 32(%esp) BEFORE the subl $20,
# so after the subl the scratch layout is:
#    0(%esp)..12(%esp)  Xi ^ current block (inner reads this via %edi)
#   48(%esp)            saved inp (survives the inner call)
#   40(%esp) / 52(%esp) Xi argument / end pointer (shifted originals)
#----------------------------------------------------------------------
.globl	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp		# %ebp = Xi (temporarily)
	movl	24(%esp),%esi		# %esi = Htable
	movl	28(%esp),%edi		# %edi = inp
	movl	32(%esp),%ecx		# %ecx = len
	call	.L006pic_point		# call/pop PIC trick
.L006pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L006pic_point(%eax),%eax	# %eax = &.Lrem_4bit
	addl	%edi,%ecx
	movl	%ecx,32(%esp)		# end pointer (becomes 52(%esp) below)
	subl	$20,%esp		# scratch frame described above
	movl	12(%ebp),%ebx		# load Xi words into %ebp:%edx:%ecx:%ebx
	movl	4(%ebp),%edx
	movl	8(%ebp),%ecx
	movl	(%ebp),%ebp
	jmp	.L007mmx_outer_loop
.align	16
.L007mmx_outer_loop:
	xorl	12(%edi),%ebx		# Xi ^= current input block
	xorl	4(%edi),%edx
	xorl	8(%edi),%ecx
	xorl	(%edi),%ebp
	movl	%edi,48(%esp)		# save inp across the inner call
	movl	%ebx,12(%esp)		# XORed block, byte-addressable
	movl	%edx,4(%esp)
	movl	%ecx,8(%esp)
	movl	%ebp,(%esp)
	movl	%esp,%edi		# inner reads the block through %edi
	shrl	$24,%ebx		# %ebx = byte 15 of the block (inner's seed)
	call	_mmx_gmult_4bit_inner
	movl	48(%esp),%edi		# restore and advance inp
	leal	16(%edi),%edi
	cmpl	52(%esp),%edi		# reached inp+len?
	jb	.L007mmx_outer_loop
	movl	40(%esp),%edi		# %edi = Xi
	emms				# clear MMX state before returning to C
	movl	%ebx,12(%edi)		# store final, byte-swapped Xi
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	addl	$20,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
.align	64
# rem_4bit: 16 little-endian 64-bit reduction constants, laid out as
# (low dword, high dword) pairs; every low dword is 0.  Indexed *8 by
# the 4-bit value shifted out of the accumulator in
# _mmx_gmult_4bit_inner (pxor (%eax,idx,8),%mm1).  The pure-x86
# routines above rebuild an equivalent 32-bit table on the stack from
# immediates instead of referencing this one (PIC-free fast path —
# note their constants are these high dwords scaled; verify against
# ghash-x86.pl before changing either copy).
.Lrem_4bit:
.long	0,0,0,29491200,0,58982400,0,38141952
.long	0,117964800,0,113901568,0,76283904,0,88997888
.long	0,235929600,0,265420800,0,227803136,0,206962688
.long	0,152567808,0,148504576,0,177995776,0,190709760
.align	64
# rem_8bit: 256 16-bit reduction constants for an 8-bit-window GHASH.
# NOT referenced by any code visible in this file — presumably kept
# for other GHASH flavours emitted by ghash-x86.pl builds; confirm
# before removing.
.L008rem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
# NUL-terminated ASCII banner:
# "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
    729