/*
 * bn_mul_mont -- word-serial Montgomery multiplication, 32-bit x86.
 * GNU as, AT&T syntax, ELF, position independent; cdecl (all arguments
 * on the stack, %ebp/%ebx/%esi/%edi preserved).
 *
 * C view (parameter names follow OpenSSL's prototype for this symbol):
 *   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
 *                   const BN_ULONG *np, const BN_ULONG *n0, int num);
 *
 * Return value in %eax: 0 if num < 4 (nothing is computed), else 1.
 *
 * A scratch frame is carved out below the caller's stack:
 *    0(%esp)  loop bound num-1 (square path only)
 *    4(%esp)  rp
 *    8(%esp)  ap
 *   12(%esp)  bp; advanced per outer iteration in the multiply path,
 *             reused as the outer index i in the square path
 *   16(%esp)  np
 *   20(%esp)  n0[0]
 *   24(%esp)  caller's %esp, restored before returning
 *   28(%esp)  &bp[num], end marker for the scalar multiply path
 *   32(%esp)  tp[0 .. num+1], the running accumulator
 *
 * Three code paths share the final subtract/copy stage:
 *   - MMX registers with pmuludq, taken when bit 26 of the first word
 *     of OPENSSL_ia32cap_P is set;
 *   - scalar multiply;
 *   - scalar square, taken when ap == bp and num is even.
 */
	.file	"crypto/bn/asm/x86-mont.s"
	.text
	.globl	bn_mul_mont
	.type	bn_mul_mont,@function
	.p2align 4
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			# return value 0 for the early exit
	movl	40(%esp),%edi			# edi = num (6th argument)
	cmpl	$4,%edi
	jl	.Lmont_return			# num < 4: leave with eax == 0

	leal	20(%esp),%esi			# esi -> argument block (1st argument slot)
	leal	24(%esp),%edx			# edx = address of the 2nd argument slot
	movl	%esp,%ebp			# remember the incoming stack pointer
	addl	$2,%edi				# tp needs num+2 words
	negl	%edi
	leal	-32(%esp,%edi,4),%esp		# esp -= 32 + 4*(num+2)
	negl	%edi				# edi = num+2 again

	/* Place the frame relative to the address held in edx: first make
	 * esp congruent to it modulo 2048, then make bit 11 differ, and
	 * finally round esp down to a multiple of 64. */
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp

	/* Copy the argument block into the new frame. */
	movl	(%esi),%eax			# rp
	movl	4(%esi),%ebx			# ap
	movl	8(%esi),%ecx			# bp
	movl	12(%esi),%edx			# np
	movl	16(%esi),%esi			# n0 (pointer)
	movl	(%esi),%esi			# n0[0]
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			# ebx = num-1, the loop bound used below
	movl	%ebp,24(%esp)			# saved stack pointer

	/* Locate OPENSSL_ia32cap_P through the GOT and test capability bit 26. */
	call	.Lmont_pic_base
.Lmont_pic_base:
	popl	%eax
	leal	_GLOBAL_OFFSET_TABLE_+[.-.Lmont_pic_base](%eax),%eax
	movl	OPENSSL_ia32cap_P@GOT(%eax),%eax
	btl	$26,(%eax)
	jnc	.Lmont_scalar

	/*
	 * MMX path.
	 *   mm7 = mask of the low 32 bits     mm4 = current word of bp
	 *   mm5 = per-row multiplier m        mm6 = word fetched from tp
	 *   mm2 = carry chain of ap[]*bp[i]   mm3 = carry chain of np[]*m
	 *   mm0 / mm1 = next word of ap / np
	 *   esi = ap, edi = bp, ebp = np, edx = i, ecx = j
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx			# i = 0
	xorl	%ecx,%ecx			# j = 0
	movd	(%edi),%mm4			# bp[0]
	movd	(%esi),%mm5			# ap[0]
	movd	(%ebp),%mm3			# np[0]
	pmuludq	%mm4,%mm5			# ap[0]*bp[0]
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0			# low word of the product
	pmuludq	20(%esp),%mm5			# m = low word * n0[0] (low dwords only)
	pmuludq	%mm5,%mm3			# np[0]*m
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1			# np[1]
	movd	4(%esi),%mm0			# ap[1]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx				# j = 1
	.p2align 4
.Lmont_mmx_first:
	pmuludq	%mm4,%mm0			# ap[j]*bp[0]
	pmuludq	%mm5,%mm1			# np[j]*m
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1		# np[j+1]
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0		# ap[j+1]
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)		# tp[j-1]
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx			# j++ without touching flags
	cmpl	%ebx,%ecx
	jl	.Lmont_mmx_first

	pmuludq	%mm4,%mm0			# last column, j == num-1
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)		# tp[num-2]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)		# tp[num-1] and tp[num] in one store
	incl	%edx				# i = 1
.Lmont_mmx_outer:
	xorl	%ecx,%ecx			# j = 0
	movd	(%edi,%edx,4),%mm4		# bp[i]
	movd	(%esi),%mm5			# ap[0]
	movd	32(%esp),%mm6			# tp[0]
	movd	(%ebp),%mm3			# np[0]
	pmuludq	%mm4,%mm5			# ap[0]*bp[i]
	paddq	%mm6,%mm5			# + tp[0]
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5			# m for this row
	pmuludq	%mm5,%mm3			# np[0]*m
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6			# tp[1]
	movd	4(%ebp),%mm1			# np[1]
	movd	4(%esi),%mm0			# ap[1]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2			# + tp[1]
	incl	%ecx				# j = 1
	decl	%ebx				# ebx counts the remaining columns down to 0
.Lmont_mmx_inner:
	pmuludq	%mm4,%mm0			# ap[j]*bp[i]
	pmuludq	%mm5,%mm1			# np[j]*m
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6		# tp[j+1]
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1		# np[j+1]
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0		# ap[j+1]
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)		# tp[j-1]
	psrlq	$32,%mm3
	paddq	%mm6,%mm2			# + tp[j+1]
	decl	%ebx				# sets ZF for the jnz below
	leal	1(%ecx),%ecx			# j++ (lea leaves the flags alone)
	jnz	.Lmont_mmx_inner

	movl	%ecx,%ebx			# ebx = num-1 again
	pmuludq	%mm4,%mm0			# last column of this row
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)		# tp[num-2]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6		# tp[num]
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)		# tp[num-1] and tp[num]
	leal	1(%edx),%edx			# i++
	cmpl	%ebx,%edx
	jle	.Lmont_mmx_outer
	emms					# release the MMX/x87 register state
	jmp	.Lmont_reduce

	/*
	 * Scalar paths.
	 *   esi = current source vector (ap, later np)
	 *   edi = current multiplier word
	 *   ebp = running carry / accumulator word
	 *   ecx = j, ebx = num-1
	 */
	.p2align 4
.Lmont_scalar:
	movl	8(%esp),%esi			# ap
	leal	1(%ebx),%ebp			# ebp = num
	movl	12(%esp),%edi			# bp
	xorl	%ecx,%ecx			# j = 0
	movl	%esi,%edx
	andl	$1,%ebp				# ebp = num & 1
	subl	%edi,%edx			# edx = ap - bp
	leal	4(%edi,%ebx,4),%eax		# eax = &bp[num]
	orl	%edx,%ebp			# ZF <=> num even and ap == bp
	movl	(%edi),%edi			# bp[0] (mov keeps the flags)
	jz	.Lmont_square
	movl	%eax,28(%esp)			# end-of-bp marker
	movl	(%esi),%eax			# ap[0]
	xorl	%edx,%edx
	.p2align 4
.Lmont_mul_first:				# tp[] = ap[] * bp[0]
	movl	%edx,%ebp
	mull	%edi				# edx:eax = ap[j]*bp[0]
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax		# ap[j+1]
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)		# tp[j]
	jl	.Lmont_mul_first

	movl	%edx,%ebp
	mull	%edi				# ap[num-1]*bp[0]
	movl	20(%esp),%edi			# n0[0]
	addl	%ebp,%eax
	movl	16(%esp),%esi			# np
	adcl	$0,%edx
	imull	32(%esp),%edi			# m = n0[0]*tp[0]
	movl	%eax,32(%esp,%ebx,4)		# tp[num-1]
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)		# tp[num]
	movl	%ecx,40(%esp,%ebx,4)		# tp[num+1] = 0
	movl	(%esi),%eax			# np[0]
	mull	%edi				# np[0]*m
	addl	32(%esp),%eax			# low word cancels; only the carry is kept
	movl	4(%esi),%eax			# np[1]
	adcl	$0,%edx
	incl	%ecx				# j = 1
	jmp	.Lmont_np_pass
	.p2align 4
.Lmont_ap_pass:					# tp[] += ap[] * bp[i]
	movl	%edx,%ebp
	mull	%edi				# ap[j]*bp[i]
	addl	32(%esp,%ecx,4),%ebp		# + tp[j]
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax		# ap[j+1]
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)		# tp[j]
	jl	.Lmont_ap_pass

	movl	%edx,%ebp
	mull	%edi				# ap[num-1]*bp[i]
	addl	32(%esp,%ebx,4),%eax		# + tp[num-1]
	movl	20(%esp),%edi			# n0[0]
	adcl	$0,%edx
	movl	16(%esp),%esi			# np
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi			# m = n0[0]*tp[0]
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx		# carry + tp[num]
	movl	%ebp,32(%esp,%ebx,4)		# tp[num-1]
	adcl	$0,%ecx
	movl	(%esi),%eax			# np[0]
	movl	%edx,36(%esp,%ebx,4)		# tp[num]
	movl	%ecx,40(%esp,%ebx,4)		# tp[num+1]
	mull	%edi				# np[0]*m
	addl	32(%esp),%eax			# only the carry out is needed
	movl	4(%esi),%eax			# np[1]
	adcl	$0,%edx
	movl	$1,%ecx				# j = 1
	.p2align 4
.Lmont_np_pass:					# tp[] = (tp[] + np[]*m) >> 32
	movl	%edx,%ebp
	mull	%edi				# np[j]*m
	addl	32(%esp,%ecx,4),%ebp		# + tp[j]
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax		# np[j+1]
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)		# tp[j-1]
	jl	.Lmont_np_pass

	movl	%edx,%ebp
	mull	%edi				# np[num-1]*m
	addl	32(%esp,%ebx,4),%ebp		# + tp[num-1]
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		# tp[num-2]
	xorl	%eax,%eax
	movl	12(%esp),%ecx			# current position in bp
	addl	36(%esp,%ebx,4),%edx		# carry + tp[num]
	adcl	40(%esp,%ebx,4),%eax		# + tp[num+1]
	leal	4(%ecx),%ecx			# advance to the next word of bp
	movl	%edx,32(%esp,%ebx,4)		# tp[num-1]
	cmpl	28(%esp),%ecx			# reached &bp[num]?
	movl	%eax,36(%esp,%ebx,4)		# tp[num]
	je	.Lmont_reduce
	movl	(%ecx),%edi			# next multiplier word from bp
	movl	8(%esp),%esi			# ap
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx			# j = 0
	xorl	%edx,%edx
	movl	(%esi),%eax			# ap[0]
	jmp	.Lmont_ap_pass

	/*
	 * Square path: off-diagonal products are doubled with shifts, and
	 * ebx carries the bit shifted out of the previous word.
	 */
	.p2align 4
.Lmont_square:
	movl	%ebx,(%esp)			# keep num-1; ebx is reused below
	movl	%ecx,12(%esp)			# i = 0 (the bp slot becomes the index)
	movl	%edi,%eax			# ap[0]
	mull	%edi				# ap[0]*ap[0]
	movl	%eax,32(%esp)			# tp[0]
	movl	%edx,%ebx
	shrl	$1,%edx				# halve the high word; it is doubled back below
	andl	$1,%ebx				# the bit that was shifted out
	incl	%ecx				# j = 1
	.p2align 4
.Lmont_sqr_first:
	movl	(%esi,%ecx,4),%eax		# ap[j]
	movl	%edx,%ebp
	mull	%edi				# ap[j]*ap[0]
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp		# 2*low + carried bit
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx			# bit carried into the next word
	movl	%ebp,28(%esp,%ecx,4)		# tp[j]
	jl	.Lmont_sqr_first

	movl	(%esi,%ecx,4),%eax		# ap[num-1]
	movl	%edx,%ebp
	mull	%edi				# ap[num-1]*ap[0]
	addl	%ebp,%eax
	movl	20(%esp),%edi			# n0[0]
	adcl	$0,%edx
	movl	16(%esp),%esi			# np
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi			# m = n0[0]*tp[0]
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)		# tp[num-1]
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax			# np[0]
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)		# tp[num]
	movl	%edx,40(%esp,%ecx,4)		# tp[num+1]
	mull	%edi				# np[0]*m
	addl	32(%esp),%eax			# only the carry out is needed
	movl	%ecx,%ebx			# ebx = num-1
	adcl	$0,%edx
	movl	4(%esi),%eax			# np[1]
	movl	$1,%ecx				# j = 1
	.p2align 4
.Lmont_sqr_np_pass:				# reduction pass, two words per iteration
	movl	%edx,%ebp
	mull	%edi				# np[j]*m
	addl	32(%esp,%ecx,4),%ebp		# + tp[j]
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax		# np[j+1]
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)		# tp[j-1]
	movl	%edx,%ebp
	mull	%edi				# np[j+1]*m
	addl	36(%esp,%ecx,4),%ebp		# + tp[j+1]
	leal	2(%ecx),%ecx			# j += 2
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax		# next np word
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)		# second word of this pair
	jl	.Lmont_sqr_np_pass

	movl	%edx,%ebp
	mull	%edi				# np[num-1]*m
	addl	32(%esp,%ebx,4),%ebp		# + tp[num-1]
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		# tp[num-2]
	movl	12(%esp),%ecx			# i
	xorl	%eax,%eax
	movl	8(%esp),%esi			# ap
	addl	36(%esp,%ebx,4),%edx		# carry + tp[num]
	adcl	40(%esp,%ebx,4),%eax		# + tp[num+1]
	movl	%edx,32(%esp,%ebx,4)		# tp[num-1]
	cmpl	%ebx,%ecx			# i == num-1: all rows done
	movl	%eax,36(%esp,%ebx,4)		# tp[num]
	je	.Lmont_reduce
	movl	4(%esi,%ecx,4),%edi		# ap[i+1], multiplier for the next row
	leal	1(%ecx),%ecx			# i++
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi				# diagonal term ap[i]*ap[i]
	addl	32(%esp,%ecx,4),%eax		# + tp[i]
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)		# tp[i]
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx			# j = i+1 (flags from the cmp survive)
	je	.Lmont_sqr_last			# last row has no off-diagonal terms
	movl	%edx,%ebx			# ebx is reused for the carried bit here
	shrl	$1,%edx
	andl	$1,%ebx
	.p2align 4
.Lmont_sqr_add:					# tp[j] += 2*ap[j]*ap[i]
	movl	(%esi,%ecx,4),%eax		# ap[j]
	movl	%edx,%ebp
	mull	%edi				# ap[j]*ap[i]
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp		# doubled low word
	adcl	$0,%edx
	shrl	$31,%eax			# bit lost by the doubling
	addl	32(%esp,%ecx,4),%ebp		# + tp[j]
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp			# + bit carried from the previous word
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)		# tp[j]
	movl	%eax,%ebx
	jle	.Lmont_sqr_add

	movl	%edx,%ebp
	addl	%edx,%edx			# double the final high word
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.Lmont_sqr_last:
	movl	20(%esp),%edi			# n0[0]
	movl	16(%esp),%esi			# np
	imull	32(%esp),%edi			# m = n0[0]*tp[0]
	addl	32(%esp,%ecx,4),%edx		# + tp[num]
	movl	(%esi),%eax			# np[0]
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)		# tp[num]
	movl	%ebp,36(%esp,%ecx,4)		# tp[num+1]
	mull	%edi				# np[0]*m
	addl	32(%esp),%eax			# only the carry out is needed
	leal	-1(%ecx),%ebx			# ebx = num-1
	adcl	$0,%edx
	movl	$1,%ecx				# j = 1
	movl	4(%esi),%eax			# np[1]
	jmp	.Lmont_sqr_np_pass

	/*
	 * Shared final stage: rp[] = tp[] - np[]; if that subtraction
	 * borrows, overwrite rp[] with tp[] instead. Every tp word that
	 * is read by the copy loop is overwritten afterwards.
	 */
	.p2align 4
.Lmont_reduce:
	movl	16(%esp),%ebp			# np
	movl	4(%esp),%edi			# rp
	leal	32(%esp),%esi			# tp
	movl	(%esi),%eax			# tp[0]
	movl	%ebx,%ecx			# j = num-1
	xorl	%edx,%edx			# i = 0, and CF cleared for the first sbb
	.p2align 4
.Lmont_sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)		# rp[i] = tp[i] - np[i] - borrow
	decl	%ecx				# dec leaves CF untouched
	movl	4(%esi,%edx,4),%eax		# tp[i+1]
	leal	1(%edx),%edx
	jge	.Lmont_sub

	sbbl	$0,%eax				# eax = tp[num] - borrow
	andl	%eax,%esi			# mask selects tp ...
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp			# ... or rp
	orl	%ebp,%esi			# esi = source for the copy below
	.p2align 4
.Lmont_copy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)		# rp[i] = source[i]
	movl	%ecx,32(%esp,%ebx,4)		# overwrite tp[i]
	decl	%ebx
	jge	.Lmont_copy

	movl	24(%esp),%esp			# back to the caller's stack
	movl	$1,%eax				# success
.Lmont_return:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	.size	bn_mul_mont,.-.L_bn_mul_mont_begin
	.asciz	"Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
	.comm	OPENSSL_ia32cap_P,8,4