#if defined(__i386__)
.file	"src/crypto/bn/asm/x86-mont.S"
.text
/*
 * _bn_mul_mont -- word-by-word Montgomery multiplication for 32-bit x86
 * (AT&T syntax).  The identification string at the end of this file reads
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Arguments are read from the stack.  Offsets below are relative to %esp
 * after the four register pushes in the prologue:
 *   20(%esp) arg0  destination vector, written in the common tail
 *   24(%esp) arg1  first operand vector
 *   28(%esp) arg2  second operand vector
 *   32(%esp) arg3  vector that is multiplied by the per-pass factor m and
 *                  subtracted from the temporary in the common tail
 *   36(%esp) arg4  pointer to one word; it is dereferenced exactly once
 *   40(%esp) arg5  word count, compared as a signed value
 * NOTE(review): these presumably correspond to rp, ap, bp, np, n0 and num
 * of the C-level bn_mul_mont prototype -- confirm against the header.  The
 * comments below use rp/ap/bp/np/n0/num purely as shorthand for arg0..arg5
 * (n0 meaning the word loaded from *arg4).
 *
 * Return value in %eax:
 *   0  when num < 4; nothing has been read or written besides the stack
 *   1  otherwise, after the result has been stored through rp
 * %ebp, %ebx, %esi and %edi are saved on entry and restored on exit.
 *
 * Local frame once the stack has been carved out:
 *    0(%esp)  squaring path only: num-1
 *    4(%esp)  rp
 *    8(%esp)  ap
 *   12(%esp)  multiply path: pointer to the current word of bp
 *             squaring path: current outer index i
 *   16(%esp)  np
 *   20(%esp)  n0
 *   24(%esp)  %esp value to restore before returning
 *   28(%esp)  multiply path only: bp + 4*num (end-of-bp pointer)
 *   32(%esp)  tp[0 .. num+1], the temporary vector (num+2 words)
 */
.globl	_bn_mul_mont
.private_extern	_bn_mul_mont
.align	4
_bn_mul_mont:
L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			/* return value 0 for the early exit */
	movl	40(%esp),%edi			/* edi = num */
	cmpl	$4,%edi
	jl	L000just_leave			/* signed compare: num < 4 is not handled here */
	leal	20(%esp),%esi			/* esi = address of the first argument slot */
	leal	24(%esp),%edx			/* edx = address of the ap argument slot */
	movl	%esp,%ebp			/* remember %esp for the epilogue */
	addl	$2,%edi				/* edi = num+2 */
	negl	%edi
	leal	-32(%esp,%edi,4),%esp		/* reserve 32 bytes of frame + (num+2) words for tp */
	negl	%edi				/* edi = num+2 again */
	/*
	 * Adjust the new %esp relative to the argument area: first make it
	 * congruent to %edx modulo 2048, then subtract a further 2048 when
	 * bit 11 of the two addresses is equal, and finally round down to a
	 * 64-byte boundary.
	 * NOTE(review): presumably done to control how the frame aliases
	 * with the caller's data in the cache -- confirm with the generator.
	 */
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp
	/* Copy the arguments into the local frame. */
	movl	(%esi),%eax			/* eax = rp */
	movl	4(%esi),%ebx			/* ebx = ap */
	movl	8(%esi),%ecx			/* ecx = bp */
	movl	12(%esi),%edx			/* edx = np */
	movl	16(%esi),%esi			/* esi = arg4 (pointer) */
	movl	(%esi),%esi			/* esi = n0, the word it points to */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			/* ebx = (num+2)-3 = num-1 */
	movl	%ebp,24(%esp)			/* save the %esp to restore on exit */
	/*
	 * Choose the code path: the squaring path is taken only when num is
	 * even and ap == bp; everything else uses the general multiply path.
	 */
	movl	8(%esp),%esi			/* esi = ap */
	leal	1(%ebx),%ebp			/* ebp = num */
	movl	12(%esp),%edi			/* edi = bp */
	xorl	%ecx,%ecx			/* ecx = 0 (index j, or outer index i when squaring) */
	movl	%esi,%edx
	andl	$1,%ebp				/* ebp = num & 1 */
	subl	%edi,%edx			/* edx = ap - bp */
	leal	4(%edi,%ebx,4),%eax		/* eax = bp + 4*num; lea leaves the flags alone */
	orl	%edx,%ebp			/* ZF set iff num is even and ap == bp */
	movl	(%edi),%edi			/* edi = bp[0]; mov leaves the flags alone */
	jz	L001bn_sqr_mont
	/* ---- General multiply path ---- */
	movl	%eax,28(%esp)			/* remember the end of bp */
	movl	(%esi),%eax			/* eax = ap[0] */
	xorl	%edx,%edx			/* carry word = 0 */
.align	4,0x90
	/* First pass: tp[j] = low(ap[j]*bp[0] + carry) for j = 0 .. num-2. */
L002mull:
	movl	%edx,%ebp			/* ebp = carry word from the previous step */
	mull	%edi				/* edx:eax = ap[j] * bp[0] */
	addl	%eax,%ebp
	leal	1(%ecx),%ecx			/* j++ without disturbing CF */
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax		/* prefetch ap[j] for the next step */
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)		/* tp[j-1] (j already incremented) */
	jl	L002mull			/* loop while j < num-1 */
	/* Last word of the first pass; also compute m = n0 * tp[0]. */
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi			/* edi = n0 */
	addl	%ebp,%eax
	movl	16(%esp),%esi			/* esi = np */
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = m = n0 * tp[0] (low 32 bits) */
	movl	%eax,32(%esp,%ebx,4)		/* tp[num-1] */
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)		/* tp[num] */
	movl	%ecx,40(%esp,%ebx,4)		/* tp[num+1] = 0 */
	movl	(%esi),%eax			/* eax = np[0] */
	mull	%edi				/* edx:eax = np[0] * m */
	addl	32(%esp),%eax			/* only the carry out of tp[0] + low word is kept */
	movl	4(%esi),%eax			/* eax = np[1]; the low sum is discarded */
	adcl	$0,%edx
	incl	%ecx				/* j = 1 */
	jmp	L0032ndmadd
.align	4,0x90
	/* Accumulate pass: tp[j] += ap[j]*bp[i] + carry for j = 0 .. num-2. */
L0041stmadd:
	movl	%edx,%ebp
	mull	%edi				/* edx:eax = ap[j] * bp[i] */
	addl	32(%esp,%ecx,4),%ebp		/* add the existing tp[j] */
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax		/* prefetch ap[j+1] */
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)		/* tp[j] updated in place */
	jl	L0041stmadd
	/* Last word of the accumulate pass; fold the carry into tp[num], tp[num+1]. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax		/* add tp[num-1] */
	movl	20(%esp),%edi			/* edi = n0 */
	adcl	$0,%edx
	movl	16(%esp),%esi			/* esi = np */
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = m = n0 * tp[0] (low 32 bits) */
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx		/* add the previous tp[num] */
	movl	%ebp,32(%esp,%ebx,4)		/* tp[num-1] */
	adcl	$0,%ecx				/* ecx = carry out of that addition */
	movl	(%esi),%eax			/* eax = np[0] */
	movl	%edx,36(%esp,%ebx,4)		/* tp[num] */
	movl	%ecx,40(%esp,%ebx,4)		/* tp[num+1] */
	mull	%edi				/* edx:eax = np[0] * m */
	addl	32(%esp),%eax			/* only the carry out is kept */
	movl	4(%esi),%eax			/* eax = np[1] */
	adcl	$0,%edx
	movl	$1,%ecx				/* j = 1 */
.align	4,0x90
	/*
	 * Reduction pass: tp[j-1] = low(tp[j] + np[j]*m + carry) for
	 * j = 1 .. num-2, i.e. the sum is stored one word lower than it was
	 * read from.
	 */
L0032ndmadd:
	movl	%edx,%ebp
	mull	%edi				/* edx:eax = np[j] * m */
	addl	32(%esp,%ecx,4),%ebp		/* add tp[j] */
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax		/* prefetch np[j+1] */
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)		/* tp[j-1] (j already incremented) */
	jl	L0032ndmadd
	/* Last word of the reduction pass, then advance to the next word of bp. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp		/* add tp[num-1] */
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		/* tp[num-2] */
	xorl	%eax,%eax
	movl	12(%esp),%ecx			/* ecx = pointer to the current bp word */
	addl	36(%esp,%ebx,4),%edx		/* carry + tp[num] */
	adcl	40(%esp,%ebx,4),%eax		/* tp[num+1] + carry */
	leal	4(%ecx),%ecx			/* step to the next bp word */
	movl	%edx,32(%esp,%ebx,4)		/* tp[num-1] */
	cmpl	28(%esp),%ecx			/* reached bp + 4*num? */
	movl	%eax,36(%esp,%ebx,4)		/* tp[num] */
	je	L005common_tail
	movl	(%ecx),%edi			/* edi = next bp word */
	movl	8(%esp),%esi			/* esi = ap */
	movl	%ecx,12(%esp)			/* save the advanced bp pointer */
	xorl	%ecx,%ecx			/* j = 0 */
	xorl	%edx,%edx			/* carry word = 0 */
	movl	(%esi),%eax			/* eax = ap[0] */
	jmp	L0041stmadd
.align	4,0x90
	/*
	 * ---- Squaring path (num even, ap == bp) ----
	 * Entered with edi = ap[0], esi = ap, ebx = num-1, ecx = 0.
	 */
L001bn_sqr_mont:
	movl	%ebx,(%esp)			/* 0(%esp) = num-1 */
	movl	%ecx,12(%esp)			/* outer index i = 0 */
	movl	%edi,%eax
	mull	%edi				/* edx:eax = ap[0] * ap[0] */
	movl	%eax,32(%esp)			/* tp[0] */
	movl	%edx,%ebx
	shrl	$1,%edx				/* carry word = high half >> 1 ... */
	andl	$1,%ebx				/* ... and ebx keeps the bit shifted out */
	incl	%ecx				/* j = 1 */
.align	4,0x90
	/*
	 * First squaring pass: each word is computed as ap[j]*ap[0] + carry
	 * and then doubled; ebx carries the bit shifted out of one word into
	 * the next.
	 */
L006sqr:
	movl	(%esi,%ecx,4),%eax		/* eax = ap[j] */
	movl	%edx,%ebp
	mull	%edi				/* edx:eax = ap[j] * ap[0] */
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp		/* ebp = 2*eax + bit from the previous word */
	shrl	$31,%eax			/* bit shifted out by the doubling */
	cmpl	(%esp),%ecx			/* compare j with num-1 */
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)		/* tp[j-1] (j already incremented) */
	jl	L006sqr
	/* Last word of the first squaring pass; also compute m = n0 * tp[0]. */
	movl	(%esi,%ecx,4),%eax		/* eax = ap[num-1] */
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi			/* edi = n0 */
	adcl	$0,%edx
	movl	16(%esp),%esi			/* esi = np */
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi			/* edi = m = n0 * tp[0] (low 32 bits) */
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)		/* tp[num-1] */
	leal	(%eax,%edx,2),%ebp		/* doubled high word plus shifted-out bit */
	movl	(%esi),%eax			/* eax = np[0] */
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)		/* tp[num] */
	movl	%edx,40(%esp,%ecx,4)		/* tp[num+1] */
	mull	%edi				/* edx:eax = np[0] * m */
	addl	32(%esp),%eax			/* only the carry out is kept */
	movl	%ecx,%ebx			/* ebx = num-1 */
	adcl	$0,%edx
	movl	4(%esi),%eax			/* eax = np[1] */
	movl	$1,%ecx				/* j = 1 */
.align	4,0x90
	/*
	 * Reduction pass of the squaring path.  Same arithmetic as
	 * L0032ndmadd, but two words are processed per iteration.
	 */
L0073rdmadd:
	movl	%edx,%ebp
	mull	%edi				/* edx:eax = np[j] * m */
	addl	32(%esp,%ecx,4),%ebp		/* add tp[j] */
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax		/* eax = np[j+1] */
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)		/* tp[j-1] */
	movl	%edx,%ebp
	mull	%edi				/* edx:eax = np[j+1] * m */
	addl	36(%esp,%ecx,4),%ebp		/* add tp[j+1] */
	leal	2(%ecx),%ecx			/* j += 2 */
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax		/* prefetch np[j] for the next round */
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)		/* tp[j-2] with j already advanced by 2 */
	jl	L0073rdmadd
	/* Last word of the reduction pass. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp		/* add tp[num-1] */
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		/* tp[num-2] */
	movl	12(%esp),%ecx			/* ecx = outer index i */
	xorl	%eax,%eax
	movl	8(%esp),%esi			/* esi = ap */
	addl	36(%esp,%ebx,4),%edx		/* carry + tp[num] */
	adcl	40(%esp,%ebx,4),%eax		/* tp[num+1] + carry */
	movl	%edx,32(%esp,%ebx,4)		/* tp[num-1] */
	cmpl	%ebx,%ecx			/* was that the pass for i == num-1? */
	movl	%eax,36(%esp,%ebx,4)		/* tp[num] */
	je	L005common_tail
	/* Start the next outer iteration: add ap[i]*ap[i] into tp[i]. */
	movl	4(%esi,%ecx,4),%edi		/* edi = ap[i+1] */
	leal	1(%ecx),%ecx			/* i++ */
	movl	%edi,%eax
	movl	%ecx,12(%esp)			/* save the new i */
	mull	%edi				/* edx:eax = ap[i] * ap[i] */
	addl	32(%esp,%ecx,4),%eax		/* add tp[i] */
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)		/* tp[i] */
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx			/* i == num-1 ? */
	leal	1(%ecx),%ecx			/* j = i+1; lea keeps the flags from cmpl */
	je	L008sqrlast			/* no cross products remain for the last word */
	movl	%edx,%ebx
	shrl	$1,%edx				/* split the carry as in the first pass */
	andl	$1,%ebx
.align	4,0x90
	/* tp[j] += doubled (ap[j]*ap[i] + carry) for j = i+1 .. num-1. */
L009sqradd:
	movl	(%esi,%ecx,4),%eax		/* eax = ap[j] */
	movl	%edx,%ebp
	mull	%edi				/* edx:eax = ap[j] * ap[i] */
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp		/* ebp = 2*eax */
	adcl	$0,%edx
	shrl	$31,%eax			/* bit shifted out by the doubling */
	addl	32(%esp,%ecx,4),%ebp		/* add tp[j] */
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp			/* add the bit carried from the previous word */
	adcl	$0,%eax
	cmpl	(%esp),%ecx			/* compare j with num-1 */
	movl	%ebp,28(%esp,%ecx,4)		/* tp[j-1] (j already incremented) */
	movl	%eax,%ebx
	jle	L009sqradd			/* loop while j <= num-1 */
	/* Double the final carry word and add the pending bit. */
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
	/* Store the top two words and prime the reduction pass; ecx = num here. */
L008sqrlast:
	movl	20(%esp),%edi			/* edi = n0 */
	movl	16(%esp),%esi			/* esi = np */
	imull	32(%esp),%edi			/* edi = m = n0 * tp[0] (low 32 bits) */
	addl	32(%esp,%ecx,4),%edx		/* add tp[num] */
	movl	(%esi),%eax			/* eax = np[0] */
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)		/* tp[num] */
	movl	%ebp,36(%esp,%ecx,4)		/* tp[num+1] */
	mull	%edi				/* edx:eax = np[0] * m */
	addl	32(%esp),%eax			/* only the carry out is kept */
	leal	-1(%ecx),%ebx			/* ebx = num-1 again */
	adcl	$0,%edx
	movl	$1,%ecx				/* j = 1 */
	movl	4(%esi),%eax			/* eax = np[1] */
	jmp	L0073rdmadd
.align	4,0x90
	/*
	 * ---- Common tail ----
	 * Entered with ebx = num-1.  Writes tp - np into rp, then replaces
	 * rp with tp word by word when the mask computed from the final
	 * borrow is all ones.  Every word of tp[0 .. num-1] is overwritten
	 * on the way.
	 */
L005common_tail:
	movl	16(%esp),%ebp			/* ebp = np */
	movl	4(%esp),%edi			/* edi = rp */
	leal	32(%esp),%esi			/* esi = &tp[0] */
	movl	(%esi),%eax			/* eax = tp[0] */
	movl	%ebx,%ecx			/* loop counter = num-1 */
	xorl	%edx,%edx			/* index = 0; also clears CF for the first sbbl */
.align	4,0x90
	/* rp[j] = tp[j] - np[j] - borrow, for j = 0 .. num-1. */
L010sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx				/* dec leaves CF (the borrow) untouched */
	movl	4(%esi,%edx,4),%eax		/* eax = tp[j+1] */
	leal	1(%edx),%edx
	jge	L010sub				/* uses the flags from decl; exits once ecx reaches -1 */
	sbbl	$0,%eax				/* eax = tp[num] - borrow; used as a select mask below */
						/* NOTE(review): expected to be 0 or all ones -- confirm */
.align	4,0x90
	/* For i = num-1 down to 0: rp[i] = mask ? tp[i] : rp[i]; overwrite tp[i]. */
L011copy:
	movl	(%esi,%ebx,4),%edx		/* edx = tp[i] */
	movl	(%edi,%ebx,4),%ebp		/* ebp = rp[i] */
	xorl	%ebp,%edx
	andl	%eax,%edx
	xorl	%ebp,%edx			/* edx = ((tp[i] ^ rp[i]) & mask) ^ rp[i] */
	movl	%ecx,(%esi,%ebx,4)		/* overwrite tp[i] with ecx (left at -1 by the loop above) */
	movl	%edx,(%edi,%ebx,4)		/* store the selected word into rp[i] */
	decl	%ebx
	jge	L011copy
	movl	24(%esp),%esp			/* drop the local frame */
	movl	$1,%eax				/* return value 1 */
L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/* NUL-terminated ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
#endif