.file	"crypto/bn/asm/x86-mont.s"
.text

#-----------------------------------------------------------------------
# _bn_mul_mont -- word-serial Montgomery multiplication, 32-bit x86,
#                 AT&T syntax, cdecl-style stack arguments.
#
# Six 32-bit stack arguments are read.  After the four register pushes
# in the prologue they sit at 20(%esp)..40(%esp):
#   arg1  20(%esp)  result vector     (written in the tail)
#   arg2  24(%esp)  operand a         (word vector)
#   arg3  28(%esp)  operand b         (word vector)
#   arg4  32(%esp)  modulus n         (word vector, subtracted in tail)
#   arg5  36(%esp)  pointer to a single word; only the pointed-to word
#                   is used (called n0 below)
#   arg6  40(%esp)  num, the length of the vectors in 32-bit words
# NOTE(review): this presumably matches OpenSSL's C prototype
#   int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the
#   caller's declaration.
#
# Returns (in %eax):
#   0  if num < 4 (signed compare); nothing is computed or written
#   1  otherwise, after the result vector has been written
#
# %ebp/%ebx/%esi/%edi are saved and restored; %eax/%ecx/%edx and flags
# are clobbered.
#
# Private frame (after %esp has been lowered and aligned to 64 bytes):
#    0(%esp)  num-1                     (written on the squaring path)
#    4(%esp)  arg1 (result pointer)
#    8(%esp)  arg2 (a pointer)
#   12(%esp)  multiply path: current pointer into b
#             squaring path: current word index i
#   16(%esp)  arg4 (n pointer)
#   20(%esp)  n0 word
#   24(%esp)  caller's %esp as it was after the prologue pushes
#   28(%esp)  &b[num], end marker       (multiply path only)
#   32(%esp)  tp[0 .. num+1], scratch accumulator
#
# Register conventions inside the loops: %edi = current multiplier
# word, %esi = base of the vector being multiplied, %ecx = word index,
# %edx:%eax = product from mull, %ebp = running sum/carry word,
# %ebx = num-1 (except inside L006sqr/L009sqradd where it carries the
# bit shifted out by the doubling).
#
# Many sequences rely on movl/leal not touching flags and on decl/incl
# leaving CF alone, so the instruction order must not be changed.
#-----------------------------------------------------------------------
.globl	_bn_mul_mont
.align	4
_bn_mul_mont:
L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			# return value 0 for the early exit
	movl	40(%esp),%edi			# edi = num
	cmpl	$4,%edi
	jl	L000just_leave			# num < 4: return 0 untouched
	leal	20(%esp),%esi			# esi = address of the argument block
	leal	24(%esp),%edx			# edx = address of the arg2 stack slot
	movl	%esp,%ebp			# ebp = %esp to restore on exit
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%esp		# esp -= 32 + 4*(num+2)
	negl	%edi				# edi = num+2 again
	# Shift %esp down so that it matches %edx in its low 11 bits, then
	# so that it differs from %edx in bit 11, then round down to a
	# 64-byte boundary.
	# NOTE(review): presumably a cache-placement heuristic; it is keyed
	# to the address of the argument slot, not to the a vector itself.
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp
	# Copy the arguments into the private frame.
	movl	(%esi),%eax			# arg1
	movl	4(%esi),%ebx			# arg2
	movl	8(%esi),%ecx			# arg3
	movl	12(%esi),%edx			# arg4
	movl	16(%esi),%esi			# arg5 (pointer) ...
	movl	(%esi),%esi			# ... dereferenced: n0 word
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			# ebx = (num+2)-3 = num-1
	movl	%ebp,24(%esp)			# remember caller's %esp
	# Choose the path: squaring is taken only when num is even AND the
	# a and b pointers are equal (ebp = (num & 1) | (a - b) == 0).
	movl	8(%esp),%esi			# esi = a
	leal	1(%ebx),%ebp			# ebp = num
	movl	12(%esp),%edi			# edi = b
	xorl	%ecx,%ecx			# word index = 0
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax		# eax = &b[num]
	orl	%edx,%ebp			# sets ZF for the jz below
	movl	(%edi),%edi			# edi = b[0] (movl keeps flags)
	jz	L001bn_sqr_mont
	movl	%eax,28(%esp)			# save &b[num] as loop end marker
	movl	(%esi),%eax			# eax = a[0]
	xorl	%edx,%edx			# incoming carry = 0
.align	4,0x90
	# First pass: tp[j] = low(a[j]*b[0] + carry) for j = 0 .. num-2.
L002mull:
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax		# prefetch next a word
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)		# tp[ecx-1]
	jl	L002mull
	# Last word of the first pass, interleaved with set-up of the
	# reduction: edi = n0 * tp[0], esi = n.
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi			# edi = n0 * tp[0] (low 32 bits)
	movl	%eax,32(%esp,%ebx,4)		# tp[num-1]
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)		# tp[num]
	movl	%ecx,40(%esp,%ebx,4)		# tp[num+1] = 0
	movl	(%esi),%eax			# n[0]
	mull	%edi
	addl	32(%esp),%eax			# only the carry out is kept
	movl	4(%esi),%eax			# n[1] (movl keeps CF)
	adcl	$0,%edx
	incl	%ecx				# continue from word 1
	jmp	L0032ndmadd
.align	4,0x90
	# Multiply-accumulate pass for a later b word:
	# tp[j] += a[j]*edi + carry, for j = 0 .. num-2.
L0041stmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)		# tp[ecx-1]
	jl	L0041stmadd
	# Last word of the pass plus carry propagation into tp[num] and
	# tp[num+1], interleaved with set-up of the reduction.
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi			# edi = n0 * tp[0] (low 32 bits)
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)		# tp[num-1]
	adcl	$0,%ecx				# ecx = carry out of the add above
	movl	(%esi),%eax			# n[0]
	movl	%edx,36(%esp,%ebx,4)		# tp[num]
	movl	%ecx,40(%esp,%ebx,4)		# tp[num+1]
	mull	%edi
	addl	32(%esp),%eax			# only the carry out is kept
	movl	4(%esi),%eax			# n[1] (movl keeps CF)
	adcl	$0,%edx
	movl	$1,%ecx
.align	4,0x90
	# Reduction pass: tp[j-1] = tp[j] + n[j]*edi + carry for
	# j = 1 .. num-2, i.e. the accumulator moves down by one word.
L0032ndmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)		# tp[ecx-2]
	jl	L0032ndmadd
	# Last word of the reduction, then fold the two top words down.
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		# tp[num-2]
	xorl	%eax,%eax
	movl	12(%esp),%ecx			# current pointer into b
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx			# advance to next b word
	movl	%edx,32(%esp,%ebx,4)		# tp[num-1]
	cmpl	28(%esp),%ecx			# reached &b[num]?
	movl	%eax,36(%esp,%ebx,4)		# tp[num]
	je	L005common_tail
	movl	(%ecx),%edi			# edi = next b word
	movl	8(%esp),%esi			# esi = a
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax			# a[0]
	jmp	L0041stmadd
.align	4,0x90
	# Squaring path (a == b, num even).  Entered with ebx = num-1,
	# ecx = 0, esi = a, edi = a[0].
L001bn_sqr_mont:
	movl	%ebx,(%esp)			# keep num-1; ebx is reused below
	movl	%ecx,12(%esp)			# word index i = 0
	movl	%edi,%eax
	mull	%edi				# a[0]^2
	movl	%eax,32(%esp)			# tp[0]
	movl	%edx,%ebx
	shrl	$1,%edx				# halve the high word: the loop
	andl	$1,%ebx				# doubles it back; ebx = its low bit
	incl	%ecx
.align	4,0x90
	# tp[j] = low bit carried in ebx + 2*low(a[j]*a[0] + carry),
	# for j = 1 .. num-2; ebx receives the bit shifted out.
L006sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)		# tp[ecx-1]
	jl	L006sqr
	# Last word (j = num-1) and the two top words, interleaved with
	# set-up of the reduction.
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi			# edi = n0 * tp[0] (low 32 bits)
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)		# tp[num-1]
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax			# n[0]
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)		# tp[num]
	movl	%edx,40(%esp,%ecx,4)		# tp[num+1]
	mull	%edi
	addl	32(%esp),%eax			# only the carry out is kept
	movl	%ecx,%ebx			# ebx = num-1 again
	adcl	$0,%edx
	movl	4(%esi),%eax			# n[1]
	movl	$1,%ecx
.align	4,0x90
	# Reduction pass for the squaring path; same recurrence as
	# L0032ndmadd but handling two words per iteration (ecx steps by 2
	# from 1 and stops at num-1, which is odd because num is even).
L0073rdmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)		# tp[ecx-1]
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)		# tp[ecx-2]
	jl	L0073rdmadd
	# Last word of the reduction, then fold the two top words down.
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		# tp[num-2]
	movl	12(%esp),%ecx			# word index i
	xorl	%eax,%eax
	movl	8(%esp),%esi			# esi = a
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)		# tp[num-1]
	cmpl	%ebx,%ecx			# i == num-1: all words done
	movl	%eax,36(%esp,%ebx,4)		# tp[num]
	je	L005common_tail
	# Next outer iteration: i += 1, add a[i]^2 into tp[i].
	movl	4(%esi,%ecx,4),%edi		# edi = a[i+1]
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)			# store updated i
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx			# leal keeps the cmpl flags
	je	L008sqrlast			# i == num-1: no cross products left
	movl	%edx,%ebx			# from here ebx is the doubling
	shrl	$1,%edx				# carry bit, not num-1
	andl	$1,%ebx
.align	4,0x90
	# tp[j] += 2*low(a[j]*a[i] + carry) + carried bit, j = i+1 .. num-1.
L009sqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)		# tp[ecx-1]
	movl	%eax,%ebx
	jle	L009sqradd
	# Double the final high word and add the pending carry bit;
	# ebp receives the overflow.
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
L008sqrlast:
	# Store the two top words and set up the next reduction pass.
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi			# edi = n0 * tp[0] (low 32 bits)
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax			# n[0]
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax			# only the carry out is kept
	leal	-1(%ecx),%ebx			# restore ebx as a word count
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax			# n[1]
	jmp	L0073rdmadd
.align	4,0x90
	# Final step shared by both paths: result = tp - n, and if that
	# borrows, result = tp instead.  Selection is done with a mask,
	# without a data-dependent branch.
L005common_tail:
	movl	16(%esp),%ebp			# ebp = n
	movl	4(%esp),%edi			# edi = result vector
	leal	32(%esp),%esi			# esi = &tp[0]
	movl	(%esi),%eax			# tp[0]
	movl	%ebx,%ecx			# loop counter = num-1
	xorl	%edx,%edx			# index = 0; also clears CF
.align	4,0x90
	# result[j] = tp[j] - n[j] - borrow.  decl does not modify CF, so
	# the borrow survives the loop control.
L010sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax		# next tp word
	leal	1(%edx),%edx
	jge	L010sub
	sbbl	$0,%eax				# top tp word minus final borrow
	# eax is used as a mask: esi = (tp & eax) | (result & ~eax), i.e.
	# the copy source is tp when eax is all-ones and the already
	# written difference when eax is zero.
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	4,0x90
	# Copy the selected vector into the result (a self-copy when the
	# difference was kept) and overwrite each scratch word with %ecx,
	# which is -1 after the countdown above.
L011copy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	L011copy
	movl	24(%esp),%esp			# drop the private frame
	movl	$1,%eax				# return 1
L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# NUL-terminated ASCII identification string:
# "Montgomery Multiplication for x86, CRYPTOGAMS by " followed by the
# author's e-mail address in angle brackets.
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0