1 #!/usr/bin/env perl 2 # 3 # ==================================================================== 4 # Written by Andy Polyakov <appro (at] fy.chalmers.se> for the OpenSSL 5 # project. The module is, however, dual licensed under OpenSSL and 6 # CRYPTOGAMS licenses depending on where you obtain it. For further 7 # details see http://www.openssl.org/~appro/cryptogams/. 8 # ==================================================================== 9 10 # This module doesn't present direct interest for OpenSSL, because it 11 # doesn't provide better performance for longer keys. While 512-bit 12 # RSA private key operations are 40% faster, 1024-bit ones are hardly 13 # faster at all, while longer key operations are slower by up to 20%. 14 # It might be of interest to embedded system developers though, as 15 # it's smaller than 1KB, yet offers ~3x improvement over compiler 16 # generated code. 17 # 18 # The module targets N32 and N64 MIPS ABIs and currently is a bit 19 # IRIX-centric, i.e. is likely to require adaptation for other OSes. 20 21 # int bn_mul_mont( 22 $rp="a0"; # BN_ULONG *rp, 23 $ap="a1"; # const BN_ULONG *ap, 24 $bp="a2"; # const BN_ULONG *bp, 25 $np="a3"; # const BN_ULONG *np, 26 $n0="a4"; # const BN_ULONG *n0, 27 $num="a5"; # int num); 28 29 $lo0="a6"; 30 $hi0="a7"; 31 $lo1="v0"; 32 $hi1="v1"; 33 $aj="t0"; 34 $bi="t1"; 35 $nj="t2"; 36 $tp="t3"; 37 $alo="s0"; 38 $ahi="s1"; 39 $nlo="s2"; 40 $nhi="s3"; 41 $tj="s4"; 42 $i="s5"; 43 $j="s6"; 44 $fp="t8"; 45 $m1="t9"; 46 47 $FRAME=8*(2+8); 48 49 $code=<<___; 50 #include <asm.h> 51 #include <regdef.h> 52 53 .text 54 55 .set noat 56 .set reorder 57 58 .align 5 59 .globl bn_mul_mont 60 .ent bn_mul_mont 61 bn_mul_mont: 62 .set noreorder 63 PTR_SUB sp,64 64 move $fp,sp 65 .frame $fp,64,ra 66 slt AT,$num,4 67 li v0,0 68 beqzl AT,.Lproceed 69 nop 70 jr ra 71 PTR_ADD sp,$fp,64 72 .set reorder 73 .align 5 74 .Lproceed: 75 ld $n0,0($n0) 76 ld $bi,0($bp) # bp[0] 77 ld $aj,0($ap) # ap[0] 78 ld $nj,0($np) # np[0] 79 PTR_SUB sp,16 # place for two extra words 80 sll $num,3 81 li AT,-4096 82 PTR_SUB sp,$num 83 and sp,AT 84 85 sd s0,0($fp) 86 sd s1,8($fp) 87 sd s2,16($fp) 88 sd s3,24($fp) 89 sd s4,32($fp) 90 sd s5,40($fp) 91 sd s6,48($fp) 92 sd s7,56($fp) 93 94 dmultu $aj,$bi 95 ld $alo,8($ap) 96 ld $nlo,8($np) 97 mflo $lo0 98 mfhi $hi0 99 dmultu $lo0,$n0 100 mflo $m1 101 102 dmultu $alo,$bi 103 mflo $alo 104 mfhi $ahi 105 106 dmultu $nj,$m1 107 mflo $lo1 108 mfhi $hi1 109 dmultu $nlo,$m1 110 daddu $lo1,$lo0 111 sltu AT,$lo1,$lo0 112 daddu $hi1,AT 113 mflo $nlo 114 mfhi $nhi 115 116 move $tp,sp 117 li $j,16 118 .align 4 119 .L1st: 120 .set noreorder 121 PTR_ADD $aj,$ap,$j 122 ld $aj,($aj) 123 PTR_ADD $nj,$np,$j 124 ld $nj,($nj) 125 126 dmultu $aj,$bi 127 daddu $lo0,$alo,$hi0 128 daddu $lo1,$nlo,$hi1 129 sltu AT,$lo0,$hi0 130 sltu s7,$lo1,$hi1 131 daddu $hi0,$ahi,AT 132 daddu $hi1,$nhi,s7 133 mflo $alo 134 mfhi $ahi 135 136 daddu $lo1,$lo0 137 sltu AT,$lo1,$lo0 138 dmultu $nj,$m1 139 daddu $hi1,AT 140 addu $j,8 141 sd $lo1,($tp) 142 sltu s7,$j,$num 143 mflo $nlo 144 mfhi $nhi 145 146 bnez s7,.L1st 147 PTR_ADD $tp,8 148 .set reorder 149 150 daddu $lo0,$alo,$hi0 151 sltu AT,$lo0,$hi0 152 daddu $hi0,$ahi,AT 153 154 daddu $lo1,$nlo,$hi1 155 sltu s7,$lo1,$hi1 156 daddu $hi1,$nhi,s7 157 daddu $lo1,$lo0 158 sltu AT,$lo1,$lo0 159 daddu $hi1,AT 160 161 sd $lo1,($tp) 162 163 daddu $hi1,$hi0 164 sltu AT,$hi1,$hi0 165 sd $hi1,8($tp) 166 sd AT,16($tp) 167 168 li $i,8 169 .align 4 170 .Louter: 171 PTR_ADD $bi,$bp,$i 172 ld $bi,($bi) 173 ld $aj,($ap) 174 ld $alo,8($ap) 175 ld $tj,(sp) 176 177 dmultu $aj,$bi 178 ld $nj,($np) 179 ld $nlo,8($np) 180 mflo $lo0 181 mfhi $hi0 182 daddu $lo0,$tj 183 dmultu $lo0,$n0 184 sltu AT,$lo0,$tj 185 daddu $hi0,AT 186 mflo $m1 187 188 dmultu $alo,$bi 189 mflo $alo 190 mfhi $ahi 191 192 dmultu $nj,$m1 193 mflo $lo1 194 mfhi $hi1 195 196 dmultu $nlo,$m1 197 daddu $lo1,$lo0 198 sltu AT,$lo1,$lo0 199 daddu $hi1,AT 200 mflo $nlo 201 mfhi $nhi 202 203 move $tp,sp 204 li $j,16 205 ld $tj,8($tp) 206 .align 4 207 .Linner: 208 .set noreorder 209 PTR_ADD $aj,$ap,$j 210 ld $aj,($aj) 211 PTR_ADD $nj,$np,$j 212 ld $nj,($nj) 213 214 dmultu $aj,$bi 215 daddu $lo0,$alo,$hi0 216 daddu $lo1,$nlo,$hi1 217 sltu AT,$lo0,$hi0 218 sltu s7,$lo1,$hi1 219 daddu $hi0,$ahi,AT 220 daddu $hi1,$nhi,s7 221 mflo $alo 222 mfhi $ahi 223 224 daddu $lo0,$tj 225 addu $j,8 226 dmultu $nj,$m1 227 sltu AT,$lo0,$tj 228 daddu $lo1,$lo0 229 daddu $hi0,AT 230 sltu s7,$lo1,$lo0 231 ld $tj,16($tp) 232 daddu $hi1,s7 233 sltu AT,$j,$num 234 mflo $nlo 235 mfhi $nhi 236 sd $lo1,($tp) 237 bnez AT,.Linner 238 PTR_ADD $tp,8 239 .set reorder 240 241 daddu $lo0,$alo,$hi0 242 sltu AT,$lo0,$hi0 243 daddu $hi0,$ahi,AT 244 daddu $lo0,$tj 245 sltu s7,$lo0,$tj 246 daddu $hi0,s7 247 248 ld $tj,16($tp) 249 daddu $lo1,$nlo,$hi1 250 sltu AT,$lo1,$hi1 251 daddu $hi1,$nhi,AT 252 daddu $lo1,$lo0 253 sltu s7,$lo1,$lo0 254 daddu $hi1,s7 255 sd $lo1,($tp) 256 257 daddu $lo1,$hi1,$hi0 258 sltu $hi1,$lo1,$hi0 259 daddu $lo1,$tj 260 sltu AT,$lo1,$tj 261 daddu $hi1,AT 262 sd $lo1,8($tp) 263 sd $hi1,16($tp) 264 265 addu $i,8 266 sltu s7,$i,$num 267 bnez s7,.Louter 268 270 .set noreorder 271 PTR_ADD $tj,sp,$num # &tp[num] 272 move $tp,sp 273 move $ap,sp 274 li $hi0,0 # clear borrow bit 275 276 .align 4 277 .Lsub: ld $lo0,($tp) 278 ld $lo1,($np) 279 PTR_ADD $tp,8 280 PTR_ADD $np,8 281 dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] 282 sgtu AT,$lo1,$lo0 283 dsubu $lo0,$lo1,$hi0 284 sgtu $hi0,$lo0,$lo1 285 sd $lo0,($rp) 286 or $hi0,AT 287 sltu AT,$tp,$tj 288 bnez AT,.Lsub 289 PTR_ADD $rp,8 290 291 dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit 292 move $tp,sp 293 PTR_SUB $rp,$num # restore rp 294 not $hi1,$hi0 295 296 and $ap,$hi0,sp 297 and $bp,$hi1,$rp 298 or $ap,$ap,$bp # ap=borrow?tp:rp 299 300 .align 4 301 .Lcopy: ld $aj,($ap) 302 PTR_ADD $ap,8 303 PTR_ADD $tp,8 304 sd zero,-8($tp) 305 sltu AT,$tp,$tj 306 sd $aj,($rp) 307 bnez AT,.Lcopy 308 PTR_ADD $rp,8 309 310 ld s0,0($fp) 311 ld s1,8($fp) 312 ld s2,16($fp) 313 ld s3,24($fp) 314 ld s4,32($fp) 315 ld s5,40($fp) 316 ld s6,48($fp) 317 ld s7,56($fp) 318 li v0,1 319 jr ra 320 PTR_ADD sp,$fp,64 321 .set reorder 322 END(bn_mul_mont) 323 .rdata 324 .asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>" 325 ___ 326 327 print $code; 328 close STDOUT; 329