#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# This module doesn't present direct interest for OpenSSL, because it
# doesn't provide better performance for longer keys. While 512-bit
# RSA private key operations are 40% faster, 1024-bit ones are hardly
# faster at all, while longer key operations are slower by up to 20%.
# It might be of interest to embedded system developers though, as
# it's smaller than 1KB, yet offers ~3x improvement over compiler
# generated code.
#
# The module targets N32 and N64 MIPS ABIs and currently is a bit
# IRIX-centric, i.e. is likely to require adaptation for other OSes.
#
# Usage: run the script and capture standard output; the generated
# assembly source (which still needs the C preprocessor because of the
# #include lines) is written to STDOUT. The script takes no arguments.

use strict;
use warnings;

# Symbolic register names. Each Perl variable below holds the name of
# the MIPS register that is interpolated into the assembly template.
#
# Argument registers, in the order of the C prototype:
#
# int bn_mul_mont(
my $rp="a0";	# BN_ULONG *rp,
my $ap="a1";	# const BN_ULONG *ap,
my $bp="a2";	# const BN_ULONG *bp,
my $np="a3";	# const BN_ULONG *np,
my $n0="a4";	# const BN_ULONG *n0,
my $num="a5";	# int num);
#
# Note that $n0 is dereferenced in place at .Lproceed, and $num is
# scaled by 8 there, so after the prologue it is a byte count.

# Working registers.
my $lo0="a6";	# low  word of the running ap[]*bp[i] sum
my $hi0="a7";	# high word (carry) of the running ap[]*bp[i] sum
my $lo1="v0";	# low  word of the running np[]*m1 sum (v0 is also the return value)
my $hi1="v1";	# high word (carry) of the running np[]*m1 sum
my $aj="t0";	# ap[j] as loaded from memory
my $bi="t1";	# bp[i] for the current outer iteration
my $nj="t2";	# np[j] as loaded from memory
my $tp="t3";	# cursor into the temporary vector kept on the stack
my $alo="s0";	# low  word of ap[j]*bp[i]
my $ahi="s1";	# high word of ap[j]*bp[i]
my $nlo="s2";	# low  word of np[j]*m1
my $nhi="s3";	# high word of np[j]*m1
my $tj="s4";	# tp[j]; reused as &tp[num] in the final subtract/copy phase
my $i="s5";	# outer loop byte offset into bp[]
my $j="s6";	# inner loop byte offset into ap[]/np[]
my $fp="t8";	# base of the 64-byte callee-saved register area
my $m1="t9";	# low word of lo0*n0, the per-iteration multiplier for np[]
# s7 is used directly in the template as a second carry scratch next
# to AT, which is why all of s0-s7 are saved and restored.

# NOTE(review): $FRAME is not referenced anywhere in this script; the
# template below hard-codes a 64-byte frame (8 saved registers * 8).
my $FRAME=8*(2+8);

# Outline of the generated routine:
#   prologue   reserve the 64-byte save area addressed through $fp and
#              return 0 immediately when num < 4;
#   .Lproceed  load *n0, reserve num*8+16 bytes for the temporary
#              vector, round sp down to a multiple of 4096, save s0-s7;
#   .L1st      first pass over ap[]/np[] for bp[0], filling tp[];
#   .Louter    one pass per remaining bp[i] ...
#   .Linner    ... accumulating into the existing tp[];
#   .Lsub      rp[] = tp[] - np[] with borrow propagation;
#   .Lcopy     pick tp[] or rp[] by the borrow mask, copy it to rp[]
#              and zero tp[] on the way;
#   epilogue   restore s0-s7, release the frame and return 1.
my $code=<<___;
#include <asm.h>
#include <regdef.h>

.text

.set	noat
.set	reorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	.set	noreorder
	PTR_SUB	sp,64
	move	$fp,sp
	.frame	$fp,64,ra
	slt	AT,$num,4
	li	v0,0
	beqzl	AT,.Lproceed
	nop
	jr	ra
	PTR_ADD	sp,$fp,64
	.set	reorder
.align	5
.Lproceed:
	ld	$n0,0($n0)
	ld	$bi,0($bp)	# bp[0]
	ld	$aj,0($ap)	# ap[0]
	ld	$nj,0($np)	# np[0]
	PTR_SUB	sp,16		# place for two extra words
	sll	$num,3
	li	AT,-4096
	PTR_SUB	sp,$num
	and	sp,AT

	sd	s0,0($fp)
	sd	s1,8($fp)
	sd	s2,16($fp)
	sd	s3,24($fp)
	sd	s4,32($fp)
	sd	s5,40($fp)
	sd	s6,48($fp)
	sd	s7,56($fp)

	dmultu	$aj,$bi
	ld	$alo,8($ap)
	ld	$nlo,8($np)
	mflo	$lo0
	mfhi	$hi0
	dmultu	$lo0,$n0
	mflo	$m1

	dmultu	$alo,$bi
	mflo	$alo
	mfhi	$ahi

	dmultu	$nj,$m1
	mflo	$lo1
	mfhi	$hi1
	dmultu	$nlo,$m1
	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	daddu	$hi1,AT
	mflo	$nlo
	mfhi	$nhi

	move	$tp,sp
	li	$j,16
.align	4
.L1st:
	.set	noreorder
	PTR_ADD	$aj,$ap,$j
	ld	$aj,($aj)
	PTR_ADD	$nj,$np,$j
	ld	$nj,($nj)

	dmultu	$aj,$bi
	daddu	$lo0,$alo,$hi0
	daddu	$lo1,$nlo,$hi1
	sltu	AT,$lo0,$hi0
	sltu	s7,$lo1,$hi1
	daddu	$hi0,$ahi,AT
	daddu	$hi1,$nhi,s7
	mflo	$alo
	mfhi	$ahi

	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	dmultu	$nj,$m1
	daddu	$hi1,AT
	addu	$j,8
	sd	$lo1,($tp)
	sltu	s7,$j,$num
	mflo	$nlo
	mfhi	$nhi

	bnez	s7,.L1st
	PTR_ADD	$tp,8
	.set	reorder

	daddu	$lo0,$alo,$hi0
	sltu	AT,$lo0,$hi0
	daddu	$hi0,$ahi,AT

	daddu	$lo1,$nlo,$hi1
	sltu	s7,$lo1,$hi1
	daddu	$hi1,$nhi,s7
	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	daddu	$hi1,AT

	sd	$lo1,($tp)

	daddu	$hi1,$hi0
	sltu	AT,$hi1,$hi0
	sd	$hi1,8($tp)
	sd	AT,16($tp)

	li	$i,8
.align	4
.Louter:
	PTR_ADD	$bi,$bp,$i
	ld	$bi,($bi)
	ld	$aj,($ap)
	ld	$alo,8($ap)
	ld	$tj,(sp)

	dmultu	$aj,$bi
	ld	$nj,($np)
	ld	$nlo,8($np)
	mflo	$lo0
	mfhi	$hi0
	daddu	$lo0,$tj
	dmultu	$lo0,$n0
	sltu	AT,$lo0,$tj
	daddu	$hi0,AT
	mflo	$m1

	dmultu	$alo,$bi
	mflo	$alo
	mfhi	$ahi

	dmultu	$nj,$m1
	mflo	$lo1
	mfhi	$hi1

	dmultu	$nlo,$m1
	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	daddu	$hi1,AT
	mflo	$nlo
	mfhi	$nhi

	move	$tp,sp
	li	$j,16
	ld	$tj,8($tp)
.align	4
.Linner:
	.set	noreorder
	PTR_ADD	$aj,$ap,$j
	ld	$aj,($aj)
	PTR_ADD	$nj,$np,$j
	ld	$nj,($nj)

	dmultu	$aj,$bi
	daddu	$lo0,$alo,$hi0
	daddu	$lo1,$nlo,$hi1
	sltu	AT,$lo0,$hi0
	sltu	s7,$lo1,$hi1
	daddu	$hi0,$ahi,AT
	daddu	$hi1,$nhi,s7
	mflo	$alo
	mfhi	$ahi

	daddu	$lo0,$tj
	addu	$j,8
	dmultu	$nj,$m1
	sltu	AT,$lo0,$tj
	daddu	$lo1,$lo0
	daddu	$hi0,AT
	sltu	s7,$lo1,$lo0
	ld	$tj,16($tp)
	daddu	$hi1,s7
	sltu	AT,$j,$num
	mflo	$nlo
	mfhi	$nhi
	sd	$lo1,($tp)
	bnez	AT,.Linner
	PTR_ADD	$tp,8
	.set	reorder

	daddu	$lo0,$alo,$hi0
	sltu	AT,$lo0,$hi0
	daddu	$hi0,$ahi,AT
	daddu	$lo0,$tj
	sltu	s7,$lo0,$tj
	daddu	$hi0,s7

	ld	$tj,16($tp)
	daddu	$lo1,$nlo,$hi1
	sltu	AT,$lo1,$hi1
	daddu	$hi1,$nhi,AT
	daddu	$lo1,$lo0
	sltu	s7,$lo1,$lo0
	daddu	$hi1,s7
	sd	$lo1,($tp)

	daddu	$lo1,$hi1,$hi0
	sltu	$hi1,$lo1,$hi0
	daddu	$lo1,$tj
	sltu	AT,$lo1,$tj
	daddu	$hi1,AT
	sd	$lo1,8($tp)
	sd	$hi1,16($tp)

	addu	$i,8
	sltu	s7,$i,$num
	bnez	s7,.Louter

	.set	noreorder
	PTR_ADD	$tj,sp,$num	# &tp[num]
	move	$tp,sp
	move	$ap,sp
	li	$hi0,0		# clear borrow bit

.align	4
.Lsub:	ld	$lo0,($tp)
	ld	$lo1,($np)
	PTR_ADD	$tp,8
	PTR_ADD	$np,8
	dsubu	$lo1,$lo0,$lo1	# tp[i]-np[i]
	sgtu	AT,$lo1,$lo0
	dsubu	$lo0,$lo1,$hi0
	sgtu	$hi0,$lo0,$lo1
	sd	$lo0,($rp)
	or	$hi0,AT
	sltu	AT,$tp,$tj
	bnez	AT,.Lsub
	PTR_ADD	$rp,8

	dsubu	$hi0,$hi1,$hi0	# handle upmost overflow bit
	move	$tp,sp
	PTR_SUB	$rp,$num	# restore rp
	not	$hi1,$hi0

	and	$ap,$hi0,sp
	and	$bp,$hi1,$rp
	or	$ap,$ap,$bp	# ap=borrow?tp:rp

.align	4
.Lcopy:	ld	$aj,($ap)
	PTR_ADD	$ap,8
	PTR_ADD	$tp,8
	sd	zero,-8($tp)
	sltu	AT,$tp,$tj
	sd	$aj,($rp)
	bnez	AT,.Lcopy
	PTR_ADD	$rp,8

	ld	s0,0($fp)
	ld	s1,8($fp)
	ld	s2,16($fp)
	ld	s3,24($fp)
	ld	s4,32($fp)
	ld	s5,40($fp)
	ld	s6,48($fp)
	ld	s7,56($fp)
	li	v0,1
	jr	ra
	PTR_ADD	sp,$fp,64
	.set	reorder
END(bn_mul_mont)
.rdata
.asciiz	"Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>"
___

# Emit the generated assembly. Both calls are checked because buffered
# write errors (full disk, closed pipe) may only surface at close time
# and would otherwise leave a silently truncated .s file behind.
print($code) or die "error writing to STDOUT: $!";
close STDOUT or die "error closing STDOUT: $!";