Home | History | Annotate | Download | only in asm
      1 .text
      2 
      3 .global	bn_mul_mont
      4 .type	bn_mul_mont,%function
      5 
      6 .align	2
      7 bn_mul_mont:
        @ ---------------------------------------------------------------
        @ bn_mul_mont: word-serial Montgomery multiplication for ARMv4.
        @
        @ Inputs, as consumed below:
        @   r0       rp   result words (spilled at entry, reloaded for .Lsub)
        @   r1       ap   first operand words
        @   r2       bp   second operand words
        @   r3       np   modulus words (subtracted from tp in .Lsub)
        @   [sp,#0]  &n0  pointer to the n0 word, dereferenced once
        @   [sp,#4]  num  number of 32-bit words in ap/bp/np/rp
        @ NOTE(review): presumably called from C as
        @   int bn_mul_mont(rp, ap, bp, np, n0, num) - confirm the prototype.
        @
        @ Returns r0 = 0 (rp untouched) when num < 2, otherwise r0 = 1.
        @
        @ Frame while r0 = &tp[num-1] (byte offsets from r0):
        @   sp .. r0+4      tp[0..num], num+1 words of scratch
        @   #2*4 .. #11*4   saved r4-r12,lr
        @   #12*4           rp
        @   #13*4           bp on entry, then the current bp pointer
        @   #14*4           &n0 on entry, overwritten with the n0 value
        @   #15*4           num on entry, overwritten with &bp[num-1]
        @
        @ Register roles inside the loops:
        @   r2 = bp[i]   r8 = per-round factor tp[0]*n0   r4 = tp store pointer
        @   r11:r10 = running ap[j]*bp[i] sum   r14:r12 = running np[j]*r8 sum
        @   r7 = prefetched tp word (outer rounds only)
        @
        @ NOTE(review): the embedded listing numbers skip 61 and 110; the
        @ logic reads complete without them, but confirm against upstream.
        @ ---------------------------------------------------------------
      8 	stmdb	sp!,{r0,r2}		@ spill rp,bp: [sp] = {rp, bp, &n0, num}
      9 	ldr	r0,[sp,#3*4]		@ load num
        	@ num < 2: undo the two-word spill and return 0 via .Labrt
     10 	cmp	r0,#2
     11 	movlt	r0,#0
     12 	addlt	sp,sp,#2*4
     13 	blt	.Labrt
     14 
     15 	stmdb	sp!,{r4-r12,lr}		@ save 10 registers
     16 
     17 	mov	r0,r0,lsl#2		@ rescale r0 for byte count
     18 	sub	sp,sp,r0		@ alloca(4*num)
     19 	sub	sp,sp,#4		@ +extra dword
     20 	sub	r0,r0,#4		@ "num=num-1"
     21 	add	r4,r2,r0		@ &bp[num-1]
     22 
     23 	add	r0,sp,r0		@ r0 to point at &tp[num-1]
     24 	ldr	r8,[r0,#14*4]		@ &n0
     25 	ldr	r2,[r2]		@ bp[0]
     26 	ldr	r5,[r1],#4		@ ap[0],ap++
     27 	ldr	r6,[r3],#4		@ np[0],np++
     28 	ldr	r8,[r8]		@ *n0
     29 	str	r4,[r0,#15*4]		@ save &bp[num-1] (outer-loop end marker) over the num slot
     30 
        	@ Round 0: tp = (ap*bp[0] + np*r8) >> 32, where r8 = lo(ap[0]*bp[0])*n0.
        	@ The low word of the first column (r10 after the umlal) is never
        	@ stored; only the carry in r12 moves on to the next column.
     31 	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
     32 	str	r8,[r0,#14*4]		@ save n0 value
     33 	mul	r8,r10,r8		@ "tp[0]"*n0
     34 	mov	r12,#0
     35 	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
     36 	mov	r4,sp
     37 
        	@ Columns 1..num-1 of round 0; each pass stores tp[j-1].
     38 .L1st:
     39 	ldr	r5,[r1],#4		@ ap[j],ap++
     40 	mov	r10,r11
     41 	ldr	r6,[r3],#4		@ np[j],np++
     42 	mov	r11,#0
     43 	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
     44 	mov	r14,#0
     45 	umlal	r12,r14,r6,r8	@ np[j]*n0
     46 	adds	r12,r12,r10
     47 	str	r12,[r4],#4		@ tp[j-1]=,tp++
     48 	adc	r12,r14,#0
     49 	cmp	r4,r0
     50 	bne	.L1st
     51 
        	@ Fold the two leftover high words into tp[num-1] and tp[num].
     52 	adds	r12,r12,r11
     53 	ldr	r4,[r0,#13*4]		@ restore bp
     54 	mov	r14,#0
     55 	ldr	r8,[r0,#14*4]		@ restore n0
     56 	adc	r14,r14,#0
     57 	str	r12,[r0]		@ tp[num-1]=
     58 	str	r14,[r0,#4]		@ tp[num]=
     59 
        	@ Rounds i = 1..num-1: tp = (tp + ap*bp[i] + np*r8) >> 32.
     60 .Louter:
     62 	sub	r7,r0,sp		@ 4*(num-1): byte distance from tp[0] to tp[num-1]
     63 	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
     64 	ldr	r2,[r4,#4]!		@ *(++bp)
     65 	sub	r3,r3,r7		@ "rewind" np to &np[1]
     66 	ldr	r5,[r1,#-4]		@ ap[0]
     67 	ldr	r10,[sp]		@ tp[0]
     68 	ldr	r6,[r3,#-4]		@ np[0]
     69 	ldr	r7,[sp,#4]		@ tp[1]
     70 
     71 	mov	r11,#0
     72 	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
     73 	str	r4,[r0,#13*4]		@ save bp
     74 	mul	r8,r10,r8
     75 	mov	r12,#0
     76 	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
     77 	mov	r4,sp
     78 
        	@ The carry produced by the first adds must survive until the
        	@ adc four instructions later: the mov/umlal/ldr in between are
        	@ non-flag-setting forms, so do not reorder or add S suffixes.
     79 .Linner:
     80 	ldr	r5,[r1],#4		@ ap[j],ap++
     81 	adds	r10,r11,r7		@ +=tp[j]
     82 	ldr	r6,[r3],#4		@ np[j],np++
     83 	mov	r11,#0
     84 	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
     85 	mov	r14,#0
     86 	umlal	r12,r14,r6,r8	@ np[j]*n0
     87 	adc	r11,r11,#0
     88 	ldr	r7,[r4,#8]		@ tp[j+1]
     89 	adds	r12,r12,r10
     90 	str	r12,[r4],#4		@ tp[j-1]=,tp++
     91 	adc	r12,r14,#0
     92 	cmp	r4,r0
     93 	bne	.Linner
     94 
        	@ r7 now holds the previous round's tp[num] (last prefetch above);
        	@ add it and both leftover high words into tp[num-1], tp[num].
     95 	adds	r12,r12,r11
     96 	mov	r14,#0
     97 	ldr	r4,[r0,#13*4]		@ restore bp
     98 	adc	r14,r14,#0
     99 	ldr	r8,[r0,#14*4]		@ restore n0
    100 	adds	r12,r12,r7
    101 	ldr	r7,[r0,#15*4]		@ restore &bp[num-1] (end marker)
    102 	adc	r14,r14,#0
    103 	str	r12,[r0]		@ tp[num-1]=
    104 	str	r14,[r0,#4]		@ tp[num]=
    105 
        	@ Done once the word just consumed was bp[num-1].
    106 	cmp	r4,r7
    107 	bne	.Louter
    108 
        	@ Final step: rp = tp - np over num words, then keep tp instead
        	@ if that subtraction borrowed. r14 still holds tp[num] here.
    109 	ldr	r2,[r0,#12*4]		@ pull rp
    111 	add	r0,r0,#4		@ r0 to point at &tp[num]
    112 	sub	r5,r0,sp		@ 4*num: byte length of the num-word vectors
    113 	mov	r4,sp			@ "rewind" r4
    114 	mov	r1,r4			@ "borrow" r1
    115 	sub	r3,r3,r5		@ "rewind" r3 to &np[0]
    116 
    117 	subs	r7,r7,r7		@ sets C=1, i.e. "no borrow" for the sbcs chain
    118 .Lsub:	ldr	r7,[r4],#4
    119 	ldr	r6,[r3],#4
    120 	sbcs	r7,r7,r6		@ tp[j]-np[j]
    121 	str	r7,[r2],#4		@ rp[j]=
    122 	teq	r4,r0		@ preserve carry
    123 	bne	.Lsub
    124 	sbcs	r14,r14,#0		@ upmost carry: r14 = tp[num] - borrow
    125 	mov	r4,sp			@ "rewind" r4
    126 	sub	r2,r2,r5		@ "rewind" r2
    127 
        	@ Branch-free select: r1 = (tp & r14) | (rp & ~r14).
        	@ r14 is all ones when the subtraction borrowed with tp[num]=0.
    128 	and	r1,r4,r14
    129 	bic	r3,r2,r14
    130 	orr	r1,r1,r3		@ ap=borrow?tp:rp
    131 
        	@ Copy num words from the selected source into rp while
        	@ overwriting tp[0..num-1] with the sp value.
    132 .Lcopy:	ldr	r7,[r1],#4		@ copy or in-place refresh
    133 	str	sp,[r4],#4		@ zap tp
    134 	str	r7,[r2],#4
    135 	cmp	r4,r0
    136 	bne	.Lcopy
    137 
    138 	add	sp,r0,#4		@ skip over tp[num+1]
    139 	ldmia	sp!,{r4-r12,lr}		@ restore registers
    140 	add	sp,sp,#2*4		@ skip over {r0,r2}
    141 	mov	r0,#1
        	@ Shared return: plain mov when lr bit 0 is clear, otherwise fall
        	@ through to a hand-encoded "bx lr" (0xe12fff1e).
    142 .Labrt:	tst	lr,#1
    143 	moveq	pc,lr			@ be binary compatible with V4, yet
    144 	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
    146 .asciz	"Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro (at) openssl.org>"
    147 .align	2
    148