#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# sha1_block for Thumb.
#
# January 2007.
#
# The code does not present direct interest to OpenSSL, because of low
# performance. Its purpose is to establish _size_ benchmark. Pretty
# useless one I must say, because 30% or 88 bytes larger ARMv4 code
# [available on demand] is almost _twice_ as fast. It should also be
# noted that in-lining of .Lcommon and .Lrotate improves performance
# by over 40%, while code increases by only 10% or 32 bytes. But once
# again, the goal was to establish _size_ benchmark, not performance.
# Usage: perl <this script> [output-file]
#
# Builds the Thumb assembly text for sha1_block_data_order in $code and
# prints it.  The first command-line argument, when given, names the file
# the assembly is written to; without it the text goes to the inherited
# STDOUT.
$output=shift;
if (defined($output) && length($output)) {
    # Three-argument open so the name is taken literally (never parsed for
    # mode characters), and a failure aborts instead of going unnoticed.
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# $inline selects between emitting the common/rotate sequences in every
# round body (true) and emitting them once as .Lcommon/.Lrotate
# subroutines reached with "bl" (false).
$inline=0;
#$cheat_on_binutils=1;

# Register allocation used throughout the generated code.
$t0="r0";
$t1="r1";
$t2="r2";
$a="r3";
$b="r4";
$c="r5";
$d="r6";
$e="r7";
$K="r8";	# "upper" registers can be used in add/sub and mov insns
$ctx="r9";
$inp="r10";
$len="r11";
$Xi="r12";

# Returns the assembly shared by every round: step $t0 back one word,
# load that word into $t1, add the round constant $K to E, and leave
# (A<<5)+(A>>27)+E+X[i] in $t2.
sub common {
<<___;
	sub	$t0,#4
	ldr	$t1,[$t0]
	add	$e,$K			@ E+=K_xx_xx
	lsl	$t2,$a,#5
	add	$t2,$e
	lsr	$e,$a,#27
	add	$t2,$e			@ E+=ROR(A,27)
	add	$t2,$t1			@ E+=X[i]
___
}

# Returns the assembly that shifts the working variables at the end of a
# round: E=D, D=C, C=ROR(B,2), B=A, A=$t2+$t1 (where $t1 holds the value
# of the round function F).
sub rotate {
<<___;
	mov	$e,$d			@ E=D
	mov	$d,$c			@ D=C
	lsl	$c,$b,#30
	lsr	$b,$b,#2
	orr	$c,$b			@ C=ROR(B,2)
	mov	$b,$a			@ B=A
	add	$a,$t2,$t1		@ A=E+F_xx_xx(B,C,D)
___
}

# Appends one round of the 00..19 group to $code.  The round function is
# computed into $t1 as ((C^D)&B)^D.
sub BODY_00_19 {
$code.=$inline ? common() : "\tbl .Lcommon\n";
$code.=<<___;
	mov	$t1,$c
	eor	$t1,$d
	and	$t1,$b
	eor	$t1,$d			@ F_00_19(B,C,D)
___
$code.=$inline ? rotate() : "\tbl .Lrotate\n";
}

# Appends one round of the 20..39 group to $code (the generated loop is
# also re-entered for 60..79).  The round function is B^C^D.
sub BODY_20_39 {
$code.=$inline ? common() : "\tbl .Lcommon\n";
$code.=<<___;
	mov	$t1,$b
	eor	$t1,$c
	eor	$t1,$d			@ F_20_39(B,C,D)
___
$code.=$inline ? rotate() : "\tbl .Lrotate\n";
}

# Appends one round of the 40..59 group to $code.  The round function is
# (B&C)|((B|C)&D); E is used as scratch because common() has already
# folded its value into $t2.
sub BODY_40_59 {
$code.=$inline ? common() : "\tbl .Lcommon\n";
$code.=<<___;
	mov	$t1,$b
	and	$t1,$c
	mov	$e,$b
	orr	$e,$c
	and	$e,$d
	orr	$t1,$e			@ F_40_59(B,C,D)
___
$code.=$inline ? rotate() : "\tbl .Lrotate\n";
}

# Function header.
$code=<<___;
.text
.code	16

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
___
# Optional ARM-mode entry stub that switches to Thumb; disabled unless
# $cheat_on_binutils is set above.
if ($cheat_on_binutils) {
$code.=<<___;
.code	32
	add	r3,pc,#1
	bx	r3			@ switch to Thumb ISA
.code	16
___
}
# Prologue, loading of the 16 input words, expansion of the schedule on
# the stack, and setup for the first round group.
$code.=<<___;
	push	{r4-r7}
	mov	r3,r8
	mov	r4,r9
	mov	r5,r10
	mov	r6,r11
	mov	r7,r12
	push	{r3-r7,lr}
	lsl	r2,#6
	mov	$ctx,r0			@ save context
	mov	$inp,r1			@ save inp
	mov	$len,r2			@ save len
	add	$len,$inp		@ $len to point at inp end

.Lloop:
	mov	$Xi,sp
	mov	$t2,sp
	sub	$t2,#16*4		@ [3]
.LXload:
	ldrb	$a,[$t1,#0]		@ $t1 is r1 and holds inp
	ldrb	$b,[$t1,#1]
	ldrb	$c,[$t1,#2]
	ldrb	$d,[$t1,#3]
	lsl	$a,#24
	lsl	$b,#16
	lsl	$c,#8
	orr	$a,$b
	orr	$a,$c
	orr	$a,$d
	add	$t1,#4
	push	{$a}
	cmp	sp,$t2
	bne	.LXload			@ [+14*16]

	mov	$inp,$t1		@ update $inp
	sub	$t2,#32*4
	sub	$t2,#32*4
	mov	$e,#31			@ [+4]
.LXupdate:
	ldr	$a,[sp,#15*4]
	ldr	$b,[sp,#13*4]
	ldr	$c,[sp,#7*4]
	ldr	$d,[sp,#2*4]
	eor	$a,$b
	eor	$a,$c
	eor	$a,$d
	ror	$a,$e
	push	{$a}
	cmp	sp,$t2
	bne	.LXupdate		@ [+(11+1)*64]

	ldmia	$t0!,{$a,$b,$c,$d,$e}	@ $t0 is r0 and holds ctx
	mov	$t0,$Xi

	ldr	$t2,.LK_00_19
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+7+4]
.L_00_19:
___
	&BODY_00_19();
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_00_19		@ [+(2+9+4+2+8+2)*20]

	ldr	$t2,.LK_20_39
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_20_39_or_60_79:
___
	&BODY_20_39();
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_20_39_or_60_79	@ [+(2+9+3+2+8+2)*20*2]
	cmp	sp,$t0
	beq	.Ldone			@ [+2]

	ldr	$t2,.LK_40_59
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_40_59:
___
	&BODY_40_59();
# Tail of the 40..59 loop, re-entry into the shared loop for 60..79,
# accumulation into the context, and the epilogue.
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_40_59		@ [+(2+9+6+2+8+2)*20]

	ldr	$t2,.LK_60_79
	mov	$Xi,sp
	mov	$K,$t2
	b	.L_20_39_or_60_79	@ [+4]
.Ldone:
	mov	$t0,$ctx
	ldr	$t1,[$t0,#0]
	ldr	$t2,[$t0,#4]
	add	$a,$t1
	ldr	$t1,[$t0,#8]
	add	$b,$t2
	ldr	$t2,[$t0,#12]
	add	$c,$t1
	ldr	$t1,[$t0,#16]
	add	$d,$t2
	add	$e,$t1
	stmia	$t0!,{$a,$b,$c,$d,$e}	@ [+20]

	add	sp,#80*4		@ deallocate stack frame
	mov	$t0,$ctx		@ restore ctx
	mov	$t1,$inp		@ restore inp
	cmp	$t1,$len
	beq	.Lexit
	b	.Lloop			@ [+6] total 3212 cycles
.Lexit:
	pop	{r2-r7}
	mov	r8,r2
	mov	r9,r3
	mov	r10,r4
	mov	r11,r5
	mov	r12,r6
	mov	lr,r7
	pop	{r4-r7}
	bx	lr
.align	2
___
# Out-of-line copies of the shared sequences, needed only when the round
# bodies reach them with "bl".
$code.=".Lcommon:\n".common()."\tmov pc,lr\n" if (!$inline);
$code.=".Lrotate:\n".rotate()."\tmov pc,lr\n" if (!$inline);
# Round constants and trailer.
$code.=<<___;
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
___

print $code;
# Closing flushes the buffered output; a failure here means the assembly
# was not completely written, so report it rather than exit successfully.
close STDOUT or die "error closing STDOUT: $!";