#!/usr/local/bin/perl
#
# Generator for the x86 "comba" big-number routines.  Running this script
# emits four assembly functions through the perlasm helpers in x86asm.pl:
#
#     bn_mul_comba8, bn_mul_comba4   - r[] = a[] * b[]   (8 / 4 words each)
#     bn_sqr_comba8, bn_sqr_comba4   - r[] = a[] * a[]   (8 / 4 words)
#
# Each result word ("column") is produced by summing every partial product
# a[x]*b[y] with x+y == column into a three-register accumulator
# (c0 = low, c1 = middle, c2 = overflow).  After a column is stored the
# three registers rotate, so the old c1 becomes the new c0.
#
# NOTE: the helper calls keep the leading '&' on purpose.  Several emitter
# names (push, pop, xor, ...) collide with Perl built-ins/operators, and the
# '&' form is what makes Perl call the user-defined sub instead.
#
# NOTE(review): &DWP(disp,base,"",0) is assumed to build a dword memory
# operand [base+disp] and &wparam(n) the n-th stack argument; both live in
# x86asm.pl - confirm there.

# Locate the directory this script lives in so x86asm.pl can be found no
# matter what the current directory is.  When $0 has no directory part the
# match fails; fall back to "./" instead of leaving $dir undefined (which
# would add an empty-string entry to @INC).
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
push(@INC, "${dir}", "${dir}../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0], $0);

&bn_mul_comba("bn_mul_comba8", 8);
&bn_mul_comba("bn_mul_comba4", 4);
&bn_sqr_comba("bn_sqr_comba8", 8);
&bn_sqr_comba("bn_sqr_comba4", 4);

&asm_finish();

# mul_add_c - emit one multiply/accumulate step: (c2,c1,c0) += a[ai]*b[bi].
#
# On entry to the emitted code eax and edx must already hold the two
# operands; this routine never loads the current pair itself.
#
#   $a, $b        registers holding the a[] and b[] base pointers
#   $ai, $bi      indices of the current operands (used for the comment only)
#   $c0,$c1,$c2   accumulator registers, low to high
#   $pos          0 - more products follow in this column: preload
#                     eax = a[na], edx = b[nb]
#                 1 - last product of the column: store c0 to r[i], then
#                     preload eax = a[na], edx = b[nb] for the next column
#                 2 - last product overall: store c0 to r[i], no preload
#   $i            index of the result word being computed
#   $na, $nb      indices of the operands to preload
#
# For $pos > 0 eax is overwritten with the r[] pointer (wparam(0)) so the
# store can go through it.  The loads are interleaved between the
# add/adc instructions; keep the emission order as is.
sub mul_add_c
{
    local($a, $ai, $b, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("mul a[$ai]*b[$bi]");

    &mul("edx");
    &add($c0, "eax");
    &mov("eax", &DWP(($na)*4, $a, "", 0)) if $pos == 0;    # load next a
    &mov("eax", &wparam(0)) if $pos > 0;                   # load r[]

    &adc($c1, "edx");
    # load next b (both for "more to come" and "end of column")
    &mov("edx", &DWP(($nb)*4, $b, "", 0)) if $pos == 0 || $pos == 1;

    &adc($c2, 0);
    &mov(&DWP($i*4, "eax", "", 0), $c0) if $pos > 0;       # save r[i]
    # eax was borrowed for the r[] pointer; now load next a
    &mov("eax", &DWP(($na)*4, $a, "", 0)) if $pos == 1;
}

# sqr_add_c - emit one accumulate step for squaring, counted once:
# (c2,c1,c0) += eax*eax when ai == bi, otherwise eax*edx.
#
#   $r            register holding the r[] base pointer
#   $a            register holding the a[] base pointer
#   $ai, $bi      indices of the current operands
#   $c0,$c1,$c2   accumulator registers, low to high
#   $pos          0 - more products follow in this column: preload eax = a[na]
#                 1 - column finished: store c0 to r[i], preload eax = a[na]
#                     and, when na != nb, edx = a[nb]
#                 2 - final column: store c0 to r[i], no preload
#   $i            index of the result word being computed
#   $na, $nb      indices of the operands to preload
sub sqr_add_c
{
    local($r, $a, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("sqr a[$ai]*a[$bi]");

    # eax (and edx when ai != bi) are expected to be pre-loaded.
    if ($ai == $bi) {
        &mul("eax");
    }
    else {
        &mul("edx");
    }
    &add($c0, "eax");
    &mov("eax", &DWP(($na)*4, $a, "", 0)) if $pos == 0;    # load next a[na]

    &adc($c1, "edx");
    &mov("edx", &DWP(($nb)*4, $a, "", 0)) if ($pos == 1) && ($na != $nb);

    &adc($c2, 0);
    &mov(&DWP($i*4, $r, "", 0), $c0) if $pos > 0;          # save r[i]
    &mov("eax", &DWP(($na)*4, $a, "", 0)) if $pos == 1;    # load next a[na]
}

# sqr_add_c2 - emit one accumulate step for squaring, counted twice:
# (c2,c1,c0) += 2 * a[ai]*a[bi].
#
# The 64-bit product in edx:eax is doubled in place (add/adc); the bit
# shifted out of edx is collected in c2 before the doubled product is
# added to c0/c1.
#
# Parameters are the same as for sqr_add_c.  Preloading differs slightly:
# eax = a[na] is loaded for $pos 0 and 1, and edx = a[nb] is loaded for
# $pos <= 1 whenever na != nb.
sub sqr_add_c2
{
    local($r, $a, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("sqr a[$ai]*a[$bi]");

    # eax and edx are expected to be pre-loaded.
    if ($ai == $bi) {
        &mul("eax");
    }
    else {
        &mul("edx");
    }
    &add("eax", "eax");        # double the low half
    &adc("edx", "edx");        # double the high half, pulling in the carry
    &adc($c2, 0);              # carry out of the doubling
    &add($c0, "eax");
    &adc($c1, "edx");
    &mov("eax", &DWP(($na)*4, $a, "", 0)) if $pos == 0 || $pos == 1;
    &adc($c2, 0);
    &mov(&DWP($i*4, $r, "", 0), $c0) if $pos > 0;          # save r[i]
    &mov("edx", &DWP(($nb)*4, $a, "", 0)) if ($pos <= 1) && ($na != $nb);
}

# bn_mul_comba - emit a complete function $name computing the 2*$num word
# product r[] = a[] * b[] of two $num-word inputs.
#
# Stack arguments of the emitted function: wparam(0) = r, wparam(1) = a,
# wparam(2) = b.  esi/edi/ebp/ebx are saved and restored around the body.
#
#   $name   symbol name of the emitted function
#   $num    number of words in each input operand
sub bn_mul_comba
{
    local($name, $num) = @_;
    local($a, $b, $c0, $c1, $c2);
    # $j, $v, $na, $nb used to leak out as package globals; keep them local.
    local($i, $j, $v, $as, $bs, $be, $ai, $bi, $na, $nb);
    local($tot, $end);

    &function_begin_B($name, "");

    $c0 = "ebx";
    $c1 = "ecx";
    $c2 = "ebp";
    $a  = "esi";
    $b  = "edi";

    # Column $i covers a[ai]*b[bi] for bi = bs..be with ai counting down
    # from as.
    $as = 0;
    $bs = 0;
    $be = 0;
    $tot = $num + $num - 1;    # number of columns

    # The pushes and argument loads are interleaved; do not reorder them.
    &push("esi");
    &mov($a, &wparam(1));
    &push("edi");
    &mov($b, &wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0, $c0);
    &mov("eax", &DWP(0, $a, "", 0));    # load the first word of a
    &xor($c1, $c1);
    &mov("edx", &DWP(0, $b, "", 0));    # load the first word of b

    for ($i = 0; $i < $tot; $i++) {
        $ai  = $as;
        $bi  = $bs;
        $end = $be + 1;

        &comment("################## Calculate word $i");

        for ($j = $bs; $j < $end; $j++) {
            # c2 starts every column at zero
            &xor($c2, $c2) if ($j == $bs);

            if (($j + 1) == $end) {
                # Last product of this column: store the word and preload
                # the first operand pair of the next column (2 = nothing
                # left to preload).
                $v = 1;
                $v = 2 if (($i + 1) == $tot);
                $na = $as + ($i <  ($num - 1));
                $nb = $bs + ($i >= ($num - 1));
            }
            else {
                # Next product in the same column.
                $v  = 0;
                $na = ($ai - 1);
                $nb = ($bi + 1);
            }

            &mul_add_c($a, $ai, $b, $bi, $c0, $c1, $c2, $v, $i, $na, $nb);

            if ($v) {
                &comment("saved r[$i]");
                # rotate the accumulator: old c1 is the next column's c0
                ($c0, $c1, $c2) = ($c1, $c2, $c0);
            }
            $ai--;
            $bi++;
        }

        # Columns grow while $i < $num-1 and shrink afterwards.
        $as++ if ($i <  ($num - 1));
        $bs++ if ($i >= ($num - 1));
        $be++ if ($i <  ($num - 1));
    }

    # $i == $tot here: store the top word.  eax still holds the r[] pointer
    # loaded by the final ($pos == 2) call to mul_add_c.
    &comment("save r[$i]");
    &mov(&DWP($i*4, "eax", "", 0), $c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}

# bn_sqr_comba - emit a complete function $name computing the 2*$num word
# square r[] = a[] * a[] of a $num-word input.
#
# Stack arguments of the emitted function: wparam(0) = r, wparam(1) = a.
# Only products with ai >= bi are generated for each column: off-diagonal
# ones are added twice (sqr_add_c2), the diagonal one once (sqr_add_c).
#
#   $name   symbol name of the emitted function
#   $num    number of words in the input operand
sub bn_sqr_comba
{
    local($name, $num) = @_;
    # The original initialised this list from @_ by mistake; every one of
    # these is assigned explicitly below.
    local($r, $a, $c0, $c1, $c2);
    local($i, $j, $v, $as, $bs, $be, $ai, $bi, $na, $nb);
    local($tot, $end);

    &function_begin_B($name, "");

    $c0 = "ebx";
    $c1 = "ecx";
    $c2 = "ebp";
    $a  = "esi";
    $r  = "edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r, &wparam(0));
    &mov($a, &wparam(1));
    &xor($c0, $c0);
    &xor($c1, $c1);
    &mov("eax", &DWP(0, $a, "", 0));    # load the first word of a

    $as = 0;
    $bs = 0;
    $be = 0;
    $tot = $num + $num - 1;    # number of columns

    for ($i = 0; $i < $tot; $i++) {
        $ai  = $as;
        $bi  = $bs;
        $end = $be + 1;

        &comment("############### Calculate word $i");
        for ($j = $bs; $j < $end; $j++) {
            # c2 starts every column at zero
            &xor($c2, $c2) if ($j == $bs);

            if (($ai - 1) < ($bi + 1)) {
                # The next pair would cross the diagonal, so this is the
                # last product of the column (2 = last column as well).
                $v = 1;
                $v = 2 if ($i + 1) == $tot;
                $na = $as + ($i <  ($num - 1));
                $nb = $bs + ($i >= ($num - 1));
            }
            else {
                $v  = 0;
                $na = $ai - 1;
                $nb = $bi + 1;
            }

            if ($ai == $bi) {
                &sqr_add_c($r, $a, $ai, $bi,
                    $c0, $c1, $c2, $v, $i, $na, $nb);
            }
            else {
                &sqr_add_c2($r, $a, $ai, $bi,
                    $c0, $c1, $c2, $v, $i, $na, $nb);
            }

            if ($v) {
                &comment("saved r[$i]");
                # rotate the accumulator: old c1 is the next column's c0
                ($c0, $c1, $c2) = ($c1, $c2, $c0);
                last;
            }
            $ai--;
            $bi++;
        }

        # Columns grow while $i < $num-1 and shrink afterwards.
        $as++ if ($i <  ($num - 1));
        $bs++ if ($i >= ($num - 1));
        $be++ if ($i <  ($num - 1));
    }

    # $i == $tot here: store the top word of the square.
    &mov(&DWP($i*4, $r, "", 0), $c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}