#!/usr/local/bin/perl
#
# Generator for the x86 "comba" multiply and square routines
# bn_mul_comba8, bn_mul_comba4, bn_sqr_comba8 and bn_sqr_comba4.
#
# All instructions are emitted through the helpers provided by x86asm.pl.
# The helpers are called with a leading '&' throughout: several of them
# (push, pop, xor) share a name with a Perl built-in, so the sigil is
# required there and is kept everywhere else for uniformity.
#
# Variables are declared with local() rather than my(), following the
# existing style of this file ($a and $b in particular are package
# variables in Perl).

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# mul_add_c(a, ai, b, bi, c0, c1, c2, pos, i, na, nb)
#
# Emit one multiply-accumulate step: (c2:c1:c0) += eax * edx.
# "eax" and "edx" must already hold a[ai] and b[bi]; this routine never
# loads the current operands itself, only the ones for the next step.
#
#   a, b        registers holding the addresses of a[] and b[]
#   ai, bi      indices of the words being multiplied (used only in the
#               emitted comment)
#   c0, c1, c2  registers forming the three-word accumulator
#   pos         0: more products follow for this result word; load
#                  a[na] into eax and b[nb] into edx
#               1: last product for r[i]; store c0 to r[i], then load
#                  a[na] and b[nb] for the next result word
#               2: last product overall; store c0 to r[i] and leave the
#                  r pointer in eax
#   i           index of the result word stored when pos > 0
#   na, nb      indices of the next a[] and b[] words to pre-load
sub mul_add_c
{
    local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("mul a[$ai]*b[$bi]");

    &mul("edx");
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;    # load next a
    &mov("eax",&wparam(0)) if $pos > 0;                # load r[]
    ###
    &adc($c1,"edx");
    &mov("edx",&DWP(($nb)*4,$b,"",0))
        if $pos == 0 || $pos == 1;                     # load next b
    ###
    &adc($c2,0);
    # if pos > 1, it means it is the last loop
    &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;       # save r[]
    # eax held the r pointer until the store above, so the next a word
    # can only be loaded now.
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;    # load next a
}

# sqr_add_c(r, a, ai, bi, c0, c1, c2, pos, i, na, nb)
#
# Emit one accumulate step for squaring: (c2:c1:c0) += product, where the
# product is eax*eax when ai == bi and eax*edx otherwise.  The operands
# must already be loaded by the caller.
#
#   r           register holding the address of r[]
#   a           register holding the address of a[]
#   ai, bi      indices of the words being multiplied
#   c0, c1, c2  registers forming the three-word accumulator
#   pos         0: load a[na] into eax for the next product
#               1: store c0 to r[i], load a[nb] into edx (only when
#                  na != nb) and a[na] into eax
#               2: store c0 to r[i], load nothing
#   i           index of the result word stored when pos > 0
#   na, nb      indices of the next a[] words to pre-load
sub sqr_add_c
{
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    if ($ai == $bi)
        { &mul("eax"); }
    else
        { &mul("edx"); }
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;    # load next a[na]
    ###
    &adc($c1,"edx");
    &mov("edx",&DWP(($nb)*4,$a,"",0))
        if ($pos == 1) && ($na != $nb);                # load next a[nb]
    ###
    &adc($c2,0);
    # if pos > 1, it means it is the last loop
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;          # save r[]
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;    # load next a[na]
}

# sqr_add_c2(r, a, ai, bi, c0, c1, c2, pos, i, na, nb)
#
# Same as sqr_add_c, except that the product is doubled before it is
# accumulated: (c2:c1:c0) += 2 * product.  The carry out of the doubling
# and the carry out of the accumulation are each added to c2.
#
# Parameters are as for sqr_add_c.  The pre-loads differ slightly: a[na]
# is loaded into eax when pos is 0 or 1, and a[nb] is loaded into edx
# when pos <= 1 and na != nb.
sub sqr_add_c2
{
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    if ($ai == $bi)
        { &mul("eax"); }
    else
        { &mul("edx"); }
    &add("eax","eax");
    ###
    &adc("edx","edx");
    ###
    &adc($c2,0);
    &add($c0,"eax");
    &adc($c1,"edx");
    &mov("eax",&DWP(($na)*4,$a,"",0))
        if $pos == 0 || $pos == 1;                     # load next a[na]
    &adc($c2,0);
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;          # save r[]
    &mov("edx",&DWP(($nb)*4,$a,"",0))
        if ($pos <= 1) && ($na != $nb);                # load next a[nb]
    ###
}

# bn_mul_comba(name, num)
#
# Emit a function called `name` that multiplies two num-word operands
# column by column.  The generated code reads the a[] pointer from
# wparam(1) and the b[] pointer from wparam(2), and stores 2*num result
# words through the pointer in wparam(0).
sub bn_mul_comba
{
    local($name,$num)=@_;
    local($a,$b,$c0,$c1,$c2);
    local($i,$as,$bs,$be,$ai,$bi);
    local($tot,$end);
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $b="edi";

    # $as is the a[] index of the first product in the current column;
    # $bs..$be is the range of b[] indices covered by that column.
    $as=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;    # number of columns walked by the loop below

    &push("esi");
    &mov($a,&wparam(1));
    &push("edi");
    &mov($b,&wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0,$c0);
    &mov("eax",&DWP(0,$a,"",0));    # load the first word of a
    &xor($c1,$c1);
    &mov("edx",&DWP(0,$b,"",0));    # load the first word of b

    for ($i=0; $i<$tot; $i++)
    {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("################## Calculate word $i");

        for ($j=$bs; $j<$end; $j++)
        {
            # The register about to become the top accumulator word is
            # cleared at the start of every column.
            &xor($c2,$c2) if ($j == $bs);

            if (($j+1) == $end)
            {
                # Last product of this column: r[i] gets stored, and the
                # next operands are the first pair of the next column.
                $v=1;
                $v=2 if (($i+1) == $tot);
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else
            {
                # More products follow in this column.
                $v=0;
                $na=($ai-1);
                $nb=($bi+1);
            }
            #printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
            &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
            if ($v)
            {
                &comment("saved r[$i]");
                # Rotate the accumulator: the old middle word becomes
                # the low word of the next column.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
            }
            $ai--;
            $bi++;
        }

        # Columns grow while $i < $num-1 and shrink afterwards.
        if ($i < ($num-1))
            { $as++; $be++; }
        else
            { $bs++; }
    }

    # $i == $tot here.  eax still holds the r pointer loaded by the final
    # mul_add_c call (pos == 2), so the top word is stored through it.
    &comment("save r[$i]");
    &mov(&DWP($i*4,"eax","",0),$c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}

# bn_sqr_comba(name, num)
#
# Emit a function called `name` that squares a num-word operand column by
# column.  The generated code reads the r[] pointer from wparam(0) and
# the a[] pointer from wparam(1), and stores 2*num result words.
# Products with ai != bi are emitted once through sqr_add_c2, which
# doubles them; each column stops as soon as the indices meet or cross.
sub bn_sqr_comba
{
    local($name,$num)=@_;
    local($r,$a,$c0,$c1,$c2);
    local($i,$as,$bs,$be,$ai,$bi);
    local($tot,$end);
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $r="edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &xor($c0,$c0);
    &xor($c1,$c1);
    &mov("eax",&DWP(0,$a,"",0));    # load the first word

    $as=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;    # number of columns walked by the loop below

    for ($i=0; $i<$tot; $i++)
    {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("############### Calculate word $i");
        for ($j=$bs; $j<$end; $j++)
        {
            &xor($c2,$c2) if ($j == $bs);

            if (($ai-1) < ($bi+1))
            {
                # The next index pair would meet or cross, so this is
                # the last product emitted for this column.
                $v=1;
                $v=2 if ($i+1) == $tot;
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else
            {
                $v=0;
                $na=$ai-1;
                $nb=$bi+1;
            }

            if ($ai == $bi)
            {
                &sqr_add_c($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            else
            {
                &sqr_add_c2($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            if ($v)
            {
                &comment("saved r[$i]");
                # Rotate the accumulator for the next column.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                last;
            }
            $ai--;
            $bi++;
        }

        # Columns grow while $i < $num-1 and shrink afterwards.
        if ($i < ($num-1))
            { $as++; $be++; }
        else
            { $bs++; }
    }

    # $i == $tot here: store the top result word.
    &mov(&DWP($i*4,$r,"",0),$c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}