Lines Matching refs:DWP
207 $__ra=&DWP(0,"esp"); # return address
208 $__s0=&DWP(4,"esp"); # s0 backing store
209 $__s1=&DWP(8,"esp"); # s1 backing store
210 $__s2=&DWP(12,"esp"); # s2 backing store
211 $__s3=&DWP(16,"esp"); # s3 backing store
212 $__key=&DWP(20,"esp"); # pointer to key schedule
213 $__end=&DWP(24,"esp"); # pointer to end of key schedule
214 $__tbl=&DWP(28,"esp"); # %ebp backing store
218 $_tbl=&DWP(24,"esp");
219 $_esp=&DWP(28,"esp");
248 &mov (&DWP(4,"esp"),$s[2]); # save s2
250 &mov (&DWP(8,"esp"),$s[1]); # save s1
254 &mov ($s[0],&DWP(0,$te,$s[0],8)); # s0>>0
256 &mov ($s[3],&DWP(3,$te,$s[2],8)); # s0>>8
259 &mov ($s[2],&DWP(2,$te,$v1,8)); # s0>>16
261 &mov ($s[1],&DWP(1,$te,$s[1],8)); # s0>>24
264 &xor ($s[3],&DWP(0,$te,$v0,8)); # s3>>0
267 &xor ($s[2],&DWP(3,$te,$v0,8)); # s3>>8
270 &xor ($s[1],&DWP(2,$te,$v1,8)); # s3>>16
271 &mov ($v1,&DWP(4,"esp")); # restore s2
272 &xor ($s[0],&DWP(1,$te,$v0,8)); # s3>>24
276 &xor ($s[2],&DWP(0,$te,$v1,8)); # s2>>0
279 &xor ($s[1],&DWP(3,$te,$v1,8)); # s2>>8
282 &xor ($s[0],&DWP(2,$te,$v0,8)); # s2>>16
283 &mov ($v0,&DWP(8,"esp")); # restore s1
284 &xor ($s[3],&DWP(1,$te,$v1,8)); # s2>>24
288 &xor ($s[1],&DWP(0,$te,$v0,8)); # s1>>0
291 &xor ($s[0],&DWP(3,$te,$v0,8)); # s1>>8
294 &xor ($s[3],&DWP(2,$te,$v1,8)); # s1>>16
296 &xor ($s[2],&DWP(1,$te,$v0,8)); # s1>>24
306 &mov ($v1,&DWP(0,$te,$v0,8)); # 0
309 &xor ($v1,&DWP(3,$te,$v0,8)); # 5
312 &xor ($v1,&DWP(2,$te,$v0,8)); # 10
314 &xor ($v1,&DWP(1,$te,$v0,8)); # 15, t[0] collected
319 &mov ($v1,&DWP(0,$te,$v0,8)); # 4
321 &xor ($v1,&DWP(2,$te,$v0,8)); # 14
324 &xor ($v1,&DWP(1,$te,$v0,8)); # 3
327 &xor ($v1,&DWP(3,$te,$v0,8)); # 9, t[1] collected
332 &mov ($v1,&DWP(2,$te,$v0,8)); # 2
334 &xor ($v1,&DWP(1,$te,$v0,8)); # 7
336 &xor ($v1,&DWP(0,$te,$v0,8)); # 8
339 &xor ($v1,&DWP(3,$te,$v0,8)); # 13, t[2] collected
343 &mov ($s2,&DWP(1,$te,$v0,8)); # 11
344 &xor ($s2,&DWP(3,$te,$s0,8)); # 1
348 &xor ($s2,&DWP(2,$te,$v0,8)); # 6
351 &mov ($s3,&DWP(0,$te,$s3,8)); # 12
361 &mov ("ecx",&DWP(0,$tbl,$acc,8)); # 0
364 &mov ("edx",&DWP(3,$tbl,"edx",8)); # 1
368 &xor ("ecx",&DWP(2,$tbl,$acc,8)); # 10
371 &xor ("edx",&DWP(1,$tbl,$acc,8)); # 11
375 &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 5
378 &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 15
382 &mov ("ecx",&DWP(0,$tbl,$acc,8)); # 4
385 &xor ("ecx",&DWP(2,$tbl,$acc,8)); # 14
389 &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 3
391 &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 9
395 &mov ("ecx",&DWP(2,$tbl,$acc,8)); # 2
399 &xor ("ecx",&DWP(0,$tbl,$acc,8)); # 8
403 &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 7
406 &xor ("edx",&DWP(2,$tbl,"eax",8)); # 6
409 &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 13
410 &xor ("ecx",&DWP(24,$key)); # t[2]
413 &xor ("edx",&DWP(0,$tbl,"ebx",8)); # 12
416 &mov ("ebx",&DWP(28,$key)); # t[3]
468 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
483 &lea ($r2,&DWP(0,$s[$i],$s[$i]));
503 &xor ($s0,&DWP(0,$key)); # xor with key
504 &xor ($s1,&DWP(4,$key));
505 &xor ($s2,&DWP(8,$key));
506 &xor ($s3,&DWP(12,$key));
508 &mov ($acc,&DWP(240,$key)); # load key->rounds
509 &lea ($acc,&DWP(-2,$acc,$acc));
510 &lea ($acc,&DWP(0,$key,$acc,8));
514 &mov ($key,&DWP(0-128,$tbl));
515 &mov ($acc,&DWP(32-128,$tbl));
516 &mov ($key,&DWP(64-128,$tbl));
517 &mov ($acc,&DWP(96-128,$tbl));
518 &mov ($key,&DWP(128-128,$tbl));
519 &mov ($acc,&DWP(160-128,$tbl));
520 &mov ($key,&DWP(192-128,$tbl));
521 &mov ($acc,&DWP(224-128,$tbl));
536 &xor ($s0,&DWP(0,$key));
537 &xor ($s1,&DWP(4,$key));
538 &xor ($s2,&DWP(8,$key));
539 &xor ($s3,&DWP(12,$key));
550 &xor ($s0,&DWP(16,$key));
551 &xor ($s1,&DWP(20,$key));
552 &xor ($s2,&DWP(24,$key));
553 &xor ($s3,&DWP(28,$key));
697 &mov ($acc,&DWP(240,$key)); # load key->rounds
698 &lea ($acc,&DWP(-2,$acc,$acc));
699 &lea ($acc,&DWP(0,$key,$acc,8));
703 &mov (&DWP(8,"esp"),$s0);
704 &mov (&DWP(12,"esp"),$s0);
707 &mov ($s0,&DWP(0-128,$tbl));
708 &mov ($s1,&DWP(32-128,$tbl));
709 &mov ($s2,&DWP(64-128,$tbl));
710 &mov ($s3,&DWP(96-128,$tbl));
711 &mov ($s0,&DWP(128-128,$tbl));
712 &mov ($s1,&DWP(160-128,$tbl));
713 &mov ($s2,&DWP(192-128,$tbl));
714 &mov ($s3,&DWP(224-128,$tbl));
743 &mov ($s0,&DWP(0-128,$tbl));
745 &mov ($s1,&DWP(64-128,$tbl));
747 &mov ($s2,&DWP(128-128,$tbl));
749 &mov ($s3,&DWP(192-128,$tbl));
777 &mov ($out,&DWP(0,$te,$out,8));
781 &xor ($out,&DWP(3,$te,$tmp,8));
788 &xor ($out,&DWP(2,$te,$tmp,8));
794 &xor ($out,&DWP(1,$te,$tmp,8));
795 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
810 &mov ($out,&DWP(2,$te,$out,8));
815 &mov ($tmp,&DWP(0,$te,$tmp,8));
824 &mov ($tmp,&DWP(0,$te,$tmp,8));
832 &mov ($tmp,&DWP(2,$te,$tmp,8));
835 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
849 &xor ($s0,&DWP(0,$key)); # xor with key
850 &xor ($s1,&DWP(4,$key));
851 &xor ($s2,&DWP(8,$key));
852 &xor ($s3,&DWP(12,$key));
854 &mov ($acc,&DWP(240,$key)); # load key->rounds
857 &lea ($acc,&DWP(-2,$acc,$acc));
858 &lea ($acc,&DWP(0,$key,$acc,8));
871 &xor ($s0,&DWP(0,$key));
872 &xor ($s1,&DWP(4,$key));
873 &xor ($s2,&DWP(8,$key));
874 &xor ($s3,&DWP(12,$key));
895 &xor ($s0,&DWP(16*$i+0,$key));
896 &xor ($s1,&DWP(16*$i+4,$key));
897 &xor ($s2,&DWP(16*$i+8,$key));
898 &xor ($s3,&DWP(16*$i+12,$key));
912 &xor ($s0,&DWP(16*$i+0,$key));
913 &xor ($s1,&DWP(16*$i+4,$key));
914 &xor ($s2,&DWP(16*$i+8,$key));
915 &xor ($s3,&DWP(16*$i+12,$key));
929 &xor ($s0,&DWP(16*$i+0,$key));
930 &xor ($s1,&DWP(16*$i+4,$key));
931 &xor ($s2,&DWP(16*$i+8,$key));
932 &xor ($s3,&DWP(16*$i+12,$key));
947 &xor ($s0,&DWP(0,$key));
948 &xor ($s1,&DWP(4,$key));
949 &xor ($s2,&DWP(8,$key));
950 &xor ($s3,&DWP(12,$key));
1169 &lea ($s1,&DWP(-64-63,$key));
1181 &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
1184 &lea ($s1,&DWP(768-4,"esp"));
1187 &lea ($tbl,&DWP(2048+128,$tbl,$s1));
1190 &bt (&DWP(0,$s0),25); # check for SSE bit
1205 &mov ($s0,&DWP(0,$acc)); # load input data
1206 &mov ($s1,&DWP(4,$acc));
1207 &mov ($s2,&DWP(8,$acc));
1208 &mov ($s3,&DWP(12,$acc));
1212 &mov (&DWP(0,$acc),$s0); # write output data
1213 &mov (&DWP(4,$acc),$s1);
1214 &mov (&DWP(8,$acc),$s2);
1215 &mov (&DWP(12,$acc),$s3);
1260 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
1277 &lea ($tp2,&DWP(0,$s[$i],$s[$i]));
1287 &lea ($tp4,&DWP(0,$tp2,$tp2));
1298 &lea ($tp8,&DWP(0,$tp4,$tp4));
1322 &mov (&DWP(4+4*$i,"esp"),$s[$i]) if($i>=2);
1329 &xor ($s0,&DWP(0,$key)); # xor with key
1330 &xor ($s1,&DWP(4,$key));
1331 &xor ($s2,&DWP(8,$key));
1332 &xor ($s3,&DWP(12,$key));
1334 &mov ($acc,&DWP(240,$key)); # load key->rounds
1336 &lea ($acc,&DWP(-2,$acc,$acc));
1337 &lea ($acc,&DWP(0,$key,$acc,8));
1341 &mov ($key,&DWP(0-128,$tbl));
1342 &mov ($acc,&DWP(32-128,$tbl));
1343 &mov ($key,&DWP(64-128,$tbl));
1344 &mov ($acc,&DWP(96-128,$tbl));
1345 &mov ($key,&DWP(128-128,$tbl));
1346 &mov ($acc,&DWP(160-128,$tbl));
1347 &mov ($key,&DWP(192-128,$tbl));
1348 &mov ($acc,&DWP(224-128,$tbl));
1363 &xor ($s0,&DWP(0,$key));
1364 &xor ($s1,&DWP(4,$key));
1365 &xor ($s2,&DWP(8,$key));
1366 &xor ($s3,&DWP(12,$key));
1377 &xor ($s0,&DWP(16,$key));
1378 &xor ($s1,&DWP(20,$key));
1379 &xor ($s2,&DWP(24,$key));
1380 &xor ($s3,&DWP(28,$key));
1482 &mov ($acc,&DWP(240,$key)); # load key->rounds
1483 &lea ($acc,&DWP(-2,$acc,$acc));
1484 &lea ($acc,&DWP(0,$key,$acc,8));
1488 &mov (&DWP(8,"esp"),$s0);
1489 &mov (&DWP(12,"esp"),$s0);
1492 &mov ($s0,&DWP(0-128,$tbl));
1493 &mov ($s1,&DWP(32-128,$tbl));
1494 &mov ($s2,&DWP(64-128,$tbl));
1495 &mov ($s3,&DWP(96-128,$tbl));
1496 &mov ($s0,&DWP(128-128,$tbl));
1497 &mov ($s1,&DWP(160-128,$tbl));
1498 &mov ($s2,&DWP(192-128,$tbl));
1499 &mov ($s3,&DWP(224-128,$tbl));
1559 &mov ($s0,&DWP(0-128,$tbl));
1561 &mov ($s1,&DWP(64-128,$tbl));
1563 &mov ($s2,&DWP(128-128,$tbl));
1565 &mov ($s3,&DWP(192-128,$tbl));
1595 &mov ($out,&DWP(0,$td,$out,8));
1599 &xor ($out,&DWP(3,$td,$tmp,8));
1605 &xor ($out,&DWP(2,$td,$tmp,8));
1610 &xor ($out,&DWP(1,$td,$tmp,8));
1611 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
1621 if($i==0) { &lea ($td,&DWP(2048+128,$td));
1622 &mov ($tmp,&DWP(0-128,$td));
1623 &mov ($acc,&DWP(32-128,$td));
1624 &mov ($tmp,&DWP(64-128,$td));
1625 &mov ($acc,&DWP(96-128,$td));
1626 &mov ($tmp,&DWP(128-128,$td));
1627 &mov ($acc,&DWP(160-128,$td));
1628 &mov ($tmp,&DWP(192-128,$td));
1629 &mov ($acc,&DWP(224-128,$td));
1630 &lea ($td,&DWP(-128,$td)); }
1656 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
1658 &lea ($td,&DWP(-2048,$td)); }
1665 &xor ($s0,&DWP(0,$key)); # xor with key
1666 &xor ($s1,&DWP(4,$key));
1667 &xor ($s2,&DWP(8,$key));
1668 &xor ($s3,&DWP(12,$key));
1670 &mov ($acc,&DWP(240,$key)); # load key->rounds
1673 &lea ($acc,&DWP(-2,$acc,$acc));
1674 &lea ($acc,&DWP(0,$key,$acc,8));
1682 &xor ($s0,&DWP(0,$key));
1683 &xor ($s1,&DWP(4,$key));
1684 &xor ($s2,&DWP(8,$key));
1685 &xor ($s3,&DWP(12,$key));
1702 &xor ($s0,&DWP(16*$i+0,$key));
1703 &xor ($s1,&DWP(16*$i+4,$key));
1704 &xor ($s2,&DWP(16*$i+8,$key));
1705 &xor ($s3,&DWP(16*$i+12,$key));
1715 &xor ($s0,&DWP(16*$i+0,$key));
1716 &xor ($s1,&DWP(16*$i+4,$key));
1717 &xor ($s2,&DWP(16*$i+8,$key));
1718 &xor ($s3,&DWP(16*$i+12,$key));
1728 &xor ($s0,&DWP(16*$i+0,$key));
1729 &xor ($s1,&DWP(16*$i+4,$key));
1730 &xor ($s2,&DWP(16*$i+8,$key));
1731 &xor ($s3,&DWP(16*$i+12,$key));
1741 &xor ($s0,&DWP(0,$key));
1742 &xor ($s1,&DWP(4,$key));
1743 &xor ($s2,&DWP(8,$key));
1744 &xor ($s3,&DWP(12,$key));
1958 &lea ($s1,&DWP(-64-63,$key));
1970 &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl));
1973 &lea ($s1,&DWP(768-4,"esp"));
1976 &lea ($tbl,&DWP(2048+128,$tbl,$s1));
1979 &bt (&DWP(0,$s0),25); # check for SSE bit
1994 &mov ($s0,&DWP(0,$acc)); # load input data
1995 &mov ($s1,&DWP(4,$acc));
1996 &mov ($s2,&DWP(8,$acc));
1997 &mov ($s3,&DWP(12,$acc));
2001 &mov (&DWP(0,$acc),$s0); # write output data
2002 &mov (&DWP(4,$acc),$s1);
2003 &mov (&DWP(8,$acc),$s2);
2004 &mov (&DWP(12,$acc),$s3);
2021 my $_inp=&DWP(32,"esp"); # copy of wparam(0)
2022 my $_out=&DWP(36,"esp"); # copy of wparam(1)
2023 my $_len=&DWP(40,"esp"); # copy of wparam(2)
2024 my $_key=&DWP(44,"esp"); # copy of wparam(3)
2025 my $_ivp=&DWP(48,"esp"); # copy of wparam(4)
2026 my $_tmp=&DWP(52,"esp"); # volatile variable
2028 my $ivec=&DWP(60,"esp"); # ivec[16]
2029 my $aes_key=&DWP(76,"esp"); # copy of aes_key
2030 my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
2043 &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
2045 &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl));
2057 &bt (&DWP(0,$s0),28); # check for hyper-threading bit
2061 &lea ($acc,&DWP(-80-244,"esp"));
2066 &lea ($s1,&DWP(2048+256,$tbl));
2090 &mov ($s0,&DWP(0,$s3)); # load inp
2091 &mov ($s1,&DWP(4,$s3)); # load out
2092 #&mov ($s2,&DWP(8,$s3)); # load len
2093 &mov ($key,&DWP(12,$s3)); # load key
2094 &mov ($acc,&DWP(16,$s3)); # load ivp
2095 &mov ($s3,&DWP(20,$s3)); # load enc flag
2122 &mov ($s0,&DWP(0,$tbl));
2123 &mov ($s1,&DWP(32,$tbl));
2124 &mov ($s2,&DWP(64,$tbl));
2125 &mov ($acc,&DWP(96,$tbl));
2126 &lea ($tbl,&DWP(128,$tbl));
2138 &mov ($s0,&DWP(0,$key)); # load iv
2139 &mov ($s1,&DWP(4,$key));
2142 &mov ($s2,&DWP(8,$key));
2143 &mov ($s3,&DWP(12,$key));
2145 &xor ($s0,&DWP(0,$acc)); # xor input data
2146 &xor ($s1,&DWP(4,$acc));
2147 &xor ($s2,&DWP(8,$acc));
2148 &xor ($s3,&DWP(12,$acc));
2156 &mov (&DWP(0,$key),$s0); # save output data
2157 &mov (&DWP(4,$key),$s1);
2158 &mov (&DWP(8,$key),$s2);
2159 &mov (&DWP(12,$key),$s3);
2161 &lea ($acc,&DWP(16,$acc)); # advance inp
2164 &lea ($s3,&DWP(16,$key)); # advance out
2170 &mov ($s2,&DWP(8,$key)); # restore last 2 dwords
2171 &mov ($s3,&DWP(12,$key));
2172 &mov (&DWP(0,$acc),$s0); # save ivec
2173 &mov (&DWP(4,$acc),$s1);
2174 &mov (&DWP(8,$acc),$s2);
2175 &mov (&DWP(12,$acc),$s3);
2202 &mov ($s0,&DWP(0,$acc)); # read input
2203 &mov ($s1,&DWP(4,$acc));
2204 &mov ($s2,&DWP(8,$acc));
2205 &mov ($s3,&DWP(12,$acc));
2212 &xor ($s0,&DWP(0,$key)); # xor iv
2213 &xor ($s1,&DWP(4,$key));
2214 &xor ($s2,&DWP(8,$key));
2215 &xor ($s3,&DWP(12,$key));
2220 &mov (&DWP(0,$key),$s0); # write output
2221 &mov (&DWP(4,$key),$s1);
2222 &mov (&DWP(8,$key),$s2);
2223 &mov (&DWP(12,$key),$s3);
2227 &lea ($acc,&DWP(16,$acc)); # advance inp
2229 &lea ($key,&DWP(16,$key)); # advance out
2236 &mov ($s0,&DWP(0,$key)); # load iv
2237 &mov ($s1,&DWP(4,$key));
2238 &mov ($s2,&DWP(8,$key));
2239 &mov ($s3,&DWP(12,$key));
2240 &mov (&DWP(0,$acc),$s0); # copy back to user
2241 &mov (&DWP(4,$acc),$s1);
2242 &mov (&DWP(8,$acc),$s2);
2243 &mov (&DWP(12,$acc),$s3);
2248 &mov ($s0,&DWP(0,$acc)); # read input
2249 &mov ($s1,&DWP(4,$acc));
2250 &mov ($s2,&DWP(8,$acc));
2251 &mov ($s3,&DWP(12,$acc));
2254 &mov (&DWP(0,$key),$s0); # copy to temp
2255 &mov (&DWP(4,$key),$s1);
2256 &mov (&DWP(8,$key),$s2);
2257 &mov (&DWP(12,$key),$s3);
2264 &xor ($s0,&DWP(0,$key)); # xor iv
2265 &xor ($s1,&DWP(4,$key));
2266 &xor ($s2,&DWP(8,$key));
2267 &xor ($s3,&DWP(12,$key));
2269 &mov (&DWP(0,$acc),$s0); # write output
2270 &mov (&DWP(4,$acc),$s1);
2271 &mov (&DWP(8,$acc),$s2);
2272 &mov (&DWP(12,$acc),$s3);
2274 &lea ($acc,&DWP(16,$acc)); # advance out
2278 &mov ($s0,&DWP(0,$acc)); # read temp
2279 &mov ($s1,&DWP(4,$acc));
2280 &mov ($s2,&DWP(8,$acc));
2281 &mov ($s3,&DWP(12,$acc));
2283 &mov (&DWP(0,$key),$s0); # copy iv
2284 &mov (&DWP(4,$key),$s1);
2285 &mov (&DWP(8,$key),$s2);
2286 &mov (&DWP(12,$key),$s3);
2290 &lea ($acc,&DWP(16,$acc)); # advance inp
2314 &mov ($s0,&DWP(0,$s0)) if (!$x86only);# load OPENSSL_ia32cap
2318 &lea ($acc,&DWP(-80,"esp"));
2322 &lea ($s1,&DWP(-80-63,$key));
2329 &lea ($s1,&DWP(768,$acc));
2332 &lea ($tbl,&DWP(2048+128,$tbl,$s1));
2342 &mov ($s0,&DWP(0,$s3)); # load inp
2343 &mov ($s1,&DWP(4,$s3)); # load out
2344 #&mov ($s2,&DWP(8,$s3)); # load len
2345 #&mov ($key,&DWP(12,$s3)); # load key
2346 &mov ($acc,&DWP(16,$s3)); # load ivp
2347 &mov ($s3,&DWP(20,$s3)); # load enc flag
2387 &lea ($acc,&DWP(16,$acc)); # advance inp
2389 &lea ($s3,&DWP(16,$key)); # advance out
2407 &mov ($s0,&DWP(0,$key)); # load iv
2408 &mov ($s1,&DWP(4,$key));
2411 &mov ($s2,&DWP(8,$key));
2412 &mov ($s3,&DWP(12,$key));
2414 &xor ($s0,&DWP(0,$acc)); # xor input data
2415 &xor ($s1,&DWP(4,$acc));
2416 &xor ($s2,&DWP(8,$acc));
2417 &xor ($s3,&DWP(12,$acc));
2425 &mov (&DWP(0,$key),$s0); # save output data
2426 &mov (&DWP(4,$key),$s1);
2427 &mov (&DWP(8,$key),$s2);
2428 &mov (&DWP(12,$key),$s3);
2431 &lea ($acc,&DWP(16,$acc)); # advance inp
2433 &lea ($s3,&DWP(16,$key)); # advance out
2442 &mov ($s2,&DWP(8,$key)); # restore last dwords
2443 &mov ($s3,&DWP(12,$key));
2444 &mov (&DWP(0,$acc),$s0); # save ivec
2445 &mov (&DWP(4,$acc),$s1);
2446 &mov (&DWP(8,$acc),$s2);
2447 &mov (&DWP(12,$acc),$s3);
2465 &lea ($key,&DWP(0,$key,$s2));
2474 &mov ($s0,&DWP(0,$key));
2475 &mov ($s1,&DWP(4,$key));
2513 &lea ($s1,&DWP(16,$s1)); # advance out
2515 &lea ($acc,&DWP(16,$acc)); # advance inp
2542 &mov ($s0,&DWP(0,$acc)); # read input
2543 &mov ($s1,&DWP(4,$acc));
2544 &mov ($s2,&DWP(8,$acc));
2545 &mov ($s3,&DWP(12,$acc));
2548 &mov (&DWP(0,$key),$s0); # copy to temp
2549 &mov (&DWP(4,$key),$s1);
2550 &mov (&DWP(8,$key),$s2);
2551 &mov (&DWP(12,$key),$s3);
2558 &xor ($s0,&DWP(0,$key)); # xor iv
2559 &xor ($s1,&DWP(4,$key));
2560 &xor ($s2,&DWP(8,$key));
2561 &xor ($s3,&DWP(12,$key));
2569 &mov (&DWP(0,$acc),$s0); # write output
2570 &mov (&DWP(4,$acc),$s1);
2571 &mov (&DWP(8,$acc),$s2);
2572 &mov (&DWP(12,$acc),$s3);
2574 &lea ($acc,&DWP(16,$acc)); # advance out
2578 &mov ($s0,&DWP(0,$acc)); # read temp
2579 &mov ($s1,&DWP(4,$acc));
2580 &mov ($s2,&DWP(8,$acc));
2581 &mov ($s3,&DWP(12,$acc));
2583 &mov (&DWP(0,$key),$s0); # copy it to iv
2584 &mov (&DWP(4,$key),$s1);
2585 &mov (&DWP(8,$key),$s2);
2586 &mov (&DWP(12,$key),$s3);
2589 &lea ($acc,&DWP(16,$acc)); # advance inp
2599 &mov (&DWP(0,$acc),$s0); # save output to temp
2600 &mov (&DWP(4,$acc),$s1);
2601 &mov (&DWP(8,$acc),$s2);
2602 &mov (&DWP(12,$acc),$s3);
2605 &mov ($s0,&DWP(0,$acc)); # re-read input
2606 &mov ($s1,&DWP(4,$acc));
2607 &mov ($s2,&DWP(8,$acc));
2608 &mov ($s3,&DWP(12,$acc));
2610 &mov (&DWP(0,$key),$s0); # copy it to iv
2611 &mov (&DWP(4,$key),$s1);
2612 &mov (&DWP(8,$key),$s2);
2613 &mov (&DWP(12,$key),$s3);
2650 &xor ("eax",&DWP(1024-128,$tbl,"ecx",4)); # rcon
2665 &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
2666 &lea ($tbl,&DWP(2048+128,$tbl));
2669 &mov ("eax",&DWP(0-128,$tbl));
2670 &mov ("ebx",&DWP(32-128,$tbl));
2671 &mov ("ecx",&DWP(64-128,$tbl));
2672 &mov ("edx",&DWP(96-128,$tbl));
2673 &mov ("eax",&DWP(128-128,$tbl));
2674 &mov ("ebx",&DWP(160-128,$tbl));
2675 &mov ("ecx",&DWP(192-128,$tbl));
2676 &mov ("edx",&DWP(224-128,$tbl));
2689 &mov ("eax",&DWP(0,"esi")); # copy first 4 dwords
2690 &mov ("ebx",&DWP(4,"esi"));
2691 &mov ("ecx",&DWP(8,"esi"));
2692 &mov ("edx",&DWP(12,"esi"));
2693 &mov (&DWP(0,"edi"),"eax");
2694 &mov (&DWP(4,"edi"),"ebx");
2695 &mov (&DWP(8,"edi"),"ecx");
2696 &mov (&DWP(12,"edi"),"edx");
2703 &mov ("eax",&DWP(0,"edi")); # rk[0]
2704 &mov ("edx",&DWP(12,"edi")); # rk[3]
2708 &mov (&DWP(16,"edi"),"eax"); # rk[4]
2709 &xor ("eax",&DWP(4,"edi"));
2710 &mov (&DWP(20,"edi"),"eax"); # rk[5]
2711 &xor ("eax",&DWP(8,"edi"));
2712 &mov (&DWP(24,"edi"),"eax"); # rk[6]
2713 &xor ("eax",&DWP(12,"edi"));
2714 &mov (&DWP(28,"edi"),"eax"); # rk[7]
2720 &mov (&DWP(80,"edi"),10); # setup number of rounds
2725 &mov ("eax",&DWP(0,"esi")); # copy first 6 dwords
2726 &mov ("ebx",&DWP(4,"esi"));
2727 &mov ("ecx",&DWP(8,"esi"));
2728 &mov ("edx",&DWP(12,"esi"));
2729 &mov (&DWP(0,"edi"),"eax");
2730 &mov (&DWP(4,"edi"),"ebx");
2731 &mov (&DWP(8,"edi"),"ecx");
2732 &mov (&DWP(12,"edi"),"edx");
2733 &mov ("ecx",&DWP(16,"esi"));
2734 &mov ("edx",&DWP(20,"esi"));
2735 &mov (&DWP(16,"edi"),"ecx");
2736 &mov (&DWP(20,"edi"),"edx");
2743 &mov ("eax",&DWP(0,"edi")); # rk[0]
2744 &mov ("edx",&DWP(20,"edi")); # rk[5]
2748 &mov (&DWP(24,"edi"),"eax"); # rk[6]
2749 &xor ("eax",&DWP(4,"edi"));
2750 &mov (&DWP(28,"edi"),"eax"); # rk[7]
2751 &xor ("eax",&DWP(8,"edi"));
2752 &mov (&DWP(32,"edi"),"eax"); # rk[8]
2753 &xor ("eax",&DWP(12,"edi"));
2754 &mov (&DWP(36,"edi"),"eax"); # rk[9]
2760 &xor ("eax",&DWP(16,"edi"));
2761 &mov (&DWP(40,"edi"),"eax"); # rk[10]
2762 &xor ("eax",&DWP(20,"edi"));
2763 &mov (&DWP(44,"edi"),"eax"); # rk[11]
2769 &mov (&DWP(72,"edi"),12); # setup number of rounds
2774 &mov ("eax",&DWP(0,"esi")); # copy first 8 dwords
2775 &mov ("ebx",&DWP(4,"esi"));
2776 &mov ("ecx",&DWP(8,"esi"));
2777 &mov ("edx",&DWP(12,"esi"));
2778 &mov (&DWP(0,"edi"),"eax");
2779 &mov (&DWP(4,"edi"),"ebx");
2780 &mov (&DWP(8,"edi"),"ecx");
2781 &mov (&DWP(12,"edi"),"edx");
2782 &mov ("eax",&DWP(16,"esi"));
2783 &mov ("ebx",&DWP(20,"esi"));
2784 &mov ("ecx",&DWP(24,"esi"));
2785 &mov ("edx",&DWP(28,"esi"));
2786 &mov (&DWP(16,"edi"),"eax");
2787 &mov (&DWP(20,"edi"),"ebx");
2788 &mov (&DWP(24,"edi"),"ecx");
2789 &mov (&DWP(28,"edi"),"edx");
2796 &mov ("edx",&DWP(28,"edi")); # rk[7]
2798 &mov ("eax",&DWP(0,"edi")); # rk[0]
2802 &mov (&DWP(32,"edi"),"eax"); # rk[8]
2803 &xor ("eax",&DWP(4,"edi"));
2804 &mov (&DWP(36,"edi"),"eax"); # rk[9]
2805 &xor ("eax",&DWP(8,"edi"));
2806 &mov (&DWP(40,"edi"),"eax"); # rk[10]
2807 &xor ("eax",&DWP(12,"edi"));
2808 &mov (&DWP(44,"edi"),"eax"); # rk[11]
2815 &mov ("eax",&DWP(16,"edi")); # rk[4]
2836 &mov (&DWP(48,"edi"),"eax"); # rk[12]
2837 &xor ("eax",&DWP(20,"edi"));
2838 &mov (&DWP(52,"edi"),"eax"); # rk[13]
2839 &xor ("eax",&DWP(24,"edi"));
2840 &mov (&DWP(56,"edi"),"eax"); # rk[14]
2841 &xor ("eax",&DWP(28,"edi"));
2842 &mov (&DWP(60,"edi"),"eax"); # rk[15]
2848 &mov (&DWP(48,"edi"),14); # setup number of rounds
2872 &lea ($tp2,&DWP(0,$tp1,$tp1));
2882 &lea ($tp4,&DWP(0,$tp2,$tp2));
2893 &lea ($tp8,&DWP(0,$tp4,$tp4));
2901 &mov ($tmp,&DWP(4*($i+1),$key)); # modulo-scheduled load
2916 &mov (&DWP(4*$i,$key),$tp1);
2934 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
2935 &lea ("ecx",&DWP(0,"","ecx",4));
2936 &lea ("edi",&DWP(0,"esi","ecx",4)); # pointer to last chunk
2939 &mov ("eax",&DWP(0,"esi"));
2940 &mov ("ebx",&DWP(4,"esi"));
2941 &mov ("ecx",&DWP(0,"edi"));
2942 &mov ("edx",&DWP(4,"edi"));
2943 &mov (&DWP(0,"edi"),"eax");
2944 &mov (&DWP(4,"edi"),"ebx");
2945 &mov (&DWP(0,"esi"),"ecx");
2946 &mov (&DWP(4,"esi"),"edx");
2947 &mov ("eax",&DWP(8,"esi"));
2948 &mov ("ebx",&DWP(12,"esi"));
2949 &mov ("ecx",&DWP(8,"edi"));
2950 &mov ("edx",&DWP(12,"edi"));
2951 &mov (&DWP(8,"edi"),"eax");
2952 &mov (&DWP(12,"edi"),"ebx");
2953 &mov (&DWP(8,"esi"),"ecx");
2954 &mov (&DWP(12,"esi"),"edx");
2961 &mov ($acc,&DWP(240,$key)); # pull number of rounds
2962 &lea ($acc,&DWP(-2,$acc,$acc));
2963 &lea ($acc,&DWP(0,$key,$acc,8));
2966 &mov ($s0,&DWP(16,$key)); # modulo-scheduled load