Home | History | Annotate | Download | only in asm

Lines Matching refs:QWP

101 &$movekey ($rndkey0,&QWP(0,$key));
102 &$movekey ($rndkey1,&QWP(16,$key));
110 &$movekey ($rndkey1,&QWP(0,$key));
120 &movups ($rndkey0,&QWP(0,$key));
121 &$movekey ($rndkey1,&QWP(0x10,$key));
123 &$movekey ($rndkey0,&QWP(0x20,$key));
131 &$movekey ($rndkey1,&QWP(-0x40,$key));
133 &$movekey ($rndkey0,&QWP(-0x30,$key));
136 &$movekey ($rndkey1,&QWP(-0x20,$key));
138 &$movekey ($rndkey0,&QWP(-0x10,$key));
141 &$movekey ($rndkey1,&QWP(0,$key));
143 &$movekey ($rndkey0,&QWP(0x10,$key));
145 &$movekey ($rndkey1,&QWP(0x20,$key));
147 &$movekey ($rndkey0,&QWP(0x30,$key));
149 &$movekey ($rndkey1,&QWP(0x40,$key));
151 &$movekey ($rndkey0,&QWP(0x50,$key));
153 &$movekey ($rndkey1,&QWP(0x60,$key));
155 &$movekey ($rndkey0,&QWP(0x70,$key));
168 &movups ($inout0,&QWP(0,"eax"));
175 &movups (&QWP(0,"eax"),$inout0);
184 &movups ($inout0,&QWP(0,"eax"));
191 &movups (&QWP(0,"eax"),$inout0);
212 &$movekey ($rndkey0,&QWP(0,$key));
214 &$movekey ($rndkey1,&QWP(16,$key));
219 &$movekey ($rndkey0,&QWP(0,$key));
226 &$movekey ($rndkey1,&QWP(16,$key));
231 &$movekey ($rndkey0,&QWP(0,$key));
251 &$movekey ($rndkey0,&QWP(0,$key));
252 &$movekey ($rndkey1,&QWP(16,$key));
259 &$movekey ($rndkey0,&QWP(0,$key));
267 &$movekey ($rndkey1,&QWP(16,$key));
273 &$movekey ($rndkey0,&QWP(0,$key));
293 &$movekey ($rndkey0,&QWP(0,$key));
295 &$movekey ($rndkey1,&QWP(16,$key));
309 &$movekey ($rndkey0,&QWP(0,$key));
322 &$movekey ($rndkey1,&QWP(16,$key));
330 &$movekey ($rndkey0,&QWP(0,$key));
378 &movdqu ($inout0,&QWP(0,$inp));
379 &movdqu ($inout1,&QWP(0x10,$inp));
380 &movdqu ($inout2,&QWP(0x20,$inp));
381 &movdqu ($inout3,&QWP(0x30,$inp));
382 &movdqu ($inout4,&QWP(0x40,$inp));
383 &movdqu ($inout5,&QWP(0x50,$inp));
389 &movups (&QWP(0,$out),$inout0);
390 &movdqu ($inout0,&QWP(0,$inp));
391 &movups (&QWP(0x10,$out),$inout1);
392 &movdqu ($inout1,&QWP(0x10,$inp));
393 &movups (&QWP(0x20,$out),$inout2);
394 &movdqu ($inout2,&QWP(0x20,$inp));
395 &movups (&QWP(0x30,$out),$inout3);
396 &movdqu ($inout3,&QWP(0x30,$inp));
397 &movups (&QWP(0x40,$out),$inout4);
398 &movdqu ($inout4,&QWP(0x40,$inp));
399 &movups (&QWP(0x50,$out),$inout5);
401 &movdqu ($inout5,&QWP(0x50,$inp));
412 &movups (&QWP(0,$out),$inout0);
413 &movups (&QWP(0x10,$out),$inout1);
414 &movups (&QWP(0x20,$out),$inout2);
415 &movups (&QWP(0x30,$out),$inout3);
416 &movups (&QWP(0x40,$out),$inout4);
417 &movups (&QWP(0x50,$out),$inout5);
423 &movups ($inout0,&QWP(0,$inp));
426 &movups ($inout1,&QWP(0x10,$inp));
428 &movups ($inout2,&QWP(0x20,$inp));
431 &movups ($inout3,&QWP(0x30,$inp));
433 &movups ($inout4,&QWP(0x40,$inp));
436 &movups (&QWP(0,$out),$inout0);
437 &movups (&QWP(0x10,$out),$inout1);
438 &movups (&QWP(0x20,$out),$inout2);
439 &movups (&QWP(0x30,$out),$inout3);
440 &movups (&QWP(0x40,$out),$inout4);
448 &movups (&QWP(0,$out),$inout0);
454 &movups (&QWP(0,$out),$inout0);
455 &movups (&QWP(0x10,$out),$inout1);
460 &movups (&QWP(0,$out),$inout0);
461 &movups (&QWP(0x10,$out),$inout1);
462 &movups (&QWP(0x20,$out),$inout2);
467 &movups (&QWP(0,$out),$inout0);
468 &movups (&QWP(0x10,$out),$inout1);
469 &movups (&QWP(0x20,$out),$inout2);
470 &movups (&QWP(0x30,$out),$inout3);
479 &movdqu ($inout0,&QWP(0,$inp));
480 &movdqu ($inout1,&QWP(0x10,$inp));
481 &movdqu ($inout2,&QWP(0x20,$inp));
482 &movdqu ($inout3,&QWP(0x30,$inp));
483 &movdqu ($inout4,&QWP(0x40,$inp));
484 &movdqu ($inout5,&QWP(0x50,$inp));
490 &movups (&QWP(0,$out),$inout0);
491 &movdqu ($inout0,&QWP(0,$inp));
492 &movups (&QWP(0x10,$out),$inout1);
493 &movdqu ($inout1,&QWP(0x10,$inp));
494 &movups (&QWP(0x20,$out),$inout2);
495 &movdqu ($inout2,&QWP(0x20,$inp));
496 &movups (&QWP(0x30,$out),$inout3);
497 &movdqu ($inout3,&QWP(0x30,$inp));
498 &movups (&QWP(0x40,$out),$inout4);
499 &movdqu ($inout4,&QWP(0x40,$inp));
500 &movups (&QWP(0x50,$out),$inout5);
502 &movdqu ($inout5,&QWP(0x50,$inp));
513 &movups (&QWP(0,$out),$inout0);
514 &movups (&QWP(0x10,$out),$inout1);
515 &movups (&QWP(0x20,$out),$inout2);
516 &movups (&QWP(0x30,$out),$inout3);
517 &movups (&QWP(0x40,$out),$inout4);
518 &movups (&QWP(0x50,$out),$inout5);
524 &movups ($inout0,&QWP(0,$inp));
527 &movups ($inout1,&QWP(0x10,$inp));
529 &movups ($inout2,&QWP(0x20,$inp));
532 &movups ($inout3,&QWP(0x30,$inp));
534 &movups ($inout4,&QWP(0x40,$inp));
537 &movups (&QWP(0,$out),$inout0);
538 &movups (&QWP(0x10,$out),$inout1);
539 &movups (&QWP(0x20,$out),$inout2);
540 &movups (&QWP(0x30,$out),$inout3);
541 &movups (&QWP(0x40,$out),$inout4);
549 &movups (&QWP(0,$out),$inout0);
555 &movups (&QWP(0,$out),$inout0);
556 &movups (&QWP(0x10,$out),$inout1);
561 &movups (&QWP(0,$out),$inout0);
562 &movups (&QWP(0x10,$out),$inout1);
563 &movups (&QWP(0x20,$out),$inout2);
568 &movups (&QWP(0,$out),$inout0);
569 &movups (&QWP(0x10,$out),$inout1);
570 &movups (&QWP(0x20,$out),$inout2);
571 &movups (&QWP(0x30,$out),$inout3);
599 &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
600 &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
619 &movdqa ($inout3,&QWP(0,"esp"));
625 &$movekey ($rndkey0,&QWP(0,$key_));
627 &movups ($in0,&QWP(0,$inp));
630 &$movekey ($rndkey1,&QWP(16,$key_));
634 &$movekey ($rndkey0,&QWP(0,$key));
640 &$movekey ($rndkey1,&QWP(16,$key));
644 &$movekey ($rndkey0,&QWP(0,$key));
648 &paddq ($ivec,&QWP(16,"esp"));
656 &movups (&QWP(0,$out),$in0); # save output
663 &movups (&QWP(0,$out),$cmac);
678 &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
679 &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
696 &movdqa ($inout3,&QWP(0,"esp")); # bswap mask
707 &movups ($in0,&QWP(0,$inp)); # load inp
708 &paddq ($ivec,&QWP(16,"esp"));
709 &lea ($inp,&QWP(16,$inp));
716 &movups (&QWP(0,$out),$in0); # save output
723 &$movekey ($rndkey0,&QWP(0,$key_));
725 &$movekey ($rndkey1,&QWP(16,$key_));
730 &$movekey ($rndkey0,&QWP(0,$key));
736 &$movekey ($rndkey1,&QWP(16,$key));
740 &$movekey ($rndkey0,&QWP(0,$key));
742 &movups ($in0,&QWP(0,$inp)); # load inp
743 &paddq ($ivec,&QWP(16,"esp"));
746 &lea ($inp,&QWP(16,$inp));
760 &movups (&QWP(0,$out),$cmac);
795 &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec
820 &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask
832 &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet
834 &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet
841 &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec
850 &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec
862 &$movekey ($rndkey0,&QWP(0,$key_));
863 &$movekey ($rndkey1,&QWP(16,$key_));
877 &$movekey ($rndkey0,&QWP(0,$key));
882 &movups ($rndkey1,&QWP(0,$inp));
883 &movups ($rndkey0,&QWP(0x10,$inp));
885 &movups ($rndkey1,&QWP(0x20,$inp));
887 &movups (&QWP(0,$out),$inout0);
888 &movdqa ($rndkey0,&QWP(16,"esp")); # load increment
890 &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet
891 &movups (&QWP(0x10,$out),$inout1);
892 &movups (&QWP(0x20,$out),$inout2);
895 &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment
896 &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask
898 &movups ($inout1,&QWP(0x30,$inp));
899 &movups ($inout2,&QWP(0x40,$inp));
901 &movups ($inout1,&QWP(0x50,$inp));
903 &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet
906 &movups (&QWP(0x30,$out),$inout3);
908 &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet
910 &movups (&QWP(0x40,$out),$inout4);
912 &movups (&QWP(0x50,$out),$inout5);
924 &movdqa ($inout5,&QWP(32,"esp")); # pull count-less ivec
946 &movups ($rndkey1,&QWP(0,$inp));
947 &movups ($rndkey0,&QWP(0x10,$inp));
949 &movups ($rndkey1,&QWP(0x20,$inp));
951 &movups ($rndkey0,&QWP(0x30,$inp));
953 &movups ($rndkey1,&QWP(0x40,$inp));
955 &movups (&QWP(0,$out),$inout0);
957 &movups (&QWP(0x10,$out),$inout1);
958 &movups (&QWP(0x20,$out),$inout2);
959 &movups (&QWP(0x30,$out),$inout3);
960 &movups (&QWP(0x40,$out),$inout4);
964 &movups ($inout0,&QWP(0,$rounds_)); # load ivec
972 &movups ($in0,&QWP(0,$inp));
974 &movups (&QWP(0,$out),$in0);
979 &movups ($inout3,&QWP(0,$inp));
980 &movups ($inout4,&QWP(0x10,$inp));
983 &movups (&QWP(0,$out),$inout0);
984 &movups (&QWP(0x10,$out),$inout1);
989 &movups ($inout3,&QWP(0,$inp));
990 &movups ($inout4,&QWP(0x10,$inp));
992 &movups ($inout5,&QWP(0x20,$inp));
994 &movups (&QWP(0,$out),$inout0);
996 &movups (&QWP(0x10,$out),$inout1);
997 &movups (&QWP(0x20,$out),$inout2);
1002 &movups ($inout4,&QWP(0,$inp));
1003 &movups ($inout5,&QWP(0x10,$inp));
1004 &movups ($rndkey1,&QWP(0x20,$inp));
1006 &movups ($rndkey0,&QWP(0x30,$inp));
1008 &movups (&QWP(0,$out),$inout0);
1010 &movups (&QWP(0x10,$out),$inout1);
1012 &movups (&QWP(0x20,$out),$inout2);
1013 &movups (&QWP(0x30,$out),$inout3);
1032 &movups ($inout0,&QWP(0,$inp));
1057 &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87
1074 &movdqa (&QWP(16*$i,"esp"),$tweak);
1081 &movdqa (&QWP(16*$i++,"esp"),$tweak);
1083 &$movekey ($rndkey0,&QWP(0,$key_));
1085 &movups ($inout0,&QWP(0,$inp)); # load input
1089 &movdqu ($inout1,&QWP(16*1,$inp));
1091 &movdqu ($inout2,&QWP(16*2,$inp));
1093 &movdqu ($inout3,&QWP(16*3,$inp));
1095 &movdqu ($inout4,&QWP(16*4,$inp));
1097 &movdqu ($rndkey1,&QWP(16*5,$inp));
1100 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1101 &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak
1104 &$movekey ($rndkey1,&QWP(16,$key_));
1106 &pxor ($inout1,&QWP(16*1,"esp"));
1108 &pxor ($inout2,&QWP(16*2,"esp"));
1110 &pxor ($inout3,&QWP(16*3,"esp"));
1113 &pxor ($inout4,&QWP(16*4,"esp"));
1117 &$movekey ($rndkey0,&QWP(0,$key));
1121 &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak
1123 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1125 &xorps ($inout1,&QWP(16*1,"esp"));
1126 &movups (&QWP(16*0,$out),$inout0); # write output
1127 &xorps ($inout2,&QWP(16*2,"esp"));
1128 &movups (&QWP(16*1,$out),$inout1);
1129 &xorps ($inout3,&QWP(16*3,"esp"));
1130 &movups (&QWP(16*2,$out),$inout2);
1131 &xorps ($inout4,&QWP(16*4,"esp"));
1132 &movups (&QWP(16*3,$out),$inout3);
1134 &movups (&QWP(16*4,$out),$inout4);
1136 &movups (&QWP(16*5,$out),$inout5);
1138 &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87
1187 &movdqa (&QWP(16*0,"esp"),$inout3);
1188 &movdqa (&QWP(16*1,"esp"),$inout4);
1191 &movdqa (&QWP(16*2,"esp"),$inout5);
1193 &movdqa (&QWP(16*3,"esp"),$tweak);
1198 &movdqu ($inout0,&QWP(16*0,$inp)); # load input
1199 &movdqu ($inout1,&QWP(16*1,$inp));
1200 &movdqu ($inout2,&QWP(16*2,$inp));
1201 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1202 &movdqu ($inout3,&QWP(16*3,$inp));
1203 &pxor ($inout1,&QWP(16*1,"esp"));
1204 &movdqu ($inout4,&QWP(16*4,$inp));
1205 &pxor ($inout2,&QWP(16*2,"esp"));
1207 &pxor ($inout3,&QWP(16*3,"esp"));
1208 &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak
1213 &movaps ($tweak,&QWP(16*4,"esp")); # last tweak
1214 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1215 &xorps ($inout1,&QWP(16*1,"esp"));
1216 &xorps ($inout2,&QWP(16*2,"esp"));
1217 &movups (&QWP(16*0,$out),$inout0); # write output
1218 &xorps ($inout3,&QWP(16*3,"esp"));
1219 &movups (&QWP(16*1,$out),$inout1);
1221 &movups (&QWP(16*2,$out),$inout2);
1222 &movups (&QWP(16*3,$out),$inout3);
1223 &movups (&QWP(16*4,$out),$inout4);
1228 &movups ($inout0,&QWP(16*0,$inp)); # load input
1236 &movups (&QWP(16*0,$out),$inout0); # write output
1245 &movups ($inout0,&QWP(16*0,$inp)); # load input
1246 &movups ($inout1,&QWP(16*1,$inp));
1256 &movups (&QWP(16*0,$out),$inout0); # write output
1257 &movups (&QWP(16*1,$out),$inout1);
1265 &movups ($inout0,&QWP(16*0,$inp)); # load input
1266 &movups ($inout1,&QWP(16*1,$inp));
1267 &movups ($inout2,&QWP(16*2,$inp));
1278 &movups (&QWP(16*0,$out),$inout0); # write output
1279 &movups (&QWP(16*1,$out),$inout1);
1280 &movups (&QWP(16*2,$out),$inout2);
1289 &movups ($inout0,&QWP(16*0,$inp)); # load input
1290 &movups ($inout1,&QWP(16*1,$inp));
1291 &movups ($inout2,&QWP(16*2,$inp));
1292 &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak
1293 &movups ($inout3,&QWP(16*3,$inp));
1295 &xorps ($inout1,&QWP(16*1,"esp"));
1301 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1302 &xorps ($inout1,&QWP(16*1,"esp"));
1304 &movups (&QWP(16*0,$out),$inout0); # write output
1306 &movups (&QWP(16*1,$out),$inout1);
1307 &movups (&QWP(16*2,$out),$inout2);
1308 &movups (&QWP(16*3,$out),$inout3);
1332 &pand ($inout3,&QWP(16*6,"esp")); # isolate carry and residue
1349 &movups ($inout0,&QWP(-16,$out)); # load input
1356 &movups (&QWP(-16,$out),$inout0); # write output
1367 &movups ($inout0,&QWP(0,$inp));
1401 &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87
1416 &movdqa (&QWP(16*$i,"esp"),$tweak);
1423 &movdqa (&QWP(16*$i++,"esp"),$tweak);
1425 &$movekey ($rndkey0,&QWP(0,$key_));
1427 &movups ($inout0,&QWP(0,$inp)); # load input
1431 &movdqu ($inout1,&QWP(16*1,$inp));
1433 &movdqu ($inout2,&QWP(16*2,$inp));
1435 &movdqu ($inout3,&QWP(16*3,$inp));
1437 &movdqu ($inout4,&QWP(16*4,$inp));
1439 &movdqu ($rndkey1,&QWP(16*5,$inp));
1442 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1443 &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak
1446 &$movekey ($rndkey1,&QWP(16,$key_));
1448 &pxor ($inout1,&QWP(16*1,"esp"));
1450 &pxor ($inout2,&QWP(16*2,"esp"));
1452 &pxor ($inout3,&QWP(16*3,"esp"));
1455 &pxor ($inout4,&QWP(16*4,"esp"));
1459 &$movekey ($rndkey0,&QWP(0,$key));
1463 &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak
1465 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1467 &xorps ($inout1,&QWP(16*1,"esp"));
1468 &movups (&QWP(16*0,$out),$inout0); # write output
1469 &xorps ($inout2,&QWP(16*2,"esp"));
1470 &movups (&QWP(16*1,$out),$inout1);
1471 &xorps ($inout3,&QWP(16*3,"esp"));
1472 &movups (&QWP(16*2,$out),$inout2);
1473 &xorps ($inout4,&QWP(16*4,"esp"));
1474 &movups (&QWP(16*3,$out),$inout3);
1476 &movups (&QWP(16*4,$out),$inout4);
1478 &movups (&QWP(16*5,$out),$inout5);
1480 &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87
1529 &movdqa (&QWP(16*0,"esp"),$inout3);
1530 &movdqa (&QWP(16*1,"esp"),$inout4);
1533 &movdqa (&QWP(16*2,"esp"),$inout5);
1535 &movdqa (&QWP(16*3,"esp"),$tweak);
1540 &movdqu ($inout0,&QWP(16*0,$inp)); # load input
1541 &movdqu ($inout1,&QWP(16*1,$inp));
1542 &movdqu ($inout2,&QWP(16*2,$inp));
1543 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1544 &movdqu ($inout3,&QWP(16*3,$inp));
1545 &pxor ($inout1,&QWP(16*1,"esp"));
1546 &movdqu ($inout4,&QWP(16*4,$inp));
1547 &pxor ($inout2,&QWP(16*2,"esp"));
1549 &pxor ($inout3,&QWP(16*3,"esp"));
1550 &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak
1555 &movaps ($tweak,&QWP(16*4,"esp")); # last tweak
1556 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1557 &xorps ($inout1,&QWP(16*1,"esp"));
1558 &xorps ($inout2,&QWP(16*2,"esp"));
1559 &movups (&QWP(16*0,$out),$inout0); # write output
1560 &xorps ($inout3,&QWP(16*3,"esp"));
1561 &movups (&QWP(16*1,$out),$inout1);
1563 &movups (&QWP(16*2,$out),$inout2);
1564 &movups (&QWP(16*3,$out),$inout3);
1565 &movups (&QWP(16*4,$out),$inout4);
1570 &movups ($inout0,&QWP(16*0,$inp)); # load input
1578 &movups (&QWP(16*0,$out),$inout0); # write output
1587 &movups ($inout0,&QWP(16*0,$inp)); # load input
1588 &movups ($inout1,&QWP(16*1,$inp));
1597 &movups (&QWP(16*0,$out),$inout0); # write output
1598 &movups (&QWP(16*1,$out),$inout1);
1606 &movups ($inout0,&QWP(16*0,$inp)); # load input
1607 &movups ($inout1,&QWP(16*1,$inp));
1608 &movups ($inout2,&QWP(16*2,$inp));
1619 &movups (&QWP(16*0,$out),$inout0); # write output
1620 &movups (&QWP(16*1,$out),$inout1);
1621 &movups (&QWP(16*2,$out),$inout2);
1630 &movups ($inout0,&QWP(16*0,$inp)); # load input
1631 &movups ($inout1,&QWP(16*1,$inp));
1632 &movups ($inout2,&QWP(16*2,$inp));
1633 &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak
1634 &movups ($inout3,&QWP(16*3,$inp));
1636 &xorps ($inout1,&QWP(16*1,"esp"));
1642 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1643 &xorps ($inout1,&QWP(16*1,"esp"));
1645 &movups (&QWP(16*0,$out),$inout0); # write output
1647 &movups (&QWP(16*1,$out),$inout1);
1648 &movups (&QWP(16*2,$out),$inout2);
1649 &movups (&QWP(16*3,$out),$inout3);
1672 &movdqa ($twmask,&QWP(16*6,"esp"));
1688 &movups ($inout0,&QWP(0,$inp)); # load input
1695 &movups (&QWP(0,$out),$inout0); # write output
1711 &movups ($inout0,&QWP(0,$out)); # load input
1718 &movups (&QWP(0,$out),$inout0); # write output
1745 &movups ($ivec,&QWP(0,$key_)); # load IV
1759 &movups ($ivec,&QWP(0,$inp)); # input actually
1767 &movups (&QWP(0,$out),$inout0); # store output
1792 &movaps (&QWP(0,"esp"),$ivec); # save IV
1797 &movaps (&QWP(0,"esp"),$rndkey0); # save IV
1798 &movups (&QWP(0,$out),$inout5);
1801 &movdqu ($inout0,&QWP(0,$inp));
1802 &movdqu ($inout1,&QWP(0x10,$inp));
1803 &movdqu ($inout2,&QWP(0x20,$inp));
1804 &movdqu ($inout3,&QWP(0x30,$inp));
1805 &movdqu ($inout4,&QWP(0x40,$inp));
1806 &movdqu ($inout5,&QWP(0x50,$inp));
1810 &movups ($rndkey1,&QWP(0,$inp));
1811 &movups ($rndkey0,&QWP(0x10,$inp));
1812 &xorps ($inout0,&QWP(0,"esp")); # ^=IV
1814 &movups ($rndkey1,&QWP(0x20,$inp));
1816 &movups ($rndkey0,&QWP(0x30,$inp));
1818 &movups ($rndkey1,&QWP(0x40,$inp));
1820 &movups ($rndkey0,&QWP(0x50,$inp)); # IV
1822 &movups (&QWP(0,$out),$inout0);
1823 &movups (&QWP(0x10,$out),$inout1);
1825 &movups (&QWP(0x20,$out),$inout2);
1827 &movups (&QWP(0x30,$out),$inout3);
1829 &movups (&QWP(0x40,$out),$inout4);
1838 &movups (&QWP(0,$out),$inout0);
1841 &movups ($inout0,&QWP(0,$inp));
1846 &movups ($inout1,&QWP(0x10,$inp));
1851 &movups ($inout2,&QWP(0x20,$inp));
1855 &movups ($inout3,&QWP(0x30,$inp));
1859 &movups ($inout4,&QWP(0x40,$inp));
1860 &movaps (&QWP(0,"esp"),$ivec); # save IV
1861 &movups ($inout0,&QWP(0,$inp));
1864 &movups ($rndkey1,&QWP(0,$inp));
1865 &movups ($rndkey0,&QWP(0x10,$inp));
1866 &xorps ($inout0,&QWP(0,"esp")); # ^= IV
1868 &movups ($rndkey1,&QWP(0x20,$inp));
1870 &movups ($rndkey0,&QWP(0x30,$inp));
1872 &movups ($ivec,&QWP(0x40,$inp)); # IV
1874 &movups (&QWP(0,$out),$inout0);
1875 &movups (&QWP(0x10,$out),$inout1);
1876 &movups (&QWP(0x20,$out),$inout2);
1877 &movups (&QWP(0x30,$out),$inout3);
1898 &movups (&QWP(0,$out),$inout0);
1910 &movups (&QWP(0,$out),$inout0);
1912 &movups (&QWP(0x10,$out),$inout1);
1914 &movups ($ivec,&QWP(0x20,$inp));
1920 &movups ($rndkey1,&QWP(0x10,$inp));
1921 &movups ($rndkey0,&QWP(0x20,$inp));
1923 &movups ($ivec,&QWP(0x30,$inp));
1925 &movups (&QWP(0,$out),$inout0);
1927 &movups (&QWP(0x10,$out),$inout1);
1929 &movups (&QWP(0x20,$out),$inout2);
1937 &movups (&QWP(0,$out),$inout0);
1941 &movaps (&QWP(0,"esp"),$inout0);
1950 &movups (&QWP(0,$key_),$ivec); # output IV
1973 &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
1985 &$movekey (&QWP(-16,$key),"xmm0"); # round 0
2006 &$movekey (&QWP(0,$key),"xmm0");
2012 &$movekey (&QWP(0,$key),"xmm0");
2024 &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey
2026 &$movekey (&QWP(-16,$key),"xmm0") # round 0
2043 &$movekey (&QWP(0,$key),"xmm0");
2049 &$movekey (&QWP(0,$key),"xmm0");
2070 &$movekey (&QWP(0,$key),"xmm5");
2072 &$movekey (&QWP(16,$key),"xmm3");
2077 &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey
2080 &$movekey (&QWP(-32,$key),"xmm0"); # round 0
2081 &$movekey (&QWP(-16,$key),"xmm2"); # round 1
2108 &$movekey (&QWP(0,$key),"xmm0");
2114 &$movekey (&QWP(0,$key),"xmm2");
2126 &$movekey (&QWP(0,$key),"xmm0");
2168 &$movekey ("xmm0",&QWP(0,$key)); # just swap
2169 &$movekey ("xmm1",&QWP(0,"eax"));
2170 &$movekey (&QWP(0,"eax"),"xmm0");
2171 &$movekey (&QWP(0,$key),"xmm1");
2176 &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse
2177 &$movekey ("xmm1",&QWP(0,"eax"));
2182 &$movekey (&QWP(16,"eax"),"xmm0");
2183 &$movekey (&QWP(-16,$key),"xmm1");
2187 &$movekey ("xmm0",&QWP(0,$key)); # inverse middle
2189 &$movekey (&QWP(0,$key),"xmm0");