Lines Matching refs:A0
702 my @A0=("%r10","%r11");
704 my ($a0,$a1,$ai)=("%r14","%r15","%rbx");
754 mov -32($aptr,$i),$a0 # a[0]
761 mul $a0 # a[1]*a[0]
762 mov %rax,$A0[0] # a[1]*a[0]
764 mov %rdx,$A0[1]
765 mov $A0[0],-24($tptr,$i) # t[1]
767 xor $A0[0],$A0[0]
768 mul $a0 # a[2]*a[0]
769 add %rax,$A0[1]
771 adc %rdx,$A0[0]
772 mov $A0[1],-16($tptr,$i) # t[2]
783 xor $A0[1],$A0[1]
784 add $A1[0],$A0[0]
786 adc \$0,$A0[1]
787 mul $a0 # a[3]*a[0]
788 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
790 adc %rdx,$A0[1]
791 mov $A0[0],-8($tptr,$j) # t[3]
803 xor $A0[0],$A0[0]
804 add $A1[1],$A0[1]
805 adc \$0,$A0[0]
806 mul $a0
807 add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4]
809 adc %rdx,$A0[0]
810 mov $A0[1],($tptr,$j) # t[4]
820 xor $A0[1],$A0[1]
821 add $A1[0],$A0[0]
822 adc \$0,$A0[1]
823 mul $a0 # a[5]*a[2]
824 add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5]
826 adc %rdx,$A0[1]
827 mov $A0[0],8($tptr,$j) # t[5]
836 xor $A0[0],$A0[0]
837 add $A1[1],$A0[1]
838 adc \$0,$A0[0]
839 mul $a0 # a[6]*a[2]
840 add %rax,$A0[1] # a[6]*a[2]+a[5]*a[3]+t[6]
842 adc %rdx,$A0[0]
843 mov $A0[1],16($tptr,$j) # t[6]
853 xor $A0[1],$A0[1]
854 add $A1[0],$A0[0]
856 adc \$0,$A0[1]
857 mul $a0 # a[7]*a[4]
858 add %rax,$A0[0] # a[7]*a[4]+a[6]*a[5]+t[6]
860 adc %rdx,$A0[1]
861 mov $A0[0],-8($tptr,$j) # t[7]
867 add $A0[1],$A1[1]
880 mov -32($aptr,$i),$a0 # a[0]
887 mov -24($tptr,$i),$A0[0] # t[1]
888 xor $A0[1],$A0[1]
889 mul $a0 # a[1]*a[0]
890 add %rax,$A0[0] # a[1]*a[0]+t[1]
892 adc %rdx,$A0[1]
893 mov $A0[0],-24($tptr,$i) # t[1]
895 xor $A0[0],$A0[0]
896 add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2]
897 adc \$0,$A0[0]
898 mul $a0 # a[2]*a[0]
899 add %rax,$A0[1]
901 adc %rdx,$A0[0]
902 mov $A0[1],-16($tptr,$i) # t[2]
917 xor $A0[1],$A0[1]
918 add $A1[0],$A0[0]
919 adc \$0,$A0[1]
920 mul $a0 # a[3]*a[0]
921 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
923 adc %rdx,$A0[1]
924 mov $A0[0],8($tptr,$j) # t[3]
940 xor $A0[0],$A0[0]
941 add $A1[1],$A0[1]
942 adc \$0,$A0[0]
943 mul $a0 # a[4]*a[0]
944 add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4]
946 adc %rdx,$A0[0]
947 mov $A0[1],($tptr,$j) # t[4]
958 xor $A0[1],$A0[1]
959 add $A1[0],$A0[0]
961 adc \$0,$A0[1]
962 mul $a0 # a[5]*a[2]
963 add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5]
965 adc %rdx,$A0[1]
966 mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below
972 add $A0[1],$A1[1]
985 mov -32($aptr),$a0 # a[0]
992 xor $A0[1],$A0[1]
993 mul $a0 # a[1]*a[0]
994 add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1]
996 adc %rdx,$A0[1]
997 mov $A0[0],-24($tptr) # t[1]
999 xor $A0[0],$A0[0]
1000 add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2]
1001 adc \$0,$A0[0]
1002 mul $a0 # a[2]*a[0]
1003 add %rax,$A0[1]
1005 adc %rdx,$A0[0]
1006 mov $A0[1],-16($tptr) # t[2]
1014 xor $A0[1],$A0[1]
1015 add $A1[0],$A0[0]
1017 adc \$0,$A0[1]
1018 mul $a0 # a[3]*a[0]
1019 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
1021 adc %rdx,$A0[1]
1022 mov $A0[0],-8($tptr) # t[3]
1025 add $A0[1],$A1[1]
1038 my ($shift,$carry)=($a0,$a1);
1054 xor $A0[0],$A0[0] # t[0]
1055 mov -24($tptr,$i,2),$A0[1] # t[1]
1057 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1058 shr \$63,$A0[0]
1059 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1060 shr \$63,$A0[1]
1061 or $A0[0],$S[1] # | t[2*i]>>63
1062 mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1063 mov $A0[1],$shift # shift=t[2*i+1]>>63
1066 mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1072 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1075 shr \$63,$A0[0]
1076 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1077 shr \$63,$A0[1]
1078 or $A0[0],$S[3] # | t[2*i]>>63
1079 mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1080 mov $A0[1],$shift # shift=t[2*i+1]>>63
1083 mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1095 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1096 shr \$63,$A0[0]
1097 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1098 shr \$63,$A0[1]
1099 or $A0[0],$S[1] # | t[2*i]>>63
1100 mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1101 mov $A0[1],$shift # shift=t[2*i+1]>>63
1104 mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1110 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1113 shr \$63,$A0[0]
1114 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1115 shr \$63,$A0[1]
1116 or $A0[0],$S[3] # | t[2*i]>>63
1117 mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1118 mov $A0[1],$shift # shift=t[2*i+1]>>63
1121 mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1127 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1130 shr \$63,$A0[0]
1131 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1132 shr \$63,$A0[1]
1133 or $A0[0],$S[1] # | t[2*i]>>63
1134 mov 16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1135 mov $A0[1],$shift # shift=t[2*i+1]>>63
1138 mov 24($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1144 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1147 shr \$63,$A0[0]
1148 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1149 shr \$63,$A0[1]
1150 or $A0[0],$S[3] # | t[2*i]>>63
1151 mov 32($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1152 mov $A0[1],$shift # shift=t[2*i+1]>>63
1155 mov 40($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1165 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1166 shr \$63,$A0[0]
1167 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1168 shr \$63,$A0[1]
1169 or $A0[0],$S[1] # | t[2*i]>>63
1170 mov -16($tptr),$A0[0] # t[2*i+2] # prefetch
1171 mov $A0[1],$shift # shift=t[2*i+1]>>63
1174 mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1180 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift
1183 shr \$63,$A0[0]
1184 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1185 shr \$63,$A0[1]
1186 or $A0[0],$S[3] # | t[2*i]>>63
1201 my ($m0,$m1)=($a0,$a1);
1209 mov 64(%rsp),$A0[0] # t[0] # modsched #
1219 imulq $A0[0],$m0 # m0=t[0]*n0 # modsched #
1225 xor $A0[1],$A0[1]
1227 add %rax,$A0[0] # n[0]*m0+t[0]
1229 adc %rdx,$A0[1]
1232 xor $A0[0],$A0[0]
1233 add 8($tptr,$j),$A0[1]
1234 adc \$0,$A0[0]
1236 add %rax,$A0[1] # n[1]*m0+t[1]
1238 adc %rdx,$A0[0]
1240 imulq $A0[1],$m1
1244 add $A0[1],$A1[0]
1252 xor $A0[1],$A0[1]
1253 add 16($tptr,$j),$A0[0]
1254 adc \$0,$A0[1]
1256 add %rax,$A0[0] # n[2]*m0+t[2]
1258 adc %rdx,$A0[1]
1262 add $A0[0],$A1[1]
1270 xor $A0[0],$A0[0]
1271 add 24($tptr,$j),$A0[1]
1273 adc \$0,$A0[0]
1275 add %rax,$A0[1] # n[3]*m0+t[3]
1277 adc %rdx,$A0[0]
1284 add $A0[1],$A1[0]
1292 xor $A0[1],$A0[1]
1293 add ($tptr,$j),$A0[0]
1294 adc \$0,$A0[1]
1296 add %rax,$A0[0] # n[4]*m0+t[4]
1298 adc %rdx,$A0[1]
1302 add $A0[0],$A1[1]
1310 xor $A0[0],$A0[0]
1311 add 8($tptr,$j),$A0[1]
1312 adc \$0,$A0[0]
1314 add %rax,$A0[1] # n[5]*m0+t[5]
1316 adc %rdx,$A0[0]
1321 add $A0[1],$A1[0]
1329 xor $A0[1],$A0[1]
1330 add 16($tptr,$j),$A0[0]
1331 adc \$0,$A0[1]
1333 add %rax,$A0[0] # n[6]*m0+t[6]
1335 adc %rdx,$A0[1]
1339 add $A0[0],$A1[1]
1347 xor $A0[0],$A0[0]
1348 add 24($tptr,$j),$A0[1]
1350 adc \$0,$A0[0]
1352 add %rax,$A0[1] # n[7]*m0+t[7]
1354 adc %rdx,$A0[0]
1362 add $A0[1],$A1[0]
1370 xor $A0[1],$A0[1]
1371 add ($tptr),$A0[0] # +t[8]
1372 adc \$0,$A0[1]
1374 add $topbit,$A0[0]
1375 adc \$0,$A0[1]
1380 add $A0[0],$A1[1]
1381 mov 16($tptr,$j),$A0[0] # t[0] # modsched #
1392 add $A0[1],$A1[0]