Home | History | Annotate | Download | only in x86

Lines Matching refs:nt

127 * @param[in] nt
145 WORD32 nt,
157 switch(nt)
171 two_nt = 2 * nt;
172 three_nt = 3 * nt;
181 // pu1_ref[2 * (nt - 1)]
187 const_temp1_4x32b = _mm_set_epi16(pu1_ref[2 * (nt - 1) + 1], pu1_ref[2 * (nt - 1)], pu1_ref[2 * (nt - 1) + 1], pu1_ref[2 * (nt - 1)],
188 pu1_ref[2 * (nt - 1) + 1], pu1_ref[2 * (nt - 1)], pu1_ref[2 * (nt - 1) + 1], pu1_ref[2 * (nt - 1)]);
190 const_temp4_4x32b = _mm_set1_epi16(nt - 1);
191 const_temp6_4x32b = _mm_set1_epi16(nt);
197 if(nt % 4 == 0)
201 for(row = 0; row < nt; row++)
211 row_8x16b = _mm_set1_epi16((nt - 1 - row));
218 /*(row + 1) * pu1_ref[nt - 1]*/
221 /*(row + 1) * pu1_ref[nt - 1] + nt)*/
224 for(col = 0; col < 2 * nt; col += 8)
234 /* (nt - 1 - row) * pu1_ref[two_nt + 1 + col] */
240 /*(nt - 1 - col)* pu1_ref[two_nt - 1 - row]*/
283 * @param[in] nt
301 WORD32 nt,
315 switch(nt)
340 if(nt == 16)
344 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt)));
345 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16));
346 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 32));
347 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 48));
385 else if(nt == 8)
388 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt)));
389 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16));
416 else if(nt == 4)
419 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt)));
441 acc_dc_u += pu1_ref[6 * nt];
442 acc_dc_v += pu1_ref[6 * nt + 1];
444 acc_dc_u -= pu1_ref[4 * nt];
445 acc_dc_v -= pu1_ref[4 * nt + 1];
447 dc_val_u = (acc_dc_u + nt) >> (log2nt + 1);
448 dc_val_v = (acc_dc_v + nt) >> (log2nt + 1);
454 if(nt == 4)
465 else if(nt == 8)
482 else /* nt == 16 */
486 for(row = 0; row < nt; row += 8)
537 * @param[in] nt
555 WORD32 nt,
566 if(nt == 8)
568 for(row = 0; row < nt; row += 4)
570 temp1 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 0)]);
571 temp2 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 0)]);
572 temp3 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 1)]);
573 temp4 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 1)]);
574 temp5 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 2)]);
575 temp6 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 2)]);
576 temp7 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 3)]);
577 temp8 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 3)]);
591 else if(nt == 16)
593 for(row = 0; row < nt; row += 4)
595 temp1 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 0)]);
596 temp2 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 0)]);
598 temp3 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 1)]);
599 temp4 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 1)]);
601 temp5 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 2)]);
602 temp6 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 2)]);
604 temp7 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * (row + 3)]);
605 temp8 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * (row + 3)]);
629 temp1 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * 0]);
630 temp2 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * 0]);
632 temp3 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * 1]);
633 temp4 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * 1]);
635 temp5 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * 2]);
636 temp6 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * 2]);
638 temp7 = _mm_set1_epi8(pu1_ref[(4 * nt) - 2 - 2 * 3]);
639 temp8 = _mm_set1_epi8(pu1_ref[(4 * nt) - 1 - 2 * 3]);
677 * @param[in] nt
695 WORD32 nt,
703 if(nt == 8)
705 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) + 2 + 0));
718 if(nt == 16)
722 temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) + 2 + 0));
723 temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) + 2 + 16));
765 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) + 2 + 0));
800 * @param[in] nt
818 WORD32 nt,
834 if(nt == 4)
837 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 0 - 8 - 2));
838 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 1 - 8 - 2));
839 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 2 - 8 - 2));
840 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 3 - 8 - 2));
848 else if(nt == 8)
851 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 0 - 16 - 2));
852 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 1 - 16 - 2));
853 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 2 - 16 - 2));
854 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 3 - 16 - 2));
855 src_temp5 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 4 - 16 - 2));
856 src_temp6 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 5 - 16 - 2));
857 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 6 - 16 - 2));
858 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * 7 - 16 - 2));
873 for(row = 0; row < nt; row += 8)
875 for(col = 0; col < 2 * nt; col += 16)
877 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 0) - (col + 16) - 2));
878 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 1) - (col + 16) - 2));
879 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 2) - (col + 16) - 2));
880 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 3) - (col + 16) - 2));
881 src_temp5 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 4) - (col + 16) - 2));
882 src_temp6 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 5) - (col + 16) - 2));
883 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 6) - (col + 16) - 2));
884 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) - 2 * (row + 7) - (col + 16) - 2));
922 * @param[in] nt
940 WORD32 nt,
951 if(nt == 4)
954 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (0 + 1) + (4 * nt) + 2 * idx + 2));
955 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (1 + 1) + (4 * nt) + 2 * idx + 2));
956 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (2 + 1) + (4 * nt) + 2 * idx + 2));
957 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
965 else if(nt == 8)
968 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (0 + 1) + (4 * nt) + 2 * idx + 2));
969 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (1 + 1) + (4 * nt) + 2 * idx + 2));
970 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (2 + 1) + (4 * nt) + 2 * idx + 2));
971 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
972 src_temp5 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (4 + 1) + (4 * nt) + 2 * idx + 2));
973 src_temp6 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (5 + 1) + (4 * nt) + 2 * idx + 2));
974 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (6 + 1) + (4 * nt) + 2 * idx + 2));
975 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (7 + 1) + (4 * nt) + 2 * idx + 2));
991 for(row = 0; row < nt; row += 8)
994 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (0 + 1) + 0 + (4 * nt) + 2 * idx + 2));
995 src_temp9 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (0 + 1) + 16 + (4 * nt) + 2 * idx + 2));
996 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (1 + 1) + 0 + (4 * nt) + 2 * idx + 2));
997 src_temp10 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (1 + 1) + 16 + (4 * nt) + 2 * idx + 2));
998 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (2 + 1) + 0 + (4 * nt) + 2 * idx + 2));
999 src_temp11 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (2 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1000 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (3 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1001 src_temp12 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (3 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1012 src_temp5 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (4 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1013 src_temp13 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (4 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1014 src_temp6 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (5 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1015 src_temp14 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (5 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1016 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (6 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1017 src_temp15 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (6 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1018 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (7 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1019 src_temp16 = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (7 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1037 if(nt == 4)
1040 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (0 + 1) + (4 * nt) + 2 * idx + 2));
1041 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (1 + 1) + (4 * nt) + 2 * idx + 2));
1042 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (2 + 1) + (4 * nt) + 2 * idx + 2));
1043 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
1052 else if(nt == 8)
1055 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (0 + 1) + (4 * nt) + 2 * idx + 2));
1056 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (1 + 1) + (4 * nt) + 2 * idx + 2));
1057 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (2 + 1) + (4 * nt) + 2 * idx + 2));
1058 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (3 + 1) + (4 * nt) + 2 * idx + 2));
1059 src_temp5 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (4 + 1) + (4 * nt) + 2 * idx + 2));
1060 src_temp6 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (5 + 1) + (4 * nt) + 2 * idx + 2));
1061 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (6 + 1) + (4 * nt) + 2 * idx + 2));
1062 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (7 + 1) + (4 * nt) + 2 * idx + 2));
1078 for(row = 0; row < nt; row += 8)
1081 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (0 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1082 src_temp9 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (0 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1083 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (1 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1084 src_temp10 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (1 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1085 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (2 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1086 src_temp11 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (2 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1087 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (3 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1088 src_temp12 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (3 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1099 src_temp5 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (4 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1100 src_temp13 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (4 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1101 src_temp6 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (5 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1102 src_temp14 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (5 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1103 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (6 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1104 src_temp15 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (6 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1105 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (7 + 1) + 0 + (4 * nt) + 2 * idx + 2));
1106 src_temp16 = _mm_loadu_si128((__m128i *)(pu1_ref - 2 * (7 + 1) + 16 + (4 * nt) + 2 * idx + 2));
1148 * @param[in] nt
1166 WORD32 nt,
1200 if(nt == 4)
1206 two_nt_4x32b = _mm_set1_epi16((4 * nt) - 2);
1331 two_nt_4x32b = _mm_set1_epi16((4 * nt) - 2);
1333 for(col = 0; col < 2 * nt; col += 16)
1405 for(row = 0; row < nt; row += 4)
1553 * @param[in] nt
1572 WORD32 nt,
1603 ref_main = ref_temp + 2 * nt;
1604 for(k = 0; k < (2 * (nt + 1)); k += 2)
1606 ref_temp[k + (2 * (nt - 1))] = pu1_ref[(4 * nt) - k];
1607 ref_temp[k + 1 + (2 * (nt - 1))] = pu1_ref[(4 * nt) - k + 1];
1610 ref_main = ref_temp + (2 * (nt - 1));
1611 ref_idx = (nt * intra_pred_ang) >> 5;
1620 ref_main[k] = pu1_ref[(4 * nt) + ((inv_ang_sum >> 8) << 1)];
1621 ref_main[k + 1] = pu1_ref[((4 * nt) + 1) + ((inv_ang_sum >> 8) << 1)];
1642 if(nt == 4)
1767 for(col = 0; col < 2 * nt; col += 16)
1836 for(row = 0; row < nt; row += 4)
1974 * @param[in] nt
1992 WORD32 nt,
2011 ref_main = ref_temp + 2 * nt;
2012 for(k = 0; k < (2 * (nt + 1)); k += 2)
2014 ref_temp[k + (2 * (nt - 1))] = pu1_ref[(4 * nt) + k];
2015 ref_temp[k + 1 + (2 * (nt - 1))] = pu1_ref[(4 * nt) + k + 1];
2018 ref_idx = (nt * intra_pred_ang) >> 5;
2020 ref_main = ref_temp + (2 * (nt - 1));
2027 ref_main[k] = pu1_ref[(4 * nt) - (inv_ang_sum >> 8) * 2];
2028 ref_main[k + 1] = pu1_ref[((4 * nt) + 1) - (inv_ang_sum >> 8) * 2];
2033 if(nt == 4) /* if nt =4*/
2146 else if(nt == 8) /* for nt = 16 case */
2153 for(row = 0; row < nt; row += 2)
2233 else if(nt == 16)
2239 for(row = 0; row < nt; row += 1)
2330 * @param[in] nt
2348 WORD32 nt,
2362 if(nt == 4) /* if nt =4*/
2371 two_nt_4x32b = _mm_set1_epi32((4 * nt) + 2);
2475 else if(nt == 8) /* for nt = 16 case */
2482 for(row = 0; row < nt; row += 2)
2491 ref_main_idx = (4 * nt) + 2 * idx + 2; /* col from 0-15 */
2497 ref_main_idx1 = (4 * nt) + 2 * idx + 2; /* col from 0-15 */
2562 else if(nt == 16)
2568 for(row = 0; row < nt; row += 1)
2577 ref_main_idx = (4 * nt) + 2 * idx + 2; /* col from 0-31 */