/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"

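/* MSA (MIPS SIMD Architecture) implementations of the vpx_dsp variance,
 * MSE, and block sum-of-squares kernels. */

/* Accumulate the squared src/ref difference for one pair of 16-byte
 * vectors: interleave src with ref, subtract per pixel into signed
 * halfwords, then dot-product-accumulate the squares into the four
 * 32-bit lanes of 'var'. */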
#define CALC_MSE_B(src, ref, var)                                   \
  {                                                                 \
    v16u8 src_l0_m, src_l1_m;                                       \
    v8i16 res_l0_m, res_l1_m;                                       \
                                                                    \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m);                      \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m);            \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
  }

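/* Same as CALC_MSE_B, but additionally accumulates the raw (signed)
 * pixel differences into 'sub' so the caller can form the block sum. */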
#define CALC_MSE_AVG_B(src, ref, var, sub)                          \
  {                                                                 \
    v16u8 src_l0_m, src_l1_m;                                       \
    v8i16 res_l0_m, res_l1_m;                                       \
                                                                    \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m);                      \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m);            \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
                                                                    \
    sub += res_l0_m + res_l1_m;                                     \
  }

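/* variance = sse - (sum * sum) / (w * h), with shift = log2(w * h).
 * Blocks of 16x32 and larger use a 64-bit product so that sum * sum
 * cannot overflow 32 bits. */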
#define VARIANCE_WxH(sse, diff, shift) \
  ((sse) - (((uint32_t)(diff) * (diff)) >> (shift)))

#define VARIANCE_LARGE_WxH(sse, diff, shift) \
  ((sse) - (((int64_t)(diff) * (diff)) >> (shift)))

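/* SSE plus difference-sum for a 4-pixel-wide block: four 32-bit rows are
 * loaded and inserted into a single vector per iteration. */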
static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  int32_t ht_cnt;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

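/* 8-pixel-wide variant: two 8-byte rows are packed into each vector before
 * the common MSE/sum accumulation. */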
static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

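/* 16-pixel-wide variant: one full vector per row, four rows per loop. */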
static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                     const uint8_t *ref_ptr, int32_t ref_stride,
                                     int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

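/* 32-pixel-wide variant: two vectors per row, four rows per loop. */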
static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                     const uint8_t *ref_ptr, int32_t ref_stride,
                                     int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

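/* 32x64 gets its own routine: the row differences are split across two
 * v8i16 accumulators so the 16-bit lane sums stay within int16_t range
 * (a single accumulator could overflow across 64 rows). */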
static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

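/* 64x32: four vectors per row, with the difference sums likewise spread
 * over two v8i16 accumulators. */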
static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

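/* 64x64 spreads the difference sums over four v8i16 accumulators to keep
 * every 16-bit lane within range across the 64 rows. */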
static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v8i16 avg2 = { 0 };
  v8i16 avg3 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 32; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;

    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  vec += __msa_hadd_s_w(avg2, avg2);
  vec += __msa_hadd_s_w(avg3, avg3);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

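/* Sum of squares of 256 consecutive int16_t coefficients (a 16x16 block),
 * accumulated in 64-bit lanes to avoid overflow. */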
static uint32_t get_mb_ss_msa(const int16_t *src) {
  uint32_t sum, cnt;
  v8i16 src0, src1, src2, src3;
  v4i32 src0_l, src1_l, src2_l, src3_l;
  v4i32 src0_r, src1_r, src2_r, src3_r;
  v2i64 sq_src_l = { 0 };
  v2i64 sq_src_r = { 0 };

  for (cnt = 8; cnt--;) {
    LD_SH4(src, 8, src0, src1, src2, src3);
    src += 4 * 8;

    UNPCK_SH_SW(src0, src0_l, src0_r);
    UNPCK_SH_SW(src1, src1_l, src1_r);
    UNPCK_SH_SW(src2, src2_l, src2_r);
    UNPCK_SH_SW(src3, src3_l, src3_r);

    DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r);
  }

  sq_src_l += __msa_splati_d(sq_src_l, 1);
  sq_src_r += __msa_splati_d(sq_src_r, 1);

  sum = __msa_copy_s_d(sq_src_l, 0);
  sum += __msa_copy_s_d(sq_src_r, 0);

  return sum;
}

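/* SSE-only helpers: same traversal as the sse_diff_* routines above, minus
 * the difference-sum bookkeeping. */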
static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}

static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}

static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}

static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}

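/* 64-wide blocks process two rows per iteration (height >> 1). */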
static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = height >> 1; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);
  }

  return HADD_SW_S32(var);
}

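/* Exported 4x4 SSE: a single pass over four 32-bit rows, no loop needed. */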
uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
                              const uint8_t *ref_ptr, int32_t ref_stride) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16i8 src = { 0 };
  v16i8 ref = { 0 };
  v4i32 err0 = { 0 };

  LW4(src_ptr, src_stride, src0, src1, src2, src3);
  LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
  INSERT_W4_SB(src0, src1, src2, src3, src);
  INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
  CALC_MSE_B(src, ref, err0);

  return HADD_SW_S32(err0);
}

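/* Per-size wrappers around VARIANCE_WxH; the shift is log2(width * height). */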
#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4)
#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5)
#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5)
#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6)
#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7)
#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7)
#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8)

#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9)
#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9)
#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10)
#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11)
#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11)
#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12)

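/* Generates vpx_variance<wd>x<ht>_msa() for sizes served by a same-width
 * sse_diff_* helper; the remaining sizes get hand-written wrappers below. */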
#define VPX_VARIANCE_WDXHT_MSA(wd, ht)                                         \
  uint32_t vpx_variance##wd##x##ht##_msa(                                      \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref,              \
      int32_t ref_stride, uint32_t *sse) {                                     \
    int32_t diff;                                                              \
                                                                               \
    *sse =                                                                     \
        sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &diff); \
                                                                               \
    return VARIANCE_##wd##Wx##ht##H(*sse, diff);                               \
  }

VPX_VARIANCE_WDXHT_MSA(4, 4)
VPX_VARIANCE_WDXHT_MSA(4, 8)

VPX_VARIANCE_WDXHT_MSA(8, 4)
VPX_VARIANCE_WDXHT_MSA(8, 8)
VPX_VARIANCE_WDXHT_MSA(8, 16)

VPX_VARIANCE_WDXHT_MSA(16, 8)
VPX_VARIANCE_WDXHT_MSA(16, 16)
VPX_VARIANCE_WDXHT_MSA(16, 32)

VPX_VARIANCE_WDXHT_MSA(32, 16)
VPX_VARIANCE_WDXHT_MSA(32, 32)

uint32_t vpx_variance32x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_32Wx64H(*sse, diff);
}

uint32_t vpx_variance64x32_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx32H(*sse, diff);
}

uint32_t vpx_variance64x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx64H(*sse, diff);
}

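/* vpx_mse* return the raw sum of squared errors for the block; any
 * normalization is left to the caller. */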
uint32_t vpx_mse8x8_msa(const uint8_t *src, int32_t src_stride,
                        const uint8_t *ref, int32_t ref_stride, uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse8x16_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

uint32_t vpx_mse16x8_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse16x16_msa(const uint8_t *src, int32_t src_stride,
                          const uint8_t *ref, int32_t ref_stride,
                          uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

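/* Exported helpers that return both the SSE (via *sse) and the difference
 * sum (via *sum) for a block. */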
void vpx_get8x8var_msa(const uint8_t *src, int32_t src_stride,
                       const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                       int32_t *sum) {
  *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum);
}

void vpx_get16x16var_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                         int32_t *sum) {
  *sse = sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum);
}

uint32_t vpx_get_mb_ss_msa(const int16_t *src) { return get_mb_ss_msa(src); }