/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/asmdefs_mmi.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
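
/* Per-row SAD building blocks for Loongson MMI. Each macro reads one row of
 * src and ref with the unaligned load pair gsldlc1/gsldrc1 (8 bytes per FP
 * register), takes per-byte absolute differences with pasubub, sums the
 * bytes with biadd, and accumulates the row totals with paddw into ftmp5
 * (ftmp3 for the 8- and 4-pixel-wide variants). The numeric suffix is the
 * block width in pixels, i.e. the number of bytes consumed per invocation.
 */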
#define SAD_SRC_REF_ABS_SUB_64                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_32                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_16                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_8                                       \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
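
/* 4-pixel rows are handled as a single 32-bit load. Under the O32 ABI the
 * word goes through a GPR (ulw) and is moved to the FP register with mtc1;
 * the 64-bit ABIs load it directly with the unaligned gslwlc1/gslwrc1 pair.
 * In both variants mthc1 $0 clears the upper half of the register so that
 * biadd only sums the four bytes that were actually loaded.
 */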
#if _MIPS_SIM == _ABIO32
#define SAD_SRC_REF_ABS_SUB_4                                       \
  "ulw        %[tmp0],    0x00(%[src])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                            \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                            \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_REF_ABS_SUB_4                                       \
  "gslwlc1    %[ftmp1],   0x03(%[src])                        \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])                        \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
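
/* The *_AVGREF_* variants implement the compound-prediction SAD: each ref
 * row is first averaged with the matching second_pred row using pavgb
 * (rounding byte average), and the absolute differences are then taken
 * against src exactly as in the plain macros above.
 */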
#define SAD_SRC_AVGREF_ABS_SUB_64                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_32                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_16                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_8                                    \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_AVGREF_ABS_SUB_4                                    \
  "ulw        %[tmp0],    0x00(%[second_pred])                \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                            \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                            \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_AVGREF_ABS_SUB_4                                    \
  "gslwlc1    %[ftmp1],   0x03(%[second_pred])                \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])                        \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#endif /* _MIPS_SIM == _ABIO32 */

// Depending on the call sites, passing **ref_array would avoid taking the
// address in the call below and allow de-duplication with the 4D version
// below.
#define sadMxNxK_mmi(m, n, k)                                                 \
  void vpx_sad##m##x##n##x##k##_mmi(const uint8_t *src, int src_stride,       \
                                    const uint8_t *ref_array, int ref_stride, \
                                    uint32_t *sad_array) {                    \
    int i;                                                                    \
    for (i = 0; i < k; ++i)                                                   \
      sad_array[i] =                                                          \
          vpx_sad##m##x##n##_mmi(src, src_stride, &ref_array[i], ref_stride); \
  }
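/* For example, sadMxNxK_mmi(16, 16, 3) defines vpx_sad16x16x3_mmi(), which
 * fills sad_array[0..2] with the SADs of src against the references that
 * start at ref_array, ref_array + 1 and ref_array + 2, i.e. successive byte
 * offsets into the same reference buffer.
 */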

// This appears to be equivalent to the above when k == 4 and refs is const
#define sadMxNx4D_mmi(m, n)                                                  \
  void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride,         \
                                 const uint8_t *const ref_array[],           \
                                 int ref_stride, uint32_t *sad_array) {      \
    int i;                                                                   \
    for (i = 0; i < 4; ++i)                                                  \
      sad_array[i] =                                                         \
          vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \
  }
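
/* Full-block SAD kernels. Each kernel zeroes its accumulator, then runs a
 * loop whose body is unrolled twice (two rows per iteration), stepping src
 * and ref by their strides after every row; the height passed in as
 * `counter` is therefore assumed to be even. The 32-bit total is moved back
 * to a GPR with mfc1.
 */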
static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad64x(src, src_stride, ref, ref_stride, H);              \
  }
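
/* Instantiate vpx_sad64x64_mmi / vpx_sad64x32_mmi and their x4d wrappers;
 * these are presumably hooked up through the run-time CPU dispatch declared
 * in vpx_dsp_rtcd.h.
 */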
vpx_sad64xN(64);
vpx_sad64xN(32);
sadMxNx4D_mmi(64, 64);
sadMxNx4D_mmi(64, 32);
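
/* The _avg kernels advance second_pred by one full row (the block width in
 * bytes, e.g. 0x40 for 64-wide blocks) after each row, since the second
 * prediction is stored contiguously rather than with a stride.
 */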
static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg64xN(64);
vpx_sad_avg64xN(32);

static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad32x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad32xN(64);
vpx_sad32xN(32);
vpx_sad32xN(16);
sadMxNx4D_mmi(32, 64);
sadMxNx4D_mmi(32, 32);
sadMxNx4D_mmi(32, 16);

static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg32xN(64);
vpx_sad_avg32xN(32);
vpx_sad_avg32xN(16);

static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad16x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad16xN(32);
vpx_sad16xN(16);
vpx_sad16xN(8);
sadMxNxK_mmi(16, 16, 3);
sadMxNxK_mmi(16, 16, 8);
sadMxNxK_mmi(16, 8, 3);
sadMxNxK_mmi(16, 8, 8);
sadMxNx4D_mmi(16, 32);
sadMxNx4D_mmi(16, 16);
sadMxNx4D_mmi(16, 8);

static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg16xN(32);
vpx_sad_avg16xN(16);
vpx_sad_avg16xN(8);
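
/* The 8- and 4-pixel-wide kernels only need one load per row, so they use a
 * single accumulator (ftmp3) and three FP temporaries instead of five.
 */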
static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad8xN(H)                                                   \
  unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride,   \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad8x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad8xN(16);
vpx_sad8xN(8);
vpx_sad8xN(4);
sadMxNxK_mmi(8, 16, 3);
sadMxNxK_mmi(8, 16, 8);
sadMxNxK_mmi(8, 8, 3);
sadMxNxK_mmi(8, 8, 8);
sadMxNx4D_mmi(8, 16);
sadMxNx4D_mmi(8, 8);
sadMxNx4D_mmi(8, 4);

static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg8xN(H)                                                   \
  unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg8xN(16);
vpx_sad_avg8xN(8);
vpx_sad_avg8xN(4);

static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad4xN(H)                                                   \
  unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride,   \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad4x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad4xN(8);
vpx_sad4xN(4);
sadMxNxK_mmi(4, 4, 3);
sadMxNxK_mmi(4, 4, 8);
sadMxNx4D_mmi(4, 8);
sadMxNx4D_mmi(4, 4);

static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two loop bodies per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg4xN(H)                                                   \
  unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg4xN(8);
vpx_sad_avg4xN(4);
    806