/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/asmdefs_mmi.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

#define SAD_SRC_REF_ABS_SUB_64 \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])              \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])              \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])              \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])              \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t"

#define SAD_SRC_REF_ABS_SUB_32 \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])              \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])              \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t"

#define SAD_SRC_REF_ABS_SUB_16 \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])              \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t"

#define SAD_SRC_REF_ABS_SUB_8 \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],   %[ftmp1]      \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_REF_ABS_SUB_4 \
  "ulw        %[tmp0],    0x00(%[src])              \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                  \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])              \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                  \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t" \
  "mthc1      $0,         %[ftmp1]                  \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],   %[ftmp1]      \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_REF_ABS_SUB_4 \
  "gslwlc1    %[ftmp1],   0x03(%[src])              \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])              \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t" \
  "mthc1      $0,         %[ftmp1]                  \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],   %[ftmp1]      \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
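
/* For reference, a minimal scalar sketch of the per-row SAD that the
 * SAD_SRC_REF_ABS_SUB_* macros vectorize: pasubub computes the per-byte
 * absolute differences, biadd sums them horizontally and paddw accumulates
 * the partial sums.  The helper name sad_row_c is hypothetical and is not
 * part of libvpx.
 *
 *   static unsigned int sad_row_c(const uint8_t *src, const uint8_t *ref,
 *                                 int width) {
 *     unsigned int sad = 0;
 *     int i;
 *     for (i = 0; i < width; ++i)
 *       sad += (src[i] > ref[i]) ? (src[i] - ref[i]) : (ref[i] - src[i]);
 *     return sad;
 *   }
 */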

#define SAD_SRC_AVGREF_ABS_SUB_64 \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])              \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],   %[ftmp4]      \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])              \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],   %[ftmp4]      \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])              \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],   %[ftmp4]      \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])              \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],   %[ftmp4]      \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_32 \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])              \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],   %[ftmp4]      \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])              \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],   %[ftmp4]      \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_16 \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])              \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])              \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])              \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],   %[ftmp4]      \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])              \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],   %[ftmp4]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                  \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp1]      \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],   %[ftmp2]      \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_8 \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])      \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])      \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])              \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])              \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],   %[ftmp1]      \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "ulw        %[tmp0],    0x00(%[second_pred])      \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                  \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])              \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                  \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t" \
  "mthc1      $0,         %[ftmp1]                  \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],   %[ftmp1]      \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "gslwlc1    %[ftmp1],   0x03(%[second_pred])      \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[second_pred])      \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])              \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])              \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],   %[ftmp2]      \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])              \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])              \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t" \
  "mthc1      $0,         %[ftmp1]                  \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                  \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],   %[ftmp1]      \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
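
/* For reference, a scalar sketch of the averaged-reference row SAD that the
 * SAD_SRC_AVGREF_ABS_SUB_* macros implement: pavgb forms the rounded average
 * (ref[i] + second_pred[i] + 1) >> 1 of the reference and second predictor,
 * and the SAD is then taken against src.  The helper name avg_sad_row_c is
 * hypothetical and is not part of libvpx.
 *
 *   static unsigned int avg_sad_row_c(const uint8_t *src, const uint8_t *ref,
 *                                     const uint8_t *second_pred, int width) {
 *     unsigned int sad = 0;
 *     int i;
 *     for (i = 0; i < width; ++i) {
 *       const int avg = (ref[i] + second_pred[i] + 1) >> 1;
 *       sad += (src[i] > avg) ? (src[i] - avg) : (avg - src[i]);
 *     }
 *     return sad;
 *   }
 */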

// Depending on call sites, pass **ref_array to avoid the & in the subsequent
// call and de-dup with the 4D variant below.
#define sadMxNxK_mmi(m, n, k)                                                 \
  void vpx_sad##m##x##n##x##k##_mmi(const uint8_t *src, int src_stride,       \
                                    const uint8_t *ref_array, int ref_stride, \
                                    uint32_t *sad_array) {                    \
    int i;                                                                    \
    for (i = 0; i < k; ++i)                                                   \
      sad_array[i] =                                                          \
          vpx_sad##m##x##n##_mmi(src, src_stride, &ref_array[i], ref_stride); \
  }

// This appears to be equivalent to the above when k == 4 and refs is const.
#define sadMxNx4D_mmi(m, n)                                                  \
  void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride,         \
                                 const uint8_t *const ref_array[],           \
                                 int ref_stride, uint32_t *sad_array) {      \
    int i;                                                                   \
    for (i = 0; i < 4; ++i)                                                  \
      sad_array[i] =                                                         \
          vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \
  }

static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],   %[ftmp5]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp5]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad64x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad64xN(64);
vpx_sad64xN(32);
sadMxNx4D_mmi(64, 64);
sadMxNx4D_mmi(64, 32);
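
/* For illustration, the sadMxNx4D_mmi(64, 64) instantiation above expands
 * (modulo formatting) to the following function, which computes the SAD of
 * one source block against four candidate reference blocks:
 *
 *   void vpx_sad64x64x4d_mmi(const uint8_t *src, int src_stride,
 *                            const uint8_t *const ref_array[],
 *                            int ref_stride, uint32_t *sad_array) {
 *     int i;
 *     for (i = 0; i < 4; ++i)
 *       sad_array[i] =
 *           vpx_sad64x64_mmi(src, src_stride, ref_array[i], ref_stride);
 *   }
 */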

static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],   %[ftmp5]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp5]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg64xN(64);
vpx_sad_avg64xN(32);

static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],   %[ftmp5]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp5]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad32x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad32xN(64);
vpx_sad32xN(32);
vpx_sad32xN(16);
sadMxNx4D_mmi(32, 64);
sadMxNx4D_mmi(32, 32);
sadMxNx4D_mmi(32, 16);

static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],   %[ftmp5]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp5]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg32xN(64);
vpx_sad_avg32xN(32);
vpx_sad_avg32xN(16);

static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],   %[ftmp5]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp5]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad16x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad16xN(32);
vpx_sad16xN(16);
vpx_sad16xN(8);
sadMxNxK_mmi(16, 16, 3);
sadMxNxK_mmi(16, 16, 8);
sadMxNxK_mmi(16, 8, 3);
sadMxNxK_mmi(16, 8, 8);
sadMxNx4D_mmi(16, 32);
sadMxNx4D_mmi(16, 16);
sadMxNx4D_mmi(16, 8);

static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],   %[ftmp5]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp5]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg16xN(32);
vpx_sad_avg16xN(16);
vpx_sad_avg16xN(8);

static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],   %[ftmp3]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp3]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad8xN(H)                                                  \
  unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride,  \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad8x(src, src_stride, ref, ref_stride, H);             \
  }

vpx_sad8xN(16);
vpx_sad8xN(8);
vpx_sad8xN(4);
sadMxNxK_mmi(8, 16, 3);
sadMxNxK_mmi(8, 16, 8);
sadMxNxK_mmi(8, 8, 3);
sadMxNxK_mmi(8, 8, 8);
sadMxNx4D_mmi(8, 16);
sadMxNx4D_mmi(8, 8);
sadMxNx4D_mmi(8, 4);

static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],   %[ftmp3]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp3]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg8xN(H)                                                   \
  unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg8xN(16);
vpx_sad_avg8xN(8);
vpx_sad_avg8xN(4);

static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],   %[ftmp3]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp3]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad4xN(H)                                                  \
  unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride,  \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad4x(src, src_stride, ref, ref_stride, H);             \
  }

vpx_sad4xN(8);
vpx_sad4xN(4);
sadMxNxK_mmi(4, 4, 3);
sadMxNxK_mmi(4, 4, 8);
sadMxNx4D_mmi(4, 8);
sadMxNx4D_mmi(4, 4);

static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],   %[ftmp3]      \n\t"
    "1:                                               \n\t"
    // Two loop bodies are unrolled per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                        \n\t"
    "mfc1       %[sad],     %[ftmp3]                  \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg4xN(H)                                                   \
  unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg4xN(8);
vpx_sad_avg4xN(4);
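
/* Usage sketch (illustrative only): these functions are normally reached
 * through the vpx_dsp_rtcd dispatch tables rather than called directly, and
 * the buffers and strides below are hypothetical.
 *
 *   uint8_t src[16 * 16], ref[16 * 16];
 *   // ... fill src and ref ...
 *   unsigned int sad = vpx_sad16x16_mmi(src, 16, ref, 16);
 */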