/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"

/* Interleave the bytes of 'src' and 'ref', form the per-pixel differences
 * (src - ref) as signed halfwords, and accumulate the squared differences
 * into the v4i32 accumulator 'var'. */
#define CALC_MSE_B(src, ref, var) \
  { \
    v16u8 src_l0_m, src_l1_m; \
    v8i16 res_l0_m, res_l1_m; \
    \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
  }

/* Same as CALC_MSE_B, but additionally accumulates the signed differences
 * into 'sub' so the caller can derive the block sum. */
#define CALC_MSE_AVG_B(src, ref, var, sub) \
  { \
    v16u8 src_l0_m, src_l1_m; \
    v8i16 res_l0_m, res_l1_m; \
    \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
    \
    sub += res_l0_m + res_l1_m; \
  }

/* variance = sse - (sum * sum) / (W * H), where 'shift' is log2(W * H).
 * The _LARGE_ variant widens the squared sum to 64 bits and is used for
 * block sizes of 16x32 and above. */
#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift)

#define VARIANCE_LARGE_WxH(sse, diff, shift) \
  sse - (((int64_t)diff * diff) >> shift)
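
/* The sse_diff_*_msa() helpers below compute, for one WxH block, both the
 * sum of squared differences (the return value) and the signed sum of
 * differences (stored to *diff) between 'src_ptr' and 'ref_ptr'; the two
 * values feed the VARIANCE_*() macros. The loops are unrolled to cover
 * several rows per iteration (four for widths up to 32, two full rows for
 * width 64). The 32x64, 64x32 and 64x64 variants spread the difference
 * accumulation across two or four v8i16 vectors, so no 16-bit lane ever
 * holds more than 128 accumulated byte differences (128 * 255 = 32640),
 * which stays within int16 range. */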
static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  int32_t ht_cnt;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                     const uint8_t *ref_ptr, int32_t ref_stride,
                                     int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                     const uint8_t *ref_ptr, int32_t ref_stride,
                                     int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v8i16 avg2 = { 0 };
  v8i16 avg3 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 32; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;

    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  vec += __msa_hadd_s_w(avg2, avg2);
  vec += __msa_hadd_s_w(avg3, avg3);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
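
/* Sum of squares over 256 contiguous int16 values (one 16x16 macroblock of
 * differences), 32 values per loop iteration, accumulated in 64-bit lanes. */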
static uint32_t get_mb_ss_msa(const int16_t *src) {
  uint32_t sum, cnt;
  v8i16 src0, src1, src2, src3;
  v4i32 src0_l, src1_l, src2_l, src3_l;
  v4i32 src0_r, src1_r, src2_r, src3_r;
  v2i64 sq_src_l = { 0 };
  v2i64 sq_src_r = { 0 };

  for (cnt = 8; cnt--;) {
    LD_SH4(src, 8, src0, src1, src2, src3);
    src += 4 * 8;

    UNPCK_SH_SW(src0, src0_l, src0_r);
    UNPCK_SH_SW(src1, src1_l, src1_r);
    UNPCK_SH_SW(src2, src2_l, src2_r);
    UNPCK_SH_SW(src3, src3_l, src3_r);

    DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r);
  }

  sq_src_l += __msa_splati_d(sq_src_l, 1);
  sq_src_r += __msa_splati_d(sq_src_r, 1);

  sum = __msa_copy_s_d(sq_src_l, 0);
  sum += __msa_copy_s_d(sq_src_r, 0);

  return sum;
}
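
/* The sse_*width_msa() helpers below are the SSE-only counterparts of the
 * sse_diff_*_msa() functions above: they return the sum of squared
 * differences for a WxH block without computing the sum of differences. */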
static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}

static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}

static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}

static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}

static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = height >> 1; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);
  }

  return HADD_SW_S32(var);
}
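
/* Exported helper: sum of squared differences over a single 4x4 block. */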
uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
                              const uint8_t *ref_ptr, int32_t ref_stride) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16i8 src = { 0 };
  v16i8 ref = { 0 };
  v4i32 err0 = { 0 };

  LW4(src_ptr, src_stride, src0, src1, src2, src3);
  LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
  INSERT_W4_SB(src0, src1, src2, src3, src);
  INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
  CALC_MSE_B(src, ref, err0);

  return HADD_SW_S32(err0);
}

#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6);
#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7);
#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7);
#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8);

#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10);
#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12);
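
/* For reference, the vpx_variance##wd##x##ht##_msa() functions generated
 * below compute the same quantity as the following scalar sketch. This is
 * illustrative only; 'variance_ref', 'w' and 'h' are hypothetical names and
 * not part of the library:
 *
 *   uint32_t variance_ref(const uint8_t *src, int src_stride,
 *                         const uint8_t *ref, int ref_stride,
 *                         int w, int h, uint32_t *sse) {
 *     int i, j;
 *     int64_t sum = 0;   // signed sum of differences
 *     uint32_t sq = 0;   // sum of squared differences (SSE)
 *     for (i = 0; i < h; ++i) {
 *       for (j = 0; j < w; ++j) {
 *         const int d = src[i * src_stride + j] - ref[i * ref_stride + j];
 *         sum += d;
 *         sq += d * d;
 *       }
 *     }
 *     *sse = sq;
 *     // w * h is a power of two, so the right shift by log2(w * h) in the
 *     // VARIANCE_*() macros is the same as this division.
 *     return sq - (uint32_t)((sum * sum) / (w * h));
 *   }
 *
 * i.e. variance = SSE - (sum of differences)^2 / (number of pixels). */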
#define VPX_VARIANCE_WDXHT_MSA(wd, ht) \
  uint32_t vpx_variance##wd##x##ht##_msa( \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
      int32_t ref_stride, uint32_t *sse) { \
    int32_t diff; \
    \
    *sse = \
        sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &diff); \
    \
    return VARIANCE_##wd##Wx##ht##H(*sse, diff); \
  }

VPX_VARIANCE_WDXHT_MSA(4, 4);
VPX_VARIANCE_WDXHT_MSA(4, 8);

VPX_VARIANCE_WDXHT_MSA(8, 4)
VPX_VARIANCE_WDXHT_MSA(8, 8)
VPX_VARIANCE_WDXHT_MSA(8, 16)

VPX_VARIANCE_WDXHT_MSA(16, 8)
VPX_VARIANCE_WDXHT_MSA(16, 16)
VPX_VARIANCE_WDXHT_MSA(16, 32)

VPX_VARIANCE_WDXHT_MSA(32, 16)
VPX_VARIANCE_WDXHT_MSA(32, 32)

uint32_t vpx_variance32x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_32Wx64H(*sse, diff);
}

uint32_t vpx_variance64x32_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx32H(*sse, diff);
}

uint32_t vpx_variance64x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx64H(*sse, diff);
}

/* Note: despite the name, the vpx_mse*_msa() wrappers return the raw sum of
 * squared errors (also stored to *sse); no division by the pixel count is
 * performed here. */
uint32_t vpx_mse8x8_msa(const uint8_t *src, int32_t src_stride,
                        const uint8_t *ref, int32_t ref_stride, uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse8x16_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

uint32_t vpx_mse16x8_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse16x16_msa(const uint8_t *src, int32_t src_stride,
                          const uint8_t *ref, int32_t ref_stride,
                          uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

void vpx_get8x8var_msa(const uint8_t *src, int32_t src_stride,
                       const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                       int32_t *sum) {
  *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum);
}

void vpx_get16x16var_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                         int32_t *sum) {
  *sse = sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum);
}

uint32_t vpx_get_mb_ss_msa(const int16_t *src) { return get_mb_ss_msa(src); }