/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"

/* Saturating unsigned subtract on a pair of halfword vectors; used by the
 * TM predictors below to clamp (left + above - top_left) at zero. */
#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) { \
  out0 = __msa_subs_u_h(out0, in0);               \
  out1 = __msa_subs_u_h(out1, in1);               \
}

/* Vertical prediction: every row of the block is a copy of the row of
 * pixels directly above it. */
static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst,
                                       int32_t dst_stride) {
  uint32_t src_data;

  src_data = LW(src);

  SW4(src_data, src_data, src_data, src_data, dst, dst_stride);
}

static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst,
                                       int32_t dst_stride) {
  uint32_t row;
  uint32_t src_data1, src_data2;

  src_data1 = LW(src);
  src_data2 = LW(src + 4);

  for (row = 8; row--;) {
    SW(src_data1, dst);
    SW(src_data2, (dst + 4));
    dst += dst_stride;
  }
}

static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride) {
  uint32_t row;
  v16u8 src0;

  src0 = LD_UB(src);

  for (row = 16; row--;) {
    ST_UB(src0, dst);
    dst += dst_stride;
  }
}

static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride) {
  uint32_t row;
  v16u8 src1, src2;

  src1 = LD_UB(src);
  src2 = LD_UB(src + 16);

  for (row = 32; row--;) {
    ST_UB2(src1, src2, dst, 16);
    dst += dst_stride;
  }
}

/* Horizontal prediction: row r of the block is filled with left[r].
 * Multiplying a byte by 0x01010101 broadcasts it across all four bytes
 * of a word (and likewise for the 64-bit constant below). */
static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint32_t out0, out1, out2, out3;

  out0 = src[0] * 0x01010101;
  out1 = src[1] * 0x01010101;
  out2 = src[2] * 0x01010101;
  out3 = src[3] * 0x01010101;

  SW4(out0, out1, out2, out3, dst, dst_stride);
}

static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint64_t out0, out1, out2, out3, out4, out5, out6, out7;

  out0 = src[0] * 0x0101010101010101ull;
  out1 = src[1] * 0x0101010101010101ull;
  out2 = src[2] * 0x0101010101010101ull;
  out3 = src[3] * 0x0101010101010101ull;
  out4 = src[4] * 0x0101010101010101ull;
  out5 = src[5] * 0x0101010101010101ull;
  out6 = src[6] * 0x0101010101010101ull;
  out7 = src[7] * 0x0101010101010101ull;

  SD4(out0, out1, out2, out3, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(out4, out5, out6, out7, dst, dst_stride);
}

static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  uint32_t row;
  uint8_t inp0, inp1, inp2, inp3;
  v16u8 src0, src1, src2, src3;

  for (row = 4; row--;) {
    inp0 = src[0];
    inp1 = src[1];
    inp2 = src[2];
    inp3 = src[3];
    src += 4;

    src0 = (v16u8)__msa_fill_b(inp0);
    src1 = (v16u8)__msa_fill_b(inp1);
    src2 = (v16u8)__msa_fill_b(inp2);
    src3 = (v16u8)__msa_fill_b(inp3);

    ST_UB4(src0, src1, src2, src3, dst, dst_stride);
    dst += (4 * dst_stride);
  }
}

static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  uint32_t row;
  uint8_t inp0, inp1, inp2, inp3;
  v16u8 src0, src1, src2, src3;

  for (row = 8; row--;) {
    inp0 = src[0];
    inp1 = src[1];
    inp2 = src[2];
    inp3 = src[3];
    src += 4;

    src0 = (v16u8)__msa_fill_b(inp0);
    src1 = (v16u8)__msa_fill_b(inp1);
    src2 = (v16u8)__msa_fill_b(inp2);
    src3 = (v16u8)__msa_fill_b(inp3);

    ST_UB2(src0, src0, dst, 16);
    dst += dst_stride;
    ST_UB2(src1, src1, dst, 16);
    dst += dst_stride;
    ST_UB2(src2, src2, dst, 16);
    dst += dst_stride;
    ST_UB2(src3, src3, dst, 16);
    dst += dst_stride;
  }
}
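/* DC prediction fills the block with the rounded average of the available
 * edge pixels. A scalar sketch of what the vector code below computes for
 * the 4x4 two-edge case:
 *
 *   sum = above[0] + ... + above[3] + left[0] + ... + left[3];
 *   dc  = (sum + 4) >> 3;   // __msa_srari_w(sum, 3) is this rounded shift
 *
 * The __msa_hadd_u_* calls form a pairwise widening reduction of the packed
 * edge bytes down to that single sum. */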
static void intra_predict_dc_4x4_msa(const uint8_t *src_top,
                                     const uint8_t *src_left,
                                     uint8_t *dst, int32_t dst_stride) {
  uint32_t val0, val1;
  v16i8 store, src = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  val0 = LW(src_top);
  val1 = LW(src_left);
  INSERT_W2_SB(val0, val1, src);
  sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_w((v4i32)store, 0);

  SW4(val0, val0, val0, val0, dst, dst_stride);
}

/* "tl" variants average a single edge (top or left only), so the rounded
 * shift is one bit smaller than in the two-edge version. */
static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint32_t val0;
  v16i8 store, data = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;

  val0 = LW(src);
  data = (v16i8)__msa_insert_w((v4i32)data, 0, val0);
  sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_w((v4i32)store, 0);

  SW4(val0, val0, val0, val0, dst, dst_stride);
}

/* "128dc" variants are used when no edge pixels are available: the block
 * is filled with the mid-gray value 128. */
static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) {
  uint32_t out;
  const v16i8 store = __msa_ldi_b(128);

  out = __msa_copy_u_w((v4i32)store, 0);

  SW4(out, out, out, out, dst, dst_stride);
}

static void intra_predict_dc_8x8_msa(const uint8_t *src_top,
                                     const uint8_t *src_left,
                                     uint8_t *dst, int32_t dst_stride) {
  uint64_t val0, val1;
  v16i8 store;
  v16u8 src = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  val0 = LD(src_top);
  val1 = LD(src_left);
  INSERT_D2_UB(val0, val1, src);
  sum_h = __msa_hadd_u_h(src, src);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  /* The reduction leaves one partial sum per doubleword lane; pack the two
   * low words together and add once more to get the total. */
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_d((v2i64)store, 0);

  SD4(val0, val0, val0, val0, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(val0, val0, val0, val0, dst, dst_stride);
}

static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint64_t val0;
  v16i8 store;
  v16u8 data = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  val0 = LD(src);
  data = (v16u8)__msa_insert_d((v2i64)data, 0, val0);
  sum_h = __msa_hadd_u_h(data, data);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_d((v2i64)store, 0);

  SD4(val0, val0, val0, val0, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(val0, val0, val0, val0, dst, dst_stride);
}

static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) {
  uint64_t out;
  const v16i8 store = __msa_ldi_b(128);

  out = __msa_copy_u_d((v2i64)store, 0);

  SD4(out, out, out, out, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(out, out, out, out, dst, dst_stride);
}
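/* For the wider blocks below the arithmetic is the same; only the rounded
 * shift grows with the pixel count. As a sketch, 16x16 with both edges
 * averages 32 samples, dc = (sum_top + sum_left + 16) >> 5; 32x32 uses
 * (sum + 32) >> 6; and the single-edge "tl" versions shift one bit less. */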
static void intra_predict_dc_16x16_msa(const uint8_t *src_top,
                                       const uint8_t *src_left,
                                       uint8_t *dst, int32_t dst_stride) {
  v16u8 top, left, out;
  v8u16 sum_h, sum_top, sum_left;
  v4u32 sum_w;
  v2u64 sum_d;

  top = LD_UB(src_top);
  left = LD_UB(src_left);
  HADD_UB2_UH(top, left, sum_top, sum_left);
  sum_h = sum_top + sum_left;
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
  dst += (8 * dst_stride);
  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}

static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  v16u8 data, out;
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  data = LD_UB(src);
  sum_h = __msa_hadd_u_h(data, data);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
  dst += (8 * dst_stride);
  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}

static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) {
  const v16u8 out = (v16u8)__msa_ldi_b(128);

  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
  dst += (8 * dst_stride);
  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}

static void intra_predict_dc_32x32_msa(const uint8_t *src_top,
                                       const uint8_t *src_left,
                                       uint8_t *dst, int32_t dst_stride) {
  uint32_t row;
  v16u8 top0, top1, left0, left1, out;
  v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
  v4u32 sum_w;
  v2u64 sum_d;

  LD_UB2(src_top, 16, top0, top1);
  LD_UB2(src_left, 16, left0, left1);
  HADD_UB2_UH(top0, top1, sum_top0, sum_top1);
  HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
  sum_h = sum_top0 + sum_top1;
  sum_h += sum_left0 + sum_left1;
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  for (row = 16; row--;) {
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
  }
}

static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  uint32_t row;
  v16u8 data0, data1, out;
  v8u16 sum_h, sum_data0, sum_data1;
  v4u32 sum_w;
  v2u64 sum_d;

  LD_UB2(src, 16, data0, data1);
  HADD_UB2_UH(data0, data1, sum_data0, sum_data1);
  sum_h = sum_data0 + sum_data1;
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  for (row = 16; row--;) {
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
  }
}

static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
  uint32_t row;
  const v16u8 out = (v16u8)__msa_ldi_b(128);

  for (row = 16; row--;) {
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
  }
}
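/* TM (TrueMotion) prediction computes, for each pixel,
 *
 *   pred[r][c] = clip_0_255(left[r] + above[c] - top_left)
 *
 * The vector code below evaluates this in unsigned 16-bit arithmetic:
 * __msa_hadd_u_h of the interleaved (left[r], above[c]) bytes forms
 * left[r] + above[c], __msa_subs_u_h (via IPRED_SUBS_UH2_UH) subtracts
 * top_left with saturation at zero, and SAT_UH*_UH(..., 7) caps the result
 * at 255 before the halfwords are packed back to bytes and stored. */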
static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr,
                                     const uint8_t *src_left,
                                     uint8_t *dst, int32_t dst_stride) {
  uint32_t val;
  uint8_t top_left = src_top_ptr[-1];
  v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
  v16u8 src0, src1, src2, src3;
  v8u16 src_top_left, vec0, vec1, vec2, vec3;

  src_top_left = (v8u16)__msa_fill_h(top_left);
  val = LW(src_top_ptr);
  src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val);

  src_left0 = __msa_fill_b(src_left[0]);
  src_left1 = __msa_fill_b(src_left[1]);
  src_left2 = __msa_fill_b(src_left[2]);
  src_left3 = __msa_fill_b(src_left[3]);

  ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
             src_left3, src_top, src0, src1, src2, src3);
  HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
  IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
  IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
  SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
  PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
  ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
}

static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr,
                                     const uint8_t *src_left,
                                     uint8_t *dst, int32_t dst_stride) {
  uint64_t val;
  uint8_t top_left = src_top_ptr[-1];
  uint32_t loop_cnt;
  v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
  v8u16 src_top_left, vec0, vec1, vec2, vec3;
  v16u8 src0, src1, src2, src3;

  val = LD(src_top_ptr);
  src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val);
  src_top_left = (v8u16)__msa_fill_h(top_left);

  for (loop_cnt = 2; loop_cnt--;) {
    src_left0 = __msa_fill_b(src_left[0]);
    src_left1 = __msa_fill_b(src_left[1]);
    src_left2 = __msa_fill_b(src_left[2]);
    src_left3 = __msa_fill_b(src_left[3]);
    src_left += 4;

    ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
               src_left3, src_top, src0, src1, src2, src3);
    HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
    SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
    PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
    ST8x4_UB(tmp0, tmp1, dst, dst_stride);
    dst += (4 * dst_stride);
  }
}
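/* For the 16- and 32-pixel-wide rows below, the same steps run on full
 * vectors: ILVRL_B2_UH interleaves the broadcast left pixel with the 16 top
 * bytes into a low and a high half, each half is widened, adjusted, and
 * saturated, and PCKEV_ST_SB packs the two halves back to 16 bytes and
 * stores them. */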
static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr,
                                       const uint8_t *src_left,
                                       uint8_t *dst, int32_t dst_stride) {
  uint8_t top_left = src_top_ptr[-1];
  uint32_t loop_cnt;
  v16i8 src_top, src_left0, src_left1, src_left2, src_left3;
  v8u16 src_top_left, res_r, res_l;

  src_top = LD_SB(src_top_ptr);
  src_top_left = (v8u16)__msa_fill_h(top_left);

  for (loop_cnt = 4; loop_cnt--;) {
    src_left0 = __msa_fill_b(src_left[0]);
    src_left1 = __msa_fill_b(src_left[1]);
    src_left2 = __msa_fill_b(src_left[2]);
    src_left3 = __msa_fill_b(src_left[3]);
    src_left += 4;

    ILVRL_B2_UH(src_left0, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;

    ILVRL_B2_UH(src_left1, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;

    ILVRL_B2_UH(src_left2, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;

    ILVRL_B2_UH(src_left3, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;
  }
}

static void intra_predict_tm_32x32_msa(const uint8_t *src_top,
                                       const uint8_t *src_left,
                                       uint8_t *dst, int32_t dst_stride) {
  uint8_t top_left = src_top[-1];
  uint32_t loop_cnt;
  v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3;
  v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1;

  LD_SB2(src_top, 16, src_top0, src_top1);
  src_top_left = (v8u16)__msa_fill_h(top_left);

  for (loop_cnt = 8; loop_cnt--;) {
    src_left0 = __msa_fill_b(src_left[0]);
    src_left1 = __msa_fill_b(src_left[1]);
    src_left2 = __msa_fill_b(src_left[2]);
    src_left3 = __msa_fill_b(src_left[3]);
    src_left += 4;

    ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;

    ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;

    ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;

    ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;
  }
}
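/* RTCD entry points: thin wrappers adapting the common vpx_dsp predictor
 * signature (dst, stride, above, left) to the helpers above, discarding
 * whichever neighbor array a given mode does not use. */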
void vpx_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_4x4_msa(above, dst, y_stride);
}

void vpx_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_8x8_msa(above, dst, y_stride);
}

void vpx_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_16x16_msa(above, dst, y_stride);
}

void vpx_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_32x32_msa(above, dst, y_stride);
}

void vpx_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_4x4_msa(left, dst, y_stride);
}

void vpx_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_8x8_msa(left, dst, y_stride);
}

void vpx_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_16x16_msa(left, dst, y_stride);
}

void vpx_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_32x32_msa(left, dst, y_stride);
}

void vpx_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_4x4_msa(above, left, dst, y_stride);
}

void vpx_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_8x8_msa(above, left, dst, y_stride);
}

void vpx_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_16x16_msa(above, left, dst, y_stride);
}

void vpx_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_32x32_msa(above, left, dst, y_stride);
}

void vpx_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_4x4_msa(above, dst, y_stride);
}

void vpx_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_8x8_msa(above, dst, y_stride);
}

void vpx_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_16x16_msa(above, dst, y_stride);
}

void vpx_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_32x32_msa(above, dst, y_stride);
}
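/* The left-only DC modes reuse intra_predict_dc_tl_*: averaging a single
 * edge is the same computation whether that edge is the top row or the
 * left column. */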
void vpx_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                                   const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_4x4_msa(left, dst, y_stride);
}

void vpx_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                                   const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_8x8_msa(left, dst, y_stride);
}

void vpx_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                     const uint8_t *above,
                                     const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_16x16_msa(left, dst, y_stride);
}

void vpx_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                     const uint8_t *above,
                                     const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_32x32_msa(left, dst, y_stride);
}

void vpx_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_4x4_msa(dst, y_stride);
}

void vpx_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_8x8_msa(dst, y_stride);
}

void vpx_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_16x16_msa(dst, y_stride);
}

void vpx_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_32x32_msa(dst, y_stride);
}

void vpx_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_4x4_msa(above, left, dst, y_stride);
}

void vpx_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_8x8_msa(above, left, dst, y_stride);
}

void vpx_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_16x16_msa(above, left, dst, y_stride);
}

void vpx_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_32x32_msa(above, left, dst, y_stride);
}