/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"

#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) \
  {                                             \
    out0 = __msa_subs_u_h(out0, in0);           \
    out1 = __msa_subs_u_h(out1, in1);           \
  }

static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst,
                                       int32_t dst_stride) {
  uint32_t src_data;

  src_data = LW(src);

  SW4(src_data, src_data, src_data, src_data, dst, dst_stride);
}

static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst,
                                       int32_t dst_stride) {
  uint32_t row;
  uint32_t src_data1, src_data2;

  src_data1 = LW(src);
  src_data2 = LW(src + 4);

  for (row = 8; row--;) {
    SW(src_data1, dst);
    SW(src_data2, (dst + 4));
    dst += dst_stride;
  }
}

static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride) {
  uint32_t row;
  v16u8 src0;

  src0 = LD_UB(src);

  for (row = 16; row--;) {
    ST_UB(src0, dst);
    dst += dst_stride;
  }
}

static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride) {
  uint32_t row;
  v16u8 src1, src2;

  src1 = LD_UB(src);
  src2 = LD_UB(src + 16);

  for (row = 32; row--;) {
    ST_UB2(src1, src2, dst, 16);
    dst += dst_stride;
  }
}
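/* Horizontal prediction: every output row is filled with its corresponding
 * "left" pixel. The 4x4 and 8x8 cases replicate the byte by multiplying with
 * 0x01..01 and store through general-purpose registers; the larger blocks
 * splat the byte with __msa_fill_b and store whole vectors. */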
static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint32_t out0, out1, out2, out3;

  out0 = src[0] * 0x01010101;
  out1 = src[1] * 0x01010101;
  out2 = src[2] * 0x01010101;
  out3 = src[3] * 0x01010101;

  SW4(out0, out1, out2, out3, dst, dst_stride);
}

static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint64_t out0, out1, out2, out3, out4, out5, out6, out7;

  out0 = src[0] * 0x0101010101010101ull;
  out1 = src[1] * 0x0101010101010101ull;
  out2 = src[2] * 0x0101010101010101ull;
  out3 = src[3] * 0x0101010101010101ull;
  out4 = src[4] * 0x0101010101010101ull;
  out5 = src[5] * 0x0101010101010101ull;
  out6 = src[6] * 0x0101010101010101ull;
  out7 = src[7] * 0x0101010101010101ull;

  SD4(out0, out1, out2, out3, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(out4, out5, out6, out7, dst, dst_stride);
}

static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  uint32_t row;
  uint8_t inp0, inp1, inp2, inp3;
  v16u8 src0, src1, src2, src3;

  for (row = 4; row--;) {
    inp0 = src[0];
    inp1 = src[1];
    inp2 = src[2];
    inp3 = src[3];
    src += 4;

    src0 = (v16u8)__msa_fill_b(inp0);
    src1 = (v16u8)__msa_fill_b(inp1);
    src2 = (v16u8)__msa_fill_b(inp2);
    src3 = (v16u8)__msa_fill_b(inp3);

    ST_UB4(src0, src1, src2, src3, dst, dst_stride);
    dst += (4 * dst_stride);
  }
}

static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  uint32_t row;
  uint8_t inp0, inp1, inp2, inp3;
  v16u8 src0, src1, src2, src3;

  for (row = 8; row--;) {
    inp0 = src[0];
    inp1 = src[1];
    inp2 = src[2];
    inp3 = src[3];
    src += 4;

    src0 = (v16u8)__msa_fill_b(inp0);
    src1 = (v16u8)__msa_fill_b(inp1);
    src2 = (v16u8)__msa_fill_b(inp2);
    src3 = (v16u8)__msa_fill_b(inp3);

    ST_UB2(src0, src0, dst, 16);
    dst += dst_stride;
    ST_UB2(src1, src1, dst, 16);
    dst += dst_stride;
    ST_UB2(src2, src2, dst, 16);
    dst += dst_stride;
    ST_UB2(src3, src3, dst, 16);
    dst += dst_stride;
  }
}
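/* DC prediction: the neighbouring pixels are summed with successive
 * horizontal-add reductions, rounded with srari (shift = log2 of the number
 * of pixels summed), and the average is broadcast over the whole block. The
 * _tl helpers average a single edge and back both the dc_top and dc_left
 * entry points; the 128dc helpers fill the block with the constant 128. */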
static void intra_predict_dc_4x4_msa(const uint8_t *src_top,
                                     const uint8_t *src_left, uint8_t *dst,
                                     int32_t dst_stride) {
  uint32_t val0, val1;
  v16i8 store, src = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  val0 = LW(src_top);
  val1 = LW(src_left);
  INSERT_W2_SB(val0, val1, src);
  sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_w((v4i32)store, 0);

  SW4(val0, val0, val0, val0, dst, dst_stride);
}

static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint32_t val0;
  v16i8 store, data = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;

  val0 = LW(src);
  data = (v16i8)__msa_insert_w((v4i32)data, 0, val0);
  sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_w((v4i32)store, 0);

  SW4(val0, val0, val0, val0, dst, dst_stride);
}

static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) {
  uint32_t out;
  const v16i8 store = __msa_ldi_b(128);

  out = __msa_copy_u_w((v4i32)store, 0);

  SW4(out, out, out, out, dst, dst_stride);
}

static void intra_predict_dc_8x8_msa(const uint8_t *src_top,
                                     const uint8_t *src_left, uint8_t *dst,
                                     int32_t dst_stride) {
  uint64_t val0, val1;
  v16i8 store;
  v16u8 src = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  val0 = LD(src_top);
  val1 = LD(src_left);
  INSERT_D2_UB(val0, val1, src);
  sum_h = __msa_hadd_u_h(src, src);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_d((v2i64)store, 0);

  SD4(val0, val0, val0, val0, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(val0, val0, val0, val0, dst, dst_stride);
}

static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst,
                                        int32_t dst_stride) {
  uint64_t val0;
  v16i8 store;
  v16u8 data = { 0 };
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  val0 = LD(src);
  data = (v16u8)__msa_insert_d((v2i64)data, 0, val0);
  sum_h = __msa_hadd_u_h(data, data);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
  store = __msa_splati_b((v16i8)sum_w, 0);
  val0 = __msa_copy_u_d((v2i64)store, 0);

  SD4(val0, val0, val0, val0, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(val0, val0, val0, val0, dst, dst_stride);
}

static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) {
  uint64_t out;
  const v16i8 store = __msa_ldi_b(128);

  out = __msa_copy_u_d((v2i64)store, 0);

  SD4(out, out, out, out, dst, dst_stride);
  dst += (4 * dst_stride);
  SD4(out, out, out, out, dst, dst_stride);
}
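/* For the 16x16 and 32x32 blocks the edges fill whole vectors, so they are
 * reduced with HADD_UB2_UH before the same word/doubleword folding as above,
 * and the splatted average is stored directly with ST_UB/ST_UB2 instead of
 * being copied back to a scalar register. */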
static void intra_predict_dc_16x16_msa(const uint8_t *src_top,
                                       const uint8_t *src_left, uint8_t *dst,
                                       int32_t dst_stride) {
  v16u8 top, left, out;
  v8u16 sum_h, sum_top, sum_left;
  v4u32 sum_w;
  v2u64 sum_d;

  top = LD_UB(src_top);
  left = LD_UB(src_left);
  HADD_UB2_UH(top, left, sum_top, sum_left);
  sum_h = sum_top + sum_left;
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
  dst += (8 * dst_stride);
  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}

static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  v16u8 data, out;
  v8u16 sum_h;
  v4u32 sum_w;
  v2u64 sum_d;

  data = LD_UB(src);
  sum_h = __msa_hadd_u_h(data, data);
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
  dst += (8 * dst_stride);
  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}

static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) {
  const v16u8 out = (v16u8)__msa_ldi_b(128);

  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
  dst += (8 * dst_stride);
  ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}

static void intra_predict_dc_32x32_msa(const uint8_t *src_top,
                                       const uint8_t *src_left, uint8_t *dst,
                                       int32_t dst_stride) {
  uint32_t row;
  v16u8 top0, top1, left0, left1, out;
  v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
  v4u32 sum_w;
  v2u64 sum_d;

  LD_UB2(src_top, 16, top0, top1);
  LD_UB2(src_left, 16, left0, left1);
  HADD_UB2_UH(top0, top1, sum_top0, sum_top1);
  HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
  sum_h = sum_top0 + sum_top1;
  sum_h += sum_left0 + sum_left1;
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  for (row = 16; row--;) {
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
  }
}

static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst,
                                          int32_t dst_stride) {
  uint32_t row;
  v16u8 data0, data1, out;
  v8u16 sum_h, sum_data0, sum_data1;
  v4u32 sum_w;
  v2u64 sum_d;

  LD_UB2(src, 16, data0, data1);
  HADD_UB2_UH(data0, data1, sum_data0, sum_data1);
  sum_h = sum_data0 + sum_data1;
  sum_w = __msa_hadd_u_w(sum_h, sum_h);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
  sum_d = __msa_hadd_u_d(sum_w, sum_w);
  sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
  out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);

  for (row = 16; row--;) {
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
  }
}

static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
  uint32_t row;
  const v16u8 out = (v16u8)__msa_ldi_b(128);

  for (row = 16; row--;) {
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
    ST_UB2(out, out, dst, 16);
    dst += dst_stride;
  }
}
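/* TM (true motion) prediction: out[r][c] = clip_pixel(left[r] + above[c] -
 * top_left). The left pixel is splatted, interleaved with the top row and
 * horizontally added, top_left is subtracted with unsigned saturation, and
 * the result is clamped to 8 bits before being packed back to bytes. */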
static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr,
                                     const uint8_t *src_left, uint8_t *dst,
                                     int32_t dst_stride) {
  uint32_t val;
  uint8_t top_left = src_top_ptr[-1];
  v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
  v16u8 src0, src1, src2, src3;
  v8u16 src_top_left, vec0, vec1, vec2, vec3;

  src_top_left = (v8u16)__msa_fill_h(top_left);
  val = LW(src_top_ptr);
  src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val);

  src_left0 = __msa_fill_b(src_left[0]);
  src_left1 = __msa_fill_b(src_left[1]);
  src_left2 = __msa_fill_b(src_left[2]);
  src_left3 = __msa_fill_b(src_left[3]);

  ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
             src_left3, src_top, src0, src1, src2, src3);
  HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
  IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
  IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
  SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
  PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
  ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
}

static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr,
                                     const uint8_t *src_left, uint8_t *dst,
                                     int32_t dst_stride) {
  uint64_t val;
  uint8_t top_left = src_top_ptr[-1];
  uint32_t loop_cnt;
  v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
  v8u16 src_top_left, vec0, vec1, vec2, vec3;
  v16u8 src0, src1, src2, src3;

  val = LD(src_top_ptr);
  src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val);
  src_top_left = (v8u16)__msa_fill_h(top_left);

  for (loop_cnt = 2; loop_cnt--;) {
    src_left0 = __msa_fill_b(src_left[0]);
    src_left1 = __msa_fill_b(src_left[1]);
    src_left2 = __msa_fill_b(src_left[2]);
    src_left3 = __msa_fill_b(src_left[3]);
    src_left += 4;

    ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
               src_left3, src_top, src0, src1, src2, src3);
    HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
    SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
    PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
    ST8x4_UB(tmp0, tmp1, dst, dst_stride);
    dst += (4 * dst_stride);
  }
}

static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr,
                                       const uint8_t *src_left, uint8_t *dst,
                                       int32_t dst_stride) {
  uint8_t top_left = src_top_ptr[-1];
  uint32_t loop_cnt;
  v16i8 src_top, src_left0, src_left1, src_left2, src_left3;
  v8u16 src_top_left, res_r, res_l;

  src_top = LD_SB(src_top_ptr);
  src_top_left = (v8u16)__msa_fill_h(top_left);

  for (loop_cnt = 4; loop_cnt--;) {
    src_left0 = __msa_fill_b(src_left[0]);
    src_left1 = __msa_fill_b(src_left[1]);
    src_left2 = __msa_fill_b(src_left[2]);
    src_left3 = __msa_fill_b(src_left[3]);
    src_left += 4;

    ILVRL_B2_UH(src_left0, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;

    ILVRL_B2_UH(src_left1, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;

    ILVRL_B2_UH(src_left2, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;

    ILVRL_B2_UH(src_left3, src_top, res_r, res_l);
    HADD_UB2_UH(res_r, res_l, res_r, res_l);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
    SAT_UH2_UH(res_r, res_l, 7);
    PCKEV_ST_SB(res_r, res_l, dst);
    dst += dst_stride;
  }
}

static void intra_predict_tm_32x32_msa(const uint8_t *src_top,
                                       const uint8_t *src_left, uint8_t *dst,
                                       int32_t dst_stride) {
  uint8_t top_left = src_top[-1];
  uint32_t loop_cnt;
  v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3;
  v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1;

  LD_SB2(src_top, 16, src_top0, src_top1);
  src_top_left = (v8u16)__msa_fill_h(top_left);

  for (loop_cnt = 8; loop_cnt--;) {
    src_left0 = __msa_fill_b(src_left[0]);
    src_left1 = __msa_fill_b(src_left[1]);
    src_left2 = __msa_fill_b(src_left[2]);
    src_left3 = __msa_fill_b(src_left[3]);
    src_left += 4;

    ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;

    ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;

    ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;

    ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1);
    ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1);
    HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
                res_l1);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
    SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
    PCKEV_ST_SB(res_r0, res_l0, dst);
    PCKEV_ST_SB(res_r1, res_l1, dst + 16);
    dst += dst_stride;
  }
}
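/* vpx_dsp_rtcd entry points: thin wrappers that mark the unused edge
 * pointer(s) as intentionally ignored and dispatch to the block-size
 * specific helpers above. */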
void vpx_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_4x4_msa(above, dst, y_stride);
}

void vpx_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_8x8_msa(above, dst, y_stride);
}

void vpx_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_16x16_msa(above, dst, y_stride);
}

void vpx_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_vert_32x32_msa(above, dst, y_stride);
}

void vpx_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_4x4_msa(left, dst, y_stride);
}

void vpx_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_8x8_msa(left, dst, y_stride);
}

void vpx_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_16x16_msa(left, dst, y_stride);
}

void vpx_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_horiz_32x32_msa(left, dst, y_stride);
}

void vpx_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_4x4_msa(above, left, dst, y_stride);
}

void vpx_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_8x8_msa(above, left, dst, y_stride);
}

void vpx_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_16x16_msa(above, left, dst, y_stride);
}

void vpx_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_dc_32x32_msa(above, left, dst, y_stride);
}

void vpx_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_4x4_msa(above, dst, y_stride);
}

void vpx_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_8x8_msa(above, dst, y_stride);
}

void vpx_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_16x16_msa(above, dst, y_stride);
}

void vpx_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)left;

  intra_predict_dc_tl_32x32_msa(above, dst, y_stride);
}
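/* The dc_left predictors reuse the same single-edge helper as dc_top but are
 * fed the left column instead of the top row; the dc_128 predictors need no
 * neighbouring pixels at all. */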
void vpx_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                                   const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_4x4_msa(left, dst, y_stride);
}

void vpx_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                                   const uint8_t *above, const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_8x8_msa(left, dst, y_stride);
}

void vpx_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                     const uint8_t *above,
                                     const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_16x16_msa(left, dst, y_stride);
}

void vpx_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                     const uint8_t *above,
                                     const uint8_t *left) {
  (void)above;

  intra_predict_dc_tl_32x32_msa(left, dst, y_stride);
}

void vpx_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_4x4_msa(dst, y_stride);
}

void vpx_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_8x8_msa(dst, y_stride);
}

void vpx_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_16x16_msa(dst, y_stride);
}

void vpx_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;

  intra_predict_128dc_32x32_msa(dst, y_stride);
}

void vpx_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_4x4_msa(above, left, dst, y_stride);
}

void vpx_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_8x8_msa(above, left, dst, y_stride);
}

void vpx_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_16x16_msa(above, left, dst, y_stride);
}

void vpx_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  intra_predict_tm_32x32_msa(above, left, dst, y_stride);
}