/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_DSP_LOOPFILTER_MSA_H_
#define VPX_DSP_LOOPFILTER_MSA_H_

#include "vpx_dsp/mips/macros_msa.h"

/* VP9 4-tap loop filter applied to 16 pixel columns at once (MSA vectors).
 *
 * Inputs p1_in..q1_in are unsigned pixel vectors; mask selects the lanes to
 * filter and hev marks "high edge variance" lanes. Filtered pixels are
 * written to p1_out..q1_out.
 *
 * NOTE(review): this macro modifies its `hev` argument in place (it is
 * inverted via xori 0xff near the end) — a caller-visible side effect.
 *
 * Method: pixels are biased to the signed domain (xor 0x80), the clamped
 * filter value filt = sat(p1 - q1) & hev + 3 * sat(q0 - p0) is computed with
 * saturating byte arithmetic, masked, then split into t1 = (filt + 4) >> 3
 * (applied to q0) and t2 = (filt + 3) >> 3 (applied to p0). The outer taps
 * p1/q1 are adjusted by the rounded half of t1 only on non-hev lanes.
 */
#define VP9_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask, hev, p1_out, \
                           p0_out, q0_out, q1_out)                        \
  {                                                                       \
    v16i8 p1_m, p0_m, q0_m, q1_m, filt, q0_sub_p0, t1, t2;                \
    const v16i8 cnst4b = __msa_ldi_b(4);                                  \
    const v16i8 cnst3b = __msa_ldi_b(3);                                  \
                                                                          \
    /* bias unsigned pixels into the signed domain */                     \
    p1_m = (v16i8)__msa_xori_b(p1_in, 0x80);                              \
    p0_m = (v16i8)__msa_xori_b(p0_in, 0x80);                              \
    q0_m = (v16i8)__msa_xori_b(q0_in, 0x80);                              \
    q1_m = (v16i8)__msa_xori_b(q1_in, 0x80);                              \
                                                                          \
    /* outer-tap term only where hev is set */                            \
    filt = __msa_subs_s_b(p1_m, q1_m);                                    \
    filt &= hev;                                                          \
    /* filt += 3 * (q0 - p0), with per-step signed saturation */          \
    q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m);                               \
    filt = __msa_adds_s_b(filt, q0_sub_p0);                               \
    filt = __msa_adds_s_b(filt, q0_sub_p0);                               \
    filt = __msa_adds_s_b(filt, q0_sub_p0);                               \
    filt &= mask;                                                         \
    /* t1 = (filt + 4) >> 3 for q0; t2 = (filt + 3) >> 3 for p0 */        \
    t1 = __msa_adds_s_b(filt, cnst4b);                                    \
    t1 >>= cnst3b;                                                        \
    t2 = __msa_adds_s_b(filt, cnst3b);                                    \
    t2 >>= cnst3b;                                                        \
    q0_m = __msa_subs_s_b(q0_m, t1);                                      \
    q0_out = __msa_xori_b((v16u8)q0_m, 0x80);                             \
    p0_m = __msa_adds_s_b(p0_m, t2);                                      \
    p0_out = __msa_xori_b((v16u8)p0_m, 0x80);                             \
    /* outer taps: rounded (t1 + 1) >> 1, applied only where !hev */      \
    filt = __msa_srari_b(t1, 1);                                          \
    hev = __msa_xori_b(hev, 0xff); /* caller's hev is inverted here */    \
    filt &= hev;                                                          \
    q1_m = __msa_subs_s_b(q1_m, filt);                                    \
    q1_out = __msa_xori_b((v16u8)q1_m, 0x80);                             \
    p1_m = __msa_adds_s_b(p1_m, filt);                                    \
    p1_out = __msa_xori_b((v16u8)p1_m, 0x80);                             \
  }

/* Per-lane "flat" decision for the 8-tap filter: a lane is flat when all of
 * |p2-p0|, |q2-q0|, |p3-p0|, |q3-q0| (and whatever flat_out holds on entry)
 * are <= 1. Result is 0xff for flat lanes, 0 otherwise.
 *
 * flat_out is read-modify-write: on entry it must already hold
 * max(|p1-p0|, |q1-q0|) as produced in `flat_out` by LPF_MASK_HEV below.
 *
 * NOTE(review): this macro also reads a variable named `mask` from the
 * CALLER's scope (it is not a parameter) — the final result is ANDed with
 * it. Callers must have the LPF_MASK_HEV `mask_out` in scope as `mask`.
 */
#define VP9_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out)     \
  {                                                                       \
    v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0;  \
    v16u8 zero_in = { 0 };                                                \
                                                                          \
    tmp_flat4 = __msa_ori_b(zero_in, 1); /* threshold vector of 1s */     \
    p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in);                           \
    q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in);                           \
    p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in);                           \
    q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in);                           \
                                                                          \
    /* fold all absolute differences into flat_out via unsigned max */    \
    p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0);                \
    flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out);                      \
    p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0);                \
    flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out);                      \
                                                                          \
    /* flat = !(max_diff > 1), restricted to lanes selected by mask */    \
    flat_out = (tmp_flat4 < (v16u8)flat_out);                             \
    flat_out = __msa_xori_b(flat_out, 0xff);                              \
    flat_out = flat_out & (mask); /* `mask` comes from caller's scope */  \
  }

/* Per-lane "flat2" decision for the 16-tap filter: a lane is flat2 when all
 * of |p4..p7 - p0| and |q4..q7 - q0| are <= 1. Result (0xff / 0) is ANDed
 * with flat_in, so flat2 lanes are always a subset of flat lanes. */
#define VP9_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
                  q6_in, q7_in, flat_in, flat2_out)                       \
  {                                                                       \
    v16u8 tmp_flat5, zero_in = { 0 };                                     \
    v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0;             \
    v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0;             \
                                                                          \
    tmp_flat5 = __msa_ori_b(zero_in, 1); /* threshold vector of 1s */     \
    p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in);                           \
    q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in);                           \
    p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in);                           \
    q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in);                           \
    p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in);                           \
    q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in);                           \
    p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in);                           \
    q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in);                           \
                                                                          \
    /* fold all eight absolute differences with unsigned max */           \
    p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0);                \
    flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0);                  \
    flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out);                    \
    p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0);                \
    flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out);                    \
    p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0);                \
    flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out);                    \
                                                                          \
    /* flat2 = !(max_diff > 1), restricted to already-flat lanes */       \
    flat2_out = (tmp_flat5 < (v16u8)flat2_out);                           \
    flat2_out = __msa_xori_b(flat2_out, 0xff);                            \
    flat2_out = flat2_out & flat_in;                                      \
  }

/* VP9 8-tap ("filter8") smoothing: each output is a 7-tap weighted average
 * of the p3..q3 neighborhood, computed as (sum + 4) >> 3 via the rounding
 * arithmetic shift __msa_srari_h(x, 3). Inputs are widened unsigned
 * halfword vectors (v8u16, 8 pixels per vector); outputs are v8i16.
 * Partial sums are reused across outputs (tmp_filt8_0/1/2) to minimize
 * adds, so statement order here is significant. */
#define VP9_FILTER8(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
                    p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
                    q1_filt8_out, q2_filt8_out)                             \
  {                                                                         \
    v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2;                            \
                                                                            \
    tmp_filt8_2 = p2_in + p1_in + p0_in;                                    \
    tmp_filt8_0 = p3_in << 1;                                               \
                                                                            \
    /* p2' = (2*p3 + p2 + p1 + p0 + q0 + p3 + p2 + 4) >> 3 */               \
    tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in;                        \
    tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in;                              \
    p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                            \
    /* p1' reuses the common 2*p3 + p2 + p1 + p0 + q0 prefix */             \
    tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in;                              \
    p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                            \
    /* p0' = (p3 + p2 + p1 + 2*p0 + q0 + q1 + q2 + 4) >> 3 */               \
    tmp_filt8_1 = q2_in + q1_in + q0_in;                                    \
    tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1;                                \
    tmp_filt8_0 = tmp_filt8_2 + (p0_in);                                    \
    tmp_filt8_0 = tmp_filt8_0 + (p3_in);                                    \
    p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3);             \
                                                                            \
    /* q2' = (p0 + q0 + q1 + 2*q2 + 3*q3 + 4) >> 3 */                       \
    tmp_filt8_0 = q2_in + q3_in;                                            \
    tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0;                        \
    tmp_filt8_1 = q3_in + q3_in;                                            \
    tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0;                                \
    q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                            \
    /* q0' = (p2 + p1 + p0 + 2*q0 + q1 + q2 + q3 + 4) >> 3 */               \
    tmp_filt8_0 = tmp_filt8_2 + q3_in;                                      \
    tmp_filt8_1 = tmp_filt8_0 + q0_in;                                      \
    q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                            \
    /* q1' = (p1 + p0 + q0 + 2*q1 + q2 + 2*q3 + 4) >> 3, via subtraction */ \
    tmp_filt8_1 = tmp_filt8_0 - p2_in;                                      \
    tmp_filt8_0 = q1_in + q3_in;                                            \
    tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1;                                \
    q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
  }

/* Builds the three per-lane selectors used by the VP9 loop filter:
 *   hev_out  - 0xff where max(|p1-p0|, |q1-q0|) > thresh_in
 *   mask_out - 0xff where the edge should be filtered at all, i.e. no
 *              neighboring difference exceeds limit_in and
 *              2*|p0-q0| + |p1-q1|/2 stays within b_limit_in
 *   flat_out - left holding max(|p1-p0|, |q1-q0|), the seed value that
 *              VP9_FLAT4 above continues to fold into
 * All comparisons produce lane-wise 0xff / 0x00 results. */
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
                     limit_in, b_limit_in, thresh_in, hev_out, mask_out,     \
                     flat_out)                                               \
  {                                                                          \
    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m;            \
    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m;            \
                                                                             \
    /* absolute subtraction of pixel values */                               \
    p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in);                             \
    p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in);                             \
    p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in);                             \
    q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in);                             \
    q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in);                             \
    q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in);                             \
    p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in);                             \
    p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in);                             \
                                                                             \
    /* calculation of hev */                                                 \
    flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m);                    \
    hev_out = thresh_in < (v16u8)flat_out;                                   \
                                                                             \
    /* calculation of mask: 2*|p0-q0| + |p1-q1|/2, saturating adds */        \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m);               \
    p1_asub_q1_m >>= 1;                                                      \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m);               \
                                                                             \
    mask_out = b_limit_in < p0_asub_q0_m;                                    \
    /* max of all neighbor differences, compared against limit_in */         \
    mask_out = __msa_max_u_b(flat_out, mask_out);                            \
    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m);                \
    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out);                        \
    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m);                \
    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out);                        \
                                                                             \
    mask_out = limit_in < (v16u8)mask_out;                                   \
    mask_out = __msa_xori_b(mask_out, 0xff); /* invert: 0xff = filter */     \
  }
#endif /* VPX_DSP_LOOPFILTER_MSA_H_ */