1 /* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 13 #include "vp8/common/blockd.h" 14 15 void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x, 16 unsigned char * yabove_row, 17 unsigned char * yleft, 18 int left_stride, 19 unsigned char * ypred_ptr, 20 int y_stride) { 21 const int mode = x->mode_info_context->mbmi.mode; 22 int i; 23 24 switch (mode) { 25 case DC_PRED: 26 { 27 int shift = x->up_available + x->left_available; 28 uint8x16_t v_expected_dc = vdupq_n_u8(128); 29 30 if (shift) { 31 unsigned int average = 0; 32 int expected_dc; 33 if (x->up_available) { 34 const uint8x16_t v_above = vld1q_u8(yabove_row); 35 const uint16x8_t a = vpaddlq_u8(v_above); 36 const uint32x4_t b = vpaddlq_u16(a); 37 const uint64x2_t c = vpaddlq_u32(b); 38 const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), 39 vreinterpret_u32_u64(vget_high_u64(c))); 40 average = vget_lane_u32(d, 0); 41 } 42 if (x->left_available) { 43 for (i = 0; i < 16; ++i) { 44 average += yleft[0]; 45 yleft += left_stride; 46 } 47 } 48 shift += 3; 49 expected_dc = (average + (1 << (shift - 1))) >> shift; 50 v_expected_dc = vmovq_n_u8((uint8_t)expected_dc); 51 } 52 for (i = 0; i < 16; ++i) { 53 vst1q_u8(ypred_ptr, v_expected_dc); 54 ypred_ptr += y_stride; 55 } 56 } 57 break; 58 case V_PRED: 59 { 60 const uint8x16_t v_above = vld1q_u8(yabove_row); 61 for (i = 0; i < 16; ++i) { 62 vst1q_u8(ypred_ptr, v_above); 63 ypred_ptr += y_stride; 64 } 65 } 66 break; 67 case H_PRED: 68 { 69 for (i = 0; i < 16; ++i) { 70 const uint8x16_t v_yleft = vmovq_n_u8((uint8_t)yleft[0]); 71 yleft += left_stride; 72 vst1q_u8(ypred_ptr, v_yleft); 73 ypred_ptr += y_stride; 74 } 75 } 76 break; 77 case TM_PRED: 78 { 79 const uint16x8_t v_ytop_left = vmovq_n_u16((int16_t)yabove_row[-1]); 80 const uint8x16_t v_above = vld1q_u8(yabove_row); 81 for (i = 0; i < 16; ++i) { 82 const uint8x8_t v_yleft = vmov_n_u8((int8_t)yleft[0]); 83 const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft); 84 const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft); 85 const int16x8_t b_lo = vsubq_s16(vreinterpretq_s16_u16(a_lo), 86 vreinterpretq_s16_u16(v_ytop_left)); 87 const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi), 88 vreinterpretq_s16_u16(v_ytop_left)); 89 const uint8x8_t pred_lo = vqmovun_s16(b_lo); 90 const uint8x8_t pred_hi = vqmovun_s16(b_hi); 91 92 vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi)); 93 ypred_ptr += y_stride; 94 yleft += left_stride; 95 } 96 } 97 break; 98 } 99 } 100 101 void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x, 102 unsigned char * uabove_row, 103 unsigned char * vabove_row, 104 unsigned char * uleft, 105 unsigned char * vleft, 106 int left_stride, 107 unsigned char * upred_ptr, 108 unsigned char * vpred_ptr, 109 int pred_stride) { 110 const int mode = x->mode_info_context->mbmi.uv_mode; 111 int i; 112 113 switch (mode) { 114 case DC_PRED: 115 { 116 int shift = x->up_available + x->left_available; 117 uint8x8_t v_expected_udc = vdup_n_u8(128); 118 uint8x8_t v_expected_vdc = vdup_n_u8(128); 119 120 if (shift) { 121 unsigned int average_u = 0; 122 unsigned int average_v = 0; 123 int expected_udc; 124 int expected_vdc; 125 if (x->up_available) { 126 const uint8x8_t v_uabove = vld1_u8(uabove_row); 127 const uint8x8_t v_vabove = vld1_u8(vabove_row); 128 const uint16x8_t a = vpaddlq_u8(vcombine_u8(v_uabove, v_vabove)); 129 const uint32x4_t b = vpaddlq_u16(a); 130 const uint64x2_t c = vpaddlq_u32(b); 131 average_u = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 0); 132 average_v = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 2); 133 } 134 if (x->left_available) { 135 for (i = 0; i < 8; ++i) { 136 average_u += uleft[0]; 137 uleft += left_stride; 138 average_v += vleft[0]; 139 vleft += left_stride; 140 } 141 } 142 shift += 2; 143 expected_udc = (average_u + (1 << (shift - 1))) >> shift; 144 expected_vdc = (average_v + (1 << (shift - 1))) >> shift; 145 v_expected_udc = vmov_n_u8((uint8_t)expected_udc); 146 v_expected_vdc = vmov_n_u8((uint8_t)expected_vdc); 147 } 148 for (i = 0; i < 8; ++i) { 149 vst1_u8(upred_ptr, v_expected_udc); 150 upred_ptr += pred_stride; 151 vst1_u8(vpred_ptr, v_expected_vdc); 152 vpred_ptr += pred_stride; 153 } 154 } 155 break; 156 case V_PRED: 157 { 158 const uint8x8_t v_uabove = vld1_u8(uabove_row); 159 const uint8x8_t v_vabove = vld1_u8(vabove_row); 160 for (i = 0; i < 8; ++i) { 161 vst1_u8(upred_ptr, v_uabove); 162 upred_ptr += pred_stride; 163 vst1_u8(vpred_ptr, v_vabove); 164 vpred_ptr += pred_stride; 165 } 166 } 167 break; 168 case H_PRED: 169 { 170 for (i = 0; i < 8; ++i) { 171 const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]); 172 const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]); 173 uleft += left_stride; 174 vleft += left_stride; 175 vst1_u8(upred_ptr, v_uleft); 176 upred_ptr += pred_stride; 177 vst1_u8(vpred_ptr, v_vleft); 178 vpred_ptr += pred_stride; 179 } 180 } 181 break; 182 case TM_PRED: 183 { 184 const uint16x8_t v_utop_left = vmovq_n_u16((int16_t)uabove_row[-1]); 185 const uint16x8_t v_vtop_left = vmovq_n_u16((int16_t)vabove_row[-1]); 186 const uint8x8_t v_uabove = vld1_u8(uabove_row); 187 const uint8x8_t v_vabove = vld1_u8(vabove_row); 188 for (i = 0; i < 8; ++i) { 189 const uint8x8_t v_uleft = vmov_n_u8((int8_t)uleft[0]); 190 const uint8x8_t v_vleft = vmov_n_u8((int8_t)vleft[0]); 191 const uint16x8_t a_u = vaddl_u8(v_uabove, v_uleft); 192 const uint16x8_t a_v = vaddl_u8(v_vabove, v_vleft); 193 const int16x8_t b_u = vsubq_s16(vreinterpretq_s16_u16(a_u), 194 vreinterpretq_s16_u16(v_utop_left)); 195 const int16x8_t b_v = vsubq_s16(vreinterpretq_s16_u16(a_v), 196 vreinterpretq_s16_u16(v_vtop_left)); 197 const uint8x8_t pred_u = vqmovun_s16(b_u); 198 const uint8x8_t pred_v = vqmovun_s16(b_v); 199 200 vst1_u8(upred_ptr, pred_u); 201 vst1_u8(vpred_ptr, pred_v); 202 upred_ptr += pred_stride; 203 vpred_ptr += pred_stride; 204 uleft += left_stride; 205 vleft += left_stride; 206 } 207 } 208 break; 209 } 210 } 211