1 /* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 13 #include "./vpx_dsp_rtcd.h" 14 #include "vpx_dsp/arm/idct_neon.h" 15 #include "vpx_dsp/inv_txfm.h" 16 17 static INLINE void idct16x16_1_add_pos_kernel(uint8_t **dest, const int stride, 18 const uint8x16_t res) { 19 const uint8x16_t a = vld1q_u8(*dest); 20 const uint8x16_t b = vqaddq_u8(a, res); 21 vst1q_u8(*dest, b); 22 *dest += stride; 23 } 24 25 static INLINE void idct16x16_1_add_neg_kernel(uint8_t **dest, const int stride, 26 const uint8x16_t res) { 27 const uint8x16_t a = vld1q_u8(*dest); 28 const uint8x16_t b = vqsubq_u8(a, res); 29 vst1q_u8(*dest, b); 30 *dest += stride; 31 } 32 33 void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, 34 int stride) { 35 const int16_t out0 = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); 36 const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); 37 const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6); 38 39 if (a1 >= 0) { 40 const uint8x16_t dc = create_dcq(a1); 41 idct16x16_1_add_pos_kernel(&dest, stride, dc); 42 idct16x16_1_add_pos_kernel(&dest, stride, dc); 43 idct16x16_1_add_pos_kernel(&dest, stride, dc); 44 idct16x16_1_add_pos_kernel(&dest, stride, dc); 45 idct16x16_1_add_pos_kernel(&dest, stride, dc); 46 idct16x16_1_add_pos_kernel(&dest, stride, dc); 47 idct16x16_1_add_pos_kernel(&dest, stride, dc); 48 idct16x16_1_add_pos_kernel(&dest, stride, dc); 49 idct16x16_1_add_pos_kernel(&dest, stride, dc); 50 idct16x16_1_add_pos_kernel(&dest, stride, dc); 51 idct16x16_1_add_pos_kernel(&dest, stride, dc); 52 idct16x16_1_add_pos_kernel(&dest, stride, dc); 53 idct16x16_1_add_pos_kernel(&dest, stride, dc); 54 idct16x16_1_add_pos_kernel(&dest, stride, dc); 55 idct16x16_1_add_pos_kernel(&dest, stride, dc); 56 idct16x16_1_add_pos_kernel(&dest, stride, dc); 57 } else { 58 const uint8x16_t dc = create_dcq(-a1); 59 idct16x16_1_add_neg_kernel(&dest, stride, dc); 60 idct16x16_1_add_neg_kernel(&dest, stride, dc); 61 idct16x16_1_add_neg_kernel(&dest, stride, dc); 62 idct16x16_1_add_neg_kernel(&dest, stride, dc); 63 idct16x16_1_add_neg_kernel(&dest, stride, dc); 64 idct16x16_1_add_neg_kernel(&dest, stride, dc); 65 idct16x16_1_add_neg_kernel(&dest, stride, dc); 66 idct16x16_1_add_neg_kernel(&dest, stride, dc); 67 idct16x16_1_add_neg_kernel(&dest, stride, dc); 68 idct16x16_1_add_neg_kernel(&dest, stride, dc); 69 idct16x16_1_add_neg_kernel(&dest, stride, dc); 70 idct16x16_1_add_neg_kernel(&dest, stride, dc); 71 idct16x16_1_add_neg_kernel(&dest, stride, dc); 72 idct16x16_1_add_neg_kernel(&dest, stride, dc); 73 idct16x16_1_add_neg_kernel(&dest, stride, dc); 74 idct16x16_1_add_neg_kernel(&dest, stride, dc); 75 } 76 } 77