1 /* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 13 #include "./vpx_dsp_rtcd.h" 14 #include "vpx_dsp/arm/idct_neon.h" 15 #include "vpx_dsp/inv_txfm.h" 16 17 static INLINE void idct32x32_1_add_pos_kernel(uint8_t **dest, const int stride, 18 const uint8x16_t res) { 19 const uint8x16_t a0 = vld1q_u8(*dest); 20 const uint8x16_t a1 = vld1q_u8(*dest + 16); 21 const uint8x16_t b0 = vqaddq_u8(a0, res); 22 const uint8x16_t b1 = vqaddq_u8(a1, res); 23 vst1q_u8(*dest, b0); 24 vst1q_u8(*dest + 16, b1); 25 *dest += stride; 26 } 27 28 static INLINE void idct32x32_1_add_neg_kernel(uint8_t **dest, const int stride, 29 const uint8x16_t res) { 30 const uint8x16_t a0 = vld1q_u8(*dest); 31 const uint8x16_t a1 = vld1q_u8(*dest + 16); 32 const uint8x16_t b0 = vqsubq_u8(a0, res); 33 const uint8x16_t b1 = vqsubq_u8(a1, res); 34 vst1q_u8(*dest, b0); 35 vst1q_u8(*dest + 16, b1); 36 *dest += stride; 37 } 38 39 void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, 40 int stride) { 41 int i; 42 const int16_t out0 = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); 43 const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); 44 const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6); 45 46 if (a1 >= 0) { 47 const uint8x16_t dc = create_dcq(a1); 48 for (i = 0; i < 32; i++) { 49 idct32x32_1_add_pos_kernel(&dest, stride, dc); 50 } 51 } else { 52 const uint8x16_t dc = create_dcq(-a1); 53 for (i = 0; i < 32; i++) { 54 idct32x32_1_add_neg_kernel(&dest, stride, dc); 55 } 56 } 57 } 58