1 /* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 13 #include "./vpx_dsp_rtcd.h" 14 #include "vpx_dsp/inv_txfm.h" 15 16 static INLINE uint8x8_t create_dcd(const int16_t dc) { 17 int16x8_t t = vdupq_n_s16(dc); 18 return vqmovun_s16(t); 19 } 20 21 static INLINE void idct8x8_1_add_pos_kernel(uint8_t **dest, const int stride, 22 const uint8x8_t res) { 23 const uint8x8_t a = vld1_u8(*dest); 24 const uint8x8_t b = vqadd_u8(a, res); 25 vst1_u8(*dest, b); 26 *dest += stride; 27 } 28 29 static INLINE void idct8x8_1_add_neg_kernel(uint8_t **dest, const int stride, 30 const uint8x8_t res) { 31 const uint8x8_t a = vld1_u8(*dest); 32 const uint8x8_t b = vqsub_u8(a, res); 33 vst1_u8(*dest, b); 34 *dest += stride; 35 } 36 37 void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, 38 int stride) { 39 const int16_t out0 = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); 40 const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); 41 const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5); 42 43 if (a1 >= 0) { 44 const uint8x8_t dc = create_dcd(a1); 45 idct8x8_1_add_pos_kernel(&dest, stride, dc); 46 idct8x8_1_add_pos_kernel(&dest, stride, dc); 47 idct8x8_1_add_pos_kernel(&dest, stride, dc); 48 idct8x8_1_add_pos_kernel(&dest, stride, dc); 49 idct8x8_1_add_pos_kernel(&dest, stride, dc); 50 idct8x8_1_add_pos_kernel(&dest, stride, dc); 51 idct8x8_1_add_pos_kernel(&dest, stride, dc); 52 idct8x8_1_add_pos_kernel(&dest, stride, dc); 53 } else { 54 const uint8x8_t dc = create_dcd(-a1); 55 idct8x8_1_add_neg_kernel(&dest, stride, dc); 56 idct8x8_1_add_neg_kernel(&dest, stride, dc); 57 idct8x8_1_add_neg_kernel(&dest, stride, dc); 58 idct8x8_1_add_neg_kernel(&dest, stride, dc); 59 idct8x8_1_add_neg_kernel(&dest, stride, dc); 60 idct8x8_1_add_neg_kernel(&dest, stride, dc); 61 idct8x8_1_add_neg_kernel(&dest, stride, dc); 62 idct8x8_1_add_neg_kernel(&dest, stride, dc); 63 } 64 } 65