Home | History | Annotate | Download | only in arm
      1 /*
      2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <arm_neon.h>
     12 
     13 #include "./vpx_dsp_rtcd.h"
     14 #include "vpx_dsp/arm/idct_neon.h"
     15 #include "vpx_dsp/inv_txfm.h"
     16 
     17 static INLINE void idct32x32_1_add_pos_kernel(uint8_t **dest, const int stride,
     18                                               const uint8x16_t res) {
     19   const uint8x16_t a0 = vld1q_u8(*dest);
     20   const uint8x16_t a1 = vld1q_u8(*dest + 16);
     21   const uint8x16_t b0 = vqaddq_u8(a0, res);
     22   const uint8x16_t b1 = vqaddq_u8(a1, res);
     23   vst1q_u8(*dest, b0);
     24   vst1q_u8(*dest + 16, b1);
     25   *dest += stride;
     26 }
     27 
     28 static INLINE void idct32x32_1_add_neg_kernel(uint8_t **dest, const int stride,
     29                                               const uint8x16_t res) {
     30   const uint8x16_t a0 = vld1q_u8(*dest);
     31   const uint8x16_t a1 = vld1q_u8(*dest + 16);
     32   const uint8x16_t b0 = vqsubq_u8(a0, res);
     33   const uint8x16_t b1 = vqsubq_u8(a1, res);
     34   vst1q_u8(*dest, b0);
     35   vst1q_u8(*dest + 16, b1);
     36   *dest += stride;
     37 }
     38 
     39 void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest,
     40                               int stride) {
     41   int i;
     42   const int16_t out0 = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
     43   const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64));
     44   const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
     45 
     46   if (a1 >= 0) {
     47     const uint8x16_t dc = create_dcq(a1);
     48     for (i = 0; i < 32; i++) {
     49       idct32x32_1_add_pos_kernel(&dest, stride, dc);
     50     }
     51   } else {
     52     const uint8x16_t dc = create_dcq(-a1);
     53     for (i = 0; i < 32; i++) {
     54       idct32x32_1_add_neg_kernel(&dest, stride, dc);
     55     }
     56   }
     57 }
     58