Home | History | Annotate | Download | only in arm
      1 /*
      2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <arm_neon.h>
     12 
     13 #include "./vpx_dsp_rtcd.h"
     14 #include "vpx_dsp/arm/idct_neon.h"
     15 #include "vpx_dsp/inv_txfm.h"
     16 
     17 static INLINE void idct16x16_1_add_pos_kernel(uint8_t **dest, const int stride,
     18                                               const uint8x16_t res) {
     19   const uint8x16_t a = vld1q_u8(*dest);
     20   const uint8x16_t b = vqaddq_u8(a, res);
     21   vst1q_u8(*dest, b);
     22   *dest += stride;
     23 }
     24 
     25 static INLINE void idct16x16_1_add_neg_kernel(uint8_t **dest, const int stride,
     26                                               const uint8x16_t res) {
     27   const uint8x16_t a = vld1q_u8(*dest);
     28   const uint8x16_t b = vqsubq_u8(a, res);
     29   vst1q_u8(*dest, b);
     30   *dest += stride;
     31 }
     32 
     33 void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest,
     34                               int stride) {
     35   const int16_t out0 = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
     36   const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64));
     37   const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
     38 
     39   if (a1 >= 0) {
     40     const uint8x16_t dc = create_dcq(a1);
     41     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     42     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     43     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     44     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     45     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     46     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     47     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     48     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     49     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     50     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     51     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     52     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     53     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     54     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     55     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     56     idct16x16_1_add_pos_kernel(&dest, stride, dc);
     57   } else {
     58     const uint8x16_t dc = create_dcq(-a1);
     59     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     60     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     61     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     62     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     63     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     64     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     65     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     66     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     67     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     68     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     69     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     70     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     71     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     72     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     73     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     74     idct16x16_1_add_neg_kernel(&dest, stride, dc);
     75   }
     76 }
     77