Home | History | Annotate | Download | only in arm
      1 /*
      2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <arm_neon.h>
     12 
     13 #include "./vpx_dsp_rtcd.h"
     14 #include "vpx_dsp/inv_txfm.h"
     15 
     16 static INLINE uint8x8_t create_dcd(const int16_t dc) {
     17   int16x8_t t = vdupq_n_s16(dc);
     18   return vqmovun_s16(t);
     19 }
     20 
     21 static INLINE void idct8x8_1_add_pos_kernel(uint8_t **dest, const int stride,
     22                                             const uint8x8_t res) {
     23   const uint8x8_t a = vld1_u8(*dest);
     24   const uint8x8_t b = vqadd_u8(a, res);
     25   vst1_u8(*dest, b);
     26   *dest += stride;
     27 }
     28 
     29 static INLINE void idct8x8_1_add_neg_kernel(uint8_t **dest, const int stride,
     30                                             const uint8x8_t res) {
     31   const uint8x8_t a = vld1_u8(*dest);
     32   const uint8x8_t b = vqsub_u8(a, res);
     33   vst1_u8(*dest, b);
     34   *dest += stride;
     35 }
     36 
     37 void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest,
     38                             int stride) {
     39   const int16_t out0 = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
     40   const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64));
     41   const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5);
     42 
     43   if (a1 >= 0) {
     44     const uint8x8_t dc = create_dcd(a1);
     45     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     46     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     47     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     48     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     49     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     50     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     51     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     52     idct8x8_1_add_pos_kernel(&dest, stride, dc);
     53   } else {
     54     const uint8x8_t dc = create_dcd(-a1);
     55     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     56     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     57     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     58     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     59     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     60     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     61     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     62     idct8x8_1_add_neg_kernel(&dest, stride, dc);
     63   }
     64 }
     65