Home | History | Annotate | Download | only in arm
      1 /*
      2  *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <arm_neon.h>
     12 
     13 #include "./vpx_dsp_rtcd.h"
     14 #include "vpx/vpx_integer.h"
     15 
     16 void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
     17                                    uint16_t *dst, ptrdiff_t dst_stride,
     18                                    const int16_t *filter_x, int filter_x_stride,
     19                                    const int16_t *filter_y, int filter_y_stride,
     20                                    int w, int h, int bd) {
     21   (void)filter_x;
     22   (void)filter_x_stride;
     23   (void)filter_y;
     24   (void)filter_y_stride;
     25   (void)bd;
     26 
     27   if (w < 8) {  // copy4
     28     do {
     29       vst1_u16(dst, vld1_u16(src));
     30       src += src_stride;
     31       dst += dst_stride;
     32       vst1_u16(dst, vld1_u16(src));
     33       src += src_stride;
     34       dst += dst_stride;
     35       h -= 2;
     36     } while (h > 0);
     37   } else if (w == 8) {  // copy8
     38     do {
     39       vst1q_u16(dst, vld1q_u16(src));
     40       src += src_stride;
     41       dst += dst_stride;
     42       vst1q_u16(dst, vld1q_u16(src));
     43       src += src_stride;
     44       dst += dst_stride;
     45       h -= 2;
     46     } while (h > 0);
     47   } else if (w < 32) {  // copy16
     48     do {
     49       vst2q_u16(dst, vld2q_u16(src));
     50       src += src_stride;
     51       dst += dst_stride;
     52       vst2q_u16(dst, vld2q_u16(src));
     53       src += src_stride;
     54       dst += dst_stride;
     55       vst2q_u16(dst, vld2q_u16(src));
     56       src += src_stride;
     57       dst += dst_stride;
     58       vst2q_u16(dst, vld2q_u16(src));
     59       src += src_stride;
     60       dst += dst_stride;
     61       h -= 4;
     62     } while (h > 0);
     63   } else if (w == 32) {  // copy32
     64     do {
     65       vst4q_u16(dst, vld4q_u16(src));
     66       src += src_stride;
     67       dst += dst_stride;
     68       vst4q_u16(dst, vld4q_u16(src));
     69       src += src_stride;
     70       dst += dst_stride;
     71       vst4q_u16(dst, vld4q_u16(src));
     72       src += src_stride;
     73       dst += dst_stride;
     74       vst4q_u16(dst, vld4q_u16(src));
     75       src += src_stride;
     76       dst += dst_stride;
     77       h -= 4;
     78     } while (h > 0);
     79   } else {  // copy64
     80     do {
     81       vst4q_u16(dst, vld4q_u16(src));
     82       vst4q_u16(dst + 32, vld4q_u16(src + 32));
     83       src += src_stride;
     84       dst += dst_stride;
     85       vst4q_u16(dst, vld4q_u16(src));
     86       vst4q_u16(dst + 32, vld4q_u16(src + 32));
     87       src += src_stride;
     88       dst += dst_stride;
     89       vst4q_u16(dst, vld4q_u16(src));
     90       vst4q_u16(dst + 32, vld4q_u16(src + 32));
     91       src += src_stride;
     92       dst += dst_stride;
     93       vst4q_u16(dst, vld4q_u16(src));
     94       vst4q_u16(dst + 32, vld4q_u16(src + 32));
     95       src += src_stride;
     96       dst += dst_stride;
     97       h -= 4;
     98     } while (h > 0);
     99   }
    100 }
    101