Home | History | Annotate | Download | only in arm
      1 /*
      2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
#include <arm_neon.h>
#include <string.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
     15 
     16 void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
     17                             uint8_t *dst, ptrdiff_t dst_stride,
     18                             const int16_t *filter_x, int filter_x_stride,
     19                             const int16_t *filter_y, int filter_y_stride, int w,
     20                             int h) {
     21   (void)filter_x;
     22   (void)filter_x_stride;
     23   (void)filter_y;
     24   (void)filter_y_stride;
     25 
     26   if (w < 8) {  // copy4
     27     do {
     28       *(uint32_t *)dst = *(const uint32_t *)src;
     29       src += src_stride;
     30       dst += dst_stride;
     31       *(uint32_t *)dst = *(const uint32_t *)src;
     32       src += src_stride;
     33       dst += dst_stride;
     34       h -= 2;
     35     } while (h > 0);
     36   } else if (w == 8) {  // copy8
     37     uint8x8_t s0, s1;
     38     do {
     39       s0 = vld1_u8(src);
     40       src += src_stride;
     41       s1 = vld1_u8(src);
     42       src += src_stride;
     43 
     44       vst1_u8(dst, s0);
     45       dst += dst_stride;
     46       vst1_u8(dst, s1);
     47       dst += dst_stride;
     48       h -= 2;
     49     } while (h > 0);
     50   } else if (w < 32) {  // copy16
     51     uint8x16_t s0, s1;
     52     do {
     53       s0 = vld1q_u8(src);
     54       src += src_stride;
     55       s1 = vld1q_u8(src);
     56       src += src_stride;
     57 
     58       vst1q_u8(dst, s0);
     59       dst += dst_stride;
     60       vst1q_u8(dst, s1);
     61       dst += dst_stride;
     62       h -= 2;
     63     } while (h > 0);
     64   } else if (w == 32) {  // copy32
     65     uint8x16_t s0, s1, s2, s3;
     66     do {
     67       s0 = vld1q_u8(src);
     68       s1 = vld1q_u8(src + 16);
     69       src += src_stride;
     70       s2 = vld1q_u8(src);
     71       s3 = vld1q_u8(src + 16);
     72       src += src_stride;
     73 
     74       vst1q_u8(dst, s0);
     75       vst1q_u8(dst + 16, s1);
     76       dst += dst_stride;
     77       vst1q_u8(dst, s2);
     78       vst1q_u8(dst + 16, s3);
     79       dst += dst_stride;
     80       h -= 2;
     81     } while (h > 0);
     82   } else {  // copy64
     83     uint8x16_t s0, s1, s2, s3;
     84     do {
     85       s0 = vld1q_u8(src);
     86       s1 = vld1q_u8(src + 16);
     87       s2 = vld1q_u8(src + 32);
     88       s3 = vld1q_u8(src + 48);
     89       src += src_stride;
     90 
     91       vst1q_u8(dst, s0);
     92       vst1q_u8(dst + 16, s1);
     93       vst1q_u8(dst + 32, s2);
     94       vst1q_u8(dst + 48, s3);
     95       dst += dst_stride;
     96     } while (--h);
     97   }
     98 }
     99