Home | History | Annotate | Download | only in arm
      1 /*
      2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
#include <arm_neon.h>
#include <string.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
     15 
     16 void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
     17                             uint8_t *dst, ptrdiff_t dst_stride,
     18                             const InterpKernel *filter, int x0_q4,
     19                             int x_step_q4, int y0_q4, int y_step_q4, int w,
     20                             int h) {
     21   (void)filter;
     22   (void)x0_q4;
     23   (void)x_step_q4;
     24   (void)y0_q4;
     25   (void)y_step_q4;
     26 
     27   if (w < 8) {  // copy4
     28     do {
     29       *(uint32_t *)dst = *(const uint32_t *)src;
     30       src += src_stride;
     31       dst += dst_stride;
     32       *(uint32_t *)dst = *(const uint32_t *)src;
     33       src += src_stride;
     34       dst += dst_stride;
     35       h -= 2;
     36     } while (h > 0);
     37   } else if (w == 8) {  // copy8
     38     uint8x8_t s0, s1;
     39     do {
     40       s0 = vld1_u8(src);
     41       src += src_stride;
     42       s1 = vld1_u8(src);
     43       src += src_stride;
     44 
     45       vst1_u8(dst, s0);
     46       dst += dst_stride;
     47       vst1_u8(dst, s1);
     48       dst += dst_stride;
     49       h -= 2;
     50     } while (h > 0);
     51   } else if (w < 32) {  // copy16
     52     uint8x16_t s0, s1;
     53     do {
     54       s0 = vld1q_u8(src);
     55       src += src_stride;
     56       s1 = vld1q_u8(src);
     57       src += src_stride;
     58 
     59       vst1q_u8(dst, s0);
     60       dst += dst_stride;
     61       vst1q_u8(dst, s1);
     62       dst += dst_stride;
     63       h -= 2;
     64     } while (h > 0);
     65   } else if (w == 32) {  // copy32
     66     uint8x16_t s0, s1, s2, s3;
     67     do {
     68       s0 = vld1q_u8(src);
     69       s1 = vld1q_u8(src + 16);
     70       src += src_stride;
     71       s2 = vld1q_u8(src);
     72       s3 = vld1q_u8(src + 16);
     73       src += src_stride;
     74 
     75       vst1q_u8(dst, s0);
     76       vst1q_u8(dst + 16, s1);
     77       dst += dst_stride;
     78       vst1q_u8(dst, s2);
     79       vst1q_u8(dst + 16, s3);
     80       dst += dst_stride;
     81       h -= 2;
     82     } while (h > 0);
     83   } else {  // copy64
     84     uint8x16_t s0, s1, s2, s3;
     85     do {
     86       s0 = vld1q_u8(src);
     87       s1 = vld1q_u8(src + 16);
     88       s2 = vld1q_u8(src + 32);
     89       s3 = vld1q_u8(src + 48);
     90       src += src_stride;
     91 
     92       vst1q_u8(dst, s0);
     93       vst1q_u8(dst + 16, s1);
     94       vst1q_u8(dst + 32, s2);
     95       vst1q_u8(dst + 48, s3);
     96       dst += dst_stride;
     97     } while (--h);
     98   }
     99 }
    100