1 /* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 13 #include "./vpx_dsp_rtcd.h" 14 #include "vpx/vpx_integer.h" 15 16 void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, 17 uint8_t *dst, ptrdiff_t dst_stride, 18 const InterpKernel *filter, int x0_q4, 19 int x_step_q4, int y0_q4, int y_step_q4, int w, 20 int h) { 21 (void)filter; 22 (void)x0_q4; 23 (void)x_step_q4; 24 (void)y0_q4; 25 (void)y_step_q4; 26 27 if (w < 8) { // copy4 28 do { 29 *(uint32_t *)dst = *(const uint32_t *)src; 30 src += src_stride; 31 dst += dst_stride; 32 *(uint32_t *)dst = *(const uint32_t *)src; 33 src += src_stride; 34 dst += dst_stride; 35 h -= 2; 36 } while (h > 0); 37 } else if (w == 8) { // copy8 38 uint8x8_t s0, s1; 39 do { 40 s0 = vld1_u8(src); 41 src += src_stride; 42 s1 = vld1_u8(src); 43 src += src_stride; 44 45 vst1_u8(dst, s0); 46 dst += dst_stride; 47 vst1_u8(dst, s1); 48 dst += dst_stride; 49 h -= 2; 50 } while (h > 0); 51 } else if (w < 32) { // copy16 52 uint8x16_t s0, s1; 53 do { 54 s0 = vld1q_u8(src); 55 src += src_stride; 56 s1 = vld1q_u8(src); 57 src += src_stride; 58 59 vst1q_u8(dst, s0); 60 dst += dst_stride; 61 vst1q_u8(dst, s1); 62 dst += dst_stride; 63 h -= 2; 64 } while (h > 0); 65 } else if (w == 32) { // copy32 66 uint8x16_t s0, s1, s2, s3; 67 do { 68 s0 = vld1q_u8(src); 69 s1 = vld1q_u8(src + 16); 70 src += src_stride; 71 s2 = vld1q_u8(src); 72 s3 = vld1q_u8(src + 16); 73 src += src_stride; 74 75 vst1q_u8(dst, s0); 76 vst1q_u8(dst + 16, s1); 77 dst += dst_stride; 78 vst1q_u8(dst, s2); 79 vst1q_u8(dst + 16, s3); 80 dst += dst_stride; 81 h -= 2; 82 } while (h > 0); 83 } else { // copy64 84 uint8x16_t s0, s1, s2, s3; 85 do { 86 s0 = vld1q_u8(src); 87 s1 = vld1q_u8(src + 16); 88 s2 = vld1q_u8(src + 32); 89 s3 = vld1q_u8(src + 48); 90 src += src_stride; 91 92 vst1q_u8(dst, s0); 93 vst1q_u8(dst + 16, s1); 94 vst1q_u8(dst + 32, s2); 95 vst1q_u8(dst + 48, s3); 96 dst += dst_stride; 97 } while (--h); 98 } 99 } 100