1 /* 2 * Copyright (c) 2016 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 13 #include "./vpx_dsp_rtcd.h" 14 #include "vpx/vpx_integer.h" 15 16 void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride, 17 uint16_t *dst, ptrdiff_t dst_stride, 18 const InterpKernel *filter, int x0_q4, 19 int x_step_q4, int y0_q4, int y_step_q4, 20 int w, int h, int bd) { 21 (void)filter; 22 (void)x0_q4; 23 (void)x_step_q4; 24 (void)y0_q4; 25 (void)y_step_q4; 26 (void)bd; 27 28 if (w < 8) { // copy4 29 do { 30 vst1_u16(dst, vld1_u16(src)); 31 src += src_stride; 32 dst += dst_stride; 33 vst1_u16(dst, vld1_u16(src)); 34 src += src_stride; 35 dst += dst_stride; 36 h -= 2; 37 } while (h > 0); 38 } else if (w == 8) { // copy8 39 do { 40 vst1q_u16(dst, vld1q_u16(src)); 41 src += src_stride; 42 dst += dst_stride; 43 vst1q_u16(dst, vld1q_u16(src)); 44 src += src_stride; 45 dst += dst_stride; 46 h -= 2; 47 } while (h > 0); 48 } else if (w < 32) { // copy16 49 do { 50 vst2q_u16(dst, vld2q_u16(src)); 51 src += src_stride; 52 dst += dst_stride; 53 vst2q_u16(dst, vld2q_u16(src)); 54 src += src_stride; 55 dst += dst_stride; 56 vst2q_u16(dst, vld2q_u16(src)); 57 src += src_stride; 58 dst += dst_stride; 59 vst2q_u16(dst, vld2q_u16(src)); 60 src += src_stride; 61 dst += dst_stride; 62 h -= 4; 63 } while (h > 0); 64 } else if (w == 32) { // copy32 65 do { 66 vst4q_u16(dst, vld4q_u16(src)); 67 src += src_stride; 68 dst += dst_stride; 69 vst4q_u16(dst, vld4q_u16(src)); 70 src += src_stride; 71 dst += dst_stride; 72 vst4q_u16(dst, vld4q_u16(src)); 73 src += src_stride; 74 dst += dst_stride; 75 vst4q_u16(dst, vld4q_u16(src)); 76 src += src_stride; 77 dst += dst_stride; 78 h -= 4; 79 } while (h > 0); 80 } else { // copy64 81 do { 82 vst4q_u16(dst, vld4q_u16(src)); 83 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 84 src += src_stride; 85 dst += dst_stride; 86 vst4q_u16(dst, vld4q_u16(src)); 87 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 88 src += src_stride; 89 dst += dst_stride; 90 vst4q_u16(dst, vld4q_u16(src)); 91 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 92 src += src_stride; 93 dst += dst_stride; 94 vst4q_u16(dst, vld4q_u16(src)); 95 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 96 src += src_stride; 97 dst += dst_stride; 98 h -= 4; 99 } while (h > 0); 100 } 101 } 102