1 /* 2 * Copyright (c) 2016 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 13 #include "./vpx_dsp_rtcd.h" 14 #include "vpx/vpx_integer.h" 15 16 void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride, 17 uint16_t *dst, ptrdiff_t dst_stride, 18 const int16_t *filter_x, int filter_x_stride, 19 const int16_t *filter_y, int filter_y_stride, 20 int w, int h, int bd) { 21 (void)filter_x; 22 (void)filter_x_stride; 23 (void)filter_y; 24 (void)filter_y_stride; 25 (void)bd; 26 27 if (w < 8) { // copy4 28 do { 29 vst1_u16(dst, vld1_u16(src)); 30 src += src_stride; 31 dst += dst_stride; 32 vst1_u16(dst, vld1_u16(src)); 33 src += src_stride; 34 dst += dst_stride; 35 h -= 2; 36 } while (h > 0); 37 } else if (w == 8) { // copy8 38 do { 39 vst1q_u16(dst, vld1q_u16(src)); 40 src += src_stride; 41 dst += dst_stride; 42 vst1q_u16(dst, vld1q_u16(src)); 43 src += src_stride; 44 dst += dst_stride; 45 h -= 2; 46 } while (h > 0); 47 } else if (w < 32) { // copy16 48 do { 49 vst2q_u16(dst, vld2q_u16(src)); 50 src += src_stride; 51 dst += dst_stride; 52 vst2q_u16(dst, vld2q_u16(src)); 53 src += src_stride; 54 dst += dst_stride; 55 vst2q_u16(dst, vld2q_u16(src)); 56 src += src_stride; 57 dst += dst_stride; 58 vst2q_u16(dst, vld2q_u16(src)); 59 src += src_stride; 60 dst += dst_stride; 61 h -= 4; 62 } while (h > 0); 63 } else if (w == 32) { // copy32 64 do { 65 vst4q_u16(dst, vld4q_u16(src)); 66 src += src_stride; 67 dst += dst_stride; 68 vst4q_u16(dst, vld4q_u16(src)); 69 src += src_stride; 70 dst += dst_stride; 71 vst4q_u16(dst, vld4q_u16(src)); 72 src += src_stride; 73 dst += dst_stride; 74 vst4q_u16(dst, vld4q_u16(src)); 75 src += src_stride; 76 dst += dst_stride; 77 h -= 4; 78 } while (h > 0); 79 } else { // copy64 80 do { 81 vst4q_u16(dst, vld4q_u16(src)); 82 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 83 src += src_stride; 84 dst += dst_stride; 85 vst4q_u16(dst, vld4q_u16(src)); 86 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 87 src += src_stride; 88 dst += dst_stride; 89 vst4q_u16(dst, vld4q_u16(src)); 90 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 91 src += src_stride; 92 dst += dst_stride; 93 vst4q_u16(dst, vld4q_u16(src)); 94 vst4q_u16(dst + 32, vld4q_u16(src + 32)); 95 src += src_stride; 96 dst += dst_stride; 97 h -= 4; 98 } while (h > 0); 99 } 100 } 101