/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"

#if HAVE_DSPR2
inline void prefetch_load_int(unsigned char *src)
{
    __asm__ __volatile__ (
        "pref   0,  0(%[src])   \n\t"
        :
        : [src] "r" (src)
    );
}


__inline void vp8_copy_mem16x16_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1, a2, a3;

    for (r = 16; r--;)
    {
        /* load src data in cache memory */
        prefetch_load_int(src + src_stride);

        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "ulw    %[a2], 8(%[src])            \n\t"
            "ulw    %[a3], 12(%[src])           \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            "sw     %[a2], 8(%[dst])            \n\t"
            "sw     %[a3], 12(%[dst])           \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1),
              [a2] "=&r" (a2), [a3] "=&r" (a3)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}


__inline void vp8_copy_mem8x8_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);

    for (r = 8; r--;)
    {
        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}


__inline void vp8_copy_mem8x4_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);

    for (r = 4; r--;)
    {
        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}

#endif