/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"

#if HAVE_DSPR2
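/*
 * "pref 0, offset(base)" is the MIPS prefetch instruction with hint 0
 * (load): it asks the core to bring the addressed cache line into the
 * data cache ahead of the reads issued by the copy loops below.
 */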
inline void prefetch_load_int(unsigned char *src)
{
    __asm__ __volatile__ (
        "pref   0,  0(%[src])   \n\t"
        :
        : [src] "r" (src)
    );
}


__inline void vp8_copy_mem16x16_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1, a2, a3;

    for (r = 16; r--;)
    {
        /* load src data in cache memory */
        prefetch_load_int(src + src_stride);

        /* use unaligned memory load and store */
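        /*
         * "ulw" tolerates any src alignment; "sw" requires a word-aligned
         * address, so dst is assumed to be 4-byte aligned by the caller.
         * The "=&r" earlyclobber constraints keep the loaded words out of
         * the registers holding src and dst.
         */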
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "ulw    %[a2], 8(%[src])            \n\t"
            "ulw    %[a3], 12(%[src])           \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            "sw     %[a2], 8(%[dst])            \n\t"
            "sw     %[a3], 12(%[dst])           \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1),
              [a2] "=&r" (a2), [a3] "=&r" (a3)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}


__inline void vp8_copy_mem8x8_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);
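    /* Unlike the 16x16 copy above, the prefetch is issued once, ahead of the loop. */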

    for (r = 8; r--;)
    {
        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}


__inline void vp8_copy_mem8x4_dspr2(
    unsigned char *RESTRICT src,
    int src_stride,
    unsigned char *RESTRICT dst,
    int dst_stride)
{
    int r;
    unsigned int a0, a1;

    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);

    for (r = 4; r--;)
    {
        /* use unaligned memory load and store */
        __asm__ __volatile__ (
            "ulw    %[a0], 0(%[src])            \n\t"
            "ulw    %[a1], 4(%[src])            \n\t"
            "sw     %[a0], 0(%[dst])            \n\t"
            "sw     %[a1], 4(%[dst])            \n\t"
            : [a0] "=&r" (a0), [a1] "=&r" (a1)
            : [src] "r" (src), [dst] "r" (dst)
        );

        src += src_stride;
        dst += dst_stride;
    }
}

#endif
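
/*
 * Usage sketch (illustrative assumption, not part of the build): on a
 * DSPR2-capable target the run-time CPU detection behind vp8_rtcd.h is
 * expected to route the generic copy call to the routines above, e.g.
 *
 *     vp8_copy_mem16x16(ref_ptr, ref_stride, pred_ptr, pred_stride);
 *
 * The pointer and stride names here are hypothetical; the actual dispatch
 * symbol comes from the generated RTCD table.
 */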